diff --git a/htroot/IndexControlURLs_p.html b/htroot/IndexControlURLs_p.html
index b83d63754..6b96b1b17 100644
--- a/htroot/IndexControlURLs_p.html
+++ b/htroot/IndexControlURLs_p.html
@@ -193,6 +193,9 @@ function updatepage(str) {
URL Filter
+ query
+
+
Export Format
Only Domain:
Plain Text List (domains only)
diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java
index 94d46ba0e..48da0982c 100644
--- a/htroot/IndexControlURLs_p.java
+++ b/htroot/IndexControlURLs_p.java
@@ -261,7 +261,8 @@ public class IndexControlURLs_p {
final File f = new File(s);
f.getParentFile().mkdirs();
final String filter = post.get("exportfilter", ".*");
- final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom);
+ final String query = post.get("exportquery", "*:*");
+ final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom);
prop.put("lurlexport_exportfile", s);
prop.put("lurlexport_urlcount", running.count());
diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java
index ea8a2bac5..cc127ecbe 100644
--- a/source/net/yacy/search/index/Fulltext.java
+++ b/source/net/yacy/search/index/Fulltext.java
@@ -752,12 +752,12 @@ public final class Fulltext {
}
// export methods
- public Export export(final File f, final String filter, final int format, final boolean dom) {
+ public Export export(final File f, final String filter, final String query, final int format, final boolean dom) {
if ((this.exportthread != null) && (this.exportthread.isAlive())) {
ConcurrentLog.warn("LURL-EXPORT", "cannot start another export thread, already one running");
return this.exportthread;
}
- this.exportthread = new Export(f, filter, format, dom);
+ this.exportthread = new Export(f, filter, query, format, dom);
this.exportthread.start();
return this.exportthread;
}
@@ -770,14 +770,15 @@ public final class Fulltext {
private final File f;
private final Pattern pattern;
private int count;
- private String failure;
+ private String failure, query;
private final int format;
private final boolean dom;
- private Export(final File f, final String filter, final int format, boolean dom) {
+ private Export(final File f, final String filter, final String query, final int format, boolean dom) {
// format: 0=text, 1=html, 2=rss/xml
this.f = f;
this.pattern = filter == null ? null : Pattern.compile(filter);
+ this.query = (query == null || query.trim().isEmpty()) ? "*:*" : "(" + query.trim() + ")"; // blank -> match-all; grouped so the status filter appended with " AND " applies to the whole user query, not just its last clause
this.count = 0;
this.failure = null;
this.format = format;
@@ -806,7 +807,7 @@ public final class Fulltext {
if (this.dom) {
- Map> scores = Fulltext.this.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
+ Map> scores = Fulltext.this.getDefaultConnector().getFacets(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap stats = scores.get(CollectionSchema.host_s.getSolrFieldName());
for (final String host: stats) {
if (this.pattern != null && !this.pattern.matcher(host).matches()) continue;
@@ -815,21 +816,19 @@ public final class Fulltext {
this.count++;
}
} else {
- BlockingQueue docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
+ BlockingQueue docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.title.getSolrFieldName(),
CollectionSchema.author.getSolrFieldName(), CollectionSchema.description_txt.getSolrFieldName(), CollectionSchema.size_i.getSolrFieldName(), CollectionSchema.last_modified.getSolrFieldName());
SolrDocument doc;
- ArrayList> title;
- String url, author, hash;
- String[] descriptions;
+ String url, hash, title, author, description;
Integer size;
Date date;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
- hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
- url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
- title = (ArrayList>) doc.getFieldValue(CollectionSchema.title.getSolrFieldName());
- author = (String) doc.getFieldValue(CollectionSchema.author.getSolrFieldName());
- descriptions = (String[]) doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName());
+ hash = getStringFrom(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
+ url = getStringFrom(doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
+ title = getStringFrom(doc.getFieldValue(CollectionSchema.title.getSolrFieldName()));
+ author = getStringFrom(doc.getFieldValue(CollectionSchema.author.getSolrFieldName()));
+ description = getStringFrom(doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName()));
size = (Integer) doc.getFieldValue(CollectionSchema.size_i.getSolrFieldName());
date = (Date) doc.getFieldValue(CollectionSchema.last_modified.getSolrFieldName());
if (this.pattern != null && !this.pattern.matcher(url).matches()) continue;
@@ -837,16 +836,14 @@ public final class Fulltext {
pw.println(url);
}
if (this.format == 1) {
- if (title != null) pw.println("" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "");
+ if (title != null && !title.isEmpty()) pw.println("" + CharacterCoding.unicode2xml(title, true) + ""); // getStringFrom yields "" for a missing field, so also skip empty titles
}
if (this.format == 2) {
pw.println("- ");
- if (title != null) pw.println("" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "");
+ if (title != null && !title.isEmpty()) pw.println("" + CharacterCoding.unicode2xml(title, true) + ""); // same emptiness check as author and description below
pw.println("" + MultiProtocolURL.escape(url) + "");
if (author != null && !author.isEmpty()) pw.println("" + CharacterCoding.unicode2xml(author, true) + "");
- if (descriptions != null && descriptions.length > 0) {
- for (String d: descriptions) pw.println("" + CharacterCoding.unicode2xml(d, true) + "");
- }
+ if (description != null && !description.isEmpty()) pw.println("" + CharacterCoding.unicode2xml(description, true) + "");
if (date != null) pw.println("" + HeaderFramework.formatRFC1123(date) + "");
if (size != null) pw.println("" + size.intValue() + "");
pw.println("" + hash + "");
@@ -884,6 +881,13 @@ public final class Fulltext {
public int count() {
return this.count;
}
+
+ /** Returns a document field value as a string: the first entry if the value is an ArrayList, the value itself otherwise, and "" if there is no value. */
+ private String getStringFrom(final Object o) {
+ Object value = o;
+ if (o instanceof ArrayList) value = ((ArrayList<?>) o).isEmpty() ? null : ((ArrayList<?>) o).get(0); // list-valued field: take the first entry, tolerate an empty list
+ return value == null ? "" : value.toString(); // absent field or null entry -> ""; toString() avoids a ClassCastException on non-String values
+ }
}
diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java
index 745bbb2ac..50861de59 100644
--- a/source/net/yacy/search/query/QueryGoal.java
+++ b/source/net/yacy/search/query/QueryGoal.java
@@ -242,7 +242,8 @@ public class QueryGoal {
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
- q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))");
+ q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); // trailing space keeps the OR operator separate from the field name appended next
+ q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");
// parse special requests
if (isCatchall()) return q;