- added field options to all solr queries. This can be used to restrict
the actual data which is fetched from solr.
- used the new field options to reduce the data fetched by generic operations
like getting the load date or the count of search results. This should increase overall speed.
- used the new field options to reduce overhead in the host browser
during acquisition of links.
- used the field options to make checking of links in crawler faster
- if the crawler is paused, the crawl queue is not cleaned
pull/1/head
Michael Peter Christen 12 years ago
parent 46be4af5b9
commit f5ca5cea44

@ -54,6 +54,8 @@ import net.yacy.server.serverSwitch;
public class HostBrowser { public class HostBrowser {
final static long TIMEOUT = 10000L;
public static enum StoreType { public static enum StoreType {
LINK, INDEX, ERROR; LINK, INDEX, ERROR;
} }
@ -141,7 +143,7 @@ public class HostBrowser {
int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
// collect hosts from index // collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getSolr().getFacets("*:*", new String[]{YaCySchema.host_s.getSolrFieldName()}, maxcount).get(YaCySchema.host_s.getSolrFieldName()); ReversibleScoreMap<String> hostscore = fulltext.getSolr().getFacets("*:*", maxcount, YaCySchema.host_s.getSolrFieldName()).get(YaCySchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>(); if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
// collect hosts from crawler // collect hosts from crawler
@ -151,7 +153,7 @@ public class HostBrowser {
} }
// collect the errorurls // collect the errorurls
ReversibleScoreMap<String> errorscore = admin ? fulltext.getSolr().getFacets(YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]", new String[]{YaCySchema.host_s.getSolrFieldName()}, maxcount).get(YaCySchema.host_s.getSolrFieldName()) : null; ReversibleScoreMap<String> errorscore = admin ? fulltext.getSolr().getFacets(YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]", maxcount, YaCySchema.host_s.getSolrFieldName()).get(YaCySchema.host_s.getSolrFieldName()) : null;
if (errorscore == null) errorscore = new ClusteredScoreMap<String>(); if (errorscore == null) errorscore = new ClusteredScoreMap<String>();
int c = 0; int c = 0;
@ -225,7 +227,15 @@ public class HostBrowser {
q.append(" AND ").append(YaCySchema.url_paths_sxt.getSolrFieldName()).append(":[* TO *]"); q.append(" AND ").append(YaCySchema.url_paths_sxt.getSolrFieldName()).append(":[* TO *]");
} }
} }
BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, 10000, 100); BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, TIMEOUT, 100,
YaCySchema.id.getSolrFieldName(),
YaCySchema.sku.getSolrFieldName(),
YaCySchema.failreason_t.getSolrFieldName(),
YaCySchema.inboundlinks_protocol_sxt.getSolrFieldName(),
YaCySchema.inboundlinks_urlstub_txt.getSolrFieldName(),
YaCySchema.outboundlinks_protocol_sxt.getSolrFieldName(),
YaCySchema.outboundlinks_urlstub_txt.getSolrFieldName()
);
SolrDocument doc; SolrDocument doc;
Set<String> storedDocs = new HashSet<String>(); Set<String> storedDocs = new HashSet<String>();
Map<String, String> errorDocs = new HashMap<String, String>(); Map<String, String> errorDocs = new HashMap<String, String>();
@ -233,19 +243,20 @@ public class HostBrowser {
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>(); Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
int hostsize = 0; int hostsize = 0;
final List<byte[]> deleteIDs = new ArrayList<byte[]>(); final List<byte[]> deleteIDs = new ArrayList<byte[]>();
long timeout = System.currentTimeMillis() + 10000; long timeout = System.currentTimeMillis() + TIMEOUT;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()); String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName());
String error = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName()); String error = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName());
if (u.startsWith(path)) { if (u.startsWith(path)) {
if (delete) { if (delete) {
deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.name()))); deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName())));
} else { } else {
if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error); if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error);
} }
} else if (complete) { } else if (complete) {
if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error); if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error);
} }
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); // add the current link
if (error == null) { if (error == null) {
hostsize++; hostsize++;
// collect inboundlinks to browse the host // collect inboundlinks to browse the host

@ -45,14 +45,14 @@ public abstract class AbstractSolrConnector implements SolrConnector {
public final static SolrQuery catchallQuery = new SolrQuery(); public final static SolrQuery catchallQuery = new SolrQuery();
static { static {
catchallQuery.setQuery("*:*"); catchallQuery.setQuery("*:*");
catchallQuery.setFields(YaCySchema.id.name()); catchallQuery.setFields(YaCySchema.id.getSolrFieldName());
catchallQuery.setRows(1); catchallQuery.setRows(1);
catchallQuery.setStart(0); catchallQuery.setStart(0);
} }
public final static SolrQuery catchSuccessQuery = new SolrQuery(); public final static SolrQuery catchSuccessQuery = new SolrQuery();
static { static {
catchSuccessQuery.setQuery("-" + YaCySchema.failreason_t.name() + ":[* TO *]"); catchSuccessQuery.setQuery("-" + YaCySchema.failreason_t.name() + ":[* TO *]");
catchSuccessQuery.setFields(YaCySchema.id.name()); catchSuccessQuery.setFields(YaCySchema.id.getSolrFieldName());
catchSuccessQuery.setRows(1); catchSuccessQuery.setRows(1);
catchSuccessQuery.setStart(0); catchSuccessQuery.setStart(0);
} }
@ -61,7 +61,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
@Override @Override
public boolean exists(final String id) throws IOException { public boolean exists(final String id) throws IOException {
try { try {
final SolrDocument doc = get(id); final SolrDocument doc = get(id, YaCySchema.id.getSolrFieldName());
return doc != null; return doc != null;
} catch (final Throwable e) { } catch (final Throwable e) {
log.warn(e); log.warn(e);
@ -81,7 +81,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
* @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element * @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
*/ */
@Override @Override
public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize) { public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize, final String ... fields) {
final BlockingQueue<SolrDocument> queue = buffersize <= 0 ? new LinkedBlockingQueue<SolrDocument>() : new ArrayBlockingQueue<SolrDocument>(buffersize); final BlockingQueue<SolrDocument> queue = buffersize <= 0 ? new LinkedBlockingQueue<SolrDocument>() : new ArrayBlockingQueue<SolrDocument>(buffersize);
final long endtime = System.currentTimeMillis() + maxtime; final long endtime = System.currentTimeMillis() + maxtime;
final Thread t = new Thread() { final Thread t = new Thread() {
@ -90,7 +90,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
int o = offset; int o = offset;
while (System.currentTimeMillis() < endtime) { while (System.currentTimeMillis() < endtime) {
try { try {
SolrDocumentList sdl = query(querystring, o, pagesize); SolrDocumentList sdl = query(querystring, o, pagesize, fields);
for (SolrDocument d: sdl) { for (SolrDocument d: sdl) {
try {queue.put(d);} catch (InterruptedException e) {break;} try {queue.put(d);} catch (InterruptedException e) {break;}
} }
@ -119,7 +119,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
int o = offset; int o = offset;
while (System.currentTimeMillis() < endtime) { while (System.currentTimeMillis() < endtime) {
try { try {
SolrDocumentList sdl = query(querystring, o, pagesize); SolrDocumentList sdl = query(querystring, o, pagesize, YaCySchema.id.getSolrFieldName());
for (SolrDocument d: sdl) { for (SolrDocument d: sdl) {
try {queue.put((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));} catch (InterruptedException e) {break;} try {queue.put((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));} catch (InterruptedException e) {break;}
} }

@ -231,8 +231,8 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
} }
@Override @Override
public SolrDocument get(String id) throws IOException { public SolrDocument get(final String id, final String ... fields) throws IOException {
SolrDocument doc = this.documentCache.get(id); SolrDocument doc = fields.length == 0 ? this.documentCache.get(id) : null;
if (doc != null) { if (doc != null) {
this.documentCache_Hit++; this.documentCache_Hit++;
return doc; return doc;
@ -243,24 +243,22 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return null; return null;
} }
missCache_Miss++; missCache_Miss++;
if ((solr0 != null && ((doc = solr0.get(id)) != null)) || (solr1 != null && ((doc = solr1.get(id)) != null))) { if ((solr0 != null && ((doc = solr0.get(id, fields)) != null)) || (solr1 != null && ((doc = solr1.get(id, fields)) != null))) {
this.missCache.remove(id); this.missCache.remove(id);
this.hitCache.put(id, EXIST); this.hitCache.put(id, EXIST);
this.hitCache_Insert++; this.hitCache_Insert++;
this.documentCache.put(id, doc); if (fields.length == 0) {this.documentCache.put(id, doc); this.documentCache_Insert++;}
this.documentCache_Insert++;
return doc; return doc;
} }
// check if there is a autocommit problem // check if there is a autocommit problem
if (this.hitCache.containsKey(id)) { if (this.hitCache.containsKey(id)) {
// the document should be there, therefore make a commit and check again // the document should be there, therefore make a commit and check again
this.commit(); this.commit();
if ((solr0 != null && ((doc = solr0.get(id)) != null)) || (solr1 != null && ((doc = solr1.get(id)) != null))) { if ((solr0 != null && ((doc = solr0.get(id, fields)) != null)) || (solr1 != null && ((doc = solr1.get(id, fields)) != null))) {
this.missCache.remove(id); this.missCache.remove(id);
this.hitCache.put(id, EXIST); this.hitCache.put(id, EXIST);
this.hitCache_Insert++; this.hitCache_Insert++;
this.documentCache.put(id, doc); if (fields.length == 0) {this.documentCache.put(id, doc); this.documentCache_Insert++;}
this.documentCache_Insert++;
return doc; return doc;
} }
} }
@ -276,7 +274,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
*/ */
@Override @Override
public void add(final SolrInputDocument solrdoc) throws IOException { public void add(final SolrInputDocument solrdoc) throws IOException {
String id = (String) solrdoc.getFieldValue(YaCySchema.id.name()); String id = (String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName());
assert id != null; assert id != null;
if (id == null) return; if (id == null) return;
this.missCache.remove(id); this.missCache.remove(id);
@ -300,47 +298,47 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
* @throws IOException * @throws IOException
*/ */
@Override @Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException { public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList(); if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList();
if (offset == 0 && count == 1 && querystring.startsWith("id:")) { if (offset == 0 && count == 1 && querystring.startsWith("id:")) {
final SolrDocumentList list = new SolrDocumentList(); final SolrDocumentList list = new SolrDocumentList();
SolrDocument doc = get(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3)); SolrDocument doc = get(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
list.add(doc); list.add(doc);
// no addToCache(list) here because that was already handlet in get(); // no addToCache(list) here because that was already handlet in get();
return list; return list;
} }
if (this.solr0 != null && this.solr1 == null) { if (this.solr0 != null && this.solr1 == null) {
SolrDocumentList list = this.solr0.query(querystring, offset, count); SolrDocumentList list = this.solr0.query(querystring, offset, count, fields);
addToCache(list); if (fields.length == 0) addToCache(list);
return list; return list;
} }
if (this.solr1 != null && this.solr0 == null) { if (this.solr1 != null && this.solr0 == null) {
SolrDocumentList list = this.solr1.query(querystring, offset, count); SolrDocumentList list = this.solr1.query(querystring, offset, count, fields);
addToCache(list); if (fields.length == 0) addToCache(list);
return list; return list;
} }
// combine both lists // combine both lists
SolrDocumentList l; SolrDocumentList l;
l = this.solr0.query(querystring, offset, count); l = this.solr0.query(querystring, offset, count, fields);
if (l.size() >= count) return l; if (l.size() >= count) return l;
// at this point we need to know how many results are in solr0 // at this point we need to know how many results are in solr0
// compute this with a very bad hack; replace with better method later // compute this with a very bad hack; replace with better method later
int size0 = 0; int size0 = 0;
{ //bad hack - TODO: replace { //bad hack - TODO: replace
SolrDocumentList lHack = this.solr0.query(querystring, 0, Integer.MAX_VALUE); SolrDocumentList lHack = this.solr0.query(querystring, 0, Integer.MAX_VALUE, fields);
size0 = lHack.size(); size0 = lHack.size();
} }
// now use the size of the first query to do a second query // now use the size of the first query to do a second query
final SolrDocumentList list = new SolrDocumentList(); final SolrDocumentList list = new SolrDocumentList();
for (final SolrDocument d: l) list.add(d); for (final SolrDocument d: l) list.add(d);
l = this.solr1.query(querystring, offset + l.size() - size0, count - l.size()); l = this.solr1.query(querystring, offset + l.size() - size0, count - l.size(), fields);
for (final SolrDocument d: l) list.add(d); for (final SolrDocument d: l) list.add(d);
// add caching // add caching
addToCache(list); if (fields.length == 0) addToCache(list);
return list; return list;
} }
@ -422,16 +420,16 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
} }
@Override @Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException { public Map<String, ReversibleScoreMap<String>> getFacets(final String query, final int maxresults, final String ... fields) throws IOException {
if (this.solr0 == null && this.solr1 == null) return new HashMap<String, ReversibleScoreMap<String>>(0); if (this.solr0 == null && this.solr1 == null) return new HashMap<String, ReversibleScoreMap<String>>(0);
if (this.solr0 != null && this.solr1 == null) { if (this.solr0 != null && this.solr1 == null) {
return this.solr0.getFacets(query, fields, maxresults); return this.solr0.getFacets(query, maxresults, fields);
} }
if (this.solr1 != null && this.solr0 == null) { if (this.solr1 != null && this.solr0 == null) {
return this.solr1.getFacets(query, fields, maxresults); return this.solr1.getFacets(query, maxresults, fields);
} }
Map<String, ReversibleScoreMap<String>> facets0 = this.solr0.getFacets(query, fields, maxresults); Map<String, ReversibleScoreMap<String>> facets0 = this.solr0.getFacets(query, maxresults, fields);
Map<String, ReversibleScoreMap<String>> facets1 = this.solr1.getFacets(query, fields, maxresults); Map<String, ReversibleScoreMap<String>> facets1 = this.solr1.getFacets(query, maxresults, fields);
for (Map.Entry<String, ReversibleScoreMap<String>> facet0: facets0.entrySet()) { for (Map.Entry<String, ReversibleScoreMap<String>> facet0: facets0.entrySet()) {
ReversibleScoreMap<String> facet1 = facets1.remove(facet0.getKey()); ReversibleScoreMap<String> facet1 = facets1.remove(facet0.getKey());
if (facet1 == null) continue; if (facet1 == null) continue;

@ -44,7 +44,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
private final SolrConnector solr; private final SolrConnector solr;
private int commitWithinMs; private int commitWithinMs;
public MultipleSolrConnector(final String url, int connections) throws IOException { public MultipleSolrConnector(final String url, final int connections) throws IOException {
this.solr = new RemoteSolrConnector(url); this.solr = new RemoteSolrConnector(url);
this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000); this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000);
this.worker = new AddWorker[connections]; this.worker = new AddWorker[connections];
@ -91,7 +91,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
* @param c the maximum waiting time after a solr command until it is transported to the server * @param c the maximum waiting time after a solr command until it is transported to the server
*/ */
@Override @Override
public void setCommitWithinMs(int c) { public void setCommitWithinMs(final int c) {
this.commitWithinMs = c; this.commitWithinMs = c;
this.solr.setCommitWithinMs(c); this.solr.setCommitWithinMs(c);
for (AddWorker w: this.worker) w.solr.setCommitWithinMs(c); for (AddWorker w: this.worker) w.solr.setCommitWithinMs(c);
@ -129,12 +129,12 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
} }
@Override @Override
public void delete(String id) throws IOException { public void delete(final String id) throws IOException {
this.solr.delete(id); this.solr.delete(id);
} }
@Override @Override
public void delete(List<String> ids) throws IOException { public void delete(final List<String> ids) throws IOException {
this.solr.delete(ids); this.solr.delete(ids);
} }
@ -144,8 +144,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
} }
@Override @Override
public SolrDocument get(String id) throws IOException { public SolrDocument get(final String id, final String ... fields) throws IOException {
return this.solr.get(id); return this.solr.get(id, fields);
} }
@Override @Override
@ -169,12 +169,12 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
} }
@Override @Override
public SolrDocumentList query(String querystring, int offset, int count) throws IOException { public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
return this.solr.query(querystring, offset, count); return this.solr.query(querystring, offset, count, fields);
} }
@Override @Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException { public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
return this.solr.query(query); return this.solr.query(query);
} }
@ -184,8 +184,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
} }
@Override @Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException { public Map<String, ReversibleScoreMap<String>> getFacets(final String query, final int maxresults, final String ... fields) throws IOException {
return this.solr.getFacets(query, fields, maxresults); return this.solr.getFacets(query, maxresults, fields);
} }
@Override @Override

@ -145,11 +145,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
} }
@Override @Override
public SolrDocument get(String id) throws IOException { public SolrDocument get(final String id, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime; final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null; Throwable ee = null;
while (System.currentTimeMillis() < t) try { while (System.currentTimeMillis() < t) try {
return this.solrConnector.get(id); return this.solrConnector.get(id, fields);
} catch (final Throwable e) { } catch (final Throwable e) {
ee = e; ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {} try {Thread.sleep(10);} catch (final InterruptedException e1) {}
@ -180,11 +180,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
} }
@Override @Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException { public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime; final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null; Throwable ee = null;
while (System.currentTimeMillis() < t) try { while (System.currentTimeMillis() < t) try {
return this.solrConnector.query(querystring, offset, count); return this.solrConnector.query(querystring, offset, count, fields);
} catch (final Throwable e) { } catch (final Throwable e) {
ee = e; ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {} try {Thread.sleep(10);} catch (final InterruptedException e1) {}
@ -195,7 +195,7 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
} }
@Override @Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException { public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
final long t = System.currentTimeMillis() + this.retryMaxTime; final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null; Throwable ee = null;
while (System.currentTimeMillis() < t) try { while (System.currentTimeMillis() < t) try {
@ -225,11 +225,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
} }
@Override @Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException { public Map<String, ReversibleScoreMap<String>> getFacets(final String query, final int maxresults, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime; final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null; Throwable ee = null;
while (System.currentTimeMillis() < t) try { while (System.currentTimeMillis() < t) try {
return this.solrConnector.getFacets(query, fields, maxresults); return this.solrConnector.getFacets(query, maxresults, fields);
} catch (final Throwable e) { } catch (final Throwable e) {
ee = e; ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {} try {Thread.sleep(10);} catch (final InterruptedException e1) {}

@ -133,9 +133,9 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
} }
@Override @Override
public SolrDocument get(String id) throws IOException { public SolrDocument get(String id, final String ... fields) throws IOException {
for (final SolrConnector connector: this.connectors) { for (final SolrConnector connector: this.connectors) {
SolrDocument doc = connector.get(id); SolrDocument doc = connector.get(id, fields);
if (doc != null) return doc; if (doc != null) return doc;
} }
return null; return null;
@ -172,7 +172,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
* @throws IOException * @throws IOException
*/ */
@Override @Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException { public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
final SolrDocumentList list = new SolrDocumentList(); final SolrDocumentList list = new SolrDocumentList();
List<Thread> t = new ArrayList<Thread>(); List<Thread> t = new ArrayList<Thread>();
for (final SolrConnector connector: this.connectors) { for (final SolrConnector connector: this.connectors) {
@ -180,7 +180,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
@Override @Override
public void run() { public void run() {
try { try {
final SolrDocumentList l = connector.query(querystring, offset, count); final SolrDocumentList l = connector.query(querystring, offset, count, fields);
for (final SolrDocument d: l) { for (final SolrDocument d: l) {
list.add(d); list.add(d);
} }
@ -228,10 +228,10 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
} }
@Override @Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException { public Map<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException {
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(); Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>();
for (final SolrConnector connector: this.connectors) { for (final SolrConnector connector: this.connectors) {
Map<String, ReversibleScoreMap<String>> peer = connector.getFacets(query, fields, maxresults); Map<String, ReversibleScoreMap<String>> peer = connector.getFacets(query, maxresults, fields);
innerloop: for (Map.Entry<String, ReversibleScoreMap<String>> facet: facets.entrySet()) { innerloop: for (Map.Entry<String, ReversibleScoreMap<String>> facet: facets.entrySet()) {
ReversibleScoreMap<String> peerfacet = peer.remove(facet.getKey()); ReversibleScoreMap<String> peerfacet = peer.remove(facet.getKey());
if (peerfacet == null) continue innerloop; if (peerfacet == null) continue innerloop;

@ -107,10 +107,11 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/** /**
* get a document from solr by given id * get a document from solr by given id
* @param id * @param id
* @param fields list of fields
* @return one result or null if no result exists * @return one result or null if no result exists
* @throws IOException * @throws IOException
*/ */
public SolrDocument get(final String id) throws IOException; public SolrDocument get(final String id, final String ... fields) throws IOException;
/** /**
* get a query result from solr * get a query result from solr
@ -122,10 +123,13 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/** /**
* get a query result from solr * get a query result from solr
* to get all results set the query String to "*:*" * to get all results set the query String to "*:*"
* @param querystring * @param querystring the solr query string
* @param offset the first result offset
* @param count number of wanted results
* @param fields list of fields
* @throws IOException * @throws IOException
*/ */
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException, SolrException; public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException, SolrException;
/** /**
* get the number of results when this query is done. * get the number of results when this query is done.
@ -138,12 +142,12 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/** /**
* get facets of the index: a list of lists with values that are most common in a specific field * get facets of the index: a list of lists with values that are most common in a specific field
* @param query a query which is performed to get the facets * @param query a query which is performed to get the facets
* @param fields the field names which are selected as facet
* @param maxresults the maximum size of the resulting maps * @param maxresults the maximum size of the resulting maps
* @param fields the field names which are selected as facet
* @return a map with key = facet field name, value = an ordered map of field values for that field * @return a map with key = facet field name, value = an ordered map of field values for that field
* @throws IOException * @throws IOException
*/ */
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException; public Map<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException;
/** /**
* Get a query result from solr as a stream of documents. * Get a query result from solr as a stream of documents.
@ -154,9 +158,10 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @param maxcount the maximum number of results * @param maxcount the maximum number of results
* @param maxtime the maximum time in milliseconds * @param maxtime the maximum time in milliseconds
* @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used * @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used
* @param fields list of fields
* @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element * @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
*/ */
public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize); public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize, final String ... fields);
/** /**
* get a document id result stream from a solr query. * get a document id result stream from a solr query.

@ -29,6 +29,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap;
@ -40,14 +41,12 @@ import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count; import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector { public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
@ -203,6 +202,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server == null) return; if (this.server == null) return;
try { try {
synchronized (this.server) { synchronized (this.server) {
//this.server.deleteById((String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName()));
this.server.add(solrdoc, this.commitWithinMs); this.server.add(solrdoc, this.commitWithinMs);
//this.server.commit(); //this.server.commit();
} }
@ -215,9 +215,12 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override @Override
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException { public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
ArrayList<SolrInputDocument> l = new ArrayList<SolrInputDocument>(); ArrayList<SolrInputDocument> l = new ArrayList<SolrInputDocument>();
for (SolrInputDocument d: solrdocs) l.add(d);
try { try {
synchronized (this.server) { synchronized (this.server) {
for (SolrInputDocument d: solrdocs) {
//this.server.deleteById((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));
l.add(d);
}
this.server.add(l, this.commitWithinMs); this.server.add(l, this.commitWithinMs);
//this.server.commit(); //this.server.commit();
} }
@ -234,7 +237,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
* @throws IOException * @throws IOException
*/ */
@Override @Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException { public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
// construct query // construct query
final SolrQuery params = new SolrQuery(); final SolrQuery params = new SolrQuery();
params.setQuery(querystring); params.setQuery(querystring);
@ -243,6 +246,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setFacet(false); params.setFacet(false);
//params.addSortField( "price", SolrQuery.ORDER.asc ); //params.addSortField( "price", SolrQuery.ORDER.asc );
if (fields.length > 0) params.setFields(fields);
// query the server // query the server
QueryResponse rsp = query(params); QueryResponse rsp = query(params);
final SolrDocumentList docs = rsp.getResults(); final SolrDocumentList docs = rsp.getResults();
@ -263,6 +268,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setRows(0); params.setRows(0);
params.setStart(0); params.setStart(0);
params.setFacet(false); params.setFacet(false);
params.setFields(YaCySchema.id.getSolrFieldName());
// query the server // query the server
QueryResponse rsp = query(params); QueryResponse rsp = query(params);
@ -278,8 +284,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
* @return a map with key = facet field name, value = an ordered map of field values for that field * @return a map with key = facet field name, value = an ordered map of field values for that field
* @throws IOException * @throws IOException
*/ */
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException { public Map<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException {
// construct query // construct query
assert fields.length > 0;
final SolrQuery params = new SolrQuery(); final SolrQuery params = new SolrQuery();
params.setQuery(query); params.setQuery(query);
params.setRows(0); params.setRows(0);
@ -287,6 +294,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setFacet(true); params.setFacet(true);
params.setFacetLimit(maxresults); params.setFacetLimit(maxresults);
params.setFacetSort(FacetParams.FACET_SORT_COUNT); params.setFacetSort(FacetParams.FACET_SORT_COUNT);
params.setFields(fields);
for (String field: fields) params.addFacetField(field); for (String field: fields) params.addFacetField(field);
// query the server // query the server
@ -315,7 +323,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
* @throws IOException * @throws IOException
*/ */
@Override @Override
public SolrDocument get(final String id) throws IOException { public SolrDocument get(final String id, final String ... fields) throws IOException {
assert id.length() == 12; assert id.length() == 12;
// construct query // construct query
char[] q = new char[17]; char[] q = new char[17];
@ -325,6 +333,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
query.setQuery(new String(q)); query.setQuery(new String(q));
query.setRows(1); query.setRows(1);
query.setStart(0); query.setStart(0);
if (fields.length > 0) query.setFields(fields);
// query the server // query the server
try { try {

@ -60,7 +60,6 @@ import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.SMBLoader; import net.yacy.crawler.retrieval.SMBLoader;
import net.yacy.crawler.robots.RobotsTxt; import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.SeedDB; import net.yacy.peers.SeedDB;
@ -448,8 +447,8 @@ public final class CrawlStacker {
// check if the url is double registered // check if the url is double registered
final String dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists final String dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
final URIMetadataNode oldEntry = this.indexSegment.fulltext().getMetadata(url.hash()); final Date oldDate = this.indexSegment.fulltext().getLoadDate(ASCII.String(url.hash()));
if (oldEntry == null) { if (oldDate == null) {
if (dbocc != null) { if (dbocc != null) {
// do double-check // do double-check
if (dbocc.equals("errors")) { if (dbocc.equals("errors")) {
@ -459,11 +458,11 @@ public final class CrawlStacker {
return "double in: " + dbocc; return "double in: " + dbocc;
} }
} else { } else {
final boolean recrawl = profile.recrawlIfOlder() > oldEntry.loaddate().getTime(); final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime();
if (recrawl) { if (recrawl) {
if (this.log.isInfo()) if (this.log.isInfo())
this.log.logInfo("RE-CRAWL of URL '" + urlstring + "': this url was crawled " + this.log.logInfo("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000 / 60 / 24) + " days ago."); ((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago.");
} else { } else {
if (dbocc == null) { if (dbocc == null) {
return "double in: LURL-DB"; return "double in: LURL-DB";

@ -134,8 +134,7 @@ public class DidYouMean {
* @return * @return
*/ */
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) { public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
if (this.word.length() < MinimumInputWordLength) if (this.word.length() < MinimumInputWordLength) {
{
return this.resultSet; // return nothing if input is too short return this.resultSet; // return nothing if input is too short
} }
final long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();

@ -63,7 +63,7 @@ public class AugmentParser extends AbstractParser implements Parser {
} }
} }
*/ */
private void parseAndAugment(Document origDoc, DigestURI url, String mimeType, String charset) { private void parseAndAugment(Document origDoc, DigestURI url, @SuppressWarnings("unused") String mimeType, @SuppressWarnings("unused") String charset) {
Iterator<net.yacy.kelondro.blob.Tables.Row> it; Iterator<net.yacy.kelondro.blob.Tables.Row> it;
try { try {

@ -513,18 +513,16 @@ public class URIMetadataRow {
final int p = this.latlon.indexOf(','); final int p = this.latlon.indexOf(',');
if (p < 0) { if (p < 0) {
return 0.0d; return 0.0d;
} else { // old index entries might contain text "NaN,NaN"
return this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p));
} }
return this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p));
} }
public double lon() { public double lon() {
if (this.latlon == null || this.latlon.isEmpty()) return 0.0d; if (this.latlon == null || this.latlon.isEmpty()) return 0.0d;
final int p = this.latlon.indexOf(','); final int p = this.latlon.indexOf(',');
if (p < 0) { if (p < 0) {
return 0.0d; return 0.0d;
} else { // old index entries might contain text "NaN,NaN"
return this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1));
} }
return this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1));
} }
} }

@ -2126,7 +2126,7 @@ public final class Switchboard extends serverSwitch {
// clean up profiles // clean up profiles
checkInterruption(); checkInterruption();
//cleanProfiles(); //cleanProfiles();
int cleanup = this.crawler.cleanFinishesProfiles(this.crawlQueues); int cleanup = this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? 0 : this.crawler.cleanFinishesProfiles(this.crawlQueues);
if (cleanup > 0) log.logInfo("cleanup removed " + cleanup + " crawl profiles"); if (cleanup > 0) log.logInfo("cleanup removed " + cleanup + " crawl profiles");
// clean up news // clean up news

@ -42,6 +42,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.YaCySchema; import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
@ -213,6 +214,21 @@ public final class Fulltext implements Iterable<byte[]> {
this.forcedCommitTime = System.currentTimeMillis(); // set the exact time this.forcedCommitTime = System.currentTimeMillis(); // set the exact time
} }
/**
 * Look up the load date of a document, requesting only the load date field from solr.
 *
 * @param urlHash the url hash which is used as the document id; may be null
 * @return null if urlHash is null, if the solr lookup throws an IOException or if
 *         no document is returned for the hash; a Date at epoch 0 if the document
 *         has no load date value; otherwise the stored load date, replaced by the
 *         current time if the stored value lies in the future
 */
public Date getLoadDate(final String urlHash) {
    if (urlHash == null) {
        return null;
    }
    final String loadDateField = YaCySchema.load_date_dt.getSolrFieldName();
    SolrDocument entry;
    try {
        // fetch only the single field that is needed here
        entry = this.solr.get(urlHash, loadDateField);
    } catch (IOException e) {
        // a failed lookup is reported in the same way as a missing document
        return null;
    }
    if (entry == null) {
        return null;
    }
    final Date loaded = (Date) entry.getFieldValue(loadDateField);
    if (loaded == null) {
        // document exists but carries no load date
        return new Date(0);
    }
    final Date current = new Date();
    if (loaded.after(current)) {
        // never report a load date from the future
        return current;
    }
    return loaded;
}
/** /**
* generates an plasmaLURLEntry using the url hash * generates an plasmaLURLEntry using the url hash
* if the url cannot be found, this returns null * if the url cannot be found, this returns null
@ -259,7 +275,7 @@ public final class Fulltext implements Iterable<byte[]> {
} }
public void putDocument(final SolrInputDocument doc) throws IOException { public void putDocument(final SolrInputDocument doc) throws IOException {
String id = (String) doc.getFieldValue(YaCySchema.id.name()); String id = (String) doc.getFieldValue(YaCySchema.id.getSolrFieldName());
byte[] idb = ASCII.getBytes(id); byte[] idb = ASCII.getBytes(id);
try { try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
@ -385,7 +401,7 @@ public final class Fulltext implements Iterable<byte[]> {
final AtomicInteger count = new AtomicInteger(0); final AtomicInteger count = new AtomicInteger(0);
Thread t = new Thread(){ Thread t = new Thread(){
public void run() { public void run() {
final BlockingQueue<SolrDocument> docs = getSolr().concurrentQuery(q, 0, 1000000, 600000, -1); final BlockingQueue<SolrDocument> docs = getSolr().concurrentQuery(q, 0, 1000000, 600000, -1, YaCySchema.id.getSolrFieldName(), YaCySchema.sku.getSolrFieldName());
try { try {
SolrDocument doc; SolrDocument doc;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
@ -464,7 +480,7 @@ public final class Fulltext implements Iterable<byte[]> {
public String failReason(final String urlHash) throws IOException { public String failReason(final String urlHash) throws IOException {
if (urlHash == null) return null; if (urlHash == null) return null;
SolrDocument doc = this.solr.get(urlHash); SolrDocument doc = this.solr.get(urlHash, YaCySchema.failreason_t.getSolrFieldName());
if (doc == null) return null; if (doc == null) return null;
String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName()); String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName());
return reason == null ? null : reason.length() == 0 ? null : reason; return reason == null ? null : reason.length() == 0 ? null : reason;

Loading…
Cancel
Save