- added field options to all solr queries. These can be used to restrict
the actual data which is fetched from solr.
- used the new field options to reduce the cost of generic operations like
getting the load date or the count of search results. This should increase overall speed
- used the new field options to reduce overhead in the host browser
during acquisition of links.
- used the field options to make checking of links in crawler faster
- if the crawler is paused, the crawl queue is not cleaned
pull/1/head
Michael Peter Christen 12 years ago
parent 46be4af5b9
commit f5ca5cea44

@ -53,7 +53,9 @@ import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public class HostBrowser {
final static long TIMEOUT = 10000L;
/**
 * Store categories known to the host browser.
 * NOTE(review): presumably these tell apart urls that are only known as links,
 * urls stored in the index and urls stored with a fail reason - confirm against the callers.
 */
public static enum StoreType {
    LINK,
    INDEX,
    ERROR
}
@ -141,7 +143,7 @@ public class HostBrowser {
int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
// collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getSolr().getFacets("*:*", new String[]{YaCySchema.host_s.getSolrFieldName()}, maxcount).get(YaCySchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> hostscore = fulltext.getSolr().getFacets("*:*", maxcount, YaCySchema.host_s.getSolrFieldName()).get(YaCySchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
// collect hosts from crawler
@ -151,7 +153,7 @@ public class HostBrowser {
}
// collect the errorurls
ReversibleScoreMap<String> errorscore = admin ? fulltext.getSolr().getFacets(YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]", new String[]{YaCySchema.host_s.getSolrFieldName()}, maxcount).get(YaCySchema.host_s.getSolrFieldName()) : null;
ReversibleScoreMap<String> errorscore = admin ? fulltext.getSolr().getFacets(YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]", maxcount, YaCySchema.host_s.getSolrFieldName()).get(YaCySchema.host_s.getSolrFieldName()) : null;
if (errorscore == null) errorscore = new ClusteredScoreMap<String>();
int c = 0;
@ -225,7 +227,15 @@ public class HostBrowser {
q.append(" AND ").append(YaCySchema.url_paths_sxt.getSolrFieldName()).append(":[* TO *]");
}
}
BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, 10000, 100);
BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, TIMEOUT, 100,
YaCySchema.id.getSolrFieldName(),
YaCySchema.sku.getSolrFieldName(),
YaCySchema.failreason_t.getSolrFieldName(),
YaCySchema.inboundlinks_protocol_sxt.getSolrFieldName(),
YaCySchema.inboundlinks_urlstub_txt.getSolrFieldName(),
YaCySchema.outboundlinks_protocol_sxt.getSolrFieldName(),
YaCySchema.outboundlinks_urlstub_txt.getSolrFieldName()
);
SolrDocument doc;
Set<String> storedDocs = new HashSet<String>();
Map<String, String> errorDocs = new HashMap<String, String>();
@ -233,19 +243,20 @@ public class HostBrowser {
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
int hostsize = 0;
final List<byte[]> deleteIDs = new ArrayList<byte[]>();
long timeout = System.currentTimeMillis() + 10000;
long timeout = System.currentTimeMillis() + TIMEOUT;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName());
String error = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName());
if (u.startsWith(path)) {
if (delete) {
deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.name())));
deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName())));
} else {
if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error);
}
} else if (complete) {
if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error);
}
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); // add the current link
if (error == null) {
hostsize++;
// collect inboundlinks to browse the host

@ -45,14 +45,14 @@ public abstract class AbstractSolrConnector implements SolrConnector {
public final static SolrQuery catchallQuery = new SolrQuery();
static {
catchallQuery.setQuery("*:*");
catchallQuery.setFields(YaCySchema.id.name());
catchallQuery.setFields(YaCySchema.id.getSolrFieldName());
catchallQuery.setRows(1);
catchallQuery.setStart(0);
}
public final static SolrQuery catchSuccessQuery = new SolrQuery();
static {
catchSuccessQuery.setQuery("-" + YaCySchema.failreason_t.name() + ":[* TO *]");
catchSuccessQuery.setFields(YaCySchema.id.name());
catchSuccessQuery.setFields(YaCySchema.id.getSolrFieldName());
catchSuccessQuery.setRows(1);
catchSuccessQuery.setStart(0);
}
@ -61,7 +61,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
@Override
public boolean exists(final String id) throws IOException {
try {
final SolrDocument doc = get(id);
final SolrDocument doc = get(id, YaCySchema.id.getSolrFieldName());
return doc != null;
} catch (final Throwable e) {
log.warn(e);
@ -81,7 +81,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
* @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
*/
@Override
public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize) {
public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize, final String ... fields) {
final BlockingQueue<SolrDocument> queue = buffersize <= 0 ? new LinkedBlockingQueue<SolrDocument>() : new ArrayBlockingQueue<SolrDocument>(buffersize);
final long endtime = System.currentTimeMillis() + maxtime;
final Thread t = new Thread() {
@ -90,7 +90,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
int o = offset;
while (System.currentTimeMillis() < endtime) {
try {
SolrDocumentList sdl = query(querystring, o, pagesize);
SolrDocumentList sdl = query(querystring, o, pagesize, fields);
for (SolrDocument d: sdl) {
try {queue.put(d);} catch (InterruptedException e) {break;}
}
@ -119,7 +119,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
int o = offset;
while (System.currentTimeMillis() < endtime) {
try {
SolrDocumentList sdl = query(querystring, o, pagesize);
SolrDocumentList sdl = query(querystring, o, pagesize, YaCySchema.id.getSolrFieldName());
for (SolrDocument d: sdl) {
try {queue.put((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));} catch (InterruptedException e) {break;}
}

@ -231,8 +231,8 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
}
@Override
public SolrDocument get(String id) throws IOException {
SolrDocument doc = this.documentCache.get(id);
public SolrDocument get(final String id, final String ... fields) throws IOException {
SolrDocument doc = fields.length == 0 ? this.documentCache.get(id) : null;
if (doc != null) {
this.documentCache_Hit++;
return doc;
@ -243,24 +243,22 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return null;
}
missCache_Miss++;
if ((solr0 != null && ((doc = solr0.get(id)) != null)) || (solr1 != null && ((doc = solr1.get(id)) != null))) {
if ((solr0 != null && ((doc = solr0.get(id, fields)) != null)) || (solr1 != null && ((doc = solr1.get(id, fields)) != null))) {
this.missCache.remove(id);
this.hitCache.put(id, EXIST);
this.hitCache_Insert++;
this.documentCache.put(id, doc);
this.documentCache_Insert++;
if (fields.length == 0) {this.documentCache.put(id, doc); this.documentCache_Insert++;}
return doc;
}
// check if there is a autocommit problem
if (this.hitCache.containsKey(id)) {
// the document should be there, therefore make a commit and check again
this.commit();
if ((solr0 != null && ((doc = solr0.get(id)) != null)) || (solr1 != null && ((doc = solr1.get(id)) != null))) {
if ((solr0 != null && ((doc = solr0.get(id, fields)) != null)) || (solr1 != null && ((doc = solr1.get(id, fields)) != null))) {
this.missCache.remove(id);
this.hitCache.put(id, EXIST);
this.hitCache_Insert++;
this.documentCache.put(id, doc);
this.documentCache_Insert++;
if (fields.length == 0) {this.documentCache.put(id, doc); this.documentCache_Insert++;}
return doc;
}
}
@ -276,7 +274,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
*/
@Override
public void add(final SolrInputDocument solrdoc) throws IOException {
String id = (String) solrdoc.getFieldValue(YaCySchema.id.name());
String id = (String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName());
assert id != null;
if (id == null) return;
this.missCache.remove(id);
@ -300,47 +298,47 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
* @throws IOException
*/
@Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException {
public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList();
if (offset == 0 && count == 1 && querystring.startsWith("id:")) {
final SolrDocumentList list = new SolrDocumentList();
SolrDocument doc = get(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3));
SolrDocument doc = get(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
list.add(doc);
// no addToCache(list) here because that was already handled in get();
return list;
}
if (this.solr0 != null && this.solr1 == null) {
SolrDocumentList list = this.solr0.query(querystring, offset, count);
addToCache(list);
SolrDocumentList list = this.solr0.query(querystring, offset, count, fields);
if (fields.length == 0) addToCache(list);
return list;
}
if (this.solr1 != null && this.solr0 == null) {
SolrDocumentList list = this.solr1.query(querystring, offset, count);
addToCache(list);
SolrDocumentList list = this.solr1.query(querystring, offset, count, fields);
if (fields.length == 0) addToCache(list);
return list;
}
// combine both lists
SolrDocumentList l;
l = this.solr0.query(querystring, offset, count);
l = this.solr0.query(querystring, offset, count, fields);
if (l.size() >= count) return l;
// at this point we need to know how many results are in solr0
// compute this with a very bad hack; replace with better method later
int size0 = 0;
{ //bad hack - TODO: replace
SolrDocumentList lHack = this.solr0.query(querystring, 0, Integer.MAX_VALUE);
SolrDocumentList lHack = this.solr0.query(querystring, 0, Integer.MAX_VALUE, fields);
size0 = lHack.size();
}
// now use the size of the first query to do a second query
final SolrDocumentList list = new SolrDocumentList();
for (final SolrDocument d: l) list.add(d);
l = this.solr1.query(querystring, offset + l.size() - size0, count - l.size());
l = this.solr1.query(querystring, offset + l.size() - size0, count - l.size(), fields);
for (final SolrDocument d: l) list.add(d);
// add caching
addToCache(list);
if (fields.length == 0) addToCache(list);
return list;
}
@ -422,16 +420,16 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
}
@Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
public Map<String, ReversibleScoreMap<String>> getFacets(final String query, final int maxresults, final String ... fields) throws IOException {
if (this.solr0 == null && this.solr1 == null) return new HashMap<String, ReversibleScoreMap<String>>(0);
if (this.solr0 != null && this.solr1 == null) {
return this.solr0.getFacets(query, fields, maxresults);
return this.solr0.getFacets(query, maxresults, fields);
}
if (this.solr1 != null && this.solr0 == null) {
return this.solr1.getFacets(query, fields, maxresults);
return this.solr1.getFacets(query, maxresults, fields);
}
Map<String, ReversibleScoreMap<String>> facets0 = this.solr0.getFacets(query, fields, maxresults);
Map<String, ReversibleScoreMap<String>> facets1 = this.solr1.getFacets(query, fields, maxresults);
Map<String, ReversibleScoreMap<String>> facets0 = this.solr0.getFacets(query, maxresults, fields);
Map<String, ReversibleScoreMap<String>> facets1 = this.solr1.getFacets(query, maxresults, fields);
for (Map.Entry<String, ReversibleScoreMap<String>> facet0: facets0.entrySet()) {
ReversibleScoreMap<String> facet1 = facets1.remove(facet0.getKey());
if (facet1 == null) continue;

@ -44,7 +44,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
private final SolrConnector solr;
private int commitWithinMs;
public MultipleSolrConnector(final String url, int connections) throws IOException {
public MultipleSolrConnector(final String url, final int connections) throws IOException {
this.solr = new RemoteSolrConnector(url);
this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000);
this.worker = new AddWorker[connections];
@ -91,7 +91,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
* @param c the maximum waiting time after a solr command until it is transported to the server
*/
@Override
public void setCommitWithinMs(int c) {
public void setCommitWithinMs(final int c) {
this.commitWithinMs = c;
this.solr.setCommitWithinMs(c);
for (AddWorker w: this.worker) w.solr.setCommitWithinMs(c);
@ -129,12 +129,12 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
}
@Override
public void delete(String id) throws IOException {
public void delete(final String id) throws IOException {
this.solr.delete(id);
}
@Override
public void delete(List<String> ids) throws IOException {
public void delete(final List<String> ids) throws IOException {
this.solr.delete(ids);
}
@ -144,8 +144,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
}
@Override
public SolrDocument get(String id) throws IOException {
return this.solr.get(id);
public SolrDocument get(final String id, final String ... fields) throws IOException {
return this.solr.get(id, fields);
}
@Override
@ -169,12 +169,12 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
}
@Override
public SolrDocumentList query(String querystring, int offset, int count) throws IOException {
return this.solr.query(querystring, offset, count);
public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
return this.solr.query(querystring, offset, count, fields);
}
@Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException {
public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
return this.solr.query(query);
}
@ -184,8 +184,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
}
@Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
return this.solr.getFacets(query, fields, maxresults);
public Map<String, ReversibleScoreMap<String>> getFacets(final String query, final int maxresults, final String ... fields) throws IOException {
return this.solr.getFacets(query, maxresults, fields);
}
@Override

@ -145,11 +145,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public SolrDocument get(String id) throws IOException {
public SolrDocument get(final String id, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
return this.solrConnector.get(id);
return this.solrConnector.get(id, fields);
} catch (final Throwable e) {
ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {}
@ -180,11 +180,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException {
public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
return this.solrConnector.query(querystring, offset, count);
return this.solrConnector.query(querystring, offset, count, fields);
} catch (final Throwable e) {
ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {}
@ -195,7 +195,7 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException {
public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
@ -225,11 +225,11 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
public Map<String, ReversibleScoreMap<String>> getFacets(final String query, final int maxresults, final String ... fields) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
return this.solrConnector.getFacets(query, fields, maxresults);
return this.solrConnector.getFacets(query, maxresults, fields);
} catch (final Throwable e) {
ee = e;
try {Thread.sleep(10);} catch (final InterruptedException e1) {}

@ -133,9 +133,9 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public SolrDocument get(String id) throws IOException {
public SolrDocument get(String id, final String ... fields) throws IOException {
for (final SolrConnector connector: this.connectors) {
SolrDocument doc = connector.get(id);
SolrDocument doc = connector.get(id, fields);
if (doc != null) return doc;
}
return null;
@ -172,7 +172,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
* @throws IOException
*/
@Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException {
public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
final SolrDocumentList list = new SolrDocumentList();
List<Thread> t = new ArrayList<Thread>();
for (final SolrConnector connector: this.connectors) {
@ -180,7 +180,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
@Override
public void run() {
try {
final SolrDocumentList l = connector.query(querystring, offset, count);
final SolrDocumentList l = connector.query(querystring, offset, count, fields);
for (final SolrDocument d: l) {
list.add(d);
}
@ -228,10 +228,10 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
}
@Override
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
public Map<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException {
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>();
for (final SolrConnector connector: this.connectors) {
Map<String, ReversibleScoreMap<String>> peer = connector.getFacets(query, fields, maxresults);
Map<String, ReversibleScoreMap<String>> peer = connector.getFacets(query, maxresults, fields);
innerloop: for (Map.Entry<String, ReversibleScoreMap<String>> facet: facets.entrySet()) {
ReversibleScoreMap<String> peerfacet = peer.remove(facet.getKey());
if (peerfacet == null) continue innerloop;

@ -107,10 +107,11 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/**
* get a document from solr by given id
* @param id
* @param fields list of fields
* @return one result or null if no result exists
* @throws IOException
*/
public SolrDocument get(final String id) throws IOException;
public SolrDocument get(final String id, final String ... fields) throws IOException;
/**
* get a query result from solr
@ -122,10 +123,13 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/**
* get a query result from solr
* to get all results set the query String to "*:*"
* @param querystring
* @param querystring the solr query string
* @param offset the first result offset
* @param count number of wanted results
* @param fields list of fields
* @throws IOException
*/
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException, SolrException;
public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException, SolrException;
/**
* get the number of results when this query is done.
@ -138,12 +142,12 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
/**
* get facets of the index: a list of lists with values that are most common in a specific field
* @param query a query which is performed to get the facets
* @param fields the field names which are selected as facet
* @param maxresults the maximum size of the resulting maps
* @param fields the field names which are selected as facet
* @return a map with key = facet field name, value = an ordered map of field values for that field
* @throws IOException
*/
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException;
public Map<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException;
/**
* Get a query result from solr as a stream of documents.
@ -154,9 +158,10 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @param maxcount the maximum number of results
* @param maxtime the maximum time in milliseconds
* @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used
* @param fields list of fields
* @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
*/
public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize);
public BlockingQueue<SolrDocument> concurrentQuery(final String querystring, final int offset, final int maxcount, final long maxtime, final int buffersize, final String ... fields);
/**
* get a document id result stream from a solr query.

@ -29,6 +29,7 @@ import java.util.List;
import java.util.Map;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
@ -40,14 +41,12 @@ import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
@ -203,6 +202,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server == null) return;
try {
synchronized (this.server) {
//this.server.deleteById((String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName()));
this.server.add(solrdoc, this.commitWithinMs);
//this.server.commit();
}
@ -215,9 +215,12 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
ArrayList<SolrInputDocument> l = new ArrayList<SolrInputDocument>();
for (SolrInputDocument d: solrdocs) l.add(d);
try {
synchronized (this.server) {
for (SolrInputDocument d: solrdocs) {
//this.server.deleteById((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));
l.add(d);
}
this.server.add(l, this.commitWithinMs);
//this.server.commit();
}
@ -234,7 +237,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
* @throws IOException
*/
@Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException {
public SolrDocumentList query(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
// construct query
final SolrQuery params = new SolrQuery();
params.setQuery(querystring);
@ -243,6 +246,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setFacet(false);
//params.addSortField( "price", SolrQuery.ORDER.asc );
if (fields.length > 0) params.setFields(fields);
// query the server
QueryResponse rsp = query(params);
final SolrDocumentList docs = rsp.getResults();
@ -263,6 +268,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setRows(0);
params.setStart(0);
params.setFacet(false);
params.setFields(YaCySchema.id.getSolrFieldName());
// query the server
QueryResponse rsp = query(params);
@ -278,8 +284,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
* @return a map with key = facet field name, value = an ordered map of field values for that field
* @throws IOException
*/
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
public Map<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException {
// construct query
assert fields.length > 0;
final SolrQuery params = new SolrQuery();
params.setQuery(query);
params.setRows(0);
@ -287,6 +294,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
params.setFacet(true);
params.setFacetLimit(maxresults);
params.setFacetSort(FacetParams.FACET_SORT_COUNT);
params.setFields(fields);
for (String field: fields) params.addFacetField(field);
// query the server
@ -315,7 +323,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
* @throws IOException
*/
@Override
public SolrDocument get(final String id) throws IOException {
public SolrDocument get(final String id, final String ... fields) throws IOException {
assert id.length() == 12;
// construct query
char[] q = new char[17];
@ -325,6 +333,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
query.setQuery(new String(q));
query.setRows(1);
query.setStart(0);
if (fields.length > 0) query.setFields(fields);
// query the server
try {

@ -60,7 +60,6 @@ import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.SMBLoader;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.SeedDB;
@ -448,8 +447,8 @@ public final class CrawlStacker {
// check if the url is double registered
final String dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
final URIMetadataNode oldEntry = this.indexSegment.fulltext().getMetadata(url.hash());
if (oldEntry == null) {
final Date oldDate = this.indexSegment.fulltext().getLoadDate(ASCII.String(url.hash()));
if (oldDate == null) {
if (dbocc != null) {
// do double-check
if (dbocc.equals("errors")) {
@ -459,11 +458,11 @@ public final class CrawlStacker {
return "double in: " + dbocc;
}
} else {
final boolean recrawl = profile.recrawlIfOlder() > oldEntry.loaddate().getTime();
final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime();
if (recrawl) {
if (this.log.isInfo())
this.log.logInfo("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000 / 60 / 24) + " days ago.");
((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago.");
} else {
if (dbocc == null) {
return "double in: LURL-DB";

@ -134,8 +134,7 @@ public class DidYouMean {
* @return
*/
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
if (this.word.length() < MinimumInputWordLength)
{
if (this.word.length() < MinimumInputWordLength) {
return this.resultSet; // return nothing if input is too short
}
final long startTime = System.currentTimeMillis();

@ -63,7 +63,7 @@ public class AugmentParser extends AbstractParser implements Parser {
}
}
*/
private void parseAndAugment(Document origDoc, DigestURI url, String mimeType, String charset) {
private void parseAndAugment(Document origDoc, DigestURI url, @SuppressWarnings("unused") String mimeType, @SuppressWarnings("unused") String charset) {
Iterator<net.yacy.kelondro.blob.Tables.Row> it;
try {

@ -513,18 +513,16 @@ public class URIMetadataRow {
final int p = this.latlon.indexOf(',');
if (p < 0) {
return 0.0d;
} else { // old index entries might contain text "NaN,NaN"
return this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p));
}
return this.latlon.charAt(0) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(0, p));
}
public double lon() {
if (this.latlon == null || this.latlon.isEmpty()) return 0.0d;
final int p = this.latlon.indexOf(',');
if (p < 0) {
return 0.0d;
} else { // old index entries might contain text "NaN,NaN"
return this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1));
}
return this.latlon.charAt(p + 1) > '9' ? 0.0d : Double.parseDouble(this.latlon.substring(p + 1));
}
}

@ -2126,7 +2126,7 @@ public final class Switchboard extends serverSwitch {
// clean up profiles
checkInterruption();
//cleanProfiles();
int cleanup = this.crawler.cleanFinishesProfiles(this.crawlQueues);
int cleanup = this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? 0 : this.crawler.cleanFinishesProfiles(this.crawlQueues);
if (cleanup > 0) log.logInfo("cleanup removed " + cleanup + " crawl profiles");
// clean up news

@ -42,6 +42,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
@ -213,6 +214,21 @@ public final class Fulltext implements Iterable<byte[]> {
this.forcedCommitTime = System.currentTimeMillis(); // set the exact time
}
/**
 * Look up the load date of a document, requesting only the load date field from solr.
 *
 * @param urlHash the url hash which is used as document id; may be null
 * @return null if urlHash is null, if the lookup throws an IOException or if no document
 *         was found; the date with time 0 if the document carries no load date;
 *         otherwise the stored load date, capped at the current time
 */
public Date getLoadDate(final String urlHash) {
    if (urlHash == null) return null;
    final String loadDateField = YaCySchema.load_date_dt.getSolrFieldName();
    final SolrDocument document;
    try {
        document = this.solr.get(urlHash, loadDateField);
    } catch (final IOException e) {
        // a failed lookup is reported to the caller in the same way as a missing document
        return null;
    }
    if (document == null) return null;
    final Date loaded = (Date) document.getFieldValue(loadDateField);
    if (loaded == null) return new Date(0);
    // a load date in the future is replaced by the current time
    final Date now = new Date();
    if (loaded.after(now)) return now;
    return loaded;
}
/**
* generates an plasmaLURLEntry using the url hash
* if the url cannot be found, this returns null
@ -259,7 +275,7 @@ public final class Fulltext implements Iterable<byte[]> {
}
public void putDocument(final SolrInputDocument doc) throws IOException {
String id = (String) doc.getFieldValue(YaCySchema.id.name());
String id = (String) doc.getFieldValue(YaCySchema.id.getSolrFieldName());
byte[] idb = ASCII.getBytes(id);
try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
@ -385,7 +401,7 @@ public final class Fulltext implements Iterable<byte[]> {
final AtomicInteger count = new AtomicInteger(0);
Thread t = new Thread(){
public void run() {
final BlockingQueue<SolrDocument> docs = getSolr().concurrentQuery(q, 0, 1000000, 600000, -1);
final BlockingQueue<SolrDocument> docs = getSolr().concurrentQuery(q, 0, 1000000, 600000, -1, YaCySchema.id.getSolrFieldName(), YaCySchema.sku.getSolrFieldName());
try {
SolrDocument doc;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
@ -464,7 +480,7 @@ public final class Fulltext implements Iterable<byte[]> {
public String failReason(final String urlHash) throws IOException {
if (urlHash == null) return null;
SolrDocument doc = this.solr.get(urlHash);
SolrDocument doc = this.solr.get(urlHash, YaCySchema.failreason_t.getSolrFieldName());
if (doc == null) return null;
String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName());
return reason == null ? null : reason.length() == 0 ? null : reason;

Loading…
Cancel
Save