disabled clickdepth computation during crawling since that is repeated during the clean-up phase.
pull/1/head
Michael Peter Christen 12 years ago
parent a734fbc4a5
commit 840fa22135

@@ -106,11 +106,13 @@ public abstract class AbstractSolrConnector implements SolrConnector {
             @Override
             public void run() {
                 int o = offset;
-                while (System.currentTimeMillis() < endtime) {
+                int count = 0;
+                while (System.currentTimeMillis() < endtime && count < maxcount) {
                     try {
                         SolrDocumentList sdl = query(querystring, o, pagesize, fields);
                         for (SolrDocument d: sdl) {
                             try {queue.put(d);} catch (InterruptedException e) {break;}
+                            count++;
                         }
                         if (sdl.size() < pagesize) break;
                         o += pagesize;

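For context, the concurrentQuery producer thread previously ran only against a time budget; the hunk above adds a hard cap on the number of rows pushed into the queue. Below is a minimal sketch of that bounded paging pattern, assuming a generic fetchPage function and queue; only the count/maxcount/pagesize logic mirrors the patch, the rest is illustrative.

import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.function.BiFunction;

// Sketch: page through a result source until either the time budget or the
// row budget (maxcount) is exhausted, feeding every row into a BlockingQueue.
final class BoundedPagingProducer {
    static <T> void produce(BiFunction<Integer, Integer, List<T>> fetchPage,
                            BlockingQueue<T> queue,
                            int offset, int pagesize, int maxcount, long maxtimeMillis) {
        final long endtime = System.currentTimeMillis() + maxtimeMillis;
        int o = offset;
        int count = 0; // row budget, mirroring the count/maxcount check added in the patch
        while (System.currentTimeMillis() < endtime && count < maxcount) {
            List<T> page = fetchPage.apply(o, pagesize);
            for (T row : page) {
                try { queue.put(row); } catch (InterruptedException e) { return; }
                count++;
            }
            if (page.size() < pagesize) break; // short page: no more results
            o += pagesize;
        }
    }
}
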
@@ -178,7 +178,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
         } catch (Throwable e) {
             // catches "version conflict for": try this again and delete the document in advance
             try {
-                this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
+                synchronized (this.server) {
+                    this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
+                }
             } catch (SolrServerException e1) {}
             try {
                 synchronized (this.server) {

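The change above serializes the conflict-recovery delete on the same lock that already guards the other mutations of the shared connection, so the "delete stale copy, then re-add" recovery cannot interleave with a concurrent add from another thread. A rough sketch of that locking discipline, using the SolrJ SolrClient type (YaCy's field here is a SolrServer) and invented class and method names:

import java.io.IOException;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;

// Sketch: every mutation of the shared Solr connection goes through the same monitor.
final class SynchronizedSolrWriter {
    private final SolrClient server;

    SynchronizedSolrWriter(SolrClient server) { this.server = server; }

    void addWithConflictRecovery(SolrInputDocument doc, String id)
            throws SolrServerException, IOException {
        try {
            synchronized (this.server) { this.server.add(doc); }
        } catch (Throwable e) {
            // likely a version conflict: drop the stale document under the same lock ...
            try {
                synchronized (this.server) { this.server.deleteById(id); }
            } catch (SolrServerException | IOException ignore) {}
            // ... and try the add once more, again under the lock
            synchronized (this.server) { this.server.add(doc); }
        }
    }
}
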
@@ -2257,7 +2257,7 @@ public final class Switchboard extends serverSwitch {
             // that means we must search for those entries.
             index.fulltext().getDefaultConnector().commit(true); // make sure that we have latest information that can be found
             //BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
-            BlockingQueue<SolrDocument> docs = index.fulltext().getDefaultConnector().concurrentQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10);
+            BlockingQueue<SolrDocument> docs = index.fulltext().getDefaultConnector().concurrentQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 10000, 60000, 50);
             SolrDocument doc;
             int proccount_clickdepth = 0;
@@ -2280,7 +2280,10 @@ public final class Switchboard extends serverSwitch {
                     Integer oldclickdepth = (Integer) doc.getFieldValue(CollectionSchema.clickdepth_i.getSolrFieldName());
                     url = new DigestURI((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())));
                     int clickdepth = CollectionConfiguration.getClickDepth(index.urlCitation(), url);
-                    if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++;
+                    if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) {
+                        //log.logInfo("new clickdepth " + clickdepth + " for " + url.toNormalform(true));
+                        proccount_clickdepthchange++;
+                    }
                     SolrInputDocument sid = index.fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
                     sid.setField(CollectionSchema.clickdepth_i.getSolrFieldName(), clickdepth);

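Since the clean-up phase is now the only place the click depth is computed, the Switchboard hunks widen the postprocessing query from 1000 rows at concurrency 10 to 10000 rows at concurrency 50. The following is a rough sketch of the consumer side of that loop; the terminator document, the literal field names and the depthOf(...) helper are assumptions for illustration, whereas YaCy uses its CollectionSchema constants and CollectionConfiguration.getClickDepth(...) here.

import java.util.concurrent.BlockingQueue;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;

// Sketch: drain the queue filled by concurrentQuery, recompute the click depth for
// every flagged document and count how many values actually changed.
final class ClickDepthPostprocessor {
    static int recompute(BlockingQueue<SolrDocument> docs, SolrDocument terminator)
            throws InterruptedException {
        int changed = 0;
        SolrDocument doc;
        while ((doc = docs.take()) != terminator) {
            Integer oldDepth = (Integer) doc.getFieldValue("clickdepth_i");
            int newDepth = depthOf((String) doc.getFieldValue("sku"));
            if (oldDepth == null || oldDepth.intValue() != newDepth) changed++;
            SolrInputDocument sid = new SolrInputDocument();
            sid.setField("id", doc.getFieldValue("id"));
            sid.setField("clickdepth_i", newDepth);
            // ... hand sid back to the connector for re-indexing, as the hunk above does
        }
        return changed;
    }

    // placeholder for the citation-graph walk that computes the real click depth
    private static int depthOf(String url) { return 0; }
}
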
@@ -328,11 +328,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
             this.lazy = lc;
         } else {
             // search the citations for references
-            try {
-                clickdepth = getClickDepth(citations, digestURI);
-            } catch (IOException e) {
-                add(doc, CollectionSchema.clickdepth_i, -1);
-            }
+            //try {
+                clickdepth = -1; //getClickDepth(citations, digestURI);
+            //} catch (IOException e) {
+            //    add(doc, CollectionSchema.clickdepth_i, -1);
+            //}
             if (clickdepth < 0 || clickdepth > 1) {
                 processTypes.add(ProcessType.CLICKDEPTH); // postprocessing needed; this is also needed if the depth is positive; there could be a shortcut
             }

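The CollectionConfiguration hunk is where the deferral actually happens: at index time the document only gets a placeholder depth of -1 plus a marker that postprocessing is needed, instead of walking the citation graph immediately. A condensed sketch of that pattern, with the enclosing class and the literal field name invented for illustration (ProcessType.CLICKDEPTH mirrors the patch):

import java.util.EnumSet;
import java.util.Set;
import org.apache.solr.common.SolrInputDocument;

// Sketch: store a placeholder depth and flag the document for the CLICKDEPTH
// postprocessing step instead of computing the depth at crawl time.
final class ClickDepthDeferral {
    enum ProcessType { CLICKDEPTH }

    static Set<ProcessType> markForPostprocessing(SolrInputDocument doc) {
        Set<ProcessType> processTypes = EnumSet.noneOf(ProcessType.class);
        int clickdepth = -1;                      // unknown at crawl/index time
        doc.setField("clickdepth_i", clickdepth); // placeholder value written to the index
        if (clickdepth < 0 || clickdepth > 1) {
            processTypes.add(ProcessType.CLICKDEPTH); // recompute in the clean-up phase
        }
        return processTypes;
    }
}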