disabled clickdepth computation during crawling since that is repeated

during clean-up phase.
pull/1/head
Michael Peter Christen 12 years ago
parent a734fbc4a5
commit 840fa22135

@ -106,11 +106,13 @@ public abstract class AbstractSolrConnector implements SolrConnector {
@Override @Override
public void run() { public void run() {
int o = offset; int o = offset;
while (System.currentTimeMillis() < endtime) { int count = 0;
while (System.currentTimeMillis() < endtime && count < maxcount) {
try { try {
SolrDocumentList sdl = query(querystring, o, pagesize, fields); SolrDocumentList sdl = query(querystring, o, pagesize, fields);
for (SolrDocument d: sdl) { for (SolrDocument d: sdl) {
try {queue.put(d);} catch (InterruptedException e) {break;} try {queue.put(d);} catch (InterruptedException e) {break;}
count++;
} }
if (sdl.size() < pagesize) break; if (sdl.size() < pagesize) break;
o += pagesize; o += pagesize;

@ -178,7 +178,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
} catch (Throwable e) { } catch (Throwable e) {
// catches "version conflict for": try this again and delete the document in advance // catches "version conflict for": try this again and delete the document in advance
try { try {
this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); synchronized (this.server) {
this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
}
} catch (SolrServerException e1) {} } catch (SolrServerException e1) {}
try { try {
synchronized (this.server) { synchronized (this.server) {

@ -2257,7 +2257,7 @@ public final class Switchboard extends serverSwitch {
// that means we must search for those entries. // that means we must search for those entries.
index.fulltext().getDefaultConnector().commit(true); // make sure that we have latest information that can be found index.fulltext().getDefaultConnector().commit(true); // make sure that we have latest information that can be found
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10); //BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
BlockingQueue<SolrDocument> docs = index.fulltext().getDefaultConnector().concurrentQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10); BlockingQueue<SolrDocument> docs = index.fulltext().getDefaultConnector().concurrentQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 10000, 60000, 50);
SolrDocument doc; SolrDocument doc;
int proccount_clickdepth = 0; int proccount_clickdepth = 0;
@ -2280,7 +2280,10 @@ public final class Switchboard extends serverSwitch {
Integer oldclickdepth = (Integer) doc.getFieldValue(CollectionSchema.clickdepth_i.getSolrFieldName()); Integer oldclickdepth = (Integer) doc.getFieldValue(CollectionSchema.clickdepth_i.getSolrFieldName());
url = new DigestURI((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()))); url = new DigestURI((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())));
int clickdepth = CollectionConfiguration.getClickDepth(index.urlCitation(), url); int clickdepth = CollectionConfiguration.getClickDepth(index.urlCitation(), url);
if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++; if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) {
//log.logInfo("new clickdepth " + clickdepth + " for " + url.toNormalform(true));
proccount_clickdepthchange++;
}
SolrInputDocument sid = index.fulltext().getDefaultConfiguration().toSolrInputDocument(doc); SolrInputDocument sid = index.fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
sid.setField(CollectionSchema.clickdepth_i.getSolrFieldName(), clickdepth); sid.setField(CollectionSchema.clickdepth_i.getSolrFieldName(), clickdepth);

@ -328,11 +328,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
this.lazy = lc; this.lazy = lc;
} else { } else {
// search the citations for references // search the citations for references
try { //try {
clickdepth = getClickDepth(citations, digestURI); clickdepth = -1; //getClickDepth(citations, digestURI);
} catch (IOException e) { //} catch (IOException e) {
add(doc, CollectionSchema.clickdepth_i, -1); // add(doc, CollectionSchema.clickdepth_i, -1);
} //}
if (clickdepth < 0 || clickdepth > 1) { if (clickdepth < 0 || clickdepth > 1) {
processTypes.add(ProcessType.CLICKDEPTH); // postprocessing needed; this is also needed if the depth is positive; there could be a shortcut processTypes.add(ProcessType.CLICKDEPTH); // postprocessing needed; this is also needed if the depth is positive; there could be a shortcut
} }

Loading…
Cancel
Save