Fixed webgraph postprocessing (it did nothing and kept repeating the attempt...)

pull/1/head
Michael Peter Christen 11 years ago
parent 74d0256e93
commit c833d02cf5

@ -40,6 +40,7 @@ import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.openjena.atlas.logging.Log;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
@ -314,11 +315,12 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
// for each to-be-processed entry work on the process tag
Collection<Object> proctags = doc.getFieldValues(WebgraphSchema.process_sxt.getSolrFieldName());
for (Object tag: proctags) {
try {
SolrInputDocument sid = this.toSolrInputDocument(doc);
try {
SolrInputDocument sid = this.toSolrInputDocument(doc);
for (Object tag: proctags) {
// switch over tag types
ProcessType tagtype = ProcessType.valueOf((String) tag);
if (tagtype == ProcessType.CLICKDEPTH) {
@ -337,18 +339,16 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
if (postprocessing_clickdepth(segment, doc, sid, url, WebgraphSchema.target_clickdepth_i)) proccount_clickdepthchange++;
}
}
// all processing steps checked, remove the processing tag
sid.removeField(WebgraphSchema.process_sxt.getSolrFieldName());
if (this.contains(WebgraphSchema.harvestkey_s)) sid.removeField(WebgraphSchema.harvestkey_s.getSolrFieldName());
// send back to index
connector.deleteById((String) doc.getFieldValue(WebgraphSchema.id.getSolrFieldName()));
connector.add(sid);
proccount++;
} catch (final Throwable e1) {
}
// all processing steps checked, remove the processing tag
sid.removeField(WebgraphSchema.process_sxt.getSolrFieldName());
if (this.contains(WebgraphSchema.harvestkey_s)) sid.removeField(WebgraphSchema.harvestkey_s.getSolrFieldName());
// send back to index
connector.add(sid);
proccount++;
} catch (Throwable e1) {
Log.warn(WebgraphConfiguration.class, "postprocessing failed", e1);
}
}
ConcurrentLog.info("WebgraphConfiguration", "cleanup_processing: re-calculated " + proccount + " new documents, " + proccount_clickdepthchange + " clickdepth values changed.");

Loading…
Cancel
Save