fixed a problem with re-feeding of already indexed documents with

coordinates attached.
pull/1/head
Michael Peter Christen 12 years ago
parent cb38e860cf
commit 7806680ab8

@ -21,9 +21,11 @@
package net.yacy.cora.federate.solr;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
public enum YaCySchema implements Schema {
@ -350,5 +352,24 @@ public enum YaCySchema implements Schema {
doc.setField(this.getSolrFieldName(), value);
}
/**
 * Convert a SolrDocument to a SolrInputDocument.
 * This is useful if a document from the search index shall be modified and indexed again.
 * This shall be used as replacement of ClientUtils.toSolrInputDocument because we remove some fields
 * which are created automatically during the indexing process.
 * @param doc the solr document
 * @return a solr input document
 */
public static SolrInputDocument toSolrInputDocument(SolrDocument doc) {
    // fields generated by the indexing process itself; they must not be fed back
    final Set<String> skipFields = new HashSet<String>();
    final String coordinateField = YaCySchema.coordinate_p.getSolrFieldName();
    skipFields.add(coordinateField + "_0_coordinate");
    skipFields.add(coordinateField + "_1_coordinate");
    skipFields.add(YaCySchema.author_sxt.getSolrFieldName());
    final SolrInputDocument result = new SolrInputDocument();
    for (final String fieldName : doc.getFieldNames()) {
        if (skipFields.contains(fieldName)) continue;
        result.addField(fieldName, doc.getFieldValue(fieldName), 1.0f);
    }
    return result;
}
}

@ -1161,7 +1161,7 @@ public final class Protocol
// passed all checks, store url
if (!localsearch) {
try {
event.query.getSegment().fulltext().putDocument(ClientUtils.toSolrInputDocument(doc));
event.query.getSegment().fulltext().putDocument(YaCySchema.toSolrInputDocument(doc));
ResultURLs.stack(
ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(),

@ -82,7 +82,6 @@ import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
@ -2233,7 +2232,9 @@ public final class Switchboard extends serverSwitch {
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) {
// that means we must search for those entries.
index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery(YaCySchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10);
SolrDocument doc;
int proccount_clickdepth = 0;
int proccount_clickdepthchange = 0;
@ -2256,7 +2257,7 @@ public final class Switchboard extends serverSwitch {
url = new DigestURI((String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName())));
int clickdepth = SolrConfiguration.getClickDepth(index.urlCitation(), url);
if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++;
SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc);
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
sid.setField(YaCySchema.clickdepth_i.getSolrFieldName(), clickdepth);
// refresh the link count; it's 'cheap' to do this here

@ -436,7 +436,7 @@ public class Segment {
// switch attribute also in all existing documents (which should be exactly only one!)
SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000);
for (SolrDocument doc: docs) {
SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc);
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
sid.setField(uniquefield.getSolrFieldName(), false);
this.fulltext.getSolr().add(sid);
}

Loading…
Cancel
Save