diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index e73ed3ae9..cdf5fc0a2 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -114,7 +114,7 @@ public final class crawlReceipt { } // generating a new loaded URL entry - final URIMetadataNode entry = URIMetadataNode.importEntry(propStr); + final URIMetadataNode entry = URIMetadataNode.importEntry(propStr, "dht"); if (entry == null) { if (log.isWarn()) log.warn("crawlReceipt: RECEIVED wrong RECEIPT (entry null) from peer " + iam + "\n\tURL properties: "+ propStr); prop.put("delay", "3600"); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index a032c411d..4b042376f 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -103,7 +103,7 @@ public final class transferURL { } // parse new lurl-entry - lEntry = URIMetadataNode.importEntry(urls); + lEntry = URIMetadataNode.importEntry(urls, "dht"); if (lEntry == null) { if (Network.log.isWarn()) Network.log.warn("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls); blocked++; diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index 8f603fe6e..17f2c772d 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -77,7 +77,7 @@ public class URIMetadataNode extends SolrDocument { protected String snippet = null; protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests - public URIMetadataNode(final Properties prop) { + public URIMetadataNode(final Properties prop, String collection) { // generates an plasmaLURLEntry using the properties from the argument // the property names must correspond to the one from toString //System.out.println("DEBUG-ENTRY: prop=" + prop.toString()); @@ -139,6 +139,9 @@ public class URIMetadataNode extends SolrDocument { this.appc = Integer.parseInt(prop.getProperty("lapp", "0")); this.snippet = crypt.simpleDecode(prop.getProperty("snippet", "")); this.score = Float.parseFloat(prop.getProperty("score", "0.0")); + List cs = new ArrayList(); + cs.add(collection); + this.setField(CollectionSchema.collection_sxt.name(), cs); this.word = null; if (prop.containsKey("wi")) { this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false); @@ -497,13 +500,13 @@ public class URIMetadataNode extends SolrDocument { return getStringList(CollectionSchema.description_txt); } - public static URIMetadataNode importEntry(final String propStr) { + public static URIMetadataNode importEntry(final String propStr, String collection) { if (propStr == null || propStr.isEmpty() || propStr.charAt(0) != '{' || !propStr.endsWith("}")) { ConcurrentLog.severe("URIMetadataNode", "importEntry: propStr is not proper: " + propStr); return null; } try { - return new URIMetadataNode(MapTools.s2p(propStr.substring(1, propStr.length() - 1))); + return new URIMetadataNode(MapTools.s2p(propStr.substring(1, propStr.length() - 1)), collection); } catch (final kelondroException e) { // wrong format ConcurrentLog.severe("URIMetadataNode", e.getMessage()); diff --git a/source/net/yacy/kelondro/logging/ConsoleOutErrHandler.java b/source/net/yacy/kelondro/logging/ConsoleOutErrHandler.java index 9965686d1..0d11cd577 100644 --- a/source/net/yacy/kelondro/logging/ConsoleOutErrHandler.java +++ b/source/net/yacy/kelondro/logging/ConsoleOutErrHandler.java @@ -171,7 +171,7 @@ public final class ConsoleOutErrHandler extends Handler { } @Override - public void setFormatter(final Formatter newFormatter) throws SecurityException { + public synchronized void setFormatter(final Formatter newFormatter) throws SecurityException { super.setFormatter(newFormatter); if (newFormatter == null) return; try { @@ -183,7 +183,7 @@ public final class ConsoleOutErrHandler extends Handler { } @Override - public final void setFilter(final Filter newFilter) throws SecurityException { + public final synchronized void setFilter(final Filter newFilter) throws SecurityException { super.setFilter(newFilter); if (newFilter == null) return; try { diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index b5230b996..77654639e 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -746,11 +746,8 @@ public final class Protocol { if (event.addResultsToLocalIndex) { for (URIMetadataNode entry : storeDocs) { try { - // firstSseen is set on access (crawl/index) to full resource, - // on existing firstSeen prevent that metadata overwrite this rich data (this can be the case if crawldata has older loaddate as metadata) - if (!event.query.getSegment().firstSeen().has(entry.hash())) { // TODO: cleanup firstSeen on document deletion from index - event.query.getSegment().fulltext().putMetadata(entry); - } + event.query.getSegment().setFirstSeenTime(entry.hash(), Math.min(entry.moddate().getTime(), System.currentTimeMillis())); + event.query.getSegment().fulltext().putMetadata(entry); // it will be checked inside the putMetadata that poor metadata does not overwrite rich metadata } catch (final IOException e) { ConcurrentLog.logException(e); } @@ -920,7 +917,7 @@ public final class Protocol { if ( resultLine == null ) { continue; } - final URIMetadataNode urlEntry = URIMetadataNode.importEntry(resultLine); + final URIMetadataNode urlEntry = URIMetadataNode.importEntry(resultLine, "dht"); if ( urlEntry == null ) { continue; } @@ -1115,6 +1112,7 @@ public final class Protocol { // passed all checks, store url if (!localsearch) { + // put the remote documents to the local index. We must convert the solr document to a solr input document: if (event.addResultsToLocalIndex) { final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc); diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 7bb610c61..9f9d8fb38 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -341,8 +341,24 @@ public final class Fulltext { try { // because node entries are richer than metadata entries we must check if they exist to prevent that they are overwritten long date = this.getLoadTime(id); - if (date < entry.loaddate().getTime()) { + if (date == -1) { + // document does not exist putDocument(getDefaultConfiguration().metadata2solr(entry)); + } else { + // check if document contains rich data + if (date < entry.loaddate().getTime()) { + SolrDocument doc = this.getDefaultConnector().getDocumentById(id, CollectionSchema.collection_sxt.getSolrFieldName()); + if (doc == null || !doc.containsKey(CollectionSchema.collection_sxt.getSolrFieldName())) { + putDocument(getDefaultConfiguration().metadata2solr(entry)); + } else { + Collection collections = doc.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); + for (Object s: collections) { + if (!"dht".equals(s)) return; + } + // passed all checks, overwrite document + putDocument(getDefaultConfiguration().metadata2solr(entry)); + } + } } } catch (final SolrException e) { throw new IOException(e.getMessage(), e);