From ee01c12e56832dc4e3ed5f974d23a3d2a5f3bc99 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 18 Aug 2012 13:05:27 +0200 Subject: [PATCH] fixes for putDocument and putMetadata --- .../kelondro/data/meta/URIMetadataRow.java | 2 +- source/net/yacy/search/index/Fulltext.java | 62 +++++++++++++++---- .../yacy/search/index/SolrConfiguration.java | 19 +++++- source/net/yacy/search/index/YaCySchema.java | 4 +- 4 files changed, 72 insertions(+), 15 deletions(-) diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 07477c67c..ec379e0ca 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -279,7 +279,7 @@ public class URIMetadataRow implements URIMetadata { this.comp = null; } - public static URIMetadata importEntry(final String propStr) { + public static URIMetadataRow importEntry(final String propStr) { if (propStr == null || (!propStr.isEmpty() && propStr.charAt(0) != '{') || !propStr.endsWith("}")) { return null; } diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 0d32b2359..b814dfc47 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -259,41 +259,81 @@ public final class Fulltext implements Iterable { } public void putDocument(final SolrInputDocument doc) throws IOException { - if (!this.connectedSolr()) return; - this.solr.add(doc); + String id = (String) doc.getFieldValue(YaCySchema.id.name()); + byte[] idb = ASCII.getBytes(id); + if (this.connectedSolr()) { + try { + if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); + SolrDocument sd = this.solr.get(id); + if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { + this.solr.add(doc); + } + } catch (SolrException e) { + throw new IOException(e.getMessage(), e); + } + } else if (this.urlIndexFile != null) { + URIMetadata oldEntry = null; + try { + final Row.Entry oe = this.urlIndexFile.get(idb, false); + oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0); + } catch (final Throwable e) { + Log.logException(e); + oldEntry = null; + } + URIMetadata entry = new URIMetadataNode(ClientUtils.toSolrDocument(doc)); + if (oldEntry == null || oldEntry.isOlder(entry)) { + try { + URIMetadataRow row = URIMetadataRow.importEntry(entry.toString()); + this.urlIndexFile.put(row.toRowEntry()); + } catch (final SpaceExceededException e) { + throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); + } + } + } + this.statsDump = null; + if (MemoryControl.shortStatus()) clearCache(); } public void putMetadata(final URIMetadata entry) throws IOException { + if (entry instanceof URIMetadataNode) { + putDocument(ClientUtils.toSolrInputDocument(((URIMetadataNode) entry).getDocument())); + } + assert entry instanceof URIMetadataRow; + URIMetadataRow row = (URIMetadataRow) entry; + + byte[] idb = row.hash(); + String id = ASCII.String(idb); if (this.connectedSolr()) { try { - SolrDocument sd = getSolr().get(ASCII.String(entry.url().hash())); - if (sd == null || !entry.isOlder(new URIMetadataNode(sd))) { - getSolr().add(getSolrScheme().metadata2solr(entry)); + if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); + SolrDocument sd = this.solr.get(id); + if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) { + this.solr.add(getSolrScheme().metadata2solr(row)); } } catch (SolrException e) { throw new IOException(e.getMessage(), e); } - } else if (this.urlIndexFile != null && entry instanceof URIMetadataRow) { + } else if (this.urlIndexFile != null) { URIMetadata oldEntry = null; try { - final Row.Entry oe = this.urlIndexFile.get(entry.hash(), false); + final Row.Entry oe = this.urlIndexFile.get(idb, false); oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0); } catch (final Throwable e) { Log.logException(e); oldEntry = null; } - if (oldEntry == null || !entry.isOlder(oldEntry)) { + if (oldEntry == null || oldEntry.isOlder(row)) { try { - this.urlIndexFile.put(((URIMetadataRow) entry).toRowEntry()); + this.urlIndexFile.put(row.toRowEntry()); } catch (final SpaceExceededException e) { throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); } - } + } } this.statsDump = null; if (MemoryControl.shortStatus()) clearCache(); } - + public boolean remove(final byte[] urlHash) { if (urlHash == null) return false; try { diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 6f646c785..1a1dbc3f7 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -51,10 +51,13 @@ import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ImageEntry; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadata; +import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Bitfield; +import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; import de.anomic.crawler.retrieval.Response; @@ -150,6 +153,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (isEmpty() || contains(key)) solrdoc.addSolr(key, value); } + public Date getDate(SolrInputDocument doc, final YaCySchema key) { + Date x = (Date) doc.getFieldValue(key.name()); + return (x == null) ? new Date(0) : x; + } + + public Date getDate(SolrDocument doc, final YaCySchema key) { + Date x = (Date) doc.getFieldValue(key.name()); + return (x == null) ? new Date(0) : x; + } + /** * save configuration to file and update enum SolrFields * @throws IOException @@ -171,7 +184,11 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } catch (final IOException e) {} } - public SolrDoc metadata2solr(final URIMetadata md) { + public SolrInputDocument metadata2solr(final URIMetadata md) { + if (md instanceof URIMetadataNode) { + return ClientUtils.toSolrInputDocument(((URIMetadataNode) md).getDocument()); + } + final SolrDoc solrdoc = new SolrDoc(); final DigestURI digestURI = new DigestURI(md.url()); boolean allAttr = this.isEmpty(); diff --git a/source/net/yacy/search/index/YaCySchema.java b/source/net/yacy/search/index/YaCySchema.java index 3e8322eec..d028541e9 100644 --- a/source/net/yacy/search/index/YaCySchema.java +++ b/source/net/yacy/search/index/YaCySchema.java @@ -211,7 +211,7 @@ public enum YaCySchema implements Schema { @Override public final String getComment() { return this.comment; - } - + } + }