From e816b88b55eb3d6301b19c18340289041d0a7f6d Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 10 Aug 2012 15:39:10 +0200 Subject: [PATCH] changed behaviour of metadata storage: if any solr is attached, the metadata is written to solr instead of the metadata-db, even if the metadata-db is enabled. This prevents metadata from being written to two storage systems at the same time. It is also the next step in migrating the current metadata-db to solr. --- htroot/IndexControlRWIs_p.java | 6 +-- htroot/yacy/crawlReceipt.java | 3 -- htroot/yacy/transferURL.java | 4 -- source/net/yacy/peers/Protocol.java | 3 -- .../yacy/search/index/MetadataRepository.java | 42 +++++++++++-------- source/net/yacy/search/index/Segment.java | 20 +++++---- 6 files changed, 39 insertions(+), 39 deletions(-) diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index d2c560438..4e82edae0 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -93,7 +93,7 @@ public class IndexControlRWIs_p { prop.put("keyhash", ""); prop.put("result", ""); prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0); - prop.put("cleanup_solr", sb.index.connectedRemoteSolr() ? 1 : 0); + prop.put("cleanup_solr", sb.index.connectedSolr() ? 
1 : 0); // switch off all optional forms/lists prop.put("searchresult", 0); @@ -158,9 +158,9 @@ public class IndexControlRWIs_p { if ( post.get("deleteIndex", "").equals("on") ) { segment.clear(); } - if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.connectedRemoteSolr()) { + if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.connectedSolr()) { try { - sb.index.getRemoteSolr().clear(); + sb.index.getSolr().clear(); } catch ( final Exception e ) { Log.logException(e); } diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index a3b639597..5ab0bedfe 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -148,9 +148,6 @@ public final class crawlReceipt { if ("fill".equals(result)) try { // put new entry into database sb.index.urlMetadata().store(entry); - if (!sb.index.urlMetadata().getSolr().exists(ASCII.String(entry.url().hash()))) { - sb.index.urlMetadata().getSolr().add(sb.index.urlMetadata().getSolrScheme().metadata2solr(entry)); - } ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS); sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true)); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index 633c08dd4..ca1a27c0b 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.text.ParseException; import net.yacy.cora.date.GenericFormatter; -import net.yacy.cora.document.ASCII; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.URIMetadata; @@ -143,9 +142,6 @@ public final class transferURL { if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + 
otherPeerName + ": " + lEntry.url().toNormalform(true, false)); try { sb.index.urlMetadata().store(lEntry); - if (!sb.index.urlMetadata().getSolr().exists(ASCII.String(lEntry.url().hash()))) { - sb.index.urlMetadata().getSolr().add(sb.index.urlMetadata().getSolrScheme().metadata2solr(lEntry)); - } ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER); if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); received++; diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 1480d00ec..9bff39417 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -754,9 +754,6 @@ public final class Protocol // passed all checks, store url try { indexSegment.urlMetadata().store(urlEntry); - if (!indexSegment.urlMetadata().getSolr().exists(ASCII.String(urlEntry.url().hash()))) { - indexSegment.urlMetadata().getSolr().add(indexSegment.urlMetadata().getSolrScheme().metadata2solr(urlEntry)); - } ResultURLs.stack( urlEntry, mySeed.hash.getBytes(), diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java index c8ed8363b..b860da22d 100644 --- a/source/net/yacy/search/index/MetadataRepository.java +++ b/source/net/yacy/search/index/MetadataRepository.java @@ -63,6 +63,7 @@ import net.yacy.search.solr.EmbeddedSolrConnector; import org.apache.lucene.util.Version; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrException; public final class MetadataRepository implements /*Metadata,*/ Iterable { @@ -105,6 +106,10 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable return this.solrScheme; } + public boolean connectedSolr() { + return this.solr.isConnected0() || this.solr.isConnected1(); + } + public boolean connectedLocalSolr() { return this.solr.isConnected0(); } @@ -224,33 
+229,34 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable } public void store(final URIMetadata entry) throws IOException { - // Check if there is a more recent Entry already in the DB - if (this.urlIndexFile != null && entry instanceof URIMetadataRow) { + if (this.connectedSolr()) { + try { + SolrDocument sd = getSolr().get(ASCII.String(entry.url().hash())); + if (sd == null || !entry.isOlder(new URIMetadataNode(sd))) { + getSolr().add(getSolrScheme().metadata2solr(entry)); + } + } catch (SolrException e) { + throw new IOException(e.getMessage(), e); + } + } else if (this.urlIndexFile != null && entry instanceof URIMetadataRow) { URIMetadata oldEntry = null; try { final Row.Entry oe = this.urlIndexFile.get(entry.hash(), false); oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0); - } catch (final Exception e) { + } catch (final Throwable e) { Log.logException(e); oldEntry = null; } - if (oldEntry != null && entry.isOlder(oldEntry)) { - // the fetched oldEntry is better, so return its properties instead of the new ones - // this.urlHash = oldEntry.urlHash; // unnecessary, should be the same - // this.url = oldEntry.url; // unnecessary, should be the same - // doesn't make sense, since no return value: - //entry = oldEntry; - return; // this did not need to be stored, but is updated - } - - try { - this.urlIndexFile.put(((URIMetadataRow) entry).toRowEntry()); - } catch (final SpaceExceededException e) { - throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); + if (oldEntry == null || !entry.isOlder(oldEntry)) { + try { + this.urlIndexFile.put(((URIMetadataRow) entry).toRowEntry()); + } catch (final SpaceExceededException e) { + throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); + } } - this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); } + this.statsDump = null; + if 
(MemoryControl.shortStatus()) clearCache(); } public boolean remove(final byte[] urlHash) { diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 766e28046..e876fcf9e 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -173,7 +173,11 @@ public class Segment { public void disconnectUrlDb() { this.urlMetadata.disconnectUrlDb(); } - + + public boolean connectedSolr() { + return this.urlMetadata.connectedSolr(); + } + public boolean connectedRemoteSolr() { return this.urlMetadata.connectedRemoteSolr(); } @@ -448,21 +452,21 @@ public class Segment { document.getVideolinks().size(), // lvideo document.getApplinks().size() // lapp ); - this.urlMetadata.store(metadata); - final long storageEndTime = System.currentTimeMillis(); - + // STORE TO SOLR - boolean localSolr = this.connectedLocalSolr(); - boolean remoteSolr = this.connectedRemoteSolr(); - if (localSolr || remoteSolr) { + // we do not store the data in metadatadb any more if a solr is connected + if (this.connectedSolr()) { try { SolrDoc solrDoc = this.urlMetadata.getSolrScheme().yacy2solr(id, responseHeader, document, metadata); this.getSolr().add(solrDoc); } catch ( final IOException e ) { Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage()); } + } else { + this.urlMetadata.store(metadata); } - + final long storageEndTime = System.currentTimeMillis(); + // STORE PAGE INDEX INTO WORD INDEX DB int outlinksSame = document.inboundLinks().size(); int outlinksOther = document.outboundLinks().size();