diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index d2c560438..4e82edae0 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -93,7 +93,7 @@ public class IndexControlRWIs_p { prop.put("keyhash", ""); prop.put("result", ""); prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0); - prop.put("cleanup_solr", sb.index.connectedRemoteSolr() ? 1 : 0); + prop.put("cleanup_solr", sb.index.connectedSolr() ? 1 : 0); // switch off all optional forms/lists prop.put("searchresult", 0); @@ -158,9 +158,9 @@ public class IndexControlRWIs_p { if ( post.get("deleteIndex", "").equals("on") ) { segment.clear(); } - if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.connectedRemoteSolr()) { + if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.connectedSolr()) { try { - sb.index.getRemoteSolr().clear(); + sb.index.getSolr().clear(); } catch ( final Exception e ) { Log.logException(e); } diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index a3b639597..5ab0bedfe 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -148,9 +148,6 @@ public final class crawlReceipt { if ("fill".equals(result)) try { // put new entry into database sb.index.urlMetadata().store(entry); - if (!sb.index.urlMetadata().getSolr().exists(ASCII.String(entry.url().hash()))) { - sb.index.urlMetadata().getSolr().add(sb.index.urlMetadata().getSolrScheme().metadata2solr(entry)); - } ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS); sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true)); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index 633c08dd4..ca1a27c0b 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.text.ParseException; import net.yacy.cora.date.GenericFormatter; -import net.yacy.cora.document.ASCII; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.URIMetadata; @@ -143,9 +142,6 @@ public final class transferURL { if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false)); try { sb.index.urlMetadata().store(lEntry); - if (!sb.index.urlMetadata().getSolr().exists(ASCII.String(lEntry.url().hash()))) { - sb.index.urlMetadata().getSolr().add(sb.index.urlMetadata().getSolrScheme().metadata2solr(lEntry)); - } ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER); if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); received++; diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 1480d00ec..9bff39417 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -754,9 +754,6 @@ public final class Protocol // passed all checks, store url try { indexSegment.urlMetadata().store(urlEntry); - if (!indexSegment.urlMetadata().getSolr().exists(ASCII.String(urlEntry.url().hash()))) { - indexSegment.urlMetadata().getSolr().add(indexSegment.urlMetadata().getSolrScheme().metadata2solr(urlEntry)); - } ResultURLs.stack( urlEntry, mySeed.hash.getBytes(), diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java index c8ed8363b..b860da22d 100644 --- a/source/net/yacy/search/index/MetadataRepository.java +++ b/source/net/yacy/search/index/MetadataRepository.java @@ -63,6 +63,7 @@ import net.yacy.search.solr.EmbeddedSolrConnector; import org.apache.lucene.util.Version; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrException; public final class MetadataRepository implements /*Metadata,*/ Iterable { @@ -105,6 +106,10 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable return this.solrScheme; } + public boolean connectedSolr() { + return this.solr.isConnected0() || this.solr.isConnected1(); + } + public boolean connectedLocalSolr() { return this.solr.isConnected0(); } @@ -224,33 +229,34 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable } public void store(final URIMetadata entry) throws IOException { - // Check if there is a more recent Entry already in the DB - if (this.urlIndexFile != null && entry instanceof URIMetadataRow) { + if (this.connectedSolr()) { + try { + SolrDocument sd = getSolr().get(ASCII.String(entry.url().hash())); + if (sd == null || !entry.isOlder(new URIMetadataNode(sd))) { + getSolr().add(getSolrScheme().metadata2solr(entry)); + } + } catch (SolrException e) { + throw new IOException(e.getMessage(), e); + } + } else if (this.urlIndexFile != null && entry instanceof URIMetadataRow) { URIMetadata oldEntry = null; try { final Row.Entry oe = this.urlIndexFile.get(entry.hash(), false); oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0); - } catch (final Exception e) { + } catch (final Throwable e) { Log.logException(e); oldEntry = null; } - if (oldEntry != null && entry.isOlder(oldEntry)) { - // the fetched oldEntry is better, so return its properties instead of the new ones - // this.urlHash = oldEntry.urlHash; // unnecessary, should be the same - // this.url = oldEntry.url; // unnecessary, should be the same - // doesn't make sense, since no return value: - //entry = oldEntry; - return; // this did not need to be stored, but is updated - } - - try { - this.urlIndexFile.put(((URIMetadataRow) entry).toRowEntry()); - } catch (final SpaceExceededException e) { - throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); + if (oldEntry == null || !entry.isOlder(oldEntry)) { + try { + this.urlIndexFile.put(((URIMetadataRow) entry).toRowEntry()); + } catch (final SpaceExceededException e) { + throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); + } } - this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); } + this.statsDump = null; + if (MemoryControl.shortStatus()) clearCache(); } public boolean remove(final byte[] urlHash) { diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 766e28046..e876fcf9e 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -173,7 +173,11 @@ public class Segment { public void disconnectUrlDb() { this.urlMetadata.disconnectUrlDb(); } - + + public boolean connectedSolr() { + return this.urlMetadata.connectedSolr(); + } + public boolean connectedRemoteSolr() { return this.urlMetadata.connectedRemoteSolr(); } @@ -448,21 +452,21 @@ public class Segment { document.getVideolinks().size(), // lvideo document.getApplinks().size() // lapp ); - this.urlMetadata.store(metadata); - final long storageEndTime = System.currentTimeMillis(); - + // STORE TO SOLR - boolean localSolr = this.connectedLocalSolr(); - boolean remoteSolr = this.connectedRemoteSolr(); - if (localSolr || remoteSolr) { + // we do not store the data in metadatadb any more if a solr is connected + if (this.connectedSolr()) { try { SolrDoc solrDoc = this.urlMetadata.getSolrScheme().yacy2solr(id, responseHeader, document, metadata); this.getSolr().add(solrDoc); } catch ( final IOException e ) { Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage()); } + } else { + this.urlMetadata.store(metadata); } - + final long storageEndTime = System.currentTimeMillis(); + // STORE PAGE INDEX INTO WORD INDEX DB int outlinksSame = document.inboundLinks().size(); int outlinksOther = document.outboundLinks().size();