From a4214694df9610e88aa08480c8835d71664ac373 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 26 Sep 2012 16:05:11 +0200 Subject: [PATCH] We assert that no other metadata storage than solr is used now. Therefore a property like connectedSolr() must be true all the time. Removal of this method causes removal of all write operations to the old metadata index. --- htroot/IndexControlRWIs_p.java | 2 +- source/net/yacy/search/Switchboard.java | 59 ++++------ source/net/yacy/search/index/Fulltext.java | 121 ++++++--------------- source/net/yacy/search/index/Segment.java | 16 +-- 4 files changed, 63 insertions(+), 135 deletions(-) diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index c87edebae..8476a67c2 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -159,7 +159,7 @@ public class IndexControlRWIs_p { if ( post.get("deleteIndex", "").equals("on") ) { segment.clear(); } - if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.fulltext().connectedSolr()) { + if ( post.get("deleteRemoteSolr", "").equals("on")) { try { sb.index.fulltext().getSolr().clear(); } catch ( final Exception e ) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 627a6cc8e..95d756740 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2541,45 +2541,28 @@ public final class Switchboard extends serverSwitch // STORE WORD INDEX URIMetadata newEntry = null; - try { - newEntry = - this.index.storeDocument( - url, - referrerURL, - queueEntry.lastModified(), - new Date(), - queueEntry.size(), - queueEntry.profile(), - queueEntry.getResponseHeader(), - document, - condenser, - searchEvent, - sourceName); - final RSSFeed feed = - EventChannel.channels(queueEntry.initiator() == null - ? EventChannel.PROXY - : Base64Order.enhancedCoder.equal( - queueEntry.initiator(), - ASCII.getBytes(this.peers.mySeed().hash)) - ? 
EventChannel.LOCALINDEXING - : EventChannel.REMOTEINDEXING); - feed.addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url(), ASCII.String(queueEntry.url().hash()))); - } catch ( final IOException e ) { - //if (this.log.isFine()) log.logFine("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase); - addURLtoErrorDB( + newEntry = + this.index.storeDocument( url, - (referrerURL == null) ? null : referrerURL.hash(), - queueEntry.initiator(), - dc_title, - FailCategory.FINAL_LOAD_CONTEXT, - "error storing url: " - + url.toNormalform(false, true) - + "': process case=" - + processCase - + ", error = " - + e.getMessage()); - return; - } + referrerURL, + queueEntry.lastModified(), + new Date(), + queueEntry.size(), + queueEntry.profile(), + queueEntry.getResponseHeader(), + document, + condenser, + searchEvent, + sourceName); + final RSSFeed feed = + EventChannel.channels(queueEntry.initiator() == null + ? EventChannel.PROXY + : Base64Order.enhancedCoder.equal( + queueEntry.initiator(), + ASCII.getBytes(this.peers.mySeed().hash)) + ? 
EventChannel.LOCALINDEXING + : EventChannel.REMOTEINDEXING); + feed.addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url(), ASCII.String(queueEntry.url().hash()))); // store rss feeds in document into rss table for ( final Map.Entry rssEntry : document.getRSS().entrySet() ) { diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 50f96838b..166f3a016 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -47,7 +47,6 @@ import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.ZIPReader; import net.yacy.cora.storage.ZIPWriter; -import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadata; @@ -110,10 +109,6 @@ public final class Fulltext implements Iterable { return this.solrScheme; } - public boolean connectedSolr() { - return this.solr.isConnected0() || this.solr.isConnected1(); - } - public boolean connectedLocalSolr() { return this.solr.isConnected0(); } @@ -227,16 +222,12 @@ public final class Fulltext implements Iterable { // get the metadata from the old metadata index if (this.urlIndexFile != null) try { - if (this.connectedSolr()) { - // slow migration to solr - final Row.Entry entry = this.urlIndexFile.remove(urlHash); - if (entry == null) return null; - URIMetadataRow row = new URIMetadataRow(entry, wre, weight); - this.putDocument(this.solrScheme.metadata2solr(row)); - return row; - } - final Row.Entry entry = this.urlIndexFile.get(urlHash, false); - if (entry != null) return new URIMetadataRow(entry, wre, weight); + // slow migration to solr + final Row.Entry entry = this.urlIndexFile.remove(urlHash); + if (entry == null) return null; + URIMetadataRow row = new URIMetadataRow(entry, wre, weight); + this.putDocument(this.solrScheme.metadata2solr(row)); 
+ return row; } catch (final IOException e) { Log.logException(e); } @@ -269,17 +260,12 @@ public final class Fulltext implements Iterable { // get the document from the old metadata index if (this.urlIndexFile != null) try { - if (this.connectedSolr()) { - // slow migration to solr - final Row.Entry entry = this.urlIndexFile.remove(urlHash); - if (entry == null) return null; - URIMetadataRow row = new URIMetadataRow(entry, wre, weight); - this.putDocument(this.solrScheme.metadata2solr(row)); - return ClientUtils.toSolrDocument(getSolrScheme().metadata2solr(row)); - } - final Row.Entry entry = this.urlIndexFile.get(urlHash, false); + // slow migration to solr + final Row.Entry entry = this.urlIndexFile.remove(urlHash); if (entry == null) return null; - return ClientUtils.toSolrDocument(getSolrScheme().metadata2solr(new URIMetadataRow(entry, wre, weight))); + URIMetadataRow row = new URIMetadataRow(entry, wre, weight); + this.putDocument(this.solrScheme.metadata2solr(row)); + return ClientUtils.toSolrDocument(getSolrScheme().metadata2solr(row)); } catch (final IOException e) { Log.logException(e); } @@ -290,38 +276,19 @@ public final class Fulltext implements Iterable { public void putDocument(final SolrInputDocument doc) throws IOException { String id = (String) doc.getFieldValue(YaCySchema.id.name()); byte[] idb = ASCII.getBytes(id); - if (this.connectedSolr()) { - try { - if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); - SolrDocument sd = this.solr.get(id); - if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { - if (this.solrScheme.contains(YaCySchema.ip_s)) { - // ip_s needs a dns lookup which causes blockings during search here - this.solr.add(doc); - } else synchronized (this.solr) { - this.solr.add(doc); - } - } - } catch (SolrException e) { - throw new IOException(e.getMessage(), e); - } - } else if (this.urlIndexFile != null) { - URIMetadata oldEntry = null; 
- try { - final Row.Entry oe = this.urlIndexFile.get(idb, false); - oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0); - } catch (final Throwable e) { - Log.logException(e); - oldEntry = null; - } - URIMetadataNode entry = new URIMetadataNode(ClientUtils.toSolrDocument(doc)); - if (oldEntry == null || oldEntry.isOlder(entry)) { - try { - this.urlIndexFile.put(entry.toRow().toRowEntry()); - } catch (final SpaceExceededException e) { - throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); - } + try { + if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); + SolrDocument sd = this.solr.get(id); + if (sd == null || this.solrScheme.getDate(sd, YaCySchema.last_modified).before(this.solrScheme.getDate(doc, YaCySchema.last_modified))) { + if (this.solrScheme.contains(YaCySchema.ip_s)) { + // ip_s needs a dns lookup which causes blockings during search here + this.solr.add(doc); + } else synchronized (this.solr) { + this.solr.add(doc); + } } + } catch (SolrException e) { + throw new IOException(e.getMessage(), e); } this.statsDump = null; if (MemoryControl.shortStatus()) clearCache(); @@ -336,37 +303,19 @@ public final class Fulltext implements Iterable { byte[] idb = row.hash(); String id = ASCII.String(idb); - if (this.connectedSolr()) { - try { - if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); - SolrDocument sd = this.solr.get(id); - if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) { - if (this.solrScheme.contains(YaCySchema.ip_s)) { - // ip_s needs a dns lookup which causes blockings during search here - this.solr.add(getSolrScheme().metadata2solr(row)); - } else synchronized (this.solr) { - this.solr.add(getSolrScheme().metadata2solr(row)); - } + try { + if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); + SolrDocument sd = this.solr.get(id); + if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) { + if (this.solrScheme.contains(YaCySchema.ip_s)) { + 
// ip_s needs a dns lookup which causes blockings during search here + this.solr.add(getSolrScheme().metadata2solr(row)); + } else synchronized (this.solr) { + this.solr.add(getSolrScheme().metadata2solr(row)); } - } catch (SolrException e) { - throw new IOException(e.getMessage(), e); - } - } else if (this.urlIndexFile != null) { - URIMetadata oldEntry = null; - try { - final Row.Entry oe = this.urlIndexFile.get(idb, false); - oldEntry = (oe == null) ? null : new URIMetadataRow(oe, null, 0); - } catch (final Throwable e) { - Log.logException(e); - oldEntry = null; - } - if (oldEntry == null || oldEntry.isOlder(row)) { - try { - this.urlIndexFile.put(row.toRowEntry()); - } catch (final SpaceExceededException e) { - throw new IOException("RowSpaceExceededException in " + this.urlIndexFile.filename() + ": " + e.getMessage()); - } - } + } + } catch (SolrException e) { + throw new IOException(e.getMessage(), e); } this.statsDump = null; if (MemoryControl.shortStatus()) clearCache(); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 268004259..7723c57d5 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -352,7 +352,7 @@ public class Segment { final Condenser condenser, final SearchEvent searchEvent, final String sourceName - ) throws IOException { + ) { final long startTime = System.currentTimeMillis(); // CREATE INDEX @@ -394,16 +394,12 @@ public class Segment { ); // STORE TO SOLR - // we do not store the data in metadatadb any more if a solr is connected - if (this.fulltext.connectedSolr()) { - try { - this.fulltext.putDocument(this.fulltext.getSolrScheme().yacy2solr(id, profile, responseHeader, document, metadata)); - } catch ( final IOException e ) { - Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage()); - } - } else { - this.fulltext.putMetadata(metadata); + try { + 
this.fulltext.putDocument(this.fulltext.getSolrScheme().yacy2solr(id, profile, responseHeader, document, metadata)); + } catch ( final IOException e ) { + Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage()); } + final long storageEndTime = System.currentTimeMillis(); // STORE PAGE INDEX INTO WORD INDEX DB