From bd769de6047c9f6bd927c1e2ef16f4e3e9e1f1da Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 9 Oct 2012 11:48:55 +0200 Subject: [PATCH] since the solr index is now used for all pages that are indexed locally, there is no need for the RWI index if the index is not transferred to another peer. Therefore the creation of RWI index data is now suppressed if DHT is disabled. This applies to all intranet and portal mode configurations, but not for public robinson modes. A robinson may switch back to public mode and then transmit its data. That means if someone wants to never switch to DHT mode, it would be more appropriate to choose the portal mode. --- htroot/ConfigNetwork_p.java | 2 +- source/net/yacy/search/Switchboard.java | 7 +- .../net/yacy/search/SwitchboardConstants.java | 2 + .../net/yacy/search/index/DocumentIndex.java | 3 +- source/net/yacy/search/index/Segment.java | 103 +++++++++--------- 5 files changed, 62 insertions(+), 55 deletions(-) diff --git a/htroot/ConfigNetwork_p.java b/htroot/ConfigNetwork_p.java index e24985ddc..1df009045 100644 --- a/htroot/ConfigNetwork_p.java +++ b/htroot/ConfigNetwork_p.java @@ -235,7 +235,7 @@ public class ConfigNetwork_p prop.putHTML("network.unit.name", sb.getConfig(SwitchboardConstants.NETWORK_NAME, "")); prop.putHTML("network.unit.description", sb.getConfig("network.unit.description", "")); prop.putHTML("network.unit.domain", sb.getConfig(SwitchboardConstants.NETWORK_DOMAIN, "")); - prop.putHTML("network.unit.dht", sb.getConfig("network.unit.dht", "")); + prop.putHTML("network.unit.dht", sb.getConfig(SwitchboardConstants.DHT_ENABLED, "")); networkBootstrapLocations.remove(sb.getConfig("network.unit.definition", "")); int c = 0; for ( final String s : networkBootstrapLocations ) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 5515e8fd3..cf4b965e7 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ 
-1177,7 +1177,7 @@ public final class Switchboard extends serverSwitch ClientIdentification.generateYaCyBot(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")); - if ( !getConfigBool("network.unit.dht", false) + if ( !getConfigBool(SwitchboardConstants.DHT_ENABLED, false) && getConfig("network.unit.tenant.agent", "").length() > 0 ) { newagent = getConfig("network.unit.tenant.agent", "").trim(); this.log.logInfo("new user agent: '" + newagent + "'"); @@ -2600,7 +2600,8 @@ public final class Switchboard extends serverSwitch document, condenser, searchEvent, - sourceName); + sourceName, + getConfigBool(SwitchboardConstants.DHT_ENABLED, false)); final RSSFeed feed = EventChannel.channels(queueEntry.initiator() == null ? EventChannel.PROXY @@ -2980,7 +2981,7 @@ public final class Switchboard extends serverSwitch if ( this.peers.noDHTActivity() ) { return "no DHT distribution: network too small"; } - if ( !getConfigBool("network.unit.dht", true) ) { + if ( !getConfigBool(SwitchboardConstants.DHT_ENABLED, true) ) { return "no DHT distribution: disabled by network.unit.dht"; } if ( getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false") ) { diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index ada62e2ee..8d79ac9f4 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -282,6 +282,8 @@ public final class SwitchboardConstants { public static final String CLUSTER_MODE_PRIVATE_PEER = "privatepeer"; public static final String CLUSTER_PEERS_IPPORT = "cluster.peers.ipport"; + + public static final String DHT_ENABLED = "network.unit.dht"; public static final String DHT_BURST_ROBINSON = "network.unit.dht.burst.robinson"; public static final String DHT_BURST_MULTIWORD = "network.unit.dht.burst.multiword"; diff --git 
a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index 73637c29b..5d9c6b033 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -177,7 +177,8 @@ public class DocumentIndex extends Segment { document, condenser, null, - DocumentIndex.class.getName() + ".add"); + DocumentIndex.class.getName() + ".add", + false); } return rows; } diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index a11ad8534..a3a6d5041 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -351,7 +351,8 @@ public class Segment { final Document document, final Condenser condenser, final SearchEvent searchEvent, - final String sourceName + final String sourceName, + final boolean storeToRWI ) { final long startTime = System.currentTimeMillis(); @@ -411,59 +412,61 @@ public class Segment { final int urlComps = MultiProtocolURI.urlComps(url.toString()).length; // create a word prototype which is re-used for all entries - final int len = (document == null) ? urlLength : document.dc_title().length(); - final WordReferenceRow ientry = new WordReferenceRow( - url.hash(), - urlLength, urlComps, len, - condenser.RESULT_NUMB_WORDS, - condenser.RESULT_NUMB_SENTENCES, - modDate.getTime(), - System.currentTimeMillis(), - UTF8.getBytes(language), - docType, - outlinksSame, outlinksOther); - - // iterate over all words of content text - Word wprop = null; - byte[] wordhash; - String word; - for (Map.Entry wentry: condenser.words().entrySet()) { - word = wentry.getKey(); - wprop = wentry.getValue(); - assert (wprop.flags != null); - ientry.setWord(wprop); - wordhash = Word.word2hash(word); + if ((this.termIndex != null && storeToRWI) || searchEvent != null) { + final int len = (document == null) ? 
urlLength : document.dc_title().length(); + final WordReferenceRow ientry = new WordReferenceRow( + url.hash(), + urlLength, urlComps, len, + condenser.RESULT_NUMB_WORDS, + condenser.RESULT_NUMB_SENTENCES, + modDate.getTime(), + System.currentTimeMillis(), + UTF8.getBytes(language), + docType, + outlinksSame, outlinksOther); + + // iterate over all words of content text + Word wprop = null; + byte[] wordhash; + String word; + for (Map.Entry wentry: condenser.words().entrySet()) { + word = wentry.getKey(); + wprop = wentry.getValue(); + assert (wprop.flags != null); + ientry.setWord(wprop); + wordhash = Word.word2hash(word); + if (this.termIndex != null && storeToRWI) try { + this.termIndex.add(wordhash, ientry); + } catch (final Exception e) { + Log.logException(e); + } + wordCount++; + + // during a search event it is possible that a heuristic is used which aquires index + // data during search-time. To transfer indexed data directly to the search process + // the following lines push the index data additionally to the search process + // this is done only for searched words + if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) { + // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result + ReferenceContainer container; + try { + container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1); + container.add(ientry); + rankingProcess.add(container, true, sourceName, -1, 5000); + } catch (final SpaceExceededException e) { + continue; + } + } + } + if (rankingProcess != null) rankingProcess.addFinalize(); + + // assign the catchall word + ientry.setWord(wprop == null ? 
catchallWord : wprop); // we use one of the word properties as template to get the document characteristics if (this.termIndex != null) try { - this.termIndex.add(wordhash, ientry); + this.termIndex.add(catchallHash, ientry); } catch (final Exception e) { Log.logException(e); } - wordCount++; - - // during a search event it is possible that a heuristic is used which aquires index - // data during search-time. To transfer indexed data directly to the search process - // the following lines push the index data additionally to the search process - // this is done only for searched words - if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) { - // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result - ReferenceContainer container; - try { - container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1); - container.add(ientry); - rankingProcess.add(container, true, sourceName, -1, 5000); - } catch (final SpaceExceededException e) { - continue; - } - } - } - if (rankingProcess != null) rankingProcess.addFinalize(); - - // assign the catchall word - ientry.setWord(wprop == null ? catchallWord : wprop); // we use one of the word properties as template to get the document characteristics - if (this.termIndex != null) try { - this.termIndex.add(catchallHash, ientry); - } catch (final Exception e) { - Log.logException(e); } // STORE PAGE REFERENCES INTO CITATION INDEX