diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 63e40b6ac..37e6e6f21 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -321,7 +321,7 @@ public final class search { // prepare reference hints final long timer = System.currentTimeMillis(); - final ArrayList ws = theSearch.topics(10); + final ArrayList ws = theSearch.getTopicNavigator(10); final StringBuilder refstr = new StringBuilder(); for (NavigatorEntry e: ws) { refstr.append(",").append(e.name); diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 1ce1c6dd8..cebddc713 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -26,15 +26,11 @@ import java.util.ArrayList; import java.util.Iterator; -import java.util.TreeSet; import de.anomic.http.httpRequestHeader; -import de.anomic.kelondro.order.NaturalOrder; -import de.anomic.kelondro.util.SetTools; import de.anomic.plasma.plasmaProfiling; import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchQuery; -import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSearchRankingProcess.NavigatorEntry; import de.anomic.server.serverObjects; import de.anomic.server.serverProfiling; @@ -83,38 +79,17 @@ public class yacysearchtrailer { } // attach the bottom line with search references (topwords) - final ArrayList references = theSearch.topics(20); + final ArrayList references = theSearch.getTopicNavigator(10); if (references.size() > 0) { - // get the topwords - final TreeSet topwords = new TreeSet(NaturalOrder.naturalComparator); - for (NavigatorEntry e: references) { - if (e.name.matches("[a-z]+")) { - topwords.add(e.name); - } - } - - // filter out the badwords - final TreeSet filteredtopwords = SetTools.joinConstructive(topwords, plasmaSwitchboard.badwords); - if (filteredtopwords.size() > 0) { - SetTools.excludeDestructive(topwords, plasmaSwitchboard.badwords); - } - - // avoid stopwords being topwords - if (env.getConfig("filterOutStopwordsFromTopwords", "true").equals("true")) { - if ((plasmaSwitchboard.stopwords != null) && (plasmaSwitchboard.stopwords.size() > 0)) { - SetTools.excludeDestructive(topwords, plasmaSwitchboard.stopwords); - } - } - - String word; int hintcount = 0; - final Iterator iter = topwords.iterator(); + NavigatorEntry e; + Iterator iter = references.iterator(); while (iter.hasNext()) { - word = iter.next(); + e = iter.next(); if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break; - if (word != null) { - prop.putHTML("words_" + hintcount + "_word", word); - prop.putHTML("words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word); + if (e.name != null) { + prop.putHTML("words_" + hintcount + "_word", e.name); + prop.putHTML("words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + e.name); prop.put("words_" + hintcount + "_count", theQuery.displayResults()); prop.put("words_" + hintcount + "_offset", "0"); prop.put("words_" + hintcount + "_display", display); diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 40236c8a7..f063a0bb2 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -98,7 +98,6 @@ public final class plasmaSearchEvent { long urlRetrievalAllTime; long snippetComputationAllTime; public ResultURLs crawlResults; - private ArrayList hostNavigator; @SuppressWarnings("unchecked") private plasmaSearchEvent(final plasmaSearchQuery query, @@ -124,7 +123,6 @@ public final class plasmaSearchEvent { this.snippetComputationAllTime = 0; this.workerThreads = null; this.localSearchThread = null; - this.hostNavigator = null; this.result = new SortStore(-1); // this is the result, enriched with snippets, ranked and ordered by ranking this.images = new SortStore(-1); this.failedURLs = new HashMap(); // a map of urls to reason strings where a worker thread tried to work on, but failed. @@ -578,13 +576,12 @@ public final class plasmaSearchEvent { } public ArrayList getHostNavigator(int maxentries) { - if (this.hostNavigator != null) return this.hostNavigator; - if (localSearchThread != null && localSearchThread.isAlive()) { - try {Thread.sleep(100L);} catch (final InterruptedException e) {} - } - this.hostNavigator = rankedCache.getHostNavigator(10); - if (this.hostNavigator.size() == 0) this.hostNavigator = null; - return this.hostNavigator; + return this.rankedCache.getHostNavigator(maxentries); + } + + public ArrayList getTopicNavigator(final int maxentries) { + // returns a set of words that are computed as toplist + return this.rankedCache.getTopicNavigator(maxentries); } public ResultEntry oneResult(final int item) { @@ -730,6 +727,7 @@ public final class plasmaSearchEvent { if (peer.equals(mypeerhash)) continue; // we dont need to ask ourself urls = entry1.getValue(); words = wordsFromPeer(peer, urls); + assert words.length() >= 12 : "words = " + words; //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls); //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words); secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch( @@ -776,11 +774,6 @@ public final class plasmaSearchEvent { //assert e != null; } - public ArrayList topics(final int count) { - // returns a set of words that are computed as toplist - return this.rankedCache.getTopicNavigator(count); - } - public static class ResultEntry { // payload objects private final URLMetadataRow urlentry; diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 20be57ab2..2bcda72e1 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -459,9 +459,12 @@ public final class plasmaSearchRankingProcess { for (int i = 0; i < words.length; i++) { word = words[i].toLowerCase(); Integer c; - if ((word.length() > 2) && - ("http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0) && - (!(query.queryHashes.contains(Word.word2hash(word))))) { + if (word.length() > 2 && + "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0 && + !query.queryHashes.contains(Word.word2hash(word)) && + word.matches("[a-z]+") && + !plasmaSwitchboard.badwords.contains(word) && + !plasmaSwitchboard.stopwords.contains(word)) { c = ref.get(word); if (c == null) ref.put(word, 1); else ref.put(word, c.intValue() + 1); } diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index 599078d82..9425a11c9 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -289,7 +289,7 @@ public class yacySearch extends Thread { final String targethash, final Blacklist blacklist, final plasmaSearchRankingProfile rankingProfile, final Bitfield constraint, final TreeMap clusterselection) { - assert wordhashes.length() >= 12; + assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes; // check own peer status if (peers.mySeed() == null || peers.mySeed().getPublicAddress() == null) { return null; }