diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 02337c7a9..8cc6c125e 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -30,7 +30,6 @@ import java.util.ArrayList; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; @@ -39,6 +38,7 @@ import net.yacy.cora.document.RSSMessage; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; +import net.yacy.cora.storage.StaticScore; import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.WordReference; @@ -51,7 +51,6 @@ import net.yacy.kelondro.util.ISO639; import de.anomic.crawler.CrawlProfile; import de.anomic.search.ContentDomain; -import de.anomic.search.Navigator; import de.anomic.search.QueryParams; import de.anomic.search.RankingProfile; import de.anomic.search.SearchEvent; @@ -338,13 +337,18 @@ public final class search { // prepare reference hints final long timer = System.currentTimeMillis(); - final List ws = theSearch.getTopicNavigator(10); + StaticScore topicNavigator = theSearch.getTopicNavigator(5); final StringBuilder refstr = new StringBuilder(6000); - for (Navigator.Item e: ws) { - refstr.append(",").append(e.name); + Iterator navigatorIterator = topicNavigator.keys(false); + int i = 0; + String name; + while (i < 5 && navigatorIterator.hasNext()) { + name = navigatorIterator.next(); + refstr.append(",").append(name); + i++; } prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString()); - EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.REFERENCECOLLECTION, "", ws.size(), System.currentTimeMillis() - timer), false); + EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.REFERENCECOLLECTION, "", i, System.currentTimeMillis() - timer), false); } prop.put("indexabstract", indexabstract.toString()); diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 1ed81111e..71973555e 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -96,7 +96,7 @@ $(function() { $("#sidebar2").accordion({}); $("#sidebar3").accordion({}); $("#sidebar3").accordion('activate', false); - $("#sidebar4").tagcloud({seed:0,sizemin:10,sizemax:20,height:60}).find("li").tsort(); + $("#sidebar4").tagcloud({seed:0,sizemin:10,sizemax:20,height:80}).find("li").tsort(); $("#sidebarAbout").accordion({}); $("#search").focus(); }); diff --git a/htroot/yacysearchtrailer.html b/htroot/yacysearchtrailer.html index aefe85bb7..b7912d0c3 100644 --- a/htroot/yacysearchtrailer.html +++ b/htroot/yacysearchtrailer.html @@ -9,7 +9,7 @@ #(/cat-location)# #(nav-topics)#:: -
+
    #{element}#
  • #[url]#
  • #{/element}#
diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 93c5cb50c..fe08de6c3 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -24,15 +24,13 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; import net.yacy.cora.protocol.RequestHeader; +import net.yacy.cora.storage.StaticScore; import net.yacy.kelondro.util.EventTracker; import de.anomic.data.LibraryProvider; -import de.anomic.search.Navigator; import de.anomic.search.QueryParams; import de.anomic.search.SearchEvent; import de.anomic.search.SearchEventCache; @@ -43,7 +41,7 @@ import de.anomic.yacy.graphics.ProfilingGraph; public class yacysearchtrailer { - private static final int MAX_TOPWORDS = 10; + private static final int MAX_TOPWORDS = 16; public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { final serverObjects prop = new serverObjects(); @@ -62,100 +60,108 @@ public class yacysearchtrailer { // compose search navigation // namespace navigators - List namespaceNavigator = theSearch.getNamespaceNavigator(10); + StaticScore namespaceNavigator = theSearch.getNamespaceNavigator(); + String name; + int count; + Iterator navigatorIterator; if (namespaceNavigator == null || namespaceNavigator.isEmpty()) { prop.put("nav-namespace", 0); } else { prop.put("nav-namespace", 1); - Navigator.Item entry; - int i; - for (i = 0; i < Math.min(10, namespaceNavigator.size()); i++) { - entry = namespaceNavigator.get(i); - prop.put("nav-namespace_element_" + i + "_name", entry.name); - prop.put("nav-namespace_element_" + i + "_url", "" + entry.name + " (" + entry.count + ")"); - prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators)); - prop.put("nav-namespace_element_" + i + "_count", entry.count); - prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + entry.name); + navigatorIterator = namespaceNavigator.keys(false); + int i = 0; + while (i < 10 && navigatorIterator.hasNext()) { + name = navigatorIterator.next(); + count = namespaceNavigator.get(name); + prop.put("nav-namespace_element_" + i + "_name", name); + prop.put("nav-namespace_element_" + i + "_url", "" + name + " (" + count + ")"); + prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + name, theQuery.urlMask.toString(), theQuery.navigators)); + prop.put("nav-namespace_element_" + i + "_count", count); + prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + name); prop.put("nav-namespace_element_" + i + "_nl", 1); + i++; } i--; prop.put("nav-namespace_element_" + i + "_nl", 0); - prop.put("nav-namespace_element", namespaceNavigator.size()); + prop.put("nav-namespace_element", i); } // host navigators - List hostNavigator = theSearch.getHostNavigator(10); + StaticScore hostNavigator = theSearch.getHostNavigator(); if (hostNavigator == null || hostNavigator.isEmpty()) { prop.put("nav-domains", 0); } else { prop.put("nav-domains", 1); - Navigator.Item entry; - int i; - for (i = 0; i < Math.min(10, hostNavigator.size()); i++) { - entry = hostNavigator.get(i); - prop.put("nav-domains_element_" + i + "_name", entry.name); - prop.put("nav-domains_element_" + i + "_url", "" + entry.name + " (" + entry.count + ")"); - prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators)); - prop.put("nav-domains_element_" + i + "_count", entry.count); - prop.put("nav-domains_element_" + i + "_modifier", "site:" + entry.name); + navigatorIterator = hostNavigator.keys(false); + int i = 0; + while (i < 20 && navigatorIterator.hasNext()) { + name = navigatorIterator.next(); + count = hostNavigator.get(name); + prop.put("nav-domains_element_" + i + "_name", name); + prop.put("nav-domains_element_" + i + "_url", "" + name + " (" + count + ")"); + prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + name, theQuery.urlMask.toString(), theQuery.navigators)); + prop.put("nav-domains_element_" + i + "_count", count); + prop.put("nav-domains_element_" + i + "_modifier", "site:" + name); prop.put("nav-domains_element_" + i + "_nl", 1); + i++; } i--; prop.put("nav-domains_element_" + i + "_nl", 0); - prop.put("nav-domains_element", hostNavigator.size()); + prop.put("nav-domains_element", i); } // author navigators - List authorNavigator = theSearch.getAuthorNavigator(10); + StaticScore authorNavigator = theSearch.getAuthorNavigator(); if (authorNavigator == null || authorNavigator.isEmpty()) { prop.put("nav-authors", 0); } else { prop.put("nav-authors", 1); - Navigator.Item entry; - int i; + navigatorIterator = authorNavigator.keys(false); + int i = 0; String anav; - for (i = 0; i < Math.min(10, authorNavigator.size()); i++) { - entry = authorNavigator.get(i); - anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name.replace(" ", "+") + "'"; - prop.put("nav-authors_element_" + i + "_name", entry.name); - prop.put("nav-authors_element_" + i + "_url", "" + entry.name + " (" + entry.count + ")"); + while (i < 20 && navigatorIterator.hasNext()) { + name = navigatorIterator.next(); + count = authorNavigator.get(name); + anav = (name.indexOf(' ') < 0) ? "author:" + name : "author:'" + name.replace(" ", "+") + "'"; + prop.put("nav-authors_element_" + i + "_name", name); + prop.put("nav-authors_element_" + i + "_url", "" + name + " (" + count + ")"); prop.putJSON("nav-authors_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators)); - prop.put("nav-authors_element_" + i + "_count", entry.count); - prop.put("nav-authors_element_" + i + "_modifier", "author:'" + entry.name + "'"); + prop.put("nav-authors_element_" + i + "_count", count); + prop.put("nav-authors_element_" + i + "_modifier", "author:'" + name + "'"); prop.put("nav-authors_element_" + i + "_nl", 1); + i++; } i--; prop.put("nav-authors_element_" + i + "_nl", 0); - prop.put("nav-authors_element", authorNavigator.size()); + prop.put("nav-authors_element", i); } // topics navigator - List topicNavigator = theSearch.getTopicNavigator(30); + StaticScore topicNavigator = theSearch.getTopicNavigator(MAX_TOPWORDS); if (topicNavigator == null || topicNavigator.isEmpty()) { - topicNavigator = new ArrayList(); prop.put("nav-topics", "0"); } else { prop.put("nav-topics", "1"); + navigatorIterator = topicNavigator.keys(false); int i = 0; - Navigator.Item e; - Iterator iter = topicNavigator.iterator(); - while (iter.hasNext()) { - e = iter.next(); + while (i < MAX_TOPWORDS && navigatorIterator.hasNext()) { + name = navigatorIterator.next(); + count = topicNavigator.get(name); if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break; - if (e != null && e.name != null) { - prop.putHTML("nav-topics_element_" + i + "_name", e.name); + if (name != null) { + prop.putHTML("nav-topics_element_" + i + "_name", name); prop.put("nav-topics_element_" + i + "_url", - "" + e.name + ""); - //+"-")*/; - prop.putJSON("nav-topics_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + e.name, theQuery.urlMask.toString(), theQuery.navigators)); - prop.put("nav-topics_element_" + i + "_count", e.count); - prop.put("nav-topics_element_" + i + "_modifier", e.name); - prop.put("nav-topics_element_" + i + "_nl", (iter.hasNext() && i < MAX_TOPWORDS) ? 1 : 0); - } - if (i++ > MAX_TOPWORDS) { - break; + "" + name + ""); + //+"-")*/; + prop.putJSON("nav-topics_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + name, theQuery.urlMask.toString(), theQuery.navigators)); + prop.put("nav-topics_element_" + i + "_count", count); + prop.put("nav-topics_element_" + i + "_modifier", name); + prop.put("nav-topics_element_" + i + "_nl", 1); + i++; } } + i--; + prop.put("nav-topics_element_" + i + "_nl", 0); prop.put("nav-topics_element", i); } diff --git a/source/de/anomic/search/Navigator.java b/source/de/anomic/search/Navigator.java deleted file mode 100644 index c951f0e3c..000000000 --- a/source/de/anomic/search/Navigator.java +++ /dev/null @@ -1,97 +0,0 @@ -// Navigator.java -// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 05.03.2010 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2010-01-29 16:59:24 +0100 (Fr, 29 Jan 2010) $ -// $LastChangedRevision: 6630 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -package de.anomic.search; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -public class Navigator { - private ConcurrentHashMap map; - - public Navigator() { - this.map = new ConcurrentHashMap(); - } - - /** - * a reverse comparator for navigator items - */ - public static final Comparator itemComp = new Comparator() { - public int compare(Item o1, Item o2) { - if (o1.count < o2.count) return 1; - if (o2.count < o1.count) return -1; - return 0; - } - }; - - public void inc(String key, String name) { - Item item = map.get(key); - if (item == null) { - map.put(key, new Item(name)); - } else { - item.inc(); - } - } - - public Map map() { - return this.map; - } - - public Item[] entries() { - Item[] ii = this.map.values().toArray(new Item[this.map.size()]); - Arrays.sort(ii, itemComp); - return ii; - } - - public List entries(int maxcount) { - Item[] ii = entries(); - int c = Math.min(ii.length, maxcount); - ArrayList a = new ArrayList(c); - for (int i = 0; i < c; i++) a.add(ii[i]); - return a; - } - - public static class Item { - public int count; - public String name; - public Item(String name) { - this.count = 1; - this.name = name; - } - public Item(String name, int count) { - this.count = count; - this.name = name; - } - public void inc() { - this.count++; - } - } -} diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index cc2c71773..bee891395 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -28,11 +28,10 @@ package de.anomic.search; import java.io.File; import java.io.IOException; -import java.util.ArrayList; import java.util.Comparator; import java.util.ConcurrentModificationException; +import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; @@ -41,12 +40,13 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.storage.DynamicScore; +import net.yacy.cora.storage.ScoreCluster; +import net.yacy.cora.storage.StaticScore; import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.document.Condenser; -import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; -import net.yacy.kelondro.data.meta.URIMetadataRow.Components; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceVars; @@ -81,10 +81,11 @@ public final class RankingProcess extends Thread { private final ConcurrentHashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack //private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process - private final Navigator ref; // reference score computation for the commonSense heuristic - private final Navigator hostNavigator; - private final Navigator authorNavigator; - private final Navigator namespaceNavigator; + private final DynamicScore ref; // reference score computation for the commonSense heuristic + private final DynamicScore hostNavigator; + private final Map hostResolver; + private final DynamicScore authorNavigator; + private final DynamicScore namespaceNavigator; private final ReferenceOrder order; private final long startTime; @@ -108,10 +109,11 @@ public final class RankingProcess extends Thread { //this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.flagcount = new int[32]; for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} - this.hostNavigator = new Navigator(); - this.authorNavigator = new Navigator(); - this.namespaceNavigator = new Navigator(); - this.ref = new Navigator(); + this.hostNavigator = new ScoreCluster(); + this.hostResolver = new ConcurrentHashMap(); + this.authorNavigator = new ScoreCluster(); + this.namespaceNavigator = new ScoreCluster(); + this.ref = new ScoreCluster(); this.feeders = 1; this.startTime = System.currentTimeMillis(); } @@ -220,7 +222,8 @@ public final class RankingProcess extends Thread { if (query.sitehash == null) { // no site constraint there; maybe collect host navigation information if (nav_hosts && query.urlMask_isCatchall) { - this.hostNavigator.inc(domhash, new String(iEntry.metadataHash())); + this.hostNavigator.inc(domhash); + this.hostResolver.put(domhash, new String(iEntry.metadataHash())); } } else { if (!domhash.equals(query.sitehash)) { @@ -424,7 +427,8 @@ public final class RankingProcess extends Thread { // in case that we do not have e catchall filter for urls // we must also construct the domain navigator here if (query.sitehash == null) { - this.hostNavigator.inc(new String(urlhash, 6, 6), new String(urlhash)); + this.hostNavigator.inc(new String(urlhash, 6, 6)); + this.hostResolver.put(new String(urlhash, 6, 6), new String(urlhash)); } } @@ -474,7 +478,7 @@ public final class RankingProcess extends Thread { } // add author to the author navigator - this.authorNavigator.inc(authorhash, pageauthor); + this.authorNavigator.inc(pageauthor); } else if (this.query.authorhash != null) { continue; } @@ -486,7 +490,7 @@ public final class RankingProcess extends Thread { p = pagepath.lastIndexOf('/'); if (p >= 0) { pagepath = pagepath.substring(p + 1); - this.namespaceNavigator.inc(pagepath, pagepath); + this.namespaceNavigator.inc(pagepath); } } @@ -567,38 +571,25 @@ public final class RankingProcess extends Thread { return this.misses.iterator(); } - public ArrayList getNamespaceNavigator(int count) { - if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ArrayList(0); - - Navigator.Item[] hsa = this.namespaceNavigator.entries(); - int rc = Math.min(count, hsa.length); - ArrayList result = new ArrayList(); - for (int i = 0; i < rc; i++) result.add(hsa[i]); - if (result.size() < 2) result.clear(); // navigators with one entry are not useful - return result; + public StaticScore getNamespaceNavigator() { + if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ScoreCluster(); + if (this.namespaceNavigator.size() < 2) this.namespaceNavigator.clear(); // navigators with one entry are not useful + return this.namespaceNavigator; } - public List getHostNavigator(int count) { - List result = new ArrayList(); + public StaticScore getHostNavigator() { + ScoreCluster result = new ScoreCluster(); if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result; - List hsa = this.hostNavigator.entries(10); - URIMetadataRow mr; - DigestURI url; - String hostname; - Components metadata; - loop: for (Navigator.Item item: hsa) { - mr = this.query.getSegment().urlMetadata().load(item.name.getBytes(), null, 0); - if (mr == null) continue; - metadata = mr.metadata(); - if (metadata == null) continue; - url = metadata.url(); - if (url == null) continue; - hostname = url.getHost(); - if (hostname == null) continue; - if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue; - for (Navigator.Item entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists - result.add(new Navigator.Item(hostname, item.count)); + Iterator domhashs = this.hostNavigator.keys(false); + URIMetadataRow row; + String domhash, urlhash, hostname; + while (domhashs.hasNext() && result.size() < 30) { + domhash = domhashs.next(); + urlhash = this.hostResolver.get(domhash); + row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash.getBytes(), null, 0); + hostname = row == null ? null : row.metadata().url().getHost(); + if (hostname != null) result.set(hostname, this.hostNavigator.get(domhash)); } if (result.size() < 2) result.clear(); // navigators with one entry are not useful return result; @@ -611,18 +602,35 @@ public final class RankingProcess extends Thread { return 0; } }; - - public Map getTopics() { - return this.ref.map(); - } - - public List getTopicNavigator(final int count) { + + public StaticScore getTopicNavigator(int count) { // create a list of words that had been computed by statistics over all // words that appeared in the url or the description of all urls - if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList(0); - List result = this.ref.entries(count); - if (result.size() < 2) result.clear(); // navigators with one entry are not useful - return result; + ScoreCluster result = new ScoreCluster(); + if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return result; + if (this.ref.size() < 2) this.ref.clear(); // navigators with one entry are not useful + Map counts = new HashMap(); + Iterator i = this.ref.keys(false); + String word; + byte[] termHash; + int c; + double q, min = Double.MAX_VALUE, max = Double.MIN_NORMAL; + int ic = count; + while (ic-- > 0 && i.hasNext()) { + word = i.next(); + termHash = Word.word2hash(word); + c = this.query.getSegment().termIndex().count(termHash); + if (c > 0) { + q = ((double) this.ref.get(word)) / ((double) c); + min = Math.min(min, q); + max = Math.max(max, q); + counts.put(word, q); + } + } + if (max > min) for (Map.Entry ce: counts.entrySet()) { + result.set(ce.getKey(), (int) (((double) count) * (ce.getValue() - min) / (max - min))); + } + return this.ref; } public void addTopic(final String[] words) { @@ -630,12 +638,12 @@ public final class RankingProcess extends Thread { for (int i = 0; i < words.length; i++) { word = words[i].toLowerCase(); if (word.length() > 2 && - "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0 && + "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off".indexOf(word) < 0 && !query.queryHashes.has(Word.word2hash(word)) && word.matches("[a-z]+") && !Switchboard.badwords.contains(word) && !Switchboard.stopwords.contains(word)) { - ref.inc(word, word); + ref.inc(word); } } } @@ -651,13 +659,12 @@ public final class RankingProcess extends Thread { addTopic(descrcomps); } - public List getAuthorNavigator(final int count) { + public StaticScore getAuthorNavigator() { // create a list of words that had been computed by statistics over all // words that appeared in the url or the description of all urls - if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList(0); - List result = this.authorNavigator.entries(count); - if (result.size() < 2) result.clear(); // navigators with one entry are not useful - return result; + if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ScoreCluster(); + if (this.authorNavigator.size() < 2) this.authorNavigator.clear(); // navigators with one entry are not useful + return this.authorNavigator; } public static void loadYBR(final File rankingPath, final int count) { diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index 1613f3dfb..657f38e17 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -28,9 +28,9 @@ package de.anomic.search; import java.util.ArrayList; import java.util.Iterator; -import java.util.Map; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.storage.StaticScore; import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.document.Condenser; @@ -197,7 +197,7 @@ public class ResultFetcher { // place the result to the result vector // apply post-ranking long ranking = Long.valueOf(rankingProcess.getOrder().cardinal(resultEntry.word())); - ranking += postRanking(resultEntry, rankingProcess.getTopics()); + ranking += postRanking(resultEntry, rankingProcess.getTopicNavigator(10)); result.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow if (nav_topics) rankingProcess.addTopics(resultEntry); } @@ -393,7 +393,7 @@ public class ResultFetcher { public long postRanking( final ResultEntry rentry, - final Map topwords) { + final StaticScore topwords) { long r = 0; @@ -411,14 +411,14 @@ public class ResultFetcher { final String urlstring = rentry.url().toNormalform(true, true); final String[] urlcomps = MultiProtocolURI.urlComps(urlstring); final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase()); - Navigator.Item tc; + int tc; for (int j = 0; j < urlcomps.length; j++) { tc = topwords.get(urlcomps[j]); - if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_urlcompintoplist; + if (tc > 0) r += Math.max(1, tc) << query.ranking.coeff_urlcompintoplist; } for (int j = 0; j < descrcomps.length; j++) { tc = topwords.get(descrcomps[j]); - if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_descrcompintoplist; + if (tc > 0) r += Math.max(1, tc) << query.ranking.coeff_descrcompintoplist; } // apply query-in-result matching diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index 454f29f85..d87ab65e6 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -27,15 +27,14 @@ package de.anomic.search; import java.io.IOException; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; +import net.yacy.cora.storage.StaticScore; import net.yacy.document.LargeNumberCache; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.index.HandleSet; @@ -330,22 +329,22 @@ public final class SearchEvent { return this.rankingProcess; } - public ArrayList getNamespaceNavigator(int maxentries) { - return this.rankingProcess.getNamespaceNavigator(maxentries); + public StaticScore getNamespaceNavigator() { + return this.rankingProcess.getNamespaceNavigator(); } - public List getHostNavigator(int maxentries) { - return this.rankingProcess.getHostNavigator(maxentries); + public StaticScore getHostNavigator() { + return this.rankingProcess.getHostNavigator(); } - public List getTopicNavigator(final int maxentries) { + public StaticScore getTopicNavigator(int count) { // returns a set of words that are computed as toplist - return this.rankingProcess.getTopicNavigator(maxentries); + return this.rankingProcess.getTopicNavigator(count); } - public List getAuthorNavigator(final int maxentries) { + public StaticScore getAuthorNavigator() { // returns a list of authors so far seen on result set - return this.rankingProcess.getAuthorNavigator(maxentries); + return this.rankingProcess.getAuthorNavigator(); } public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) { diff --git a/source/net/yacy/kelondro/io/CachedFileWriter.java b/source/net/yacy/kelondro/io/CachedFileWriter.java index d26785c1b..ca6711b2a 100644 --- a/source/net/yacy/kelondro/io/CachedFileWriter.java +++ b/source/net/yacy/kelondro/io/CachedFileWriter.java @@ -78,7 +78,7 @@ public final class CachedFileWriter extends AbstractWriter implements Writer { } // we fill the cache here long available = this.RAFile.length() - seek; - if (available < (long) len) throw new IOException("EOF, available = " + available + ", requested = " + len); + if (available < (long) len) throw new IOException("EOF, available = " + available + ", requested = " + len + ", this.RAFile.length() = " + this.RAFile.length() + ", seek = " + seek); if (cachestart + cachelen == seek && cache.length - cachelen >= len) { RAFile.readFully(cache, cachelen, len); //System.out.println("*** DEBUG FileRA " + this.file.getName() + ": append fill " + len + " bytes");