enhanced navigation implementation and enhanced tag cloud computation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7252 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent ca738ac924
commit ed4371dcf3

@ -30,7 +30,6 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
@ -39,6 +38,7 @@ import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.StaticScore;
import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
@ -51,7 +51,6 @@ import net.yacy.kelondro.util.ISO639;
import de.anomic.crawler.CrawlProfile;
import de.anomic.search.ContentDomain;
import de.anomic.search.Navigator;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEvent;
@ -338,13 +337,18 @@ public final class search {
// prepare reference hints
final long timer = System.currentTimeMillis();
final List<Navigator.Item> ws = theSearch.getTopicNavigator(10);
StaticScore<String> topicNavigator = theSearch.getTopicNavigator(5);
final StringBuilder refstr = new StringBuilder(6000);
for (Navigator.Item e: ws) {
refstr.append(",").append(e.name);
Iterator<String> navigatorIterator = topicNavigator.keys(false);
int i = 0;
String name;
while (i < 5 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
refstr.append(",").append(name);
i++;
}
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.REFERENCECOLLECTION, "", ws.size(), System.currentTimeMillis() - timer), false);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.REFERENCECOLLECTION, "", i, System.currentTimeMillis() - timer), false);
}
prop.put("indexabstract", indexabstract.toString());

@ -96,7 +96,7 @@ $(function() {
$("#sidebar2").accordion({});
$("#sidebar3").accordion({});
$("#sidebar3").accordion('activate', false);
$("#sidebar4").tagcloud({seed:0,sizemin:10,sizemax:20,height:60}).find("li").tsort();
$("#sidebar4").tagcloud({seed:0,sizemin:10,sizemax:20,height:80}).find("li").tsort();
$("#sidebarAbout").accordion({});
$("#search").focus();
});

@ -9,7 +9,7 @@
#(/cat-location)#
#(nav-topics)#::
<div style="float: right; margin-top:5px; width: 220px; height: 60px">
<div style="float: right; margin-top:5px; width: 220px; height: 80px">
<div><ul id="sidebar4" style="padding-left: 0px;">#{element}#
<li value="#[count]#">#[url]#</li>
#{/element}#</ul></div>

@ -24,15 +24,13 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.StaticScore;
import net.yacy.kelondro.util.EventTracker;
import de.anomic.data.LibraryProvider;
import de.anomic.search.Navigator;
import de.anomic.search.QueryParams;
import de.anomic.search.SearchEvent;
import de.anomic.search.SearchEventCache;
@ -43,7 +41,7 @@ import de.anomic.yacy.graphics.ProfilingGraph;
public class yacysearchtrailer {
private static final int MAX_TOPWORDS = 10;
private static final int MAX_TOPWORDS = 16;
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
@ -62,100 +60,108 @@ public class yacysearchtrailer {
// compose search navigation
// namespace navigators
List<Navigator.Item> namespaceNavigator = theSearch.getNamespaceNavigator(10);
StaticScore<String> namespaceNavigator = theSearch.getNamespaceNavigator();
String name;
int count;
Iterator<String> navigatorIterator;
if (namespaceNavigator == null || namespaceNavigator.isEmpty()) {
prop.put("nav-namespace", 0);
} else {
prop.put("nav-namespace", 1);
Navigator.Item entry;
int i;
for (i = 0; i < Math.min(10, namespaceNavigator.size()); i++) {
entry = namespaceNavigator.get(i);
prop.put("nav-namespace_element_" + i + "_name", entry.name);
prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-namespace_element_" + i + "_count", entry.count);
prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + entry.name);
navigatorIterator = namespaceNavigator.keys(false);
int i = 0;
while (i < 10 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = namespaceNavigator.get(name);
prop.put("nav-namespace_element_" + i + "_name", name);
prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + " (" + count + ")</a>");
prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "inurl:" + name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-namespace_element_" + i + "_count", count);
prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + name);
prop.put("nav-namespace_element_" + i + "_nl", 1);
i++;
}
i--;
prop.put("nav-namespace_element_" + i + "_nl", 0);
prop.put("nav-namespace_element", namespaceNavigator.size());
prop.put("nav-namespace_element", i);
}
// host navigators
List<Navigator.Item> hostNavigator = theSearch.getHostNavigator(10);
StaticScore<String> hostNavigator = theSearch.getHostNavigator();
if (hostNavigator == null || hostNavigator.isEmpty()) {
prop.put("nav-domains", 0);
} else {
prop.put("nav-domains", 1);
Navigator.Item entry;
int i;
for (i = 0; i < Math.min(10, hostNavigator.size()); i++) {
entry = hostNavigator.get(i);
prop.put("nav-domains_element_" + i + "_name", entry.name);
prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + entry.name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-domains_element_" + i + "_count", entry.count);
prop.put("nav-domains_element_" + i + "_modifier", "site:" + entry.name);
navigatorIterator = hostNavigator.keys(false);
int i = 0;
while (i < 20 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = hostNavigator.get(name);
prop.put("nav-domains_element_" + i + "_name", name);
prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + " (" + count + ")</a>");
prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + "site:" + name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-domains_element_" + i + "_count", count);
prop.put("nav-domains_element_" + i + "_modifier", "site:" + name);
prop.put("nav-domains_element_" + i + "_nl", 1);
i++;
}
i--;
prop.put("nav-domains_element_" + i + "_nl", 0);
prop.put("nav-domains_element", hostNavigator.size());
prop.put("nav-domains_element", i);
}
// author navigators
List<Navigator.Item> authorNavigator = theSearch.getAuthorNavigator(10);
StaticScore<String> authorNavigator = theSearch.getAuthorNavigator();
if (authorNavigator == null || authorNavigator.isEmpty()) {
prop.put("nav-authors", 0);
} else {
prop.put("nav-authors", 1);
Navigator.Item entry;
int i;
navigatorIterator = authorNavigator.keys(false);
int i = 0;
String anav;
for (i = 0; i < Math.min(10, authorNavigator.size()); i++) {
entry = authorNavigator.get(i);
anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name.replace(" ", "+") + "'";
prop.put("nav-authors_element_" + i + "_name", entry.name);
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
while (i < 20 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = authorNavigator.get(name);
anav = (name.indexOf(' ') < 0) ? "author:" + name : "author:'" + name.replace(" ", "+") + "'";
prop.put("nav-authors_element_" + i + "_name", name);
prop.put("nav-authors_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + " (" + count + ")</a>");
prop.putJSON("nav-authors_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-authors_element_" + i + "_count", entry.count);
prop.put("nav-authors_element_" + i + "_modifier", "author:'" + entry.name + "'");
prop.put("nav-authors_element_" + i + "_count", count);
prop.put("nav-authors_element_" + i + "_modifier", "author:'" + name + "'");
prop.put("nav-authors_element_" + i + "_nl", 1);
i++;
}
i--;
prop.put("nav-authors_element_" + i + "_nl", 0);
prop.put("nav-authors_element", authorNavigator.size());
prop.put("nav-authors_element", i);
}
// topics navigator
List<Navigator.Item> topicNavigator = theSearch.getTopicNavigator(30);
StaticScore<String> topicNavigator = theSearch.getTopicNavigator(MAX_TOPWORDS);
if (topicNavigator == null || topicNavigator.isEmpty()) {
topicNavigator = new ArrayList<Navigator.Item>();
prop.put("nav-topics", "0");
} else {
prop.put("nav-topics", "1");
navigatorIterator = topicNavigator.keys(false);
int i = 0;
Navigator.Item e;
Iterator<Navigator.Item> iter = topicNavigator.iterator();
while (iter.hasNext()) {
e = iter.next();
while (i < MAX_TOPWORDS && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = topicNavigator.get(name);
if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break;
if (e != null && e.name != null) {
prop.putHTML("nav-topics_element_" + i + "_name", e.name);
if (name != null) {
prop.putHTML("nav-topics_element_" + i + "_name", name);
prop.put("nav-topics_element_" + i + "_url",
"<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + e.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + e.name + "</a>");
//+"<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+-" + e.name, theQuery.urlMask.toString(), theQuery.navigators) + "\">-</a>")*/;
prop.putJSON("nav-topics_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + e.name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-topics_element_" + i + "_count", e.count);
prop.put("nav-topics_element_" + i + "_modifier", e.name);
prop.put("nav-topics_element_" + i + "_nl", (iter.hasNext() && i < MAX_TOPWORDS) ? 1 : 0);
}
if (i++ > MAX_TOPWORDS) {
break;
"<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">" + name + "</a>");
//+"<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.queryStringForUrl() + "+-" + name, theQuery.urlMask.toString(), theQuery.navigators) + "\">-</a>")*/;
prop.putJSON("nav-topics_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.queryStringForUrl() + "+" + name, theQuery.urlMask.toString(), theQuery.navigators));
prop.put("nav-topics_element_" + i + "_count", count);
prop.put("nav-topics_element_" + i + "_modifier", name);
prop.put("nav-topics_element_" + i + "_nl", 1);
i++;
}
}
i--;
prop.put("nav-topics_element_" + i + "_nl", 0);
prop.put("nav-topics_element", i);
}

@ -1,97 +0,0 @@
// Navigator.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 05.03.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2010-01-29 16:59:24 +0100 (Fr, 29 Jan 2010) $
// $LastChangedRevision: 6630 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Counts occurrences of named items under a key and lists them ordered by
 * descending count.
 *
 * <p>Counters live in a {@link ConcurrentHashMap}; {@link #inc(String, String)}
 * is written so that concurrent callers never lose an occurrence.
 */
public class Navigator {

    /** Counter items by key. */
    private final ConcurrentHashMap<String, Item> map;

    /** Creates an empty navigator. */
    public Navigator() {
        this.map = new ConcurrentHashMap<String, Item>();
    }

    /**
     * a reverse comparator for navigator items: items with a higher count sort first
     */
    public static final Comparator<Item> itemComp = new Comparator<Item>() {
        public int compare(Item o1, Item o2) {
            // read each counter exactly once so a single comparison is
            // self-consistent even if another thread increments meanwhile
            final int c1 = o1.count;
            final int c2 = o2.count;
            if (c1 < c2) return 1;
            if (c2 < c1) return -1;
            return 0;
        }
    };

    /**
     * Registers one occurrence for the given key.
     *
     * @param key  the key under which occurrences are counted
     * @param name the name stored with the item when the key is seen for the
     *             first time; ignored for keys that already exist
     */
    public void inc(String key, String name) {
        Item item = map.get(key);
        if (item == null) {
            // putIfAbsent closes the check-then-act gap: when another thread
            // registered the key in the meantime we get its item back and
            // increment that one instead of overwriting it
            item = map.putIfAbsent(key, new Item(name));
            if (item == null) return; // our new item (count 1) was stored
        }
        item.inc();
    }

    /**
     * @return the live backing map from key to item (not a copy)
     */
    public Map<String, Item> map() {
        return this.map;
    }

    /**
     * @return all items as a new array, sorted by descending count
     */
    public Item[] entries() {
        // a zero-length seed lets toArray size the result itself; pre-sizing
        // with map.size() can leave trailing nulls if the map shrinks in
        // between, and those would make the comparator fail
        Item[] ii = this.map.values().toArray(new Item[0]);
        Arrays.sort(ii, itemComp);
        return ii;
    }

    /**
     * @param maxcount the maximum number of items to return; values below
     *                 zero are treated as zero
     * @return a new list with at most maxcount items, highest count first
     */
    public List<Item> entries(int maxcount) {
        Item[] ii = entries();
        int c = Math.max(0, Math.min(ii.length, maxcount));
        ArrayList<Item> a = new ArrayList<Item>(c);
        for (int i = 0; i < c; i++) a.add(ii[i]);
        return a;
    }

    /**
     * A name together with its occurrence counter.
     */
    public static class Item {

        /** number of occurrences; volatile so readers see increments made by other threads */
        public volatile int count;

        /** the name of the item */
        public String name;

        /**
         * Creates an item that has been seen once.
         *
         * @param name the name of the item
         */
        public Item(String name) {
            this.count = 1;
            this.name = name;
        }

        /**
         * Creates an item with a given start count.
         *
         * @param name  the name of the item
         * @param count the initial counter value
         */
        public Item(String name, int count) {
            this.count = count;
            this.name = name;
        }

        /**
         * Adds one occurrence. Synchronized because ++ on a volatile field is
         * still a separate read and write.
         */
        public synchronized void inc() {
            this.count++;
        }
    }
}

@ -28,11 +28,10 @@ package de.anomic.search;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
@ -41,12 +40,13 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.cora.storage.StaticScore;
import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.document.Condenser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.meta.URIMetadataRow.Components;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceVars;
@ -81,10 +81,11 @@ public final class RankingProcess extends Thread {
private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final Navigator ref; // reference score computation for the commonSense heuristic
private final Navigator hostNavigator;
private final Navigator authorNavigator;
private final Navigator namespaceNavigator;
private final DynamicScore<String> ref; // reference score computation for the commonSense heuristic
private final DynamicScore<String> hostNavigator;
private final Map<String, String> hostResolver;
private final DynamicScore<String> authorNavigator;
private final DynamicScore<String> namespaceNavigator;
private final ReferenceOrder order;
private final long startTime;
@ -108,10 +109,11 @@ public final class RankingProcess extends Thread {
//this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new Navigator();
this.authorNavigator = new Navigator();
this.namespaceNavigator = new Navigator();
this.ref = new Navigator();
this.hostNavigator = new ScoreCluster<String>();
this.hostResolver = new ConcurrentHashMap<String, String>();
this.authorNavigator = new ScoreCluster<String>();
this.namespaceNavigator = new ScoreCluster<String>();
this.ref = new ScoreCluster<String>();
this.feeders = 1;
this.startTime = System.currentTimeMillis();
}
@ -220,7 +222,8 @@ public final class RankingProcess extends Thread {
if (query.sitehash == null) {
// no site constraint there; maybe collect host navigation information
if (nav_hosts && query.urlMask_isCatchall) {
this.hostNavigator.inc(domhash, new String(iEntry.metadataHash()));
this.hostNavigator.inc(domhash);
this.hostResolver.put(domhash, new String(iEntry.metadataHash()));
}
} else {
if (!domhash.equals(query.sitehash)) {
@ -424,7 +427,8 @@ public final class RankingProcess extends Thread {
// in case that we do not have a catchall filter for urls
// we must also construct the domain navigator here
if (query.sitehash == null) {
this.hostNavigator.inc(new String(urlhash, 6, 6), new String(urlhash));
this.hostNavigator.inc(new String(urlhash, 6, 6));
this.hostResolver.put(new String(urlhash, 6, 6), new String(urlhash));
}
}
@ -474,7 +478,7 @@ public final class RankingProcess extends Thread {
}
// add author to the author navigator
this.authorNavigator.inc(authorhash, pageauthor);
this.authorNavigator.inc(pageauthor);
} else if (this.query.authorhash != null) {
continue;
}
@ -486,7 +490,7 @@ public final class RankingProcess extends Thread {
p = pagepath.lastIndexOf('/');
if (p >= 0) {
pagepath = pagepath.substring(p + 1);
this.namespaceNavigator.inc(pagepath, pagepath);
this.namespaceNavigator.inc(pagepath);
}
}
@ -567,38 +571,25 @@ public final class RankingProcess extends Thread {
return this.misses.iterator();
}
public ArrayList<Navigator.Item> getNamespaceNavigator(int count) {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ArrayList<Navigator.Item>(0);
Navigator.Item[] hsa = this.namespaceNavigator.entries();
int rc = Math.min(count, hsa.length);
ArrayList<Navigator.Item> result = new ArrayList<Navigator.Item>();
for (int i = 0; i < rc; i++) result.add(hsa[i]);
if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;
public StaticScore<String> getNamespaceNavigator() {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ScoreCluster<String>();
if (this.namespaceNavigator.size() < 2) this.namespaceNavigator.clear(); // navigators with one entry are not useful
return this.namespaceNavigator;
}
public List<Navigator.Item> getHostNavigator(int count) {
List<Navigator.Item> result = new ArrayList<Navigator.Item>();
public StaticScore<String> getHostNavigator() {
ScoreCluster<String> result = new ScoreCluster<String>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;
List<Navigator.Item> hsa = this.hostNavigator.entries(10);
URIMetadataRow mr;
DigestURI url;
String hostname;
Components metadata;
loop: for (Navigator.Item item: hsa) {
mr = this.query.getSegment().urlMetadata().load(item.name.getBytes(), null, 0);
if (mr == null) continue;
metadata = mr.metadata();
if (metadata == null) continue;
url = metadata.url();
if (url == null) continue;
hostname = url.getHost();
if (hostname == null) continue;
if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue;
for (Navigator.Item entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
result.add(new Navigator.Item(hostname, item.count));
Iterator<String> domhashs = this.hostNavigator.keys(false);
URIMetadataRow row;
String domhash, urlhash, hostname;
while (domhashs.hasNext() && result.size() < 30) {
domhash = domhashs.next();
urlhash = this.hostResolver.get(domhash);
row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash.getBytes(), null, 0);
hostname = row == null ? null : row.metadata().url().getHost();
if (hostname != null) result.set(hostname, this.hostNavigator.get(domhash));
}
if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;
@ -611,18 +602,35 @@ public final class RankingProcess extends Thread {
return 0;
}
};
public Map<String, Navigator.Item> getTopics() {
return this.ref.map();
}
public List<Navigator.Item> getTopicNavigator(final int count) {
public StaticScore<String> getTopicNavigator(int count) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList<Navigator.Item>(0);
List<Navigator.Item> result = this.ref.entries(count);
if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;
ScoreCluster<String> result = new ScoreCluster<String>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return result;
if (this.ref.size() < 2) this.ref.clear(); // navigators with one entry are not useful
Map<String, Double> counts = new HashMap<String, Double>();
Iterator<String> i = this.ref.keys(false);
String word;
byte[] termHash;
int c;
double q, min = Double.MAX_VALUE, max = Double.MIN_NORMAL;
int ic = count;
while (ic-- > 0 && i.hasNext()) {
word = i.next();
termHash = Word.word2hash(word);
c = this.query.getSegment().termIndex().count(termHash);
if (c > 0) {
q = ((double) this.ref.get(word)) / ((double) c);
min = Math.min(min, q);
max = Math.max(max, q);
counts.put(word, q);
}
}
if (max > min) for (Map.Entry<String, Double> ce: counts.entrySet()) {
result.set(ce.getKey(), (int) (((double) count) * (ce.getValue() - min) / (max - min)));
}
return this.ref;
}
public void addTopic(final String[] words) {
@ -630,12 +638,12 @@ public final class RankingProcess extends Thread {
for (int i = 0; i < words.length; i++) {
word = words[i].toLowerCase();
if (word.length() > 2 &&
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0 &&
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off".indexOf(word) < 0 &&
!query.queryHashes.has(Word.word2hash(word)) &&
word.matches("[a-z]+") &&
!Switchboard.badwords.contains(word) &&
!Switchboard.stopwords.contains(word)) {
ref.inc(word, word);
ref.inc(word);
}
}
}
@ -651,13 +659,12 @@ public final class RankingProcess extends Thread {
addTopic(descrcomps);
}
public List<Navigator.Item> getAuthorNavigator(final int count) {
public StaticScore<String> getAuthorNavigator() {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList<Navigator.Item>(0);
List<Navigator.Item> result = this.authorNavigator.entries(count);
if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ScoreCluster<String>();
if (this.authorNavigator.size() < 2) this.authorNavigator.clear(); // navigators with one entry are not useful
return this.authorNavigator;
}
public static void loadYBR(final File rankingPath, final int count) {

@ -28,9 +28,9 @@ package de.anomic.search;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.storage.StaticScore;
import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.document.Condenser;
@ -197,7 +197,7 @@ public class ResultFetcher {
// place the result to the result vector
// apply post-ranking
long ranking = Long.valueOf(rankingProcess.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankingProcess.getTopics());
ranking += postRanking(resultEntry, rankingProcess.getTopicNavigator(10));
result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
if (nav_topics) rankingProcess.addTopics(resultEntry);
}
@ -393,7 +393,7 @@ public class ResultFetcher {
public long postRanking(
final ResultEntry rentry,
final Map<String, Navigator.Item> topwords) {
final StaticScore<String> topwords) {
long r = 0;
@ -411,14 +411,14 @@ public class ResultFetcher {
final String urlstring = rentry.url().toNormalform(true, true);
final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
Navigator.Item tc;
int tc;
for (int j = 0; j < urlcomps.length; j++) {
tc = topwords.get(urlcomps[j]);
if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_urlcompintoplist;
if (tc > 0) r += Math.max(1, tc) << query.ranking.coeff_urlcompintoplist;
}
for (int j = 0; j < descrcomps.length; j++) {
tc = topwords.get(descrcomps[j]);
if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_descrcompintoplist;
if (tc > 0) r += Math.max(1, tc) << query.ranking.coeff_descrcompintoplist;
}
// apply query-in-result matching

@ -27,15 +27,14 @@
package de.anomic.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import net.yacy.cora.storage.StaticScore;
import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.index.HandleSet;
@ -330,22 +329,22 @@ public final class SearchEvent {
return this.rankingProcess;
}
public ArrayList<Navigator.Item> getNamespaceNavigator(int maxentries) {
return this.rankingProcess.getNamespaceNavigator(maxentries);
public StaticScore<String> getNamespaceNavigator() {
return this.rankingProcess.getNamespaceNavigator();
}
public List<Navigator.Item> getHostNavigator(int maxentries) {
return this.rankingProcess.getHostNavigator(maxentries);
public StaticScore<String> getHostNavigator() {
return this.rankingProcess.getHostNavigator();
}
public List<Navigator.Item> getTopicNavigator(final int maxentries) {
public StaticScore<String> getTopicNavigator(int count) {
// returns a set of words that are computed as toplist
return this.rankingProcess.getTopicNavigator(maxentries);
return this.rankingProcess.getTopicNavigator(count);
}
public List<Navigator.Item> getAuthorNavigator(final int maxentries) {
public StaticScore<String> getAuthorNavigator() {
// returns a list of authors so far seen on result set
return this.rankingProcess.getAuthorNavigator(maxentries);
return this.rankingProcess.getAuthorNavigator();
}
public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) {

@ -78,7 +78,7 @@ public final class CachedFileWriter extends AbstractWriter implements Writer {
}
// we fill the cache here
long available = this.RAFile.length() - seek;
if (available < (long) len) throw new IOException("EOF, available = " + available + ", requested = " + len);
if (available < (long) len) throw new IOException("EOF, available = " + available + ", requested = " + len + ", this.RAFile.length() = " + this.RAFile.length() + ", seek = " + seek);
if (cachestart + cachelen == seek && cache.length - cachelen >= len) {
RAFile.readFully(cache, cachelen, len);
//System.out.println("*** DEBUG FileRA " + this.file.getName() + ": append fill " + len + " bytes");

Loading…
Cancel
Save