- added a new Wiki Namespace Navigator

- some redesign of Navigator data structures

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6716 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 15 years ago
parent b0c6d0108b
commit 884b262130

@@ -31,6 +31,7 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
@@ -49,6 +50,7 @@ import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.net.natLib;
import de.anomic.search.ContentDomain;
import de.anomic.search.Navigator;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEvent;
@@ -57,7 +59,6 @@ import de.anomic.search.Segment;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.search.ResultEntry;
import de.anomic.search.RankingProcess.NavigatorEntry;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -336,9 +337,9 @@ public final class search {
// prepare reference hints
final long timer = System.currentTimeMillis();
final ArrayList<NavigatorEntry> ws = theSearch.getTopicNavigator(10);
final List<Navigator.Item> ws = theSearch.getTopicNavigator(10);
final StringBuilder refstr = new StringBuilder(6000);
for (NavigatorEntry e: ws) {
for (Navigator.Item e: ws) {
refstr.append(",").append(e.name);
}
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());

@@ -53,9 +53,10 @@ $(function() {
});
$("#sidebar1").accordion({});
$("#sidebar2").accordion({});
$("#sidebar2").accordion('activate', false);
$("#sidebar3").accordion({});
$("#sidebar3").accordion('activate', false);
$("#sidebar4").accordion({});
$("#sidebar4").accordion('activate', false);
$("#sidebarAbout").accordion({});
$("#search").focus();
});

@@ -8,6 +8,15 @@
</div>
<div id="sidebar2" style="float: right; margin-top:5px; width: 220px;">
#(nav-namespace)#::
<h3 style="padding-left:25px;">Name Space Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
#(/nav-namespace)#
</div>
<div id="sidebar3" style="float: right; margin-top:5px; width: 220px;">
#(nav-authors)#::
<h3 style="padding-left:25px;">Author Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#
@@ -16,7 +25,7 @@
#(/nav-authors)#
</div>
<div id="sidebar3" style="float: right; margin-top:5px; width: 220px;">
<div id="sidebar4" style="float: right; margin-top:5px; width: 220px;">
#(nav-topics)#::
<h3 style="padding-left:25px;">Topic Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#

@@ -26,14 +26,15 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.yacy.kelondro.util.EventTracker;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Navigator;
import de.anomic.search.QueryParams;
import de.anomic.search.SearchEvent;
import de.anomic.search.SearchEventCache;
import de.anomic.search.RankingProcess.NavigatorEntry;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.graphics.ProfilingGraph;
@@ -58,39 +59,61 @@ public class yacysearchtrailer {
final QueryParams theQuery = theSearch.getQuery();
// compose search navigation
// namespace navigators
ArrayList<Navigator.Item> namespaceNavigator = theSearch.getNamespaceNavigator(10);
if (namespaceNavigator == null || namespaceNavigator.isEmpty()) {
prop.put("nav-namespace", 0);
} else {
prop.put("nav-namespace", 1);
Navigator.Item entry;
int i;
for (i = 0; i < Math.min(10, namespaceNavigator.size()); i++) {
entry = namespaceNavigator.get(i);
prop.put("nav-namespace_element_" + i + "_name", entry.name);
prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask, "inurl:" + entry.name, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask, "inurl:" + entry.name, theQuery.navigators));
prop.put("nav-namespace_element_" + i + "_count", entry.count);
prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + entry.name);
prop.put("nav-namespace_element_" + i + "_nl", 1);
}
i--;
prop.put("nav-namespace_element_" + i + "_nl", 0);
prop.put("nav-namespace_element", namespaceNavigator.size());
}
// host navigators
ArrayList<NavigatorEntry> hostNavigator = theSearch.getHostNavigator(10);
List<Navigator.Item> hostNavigator = theSearch.getHostNavigator(10);
if (hostNavigator == null || hostNavigator.isEmpty()) {
prop.put("nav-domains", 0);
prop.put("nav-domains", 0);
} else {
prop.put("nav-domains", 1);
NavigatorEntry entry;
int i;
for (i = 0; i < hostNavigator.size(); i++) {
entry = hostNavigator.get(i);
prop.put("nav-domains_element_" + i + "_name", entry.name);
prop.put("nav-domains", 1);
Navigator.Item entry;
int i;
for (i = 0; i < Math.min(10, hostNavigator.size()); i++) {
entry = hostNavigator.get(i);
prop.put("nav-domains_element_" + i + "_name", entry.name);
prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask, "site:" + entry.name, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask, "site:" + entry.name, theQuery.navigators));
prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask, "site:" + entry.name, theQuery.navigators));
prop.put("nav-domains_element_" + i + "_count", entry.count);
prop.put("nav-domains_element_" + i + "_modifier", "site:" + entry.name);
prop.put("nav-domains_element_" + i + "_modifier", "site:" + entry.name);
prop.put("nav-domains_element_" + i + "_nl", 1);
}
i--;
prop.put("nav-domains_element_" + i + "_nl", 0);
prop.put("nav-domains_element", hostNavigator.size());
}
i--;
prop.put("nav-domains_element_" + i + "_nl", 0);
prop.put("nav-domains_element", hostNavigator.size());
}
// author navigators
ArrayList<NavigatorEntry> authorNavigator = theSearch.getAuthorNavigator(10);
List<Navigator.Item> authorNavigator = theSearch.getAuthorNavigator(10);
if (authorNavigator == null || authorNavigator.isEmpty()) {
prop.put("nav-authors", 0);
} else {
prop.put("nav-authors", 1);
NavigatorEntry entry;
Navigator.Item entry;
int i;
String anav;
for (i = 0; i < authorNavigator.size(); i++) {
for (i = 0; i < Math.min(10, authorNavigator.size()); i++) {
entry = authorNavigator.get(i);
anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name + "'";
prop.put("nav-authors_element_" + i + "_name", entry.name);
@@ -106,15 +129,15 @@ public class yacysearchtrailer {
}
// topics navigator
ArrayList<NavigatorEntry> topicNavigator = theSearch.getTopicNavigator(10);
List<Navigator.Item> topicNavigator = theSearch.getTopicNavigator(10);
if (topicNavigator == null || topicNavigator.isEmpty()) {
topicNavigator = new ArrayList<NavigatorEntry>();
topicNavigator = new ArrayList<Navigator.Item>();
prop.put("nav-topics", "0");
} else {
prop.put("nav-topics", "1");
int i = 0;
NavigatorEntry e;
Iterator<NavigatorEntry> iter = topicNavigator.iterator();
Navigator.Item e;
Iterator<Navigator.Item> iter = topicNavigator.iterator();
while (iter.hasNext()) {
e = iter.next();
if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break;

@@ -11,7 +11,20 @@
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-domains)##(nav-authors)#::
},#(/nav-domains)##(nav-namespace)#::
{
"facetname": "namespace",
"displayname": "Name Space",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-namespace)##(nav-authors)#::
{
"facetname": "authors",
"displayname": "Authors",

@@ -6,6 +6,20 @@
#{/element}#
</yacy:facet>
#(/nav-domains)#
#(nav-namespace)#::
<yacy:facet name="domains" displayname="Namespace" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-namespace)#
#(nav-authors)#::
<yacy:facet name="domains" displayname="Authors" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-authors)#
#(nav-topics)#::
<yacy:facet name="topwords" displayname="Topics" type="String" min="0" max="0" mean="0">
#{element}#

@@ -65,7 +65,7 @@ public class ListAccumulator {
/**
* Adds a new entry to a list identified by a given name.
* @param name The name of the list the entry is to be added to.
* @param key The name of the list the entry is to be added to.
* @param entry The new entry.
* @return True if the entry has been added, else false (if list does not exists).
*/

@@ -29,6 +29,7 @@ package de.anomic.search;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.concurrent.BlockingQueue;
@@ -54,18 +55,24 @@ public class DocumentIndex extends Segment {
private static final RankingProfile textRankingDefault = new RankingProfile(ContentDomain.TEXT);
//private Bitfield zeroConstraint = new Bitfield(4);
final static File poison = new File(".");
BlockingQueue<File> queue;
private static DigestURI poison;
static {
try {
poison = new DigestURI("file://.");
} catch (MalformedURLException e) {}
}
BlockingQueue<DigestURI> queue; // a queue of document ID's
private Worker[] worker;
CallbackListener callback;
static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
public DocumentIndex(final File segmentPath, CallbackListener callback, int cachesize) throws IOException {
super(new Log("DocumentIndex"), segmentPath, cachesize, targetFileSize * 4 - 1, false, false);
int cores = Runtime.getRuntime().availableProcessors() + 1;
this.callback = callback;
this.queue = new LinkedBlockingQueue<File>(cores * 300);
this.queue = new LinkedBlockingQueue<DigestURI>(cores * 300);
this.worker = new Worker[cores];
for (int i = 0; i < cores; i++) {
this.worker[i] = new Worker(i);
@@ -79,7 +86,7 @@ public class DocumentIndex extends Segment {
}
public void run() {
File f;
DigestURI f;
URIMetadataRow resultRow;
try {
while ((f = queue.take()) != poison) try {
@@ -110,31 +117,24 @@ public class DocumentIndex extends Segment {
this.queue.clear();
}
/**
* put a single file into the index
* @param file
* @return a metadata object that has been generated to identify the file
* @throws IOException in case that the file does not exist or cannot be parsed
*/
public URIMetadataRow add(File file) throws IOException {
if (file == null) throw new IOException("file = null");
if (file.isDirectory()) throw new IOException("file should be a document, not a path");
if (!file.canRead()) throw new IOException("cannot read file");
DigestURI url = new DigestURI("file:" + file.getAbsolutePath());
Document document;
public URIMetadataRow add(DigestURI url) throws IOException {
if (url == null) throw new IOException("file = null");
if (url.isDirectory()) throw new IOException("file should be a document, not a path");
if (!url.canRead()) throw new IOException("cannot read file");
Document document;
try {
document = TextParser.parseSource(url, null, null, file);
document = TextParser.parseSource(url, null, null, url.length(), url.getInputStream());
} catch (InterruptedException e) {
throw new IOException("cannot parse " + file.toString() + ": " + e.getMessage());
throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
} catch (ParserException e) {
throw new IOException("cannot parse " + file.toString() + ": " + e.getMessage());
throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
}
final Condenser condenser = new Condenser(document, true, true);
return super.storeDocument(
url,
null,
new Date(file.lastModified()),
file.length(),
new Date(url.lastModified()),
url.length(),
document,
condenser
);
@@ -145,7 +145,7 @@ public class DocumentIndex extends Segment {
* If the given file is a path to a directory, the complete sub-tree is indexed
* @param start
*/
public void addConcurrent(File start) {
public void addConcurrent(DigestURI start) {
assert (start != null);
assert (start.canRead()) : start.toString();
if (!start.isDirectory()) {
@@ -155,17 +155,21 @@
return;
}
String[] s = start.list();
File w;
DigestURI w;
for (String t: s) {
w = new File(start, t);
if (w.canRead() && !w.isHidden()) {
if (w.isDirectory()) {
addConcurrent(w);
} else {
try {
this.queue.put(w);
} catch (InterruptedException e) {}
try {
w = new DigestURI(start, t);
if (w.canRead() && !w.isHidden()) {
if (w.isDirectory()) {
addConcurrent(w);
} else {
try {
this.queue.put(w);
} catch (InterruptedException e) {}
}
}
} catch (MalformedURLException e1) {
Log.logException(e1);
}
}
}
@@ -177,14 +181,14 @@
* @param count
* @return a list of files that contain the given string
*/
public ArrayList<File> find(String querystring, int pos, int count) {
public ArrayList<DigestURI> find(String querystring, int pos, int count) {
ArrayList<URIMetadataRow> result = findMetadata(querystring, this);
ArrayList<File> files = new ArrayList<File>();
ArrayList<DigestURI> files = new ArrayList<DigestURI>();
Components metadata;
for (URIMetadataRow row : result) {
metadata = row.metadata();
if (metadata == null) continue;
files.add(metadata.url().getLocalFile());
files.add(metadata.url());
count--;
if (count == 0) break;
}
@@ -216,7 +220,7 @@
* @param querystring
* @return a list of files that contain the word
*/
public ArrayList<File> find(String querystring) {
public ArrayList<DigestURI> find(String querystring) {
return find(querystring, 0, 100);
}
@@ -242,8 +246,8 @@
}
public interface CallbackListener {
public void commit(File f, URIMetadataRow resultRow);
public void fail(File f, String failReason);
public void commit(DigestURI f, URIMetadataRow resultRow);
public void fail(DigestURI f, String failReason);
}
public static void main(String[] args) {
@@ -259,16 +263,16 @@ public class DocumentIndex extends Segment {
File segmentPath = new File(args[0]);
System.out.println("using index files at " + segmentPath.getAbsolutePath());
CallbackListener callback = new CallbackListener() {
public void commit(File f, URIMetadataRow resultRow) {
public void commit(DigestURI f, URIMetadataRow resultRow) {
System.out.println("indexed: " + f.toString());
}
public void fail(File f, String failReason) {
public void fail(DigestURI f, String failReason) {
System.out.println("not indexed " + f.toString() + ": " + failReason);
}
};
try {
if (args[1].equals("add")) {
File f = new File(args[2]);
DigestURI f = new DigestURI(args[2]);
DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
di.addConcurrent(f);
di.close();
@@ -277,8 +281,8 @@
for (int i = 2; i < args.length; i++) query += args[i];
query.trim();
DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
ArrayList<File> results = di.find(query);
for (File f: results) {
ArrayList<DigestURI> results = di.find(query);
for (DigestURI f: results) {
if (f != null) System.out.println(f.toString());
}
di.close();
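
Note: DocumentIndex now takes DigestURI instead of java.io.File throughout (add, addConcurrent, find, CallbackListener). Below is a minimal sketch of wrapping a local file as a DigestURI for the new API, following the "file:" + absolute-path scheme the removed add(File) method used; the import path and the sample path are assumptions for illustration, not part of this commit.

// Illustrative only: builds a file: DigestURI the way the removed add(File) method did.
import java.io.File;
import java.net.MalformedURLException;
import net.yacy.kelondro.data.meta.DigestURI; // assumed package for DigestURI in this source tree

public class DigestUriFromFile {
    public static void main(String[] args) {
        File f = new File("/tmp/example.html"); // hypothetical document
        try {
            // same scheme the old add(File) used before handing the URL to the parser
            DigestURI url = new DigestURI("file:" + f.getAbsolutePath());
            System.out.println(url.toString());
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
}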

@@ -0,0 +1,97 @@
// Navigator.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 05.03.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2010-01-29 16:59:24 +0100 (Fr, 29 Jan 2010) $
// $LastChangedRevision: 6630 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public class Navigator {
private ConcurrentHashMap<String, Item> map;
public Navigator() {
this.map = new ConcurrentHashMap<String, Item>();
}
/**
* a reverse comparator for navigator items
*/
public static final Comparator<Item> itemComp = new Comparator<Item>() {
public int compare(Item o1, Item o2) {
if (o1.count < o2.count) return 1;
if (o2.count < o1.count) return -1;
return 0;
}
};
public void inc(String key, String name) {
Item item = map.get(key);
if (item == null) {
map.put(key, new Item(name));
} else {
item.inc();
}
}
public Map<String, Item> map() {
return this.map;
}
public Item[] entries() {
Item[] ii = this.map.values().toArray(new Item[this.map.size()]);
Arrays.sort(ii, itemComp);
return ii;
}
public List<Item> entries(int maxcount) {
Item[] ii = entries();
int c = Math.min(ii.length, maxcount);
ArrayList<Item> a = new ArrayList<Item>(c);
for (int i = 0; i < c; i++) a.add(ii[i]);
return a;
}
public static class Item {
public int count;
public String name;
public Item(String name) {
this.count = 1;
this.name = name;
}
public Item(String name, int count) {
this.count = count;
this.name = name;
}
public void inc() {
this.count++;
}
}
}
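
The new Navigator class above replaces the per-facet HostInfo/AuthorInfo counters with one generic counting map. A minimal usage sketch, assuming only the de.anomic.search.Navigator class introduced in this commit; the keys and names below are invented for illustration:

// Counts a few hypothetical host keys and prints the entries sorted by descending count.
import java.util.List;
import de.anomic.search.Navigator;

public class NavigatorDemo {
    public static void main(String[] args) {
        Navigator hosts = new Navigator();
        hosts.inc("hashA", "en.wikipedia.org"); // first call creates an Item with count = 1
        hosts.inc("hashA", "en.wikipedia.org"); // later calls only increment the count
        hosts.inc("hashB", "yacy.net");
        List<Navigator.Item> top = hosts.entries(10); // sorted by itemComp, highest count first
        for (Navigator.Item item : top) {
            System.out.println(item.name + " (" + item.count + ")");
        }
    }
}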

@@ -1,4 +1,4 @@
// plasmaSearchRankingProcess.java
// RankingProcess.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 07.11.2007 on http://yacy.net
//
@@ -29,12 +29,12 @@ package de.anomic.search;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue;
@@ -81,9 +81,10 @@ public final class RankingProcess extends Thread {
private final ConcurrentHashMap<String, SortStack<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
private final HashSet<String> handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
private final Navigator ref; // reference score computation for the commonSense heuristic
private final Navigator hostNavigator;
private final Navigator authorNavigator;
private final Navigator namespaceNavigator;
private final ReferenceOrder order;
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
@@ -106,9 +107,10 @@
this.misses = new TreeSet<String>();
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new ConcurrentHashMap<String, HostInfo>();
this.authorNavigator = new ConcurrentHashMap<String, AuthorInfo>();
this.ref = new ConcurrentHashMap<String, Integer>();
this.hostNavigator = new Navigator();
this.authorNavigator = new Navigator();
this.namespaceNavigator = new Navigator();
this.ref = new Navigator();
//this.domZones = new int[8];
//for (int i = 0; i < 8; i++) {this.domZones[i] = 0;}
this.feeders = concurrency;
@@ -171,8 +173,7 @@
EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime);
// iterate over normalized entries and select some that are better than currently stored
timer = System.currentTimeMillis();
HostInfo hs;
timer = System.currentTimeMillis();
String domhash;
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
Long r;
@@ -221,12 +222,7 @@
// get statistics for host navigator
if (nav_hosts) {
domhash = iEntry.urlHash.substring(6);
hs = this.hostNavigator.get(domhash);
if (hs == null) {
this.hostNavigator.put(domhash, new HostInfo(iEntry.urlHash));
} else {
hs.inc();
}
this.hostNavigator.inc(domhash, iEntry.urlHash);
}
// accept
@@ -377,6 +373,7 @@
public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
// returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + timeout;
int p = -1;
while (System.currentTimeMillis() < timeLimit) {
final SortStack<WordReferenceVars>.stackElement obrwi = takeRWI(skipDoubleDom);
if (obrwi == null) {
@@ -444,17 +441,22 @@
}
// add author to the author navigator
AuthorInfo in = this.authorNavigator.get(authorhash);
if (in == null) {
this.authorNavigator.put(authorhash, new AuthorInfo(pageauthor));
} else {
in.inc();
this.authorNavigator.put(authorhash, in);
}
this.authorNavigator.inc(authorhash, pageauthor);
} else if (this.query.authorhash != null) {
continue;
}
// namespace navigation
String pagepath = metadata.url().getPath();
if ((p = pagepath.indexOf(':')) >= 0) {
pagepath = pagepath.substring(0,p);
p = pagepath.lastIndexOf('/');
if (p >= 0) {
pagepath = pagepath.substring(p + 1);
this.namespaceNavigator.inc(pagepath, pagepath);
}
}
// accept url
//System.out.println("handing over hash " + page.hash());
this.handover.add(page.hash()); // remember that we handed over this url
@@ -525,68 +527,32 @@
return this.misses.iterator();
}
public class HostInfo {
public int count;
public String hashsample;
public HostInfo(String urlhash) {
this.count = 1;
this.hashsample = urlhash;
}
public void inc() {
this.count++;
}
}
public class AuthorInfo {
public int count;
public String author;
public AuthorInfo(String author) {
this.count = 1;
this.author = author;
}
public void inc() {
this.count++;
}
public ArrayList<Navigator.Item> getNamespaceNavigator(int count) {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ArrayList<Navigator.Item>(0);
Navigator.Item[] hsa = this.namespaceNavigator.entries();
int rc = Math.min(count, hsa.length);
ArrayList<Navigator.Item> result = new ArrayList<Navigator.Item>();
for (int i = 0; i < rc; i++) result.add(hsa[i]);
return result;
}
public static final Comparator<HostInfo> hscomp = new Comparator<HostInfo>() {
public int compare(HostInfo o1, HostInfo o2) {
if (o1.count < o2.count) return 1;
if (o2.count < o1.count) return -1;
return 0;
}
};
public static final Comparator<AuthorInfo> aicomp = new Comparator<AuthorInfo>() {
public int compare(AuthorInfo o1, AuthorInfo o2) {
if (o1.count < o2.count) return 1;
if (o2.count < o1.count) return -1;
return 0;
}
};
public class NavigatorEntry {
public int count;
public String name;
public NavigatorEntry(String name, int count) {
this.name = name;
this.count = count;
}
public List<Navigator.Item> getHostNavigators(int count) {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return new ArrayList<Navigator.Item>(0);
return this.hostNavigator.entries(10);
}
public ArrayList<NavigatorEntry> getHostNavigator(int count) {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return new ArrayList<NavigatorEntry>(0);
public List<Navigator.Item> getHostNavigator(int count) {
List<Navigator.Item> result = new ArrayList<Navigator.Item>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;
HostInfo[] hsa = this.hostNavigator.values().toArray(new HostInfo[this.hostNavigator.size()]);
Arrays.sort(hsa, hscomp);
int rc = Math.min(count, hsa.length);
ArrayList<NavigatorEntry> result = new ArrayList<NavigatorEntry>();
List<Navigator.Item> hsa = this.hostNavigator.entries(10);
URIMetadataRow mr;
DigestURI url;
String hostname;
Components metadata;
loop: for (int i = 0; i < rc; i++) {
mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
loop: for (Navigator.Item item: hsa) {
mr = this.query.getSegment().urlMetadata().load(item.name, null, 0);
if (mr == null) continue;
metadata = mr.metadata();
if (metadata == null) continue;
@@ -595,12 +561,13 @@
hostname = url.getHost();
if (hostname == null) continue;
if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue;
for (NavigatorEntry entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
result.add(new NavigatorEntry(hostname, hsa[i].count));
for (Navigator.Item entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
result.add(new Navigator.Item(hostname, item.count));
}
return result;
}
public static final Comparator<Map.Entry<String, Integer>> mecomp = new Comparator<Map.Entry<String, Integer>>() {
public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
if (o1.getValue().intValue() < o2.getValue().intValue()) return 1;
@@ -609,44 +576,29 @@
}
};
public Map<String, Integer> getTopics() {
return this.ref;
public Map<String, Navigator.Item> getTopics() {
return this.ref.map();
}
@SuppressWarnings("unchecked")
public ArrayList<NavigatorEntry> getTopicNavigator(final int count) {
public List<Navigator.Item> getTopicNavigator(final int count) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList<NavigatorEntry>(0);
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList<Navigator.Item>(0);
Map.Entry<String, Integer>[] a = this.ref.entrySet().toArray(new Map.Entry[this.ref.size()]);
Arrays.sort(a, mecomp);
int rc = Math.min(count, a.length);
ArrayList<NavigatorEntry> result = new ArrayList<NavigatorEntry>();
Map.Entry<String, Integer> e;
int c;
for (int i = 0; i < rc; i++) {
e = a[i];
c = e.getValue().intValue();
if (c == 0) break;
result.add(new NavigatorEntry(e.getKey(), c));
}
return result;
return this.ref.entries(10);
}
public void addTopic(final String[] words) {
String word;
for (int i = 0; i < words.length; i++) {
word = words[i].toLowerCase();
Integer c;
if (word.length() > 2 &&
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0 &&
!query.queryHashes.contains(Word.word2hash(word)) &&
word.matches("[a-z]+") &&
!Switchboard.badwords.contains(word) &&
!Switchboard.stopwords.contains(word)) {
c = ref.get(word);
if (c == null) ref.put(word, 1); else ref.put(word, c.intValue() + 1);
ref.inc(word, word);
}
}
}
@@ -662,22 +614,12 @@
addTopic(descrcomps);
}
public ArrayList<NavigatorEntry> getAuthorNavigator(final int count) {
public List<Navigator.Item> getAuthorNavigator(final int count) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList<NavigatorEntry>(0);
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList<Navigator.Item>(0);
AuthorInfo[] a = this.authorNavigator.values().toArray(new AuthorInfo[this.authorNavigator.size()]);
Arrays.sort(a, aicomp);
int rc = Math.min(count, a.length);
ArrayList<NavigatorEntry> result = new ArrayList<NavigatorEntry>();
AuthorInfo e;
for (int i = 0; i < rc; i++) {
e = a[i];
//System.out.println("*** DEBUG Author = " + e.author + ", count = " + e.count);
result.add(new NavigatorEntry(e.author, e.count));
}
return result;
return this.authorNavigator.entries(count);
}
public static void loadYBR(final File rankingPath, final int count) {
@@ -733,12 +675,3 @@
}
}
/*
Thread= Thread-937 id=4224 BLOCKED
Thread= Thread-919 id=4206 BLOCKED
Thread= Thread-936 id=4223 BLOCKED
at net.yacy.kelondro.util.SortStack.pop(SortStack.java:118)
at de.anomic.search.RankingProcess.takeRWI(RankingProcess.java:310)
at de.anomic.search.RankingProcess.takeURL(RankingProcess.java:371)
at de.anomic.search.ResultFetcher$Worker.run(ResultFetcher.java:161)
*/
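
The namespace facet is fed in takeURL(): when the URL path contains a ':', the text between the last '/' and that ':' is counted as a wiki namespace. A standalone sketch of that parsing step, kept separate from the class above; the example paths are invented, not taken from this commit:

// Mirrors the path parsing added to RankingProcess.takeURL(); example paths are hypothetical.
public class NamespaceParseDemo {
    static String namespaceOf(String pagepath) {
        int p = pagepath.indexOf(':');
        if (p < 0) return null;                 // no namespace marker in the path
        pagepath = pagepath.substring(0, p);    // keep everything before the ':'
        p = pagepath.lastIndexOf('/');
        if (p < 0) return null;                 // the commit only counts paths that contain a '/'
        return pagepath.substring(p + 1);       // e.g. "/wiki/Category:Foo" -> "Category"
    }

    public static void main(String[] args) {
        System.out.println(namespaceOf("/wiki/Category:Search_engines")); // Category
        System.out.println(namespaceOf("/wiki/Main_Page"));               // null
    }
}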

@@ -44,7 +44,6 @@ import net.yacy.kelondro.util.SetTools;
import net.yacy.kelondro.util.SortStack;
import net.yacy.kelondro.util.SortStore;
import de.anomic.search.RankingProcess.NavigatorEntry;
import de.anomic.search.MediaSnippet;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.graphics.ProfilingGraph;
@@ -260,20 +259,6 @@ public class ResultFetcher {
Log.logInfo("search", "sorted out hash " + urlhash + " during search: " + reason);
}
public ArrayList<NavigatorEntry> getHostNavigator(int maxentries) {
return this.rankedCache.getHostNavigator(maxentries);
}
public ArrayList<NavigatorEntry> getTopicNavigator(final int maxentries) {
// returns a set of words that are computed as toplist
return this.rankedCache.getTopicNavigator(maxentries);
}
public ArrayList<NavigatorEntry> getAuthorNavigator(final int maxentries) {
// returns a list of authors so far seen on result set
return this.rankedCache.getAuthorNavigator(maxentries);
}
public int resultCount() {
return this.result.size();
}
@@ -357,7 +342,7 @@ public class ResultFetcher {
public long postRanking(
final ResultEntry rentry,
final Map<String, Integer> topwords) {
final Map<String, Navigator.Item> topwords) {
long r = 0;
@@ -375,14 +360,14 @@
final String urlstring = rentry.url().toNormalform(true, true);
final String[] urlcomps = DigestURI.urlComps(urlstring);
final String[] descrcomps = DigestURI.splitpattern.split(rentry.title().toLowerCase());
Integer tc;
Navigator.Item tc;
for (int j = 0; j < urlcomps.length; j++) {
tc = topwords.get(urlcomps[j]);
if (tc != null) r += Math.max(1, tc.intValue()) << query.ranking.coeff_urlcompintoplist;
if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_urlcompintoplist;
}
for (int j = 0; j < descrcomps.length; j++) {
tc = topwords.get(descrcomps[j]);
if (tc != null) r += Math.max(1, tc) << query.ranking.coeff_descrcompintoplist;
if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_descrcompintoplist;
}
// apply query-in-result matching
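
postRanking now looks topics up as Navigator.Item and boosts by item.count rather than a plain Integer. A small self-contained sketch of that boost; the coefficient value and word list are invented and merely stand in for query.ranking.coeff_urlcompintoplist and the URL components:

// Sketch of the topword boost now based on Navigator.Item.count; sample data is hypothetical.
import java.util.HashMap;
import java.util.Map;
import de.anomic.search.Navigator;

public class TopwordBoostDemo {
    public static void main(String[] args) {
        Map<String, Navigator.Item> topwords = new HashMap<String, Navigator.Item>();
        topwords.put("wiki", new Navigator.Item("wiki", 7));

        String[] urlcomps = {"wiki", "index"};  // hypothetical URL components
        int coeff_urlcompintoplist = 2;         // stands in for query.ranking.coeff_urlcompintoplist
        long r = 0;
        for (String comp : urlcomps) {
            Navigator.Item tc = topwords.get(comp);
            if (tc != null) r += Math.max(1, tc.count) << coeff_urlcompintoplist;
        }
        System.out.println("boost = " + r);     // 7 << 2 = 28
    }
}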

@@ -29,6 +29,7 @@ package de.anomic.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -43,7 +44,6 @@ import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.SetTools;
import de.anomic.crawler.ResultURLs;
import de.anomic.search.RankingProcess.NavigatorEntry;
import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.dht.FlatWordPartitionScheme;
@@ -289,16 +289,20 @@ public final class SearchEvent {
return this.rankedCache;
}
public ArrayList<NavigatorEntry> getHostNavigator(int maxentries) {
return this.rankedCache.getHostNavigator(maxentries);
public ArrayList<Navigator.Item> getNamespaceNavigator(int maxentries) {
return this.rankedCache.getNamespaceNavigator(maxentries);
}
public ArrayList<NavigatorEntry> getTopicNavigator(final int maxentries) {
public List<Navigator.Item> getHostNavigator(int maxentries) {
return this.rankedCache.getHostNavigator(maxentries);
}
public List<Navigator.Item> getTopicNavigator(final int maxentries) {
// returns a set of words that are computed as toplist
return this.rankedCache.getTopicNavigator(maxentries);
}
public ArrayList<NavigatorEntry> getAuthorNavigator(final int maxentries) {
public List<Navigator.Item> getAuthorNavigator(final int maxentries) {
// returns a list of authors so far seen on result set
return this.rankedCache.getAuthorNavigator(maxentries);
}

@@ -105,7 +105,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
/**
* insert a map into the table
* @param key
* @param name
* @param map
* @throws RowSpaceExceededException
* @throws IOException
@@ -175,7 +175,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
/**
* select a map from the table
* @param key
* @param name
* @return the map if one found or NULL if no entry exists or the entry is corrupt
* @throws IOException
*/
@@ -209,7 +209,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
/**
* delete a map from the table
* @param key
* @param name
* @throws IOException
*/
public void delete(byte[] pk) throws IOException {
@@ -218,7 +218,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
/**
* check if a row with given key exists in the table
* @param key
* @param name
* @return true if the row exists
*/
public boolean has(byte[] pk) {
