- added a new Wiki Namespace Navigator: a search facet over wiki name spaces found in result URL paths (the segment between the last slash and the first colon)

- redesigned the Navigator data structures: a single Navigator class with Navigator.Item entries replaces the NavigatorEntry, HostInfo, and AuthorInfo helpers in RankingProcess

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6716 6c8d7289-2bf4-0310-a012-ef5d649a1542
author orbiter
parent b0c6d0108b
commit 884b262130
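For orientation before the per-file hunks: the data-structure redesign replaces the hand-rolled counting maps in RankingProcess (a ConcurrentHashMap plus manual get/put increments, removed below) with the generic Navigator class added in this commit. The following is a minimal standalone sketch of the pattern being replaced; the class and variable names are hypothetical, only the loop body mirrors the lines removed from RankingProcess.addTopic().

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class CountingDemo { // hypothetical demo class, not part of the commit
    public static void main(String[] args) {
        // old style, as removed from RankingProcess.addTopic():
        // a plain key-to-count map, incremented by hand
        Map<String, Integer> ref = new ConcurrentHashMap<String, Integer>();
        for (String word : new String[] {"wiki", "help", "wiki"}) {
            Integer c = ref.get(word);
            if (c == null) ref.put(word, 1); else ref.put(word, c.intValue() + 1);
        }
        System.out.println(ref); // "wiki" counted twice, "help" once

        // new style, as added in this commit (see Navigator.java below):
        //   Navigator ref = new Navigator();
        //   ref.inc(word, word);                        // key and display name
        //   List<Navigator.Item> top = ref.entries(10); // sorted by count, descending
    }
}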

@ -31,6 +31,7 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeSet;
@ -49,6 +50,7 @@ import de.anomic.http.server.HeaderFramework;
 import de.anomic.http.server.RequestHeader;
 import de.anomic.net.natLib;
 import de.anomic.search.ContentDomain;
+import de.anomic.search.Navigator;
 import de.anomic.search.QueryParams;
 import de.anomic.search.RankingProfile;
 import de.anomic.search.SearchEvent;
@ -57,7 +59,6 @@ import de.anomic.search.Segment;
 import de.anomic.search.Segments;
 import de.anomic.search.Switchboard;
 import de.anomic.search.ResultEntry;
-import de.anomic.search.RankingProcess.NavigatorEntry;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@ -336,9 +337,9 @@ public final class search {
         // prepare reference hints
         final long timer = System.currentTimeMillis();
-        final ArrayList<NavigatorEntry> ws = theSearch.getTopicNavigator(10);
+        final List<Navigator.Item> ws = theSearch.getTopicNavigator(10);
         final StringBuilder refstr = new StringBuilder(6000);
-        for (NavigatorEntry e: ws) {
+        for (Navigator.Item e: ws) {
             refstr.append(",").append(e.name);
         }
         prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());

@ -53,9 +53,10 @@ $(function() {
     });
     $("#sidebar1").accordion({});
     $("#sidebar2").accordion({});
-    $("#sidebar2").accordion('activate', false);
     $("#sidebar3").accordion({});
     $("#sidebar3").accordion('activate', false);
+    $("#sidebar4").accordion({});
+    $("#sidebar4").accordion('activate', false);
     $("#sidebarAbout").accordion({});
     $("#search").focus();
 });

@ -8,6 +8,15 @@
 </div>
 <div id="sidebar2" style="float: right; margin-top:5px; width: 220px;">
+#(nav-namespace)#::
+<h3 style="padding-left:25px;">Name Space Navigator</h3>
+<div><ul style="padding-left: 0px;">#{element}#
+<li>#[url]#</li>
+#{/element}#</ul></div>
+#(/nav-namespace)#
+</div>
+<div id="sidebar3" style="float: right; margin-top:5px; width: 220px;">
 #(nav-authors)#::
 <h3 style="padding-left:25px;">Author Navigator</h3>
 <div><ul style="padding-left: 0px;">#{element}#
@ -16,7 +25,7 @@
 #(/nav-authors)#
 </div>
-<div id="sidebar3" style="float: right; margin-top:5px; width: 220px;">
+<div id="sidebar4" style="float: right; margin-top:5px; width: 220px;">
 #(nav-topics)#::
 <h3 style="padding-left:25px;">Topic Navigator</h3>
 <div><ul style="padding-left: 0px;">#{element}#

@ -26,14 +26,15 @@
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import net.yacy.kelondro.util.EventTracker;
 import de.anomic.http.server.RequestHeader;
+import de.anomic.search.Navigator;
 import de.anomic.search.QueryParams;
 import de.anomic.search.SearchEvent;
 import de.anomic.search.SearchEventCache;
-import de.anomic.search.RankingProcess.NavigatorEntry;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 import de.anomic.yacy.graphics.ProfilingGraph;
@ -58,39 +59,61 @@ public class yacysearchtrailer {
         final QueryParams theQuery = theSearch.getQuery();
         // compose search navigation
+        // namespace navigators
+        ArrayList<Navigator.Item> namespaceNavigator = theSearch.getNamespaceNavigator(10);
+        if (namespaceNavigator == null || namespaceNavigator.isEmpty()) {
+            prop.put("nav-namespace", 0);
+        } else {
+            prop.put("nav-namespace", 1);
+            Navigator.Item entry;
+            int i;
+            for (i = 0; i < Math.min(10, namespaceNavigator.size()); i++) {
+                entry = namespaceNavigator.get(i);
+                prop.put("nav-namespace_element_" + i + "_name", entry.name);
+                prop.put("nav-namespace_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask, "inurl:" + entry.name, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
+                prop.putJSON("nav-namespace_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask, "inurl:" + entry.name, theQuery.navigators));
+                prop.put("nav-namespace_element_" + i + "_count", entry.count);
+                prop.put("nav-namespace_element_" + i + "_modifier", "inurl:" + entry.name);
+                prop.put("nav-namespace_element_" + i + "_nl", 1);
+            }
+            i--;
+            prop.put("nav-namespace_element_" + i + "_nl", 0);
+            prop.put("nav-namespace_element", namespaceNavigator.size());
+        }
         // host navigators
-        ArrayList<NavigatorEntry> hostNavigator = theSearch.getHostNavigator(10);
+        List<Navigator.Item> hostNavigator = theSearch.getHostNavigator(10);
         if (hostNavigator == null || hostNavigator.isEmpty()) {
             prop.put("nav-domains", 0);
         } else {
             prop.put("nav-domains", 1);
-            NavigatorEntry entry;
+            Navigator.Item entry;
             int i;
-            for (i = 0; i < hostNavigator.size(); i++) {
+            for (i = 0; i < Math.min(10, hostNavigator.size()); i++) {
                 entry = hostNavigator.get(i);
                 prop.put("nav-domains_element_" + i + "_name", entry.name);
                 prop.put("nav-domains_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, display, theQuery, theQuery.urlMask, "site:" + entry.name, theQuery.navigators) + "\">" + entry.name + " (" + entry.count + ")</a>");
                 prop.putJSON("nav-domains_element_" + i + "_url-json", QueryParams.navurl("json", 0, display, theQuery, theQuery.urlMask, "site:" + entry.name, theQuery.navigators));
                 prop.put("nav-domains_element_" + i + "_count", entry.count);
                 prop.put("nav-domains_element_" + i + "_modifier", "site:" + entry.name);
                 prop.put("nav-domains_element_" + i + "_nl", 1);
             }
             i--;
             prop.put("nav-domains_element_" + i + "_nl", 0);
             prop.put("nav-domains_element", hostNavigator.size());
         }
         // author navigators
-        ArrayList<NavigatorEntry> authorNavigator = theSearch.getAuthorNavigator(10);
+        List<Navigator.Item> authorNavigator = theSearch.getAuthorNavigator(10);
         if (authorNavigator == null || authorNavigator.isEmpty()) {
             prop.put("nav-authors", 0);
         } else {
             prop.put("nav-authors", 1);
-            NavigatorEntry entry;
+            Navigator.Item entry;
             int i;
             String anav;
-            for (i = 0; i < authorNavigator.size(); i++) {
+            for (i = 0; i < Math.min(10, authorNavigator.size()); i++) {
                 entry = authorNavigator.get(i);
                 anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name + "'";
                 prop.put("nav-authors_element_" + i + "_name", entry.name);
@ -106,15 +129,15 @@ public class yacysearchtrailer {
         }
         // topics navigator
-        ArrayList<NavigatorEntry> topicNavigator = theSearch.getTopicNavigator(10);
+        List<Navigator.Item> topicNavigator = theSearch.getTopicNavigator(10);
         if (topicNavigator == null || topicNavigator.isEmpty()) {
-            topicNavigator = new ArrayList<NavigatorEntry>();
+            topicNavigator = new ArrayList<Navigator.Item>();
             prop.put("nav-topics", "0");
         } else {
             prop.put("nav-topics", "1");
             int i = 0;
-            NavigatorEntry e;
-            Iterator<NavigatorEntry> iter = topicNavigator.iterator();
+            Navigator.Item e;
+            Iterator<Navigator.Item> iter = topicNavigator.iterator();
             while (iter.hasNext()) {
                 e = iter.next();
                 if (/*(theQuery == null) ||*/ (theQuery.queryString == null)) break;

@ -11,7 +11,20 @@
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)# {"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}# #{/element}#
] ]
},#(/nav-domains)##(nav-authors)#:: },#(/nav-domains)##(nav-namespace)#::
{
"facetname": "namespace",
"displayname": "Name Space",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-namespace)##(nav-authors)#::
{ {
"facetname": "authors", "facetname": "authors",
"displayname": "Authors", "displayname": "Authors",

@ -6,6 +6,20 @@
 #{/element}#
 </yacy:facet>
 #(/nav-domains)#
+#(nav-namespace)#::
+<yacy:facet name="domains" displayname="Namespace" type="String" min="0" max="0" mean="0">
+#{element}#
+<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
+#{/element}#
+</yacy:facet>
+#(/nav-namespace)#
+#(nav-authors)#::
+<yacy:facet name="domains" displayname="Authors" type="String" min="0" max="0" mean="0">
+#{element}#
+<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
+#{/element}#
+</yacy:facet>
+#(/nav-authors)#
 #(nav-topics)#::
 <yacy:facet name="topwords" displayname="Topics" type="String" min="0" max="0" mean="0">
 #{element}#

@ -65,7 +65,7 @@ public class ListAccumulator {
     /**
      * Adds a new entry to a list identified by a given name.
-     * @param name The name of the list the entry is to be added to.
+     * @param key The name of the list the entry is to be added to.
      * @param entry The new entry.
      * @return True if the entry has been added, else false (if list does not exists).
      */

@ -29,6 +29,7 @@ package de.anomic.search;
 import java.io.File;
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.concurrent.BlockingQueue;
@ -54,18 +55,24 @@ public class DocumentIndex extends Segment {
     private static final RankingProfile textRankingDefault = new RankingProfile(ContentDomain.TEXT);
     //private Bitfield zeroConstraint = new Bitfield(4);
-    final static File poison = new File(".");
-    BlockingQueue<File> queue;
+    private static DigestURI poison;
+    static {
+        try {
+            poison = new DigestURI("file://.");
+        } catch (MalformedURLException e) {}
+    }
+    BlockingQueue<DigestURI> queue; // a queue of document ID's
     private Worker[] worker;
     CallbackListener callback;
     static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
     public DocumentIndex(final File segmentPath, CallbackListener callback, int cachesize) throws IOException {
         super(new Log("DocumentIndex"), segmentPath, cachesize, targetFileSize * 4 - 1, false, false);
         int cores = Runtime.getRuntime().availableProcessors() + 1;
         this.callback = callback;
-        this.queue = new LinkedBlockingQueue<File>(cores * 300);
+        this.queue = new LinkedBlockingQueue<DigestURI>(cores * 300);
         this.worker = new Worker[cores];
         for (int i = 0; i < cores; i++) {
             this.worker[i] = new Worker(i);
@ -79,7 +86,7 @@ public class DocumentIndex extends Segment {
     }
     public void run() {
-        File f;
+        DigestURI f;
         URIMetadataRow resultRow;
         try {
             while ((f = queue.take()) != poison) try {
@ -110,31 +117,24 @@ public class DocumentIndex extends Segment {
         this.queue.clear();
     }
-    /**
-     * put a single file into the index
-     * @param file
-     * @return a metadata object that has been generated to identify the file
-     * @throws IOException in case that the file does not exist or cannot be parsed
-     */
-    public URIMetadataRow add(File file) throws IOException {
-        if (file == null) throw new IOException("file = null");
-        if (file.isDirectory()) throw new IOException("file should be a document, not a path");
-        if (!file.canRead()) throw new IOException("cannot read file");
-        DigestURI url = new DigestURI("file:" + file.getAbsolutePath());
-        Document document;
+    public URIMetadataRow add(DigestURI url) throws IOException {
+        if (url == null) throw new IOException("file = null");
+        if (url.isDirectory()) throw new IOException("file should be a document, not a path");
+        if (!url.canRead()) throw new IOException("cannot read file");
+        Document document;
         try {
-            document = TextParser.parseSource(url, null, null, file);
+            document = TextParser.parseSource(url, null, null, url.length(), url.getInputStream());
         } catch (InterruptedException e) {
-            throw new IOException("cannot parse " + file.toString() + ": " + e.getMessage());
+            throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
         } catch (ParserException e) {
-            throw new IOException("cannot parse " + file.toString() + ": " + e.getMessage());
+            throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
         }
         final Condenser condenser = new Condenser(document, true, true);
         return super.storeDocument(
                 url,
                 null,
-                new Date(file.lastModified()),
-                file.length(),
+                new Date(url.lastModified()),
+                url.length(),
                 document,
                 condenser
         );
@ -145,7 +145,7 @@ public class DocumentIndex extends Segment {
      * If the given file is a path to a directory, the complete sub-tree is indexed
      * @param start
      */
-    public void addConcurrent(File start) {
+    public void addConcurrent(DigestURI start) {
         assert (start != null);
         assert (start.canRead()) : start.toString();
         if (!start.isDirectory()) {
@ -155,17 +155,21 @@ public class DocumentIndex extends Segment {
             return;
         }
         String[] s = start.list();
-        File w;
+        DigestURI w;
         for (String t: s) {
-            w = new File(start, t);
-            if (w.canRead() && !w.isHidden()) {
-                if (w.isDirectory()) {
-                    addConcurrent(w);
-                } else {
-                    try {
-                        this.queue.put(w);
-                    } catch (InterruptedException e) {}
-                }
-            }
+            try {
+                w = new DigestURI(start, t);
+                if (w.canRead() && !w.isHidden()) {
+                    if (w.isDirectory()) {
+                        addConcurrent(w);
+                    } else {
+                        try {
+                            this.queue.put(w);
+                        } catch (InterruptedException e) {}
+                    }
+                }
+            } catch (MalformedURLException e1) {
+                Log.logException(e1);
+            }
         }
     }
@ -177,14 +181,14 @@ public class DocumentIndex extends Segment {
      * @param count
      * @return a list of files that contain the given string
      */
-    public ArrayList<File> find(String querystring, int pos, int count) {
+    public ArrayList<DigestURI> find(String querystring, int pos, int count) {
         ArrayList<URIMetadataRow> result = findMetadata(querystring, this);
-        ArrayList<File> files = new ArrayList<File>();
+        ArrayList<DigestURI> files = new ArrayList<DigestURI>();
         Components metadata;
         for (URIMetadataRow row : result) {
             metadata = row.metadata();
             if (metadata == null) continue;
-            files.add(metadata.url().getLocalFile());
+            files.add(metadata.url());
             count--;
             if (count == 0) break;
         }
@ -216,7 +220,7 @@ public class DocumentIndex extends Segment {
      * @param querystring
      * @return a list of files that contain the word
      */
-    public ArrayList<File> find(String querystring) {
+    public ArrayList<DigestURI> find(String querystring) {
         return find(querystring, 0, 100);
     }
@ -242,8 +246,8 @@ public class DocumentIndex extends Segment {
     }
     public interface CallbackListener {
-        public void commit(File f, URIMetadataRow resultRow);
-        public void fail(File f, String failReason);
+        public void commit(DigestURI f, URIMetadataRow resultRow);
+        public void fail(DigestURI f, String failReason);
     }
     public static void main(String[] args) {
@ -259,16 +263,16 @@ public class DocumentIndex extends Segment {
         File segmentPath = new File(args[0]);
         System.out.println("using index files at " + segmentPath.getAbsolutePath());
         CallbackListener callback = new CallbackListener() {
-            public void commit(File f, URIMetadataRow resultRow) {
+            public void commit(DigestURI f, URIMetadataRow resultRow) {
                 System.out.println("indexed: " + f.toString());
             }
-            public void fail(File f, String failReason) {
+            public void fail(DigestURI f, String failReason) {
                 System.out.println("not indexed " + f.toString() + ": " + failReason);
             }
         };
         try {
             if (args[1].equals("add")) {
-                File f = new File(args[2]);
+                DigestURI f = new DigestURI(args[2]);
                 DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
                 di.addConcurrent(f);
                 di.close();
@ -277,8 +281,8 @@ public class DocumentIndex extends Segment {
                 for (int i = 2; i < args.length; i++) query += args[i];
                 query.trim();
                 DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
-                ArrayList<File> results = di.find(query);
-                for (File f: results) {
+                ArrayList<DigestURI> results = di.find(query);
+                for (DigestURI f: results) {
                     if (f != null) System.out.println(f.toString());
                 }
                 di.close();
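The DocumentIndex changes above swap the queue element type from File to DigestURI but keep the existing poison-object shutdown idiom: a static poison sentinel plus the loop "while ((f = queue.take()) != poison)" in Worker.run(). Below is a standalone sketch of that idiom with hypothetical names and plain String jobs, since DigestURI needs the YaCy classpath.

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class PoisonQueueDemo { // hypothetical demo class, not part of the commit
    // sentinel object; compared by identity, so it can never collide with a real job
    private static final String POISON = new String("POISON");

    public static void main(String[] args) throws InterruptedException {
        final BlockingQueue<String> queue = new LinkedBlockingQueue<String>(100);
        Thread worker = new Thread() {
            public void run() {
                try {
                    String job;
                    // same loop shape as DocumentIndex.Worker.run():
                    // while ((f = queue.take()) != poison) { ... }
                    while ((job = queue.take()) != POISON) {
                        System.out.println("processing " + job);
                    }
                } catch (InterruptedException e) {
                    // interrupted: stop the worker
                }
            }
        };
        worker.start();
        queue.put("doc-1");
        queue.put("doc-2");
        queue.put(POISON); // one poison object per worker thread ends its loop
        worker.join();
    }
}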

@ -0,0 +1,97 @@
// Navigator.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 05.03.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2010-01-29 16:59:24 +0100 (Fr, 29 Jan 2010) $
// $LastChangedRevision: 6630 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public class Navigator {
private ConcurrentHashMap<String, Item> map;
public Navigator() {
this.map = new ConcurrentHashMap<String, Item>();
}
/**
* a reverse comparator for navigator items
*/
public static final Comparator<Item> itemComp = new Comparator<Item>() {
public int compare(Item o1, Item o2) {
if (o1.count < o2.count) return 1;
if (o2.count < o1.count) return -1;
return 0;
}
};
public void inc(String key, String name) {
Item item = map.get(key);
if (item == null) {
map.put(key, new Item(name));
} else {
item.inc();
}
}
public Map<String, Item> map() {
return this.map;
}
public Item[] entries() {
Item[] ii = this.map.values().toArray(new Item[this.map.size()]);
Arrays.sort(ii, itemComp);
return ii;
}
public List<Item> entries(int maxcount) {
Item[] ii = entries();
int c = Math.min(ii.length, maxcount);
ArrayList<Item> a = new ArrayList<Item>(c);
for (int i = 0; i < c; i++) a.add(ii[i]);
return a;
}
public static class Item {
public int count;
public String name;
public Item(String name) {
this.count = 1;
this.name = name;
}
public Item(String name, int count) {
this.count = count;
this.name = name;
}
public void inc() {
this.count++;
}
}
}
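A short usage sketch for the class above; this is hypothetical demo code, assuming the Navigator class just listed is on the classpath in package de.anomic.search.

package de.anomic.search;

import java.util.List;

public class NavigatorDemo { // hypothetical demo class, not part of the commit
    public static void main(String[] args) {
        Navigator hosts = new Navigator();
        hosts.inc("hash-a", "en.wikipedia.org"); // first sighting of a key stores the display name, count = 1
        hosts.inc("hash-a", "en.wikipedia.org"); // later sightings only increment the count
        hosts.inc("hash-b", "yacy.net");

        // entries(max) returns at most max items, sorted by count in descending order
        List<Navigator.Item> top = hosts.entries(10);
        for (Navigator.Item item : top) {
            System.out.println(item.name + " (" + item.count + ")"); // en.wikipedia.org (2), then yacy.net (1)
        }
    }
}

In the commit itself the key/name split is used two ways: hosts and authors pass a hash as the key with a sample name (hostNavigator.inc(domhash, iEntry.urlHash), authorNavigator.inc(authorhash, pageauthor)), while topics and namespaces use the word itself for both (ref.inc(word, word), namespaceNavigator.inc(pagepath, pagepath)).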

@ -1,4 +1,4 @@
-// plasmaSearchRankingProcess.java
+// RankingProcess.java
 // (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 07.11.2007 on http://yacy.net
 //
@ -29,12 +29,12 @@ package de.anomic.search;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Comparator;
 import java.util.ConcurrentModificationException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeSet;
 import java.util.concurrent.BlockingQueue;
@ -81,9 +81,10 @@ public final class RankingProcess extends Thread {
     private final ConcurrentHashMap<String, SortStack<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
     private final HashSet<String> handover; // key = urlhash; used for double-check of urls that had been handed over to search process
-    private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
-    private final ConcurrentHashMap<String, HostInfo> hostNavigator;
-    private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
+    private final Navigator ref; // reference score computation for the commonSense heuristic
+    private final Navigator hostNavigator;
+    private final Navigator authorNavigator;
+    private final Navigator namespaceNavigator;
     private final ReferenceOrder order;
     public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
@ -106,9 +107,10 @@ public final class RankingProcess extends Thread {
         this.misses = new TreeSet<String>();
         this.flagcount = new int[32];
         for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
-        this.hostNavigator = new ConcurrentHashMap<String, HostInfo>();
-        this.authorNavigator = new ConcurrentHashMap<String, AuthorInfo>();
-        this.ref = new ConcurrentHashMap<String, Integer>();
+        this.hostNavigator = new Navigator();
+        this.authorNavigator = new Navigator();
+        this.namespaceNavigator = new Navigator();
+        this.ref = new Navigator();
         //this.domZones = new int[8];
         //for (int i = 0; i < 8; i++) {this.domZones[i] = 0;}
         this.feeders = concurrency;
@ -171,8 +173,7 @@ public final class RankingProcess extends Thread {
         EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime);
         // iterate over normalized entries and select some that are better than currently stored
         timer = System.currentTimeMillis();
-        HostInfo hs;
         String domhash;
         boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
         Long r;
@ -221,12 +222,7 @@ public final class RankingProcess extends Thread {
             // get statistics for host navigator
             if (nav_hosts) {
                 domhash = iEntry.urlHash.substring(6);
-                hs = this.hostNavigator.get(domhash);
-                if (hs == null) {
-                    this.hostNavigator.put(domhash, new HostInfo(iEntry.urlHash));
-                } else {
-                    hs.inc();
-                }
+                this.hostNavigator.inc(domhash, iEntry.urlHash);
             }
             // accept
@ -377,6 +373,7 @@ public final class RankingProcess extends Thread {
     public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
         // returns from the current RWI list the best URL entry and removes this entry from the list
         long timeLimit = System.currentTimeMillis() + timeout;
+        int p = -1;
         while (System.currentTimeMillis() < timeLimit) {
             final SortStack<WordReferenceVars>.stackElement obrwi = takeRWI(skipDoubleDom);
             if (obrwi == null) {
@ -444,17 +441,22 @@ public final class RankingProcess extends Thread {
             }
             // add author to the author navigator
-            AuthorInfo in = this.authorNavigator.get(authorhash);
-            if (in == null) {
-                this.authorNavigator.put(authorhash, new AuthorInfo(pageauthor));
-            } else {
-                in.inc();
-                this.authorNavigator.put(authorhash, in);
-            }
+            this.authorNavigator.inc(authorhash, pageauthor);
         } else if (this.query.authorhash != null) {
             continue;
         }
+        // namespace navigation
+        String pagepath = metadata.url().getPath();
+        if ((p = pagepath.indexOf(':')) >= 0) {
+            pagepath = pagepath.substring(0,p);
+            p = pagepath.lastIndexOf('/');
+            if (p >= 0) {
+                pagepath = pagepath.substring(p + 1);
+                this.namespaceNavigator.inc(pagepath, pagepath);
+            }
+        }
         // accept url
         //System.out.println("handing over hash " + page.hash());
         this.handover.add(page.hash()); // remember that we handed over this url
@ -525,68 +527,32 @@ public final class RankingProcess extends Thread {
         return this.misses.iterator();
     }
-    public class HostInfo {
-        public int count;
-        public String hashsample;
-        public HostInfo(String urlhash) {
-            this.count = 1;
-            this.hashsample = urlhash;
-        }
-        public void inc() {
-            this.count++;
-        }
-    }
-    public class AuthorInfo {
-        public int count;
-        public String author;
-        public AuthorInfo(String author) {
-            this.count = 1;
-            this.author = author;
-        }
-        public void inc() {
-            this.count++;
-        }
-    }
-    public static final Comparator<HostInfo> hscomp = new Comparator<HostInfo>() {
-        public int compare(HostInfo o1, HostInfo o2) {
-            if (o1.count < o2.count) return 1;
-            if (o2.count < o1.count) return -1;
-            return 0;
-        }
-    };
-    public static final Comparator<AuthorInfo> aicomp = new Comparator<AuthorInfo>() {
-        public int compare(AuthorInfo o1, AuthorInfo o2) {
-            if (o1.count < o2.count) return 1;
-            if (o2.count < o1.count) return -1;
-            return 0;
-        }
-    };
-    public class NavigatorEntry {
-        public int count;
-        public String name;
-        public NavigatorEntry(String name, int count) {
-            this.name = name;
-            this.count = count;
-        }
-    }
-    public ArrayList<NavigatorEntry> getHostNavigator(int count) {
-        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return new ArrayList<NavigatorEntry>(0);
-        HostInfo[] hsa = this.hostNavigator.values().toArray(new HostInfo[this.hostNavigator.size()]);
-        Arrays.sort(hsa, hscomp);
-        int rc = Math.min(count, hsa.length);
-        ArrayList<NavigatorEntry> result = new ArrayList<NavigatorEntry>();
+    public ArrayList<Navigator.Item> getNamespaceNavigator(int count) {
+        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ArrayList<Navigator.Item>(0);
+        Navigator.Item[] hsa = this.namespaceNavigator.entries();
+        int rc = Math.min(count, hsa.length);
+        ArrayList<Navigator.Item> result = new ArrayList<Navigator.Item>();
+        for (int i = 0; i < rc; i++) result.add(hsa[i]);
+        return result;
+    }
+    public List<Navigator.Item> getHostNavigators(int count) {
+        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return new ArrayList<Navigator.Item>(0);
+        return this.hostNavigator.entries(10);
+    }
+    public List<Navigator.Item> getHostNavigator(int count) {
+        List<Navigator.Item> result = new ArrayList<Navigator.Item>();
+        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;
+        List<Navigator.Item> hsa = this.hostNavigator.entries(10);
         URIMetadataRow mr;
         DigestURI url;
         String hostname;
         Components metadata;
-        loop: for (int i = 0; i < rc; i++) {
-            mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
+        loop: for (Navigator.Item item: hsa) {
+            mr = this.query.getSegment().urlMetadata().load(item.name, null, 0);
             if (mr == null) continue;
             metadata = mr.metadata();
             if (metadata == null) continue;
@ -595,12 +561,13 @@ public final class RankingProcess extends Thread {
             hostname = url.getHost();
             if (hostname == null) continue;
             if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue;
-            for (NavigatorEntry entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
-            result.add(new NavigatorEntry(hostname, hsa[i].count));
+            for (Navigator.Item entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
+            result.add(new Navigator.Item(hostname, item.count));
         }
         return result;
     }
     public static final Comparator<Map.Entry<String, Integer>> mecomp = new Comparator<Map.Entry<String, Integer>>() {
         public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
             if (o1.getValue().intValue() < o2.getValue().intValue()) return 1;
@ -609,44 +576,29 @@ public final class RankingProcess extends Thread {
         }
     };
-    public Map<String, Integer> getTopics() {
-        return this.ref;
+    public Map<String, Navigator.Item> getTopics() {
+        return this.ref.map();
     }
-    @SuppressWarnings("unchecked")
-    public ArrayList<NavigatorEntry> getTopicNavigator(final int count) {
+    public List<Navigator.Item> getTopicNavigator(final int count) {
         // create a list of words that had been computed by statistics over all
         // words that appeared in the url or the description of all urls
-        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList<NavigatorEntry>(0);
-        Map.Entry<String, Integer>[] a = this.ref.entrySet().toArray(new Map.Entry[this.ref.size()]);
-        Arrays.sort(a, mecomp);
-        int rc = Math.min(count, a.length);
-        ArrayList<NavigatorEntry> result = new ArrayList<NavigatorEntry>();
-        Map.Entry<String, Integer> e;
-        int c;
-        for (int i = 0; i < rc; i++) {
-            e = a[i];
-            c = e.getValue().intValue();
-            if (c == 0) break;
-            result.add(new NavigatorEntry(e.getKey(), c));
-        }
-        return result;
+        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList<Navigator.Item>(0);
+        return this.ref.entries(10);
     }
     public void addTopic(final String[] words) {
         String word;
         for (int i = 0; i < words.length; i++) {
             word = words[i].toLowerCase();
-            Integer c;
             if (word.length() > 2 &&
                 "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0 &&
                 !query.queryHashes.contains(Word.word2hash(word)) &&
                 word.matches("[a-z]+") &&
                 !Switchboard.badwords.contains(word) &&
                 !Switchboard.stopwords.contains(word)) {
-                c = ref.get(word);
-                if (c == null) ref.put(word, 1); else ref.put(word, c.intValue() + 1);
+                ref.inc(word, word);
             }
         }
     }
@ -662,22 +614,12 @@ public final class RankingProcess extends Thread {
         addTopic(descrcomps);
     }
-    public ArrayList<NavigatorEntry> getAuthorNavigator(final int count) {
+    public List<Navigator.Item> getAuthorNavigator(final int count) {
         // create a list of words that had been computed by statistics over all
         // words that appeared in the url or the description of all urls
-        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList<NavigatorEntry>(0);
-        AuthorInfo[] a = this.authorNavigator.values().toArray(new AuthorInfo[this.authorNavigator.size()]);
-        Arrays.sort(a, aicomp);
-        int rc = Math.min(count, a.length);
-        ArrayList<NavigatorEntry> result = new ArrayList<NavigatorEntry>();
-        AuthorInfo e;
-        for (int i = 0; i < rc; i++) {
-            e = a[i];
-            //System.out.println("*** DEBUG Author = " + e.author + ", count = " + e.count);
-            result.add(new NavigatorEntry(e.author, e.count));
-        }
-        return result;
+        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList<Navigator.Item>(0);
+        return this.authorNavigator.entries(count);
     }
     public static void loadYBR(final File rankingPath, final int count) {
@ -733,12 +675,3 @@ public final class RankingProcess extends Thread {
         }
     }
-/*
-Thread= Thread-937 id=4224 BLOCKED
-Thread= Thread-919 id=4206 BLOCKED
-Thread= Thread-936 id=4223 BLOCKED
-at net.yacy.kelondro.util.SortStack.pop(SortStack.java:118)
-at de.anomic.search.RankingProcess.takeRWI(RankingProcess.java:310)
-at de.anomic.search.RankingProcess.takeURL(RankingProcess.java:371)
-at de.anomic.search.ResultFetcher$Worker.run(ResultFetcher.java:161)
-*/
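The namespace detection added to RankingProcess.takeURL() above reduces a result URL's path to the segment between the last slash and the first colon. The following standalone restatement uses a hypothetical class and method name, purely for clarity.

public class NamespaceDemo { // hypothetical demo class, not part of the commit
    // returns the wiki-style name space of a URL path, or null if the path has none
    static String namespaceOf(String path) {
        int p = path.indexOf(':');
        if (p < 0) return null;          // no colon, no name space
        path = path.substring(0, p);     // cut at the colon: "/wiki/Help:Contents" -> "/wiki/Help"
        p = path.lastIndexOf('/');
        if (p < 0) return null;
        return path.substring(p + 1);    // keep the last segment: "Help"
    }

    public static void main(String[] args) {
        System.out.println(namespaceOf("/wiki/Help:Contents"));  // Help
        System.out.println(namespaceOf("/wiki/Talk:Main_Page")); // Talk
        System.out.println(namespaceOf("/index.html"));          // null
    }
}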

@ -44,7 +44,6 @@ import net.yacy.kelondro.util.SetTools;
 import net.yacy.kelondro.util.SortStack;
 import net.yacy.kelondro.util.SortStore;
-import de.anomic.search.RankingProcess.NavigatorEntry;
 import de.anomic.search.MediaSnippet;
 import de.anomic.yacy.yacySeedDB;
 import de.anomic.yacy.graphics.ProfilingGraph;
@ -260,20 +259,6 @@ public class ResultFetcher {
Log.logInfo("search", "sorted out hash " + urlhash + " during search: " + reason); Log.logInfo("search", "sorted out hash " + urlhash + " during search: " + reason);
} }
public ArrayList<NavigatorEntry> getHostNavigator(int maxentries) {
return this.rankedCache.getHostNavigator(maxentries);
}
public ArrayList<NavigatorEntry> getTopicNavigator(final int maxentries) {
// returns a set of words that are computed as toplist
return this.rankedCache.getTopicNavigator(maxentries);
}
public ArrayList<NavigatorEntry> getAuthorNavigator(final int maxentries) {
// returns a list of authors so far seen on result set
return this.rankedCache.getAuthorNavigator(maxentries);
}
public int resultCount() { public int resultCount() {
return this.result.size(); return this.result.size();
} }
@ -357,7 +342,7 @@ public class ResultFetcher {
     public long postRanking(
             final ResultEntry rentry,
-            final Map<String, Integer> topwords) {
+            final Map<String, Navigator.Item> topwords) {
         long r = 0;
@ -375,14 +360,14 @@ public class ResultFetcher {
         final String urlstring = rentry.url().toNormalform(true, true);
         final String[] urlcomps = DigestURI.urlComps(urlstring);
         final String[] descrcomps = DigestURI.splitpattern.split(rentry.title().toLowerCase());
-        Integer tc;
+        Navigator.Item tc;
         for (int j = 0; j < urlcomps.length; j++) {
             tc = topwords.get(urlcomps[j]);
-            if (tc != null) r += Math.max(1, tc.intValue()) << query.ranking.coeff_urlcompintoplist;
+            if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_urlcompintoplist;
         }
         for (int j = 0; j < descrcomps.length; j++) {
             tc = topwords.get(descrcomps[j]);
-            if (tc != null) r += Math.max(1, tc) << query.ranking.coeff_descrcompintoplist;
+            if (tc != null) r += Math.max(1, tc.count) << query.ranking.coeff_descrcompintoplist;
         }
         // apply query-in-result matching

@ -29,6 +29,7 @@ package de.anomic.search;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.TreeSet;
@ -43,7 +44,6 @@ import net.yacy.kelondro.util.MemoryControl;
 import net.yacy.kelondro.util.SetTools;
 import de.anomic.crawler.ResultURLs;
-import de.anomic.search.RankingProcess.NavigatorEntry;
 import de.anomic.yacy.yacySearch;
 import de.anomic.yacy.yacySeedDB;
 import de.anomic.yacy.dht.FlatWordPartitionScheme;
@ -289,16 +289,20 @@ public final class SearchEvent {
         return this.rankedCache;
     }
-    public ArrayList<NavigatorEntry> getHostNavigator(int maxentries) {
-        return this.rankedCache.getHostNavigator(maxentries);
+    public ArrayList<Navigator.Item> getNamespaceNavigator(int maxentries) {
+        return this.rankedCache.getNamespaceNavigator(maxentries);
     }
-    public ArrayList<NavigatorEntry> getTopicNavigator(final int maxentries) {
+    public List<Navigator.Item> getHostNavigator(int maxentries) {
+        return this.rankedCache.getHostNavigator(maxentries);
+    }
+    public List<Navigator.Item> getTopicNavigator(final int maxentries) {
         // returns a set of words that are computed as toplist
         return this.rankedCache.getTopicNavigator(maxentries);
     }
-    public ArrayList<NavigatorEntry> getAuthorNavigator(final int maxentries) {
+    public List<Navigator.Item> getAuthorNavigator(final int maxentries) {
         // returns a list of authors so far seen on result set
         return this.rankedCache.getAuthorNavigator(maxentries);
     }

@ -105,7 +105,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
     /**
      * insert a map into the table
-     * @param key
+     * @param name
      * @param map
      * @throws RowSpaceExceededException
      * @throws IOException
@ -175,7 +175,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
     /**
      * select a map from the table
-     * @param key
+     * @param name
      * @return the map if one found or NULL if no entry exists or the entry is corrupt
      * @throws IOException
      */
@ -209,7 +209,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
     /**
      * delete a map from the table
-     * @param key
+     * @param name
      * @throws IOException
      */
     public void delete(byte[] pk) throws IOException {
@ -218,7 +218,7 @@ public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte
     /**
      * check if a row with given key exists in the table
-     * @param key
+     * @param name
      * @return true if the row exists
      */
     public boolean has(byte[] pk) {
