diff --git a/defaults/yacy.init b/defaults/yacy.init index 1562dad83..d74529a1e 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -740,7 +740,7 @@ search.result.show.pictures = true # search navigators: comma-separated list of default values for search navigation. # can be temporary different if search string is given with differen navigation values # assigning no value(s) means that no navigation is shown -search.navigation=hosts,authors,namespace,topics +search.navigation=hosts,authors,namespace,topics,filetype,protocol # search result verification and snippet fetch caching rules # each search result can be verified byloading the link from the web diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 4702f51a6..37b7cf00a 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -91,9 +91,11 @@ $(function() { header: "h3" }); $("#sidebarDomains").accordion({}); - $("#sidebarNameSpace").accordion({}); + $("#sidebarProtocols").accordion({}); + $("#sidebarFiletypes").accordion({}); $("#sidebarAuthors").accordion({}); $("#sidebarAuthors").accordion('activate', false); + $("#sidebarNameSpace").accordion({}); $("#sidebarTopics").tagcloud({type:"sphere",power:.25,seed:0,sizemin:10,sizemax:20,height:80,colormin:"682",colormax:"20C"}).find("li").tsort(); $("#sidebarAbout").accordion({}); $("#search").focus(); diff --git a/htroot/yacysearchtrailer.html b/htroot/yacysearchtrailer.html index c85c90efc..9e37529b4 100644 --- a/htroot/yacysearchtrailer.html +++ b/htroot/yacysearchtrailer.html @@ -16,6 +16,24 @@ #(/nav-topics)# +#(nav-protocols)#:: +
+

Protocol Navigator

+
+
+#(/nav-protocols)# + +#(nav-filetypes)#:: +
+

Filetype Navigator

+
+
+#(/nav-filetypes)# + #(nav-domains)#::

Domain Navigator

diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 3bd83d56d..77ec1c2cc 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -163,6 +163,58 @@ public class yacysearchtrailer { prop.put("nav-topics_element_" + i + "_nl", 0); } + // protocol navigators + final ScoreMap protocolNavigator = theSearch.getProtocolNavigator(); + if (protocolNavigator == null || protocolNavigator.isEmpty()) { + prop.put("nav-protocols", 0); + } else { + prop.put("nav-protocols", 1); + navigatorIterator = protocolNavigator.keys(false); + int i = 0; + String pnav; + while (i < 20 && navigatorIterator.hasNext()) { + name = navigatorIterator.next().trim(); + count = protocolNavigator.get(name); + pnav = "/" + name; + prop.putJSON("nav-protocols_element_" + i + "_name", name); + prop.put("nav-protocols_element_" + i + "_url", "" + name + " (" + count + ")"); + prop.putJSON("nav-protocols_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + pnav, theQuery.urlMask.toString(), theQuery.navigators).toString()); + prop.put("nav-protocols_element_" + i + "_count", count); + prop.put("nav-protocols_element_" + i + "_modifier", "protocol:'" + name + "'"); + prop.put("nav-protocols_element_" + i + "_nl", 1); + i++; + } + prop.put("nav-protocols_element", i); + i--; + prop.put("nav-protocols_element_" + i + "_nl", 0); + } + + // filetype navigators + final ScoreMap filetypeNavigator = theSearch.getFiletypeNavigator(); + if (filetypeNavigator == null || filetypeNavigator.isEmpty()) { + prop.put("nav-filetypes", 0); + } else { + prop.put("nav-filetypes", 1); + navigatorIterator = filetypeNavigator.keys(false); + int i = 0; + String tnav; + while (i < 20 && navigatorIterator.hasNext()) { + name = navigatorIterator.next().trim(); + count = filetypeNavigator.get(name); + tnav = "filetype:" + name; + prop.putJSON("nav-filetypes_element_" + i + "_name", name); + prop.put("nav-filetypes_element_" + i + "_url", "" + name + " (" + count + ")"); + prop.putJSON("nav-filetypes_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + tnav, theQuery.urlMask.toString(), theQuery.navigators).toString()); + prop.put("nav-filetypes_element_" + i + "_count", count); + prop.put("nav-filetypes_element_" + i + "_modifier", "filetype:'" + name + "'"); + prop.put("nav-filetypes_element_" + i + "_nl", 1); + i++; + } + prop.put("nav-filetypes_element", i); + i--; + prop.put("nav-filetypes_element_" + i + "_nl", 0); + } + // about box final String aboutBody = env.getConfig("about.body", ""); final String aboutHeadline = env.getConfig("about.headline", ""); diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 7a8b44238..dc82e4f2e 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -79,14 +79,19 @@ public final class RankingProcess extends Thread { //private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process private final ScoreMap ref; // reference score computation for the commonSense heuristic - private final ScoreMap hostNavigator; // a counter for the appearance of the host hash private final Map hostResolver; // a mapping from a host hash (6 bytes) to the full url hash of one of these urls that have the host hash - private final ScoreMap authorNavigator; - private final ScoreMap namespaceNavigator; private final ReferenceOrder order; private final long startTime; private boolean addRunning; + // navigation scores + private final ScoreMap hostNavigator; // a counter for the appearance of the host hash + private final ScoreMap authorNavigator; // a counter for the appearances of authors + private final ScoreMap namespaceNavigator; // a counter for name spaces + private final ScoreMap protocolNavigator; // a counter for protocol types + private final ScoreMap filetypeNavigator; // a counter for file types + + public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime @@ -113,6 +118,8 @@ public final class RankingProcess extends Thread { this.hostResolver = new ConcurrentHashMap(); this.authorNavigator = new ConcurrentScoreMap(); this.namespaceNavigator = new ConcurrentScoreMap(); + this.protocolNavigator = new ConcurrentScoreMap(); + this.filetypeNavigator = new ConcurrentScoreMap(); this.ref = new ConcurrentScoreMap(); this.feeders = 1; this.startTime = System.currentTimeMillis(); @@ -516,6 +523,14 @@ public final class RankingProcess extends Thread { } } + // protocol navigation + final String protocol = metadata.url().getProtocol(); + this.protocolNavigator.inc(protocol); + + // file type navigation + final String fileext = metadata.url().getFileExtension(); + if (fileext.length() > 0) this.filetypeNavigator.inc(fileext); + // check Scanner if (!Scanner.acceptURL(metadata.url())) { this.sortout++; @@ -623,6 +638,18 @@ public final class RankingProcess extends Thread { return result; } + public ScoreMap getProtocolNavigator() { + if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("protocol") < 0) return new ClusteredScoreMap(); + if (this.protocolNavigator.sizeSmaller(2)) this.protocolNavigator.clear(); // navigators with one entry are not useful + return this.protocolNavigator; + } + + public ScoreMap getFiletypeNavigator() { + if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("filetype") < 0) return new ClusteredScoreMap(); + if (this.filetypeNavigator.sizeSmaller(2)) this.filetypeNavigator.clear(); // navigators with one entry are not useful + return this.filetypeNavigator; + } + public static final Comparator> mecomp = new Comparator>() { public int compare(final Map.Entry o1, final Map.Entry o2) { if (o1.getValue().intValue() < o2.getValue().intValue()) return 1; diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index bd2118231..75858ba49 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -348,6 +348,14 @@ public final class SearchEvent { return this.rankingProcess.getAuthorNavigator(); } + public ScoreMap getProtocolNavigator() { + return this.rankingProcess.getProtocolNavigator(); + } + + public ScoreMap getFiletypeNavigator() { + return this.rankingProcess.getFiletypeNavigator(); + } + public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) { synchronized (this.heuristics) { this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant)); diff --git a/source/net/yacy/migration.java b/source/net/yacy/migration.java index 3b9dd5001..5f4d9c6b0 100644 --- a/source/net/yacy/migration.java +++ b/source/net/yacy/migration.java @@ -29,7 +29,6 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.util.FileUtils; - import de.anomic.search.Switchboard; import de.anomic.search.SwitchboardConstants; @@ -54,6 +53,10 @@ public class migration { migrateWorkFiles(sb); } installSkins(sb); // FIXME: yes, bad fix for quick release 0.47 + // add new navigation + if (sb.getConfig("search.navigation", "").equals("hosts,authors,namespace,topics")) { + sb.setConfig("search.navigation", "hosts,authors,namespace,topics,filetype,protocol"); + } } /* * remove the static defaultfiles. We use them through a overlay now. @@ -78,7 +81,7 @@ public class migration { if(file.exists()) delete(file); } - + /* * copy skins from the release to DATA/SKINS. */ @@ -88,10 +91,10 @@ public class migration { if (defaultSkinsPath.exists()) { final List skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath()); mkdirs(skinsPath); - for (String skinFile : skinFiles){ + for (final String skinFile : skinFiles){ if (skinFile.endsWith(".css")){ - File from = new File(defaultSkinsPath, skinFile); - File to = new File(skinsPath, skinFile); + final File from = new File(defaultSkinsPath, skinFile); + final File to = new File(skinsPath, skinFile); if (from.lastModified() > to.lastModified()) try { FileUtils.copy(from, to); } catch (final IOException e) {} @@ -141,7 +144,7 @@ public class migration { } try { sb.initBookmarks(); - } catch (IOException e) { + } catch (final IOException e) { Log.logException(e); } } @@ -165,7 +168,7 @@ public class migration { file.delete(); } catch (final IOException e) { } - + file = new File(sb.getDataPath(), "DATA/SETTINGS/wiki-bkp.db"); if (file.exists()) { Log.logInfo("MIGRATION", "Migrating wiki-bkp.db to "+ sb.workPath); @@ -173,16 +176,16 @@ public class migration { try { FileUtils.copy(file, file2); file.delete(); - } catch (final IOException e) {} + } catch (final IOException e) {} } try { sb.initWiki(); - } catch (IOException e) { + } catch (final IOException e) { Log.logException(e); } } - - + + file=new File(sb.getDataPath(), "DATA/SETTINGS/message.db"); if(file.exists()){ Log.logInfo("MIGRATION", "Migrating message.db to "+ sb.workPath); @@ -194,7 +197,7 @@ public class migration { } catch (final IOException e) {} try { sb.initMessages(); - } catch (IOException e) { + } catch (final IOException e) { Log.logException(e); } } @@ -207,7 +210,7 @@ public class migration { sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(acc))); sb.setConfig("adminAccount", ""); } - + // fix unsafe old passwords if ((acc = sb.getConfig("proxyAccountBase64", "")).length() > 0) { sb.setConfig("proxyAccountBase64MD5", Digest.encodeMD5Hex(acc)); @@ -224,14 +227,14 @@ public class migration { } public static void migrateSwitchConfigSettings(final Switchboard sb) { - + // migration for additional parser settings String value = ""; //Locales in DATA, because DATA must be writable, htroot not. if(sb.getConfig("locale.translated_html", "DATA/LOCALE/htroot").equals("htroot/locale")){ sb.setConfig("locale.translated_html", "DATA/LOCALE/htroot"); } - + // migration for blacklists if ((value = sb.getConfig("proxyBlackListsActive","")).length() > 0) { sb.setConfig("proxy.BlackLists", value); @@ -239,16 +242,16 @@ public class migration { sb.setConfig("dht.BlackLists", value); sb.setConfig("search.BlackLists", value); sb.setConfig("surftips.BlackLists", value); - + sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared","")); sb.setConfig("proxyBlackListsActive", ""); } - + // migration of http specific crawler settings if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) { sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate")); sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5")); - sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7")); - } + sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7")); + } } }