diff --git a/defaults/yacy.init b/defaults/yacy.init
index 1562dad83..d74529a1e 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -740,7 +740,7 @@ search.result.show.pictures = true
# search navigators: comma-separated list of default values for search navigation.
# can be temporary different if search string is given with differen navigation values
# assigning no value(s) means that no navigation is shown
-search.navigation=hosts,authors,namespace,topics
+search.navigation=hosts,authors,namespace,topics,filetype,protocol
# search result verification and snippet fetch caching rules
# each search result can be verified byloading the link from the web
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index 4702f51a6..37b7cf00a 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -91,9 +91,11 @@ $(function() {
header: "h3"
});
$("#sidebarDomains").accordion({});
- $("#sidebarNameSpace").accordion({});
+ $("#sidebarProtocols").accordion({});
+ $("#sidebarFiletypes").accordion({});
$("#sidebarAuthors").accordion({});
$("#sidebarAuthors").accordion('activate', false);
+ $("#sidebarNameSpace").accordion({});
$("#sidebarTopics").tagcloud({type:"sphere",power:.25,seed:0,sizemin:10,sizemax:20,height:80,colormin:"682",colormax:"20C"}).find("li").tsort();
$("#sidebarAbout").accordion({});
$("#search").focus();
diff --git a/htroot/yacysearchtrailer.html b/htroot/yacysearchtrailer.html
index c85c90efc..9e37529b4 100644
--- a/htroot/yacysearchtrailer.html
+++ b/htroot/yacysearchtrailer.html
@@ -16,6 +16,24 @@
#(/nav-topics)#
+#(nav-protocols)#::
+
+#(/nav-protocols)#
+
+#(nav-filetypes)#::
+
+#(/nav-filetypes)#
+
#(nav-domains)#::
Domain Navigator
diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java
index 3bd83d56d..77ec1c2cc 100644
--- a/htroot/yacysearchtrailer.java
+++ b/htroot/yacysearchtrailer.java
@@ -163,6 +163,58 @@ public class yacysearchtrailer {
prop.put("nav-topics_element_" + i + "_nl", 0);
}
+ // protocol navigators
+ final ScoreMap protocolNavigator = theSearch.getProtocolNavigator(); // null or empty means the protocol navigator is hidden below
+ if (protocolNavigator == null || protocolNavigator.isEmpty()) {
+ prop.put("nav-protocols", 0);
+ } else {
+ prop.put("nav-protocols", 1);
+ navigatorIterator = protocolNavigator.keys(false);
+ int i = 0;
+ String pnav;
+ while (i < 20 && navigatorIterator.hasNext()) {
+ name = navigatorIterator.next().trim();
+ count = protocolNavigator.get(name);
+ pnav = "/" + name;
+ prop.putJSON("nav-protocols_element_" + i + "_name", name);
+ prop.put("nav-protocols_element_" + i + "_url", "" + name + " (" + count + ")");
+ prop.putJSON("nav-protocols_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + pnav, theQuery.urlMask.toString(), theQuery.navigators).toString());
+ prop.put("nav-protocols_element_" + i + "_count", count);
+ prop.put("nav-protocols_element_" + i + "_modifier", "protocol:'" + name + "'");
+ prop.put("nav-protocols_element_" + i + "_nl", 1);
+ i++;
+ }
+ prop.put("nav-protocols_element", i);
+ i--;
+ prop.put("nav-protocols_element_" + i + "_nl", 0);
+ }
+
+ // filetype navigators
+ final ScoreMap filetypeNavigator = theSearch.getFiletypeNavigator();
+ if (filetypeNavigator == null || filetypeNavigator.isEmpty()) {
+ prop.put("nav-filetypes", 0);
+ } else {
+ prop.put("nav-filetypes", 1);
+ navigatorIterator = filetypeNavigator.keys(false);
+ int i = 0;
+ String tnav;
+ while (i < 20 && navigatorIterator.hasNext()) {
+ name = navigatorIterator.next().trim();
+ count = filetypeNavigator.get(name);
+ tnav = "filetype:" + name;
+ prop.putJSON("nav-filetypes_element_" + i + "_name", name);
+ prop.put("nav-filetypes_element_" + i + "_url", "" + name + " (" + count + ")");
+ prop.putJSON("nav-filetypes_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + tnav, theQuery.urlMask.toString(), theQuery.navigators).toString());
+ prop.put("nav-filetypes_element_" + i + "_count", count);
+ prop.put("nav-filetypes_element_" + i + "_modifier", "filetype:'" + name + "'");
+ prop.put("nav-filetypes_element_" + i + "_nl", 1);
+ i++;
+ }
+ prop.put("nav-filetypes_element", i);
+ i--;
+ prop.put("nav-filetypes_element_" + i + "_nl", 0);
+ }
+
// about box
final String aboutBody = env.getConfig("about.body", "");
final String aboutHeadline = env.getConfig("about.headline", "");
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index 7a8b44238..dc82e4f2e 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -79,14 +79,19 @@ public final class RankingProcess extends Thread {
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final ScoreMap ref; // reference score computation for the commonSense heuristic
- private final ScoreMap hostNavigator; // a counter for the appearance of the host hash
private final Map hostResolver; // a mapping from a host hash (6 bytes) to the full url hash of one of these urls that have the host hash
- private final ScoreMap authorNavigator;
- private final ScoreMap namespaceNavigator;
private final ReferenceOrder order;
private final long startTime;
private boolean addRunning;
+ // navigation scores
+ private final ScoreMap hostNavigator; // a counter for the appearance of the host hash
+ private final ScoreMap authorNavigator; // a counter for the appearances of authors
+ private final ScoreMap namespaceNavigator; // a counter for name spaces
+ private final ScoreMap protocolNavigator; // a counter for protocol types
+ private final ScoreMap filetypeNavigator; // a counter for file types
+
+
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@@ -113,6 +118,8 @@ public final class RankingProcess extends Thread {
this.hostResolver = new ConcurrentHashMap();
this.authorNavigator = new ConcurrentScoreMap();
this.namespaceNavigator = new ConcurrentScoreMap();
+ this.protocolNavigator = new ConcurrentScoreMap();
+ this.filetypeNavigator = new ConcurrentScoreMap();
this.ref = new ConcurrentScoreMap();
this.feeders = 1;
this.startTime = System.currentTimeMillis();
@@ -516,6 +523,14 @@ public final class RankingProcess extends Thread {
}
}
+ // protocol navigation
+ final String protocol = metadata.url().getProtocol();
+ this.protocolNavigator.inc(protocol);
+
+ // file type navigation
+ final String fileext = metadata.url().getFileExtension();
+ if (fileext.length() > 0) this.filetypeNavigator.inc(fileext);
+
// check Scanner
if (!Scanner.acceptURL(metadata.url())) {
this.sortout++;
@@ -623,6 +638,18 @@ public final class RankingProcess extends Thread {
return result;
}
+ public ScoreMap getProtocolNavigator() { // protocol counters for the search navigation; an empty map means "show nothing"
+ if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("protocol") < 0) return new ClusteredScoreMap(); // navigator was not requested by the query
+ if (this.protocolNavigator.sizeSmaller(2)) return new ClusteredScoreMap(); // navigators with one entry are not useful; return an empty map instead of clearing the field, which is still incremented while results are added, so collected counts are not lost
+ return this.protocolNavigator;
+ }
+
+ public ScoreMap getFiletypeNavigator() { // file extension counters for the search navigation; an empty map means "show nothing"
+ if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("filetype") < 0) return new ClusteredScoreMap(); // navigator was not requested by the query
+ if (this.filetypeNavigator.sizeSmaller(2)) return new ClusteredScoreMap(); // navigators with one entry are not useful; return an empty map instead of clearing the field, which is still incremented while results are added, so collected counts are not lost
+ return this.filetypeNavigator;
+ }
+
public static final Comparator> mecomp = new Comparator>() {
public int compare(final Map.Entry o1, final Map.Entry o2) {
if (o1.getValue().intValue() < o2.getValue().intValue()) return 1;
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index bd2118231..75858ba49 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -348,6 +348,14 @@ public final class SearchEvent {
return this.rankingProcess.getAuthorNavigator();
}
+ public ScoreMap getProtocolNavigator() { // protocol navigator of this search event
+ return this.rankingProcess.getProtocolNavigator(); // plain delegation to the ranking process, no processing here
+ }
+
+ public ScoreMap getFiletypeNavigator() { // file type navigator of this search event
+ return this.rankingProcess.getFiletypeNavigator(); // plain delegation to the ranking process, no processing here
+ }
+
public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) {
synchronized (this.heuristics) {
this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));
diff --git a/source/net/yacy/migration.java b/source/net/yacy/migration.java
index 3b9dd5001..5f4d9c6b0 100644
--- a/source/net/yacy/migration.java
+++ b/source/net/yacy/migration.java
@@ -29,7 +29,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.FileUtils;
-
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
@@ -54,6 +53,10 @@ public class migration {
migrateWorkFiles(sb);
}
installSkins(sb); // FIXME: yes, bad fix for quick release 0.47
+ // add new navigation
+ if (sb.getConfig("search.navigation", "").equals("hosts,authors,namespace,topics")) {
+ sb.setConfig("search.navigation", "hosts,authors,namespace,topics,filetype,protocol");
+ }
}
/*
* remove the static defaultfiles. We use them through a overlay now.
@@ -78,7 +81,7 @@ public class migration {
if(file.exists())
delete(file);
}
-
+
/*
* copy skins from the release to DATA/SKINS.
*/
@@ -88,10 +91,10 @@ public class migration {
if (defaultSkinsPath.exists()) {
final List skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath());
mkdirs(skinsPath);
- for (String skinFile : skinFiles){
+ for (final String skinFile : skinFiles){
if (skinFile.endsWith(".css")){
- File from = new File(defaultSkinsPath, skinFile);
- File to = new File(skinsPath, skinFile);
+ final File from = new File(defaultSkinsPath, skinFile);
+ final File to = new File(skinsPath, skinFile);
if (from.lastModified() > to.lastModified()) try {
FileUtils.copy(from, to);
} catch (final IOException e) {}
@@ -141,7 +144,7 @@ public class migration {
}
try {
sb.initBookmarks();
- } catch (IOException e) {
+ } catch (final IOException e) {
Log.logException(e);
}
}
@@ -165,7 +168,7 @@ public class migration {
file.delete();
} catch (final IOException e) {
}
-
+
file = new File(sb.getDataPath(), "DATA/SETTINGS/wiki-bkp.db");
if (file.exists()) {
Log.logInfo("MIGRATION", "Migrating wiki-bkp.db to "+ sb.workPath);
@@ -173,16 +176,16 @@ public class migration {
try {
FileUtils.copy(file, file2);
file.delete();
- } catch (final IOException e) {}
+ } catch (final IOException e) {}
}
try {
sb.initWiki();
- } catch (IOException e) {
+ } catch (final IOException e) {
Log.logException(e);
}
}
-
-
+
+
file=new File(sb.getDataPath(), "DATA/SETTINGS/message.db");
if(file.exists()){
Log.logInfo("MIGRATION", "Migrating message.db to "+ sb.workPath);
@@ -194,7 +197,7 @@ public class migration {
} catch (final IOException e) {}
try {
sb.initMessages();
- } catch (IOException e) {
+ } catch (final IOException e) {
Log.logException(e);
}
}
@@ -207,7 +210,7 @@ public class migration {
sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(acc)));
sb.setConfig("adminAccount", "");
}
-
+
// fix unsafe old passwords
if ((acc = sb.getConfig("proxyAccountBase64", "")).length() > 0) {
sb.setConfig("proxyAccountBase64MD5", Digest.encodeMD5Hex(acc));
@@ -224,14 +227,14 @@ public class migration {
}
public static void migrateSwitchConfigSettings(final Switchboard sb) {
-
+
// migration for additional parser settings
String value = "";
//Locales in DATA, because DATA must be writable, htroot not.
if(sb.getConfig("locale.translated_html", "DATA/LOCALE/htroot").equals("htroot/locale")){
sb.setConfig("locale.translated_html", "DATA/LOCALE/htroot");
}
-
+
// migration for blacklists
if ((value = sb.getConfig("proxyBlackListsActive","")).length() > 0) {
sb.setConfig("proxy.BlackLists", value);
@@ -239,16 +242,16 @@ public class migration {
sb.setConfig("dht.BlackLists", value);
sb.setConfig("search.BlackLists", value);
sb.setConfig("surftips.BlackLists", value);
-
+
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
sb.setConfig("proxyBlackListsActive", "");
}
-
+
// migration of http specific crawler settings
if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) {
sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate"));
sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5"));
- sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
- }
+ sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
+ }
}
}