From 02020b590b19f9bd5896b6121d04fe80f5178ea4 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 19 Dec 2012 02:38:05 +0100 Subject: [PATCH] - removed all extension types from extension navigation which are not proper/known - automatically show the protocol navigation if there is more than http and https - automatically show the extension navigation if there is some media content --- htroot/yacysearchtrailer.java | 9 +++++++-- .../yacy/cora/document/analysis/Classification.java | 8 +++++++- source/net/yacy/search/query/QueryParams.java | 4 ++-- source/net/yacy/search/query/SearchEvent.java | 11 ++++++++++- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 87af58bd3..70f79c8e9 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -28,6 +28,7 @@ import java.util.Iterator; import java.util.Map; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.sorting.ScoreMap; @@ -222,10 +223,12 @@ public class yacysearchtrailer { navigatorIterator = theSearch.protocolNavigator.keys(false); int i = 0, p, pos = 0, neg = 0; String nav, queryStringForUrl; + boolean visible = false; while (i < 20 && navigatorIterator.hasNext()) { name = navigatorIterator.next().trim(); count = theSearch.protocolNavigator.get(name); if (count == 0) break; + visible = visible || "ftp,smb".indexOf(name) >= 0; nav = "%2F" + name; queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true); p = queryStringForUrl.indexOf(nav); @@ -247,7 +250,7 @@ public class yacysearchtrailer { i++; } prop.put("nav-protocols_element", i); - prop.put("nav-protocols_activate", neg > 0 ? 1 : 0); // by default off + prop.put("nav-protocols_activate", neg > 0 || visible ? 
1 : 0); // by default off i--; prop.put("nav-protocols_element_" + i + "_nl", 0); if (pos == 1 && neg == 0) prop.put("nav-protocols", 0); // this navigation is not useful @@ -261,10 +264,12 @@ public class yacysearchtrailer { navigatorIterator = theSearch.filetypeNavigator.keys(false); int i = 0, p, pos = 0, neg = 0; String nav, queryStringForUrl; + boolean visible = false; while (i < 20 && navigatorIterator.hasNext()) { name = navigatorIterator.next().trim(); count = theSearch.filetypeNavigator.get(name); if (count == 0) break; + visible = visible || Classification.isMediaExtension(name) || "pdf,doc,docx".indexOf(name) >= 0; nav = "filetype%3A" + name; queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true); p = queryStringForUrl.indexOf(nav); @@ -286,7 +291,7 @@ public class yacysearchtrailer { i++; } prop.put("nav-filetypes_element", i); - prop.put("nav-filetypes_activate", neg > 0 ? 1 : 0); // by default off + prop.put("nav-filetypes_activate", neg > 0 || visible ? 
1 : 0); // by default off i--; prop.put("nav-filetypes_element_" + i + "_nl", 0); if (pos == 1 && neg == 0) prop.put("nav-filetypes", 0); // this navigation is not useful diff --git a/source/net/yacy/cora/document/analysis/Classification.java b/source/net/yacy/cora/document/analysis/Classification.java index b3f568734..38a5f45fd 100644 --- a/source/net/yacy/cora/document/analysis/Classification.java +++ b/source/net/yacy/cora/document/analysis/Classification.java @@ -138,11 +138,17 @@ public class Classification { return appsExtSet.contains(appsExt.trim().toLowerCase()); } - private static boolean isControlExtension(final String ctrlExt) { + public static boolean isControlExtension(final String ctrlExt) { if (ctrlExt == null) return false; return ctrlExtSet.contains(ctrlExt.trim().toLowerCase()); } + public static boolean isAnyKnownExtension(String ext) { + if (ext == null) return false; + ext = ext.trim().toLowerCase(); + return textExtSet.contains(ext) || mediaExtSet.contains(ext) || ctrlExtSet.contains(ext); + } + public static ContentDomain getContentDomain(final String ext) { if (isTextExtension(ext)) return ContentDomain.TEXT; if (isImageExtension(ext)) return ContentDomain.IMAGE; diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 6b2128e7a..83c67a6ef 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -447,7 +447,7 @@ public final class QueryParams { String ext = urlMaskPattern.substring(extm + 4); int k = ext.indexOf('('); if (k > 0) ext = ext.substring(0, k); - fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(':').append(ext); + fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(ext).append('\"'); } // translate protocol navigation @@ -463,7 +463,7 @@ public final class QueryParams { while ((p = urlMaskPattern.indexOf(':')) >= 0) urlMaskPattern = 
urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1); while ((p = urlMaskPattern.indexOf('/')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1); while ((p = urlMaskPattern.indexOf('\\')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 2); - fq.append(" AND ").append(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/"); + //fq.append(" AND ").append(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/"); } if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) { diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 238c5f2da..d88321c2b 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -60,6 +60,7 @@ import net.yacy.data.WorkTables; import net.yacy.document.Condenser; import net.yacy.document.LargeNumberCache; import net.yacy.document.LibraryProvider; +import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; @@ -481,7 +482,15 @@ public final class SearchEvent { if (this.filetypeNavigator != null) { fcts = facets.get(YaCySchema.url_file_ext_s.getSolrFieldName()); - if (fcts != null) this.filetypeNavigator.inc(fcts); + if (fcts != null) { + // remove all filetypes that we don't know + Iterator<String> i = fcts.iterator(); + while (i.hasNext()) { + String ext = i.next(); + if (TextParser.supportsExtension(ext) != null && !Classification.isAnyKnownExtension(ext)) i.remove(); + } + this.filetypeNavigator.inc(fcts); + } } if (this.authorNavigator != null) {