From 570e42c4e310060c4f117e0f57b4003b867f74c5 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 7 Nov 2012 13:53:29 +0100 Subject: [PATCH] fix for filetype navigator --- htroot/suggest.java | 23 +++++++++++-------- htroot/yacysearch.java | 11 ++++----- .../yacy/cora/sorting/AbstractScoreMap.java | 3 ++- .../yacy/kelondro/data/meta/DigestURI.java | 12 ---------- source/net/yacy/search/query/QueryParams.java | 14 +++++++---- .../net/yacy/search/query/RankingProcess.java | 11 --------- source/net/yacy/search/query/SearchEvent.java | 19 --------------- 7 files changed, 29 insertions(+), 64 deletions(-) diff --git a/htroot/suggest.java b/htroot/suggest.java index e6cb4cff4..27114fc99 100644 --- a/htroot/suggest.java +++ b/htroot/suggest.java @@ -22,6 +22,7 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import java.util.ConcurrentModificationException; import java.util.Iterator; import net.yacy.cora.protocol.HeaderFramework; @@ -72,16 +73,18 @@ public class suggest { String suggestion; //[#[query]#,[#{suggestions}##[text]##(eol)#,::#(/eol)##{/suggestions}#]] while (c < meanMax && meanIt.hasNext()) { - suggestion = meanIt.next().toString(); - if (json) { - prop.putJSON("suggestions_" + c + "_text", suggestion); - } else if (xml) { - prop.putXML("suggestions_" + c + "_text", suggestion); - } else { - prop.putHTML("suggestions_" + c + "_text", suggestion); - } - prop.put("suggestions_" + c + "_eol", 0); - c++; + try { + suggestion = meanIt.next().toString(); + if (json) { + prop.putJSON("suggestions_" + c + "_text", suggestion); + } else if (xml) { + prop.putXML("suggestions_" + c + "_text", suggestion); + } else { + prop.putHTML("suggestions_" + c + "_text", suggestion); + } + prop.put("suggestions_" + c + "_eol", 0); + c++; + } catch (ConcurrentModificationException e) {} } } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 811457f96..31fa9545a 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java 
@@ -374,15 +374,14 @@ public class yacysearch { ranking.coeff_date = RankingProfile.COEFF_MAX; modifier.append("/date "); } - if ( querystring.indexOf("/http", 0) >= 0 ) { - querystring = querystring.replace("/http", ""); - urlmask = "https?://.*"; - modifier.append("/http "); - } if ( querystring.indexOf("/https", 0) >= 0 ) { querystring = querystring.replace("/https", ""); - urlmask = "https?://.*"; + urlmask = "https://.*"; modifier.append("/https "); + } else if ( querystring.indexOf("/http", 0) >= 0 ) { + querystring = querystring.replace("/http", ""); + urlmask = "http://.*"; + modifier.append("/http "); } if ( querystring.indexOf("/ftp", 0) >= 0 ) { querystring = querystring.replace("/ftp", ""); diff --git a/source/net/yacy/cora/sorting/AbstractScoreMap.java b/source/net/yacy/cora/sorting/AbstractScoreMap.java index 40182d476..3d51d3402 100644 --- a/source/net/yacy/cora/sorting/AbstractScoreMap.java +++ b/source/net/yacy/cora/sorting/AbstractScoreMap.java @@ -36,7 +36,8 @@ public abstract class AbstractScoreMap implements ScoreMap { public void inc(ScoreMap map) { if (map == null) return; for (E entry: map) { - this.inc(entry, map.get(entry)); + int count = map.get(entry); + if (count > 0) this.inc(entry, count); } } diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index 332b2948a..a74f66986 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -270,18 +270,6 @@ public class DigestURI extends MultiProtocolURI implements Serializable { return b; } - /** - * return true if the protocol of the URL was 'http' - * this is not true if the protocol was 'https' - * @param hash - * @return true for url hashes that point to http services; false otherwise - */ - public static final boolean flag4HTTPset(final byte[] hash) { - assert hash.length == 12; - final byte flagbyte = hash[11]; - return (flagbyte & 32) == 1; - } - private static 
char subdomPortPath(final String subdom, final int port, final String rootpath) { final StringBuilder sb = new StringBuilder(subdom.length() + rootpath.length() + 8); sb.append(subdom).append(':').append(Integer.toString(port)).append(':').append(rootpath); diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index dccbb2d90..8b3cfc962 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -533,17 +533,21 @@ public final class QueryParams { q.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"'); } String urlMaskPattern = this.urlMask.pattern(); + + // translate filetype navigation int extm = urlMaskPattern.indexOf(".*\\."); if (extm >= 0) { String ext = urlMaskPattern.substring(extm + 4); q.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(':').append(ext); } - extm = urlMaskPattern.indexOf("?://.*"); - if (extm >= 0) { - String protocol = urlMaskPattern.substring(0, extm); - q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append(protocol); - } + // translate protocol navigation + if (urlMaskPattern.startsWith("http://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("http"); + else if (urlMaskPattern.startsWith("https://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("https"); + else if (urlMaskPattern.startsWith("ftp://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("ftp"); + else if (urlMaskPattern.startsWith("smb://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("smb"); + else if (urlMaskPattern.startsWith("file://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("file"); + // construct query 
final SolrQuery params = new SolrQuery(); params.setQuery(q.toString()); diff --git a/source/net/yacy/search/query/RankingProcess.java b/source/net/yacy/search/query/RankingProcess.java index a8917c726..746181b81 100644 --- a/source/net/yacy/search/query/RankingProcess.java +++ b/source/net/yacy/search/query/RankingProcess.java @@ -51,7 +51,6 @@ import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.Condenser; import net.yacy.document.LibraryProvider; -import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; @@ -290,9 +289,6 @@ public final class RankingProcess extends Thread { long timeout = System.currentTimeMillis() + maxtime; try { WordReferenceVars iEntry; - final String pattern = this.query.urlMask.pattern(); - final boolean httpPattern = pattern.equals("http://.*"); - final boolean noHttpButProtocolPattern = pattern.equals("https://.*") || pattern.equals("ftp://.*") || pattern.equals("smb://.*") || pattern.equals("file://.*"); long remaining; pollloop: while ( true ) { remaining = timeout - System.currentTimeMillis(); @@ -347,13 +343,6 @@ public final class RankingProcess extends Thread { this.hostHashNavigator.inc(hosthash); this.hostHashResolver.put(hosthash, iEntry.urlhash()); - // check protocol - if (!this.query.urlMask_isCatchall) { - final boolean httpFlagSet = DigestURI.flag4HTTPset(iEntry.urlHash); - if (httpPattern && !httpFlagSet) continue pollloop; - if (noHttpButProtocolPattern && httpFlagSet) continue pollloop; - } - // check vocabulary constraint String subject = YaCyMetadata.hashURI(iEntry.urlhash()); Resource resource = JenaTripleStore.getResource(subject); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 16d0e0278..77e3225fb 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ 
b/source/net/yacy/search/query/SearchEvent.java @@ -62,7 +62,6 @@ import net.yacy.data.WorkTables; import net.yacy.document.Condenser; import net.yacy.document.LargeNumberCache; import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; -import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; @@ -469,9 +468,6 @@ public final class SearchEvent { // apply all constraints try { - final String pattern = this.query.urlMask.pattern(); - final boolean httpPattern = pattern.equals("http://.*"); - final boolean noHttpButProtocolPattern = pattern.equals("https://.*") || pattern.equals("ftp://.*") || pattern.equals("smb://.*") || pattern.equals("file://.*"); pollloop: for (URIMetadataNode iEntry: index) { if ( !this.query.urlMask_isCatchall ) { @@ -515,17 +511,6 @@ public final class SearchEvent { if (!hosthash.equals(this.query.nav_sitehash)) continue pollloop; } - // check protocol - if ( !this.query.urlMask_isCatchall ) { - final boolean httpFlagSet = DigestURI.flag4HTTPset(iEntry.hash()); - if ( httpPattern && !httpFlagSet ) { - continue pollloop; - } - if ( noHttpButProtocolPattern && httpFlagSet ) { - continue pollloop; - } - } - // check vocabulary constraint String subject = YaCyMetadata.hashURI(iEntry.hash()); Resource resource = JenaTripleStore.getResource(subject); @@ -850,10 +835,6 @@ public final class SearchEvent { } } - // protocol navigation - final String protocol = page.url().getProtocol(); - this.protocolNavigator.inc(protocol); - return page; // accept url } Log.logWarning("RWIProcess", "loop terminated");