From 241dd8410add553c4870096fbb18a4f841d50920 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 5 Jul 2012 09:21:27 +0200 Subject: [PATCH] removed snippet pattern filter - it was not used --- htroot/api/ymarks/manage_tags.java | 5 +-- htroot/yacy/search.java | 4 --- htroot/yacysearch.java | 2 -- source/de/anomic/data/ymark/YMarkTables.java | 4 +-- source/net/yacy/peers/Protocol.java | 5 --- source/net/yacy/peers/RemoteSearch.java | 12 +++---- source/net/yacy/search/query/QueryParams.java | 31 ------------------- source/net/yacy/search/query/SearchEvent.java | 1 - .../net/yacy/search/query/SnippetProcess.java | 9 ++---- 9 files changed, 11 insertions(+), 62 deletions(-) diff --git a/htroot/api/ymarks/manage_tags.java b/htroot/api/ymarks/manage_tags.java index d7f98596b..c01e0dd22 100644 --- a/htroot/api/ymarks/manage_tags.java +++ b/htroot/api/ymarks/manage_tags.java @@ -4,7 +4,6 @@ import java.util.regex.Pattern; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.blob.Tables.Row; -import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import de.anomic.data.UserDB; @@ -76,9 +75,7 @@ public class manage_tags { prop.put("status", 1); } catch (final IOException e) { Log.logException(e); - } catch (final RowSpaceExceededException e) { - Log.logException(e); - } + } } else { prop.put(serverObjects.ACTION_AUTHENTICATE, YMarkTables.USER_AUTHENTICATE_MSG); } diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 2697e1239..b3d233501 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -35,7 +35,6 @@ import java.util.Iterator; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; -import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; @@ -120,7 +119,6 @@ public final class search { final String modifier = post.get("modifier", "").trim(); final String contentdom = post.get("contentdom", "all"); final String filter = post.get("filter", ".*"); // a filter on the url - final Pattern snippetPattern = Pattern.compile(post.get("snippet", ".*")); // a filter on the snippet String sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null; String authorhash = post.get("authorhash", ""); if (authorhash.length() == 0) authorhash = null; String language = post.get("language", ""); @@ -228,7 +226,6 @@ public final class search { abstractSet, new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0), null, - snippetPattern, null, modifier, maxdist, @@ -291,7 +288,6 @@ public final class search { queryhashes, excludehashes, null, - snippetPattern, null, modifier, maxdist, diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index a803b4d80..924a7f7b2 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -713,7 +713,6 @@ public class yacysearch { // do the search final HandleSet queryHashes = Word.words2hashesHandles(query[0]); - final Pattern snippetPattern = QueryParams.stringSearchPattern(originalquerystring); // check filters try { @@ -740,7 +739,6 @@ public class yacysearch { queryHashes, Word.words2hashesHandles(query[1]), Word.words2hashesHandles(query[2]), - snippetPattern, tenant, modifier.toString().trim(), maxDistance, diff --git a/source/de/anomic/data/ymark/YMarkTables.java b/source/de/anomic/data/ymark/YMarkTables.java index df807c0d6..7f1236ee4 100644 --- a/source/de/anomic/data/ymark/YMarkTables.java +++ b/source/de/anomic/data/ymark/YMarkTables.java @@ -238,8 +238,8 @@ public class YMarkTables { } } - public void replaceTags(final Iterator rowIterator, final String bmk_user, final String tagString, final String replaceString) throws IOException, RowSpaceExceededException { - final HashSet remove = YMarkUtil.keysStringToSet(YMarkUtil.cleanTagsString(tagString.toLowerCase())); + public void replaceTags(final Iterator rowIterator, final String bmk_user, final String tagString, final String replaceString) throws IOException { + final HashSet remove = YMarkUtil.keysStringToSet(YMarkUtil.cleanTagsString(tagString.toLowerCase())); final StringBuilder t = new StringBuilder(200); HashSet tags; Row row; diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 928c56883..d5b04fc0c 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -589,7 +589,6 @@ public final class Protocol final String urlhashes, final Pattern prefer, final Pattern filter, - final Pattern snippet, final String modifier, final String language, final String sitehash, @@ -635,7 +634,6 @@ public final class Protocol urlhashes, prefer, filter, - snippet, modifier, language, sitehash, @@ -893,7 +891,6 @@ public final class Protocol final String urlhashes, final Pattern prefer, final Pattern filter, - final Pattern snippet, final String modifier, final String language, final String sitehash, @@ -946,7 +943,6 @@ public final class Protocol parts.put("urls", UTF8.StringBody(urlhashes)); parts.put("prefer", UTF8.StringBody(prefer.pattern())); parts.put("filter", UTF8.StringBody(filter.pattern())); - parts.put("snippet", UTF8.StringBody(snippet.pattern())); parts.put("modifier", UTF8.StringBody(modifier)); parts.put("language", UTF8.StringBody(language)); parts.put("sitehash", UTF8.StringBody(sitehash)); @@ -1518,7 +1514,6 @@ public final class Protocol "", // urlhashes, QueryParams.matchnothing_pattern, // prefer, QueryParams.catchall_pattern, // filter, - QueryParams.catchall_pattern, // snippet, "", // modifier "", // language, "", // sitehash, diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 9178a7f34..74ece99b4 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -59,7 +59,7 @@ public class RemoteSearch extends Thread { private final int count, maxDistance; private final long time; final private RankingProfile rankingProfile; - final private Pattern prefer, filter, snippet; + final private Pattern prefer, filter; final private QueryParams.Modifier modifier; final private String language; final private Bitfield constraint; @@ -70,7 +70,6 @@ public class RemoteSearch extends Thread { final String urlhashes, // this is the field that is filled during a secondary search to restrict to specific urls that are to be retrieved final Pattern prefer, final Pattern filter, - final Pattern snippet, final QueryParams.Modifier modifier, final String language, final String sitehash, final String authorhash, final String contentdom, @@ -92,7 +91,6 @@ public class RemoteSearch extends Thread { this.urlhashes = urlhashes; this.prefer = prefer; this.filter = filter; - this.snippet = snippet; this.modifier = modifier; this.language = language; this.sitehash = sitehash; @@ -121,7 +119,7 @@ public class RemoteSearch extends Thread { this.urls = Protocol.search( this.peers.mySeed(), this.wordhashes, this.excludehashes, this.urlhashes, - this.prefer, this.filter, this.snippet, this.modifier.getModifier(), + this.prefer, this.filter, this.modifier.getModifier(), this.language, this.sitehash, this.authorhash, this.contentdom, this.count, this.time, this.maxDistance, this.global, this.partitions, this.targetPeer, this.indexSegment, this.containerCache, this.secondarySearchSuperviser, @@ -163,7 +161,7 @@ public class RemoteSearch extends Thread { public static void primaryRemoteSearches( final List searchThreads, final String wordhashes, final String excludehashes, - final Pattern prefer, final Pattern filter, final Pattern snippet, + final Pattern prefer, final Pattern filter, final QueryParams.Modifier modifier, final String language, final String sitehash, @@ -202,7 +200,7 @@ public class RemoteSearch extends Thread { if (targetPeers[i] == null || targetPeers[i].hash == null) continue; try { RemoteSearch rs = new RemoteSearch( - wordhashes, excludehashes, "", prefer, filter, snippet, modifier, + wordhashes, excludehashes, "", prefer, filter, modifier, language, sitehash, authorhash, contentdom, count, time, maxDist, true, targets, targetPeers[i], indexSegment, peers, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint); @@ -237,7 +235,7 @@ public class RemoteSearch extends Thread { StringBuilder whs = new StringBuilder(24); for (String s: wordhashes) whs.append(s); final RemoteSearch searchThread = new RemoteSearch( - whs.toString(), "", urlhashes, QueryParams.matchnothing_pattern, QueryParams.catchall_pattern, QueryParams.catchall_pattern, new QueryParams.Modifier(""), "", "", "", "all", 20, time, 9999, true, 0, targetPeer, + whs.toString(), "", urlhashes, QueryParams.matchnothing_pattern, QueryParams.catchall_pattern, new QueryParams.Modifier(""), "", "", "", "all", 20, time, 9999, true, 0, targetPeer, indexSegment, peers, containerCache, null, blacklist, rankingProfile, constraint); searchThread.start(); return searchThread; diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 777fa5266..b3307475a 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -35,7 +35,6 @@ import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.SortedSet; -import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -108,7 +107,6 @@ public final class QueryParams { public final String queryString; public HandleSet fullqueryHashes, queryHashes, excludeHashes; - public Pattern snippetMatcher; public final int itemsPerPage; public int offset; public final Pattern urlMask, prefer; @@ -167,7 +165,6 @@ public final class QueryParams { this.excludeHashes = Word.words2hashesHandles(cq[1]); this.fullqueryHashes = Word.words2hashesHandles(cq[2]); } - this.snippetMatcher = QueryParams.catchall_pattern; this.ranking = ranking; this.tenant = null; this.modifier = new Modifier(""); @@ -210,7 +207,6 @@ public final class QueryParams { final String queryString, final HandleSet queryHashes, final HandleSet excludeHashes, final HandleSet fullqueryHashes, - final Pattern snippetMatcher, final String tenant, final String modifier, final int maxDistance, final String prefer, final ContentDomain contentdom, @@ -237,7 +233,6 @@ public final class QueryParams { this.queryHashes = queryHashes; this.excludeHashes = excludeHashes; this.fullqueryHashes = fullqueryHashes; - this.snippetMatcher = snippetMatcher; this.tenant = (tenant != null && tenant.length() == 0) ? null : tenant; this.modifier = new Modifier(modifier == null ? "" : modifier); this.ranking = ranking; @@ -607,30 +602,4 @@ public final class QueryParams { return sb; } - private static Pattern StringMatchPattern = Pattern.compile(".*?(\".*?\").*"); - /** - * calculate a pattern to match with a string search - * @param query - * @return - */ - public static Pattern stringSearchPattern(String query) { - final StringBuilder p = new StringBuilder(query.length()); - p.append("(?iu)"); - int seqc = 0; - while (query.length() > 0) { - final Matcher m = StringMatchPattern.matcher(query); - if (!m.matches()) break; - p.append(".*?").append(query.substring(m.start(1) + 1, m.end(1) - 1)); - query = query.substring(m.end(1)); - seqc++; - } - if (seqc == 0) return QueryParams.catchall_pattern; - p.append(".*"); - return Pattern.compile(p.toString()); - } - - public static void main(final String[] args) { - final Pattern p = stringSearchPattern("die \"peer-to-peer Suchmaschine\" ohne Zensur als \"freie Software\" runterladen"); - System.out.println(p.toString()); - } } diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 8a691c1aa..31b06fa64 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -172,7 +172,6 @@ public final class SearchEvent QueryParams.hashSet2hashString(SearchEvent.this.query.excludeHashes), SearchEvent.this.query.prefer, SearchEvent.this.query.urlMask, - SearchEvent.this.query.snippetMatcher, SearchEvent.this.query.modifier, SearchEvent.this.query.targetlang == null ? "" : SearchEvent.this.query.targetlang, SearchEvent.this.query.sitehash == null ? "" : SearchEvent.this.query.sitehash, diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index 1995d3077..d0745f374 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -29,7 +29,6 @@ package net.yacy.search.query; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; @@ -365,7 +364,7 @@ public class SnippetProcess { (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) { break; } - worker = new Worker(i, this.query.maxtime, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults); + worker = new Worker(this.query.maxtime, this.query.snippetCacheStrategy, neededResults); worker.start(); this.workerThreads[i] = worker; if (this.rankingProcess.expectMoreRemoteReferences()) { @@ -387,7 +386,7 @@ public class SnippetProcess { break; } if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) { - worker = new Worker(i, this.query.maxtime, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults); + worker = new Worker(this.query.maxtime, this.query.snippetCacheStrategy, neededResults); worker.start(); this.workerThreads[i] = worker; deployCount--; @@ -437,14 +436,12 @@ public class SnippetProcess { private long lastLifeSign; // when the last time the run()-loop was executed private final CacheStrategy cacheStrategy; private final int neededResults; - private final Pattern snippetPattern; private boolean shallrun; private final SolrConnector solr; - public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) { + public Worker(final long maxlifetime, final CacheStrategy cacheStrategy, final int neededResults) { this.cacheStrategy = cacheStrategy; this.lastLifeSign = System.currentTimeMillis(); - this.snippetPattern = snippetPattern; this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.neededResults = neededResults; this.shallrun = true;