redesign of the QueryParams class: introduced QueryGoal, which holds the
query string parser. This shall be used to create proper full-string
matching, which is then handled by QueryGoal.
pull/1/head
orbiter 12 years ago
parent 5fd3b93661
commit 5dfd6359cb

@ -165,11 +165,11 @@ public class AccessTracker_p {
if (page == 2) {
// local search
prop.putNum("page_list_" + m + "_offset", query.offset);
prop.putHTML("page_list_" + m + "_querystring", query.queryString);
prop.putHTML("page_list_" + m + "_querystring", query.getQueryGoal().getQueryString());
} else {
// remote search
prop.putHTML("page_list_" + m + "_peername", (query.remotepeer == null) ? "<unknown>" : query.remotepeer.getName());
prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.query_include_hashes));
prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.getQueryGoal().getIncludeHashes()));
}
prop.putNum("page_list_" + m + "_querycount", query.itemsPerPage);
prop.putNum("page_list_" + m + "_transmitcount", query.transmitcount);

@ -41,6 +41,7 @@ import net.yacy.kelondro.util.ISO639;
import net.yacy.peers.Network;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.query.QueryParams;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -68,8 +69,8 @@ public final class timeline {
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
if (language == null) language = "en";
}
final List<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
HandleSet q = Word.words2hashesHandles(query[0]);
final QueryGoal qg = new QueryGoal(querystring);
HandleSet q = qg.getIncludeHashes();
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(3000);
@ -87,7 +88,7 @@ public final class timeline {
// get the index container with the result vector
TermSearch<WordReference> search = null;
try {
search = segment.termIndex().query(q, Word.words2hashesHandles(query[1]), null, Segment.wordReferenceFactory, maxdist);
search = segment.termIndex().query(q, qg.getExcludeHashes(), null, Segment.wordReferenceFactory, maxdist);
} catch (SpaceExceededException e) {
Log.logException(e);
}

@ -36,6 +36,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.server.serverObjects;
@ -110,8 +111,8 @@ public class searchresult {
post.put("originalQuery", q);
// get a solr query string
List<String>[] cq = QueryParams.cleanQuery(q);
q = QueryParams.solrQueryString(cq[0], cq[1], sb.index.fulltext().getSolrScheme()).toString();
QueryGoal qg = new QueryGoal(q);
q = qg.solrQueryString(sb.index.fulltext().getSolrScheme()).toString();
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));

@ -68,6 +68,7 @@ import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
@ -219,12 +220,9 @@ public final class search {
if (query.isEmpty() && abstractSet != null) {
// this is _not_ a normal search, only a request for index abstracts
final Segment indexSegment = sb.index;
QueryGoal qg = new QueryGoal(abstractSet, new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0), abstractSet);
theQuery = new QueryParams(
null,
null, null, null,
abstractSet,
new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0),
abstractSet,
qg,
modifier,
maxdist,
prefer,
@ -251,11 +249,11 @@ public final class search {
header.get(RequestHeader.USER_AGENT, ""),
false, 0.0d, 0.0d, 0.0d
);
Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes) + " - " + theQuery.itemsPerPage() + " links");
Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
final long timer = System.currentTimeMillis();
//final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.query_include_hashes, QueryParams.hashes2Handles(urls));
final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.getQueryGoal().getIncludeHashes(), QueryParams.hashes2Handles(urls));
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.COLLECTION, "", incc.size(), System.currentTimeMillis() - timer), false);
if (incc != null) {
@ -284,12 +282,9 @@ public final class search {
RowHandleSet allHashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
try {allHashes.putAll(queryhashes);} catch (SpaceExceededException e) {}
try {allHashes.putAll(excludehashes);} catch (SpaceExceededException e) {}
QueryGoal qg = new QueryGoal(queryhashes, excludehashes, allHashes);
theQuery = new QueryParams(
null,
null, null, null,
queryhashes,
excludehashes,
allHashes,
qg,
modifier,
maxdist,
prefer,
@ -316,8 +311,8 @@ public final class search {
header.get(RequestHeader.USER_AGENT, ""),
false, 0.0d, 0.0d, 0.0d
);
Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes) + " - " + theQuery.itemsPerPage() + " links");
EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes), ""));
Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()), ""));
// make event
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
@ -433,7 +428,7 @@ public final class search {
// log
Network.log.logInfo("EXIT HASH SEARCH: " +
QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes) + " - " + theQuery.getResultCount() + " links found, " +
QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.getResultCount() + " links found, " +
prop.get("linkcount", "?") + " links selected, " +
indexabstractContainercount + " index abstracts, " +
(System.currentTimeMillis() - timestamp) + " milliseconds");

@ -34,7 +34,6 @@ import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
@ -78,6 +77,7 @@ import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
@ -594,14 +594,13 @@ public class yacysearch {
}
// the query
final List<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE;
final QueryGoal qg = new QueryGoal(querystring.trim());
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? qg.getAllWords().size() - 1 : Integer.MAX_VALUE;
// filter out stopwords
final SortedSet<String> filtered = SetTools.joinConstructiveByTest(query[0], Switchboard.stopwords);
final SortedSet<String> filtered = SetTools.joinConstructiveByTest(qg.getIncludeWords(), Switchboard.stopwords);
if ( !filtered.isEmpty() ) {
SetTools.excludeDestructiveByTestSmallInLarge(query[0], Switchboard.stopwords);
SetTools.excludeDestructiveByTestSmallInLarge(qg.getIncludeWords(), Switchboard.stopwords);
}
// if a minus-button was hit, remove a special reference first
@ -614,7 +613,7 @@ public class yacysearch {
// delete the index entry locally
final String delHash = post.get("deleteref", ""); // urlhash
indexSegment.termIndex().remove(Word.words2hashesHandles(query[0]), delHash.getBytes());
indexSegment.termIndex().remove(qg.getIncludeHashes(), delHash.getBytes());
// make new news message with negative voting
if ( !sb.isRobinsonMode() ) {
@ -715,13 +714,7 @@ public class yacysearch {
// do the search
final QueryParams theQuery =
new QueryParams(
originalquerystring,
query[0],
query[1],
query[2],
Word.words2hashesHandles(query[0]),
Word.words2hashesHandles(query[1]),
Word.words2hashesHandles(query[2]),
qg,
modifier.toString().trim(),
maxDistance,
prefermask,
@ -764,22 +757,22 @@ public class yacysearch {
sb.intermissionAllThreads(3000);
// filter out words that appear in bluelist
theQuery.filterOut(Switchboard.blueList);
theQuery.getQueryGoal().filterOut(Switchboard.blueList);
// log
Log.logInfo(
"LOCAL_SEARCH",
"INIT WORD SEARCH: "
+ theQuery.queryString
+ theQuery.getQueryGoal().getQueryString()
+ ":"
+ QueryParams.hashSet2hashString(theQuery.query_include_hashes)
+ QueryParams.hashSet2hashString(theQuery.getQueryGoal().getIncludeHashes())
+ " - "
+ theQuery.neededResults()
+ " links to be computed, "
+ theQuery.itemsPerPage()
+ " lines to be displayed");
EventChannel.channels(EventChannel.LOCALSEARCH).addMessage(
new RSSMessage("Local Search Request", theQuery.queryString, ""));
new RSSMessage("Local Search Request", theQuery.getQueryGoal().getQueryString(), ""));
final long timestamp = System.currentTimeMillis();
// create a new search event
@ -818,7 +811,7 @@ public class yacysearch {
// log
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: "
+ theQuery.queryString
+ theQuery.getQueryGoal().getQueryString()
+ " - "
+ "local_rwi_available(" + theSearch.query.local_rwi_available.get() + "), "
+ "local_rwi_stored(" + theSearch.query.local_rwi_stored.get() + "), "

@ -25,7 +25,7 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
@ -134,10 +134,10 @@ public class yacysearchitem {
prop.put("content_authorized", authenticated ? "1" : "0");
final String urlhash = ASCII.String(result.hash());
prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1");
prop.putHTML("content_authorized_bookmark_bookmarklink", "/yacysearch.html?query=" + theSearch.query.queryString.replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&bookmarkref=" + urlhash + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_bookmark_bookmarklink", "/yacysearch.html?query=" + theSearch.query.getQueryGoal().getQueryString().replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&bookmarkref=" + urlhash + "&urlmaskfilter=.*");
prop.put("content_authorized_recommend", (sb.peers.newsPool.getSpecific(NewsPool.OUTGOING_DB, NewsPool.CATEGORY_SURFTIPP_ADD, "url", resultUrlstring) == null) ? "1" : "0");
prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?query=" + theSearch.query.queryString.replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?query=" + theSearch.query.queryString.replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?query=" + theSearch.query.getQueryGoal().getQueryString().replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?query=" + theSearch.query.getQueryGoal().getQueryString().replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
prop.put("content_authorized_urlhash", urlhash);
final String resulthashString = urlhash;
prop.putHTML("content_title", result.title());
@ -206,18 +206,18 @@ public class yacysearchitem {
prop.putHTML("content_publisher", result.publisher());
prop.putHTML("content_creator", result.creator());// author
prop.putHTML("content_subject", result.subject());
final Collection<String>[] query = theSearch.query.queryWords();
final StringBuilder s = new StringBuilder(query[0].size() * 20);
for (final String t: query[0]) {
final List<String> query = theSearch.query.getQueryGoal().getAllWords();
final StringBuilder s = new StringBuilder(query.size() * 20);
for (final String t: query) {
s.append('+').append(t);
}
final String words = (s.length() > 0) ? s.substring(1) : "";
prop.putHTML("content_words", words);
prop.putHTML("content_showParser_words", words);
prop.putHTML("content_former", theSearch.query.queryString);
prop.putHTML("content_showPictures_former", theSearch.query.queryString);
prop.putHTML("content_former", theSearch.query.getQueryGoal().getQueryString());
prop.putHTML("content_showPictures_former", theSearch.query.getQueryGoal().getQueryString());
final TextSnippet snippet = result.textSnippet();
final String desc = (snippet == null) ? "" : snippet.isMarked() ? snippet.getLineRaw() : snippet.getLineMarked(theSearch.query.query_all_hashes);
final String desc = (snippet == null) ? "" : snippet.isMarked() ? snippet.getLineRaw() : snippet.getLineMarked(theSearch.query.getQueryGoal());
prop.put("content_description", desc);
prop.putXML("content_description-xml", desc);
prop.putJSON("content_description-json", desc);

@ -79,7 +79,7 @@ public class yacysearchtrailer {
count = theSearch.namespaceNavigator.get(name);
if (count == 0) break;
nav = "inurl%3A" + name;
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
pos++;
@ -119,7 +119,7 @@ public class yacysearchtrailer {
count = hostNavigator.get(name);
if (count == 0) break;
nav = "site%3A" + name;
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
pos++;
@ -158,7 +158,7 @@ public class yacysearchtrailer {
count = theSearch.authorNavigator.get(name);
if (count == 0) break;
nav = (name.indexOf(' ', 0) < 0) ? "author%3A" + name : "author%3A%28" + name.replace(" ", "+") + "%29";
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
pos++;
@ -197,9 +197,9 @@ public class yacysearchtrailer {
name = navigatorIterator.next();
count = topicNavigator.get(name);
if (count == 0) break;
if (theSearch.query.queryString == null) break;
if (theSearch.query.getQueryGoal().getQueryString() == null) break;
if (name != null) {
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
prop.put("nav-topics_element_" + i + "_on", 1);
prop.put(fileType, "nav-topics_element_" + i + "_modifier", name);
prop.put(fileType, "nav-topics_element_" + i + "_name", name);
@ -227,7 +227,7 @@ public class yacysearchtrailer {
count = theSearch.protocolNavigator.get(name);
if (count == 0) break;
nav = "%2F" + name;
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
pos++;
@ -266,7 +266,7 @@ public class yacysearchtrailer {
count = theSearch.filetypeNavigator.get(name);
if (count == 0) break;
nav = "filetype%3A" + name;
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
pos++;
@ -310,7 +310,7 @@ public class yacysearchtrailer {
count = ve.getValue().get(name);
if (count == 0) break;
nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(Tagging.encodePrintname(name)).toString();
queryStringForUrl = theSearch.query.queryStringForUrl();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {
queryStringForUrl += "+" + nav;

@ -77,8 +77,8 @@ public abstract class AbstractScraper implements Scraper {
@Override
public abstract void scrapeTag1(String tagname, Properties tagopts, char[] text);
protected static String stripAllTags(final char[] s) {
if (!MemoryControl.request(s.length * 2, false)) return "";
public static String stripAllTags(final char[] s) {
if (s.length > 80 && !MemoryControl.request(s.length * 2, false)) return "";
final StringBuilder r = new StringBuilder(s.length);
int bc = 0;
for (final char c : s) {
@ -114,10 +114,6 @@ public abstract class AbstractScraper implements Scraper {
return sb.toString().trim();
}
public static String stripAll(final char[] s) {
return CharacterCoding.html2unicode(stripAllTags(s));
}
@Override
public void close() {
// free resources

@ -535,7 +535,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
private String recursiveParse(final char[] inlineHtml) {
if (inlineHtml.length < 14) return cleanLine(super.stripAll(inlineHtml));
if (inlineHtml.length < 14) return cleanLine(CharacterCoding.html2unicode(stripAllTags(inlineHtml)));
// start a new scraper to parse links inside this text
// parsing the content
@ -545,7 +545,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
FileUtils.copy(new CharArrayReader(inlineHtml), writer);
} catch (final IOException e) {
Log.logException(e);
return cleanLine(super.stripAll(inlineHtml));
return cleanLine(CharacterCoding.html2unicode(stripAllTags(inlineHtml)));
} finally {
try {
writer.close();
@ -557,7 +557,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
this.images.putAll(scraper.images);
String line = cleanLine(super.stripAll(scraper.content.getChars()));
String line = cleanLine(CharacterCoding.html2unicode(stripAllTags(scraper.content.getChars())));
scraper.close();
return line;
}

@ -1033,7 +1033,7 @@ public final class Protocol
final Seed target,
final Blacklist blacklist) {
if (event.query.queryString == null || event.query.queryString.length() == 0) {
if (event.query.getQueryGoal().getQueryString() == null || event.query.getQueryGoal().getQueryString().length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string
}
event.addExpectedRemoteReferences(count);

@ -149,7 +149,7 @@ public class RemoteSearch extends Thread {
(clusterselection == null) ?
DHTSelection.selectSearchTargets(
event.peers,
event.query.query_include_hashes,
event.query.getQueryGoal().getIncludeHashes(),
event.peers.redundancy(),
burstRobinsonPercent,
burstMultiwordPercent)
@ -172,8 +172,8 @@ public class RemoteSearch extends Thread {
try {
RemoteSearch rs = new RemoteSearch(
event,
QueryParams.hashSet2hashString(event.query.query_include_hashes),
QueryParams.hashSet2hashString(event.query.query_exclude_hashes),
QueryParams.hashSet2hashString(event.query.getQueryGoal().getIncludeHashes()),
QueryParams.hashSet2hashString(event.query.getQueryGoal().getExcludeHashes()),
event.query.modifier,
event.query.targetlang == null ? "" : event.query.targetlang,
event.query.nav_sitehash == null ? "" : event.query.nav_sitehash,

@ -45,7 +45,6 @@ import net.yacy.peers.Seed;
import net.yacy.peers.SeedDB;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
import net.yacy.visualization.PrintTool;
@ -157,7 +156,7 @@ public class NetworkGraph {
*/
// draw in the search target
final Iterator<byte[]> i = event.query.query_include_hashes.iterator();
final Iterator<byte[]> i = event.query.getQueryGoal().getIncludeHashes().iterator();
eventPicture.setColor(RasterPlotter.GREY);
while (i.hasNext()) {
byte[] wordHash = i.next();

@ -428,7 +428,7 @@ public class Segment {
// data during search-time. To transfer indexed data directly to the search process
// the following lines push the index data additionally to the search process
// this is done only for searched words
if (searchEvent != null && !searchEvent.query.query_exclude_hashes.has(wordhash) && searchEvent.query.query_include_hashes.has(wordhash)) {
if (searchEvent != null && !searchEvent.query.getQueryGoal().getExcludeHashes().has(wordhash) && searchEvent.query.getQueryGoal().getIncludeHashes().has(wordhash)) {
// if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
ReferenceContainer<WordReference> container;
try {

@ -67,9 +67,9 @@ public class AccessTracker {
private static void add(final LinkedList<QueryParams> list, final QueryParams query) {
// learn that this word can be a word completion for the DidYouMeanLibrary
if (query.getResultCount() > 10 && query.queryString != null && query.queryString.length() > 0) {
final StringBuilder sb = new StringBuilder(query.queryString);
sb.append(query.queryString);
if (query.getResultCount() > 10 && query.getQueryGoal().getQueryString() != null && query.getQueryGoal().getQueryString().length() > 0) {
final StringBuilder sb = new StringBuilder(query.getQueryGoal().getQueryString());
sb.append(query.getQueryGoal().getQueryString());
WordCache.learn(sb);
}
@ -108,8 +108,8 @@ public class AccessTracker {
}
private static void addToDump(final QueryParams query) {
if (query.queryString == null || query.queryString.isEmpty()) return;
addToDump(query.queryString, Integer.toString(query.getResultCount()), new Date(query.starttime));
if (query.getQueryGoal().getQueryString() == null || query.getQueryGoal().getQueryString().isEmpty()) return;
addToDump(query.getQueryGoal().getQueryString(), Integer.toString(query.getResultCount()), new Date(query.starttime));
}
public static void addToDump(String querystring, String resultcount) {

@ -0,0 +1,230 @@
/**
* QueryGoal
* Copyright 2012 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First published 16.11.2005 on http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedSet;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrConfiguration;
/**
 * A QueryGoal holds the parsed representation of a search query: the raw
 * query string, the words to include/exclude, and the corresponding word
 * hash sets. Word hashes are computed lazily from the word lists unless the
 * goal was constructed from pre-computed hashes (or the query string is
 * itself a single word hash).
 */
public class QueryGoal {

    // separator characters that are replaced by spaces before tokenizing;
    // the double quote is appended in a static block because it cannot be
    // embedded conveniently in the literal
    private static String seps = "'.,/&_"; static {seps += '"';}

    private String querystring;
    private HandleSet include_hashes, exclude_hashes, all_hashes;
    private final ArrayList<String> include_words, exclude_words, all_words;

    /**
     * Construct a goal directly from pre-computed word hash sets.
     * In this mode no query string and no word lists are available
     * (getQueryString() returns null, the word getters return null).
     *
     * @param include_hashes hashes of words that must appear
     * @param exclude_hashes hashes of words that must not appear
     * @param all_hashes union of all word hashes
     */
    public QueryGoal(HandleSet include_hashes, HandleSet exclude_hashes, HandleSet all_hashes) {
        this.querystring = null;
        this.include_words = null;
        this.exclude_words = null;
        this.all_words = null;
        this.include_hashes = include_hashes;
        this.exclude_hashes = exclude_hashes;
        this.all_hashes = all_hashes;
    }

    /**
     * Parse a query string into include/exclude/all word lists.
     * Words prefixed with '-' are excluded; HTML tags and separator
     * characters are stripped; everything is lower-cased. If the query
     * string is exactly 12 characters long and base64-wellformed it is
     * interpreted as a single word hash and used directly.
     *
     * @param querystring the raw query string as typed by the user
     */
    public QueryGoal(String querystring) {
        this.querystring = querystring;
        this.include_words = new ArrayList<String>();
        this.exclude_words = new ArrayList<String>();
        this.all_words = new ArrayList<String>();
        // Initialize the hash sets to null BEFORE the branches below.
        // They are computed lazily from the word lists (see getIncludeHashes()),
        // except in the word-hash branch which assigns them explicitly.
        // (Nulling them after the branches, as an earlier revision did, would
        // clobber the hashes set by the word-hash branch.)
        this.include_hashes = null;
        this.exclude_hashes = null;
        this.all_hashes = null;
        byte[] queryHash;
        if ((querystring.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryHash = UTF8.getBytes(querystring)))) {
            // the query string is itself a wellformed word hash: search for that hash directly
            this.querystring = null;
            this.include_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
            this.exclude_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
            this.all_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
            try {
                this.include_hashes.put(queryHash);
                this.all_hashes.put(queryHash);
            } catch (final SpaceExceededException e) {
                Log.logException(e);
            }
        } else if ((querystring != null) && (!querystring.isEmpty())) {

            // remove funny symbols
            querystring = CharacterCoding.html2unicode(AbstractScraper.stripAllTags(querystring.toCharArray())).toLowerCase().trim();
            int c;
            for (int i = 0; i < seps.length(); i++) {
                while ((c = querystring.indexOf(seps.charAt(i))) >= 0) {
                    querystring = querystring.substring(0, c) + (((c + 1) < querystring.length()) ? (' ' + querystring.substring(c + 1)) : "");
                }
            }

            String s;
            int l;
            // the string is clean now, but we must generate a set out of it
            final String[] queries = querystring.split(" ");
            for (String quer : queries) {
                if (quer.startsWith("-")) {
                    // leading '-' marks an excluded word
                    String x = quer.substring(1);
                    if (!exclude_words.contains(x)) exclude_words.add(x);
                } else {
                    // split compound words at inner dashes; each fragment is
                    // an include word if it reaches the minimum word size
                    while ((c = quer.indexOf('-')) >= 0) {
                        s = quer.substring(0, c);
                        l = s.length();
                        if (l >= Condenser.wordminsize && !include_words.contains(s)) {include_words.add(s);}
                        if (l > 0 && !all_words.contains(s)) {all_words.add(s);}
                        quer = quer.substring(c + 1);
                    }
                    l = quer.length();
                    if (l >= Condenser.wordminsize && !include_words.contains(quer)) {include_words.add(quer);}
                    if (l > 0 && !all_words.contains(quer)) {all_words.add(quer);}
                }
            }
        }
    }

    /**
     * @return the original query string, or null if this goal was
     *         constructed from hashes (or from a single word hash)
     */
    public String getQueryString() {
        return this.querystring;
    }

    /**
     * @return the query string URL-encoded in UTF-8 for use in links;
     *         an empty string if no query string is available
     */
    public String queryStringForUrl() {
        if (this.querystring == null) return "";
        try {
            return URLEncoder.encode(this.querystring, "UTF-8");
        } catch (final UnsupportedEncodingException e) {
            Log.logException(e);
            return this.querystring;
        }
    }

    /** @return hashes of words that must appear; computed lazily from the include words */
    public HandleSet getIncludeHashes() {
        if (include_hashes == null) include_hashes = Word.words2hashesHandles(include_words);
        return include_hashes;
    }

    /** @return hashes of words that must not appear; computed lazily from the exclude words */
    public HandleSet getExcludeHashes() {
        if (exclude_hashes == null) exclude_hashes = Word.words2hashesHandles(exclude_words);
        return exclude_hashes;
    }

    /** @return hashes of all query words; computed lazily from the all-words list */
    public HandleSet getAllHashes() {
        if (all_hashes == null) all_hashes = Word.words2hashesHandles(all_words);
        return all_hashes;
    }

    /** @return words that must appear; null if constructed from hashes */
    public ArrayList<String> getIncludeWords() {
        return include_words;
    }

    /** @return words that must not appear; null if constructed from hashes */
    public ArrayList<String> getExcludeWords() {
        return exclude_words;
    }

    /** @return all query words; null if constructed from hashes */
    public ArrayList<String> getAllWords() {
        return all_words;
    }

    /**
     * Remove all words of the given set from the include hashes
     * (used to filter out blacklisted/blue-listed words).
     *
     * @param blueList words whose hashes shall be removed
     */
    public void filterOut(final SortedSet<String> blueList) {
        // filter out words that appear in this set
        // this is applied to the queryHashes; go through the getter so the
        // lazy hash set is materialized first (direct field access could NPE)
        final HandleSet blues = Word.words2hashesHandles(blueList);
        final HandleSet inc = getIncludeHashes();
        for (final byte[] b: blues) inc.remove(b);
    }

    // the Solr fields that are searched, and their ranking boosts
    private final static YaCySchema[] fields = new YaCySchema[]{
        YaCySchema.sku,YaCySchema.title,YaCySchema.h1_txt,YaCySchema.h2_txt,
        YaCySchema.author,YaCySchema.description,YaCySchema.keywords,YaCySchema.text_t,YaCySchema.synonyms_sxt
    };

    private final static Map<YaCySchema,Float> boosts = new LinkedHashMap<YaCySchema,Float>();
    static {
        boosts.put(YaCySchema.sku, 20.0f);
        boosts.put(YaCySchema.url_paths_sxt, 20.0f);
        boosts.put(YaCySchema.title, 15.0f);
        boosts.put(YaCySchema.h1_txt, 11.0f);
        boosts.put(YaCySchema.h2_txt, 10.0f);
        boosts.put(YaCySchema.author, 8.0f);
        boosts.put(YaCySchema.description, 5.0f);
        boosts.put(YaCySchema.keywords, 2.0f);
        boosts.put(YaCySchema.text_t, 1.0f);
    }

    /**
     * Translate this goal into a Solr query string: the include/exclude
     * words are combined with AND (exclude words negated with '-') and the
     * conjunction is applied, OR-combined, to every relevant schema field
     * with its ranking boost. Results from failed URLs are filtered out.
     *
     * @param configuration the Solr schema configuration; fields not present
     *        in the configuration are skipped (may be null to use all fields)
     * @return the Solr query as a StringBuilder
     */
    public StringBuilder solrQueryString(SolrConfiguration configuration) {
        final StringBuilder q = new StringBuilder(80);

        // parse special requests
        if (include_words.size() == 1 && exclude_words.size() == 0) {
            String singleWord = include_words.get(0);
            // the catchall word matches everything
            if (Segment.catchallString.equals(singleWord)) return new StringBuilder("*:*");
        }

        // add text query
        int wc = 0;
        StringBuilder w = new StringBuilder(80);
        for (String s: include_words) {
            if (wc > 0) w.append(" AND ");
            w.append(s);
            wc++;
        }
        for (String s: exclude_words) {
            // always prefix exclude words with '-'; only the conjunction
            // "AND" depends on whether a term was written before
            // (the earlier form "if (wc > 0) append(" AND -")" dropped the
            // '-' of the first exclude word when there were no include words)
            if (wc > 0) w.append(" AND ");
            w.append('-').append(s);
            wc++;
        }
        if (wc > 1) {w.insert(0, '('); w.append(')');}

        // combine these queries for all relevant fields
        wc = 0;
        Float boost;
        for (YaCySchema field: fields) {
            if (configuration != null && !configuration.contains(field.getSolrFieldName())) continue;
            if (wc > 0) q.append(" OR ");
            q.append('(');
            q.append(field.getSolrFieldName()).append(':').append(w);
            boost = boosts.get(field);
            if (boost != null) q.append('^').append(boost.toString());
            q.append(')');
            wc++;
        }
        q.insert(0, '(');
        q.append(')');

        // add filter to prevent that results come from failed urls
        q.append(" AND -").append(YaCySchema.failreason_t.getSolrFieldName()).append(":[* TO *]");

        return q;
    }
}

@ -26,15 +26,11 @@
package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
@ -50,7 +46,6 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.geo.GeoLocation;
@ -59,7 +54,6 @@ import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -71,7 +65,6 @@ import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrConfiguration;
import net.yacy.search.ranking.RankingProfile;
public final class QueryParams {
@ -113,9 +106,7 @@ public final class QueryParams {
public static final Pattern catchall_pattern = Pattern.compile(".*");
private static final Pattern matchnothing_pattern = Pattern.compile("");
public final String queryString;
public final HandleSet query_include_hashes, query_exclude_hashes, query_all_hashes;
private final List<String> query_include_words, query_exclude_words, query_all_words;
public final QueryGoal queryGoal;
public int itemsPerPage;
public int offset;
public final Pattern urlMask, prefer;
@ -167,31 +158,7 @@ public final class QueryParams {
final Segment indexSegment,
final RankingProfile ranking,
final String userAgent) {
byte[] queryHash;
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryHash = UTF8.getBytes(queryString)))) {
this.queryString = null;
this.query_include_words = null;
this.query_exclude_words = null;
this.query_all_words = null;
this.query_include_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.query_exclude_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.query_all_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
try {
this.query_include_hashes.put(queryHash);
this.query_all_hashes.put(queryHash);
} catch (final SpaceExceededException e) {
Log.logException(e);
}
} else {
this.queryString = queryString;
final List<String>[] cq = cleanQuery(queryString);
this.query_include_words = cq[0];
this.query_exclude_words = cq[1];
this.query_all_words = cq[2];
this.query_include_hashes = Word.words2hashesHandles(cq[0]);
this.query_exclude_hashes = Word.words2hashesHandles(cq[1]);
this.query_all_hashes = Word.words2hashesHandles(cq[2]);
}
this.queryGoal = new QueryGoal(queryString);
this.ranking = ranking;
this.modifier = new Modifier("");
this.maxDistance = Integer.MAX_VALUE;
@ -237,13 +204,7 @@ public final class QueryParams {
}
public QueryParams(
final String queryString,
final List<String> queryWords,
final List<String> excludeWords,
final List<String> fullqueryWords,
final HandleSet queryHashes,
final HandleSet excludeHashes,
final HandleSet fullqueryHashes,
final QueryGoal queryGoal,
final String modifier,
final int maxDistance, final String prefer, final ContentDomain contentdom,
final String language,
@ -264,14 +225,7 @@ public final class QueryParams {
final String userAgent,
final boolean filterfailurls,
final double lat, final double lon, final double radius) {
this.queryString = queryString;
this.query_include_words = queryWords;
this.query_exclude_words = excludeWords;
this.query_all_words = fullqueryWords;
this.query_include_hashes = queryHashes;
this.query_exclude_hashes = excludeHashes;
this.query_all_hashes = fullqueryHashes;
this.queryGoal = queryGoal;
this.modifier = new Modifier(modifier == null ? "" : modifier);
this.ranking = ranking;
this.maxDistance = maxDistance;
@ -429,8 +383,8 @@ public final class QueryParams {
private final boolean matchesText(final String text) {
boolean ret = false;
final HandleSet wordhashes = Word.words2hashesHandles(Condenser.getWords(text, null).keySet());
if (!SetTools.anymatch(wordhashes, this.query_exclude_hashes)) {
ret = SetTools.totalInclusion(this.query_include_hashes, wordhashes);
if (!SetTools.anymatch(wordhashes, this.queryGoal.getExcludeHashes())) {
ret = SetTools.totalInclusion(this.queryGoal.getIncludeHashes(), wordhashes);
}
return ret;
}
@ -443,83 +397,21 @@ public final class QueryParams {
return SetTools.anymatch(wordhashes, keyhashes);
}
private static String seps = "'.,/&_"; static {seps += '"';}
@SuppressWarnings("unchecked")
public static List<String>[] cleanQuery(String querystring) {
// returns three sets: a query set, an exclude set and a full query set
final List<String> query_include_words = new ArrayList<String>();
final List<String> query_exclude_words = new ArrayList<String>();
final List<String> query_all_words = new ArrayList<String>();
if ((querystring != null) && (!querystring.isEmpty())) {
// convert Umlaute
querystring = AbstractScraper.stripAll(querystring.toCharArray()).toLowerCase().trim();
int c;
for (int i = 0; i < seps.length(); i++) {
while ((c = querystring.indexOf(seps.charAt(i))) >= 0) {
querystring = querystring.substring(0, c) + (((c + 1) < querystring.length()) ? (" " + querystring.substring(c + 1)) : "");
}
}
String s;
int l;
// the string is clean now, but we must generate a set out of it
final String[] queries = querystring.split(" ");
for (String quer : queries) {
if (quer.startsWith("-")) {
String x = quer.substring(1);
if (!query_exclude_words.contains(x)) query_exclude_words.add(x);
} else {
while ((c = quer.indexOf('-')) >= 0) {
s = quer.substring(0, c);
l = s.length();
if (l >= Condenser.wordminsize && !query_include_words.contains(s)) {query_include_words.add(s);}
if (l > 0 && !query_all_words.contains(s)) {query_all_words.add(s);}
quer = quer.substring(c + 1);
}
l = quer.length();
if (l >= Condenser.wordminsize && !query_include_words.contains(quer)) {query_include_words.add(quer);}
if (l > 0 && !query_all_words.contains(quer)) {query_all_words.add(quer);}
}
}
}
return new List[]{query_include_words, query_exclude_words, query_all_words};
}
public String queryString(final boolean encodeHTML) {
final String ret;
if (encodeHTML){
ret = CharacterCoding.unicode2html(this.queryString, true);
ret = CharacterCoding.unicode2html(this.queryGoal.getQueryString(), true);
} else {
ret = this.queryString;
ret = this.queryGoal.getQueryString();
}
return ret;
}
private final static YaCySchema[] fields = new YaCySchema[]{
YaCySchema.sku,YaCySchema.title,YaCySchema.h1_txt,YaCySchema.h2_txt,
YaCySchema.author,YaCySchema.description,YaCySchema.keywords,YaCySchema.text_t,YaCySchema.synonyms_sxt
};
private final static Map<YaCySchema,Float> boosts = new LinkedHashMap<YaCySchema,Float>();
static {
boosts.put(YaCySchema.sku, 20.0f);
boosts.put(YaCySchema.url_paths_sxt, 20.0f);
boosts.put(YaCySchema.title, 15.0f);
boosts.put(YaCySchema.h1_txt, 11.0f);
boosts.put(YaCySchema.h2_txt, 10.0f);
boosts.put(YaCySchema.author, 8.0f);
boosts.put(YaCySchema.description, 5.0f);
boosts.put(YaCySchema.keywords, 2.0f);
boosts.put(YaCySchema.text_t, 1.0f);
}
public SolrQuery solrQuery() {
if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
if (this.queryGoal.getIncludeWords().size() == 0) return null;
// get text query
final StringBuilder q = solrQueryString(this.query_include_words, this.query_exclude_words, this.indexSegment.fulltext().getSolrScheme());
final StringBuilder q = this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrScheme());
// add constraints
if (this.nav_sitehash == null && this.nav_sitehost == null) {
@ -591,74 +483,11 @@ public final class QueryParams {
Log.logInfo("Protocol", "SOLR QUERY: " + params.toString());
return params;
}
public static StringBuilder solrQueryString(List<String> include, List<String> exclude, SolrConfiguration configuration) {
final StringBuilder q = new StringBuilder(80);
// parse special requests
if (include.size() == 1 && exclude.size() == 0) {
String w = include.get(0);
if (Segment.catchallString.equals(w)) return new StringBuilder("*:*");
}
// add text query
int wc = 0;
StringBuilder w = new StringBuilder(80);
for (String s: include) {
if (wc > 0) w.append(" AND ");
w.append(s);
wc++;
}
for (String s: exclude){
if (wc > 0) w.append(" AND -");
w.append(s);
wc++;
}
if (wc > 1) {w.insert(0, '('); w.append(')');}
// combine these queries for all relevant fields
wc = 0;
Float boost;
for (YaCySchema field: fields) {
if (configuration != null && !configuration.contains(field.getSolrFieldName())) continue;
if (wc > 0) q.append(" OR ");
q.append('(');
q.append(field.getSolrFieldName()).append(':').append(w);
boost = boosts.get(field);
if (boost != null) q.append('^').append(boost.toString());
q.append(')');
wc++;
}
q.insert(0, '(');
q.append(')');
// add filter to prevent that results come from failed urls
q.append(" AND -").append(YaCySchema.failreason_t.getSolrFieldName()).append(":[* TO *]");
return q;
public QueryGoal getQueryGoal() {
return this.queryGoal;
}
public String queryStringForUrl() {
try {
return URLEncoder.encode(this.queryString, "UTF-8");
} catch (final UnsupportedEncodingException e) {
Log.logException(e);
return this.queryString;
}
}
public List<String>[] queryWords() {
return cleanQuery(this.queryString);
}
public void filterOut(final SortedSet<String> blueList) {
// filter out words that appear in this set
// this is applied to the queryHashes
final HandleSet blues = Word.words2hashesHandles(blueList);
for (final byte[] b: blues) this.query_include_hashes.remove(b);
}
public final Map<MultiProtocolURI, String> separateMatches(final Map<MultiProtocolURI, String> links) {
final Map<MultiProtocolURI, String> matcher = new HashMap<MultiProtocolURI, String>();
final Iterator <Map.Entry<MultiProtocolURI, String>> i = links.entrySet().iterator();
@ -695,13 +524,13 @@ public final class QueryParams {
// generate a string that identifies a search so results can be re-used in a cache
final StringBuilder context = new StringBuilder(180);
if (anonymized) {
context.append(anonymizedQueryHashes(this.query_include_hashes));
context.append(anonymizedQueryHashes(this.queryGoal.getIncludeHashes()));
context.append('-');
context.append(anonymizedQueryHashes(this.query_exclude_hashes));
context.append(anonymizedQueryHashes(this.queryGoal.getExcludeHashes()));
} else {
context.append(hashSet2hashString(this.query_include_hashes));
context.append(hashSet2hashString(this.queryGoal.getIncludeHashes()));
context.append('-');
context.append(hashSet2hashString(this.query_exclude_hashes));
context.append(hashSet2hashString(this.queryGoal.getExcludeHashes()));
}
//context.append(asterisk);
//context.append(this.domType);
@ -755,7 +584,7 @@ public final class QueryParams {
sb.append("/yacysearch.");
sb.append(ext);
sb.append("?query=");
sb.append(newQueryString == null ? theQuery.queryStringForUrl() : newQueryString);
sb.append(newQueryString == null ? theQuery.getQueryGoal().queryStringForUrl() : newQueryString);
sb.append(ampersand);
sb.append("maximumRecords=");
@ -786,7 +615,7 @@ public final class QueryParams {
sb.append(ampersand);
sb.append("former=");
sb.append(theQuery.queryStringForUrl());
sb.append(theQuery.getQueryGoal().queryStringForUrl());
return sb;
}

@ -222,8 +222,8 @@ public final class RankingProcess extends Thread {
.getSegment()
.termIndex()
.query(
this.query.query_include_hashes,
this.query.query_exclude_hashes,
this.query.getQueryGoal().getIncludeHashes(),
this.query.getQueryGoal().getExcludeHashes(),
null,
Segment.wordReferenceFactory,
this.query.maxDistance);
@ -234,7 +234,7 @@ public final class RankingProcess extends Thread {
new ProfilingGraph.EventSearch(
this.query.id(true),
SearchEventType.JOIN,
this.query.queryString,
this.query.getQueryGoal().getQueryString(),
index.size(),
System.currentTimeMillis() - timer),
false);
@ -488,7 +488,7 @@ public final class RankingProcess extends Thread {
if ( word.length() > 2
&& "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off"
.indexOf(word) < 0
&& !this.query.query_include_hashes.has(Word.word2hash(word))
&& !this.query.getQueryGoal().getIncludeHashes().has(Word.word2hash(word))
&& lettermatch.matcher(word).matches()
&& !Switchboard.badwords.contains(word)
&& !Switchboard.stopwords.contains(word) ) {

@ -151,7 +151,7 @@ public final class SearchEvent {
this.snippets = new ConcurrentHashMap<String, String>();
this.secondarySearchSuperviser =
(this.query.query_include_hashes.size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches
(this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches
if ( this.secondarySearchSuperviser != null ) {
this.secondarySearchSuperviser.start();
}
@ -184,7 +184,7 @@ public final class SearchEvent {
if (this.remote) {
// start global searches
final long timer = System.currentTimeMillis();
if (this.query.query_include_hashes.isEmpty()) {
if (this.query.getQueryGoal().getIncludeHashes().isEmpty()) {
this.primarySearchThreadsL = null;
} else {
this.primarySearchThreadsL = new ArrayList<RemoteSearch>();
@ -281,12 +281,12 @@ public final class SearchEvent {
// only with the query minus the stopwords which had not been used for the search
HandleSet filtered;
try {
filtered = RowHandleSet.joinConstructive(query.query_include_hashes, Switchboard.stopwordHashes);
filtered = RowHandleSet.joinConstructive(query.getQueryGoal().getIncludeHashes(), Switchboard.stopwordHashes);
} catch (final SpaceExceededException e) {
Log.logException(e);
filtered = new RowHandleSet(query.query_include_hashes.keylen(), query.query_include_hashes.comparator(), 0);
filtered = new RowHandleSet(query.getQueryGoal().getIncludeHashes().keylen(), query.getQueryGoal().getIncludeHashes().comparator(), 0);
}
this.snippetFetchWordHashes = query.query_include_hashes.clone();
this.snippetFetchWordHashes = query.getQueryGoal().getIncludeHashes().clone();
if (filtered != null && !filtered.isEmpty()) {
this.snippetFetchWordHashes.excludeDestructive(Switchboard.stopwordHashes);
}
@ -763,17 +763,17 @@ public final class SearchEvent {
final String pagetitle = page.dc_title().toLowerCase();
// check exclusion
if ( this.query.query_exclude_hashes != null && !this.query.query_exclude_hashes.isEmpty() &&
((QueryParams.anymatch(pagetitle, this.query.query_exclude_hashes))
|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.query_exclude_hashes))
|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.query_exclude_hashes)))) {
if ( !this.query.getQueryGoal().getExcludeHashes().isEmpty() &&
((QueryParams.anymatch(pagetitle, this.query.getQueryGoal().getExcludeHashes()))
|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.getQueryGoal().getExcludeHashes()))
|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())))) {
this.query.misses.add(page.hash());
continue;
}
// check index-of constraint
if ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof)) && (!(pagetitle.startsWith("index of")))) {
final Iterator<byte[]> wi = this.query.query_include_hashes.iterator();
final Iterator<byte[]> wi = this.query.getQueryGoal().getIncludeHashes().iterator();
while ( wi.hasNext() ) {
this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
}

@ -109,7 +109,7 @@ public class SecondarySearchSuperviser extends Thread {
}
private void prepareSecondarySearch() {
if ( this.abstractsCache == null || this.abstractsCache.size() != this.searchEvent.query.query_include_hashes.size() ) {
if ( this.abstractsCache == null || this.abstractsCache.size() != this.searchEvent.query.getQueryGoal().getIncludeHashes().size() ) {
return; // secondary search not possible (yet)
}
@ -122,7 +122,7 @@ public class SecondarySearchSuperviser extends Thread {
*/
// find out if there are enough references for all words that are searched
if ( this.abstractsCache.size() != this.searchEvent.query.query_include_hashes.size() ) {
if ( this.abstractsCache.size() != this.searchEvent.query.getQueryGoal().getIncludeHashes().size() ) {
return;
}

@ -179,7 +179,7 @@ public class SnippetWorker extends Thread {
// apply query-in-result matching
final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
final Iterator<byte[]> shi = this.snippetProcess.query.query_include_hashes.iterator();
final Iterator<byte[]> shi = this.snippetProcess.query.getQueryGoal().getIncludeHashes().iterator();
byte[] queryhash;
while (shi.hasNext()) {
queryhash = shi.next();
@ -252,7 +252,7 @@ public class SnippetWorker extends Thread {
}
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
if (this.snippetProcess.deleteIfSnippetFail) {
this.snippetProcess.workTables.failURLsRegisterMissingWord(this.snippetProcess.query.getSegment().termIndex(), page.url(), this.snippetProcess.query.query_include_hashes, reason);
this.snippetProcess.workTables.failURLsRegisterMissingWord(this.snippetProcess.query.getSegment().termIndex(), page.url(), this.snippetProcess.query.getQueryGoal().getIncludeHashes(), reason);
}
SearchEvent.log.logInfo("sorted out url " + page.url().toNormalform(true) + " during search: " + reason);
return null;

@ -56,6 +56,7 @@ import net.yacy.peers.RemoteSearch;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.QueryGoal;
public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnippet> {
@ -380,7 +381,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
* @param queryHashes hashes of search words
* @return line with marked words
*/
public String getLineMarked(final HandleSet queryHashes) {
public String getLineMarked(final QueryGoal queryGoal) {
final HandleSet queryHashes = queryGoal.getAllHashes();
if (this.line == null) {
return "";
}

Loading…
Cancel
Save