Distinguish between the original query string (as entered) and the modified query string (used for word parsing)

pull/1/head
Michael Peter Christen 12 years ago
parent fb0fa9a102
commit cb5cbec14d

@ -165,7 +165,7 @@ public class AccessTracker_p {
if (page == 2) { if (page == 2) {
// local search // local search
prop.putNum("page_list_" + m + "_offset", query.offset); prop.putNum("page_list_" + m + "_offset", query.offset);
prop.putHTML("page_list_" + m + "_querystring", query.getQueryGoal().getQueryString()); prop.putHTML("page_list_" + m + "_querystring", query.getQueryGoal().getOriginalQueryString(false));
} else { } else {
// remote search // remote search
prop.putHTML("page_list_" + m + "_peername", (query.remotepeer == null) ? "<unknown>" : query.remotepeer.getName()); prop.putHTML("page_list_" + m + "_peername", (query.remotepeer == null) ? "<unknown>" : query.remotepeer.getName());

@ -649,7 +649,8 @@ public class IndexControlRWIs_p {
final byte[] keyhash, final byte[] keyhash,
final Bitfield filter) { final Bitfield filter) {
final QueryParams query = new QueryParams(ASCII.String(keyhash), -1, filter, segment, sb.getRanking(), "IndexControlRWIs_p"); String khw = ASCII.String(keyhash);
final QueryParams query = new QueryParams(khw, khw, -1, filter, segment, sb.getRanking(), "IndexControlRWIs_p");
final SearchEvent theSearch = SearchEventCache.getEvent(query, sb.peers, sb.tables, null, false, sb.loader, Integer.MAX_VALUE, Long.MAX_VALUE, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0)); final SearchEvent theSearch = SearchEventCache.getEvent(query, sb.peers, sb.tables, null, false, sb.loader, Integer.MAX_VALUE, Long.MAX_VALUE, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
//theSearch.rankingProcess.run(); //theSearch.rankingProcess.run();
RankingProcess ranked = theSearch.rankingProcess; RankingProcess ranked = theSearch.rankingProcess;

@ -67,7 +67,7 @@ public final class timeline {
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent); language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
if (language == null) language = "en"; if (language == null) language = "en";
} }
final QueryGoal qg = new QueryGoal(querystring); final QueryGoal qg = new QueryGoal(querystring, querystring);
HandleSet q = qg.getIncludeHashes(); HandleSet q = qg.getIncludeHashes();
// tell all threads to do nothing for a specific time // tell all threads to do nothing for a specific time

@ -113,7 +113,7 @@ public class searchresult {
post.put("originalQuery", originalQuery); post.put("originalQuery", originalQuery);
// get a solr query string // get a solr query string
QueryGoal qg = new QueryGoal(originalQuery); QueryGoal qg = new QueryGoal(originalQuery, originalQuery);
StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrScheme()); StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrScheme());
post.put("defType", "edismax"); post.put("defType", "edismax");
post.put(CommonParams.Q, solrQ.toString()); post.put(CommonParams.Q, solrQ.toString());

@ -356,8 +356,9 @@ public class yacysearch {
final RankingProfile ranking = sb.getRanking(); final RankingProfile ranking = sb.getRanking();
final StringBuilder modifier = new StringBuilder(20); final StringBuilder modifier = new StringBuilder(20);
if ("*".equals(querystring)) { int stp = querystring.indexOf('*');
querystring = Segment.catchallString; if (stp >= 0) {
querystring = querystring.substring(0, stp) + Segment.catchallString + querystring.substring(stp + 1);
} }
if ( querystring.indexOf("/near", 0) >= 0 ) { if ( querystring.indexOf("/near", 0) >= 0 ) {
querystring = querystring.replace("/near", ""); querystring = querystring.replace("/near", "");
@ -597,7 +598,7 @@ public class yacysearch {
} }
// the query // the query
final QueryGoal qg = new QueryGoal(querystring.trim()); final QueryGoal qg = new QueryGoal(originalquerystring, querystring.trim());
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? qg.getAllHashes().size() - 1 : Integer.MAX_VALUE; final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? qg.getAllHashes().size() - 1 : Integer.MAX_VALUE;
// filter out stopwords // filter out stopwords
@ -766,7 +767,7 @@ public class yacysearch {
Log.logInfo( Log.logInfo(
"LOCAL_SEARCH", "LOCAL_SEARCH",
"INIT WORD SEARCH: " "INIT WORD SEARCH: "
+ theQuery.getQueryGoal().getQueryString() + theQuery.getQueryGoal().getOriginalQueryString(false)
+ ":" + ":"
+ QueryParams.hashSet2hashString(theQuery.getQueryGoal().getIncludeHashes()) + QueryParams.hashSet2hashString(theQuery.getQueryGoal().getIncludeHashes())
+ " - " + " - "
@ -775,7 +776,7 @@ public class yacysearch {
+ theQuery.itemsPerPage() + theQuery.itemsPerPage()
+ " lines to be displayed"); + " lines to be displayed");
EventChannel.channels(EventChannel.LOCALSEARCH).addMessage( EventChannel.channels(EventChannel.LOCALSEARCH).addMessage(
new RSSMessage("Local Search Request", theQuery.getQueryGoal().getQueryString(), "")); new RSSMessage("Local Search Request", theQuery.getQueryGoal().getOriginalQueryString(false), ""));
final long timestamp = System.currentTimeMillis(); final long timestamp = System.currentTimeMillis();
// create a new search event // create a new search event
@ -814,7 +815,7 @@ public class yacysearch {
// log // log
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: "
+ theQuery.getQueryGoal().getQueryString() + theQuery.getQueryGoal().getOriginalQueryString(false)
+ " - " + " - "
+ "local_rwi_available(" + theSearch.query.local_rwi_available.get() + "), " + "local_rwi_available(" + theSearch.query.local_rwi_available.get() + "), "
+ "local_rwi_stored(" + theSearch.query.local_rwi_stored.get() + "), " + "local_rwi_stored(" + theSearch.query.local_rwi_stored.get() + "), "

@ -122,6 +122,7 @@ public class yacysearchitem {
faviconURL = null; faviconURL = null;
} }
final String resource = theSearch.query.domType.toString(); final String resource = theSearch.query.domType.toString();
final String origQ = theSearch.query.getQueryGoal().getOriginalQueryString(true);
prop.put("content", 1); // switch on specific content prop.put("content", 1); // switch on specific content
prop.put("content_showDate", sb.getConfigBool("search.result.show.date", true) ? 1 : 0); prop.put("content_showDate", sb.getConfigBool("search.result.show.date", true) ? 1 : 0);
prop.put("content_showSize", sb.getConfigBool("search.result.show.size", true) ? 1 : 0); prop.put("content_showSize", sb.getConfigBool("search.result.show.size", true) ? 1 : 0);
@ -134,10 +135,10 @@ public class yacysearchitem {
prop.put("content_authorized", authenticated ? "1" : "0"); prop.put("content_authorized", authenticated ? "1" : "0");
final String urlhash = ASCII.String(result.hash()); final String urlhash = ASCII.String(result.hash());
prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1"); prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1");
prop.putHTML("content_authorized_bookmark_bookmarklink", "/yacysearch.html?query=" + theSearch.query.getQueryGoal().getQueryString().replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&bookmarkref=" + urlhash + "&urlmaskfilter=.*"); prop.putHTML("content_authorized_bookmark_bookmarklink", "/yacysearch.html?query=" + origQ.replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&bookmarkref=" + urlhash + "&urlmaskfilter=.*");
prop.put("content_authorized_recommend", (sb.peers.newsPool.getSpecific(NewsPool.OUTGOING_DB, NewsPool.CATEGORY_SURFTIPP_ADD, "url", resultUrlstring) == null) ? "1" : "0"); prop.put("content_authorized_recommend", (sb.peers.newsPool.getSpecific(NewsPool.OUTGOING_DB, NewsPool.CATEGORY_SURFTIPP_ADD, "url", resultUrlstring) == null) ? "1" : "0");
prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?query=" + theSearch.query.getQueryGoal().getQueryString().replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*"); prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?query=" + origQ.replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?query=" + theSearch.query.getQueryGoal().getQueryString().replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*"); prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?query=" + origQ.replace(' ', '+') + "&Enter=Search&count=" + theSearch.query.itemsPerPage() + "&offset=" + (theSearch.query.neededResults() - theSearch.query.itemsPerPage()) + "&order=" + crypt.simpleEncode(theSearch.query.ranking.toExternalString()) + "&resource=" + resource + "&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
prop.put("content_authorized_urlhash", urlhash); prop.put("content_authorized_urlhash", urlhash);
final String resulthashString = urlhash; final String resulthashString = urlhash;
prop.putHTML("content_title", result.title()); prop.putHTML("content_title", result.title());
@ -214,8 +215,8 @@ public class yacysearchitem {
final String words = (s.length() > 0) ? s.substring(1) : ""; final String words = (s.length() > 0) ? s.substring(1) : "";
prop.putHTML("content_words", words); prop.putHTML("content_words", words);
prop.putHTML("content_showParser_words", words); prop.putHTML("content_showParser_words", words);
prop.putHTML("content_former", theSearch.query.getQueryGoal().getQueryString()); prop.putHTML("content_former", origQ);
prop.putHTML("content_showPictures_former", theSearch.query.getQueryGoal().getQueryString()); prop.putHTML("content_showPictures_former", origQ);
final TextSnippet snippet = result.textSnippet(); final TextSnippet snippet = result.textSnippet();
final String desc = (snippet == null) ? "" : snippet.isMarked() ? snippet.getLineRaw() : snippet.getLineMarked(theSearch.query.getQueryGoal()); final String desc = (snippet == null) ? "" : snippet.isMarked() ? snippet.getLineRaw() : snippet.getLineMarked(theSearch.query.getQueryGoal());
prop.put("content_description", desc); prop.put("content_description", desc);

@ -79,7 +79,7 @@ public class yacysearchtrailer {
count = theSearch.namespaceNavigator.get(name); count = theSearch.namespaceNavigator.get(name);
if (count == 0) break; if (count == 0) break;
nav = "inurl%3A" + name; nav = "inurl%3A" + name;
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl(); queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
p = queryStringForUrl.indexOf(nav); p = queryStringForUrl.indexOf(nav);
if (p < 0) { if (p < 0) {
pos++; pos++;
@ -119,7 +119,7 @@ public class yacysearchtrailer {
count = hostNavigator.get(name); count = hostNavigator.get(name);
if (count == 0) break; if (count == 0) break;
nav = "site%3A" + name; nav = "site%3A" + name;
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl(); queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
p = queryStringForUrl.indexOf(nav); p = queryStringForUrl.indexOf(nav);
if (p < 0) { if (p < 0) {
pos++; pos++;
@ -158,7 +158,7 @@ public class yacysearchtrailer {
count = theSearch.authorNavigator.get(name); count = theSearch.authorNavigator.get(name);
if (count == 0) break; if (count == 0) break;
nav = (name.indexOf(' ', 0) < 0) ? "author%3A" + name : "author%3A%28" + name.replace(" ", "+") + "%29"; nav = (name.indexOf(' ', 0) < 0) ? "author%3A" + name : "author%3A%28" + name.replace(" ", "+") + "%29";
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl(); queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
p = queryStringForUrl.indexOf(nav); p = queryStringForUrl.indexOf(nav);
if (p < 0) { if (p < 0) {
pos++; pos++;
@ -197,9 +197,9 @@ public class yacysearchtrailer {
name = navigatorIterator.next(); name = navigatorIterator.next();
count = topicNavigator.get(name); count = topicNavigator.get(name);
if (count == 0) break; if (count == 0) break;
if (theSearch.query.getQueryGoal().getQueryString() == null) break; queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
if (queryStringForUrl == null) break;
if (name != null) { if (name != null) {
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl();
prop.put("nav-topics_element_" + i + "_on", 1); prop.put("nav-topics_element_" + i + "_on", 1);
prop.put(fileType, "nav-topics_element_" + i + "_modifier", name); prop.put(fileType, "nav-topics_element_" + i + "_modifier", name);
prop.put(fileType, "nav-topics_element_" + i + "_name", name); prop.put(fileType, "nav-topics_element_" + i + "_name", name);
@ -227,7 +227,7 @@ public class yacysearchtrailer {
count = theSearch.protocolNavigator.get(name); count = theSearch.protocolNavigator.get(name);
if (count == 0) break; if (count == 0) break;
nav = "%2F" + name; nav = "%2F" + name;
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl(); queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
p = queryStringForUrl.indexOf(nav); p = queryStringForUrl.indexOf(nav);
if (p < 0) { if (p < 0) {
pos++; pos++;
@ -266,7 +266,7 @@ public class yacysearchtrailer {
count = theSearch.filetypeNavigator.get(name); count = theSearch.filetypeNavigator.get(name);
if (count == 0) break; if (count == 0) break;
nav = "filetype%3A" + name; nav = "filetype%3A" + name;
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl(); queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
p = queryStringForUrl.indexOf(nav); p = queryStringForUrl.indexOf(nav);
if (p < 0) { if (p < 0) {
pos++; pos++;
@ -310,7 +310,7 @@ public class yacysearchtrailer {
count = ve.getValue().get(name); count = ve.getValue().get(name);
if (count == 0) break; if (count == 0) break;
nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(Tagging.encodePrintname(name)).toString(); nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(Tagging.encodePrintname(name)).toString();
queryStringForUrl = theSearch.query.getQueryGoal().queryStringForUrl(); queryStringForUrl = theSearch.query.getQueryGoal().getOriginalQueryString(true);
p = queryStringForUrl.indexOf(nav); p = queryStringForUrl.indexOf(nav);
if (p < 0) { if (p < 0) {
queryStringForUrl += "+" + nav; queryStringForUrl += "+" + nav;
@ -354,8 +354,9 @@ public class yacysearchtrailer {
prop.put("cat-location", 0); prop.put("cat-location", 0);
} else { } else {
prop.put("cat-location", 1); prop.put("cat-location", 1);
prop.put(fileType, "cat-location_query", theSearch.query.queryString(true)); String uriginalQuery = theSearch.query.getQueryGoal().getOriginalQueryString(true);
prop.put(fileType, "cat-location_queryenc", theSearch.query.queryString(true).replace(' ', '+')); prop.put(fileType, "cat-location_query", uriginalQuery);
prop.put(fileType, "cat-location_queryenc", uriginalQuery.replace(' ', '+'));
} }
prop.put("num-results_totalcount", theSearch.query.getResultCount()); prop.put("num-results_totalcount", theSearch.query.getResultCount());
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "bottomline", 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "bottomline", 0, 0), false);

@ -1034,7 +1034,7 @@ public final class Protocol
final Seed target, final Seed target,
final Blacklist blacklist) { final Blacklist blacklist) {
if (event.query.getQueryGoal().getQueryString() == null || event.query.getQueryGoal().getQueryString().length() == 0) { if (event.query.getQueryGoal().getOriginalQueryString(false) == null || event.query.getQueryGoal().getOriginalQueryString(false).length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string return -1; // we cannot query solr only with word hashes, there is no clear text string
} }
event.addExpectedRemoteReferences(count); event.addExpectedRemoteReferences(count);

@ -3321,7 +3321,7 @@ public final class Switchboard extends serverSwitch {
new Thread() { new Thread() {
@Override @Override
public void run() { public void run() {
String queryString = searchEvent.query.queryString(true); String queryString = searchEvent.query.getQueryGoal().getOriginalQueryString(false);
Thread.currentThread().setName("Switchboard.heuristicRSS:" + queryString); Thread.currentThread().setName("Switchboard.heuristicRSS:" + queryString);
final int meta = queryString.indexOf("heuristic:", 0); final int meta = queryString.indexOf("heuristic:", 0);
if ( meta >= 0 ) { if ( meta >= 0 ) {

@ -67,9 +67,10 @@ public class AccessTracker {
private static void add(final LinkedList<QueryParams> list, final QueryParams query) { private static void add(final LinkedList<QueryParams> list, final QueryParams query) {
// learn that this word can be a word completion for the DidYouMeanLibrary // learn that this word can be a word completion for the DidYouMeanLibrary
if (query.getResultCount() > 10 && query.getQueryGoal().getQueryString() != null && query.getQueryGoal().getQueryString().length() > 0) { String queryString = query.getQueryGoal().getOriginalQueryString(false);
final StringBuilder sb = new StringBuilder(query.getQueryGoal().getQueryString()); if (query.getResultCount() > 10 && queryString != null && queryString.length() > 0) {
sb.append(query.getQueryGoal().getQueryString()); final StringBuilder sb = new StringBuilder(queryString);
sb.append(queryString);
WordCache.learn(sb); WordCache.learn(sb);
} }
@ -108,8 +109,9 @@ public class AccessTracker {
} }
private static void addToDump(final QueryParams query) { private static void addToDump(final QueryParams query) {
if (query.getQueryGoal().getQueryString() == null || query.getQueryGoal().getQueryString().isEmpty()) return; String queryString = query.getQueryGoal().getOriginalQueryString(false);
addToDump(query.getQueryGoal().getQueryString(), Integer.toString(query.getResultCount()), new Date(query.starttime)); if (queryString == null || queryString.isEmpty()) return;
addToDump(queryString, Integer.toString(query.getResultCount()), new Date(query.starttime));
} }
public static void addToDump(String querystring, String resultcount) { public static void addToDump(String querystring, String resultcount) {

@ -28,15 +28,10 @@ import java.util.SortedSet;
import net.yacy.cora.federate.solr.Boost; import net.yacy.cora.federate.solr.Boost;
import net.yacy.cora.federate.solr.YaCySchema; import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.parser.html.AbstractScraper; import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding; import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrConfiguration; import net.yacy.search.index.SolrConfiguration;
@ -48,14 +43,15 @@ public class QueryGoal {
private static char dq = '"'; private static char dq = '"';
private static String seps = ".,/&_"; private static String seps = ".,/&_";
private String querystring; private String query_original, query_words;
private HandleSet include_hashes, exclude_hashes, all_hashes; private HandleSet include_hashes, exclude_hashes, all_hashes;
private final ArrayList<String> include_words, exclude_words, all_words; private final ArrayList<String> include_words, exclude_words, all_words;
private final ArrayList<String> include_strings, exclude_strings, all_strings; private final ArrayList<String> include_strings, exclude_strings, all_strings;
public QueryGoal(HandleSet include_hashes, HandleSet exclude_hashes, HandleSet all_hashes) { public QueryGoal(HandleSet include_hashes, HandleSet exclude_hashes, HandleSet all_hashes) {
this.querystring = null; this.query_original = null;
this.query_words = null;
this.include_words = null; this.include_words = null;
this.exclude_words = null; this.exclude_words = null;
this.all_words = null; this.all_words = null;
@ -67,34 +63,11 @@ public class QueryGoal {
this.all_hashes = all_hashes; this.all_hashes = all_hashes;
} }
public QueryGoal(byte[] queryHash) { public QueryGoal(String query_original, String query_words) {
assert querystring != null; assert query_original != null;
assert queryHash.length == 12; assert query_words != null;
assert Base64Order.enhancedCoder.wellformed(queryHash); this.query_original = query_original;
this.querystring = null; this.query_words = query_words;
this.include_words = new ArrayList<String>();
this.exclude_words = new ArrayList<String>();
this.all_words = new ArrayList<String>();
this.include_strings = new ArrayList<String>();
this.exclude_strings = new ArrayList<String>();
this.all_strings = new ArrayList<String>();
this.include_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.exclude_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.all_hashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
try {
this.include_hashes.put(queryHash);
this.all_hashes.put(queryHash);
} catch (final SpaceExceededException e) {
Log.logException(e);
}
this.include_hashes = null;
this.exclude_hashes = null;
this.all_hashes = null;
}
public QueryGoal(String querystring) {
assert querystring != null;
this.querystring = querystring;
this.include_words = new ArrayList<String>(); this.include_words = new ArrayList<String>();
this.exclude_words = new ArrayList<String>(); this.exclude_words = new ArrayList<String>();
this.all_words = new ArrayList<String>(); this.all_words = new ArrayList<String>();
@ -103,16 +76,16 @@ public class QueryGoal {
this.all_strings = new ArrayList<String>(); this.all_strings = new ArrayList<String>();
// remove funny symbols // remove funny symbols
querystring = CharacterCoding.html2unicode(AbstractScraper.stripAllTags(querystring.toCharArray())).toLowerCase().trim(); this.query_words = CharacterCoding.html2unicode(AbstractScraper.stripAllTags(this.query_words.toCharArray())).toLowerCase().trim();
int c; int c;
for (int i = 0; i < seps.length(); i++) { for (int i = 0; i < seps.length(); i++) {
while ((c = querystring.indexOf(seps.charAt(i))) >= 0) { while ((c = this.query_words.indexOf(seps.charAt(i))) >= 0) {
querystring = querystring.substring(0, c) + (((c + 1) < querystring.length()) ? (' ' + querystring.substring(c + 1)) : ""); this.query_words = this.query_words.substring(0, c) + (((c + 1) < this.query_words.length()) ? (' ' + this.query_words.substring(c + 1)) : "");
} }
} }
// parse first quoted strings // parse first quoted strings
parseQuery(querystring, this.include_strings, this.exclude_strings, this.all_strings); parseQuery(this.query_words, this.include_strings, this.exclude_strings, this.all_strings);
// .. end then take these strings apart to generate word lists // .. end then take these strings apart to generate word lists
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words); for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words);
@ -168,17 +141,31 @@ public class QueryGoal {
} }
} }
public String getQueryString() { public String getOriginalQueryString(final boolean encodeHTML) {
return this.querystring; String ret;
if (encodeHTML){
try {
ret = URLEncoder.encode(this.query_original, "UTF-8");
} catch (UnsupportedEncodingException e) {
ret = this.query_original;
} }
} else {
public String queryStringForUrl() { ret = this.query_original;
}
return ret;
}
public String getWordQueryString(final boolean encodeHTML) {
String ret;
if (encodeHTML){
try { try {
return URLEncoder.encode(this.querystring, "UTF-8"); ret = URLEncoder.encode(this.query_words, "UTF-8");
} catch (final UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
Log.logException(e); ret = this.query_words;
return this.querystring; }
} else {
ret = this.query_words;
} }
return ret;
} }
public HandleSet getIncludeHashes() { public HandleSet getIncludeHashes() {

@ -153,13 +153,13 @@ public final class QueryParams {
public final SortedSet<byte[]> misses; // url hashes that had been sorted out because of constraints in postranking public final SortedSet<byte[]> misses; // url hashes that had been sorted out because of constraints in postranking
public QueryParams( public QueryParams(
final String queryString, final String query_original, final String query_words,
final int itemsPerPage, final int itemsPerPage,
final Bitfield constraint, final Bitfield constraint,
final Segment indexSegment, final Segment indexSegment,
final RankingProfile ranking, final RankingProfile ranking,
final String userAgent) { final String userAgent) {
this.queryGoal = new QueryGoal(queryString); this.queryGoal = new QueryGoal(query_original, query_words);
this.ranking = ranking; this.ranking = ranking;
this.modifier = new Modifier(""); this.modifier = new Modifier("");
this.maxDistance = Integer.MAX_VALUE; this.maxDistance = Integer.MAX_VALUE;
@ -398,17 +398,6 @@ public final class QueryParams {
return SetTools.anymatch(wordhashes, keyhashes); return SetTools.anymatch(wordhashes, keyhashes);
} }
public String queryString(final boolean encodeHTML) {
final String ret;
if (encodeHTML){
ret = CharacterCoding.unicode2html(this.queryGoal.getQueryString(), true);
} else {
ret = this.queryGoal.getQueryString();
}
return ret;
}
public SolrQuery solrQuery() { public SolrQuery solrQuery() {
if (this.queryGoal.getIncludeStrings().size() == 0) return null; if (this.queryGoal.getIncludeStrings().size() == 0) return null;
// get text query // get text query
@ -588,7 +577,7 @@ public final class QueryParams {
sb.append("/yacysearch."); sb.append("/yacysearch.");
sb.append(ext); sb.append(ext);
sb.append("?query="); sb.append("?query=");
sb.append(newQueryString == null ? theQuery.getQueryGoal().queryStringForUrl() : newQueryString); sb.append(newQueryString == null ? theQuery.getQueryGoal().getOriginalQueryString(true) : newQueryString);
sb.append(ampersand); sb.append(ampersand);
sb.append("maximumRecords="); sb.append("maximumRecords=");
@ -619,7 +608,7 @@ public final class QueryParams {
sb.append(ampersand); sb.append(ampersand);
sb.append("former="); sb.append("former=");
sb.append(theQuery.getQueryGoal().queryStringForUrl()); sb.append(theQuery.getQueryGoal().getOriginalQueryString(true));
return sb; return sb;
} }

@ -235,7 +235,7 @@ public final class RankingProcess extends Thread {
new ProfilingGraph.EventSearch( new ProfilingGraph.EventSearch(
this.query.id(true), this.query.id(true),
SearchEventType.JOIN, SearchEventType.JOIN,
this.query.getQueryGoal().getQueryString(), this.query.getQueryGoal().getOriginalQueryString(false),
index.size(), index.size(),
System.currentTimeMillis() - timer), System.currentTimeMillis() - timer),
false); false);

Loading…
Cancel
Save