refactoring of query attribute variable names for better consistency

with (next) stored query words
pull/1/head
orbiter 13 years ago
parent 1517a3b7b9
commit 62202e2d71
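
The commit renames three QueryParams hash sets and updates every call site. The mapping, summarized here from the hunks that follow (the arrow lines are a summary, not part of the commit):

    queryHashes     -> query_include_hashes    // hashes of words that must occur in a result
    excludeHashes   -> query_exclude_hashes    // hashes of words that must not occur
    fullqueryHashes -> query_all_hashes        // hashes of every word of the raw query, including short ones

QueryParams also gains matching word lists (query_include_words, query_exclude_words, query_all_words) alongside the hash sets, preparing for the stored query words mentioned in the commit message.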

@@ -172,7 +172,7 @@ public class AccessTracker_p {
 } else {
 // remote search
 prop.putHTML("page_list_" + m + "_peername", (query.remotepeer == null) ? "<unknown>" : query.remotepeer.getName());
-prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.queryHashes));
+prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.query_include_hashes));
 }
 prop.putNum("page_list_" + m + "_querycount", query.itemsPerPage);
 prop.putNum("page_list_" + m + "_transmitcount", query.transmitcount);

@@ -250,11 +250,11 @@ public final class search {
 header.get(RequestHeader.USER_AGENT, ""),
 false, 0.0d, 0.0d, 0.0d
 );
-Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.itemsPerPage() + " links");
+Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes) + " - " + theQuery.itemsPerPage() + " links");
 final long timer = System.currentTimeMillis();
 //final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
-final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.queryHashes, QueryParams.hashes2Handles(urls));
+final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.query_include_hashes, QueryParams.hashes2Handles(urls));
 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEvent.Type.COLLECTION, "", incc.size(), System.currentTimeMillis() - timer), false);
 if (incc != null) {
@@ -312,8 +312,8 @@ public final class search {
 header.get(RequestHeader.USER_AGENT, ""),
 false, 0.0d, 0.0d, 0.0d
 );
-Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.itemsPerPage() + " links");
-EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
+Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes) + " - " + theQuery.itemsPerPage() + " links");
+EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes), ""));
 // make event
 theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
@@ -431,7 +431,7 @@ public final class search {
 // log
 Network.log.logInfo("EXIT HASH SEARCH: " +
-QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + joincount + " links found, " +
+QueryParams.anonymizedQueryHashes(theQuery.query_include_hashes) + " - " + joincount + " links found, " +
 prop.get("linkcount", "?") + " links selected, " +
 indexabstractContainercount + " index abstracts, " +
 (System.currentTimeMillis() - timestamp) + " milliseconds");

@@ -789,7 +789,7 @@ public class yacysearch {
 "INIT WORD SEARCH: "
 + theQuery.queryString
 + ":"
-+ QueryParams.hashSet2hashString(theQuery.queryHashes)
++ QueryParams.hashSet2hashString(theQuery.query_include_hashes)
 + " - "
 + theQuery.neededResults()
 + " links to be computed, "

@@ -218,7 +218,7 @@ public class yacysearchitem {
 prop.putHTML("content_former", theQuery.queryString);
 prop.putHTML("content_showPictures_former", theQuery.queryString);
 final TextSnippet snippet = result.textSnippet();
-final String desc = (snippet == null) ? "" : snippet.getLineMarked(theQuery.fullqueryHashes);
+final String desc = (snippet == null) ? "" : snippet.getLineMarked(theQuery.query_all_hashes);
 prop.put("content_description", desc);
 prop.putXML("content_description-xml", desc);
 prop.putJSON("content_description-json", desc);

@@ -156,7 +156,7 @@ public class NetworkGraph {
 // draw in the search target
 final QueryParams query = event.getQuery();
-final Iterator<byte[]> i = query.queryHashes.iterator();
+final Iterator<byte[]> i = query.query_include_hashes.iterator();
 eventPicture.setColor(RasterPlotter.GREY);
 while (i.hasNext()) {
 final long[] positions = seedDB.scheme.dhtPositions(i.next());

@@ -330,7 +330,7 @@ public class Segment {
 // data during search-time. To transfer indexed data directly to the search process
 // the following lines push the index data additionally to the search process
 // this is done only for searched words
-if (searchEvent != null && !searchEvent.getQuery().excludeHashes.has(wordhash) && searchEvent.getQuery().queryHashes.has(wordhash)) {
+if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) {
 // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
 ReferenceContainer<WordReference> container;
 try {

@@ -106,7 +106,8 @@ public final class QueryParams {
 public static final Pattern matchnothing_pattern = Pattern.compile("");
 public final String queryString;
-public HandleSet fullqueryHashes, queryHashes, excludeHashes;
+public HandleSet query_include_hashes, query_exclude_hashes, query_all_hashes;
+public Collection<String> query_include_words, query_exclude_words, query_all_words = new ArrayList<String>();
 public final int itemsPerPage;
 public int offset;
 public final Pattern urlMask, prefer;
@@ -151,19 +152,19 @@ public final class QueryParams {
 byte[] queryHash;
 if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryHash = UTF8.getBytes(queryString)))) {
 this.queryString = null;
-this.queryHashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
-this.excludeHashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
+this.query_include_hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
+this.query_exclude_hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
 try {
-this.queryHashes.put(queryHash);
+this.query_include_hashes.put(queryHash);
 } catch (final RowSpaceExceededException e) {
 Log.logException(e);
 }
 } else {
 this.queryString = queryString;
 final Collection<String>[] cq = cleanQuery(queryString);
-this.queryHashes = Word.words2hashesHandles(cq[0]);
-this.excludeHashes = Word.words2hashesHandles(cq[1]);
-this.fullqueryHashes = Word.words2hashesHandles(cq[2]);
+this.query_include_hashes = Word.words2hashesHandles(cq[0]);
+this.query_exclude_hashes = Word.words2hashesHandles(cq[1]);
+this.query_all_hashes = Word.words2hashesHandles(cq[2]);
 }
 this.ranking = ranking;
 this.tenant = null;
@@ -204,7 +205,8 @@ public final class QueryParams {
 }
 public QueryParams(
-final String queryString, final HandleSet queryHashes,
+final String queryString,
+final HandleSet queryHashes,
 final HandleSet excludeHashes,
 final HandleSet fullqueryHashes,
 final String tenant,
@@ -230,9 +232,9 @@ public final class QueryParams {
 final double lat, final double lon, final double radius) {
 this.queryString = queryString;
-this.queryHashes = queryHashes;
-this.excludeHashes = excludeHashes;
-this.fullqueryHashes = fullqueryHashes;
+this.query_include_hashes = queryHashes;
+this.query_exclude_hashes = excludeHashes;
+this.query_all_hashes = fullqueryHashes;
 this.tenant = (tenant != null && tenant.length() == 0) ? null : tenant;
 this.modifier = new Modifier(modifier == null ? "" : modifier);
 this.ranking = ranking;
@@ -368,8 +370,8 @@ public final class QueryParams {
 public final boolean matchesText(final String text) {
 boolean ret = false;
 final HandleSet wordhashes = Word.words2hashesHandles(Condenser.getWords(text, null).keySet());
-if (!SetTools.anymatch(wordhashes, this.excludeHashes)) {
-ret = SetTools.totalInclusion(this.queryHashes, wordhashes);
+if (!SetTools.anymatch(wordhashes, this.query_exclude_hashes)) {
+ret = SetTools.totalInclusion(this.query_include_hashes, wordhashes);
 }
 return ret;
 }
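
In other words, matchesText accepts a text exactly when none of the exclude words and all of the include words occur in it. A plain-set restatement (a sketch with hypothetical names, assuming SetTools.anymatch tests for a non-empty intersection and SetTools.totalInclusion tests that every element of its first argument occurs in its second):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class MatchesTextSketch {
        // A text matches when no exclude word occurs in it and all include words do.
        static boolean matches(Set<String> textWords, Set<String> include, Set<String> exclude) {
            for (String w : exclude) {
                if (textWords.contains(w)) return false; // anymatch: one exclude word vetoes the text
            }
            return textWords.containsAll(include);       // totalInclusion: every include word required
        }

        public static void main(String[] args) {
            Set<String> text = new HashSet<String>(Arrays.asList("free", "web", "search"));
            System.out.println(matches(text,
                new HashSet<String>(Arrays.asList("web", "search")),
                new HashSet<String>(Arrays.asList("spam"))));  // true
            System.out.println(matches(text,
                new HashSet<String>(Arrays.asList("web")),
                new HashSet<String>(Arrays.asList("free")))); // false: "free" is excluded
        }
    }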
@@ -384,12 +386,11 @@ public final class QueryParams {
 private static String seps = "'.,/&_"; static {seps += '"';}
 @SuppressWarnings("unchecked")
 public static Collection<String>[] cleanQuery(String querystring) {
-// returns three sets: a query set, a exclude set and a full query set
-final Collection<String> query = new ArrayList<String>();
-final Collection<String> exclude = new ArrayList<String>();
-final Collection<String> fullquery = new ArrayList<String>();
+final Collection<String> query_include_words = new ArrayList<String>();
+final Collection<String> query_exclude_words = new ArrayList<String>();
+final Collection<String> query_all_words = new ArrayList<String>();
 if ((querystring != null) && (!querystring.isEmpty())) {
@@ -409,22 +410,22 @@ public final class QueryParams {
 for (String quer : queries) {
 if (quer.startsWith("-")) {
 String x = quer.substring(1);
-if (!exclude.contains(x)) exclude.add(x);
+if (!query_exclude_words.contains(x)) query_exclude_words.add(x);
 } else {
 while ((c = quer.indexOf('-')) >= 0) {
 s = quer.substring(0, c);
 l = s.length();
-if (l >= Condenser.wordminsize && !query.contains(s)) {query.add(s);}
-if (l > 0 && !fullquery.contains(s)) {fullquery.add(s);}
+if (l >= Condenser.wordminsize && !query_include_words.contains(s)) {query_include_words.add(s);}
+if (l > 0 && !query_all_words.contains(s)) {query_all_words.add(s);}
 quer = quer.substring(c + 1);
 }
 l = quer.length();
-if (l >= Condenser.wordminsize && !query.contains(quer)) {query.add(quer);}
-if (l > 0 && !fullquery.contains(quer)) {fullquery.add(quer);}
+if (l >= Condenser.wordminsize && !query_include_words.contains(quer)) {query_include_words.add(quer);}
+if (l > 0 && !query_all_words.contains(quer)) {query_all_words.add(quer);}
 }
 }
 }
-return new Collection[]{query, exclude, fullquery};
+return new Collection[]{query_include_words, query_exclude_words, query_all_words};
 }
 public String queryString(final boolean encodeHTML) {
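
The renamed collections make the contract of cleanQuery explicit: tokens prefixed with "-" go to the exclude list, hyphenated tokens are split into their parts, and words shorter than Condenser.wordminsize are kept only in the all-words list. A self-contained sketch of that split (the value of wordminsize and the simple whitespace tokenization are assumptions here; the original also strips separator characters):

    import java.util.ArrayList;
    import java.util.Collection;

    public class CleanQuerySketch {
        static final int wordminsize = 2; // assumed; YaCy reads this from Condenser.wordminsize

        @SuppressWarnings("unchecked")
        public static Collection<String>[] cleanQuery(final String querystring) {
            final Collection<String> include = new ArrayList<String>();
            final Collection<String> exclude = new ArrayList<String>();
            final Collection<String> all = new ArrayList<String>();
            if (querystring != null && !querystring.isEmpty()) {
                for (String quer : querystring.split(" ")) {
                    if (quer.startsWith("-")) {
                        // "-word" marks an exclude word
                        final String x = quer.substring(1);
                        if (!exclude.contains(x)) exclude.add(x);
                    } else {
                        int c;
                        while ((c = quer.indexOf('-')) >= 0) {
                            // hyphenated words are split into their parts
                            final String s = quer.substring(0, c);
                            if (s.length() >= wordminsize && !include.contains(s)) include.add(s);
                            if (s.length() > 0 && !all.contains(s)) all.add(s);
                            quer = quer.substring(c + 1);
                        }
                        if (quer.length() >= wordminsize && !include.contains(quer)) include.add(quer);
                        if (quer.length() > 0 && !all.contains(quer)) all.add(quer);
                    }
                }
            }
            return new Collection[]{include, exclude, all};
        }

        public static void main(final String[] args) {
            final Collection<String>[] cq = cleanQuery("knowledge-base -spam");
            System.out.println(cq[0]); // [knowledge, base]  -> query_include_hashes
            System.out.println(cq[1]); // [spam]             -> query_exclude_hashes
            System.out.println(cq[2]); // [knowledge, base]  -> query_all_hashes
        }
    }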
@@ -454,7 +455,7 @@ public final class QueryParams {
 // filter out words that appear in this set
 // this is applied to the queryHashes
 final HandleSet blues = Word.words2hashesHandles(blueList);
-for (final byte[] b: blues) this.queryHashes.remove(b);
+for (final byte[] b: blues) this.query_include_hashes.remove(b);
 }
@@ -494,13 +495,13 @@ public final class QueryParams {
 // generate a string that identifies a search so results can be re-used in a cache
 final StringBuilder context = new StringBuilder(180);
 if (anonymized) {
-context.append(anonymizedQueryHashes(this.queryHashes));
+context.append(anonymizedQueryHashes(this.query_include_hashes));
 context.append('-');
-context.append(anonymizedQueryHashes(this.excludeHashes));
+context.append(anonymizedQueryHashes(this.query_exclude_hashes));
 } else {
-context.append(hashSet2hashString(this.queryHashes));
+context.append(hashSet2hashString(this.query_include_hashes));
 context.append('-');
-context.append(hashSet2hashString(this.excludeHashes));
+context.append(hashSet2hashString(this.query_exclude_hashes));
 }
 //context.append(asterisk);
 //context.append(this.domType);

@@ -203,8 +203,8 @@ public final class RWIProcess extends Thread
 .getSegment()
 .termIndex()
 .query(
-this.query.queryHashes,
-this.query.excludeHashes,
+this.query.query_include_hashes,
+this.query.query_exclude_hashes,
 null,
 Segment.wordReferenceFactory,
 this.query.maxDistance);
@@ -676,10 +676,10 @@ public final class RWIProcess extends Thread
 final String pagetitle = page.dc_title().toLowerCase();
 // check exclusion
-if ( this.query.excludeHashes != null && !this.query.excludeHashes.isEmpty() &&
-((QueryParams.anymatch(pagetitle, this.query.excludeHashes))
-|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.excludeHashes))
-|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.excludeHashes)))) {
+if ( this.query.query_exclude_hashes != null && !this.query.query_exclude_hashes.isEmpty() &&
+((QueryParams.anymatch(pagetitle, this.query.query_exclude_hashes))
+|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.query_exclude_hashes))
+|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.query_exclude_hashes)))) {
 this.sortout++;
 continue;
 }
@@ -688,7 +688,7 @@ public final class RWIProcess extends Thread
 if ( (this.query.constraint != null)
 && (this.query.constraint.get(Condenser.flag_cat_indexof))
 && (!(pagetitle.startsWith("index of"))) ) {
-final Iterator<byte[]> wi = this.query.queryHashes.iterator();
+final Iterator<byte[]> wi = this.query.query_include_hashes.iterator();
 while ( wi.hasNext() ) {
 this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
 }
@@ -964,7 +964,7 @@ public final class RWIProcess extends Thread
 if ( word.length() > 2
 && "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off"
 .indexOf(word) < 0
-&& !this.query.queryHashes.has(Word.word2hash(word))
+&& !this.query.query_include_hashes.has(Word.word2hash(word))
 && lettermatch.matcher(word).matches()
 && !Switchboard.badwords.contains(word)
 && !Switchboard.stopwords.contains(word) ) {
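
This condition chain decides which words from results survive as topic suggestions; only the member name changes in this hunk. A standalone restatement under stated assumptions (the lettermatch pattern and the badwords/stopwords contents are stand-ins, and the real check compares word hashes rather than strings):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.regex.Pattern;

    public class TopwordFilterSketch {
        static final Pattern lettermatch = Pattern.compile("[a-z]+"); // assumed shape of YaCy's lettermatch
        static final String junk = "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off";

        // Keep a word only if it is long enough, not in the junk list, not already
        // a search word, purely alphabetic, and neither a bad word nor a stopword.
        static boolean keep(String word, Set<String> queryWords, Set<String> badwords, Set<String> stopwords) {
            return word.length() > 2
                && junk.indexOf(word) < 0
                && !queryWords.contains(word) // stands in for query_include_hashes.has(Word.word2hash(word))
                && lettermatch.matcher(word).matches()
                && !badwords.contains(word)
                && !stopwords.contains(word);
        }

        public static void main(String[] args) {
            Set<String> q = new HashSet<String>(Arrays.asList("yacy"));
            Set<String> none = new HashSet<String>();
            System.out.println(keep("peer", q, none, none)); // true
            System.out.println(keep("yacy", q, none, none)); // false: already a query word
            System.out.println(keep("www", q, none, none));  // false: in the junk list
        }
    }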

@@ -125,7 +125,7 @@ public final class SearchEvent
 this.workTables = workTables;
 this.query = query;
 this.secondarySearchSuperviser =
-(this.query.queryHashes.size() > 1) ? new SecondarySearchSuperviser() : null; // generate abstracts only for combined searches
+(this.query.query_include_hashes.size() > 1) ? new SecondarySearchSuperviser() : null; // generate abstracts only for combined searches
 if ( this.secondarySearchSuperviser != null ) {
 this.secondarySearchSuperviser.start();
 }
@@ -155,7 +155,7 @@ public final class SearchEvent
 if ( remote ) {
 // start global searches
 final long timer = System.currentTimeMillis();
-if (this.query.queryHashes.isEmpty()) {
+if (this.query.query_include_hashes.isEmpty()) {
 this.primarySearchThreadsL = null;
 } else {
 this.primarySearchThreadsL = new ArrayList<RemoteSearch>();
@@ -168,8 +168,8 @@ public final class SearchEvent
 Thread.currentThread().setName("SearchEvent.primaryRemoteSearches");
 RemoteSearch.primaryRemoteSearches(
 SearchEvent.this.primarySearchThreadsL,
-QueryParams.hashSet2hashString(SearchEvent.this.query.queryHashes),
-QueryParams.hashSet2hashString(SearchEvent.this.query.excludeHashes),
+QueryParams.hashSet2hashString(SearchEvent.this.query.query_include_hashes),
+QueryParams.hashSet2hashString(SearchEvent.this.query.query_exclude_hashes),
 SearchEvent.this.query.prefer,
 SearchEvent.this.query.urlMask,
 SearchEvent.this.query.modifier,
@@ -612,7 +612,7 @@ public final class SearchEvent
 }
 private void prepareSecondarySearch() {
-if ( this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size() ) {
+if ( this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.query_include_hashes.size() ) {
 return; // secondary search not possible (yet)
 }
@@ -625,7 +625,7 @@ public final class SearchEvent
 */
 // find out if there are enough references for all words that are searched
-if ( this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size() ) {
+if ( this.abstractsCache.size() != SearchEvent.this.query.query_include_hashes.size() ) {
 return;
 }

@@ -115,12 +115,12 @@ public class SnippetProcess {
 // only with the query minus the stopwords which had not been used for the search
 HandleSet filtered;
 try {
-filtered = HandleSet.joinConstructive(query.queryHashes, Switchboard.stopwordHashes);
+filtered = HandleSet.joinConstructive(query.query_include_hashes, Switchboard.stopwordHashes);
 } catch (final RowSpaceExceededException e) {
 Log.logException(e);
-filtered = new HandleSet(query.queryHashes.row().primaryKeyLength, query.queryHashes.comparator(), 0);
+filtered = new HandleSet(query.query_include_hashes.row().primaryKeyLength, query.query_include_hashes.comparator(), 0);
 }
-this.snippetFetchWordHashes = query.queryHashes.clone();
+this.snippetFetchWordHashes = query.query_include_hashes.clone();
 if (filtered != null && !filtered.isEmpty()) {
 this.snippetFetchWordHashes.excludeDestructive(Switchboard.stopwordHashes);
 }
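
The set arithmetic here: intersect the include words with the stopwords, and if that intersection is non-empty, fetch snippets with the stopwords removed. A sketch with plain string sets in place of HandleSet (reading joinConstructive as intersection and excludeDestructive as in-place difference, which is an interpretation, not the YaCy API):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class SnippetWordFilterSketch {
        public static void main(String[] args) {
            Set<String> includeWords = new HashSet<String>(Arrays.asList("the", "yacy", "peer"));
            Set<String> stopwords = new HashSet<String>(Arrays.asList("the", "and", "for"));

            Set<String> filtered = new HashSet<String>(includeWords);
            filtered.retainAll(stopwords); // joinConstructive: query words that are also stopwords

            Set<String> snippetFetchWords = new HashSet<String>(includeWords); // clone()
            if (!filtered.isEmpty()) {
                snippetFetchWords.removeAll(stopwords); // excludeDestructive
            }
            System.out.println(snippetFetchWords); // [yacy, peer]
        }
    }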
@@ -333,7 +333,7 @@ public class SnippetProcess {
 // apply query-in-result matching
 final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
 final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
-final Iterator<byte[]> shi = this.query.queryHashes.iterator();
+final Iterator<byte[]> shi = this.query.query_include_hashes.iterator();
 byte[] queryhash;
 while (shi.hasNext()) {
 queryhash = shi.next();
@@ -572,6 +572,7 @@ public class SnippetProcess {
 solrText,
 page,
 this.snippetFetchWordHashes,
+//this.query.queryString,
 null,
 ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
 220,
@@ -610,7 +611,7 @@ public class SnippetProcess {
 }
 final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
 if (this.deleteIfSnippetFail) {
-this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason);
+this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.query_include_hashes, reason);
 }
 Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
 return null;
