completed search exclusion:

- exclusion at index level (not only in search snippets)
- exclusion hand-over in the remote search protocol

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3556 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 18 years ago
parent 991c62387a
commit b79b4082e2
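
For context, this patch implements two-sided exclusion: locally, the excluded word hashes are resolved to index containers and subtracted from the joined result; remotely, the exclusion hashes are handed over to peers through a new "exclude" field of the search protocol. Below is a minimal sketch of the intended result-set semantics — hypothetical standalone code, not part of this commit; YaCy itself operates on indexContainer objects keyed by base64 word hashes, not plain string sets.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

// Hypothetical illustration of the semantics only.
public class ExclusionSemanticsSketch {

    // include: one URL set per query word hash; exclude: one per excluded word hash
    static Set<String> search(List<Set<String>> include, List<Set<String>> exclude) {
        if (include.isEmpty()) return new TreeSet<String>();
        Set<String> result = new TreeSet<String>(include.get(0));
        for (Set<String> urls : include) result.retainAll(urls); // conjunction ("join")
        for (Set<String> urls : exclude) result.removeAll(urls); // subtraction ("exclude")
        return result;
    }

    public static void main(String[] args) {
        List<Set<String>> include = new ArrayList<Set<String>>();
        include.add(new TreeSet<String>(Arrays.asList("urlA", "urlB", "urlC")));
        include.add(new TreeSet<String>(Arrays.asList("urlB", "urlC")));
        List<Set<String>> exclude = new ArrayList<Set<String>>();
        exclude.add(new TreeSet<String>(Arrays.asList("urlC")));
        System.out.println(search(include, exclude)); // prints [urlB]
    }
}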

@@ -89,6 +89,7 @@ public final class search {
     // final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability
     final String key = post.get("key", ""); // transmission key for response
     final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined
+    final String exclude= post.get("exclude", "");// a string of word hashes that shall not be within the search result
     String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned
     String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none)
     // final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
@@ -126,7 +127,8 @@ public final class search {
     }
     // prepare search
-    final TreeSet keyhashes = plasmaSearchQuery.hashes2Set(query);
+    final TreeSet queryhashes = plasmaSearchQuery.hashes2Set(query);
+    final TreeSet excludehashes = (exclude.length() == 0) ? new TreeSet(kelondroBase64Order.enhancedCoder) : plasmaSearchQuery.hashes2Set(exclude);
     final long timestamp = System.currentTimeMillis();
     serverObjects prop = new serverObjects();
@@ -138,7 +140,7 @@ public final class search {
     plasmaSearchEvent theSearch = null;
     if ((query.length() == 0) && (abstractSet != null)) {
         // this is _not_ a normal search, only a request for index abstracts
-        squery = new plasmaSearchQuery(abstractSet, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), count, duetime, filter, plasmaSearchQuery.catchall_constraint);
+        squery = new plasmaSearchQuery(abstractSet, new TreeSet(kelondroBase64Order.enhancedCoder), maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), count, duetime, filter, plasmaSearchQuery.catchall_constraint);
         squery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
         yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(squery.queryHashes) + " - " + squery.wantedResults + " links");
@@ -148,9 +150,9 @@ public final class search {
         plasmaSearchTimingProfile remoteTiming = null;
         theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL, sb.snippetCache);
-        Map containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
+        Map[] containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
         if (containers != null) {
-            Iterator ci = containers.entrySet().iterator();
+            Iterator ci = containers[0].entrySet().iterator();
             Map.Entry entry;
             String wordhash;
             while (ci.hasNext()) {
@@ -165,7 +167,7 @@ public final class search {
         prop.put("joincount", 0);
     } else {
         // retrieve index containers from search request
-        squery = new plasmaSearchQuery(keyhashes, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), count, duetime, filter, constraint);
+        squery = new plasmaSearchQuery(queryhashes, excludehashes, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), count, duetime, filter, constraint);
         squery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
         yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(squery.queryHashes) + " - " + squery.wantedResults + " links");
@@ -178,14 +180,14 @@ public final class search {
                 rankingProfile, localTiming, remoteTiming, true,
                 yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL,
                 sb.snippetCache);
-        Map containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
+        Map[] containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
         // set statistic details of search result and find best result index set
         if (containers == null) {
             prop.putASIS("indexcount", "");
             prop.putASIS("joincount", "0");
         } else {
-            Iterator ci = containers.entrySet().iterator();
+            Iterator ci = containers[0].entrySet().iterator();
             StringBuffer indexcount = new StringBuffer();
             Map.Entry entry;
             int maxcount = -1;
@@ -213,7 +215,7 @@ public final class search {
             prop.putASIS("indexcount", new String(indexcount));
             // join and order the result
-            indexContainer localResults = theSearch.localSearchJoin(containers.values());
+            indexContainer localResults = theSearch.localSearchJoinExclude(containers[0].values(), containers[1].values());
             if (localResults == null) {
                 joincount = 0;
                 prop.put("joincount", 0);
@@ -226,13 +228,13 @@ public final class search {
             // generate compressed index for maxcounthash
             // this is not needed if the search is restricted to specific
             // urls, because it is a re-search
-            if ((maxcounthash == null) || (urls.length() != 0) || (keyhashes.size() == 1) || (abstracts.length() == 0)) {
+            if ((maxcounthash == null) || (urls.length() != 0) || (queryhashes.size() == 1) || (abstracts.length() == 0)) {
                 prop.putASIS("indexabstract", "");
             } else if (abstracts.equals("auto")) {
-                indexabstract.append("indexabstract." + maxcounthash + "=").append(plasmaURL.compressIndex(((indexContainer) containers.get(maxcounthash)),localResults, 1000).toString()).append(serverCore.crlfString);
+                indexabstract.append("indexabstract." + maxcounthash + "=").append(plasmaURL.compressIndex(((indexContainer) containers[0].get(maxcounthash)),localResults, 1000).toString()).append(serverCore.crlfString);
                 if ((neardhthash != null)
                     && (!(neardhthash.equals(maxcounthash)))) {
-                    indexabstract.append("indexabstract." + neardhthash + "=").append(plasmaURL.compressIndex(((indexContainer) containers.get(neardhthash)), localResults, 1000).toString()).append(serverCore.crlfString);
+                    indexabstract.append("indexabstract." + neardhthash + "=").append(plasmaURL.compressIndex(((indexContainer) containers[0].get(neardhthash)), localResults, 1000).toString()).append(serverCore.crlfString);
                 }
                 //System.out.println("DEBUG-ABSTRACTGENERATION: maxcounthash = " + maxcounthash);
                 //System.out.println("DEBUG-ABSTRACTGENERATION: neardhthash = "+ neardhthash);

@@ -213,7 +213,7 @@ public class indexContainer extends kelondroRowSet {
         }
     }
-    public static indexContainer joinContainer(Collection containers, long time, int maxDistance) {
+    public static indexContainer joinContainers(Collection containers, long time, int maxDistance) {
         long stamp = System.currentTimeMillis();
@@ -258,6 +258,23 @@ public class indexContainer extends kelondroRowSet {
         return searchResult;
     }
+    public static indexContainer excludeContainers(indexContainer pivot, Collection containers, long time) {
+        long stamp = System.currentTimeMillis();
+        // check if there is any result
+        if ((containers == null) || (containers.size() == 0)) return pivot; // no result, nothing found
+        Iterator i = containers.iterator();
+        while (i.hasNext()) {
+            time -= (System.currentTimeMillis() - stamp); stamp = System.currentTimeMillis();
+            pivot = excludeDestructive(pivot, (indexContainer) i.next(), time);
+            if ((pivot == null) || (pivot.size() == 0)) return null;
+        }
+        return pivot;
+    }
     // join methods
     private static int log2(int x) {
         int l = 0;
@@ -348,6 +365,83 @@ public class indexContainer extends kelondroRowSet {
         return conj;
     }
+    public static indexContainer excludeDestructive(indexContainer pivot, indexContainer excl, long time) {
+        if (pivot == null) return null;
+        if (excl == null) return pivot;
+        if (pivot.size() == 0) return null;
+        if (excl.size() == 0) return pivot;
+        // decide which method to use
+        int high = ((pivot.size() > excl.size()) ? pivot.size() : excl.size());
+        int low = ((pivot.size() > excl.size()) ? excl.size() : pivot.size());
+        int stepsEnum = 10 * (high + low - 1);
+        int stepsTest = 12 * log2(high) * low;
+        // start most efficient method
+        if (stepsEnum > stepsTest) {
+            return excludeDestructiveByTest(pivot, excl, time);
+        } else {
+            return excludeDestructiveByEnumeration(pivot, excl, time);
+        }
+    }
+    private static indexContainer excludeDestructiveByTest(indexContainer pivot, indexContainer excl, long time) {
+        assert pivot.rowdef.equals(excl.rowdef) : "small = " + pivot.rowdef.toString() + "; large = " + excl.rowdef.toString();
+        int keylength = pivot.rowdef.width(0);
+        assert (keylength == excl.rowdef.width(0));
+        boolean iterate_pivot = pivot.size() < excl.size();
+        Iterator se = (iterate_pivot) ? pivot.entries() : excl.entries();
+        indexRWIEntry ie0, ie1;
+        long stamp = System.currentTimeMillis();
+        while ((se.hasNext()) && ((System.currentTimeMillis() - stamp) < time)) {
+            ie0 = (indexRWIEntry) se.next();
+            ie1 = excl.get(ie0.urlHash());
+            if ((ie0 != null) && (ie1 != null)) {
+                assert (ie0.urlHash().length() == keylength) : "ie0.urlHash() = " + ie0.urlHash();
+                assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash();
+                if (iterate_pivot) se.remove(); pivot.remove(ie0.urlHash().getBytes());
+            }
+        }
+        return pivot;
+    }
+    private static indexContainer excludeDestructiveByEnumeration(indexContainer pivot, indexContainer excl, long time) {
+        assert pivot.rowdef.equals(excl.rowdef) : "i1 = " + pivot.rowdef.toString() + "; i2 = " + excl.rowdef.toString();
+        int keylength = pivot.rowdef.width(0);
+        assert (keylength == excl.rowdef.width(0));
+        if (!((pivot.rowdef.getOrdering().signature().equals(excl.rowdef.getOrdering().signature())) &&
+              (pivot.rowdef.primaryKey() == excl.rowdef.primaryKey()))) return pivot; // ordering must be equal
+        Iterator e1 = pivot.entries();
+        Iterator e2 = excl.entries();
+        int c;
+        if ((e1.hasNext()) && (e2.hasNext())) {
+            indexRWIEntry ie1;
+            indexRWIEntry ie2;
+            ie1 = (indexRWIEntry) e1.next();
+            ie2 = (indexRWIEntry) e2.next();
+            long stamp = System.currentTimeMillis();
+            while ((System.currentTimeMillis() - stamp) < time) {
+                assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash();
+                assert (ie2.urlHash().length() == keylength) : "ie2.urlHash() = " + ie2.urlHash();
+                c = pivot.rowdef.getOrdering().compare(ie1.urlHash(), ie2.urlHash());
+                //System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
+                if (c < 0) {
+                    if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break;
+                } else if (c > 0) {
+                    if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
+                } else {
+                    // we have found the same urls in different searches!
+                    ie1.combineDistance(ie2);
+                    e1.remove();
+                    if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break;
+                    if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
+                }
+            }
+        }
+        return pivot;
+    }
     public String toString() {
         return "C[" + wordHash + "] has " + this.size() + " entries";
     }
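
The excludeDestructive dispatcher above chooses between two strategies with a rough cost model: enumerating both ordered containers in lockstep costs about 10 * (high + low - 1) steps, while probing for each entry of the smaller container in the larger one costs about 12 * log2(high) * low steps. Here is a standalone sketch of just that decision, with the constants copied from the patch; the log2 body is an assumption inferred from the surrounding code, not shown in full by this diff.

public class ExcludeCostSketch {

    // assumed: counts the number of bits needed to represent x
    static int log2(int x) { int l = 0; while (x > 0) { x >>= 1; l++; } return l; }

    static String pick(int pivotSize, int exclSize) {
        int high = Math.max(pivotSize, exclSize);
        int low = Math.min(pivotSize, exclSize);
        int stepsEnum = 10 * (high + low - 1);  // lockstep walk over both containers
        int stepsTest = 12 * log2(high) * low;  // one tree lookup per small-side entry
        return (stepsEnum > stepsTest) ? "excludeDestructiveByTest" : "excludeDestructiveByEnumeration";
    }

    public static void main(String[] args) {
        System.out.println(pick(100000, 10));    // tiny exclusion set -> byTest
        System.out.println(pick(100000, 50000)); // similar sizes -> byEnumeration
    }
}

In effect, small exclusion sets take the per-entry lookup path, while containers of similar size take the merge-style enumeration.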

@@ -96,7 +96,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         this.snippetCache = snippetCache;
         this.rcContainers = wordIndex.emptyContainer(null);
         this.rcContainerFlushCount = 0;
-        this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
+        this.rcAbstracts = (query.queryHashes.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
         this.profileLocal = localTiming;
         this.profileGlobal = remoteTiming;
         this.postsort = postsort;
@@ -151,12 +151,12 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
         long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
         long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
-        primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), "",
+        primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), plasmaSearchQuery.hashSet2hashString(query.excludeHashes), "",
                 query.prefer, query.urlMask, query.maxDistance, urlStore, wordIndex, rcContainers, rcAbstracts,
                 fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking, query.constraint);
         // meanwhile do a local search
-        Map searchContainerMap = localSearchContainers(null);
+        Map[] searchContainerMaps = localSearchContainers(null);
         // use the search containers to fill up rcAbstracts locally
         /*
@@ -184,7 +184,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         */
         // try to pre-fetch some LURLs if there is enough time
-        indexContainer rcLocal = localSearchJoin(searchContainerMap.values());
+        indexContainer rcLocal = localSearchJoinExclude(searchContainerMaps[0].values(), searchContainerMaps[1].values());
         prefetchLocal(rcLocal, secondaryTimeout);
         // this is temporary debugging code to learn that the index abstracts are fetched correctly
@@ -217,8 +217,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
             this.start(); // start to flush results
         }
     } else {
-        Map searchContainerMap = localSearchContainers(null);
-        indexContainer rcLocal = (searchContainerMap == null) ? wordIndex.emptyContainer(null) : localSearchJoin(searchContainerMap.values());
+        Map[] searchContainerMaps = localSearchContainers(null);
+        indexContainer rcLocal = (searchContainerMaps == null) ? wordIndex.emptyContainer(null) : localSearchJoinExclude(searchContainerMaps[0].values(), searchContainerMaps[1].values());
         result = orderFinal(rcLocal);
         result.globalContributions = 0;
     }
@@ -238,9 +238,9 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
     private void prepareSecondarySearch() {
         // catch up index abstracts and join them; then call peers again to submit their urls
-        System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.size() + " needed");
-        if (rcAbstracts.size() != query.size()) return; // secondary search not possible
+        System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.queryHashes.size() + " needed");
+        if (rcAbstracts.size() != query.queryHashes.size()) return; // secondary search not possible
         Iterator i = rcAbstracts.entrySet().iterator();
         Map.Entry entry;
@@ -249,7 +249,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
             System.out.println("DEBUG-INDEXABSTRACT: hash " + (String) entry.getKey() + ": " + ((query.queryHashes.contains((String) entry.getKey())) ? "NEEDED" : "NOT NEEDED") + "; " + ((TreeMap) entry.getValue()).size() + " entries");
         }
-        TreeMap abstractJoin = (rcAbstracts.size() == query.size()) ? kelondroMSetTools.joinConstructive(rcAbstracts.values(), true) : new TreeMap();
+        TreeMap abstractJoin = (rcAbstracts.size() == query.queryHashes.size()) ? kelondroMSetTools.joinConstructive(rcAbstracts.values(), true) : new TreeMap();
         if (abstractJoin.size() == 0) {
             System.out.println("DEBUG-INDEXABSTRACT: no success using index abstracts from remote peers");
         } else {
@@ -293,7 +293,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
             System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
             System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
             secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
-                    words, urls, urlStore, wordIndex, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
+                    words, "", urls, urlStore, wordIndex, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
                     profileGlobal, ranking, query.constraint);
         }
@@ -328,35 +328,48 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
         return wordlist;
     }
-    public Map localSearchContainers(Set urlselection) {
+    public Map[] localSearchContainers(Set urlselection) {
         // search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result
         // retrieve entities that belong to the hashes
         profileLocal.startTimer();
-        Map containers = wordIndex.getContainers(
+        long start = System.currentTimeMillis();
+        Map inclusionContainers = wordIndex.getContainers(
                 query.queryHashes,
                 urlselection,
                 true,
                 true,
-                profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION));
-        if ((containers.size() != 0) && (containers.size() < query.size())) containers = new HashMap(); // prevent that only a subset is returned
+                profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()));
+        if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < query.queryHashes.size())) inclusionContainers = new HashMap(); // prevent that only a subset is returned
+        long remaintime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_COLLECTION) - System.currentTimeMillis() + start;
+        Map exclusionContainers = ((inclusionContainers == null) || (inclusionContainers.size() == 0) || (remaintime <= 0)) ? new HashMap() : wordIndex.getContainers(
+                query.excludeHashes,
+                urlselection,
+                true,
+                true,
+                remaintime);
         profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_COLLECTION);
-        profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_COLLECTION, containers.size());
-        return containers;
+        profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_COLLECTION, inclusionContainers.size());
+        return new Map[]{inclusionContainers, exclusionContainers};
     }
-    public indexContainer localSearchJoin(Collection containers) {
+    public indexContainer localSearchJoinExclude(Collection includeContainers, Collection excludeContainers) {
         // join a search result and return the joincount (number of pages after join)
         // since this is a conjunction we return an empty entity if any word is not known
-        if (containers == null) return wordIndex.emptyContainer(null);
+        if (includeContainers == null) return wordIndex.emptyContainer(null);
         // join the result
         profileLocal.startTimer();
-        indexContainer rcLocal = indexContainer.joinContainer(containers,
-                profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_JOIN),
+        long start = System.currentTimeMillis();
+        indexContainer rcLocal = indexContainer.joinContainers(includeContainers,
                profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
                query.maxDistance);
+        long remaining = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_JOIN) - System.currentTimeMillis() + start;
+        if ((rcLocal != null) && (remaining > 0)) {
+            indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);
+        }
         if (rcLocal == null) rcLocal = wordIndex.emptyContainer(null);
         profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_JOIN);
         profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_JOIN, rcLocal.size());
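
localSearchContainers and localSearchJoinExclude above split one fixed time budget proportionally: the inclusion phase gets targetTime * |queryHashes| / (|queryHashes| + |excludeHashes|), and whatever wall-clock time is left afterwards goes to the exclusion phase, which is skipped entirely when nothing was found or the budget is spent. A small sketch of just that arithmetic; the names and the 3000 ms value are illustrative, not YaCy API.

public class TimeBudgetSketch {
    public static void main(String[] args) {
        long targetTime = 3000; // assumed budget for the whole collection phase, in ms
        int queryHashes = 3, excludeHashes = 1;

        long inclusionBudget = targetTime * queryHashes / (queryHashes + excludeHashes);
        long start = System.currentTimeMillis();
        // ... fetch inclusion containers within inclusionBudget ...
        long remaintime = targetTime - (System.currentTimeMillis() - start);
        // exclusion containers are fetched only if inclusion found anything
        // and remaintime > 0, exactly as in localSearchContainers above
        System.out.println("inclusion: " + inclusionBudget + " ms, exclusion: up to " + remaintime + " ms");
    }
}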

@@ -103,11 +103,11 @@ public final class plasmaSearchQuery {
         this.constraint = constraint;
     }
-    public plasmaSearchQuery(TreeSet queryHashes, int maxDistance, String prefer, int contentdom,
+    public plasmaSearchQuery(TreeSet queryHashes, TreeSet excludeHashes, int maxDistance, String prefer, int contentdom,
                              int wantedResults, long maximumTime, String urlMask,
                              kelondroBitfield constraint) {
         this.queryString = null;
-        this.excludeHashes = new TreeSet(kelondroBase64Order.enhancedCoder);;
+        this.excludeHashes = excludeHashes;
         this.maxDistance = maxDistance;
         this.prefer = prefer;
         this.contentdom = contentdom;
@@ -186,10 +186,6 @@ public final class plasmaSearchQuery {
         return new TreeSet[]{query, exclude};
     }
-    public int size() {
-        return queryHashes.size();
-    }
     public String queryString() {
         return this.queryString;
     }

@@ -2769,9 +2769,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                 wordIndex.loadedURL.remove(urlentry.hash()); // clean up
                 continue; // next result
             }
-            urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
             urlname = "http://share." + seed.getName() + ".yacy" + filename;
             if ((p = urlname.indexOf("?")) > 0) urlname = urlname.substring(0, p);
+            urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
         } else {
             urlstring = comp.url().toNormalform();
             urlname = urlstring;

@@ -367,6 +367,7 @@ public final class yacyClient {
     public static String[] search(
             String wordhashes,
+            String excludehashes,
             String urlhashes,
             String prefer,
             String filter,
@@ -429,6 +430,7 @@ public final class yacyClient {
         obj.put("resource", ((global) ? "global" : "local"));
         obj.put("partitions", partitions);
         obj.put("query", wordhashes);
+        obj.put("exclude", excludehashes);
         obj.put("urls", urlhashes);
         obj.put("prefer", prefer);
         obj.put("filter", filter);

@@ -64,7 +64,7 @@ import de.anomic.server.logging.serverLog;
 public class yacySearch extends Thread {
-    final private String wordhashes, urlhashes;
+    final private String wordhashes, excludehashes, urlhashes;
     final private boolean global;
     final private int partitions;
     final private plasmaCrawlLURL urlManager;
@@ -81,7 +81,7 @@ public class yacySearch extends Thread {
     final private String prefer, filter;
     final private kelondroBitfield constraint;
-    public yacySearch(String wordhashes, String urlhashes, String prefer, String filter, int maxDistance,
+    public yacySearch(String wordhashes, String excludehashes, String urlhashes, String prefer, String filter, int maxDistance,
                       boolean global, int partitions, yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
                       indexContainer containerCache, Map abstractCache,
                       plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
@@ -90,6 +90,7 @@ public class yacySearch extends Thread {
         super("yacySearch_" + targetPeer.getName());
         //System.out.println("DEBUG - yacySearch thread " + this.getName() + " initialized " + ((urlhashes.length() == 0) ? "(primary)" : "(secondary)"));
         this.wordhashes = wordhashes;
+        this.excludehashes = excludehashes;
         this.urlhashes = urlhashes;
         this.prefer = prefer;
         this.filter = filter;
@@ -111,7 +112,7 @@ public class yacySearch extends Thread {
     public void run() {
         this.urls = yacyClient.search(
-                wordhashes, urlhashes, prefer, filter, maxDistance, global, partitions,
+                wordhashes, excludehashes, urlhashes, prefer, filter, maxDistance, global, partitions,
                 targetPeer, urlManager, wordIndex, containerCache, abstractCache,
                 blacklist, snippetCache, timingProfile, rankingProfile, constraint);
         if (urls != null) {
@@ -206,7 +207,7 @@ public class yacySearch extends Thread {
         return result;
     }
-    public static yacySearch[] primaryRemoteSearches(String wordhashes, String urlhashes, String prefer, String filter, int maxDist,
+    public static yacySearch[] primaryRemoteSearches(String wordhashes, String excludehashes, String urlhashes, String prefer, String filter, int maxDist,
             plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
             indexContainer containerCache, Map abstractCache,
            int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
@@ -223,7 +224,7 @@ public class yacySearch extends Thread {
         if (targets == 0) return null;
         yacySearch[] searchThreads = new yacySearch[targets];
         for (int i = 0; i < targets; i++) {
-            searchThreads[i]= new yacySearch(wordhashes, urlhashes, prefer, filter, maxDist, true, targets, targetPeers[i],
+            searchThreads[i]= new yacySearch(wordhashes, excludehashes, urlhashes, prefer, filter, maxDist, true, targets, targetPeers[i],
                     urlManager, wordIndex, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile, constraint);
             searchThreads[i].start();
             //try {Thread.sleep(20);} catch (InterruptedException e) {}
@@ -231,7 +232,7 @@ public class yacySearch extends Thread {
         return searchThreads;
     }
-    public static yacySearch secondaryRemoteSearch(String wordhashes, String urlhashes,
+    public static yacySearch secondaryRemoteSearch(String wordhashes, String excludehashes, String urlhashes,
             plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
             indexContainer containerCache,
             String targethash, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
@@ -244,7 +245,7 @@ public class yacySearch extends Thread {
         //Set wordhashes = plasmaSearch.words2hashes(querywords);
         final yacySeed targetPeer = yacyCore.seedDB.getConnected(targethash);
         if (targetPeer == null) return null;
-        yacySearch searchThread = new yacySearch(wordhashes, urlhashes, "", "", 9999, true, 0, targetPeer,
+        yacySearch searchThread = new yacySearch(wordhashes, excludehashes, urlhashes, "", "", 9999, true, 0, targetPeer,
                 urlManager, wordIndex, containerCache, new TreeMap(), blacklist, snippetCache, timingProfile, rankingProfile, constraint);
         searchThread.start();
         return searchThread;
