* added a search history cache that maintains search results for 10 minutes

This is necessary for the new search process, which will do automatic re-searches.
A positive side effect is that when a re-search is done, it can be monitored how many
results have been contributed by other peers. The message for this contribution
was moved from the end of the result page to the top.
* improved re-search time when a global search was done and the local index
already has a great number of results for this word
* re-organised presearch computation; must be further enhanced

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4059 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent ae86d010bb
commit a34d9b8609

@ -30,6 +30,7 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -132,7 +133,7 @@ public class Ranking_p {
final serverObjects prop = defaultValues(); final serverObjects prop = defaultValues();
plasmaSearchRankingProfile ranking = plasmaSearchRankingProfile ranking =
(sb.getConfig("rankingProfile", "").length() == 0) ? (sb.getConfig("rankingProfile", "").length() == 0) ?
new plasmaSearchRankingProfile("text") : new plasmaSearchRankingProfile(plasmaSearchQuery.CONTENTDOM_TEXT) :
new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null)); new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
putRanking(prop, ranking, "local"); putRanking(prop, ranking, "local");
return prop; return prop;
@ -149,7 +150,7 @@ public class Ranking_p {
if (post.containsKey("ResetRanking")) { if (post.containsKey("ResetRanking")) {
sb.setConfig("rankingProfile", ""); sb.setConfig("rankingProfile", "");
plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile("text"); plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile(plasmaSearchQuery.CONTENTDOM_TEXT);
final serverObjects prop = defaultValues(); final serverObjects prop = defaultValues();
//prop.putAll(ranking.toExternalMap("local")); //prop.putAll(ranking.toExternalMap("local"));
putRanking(prop, ranking, "local"); putRanking(prop, ranking, "local");

@ -164,7 +164,7 @@ public final class search {
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(squery.queryHashes) + " - " + squery.wantedResults + " links"); yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(squery.queryHashes) + " - " + squery.wantedResults + " links");
// prepare a search profile // prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile); plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(plasmaSearchQuery.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile);
plasmaSearchProcessing localProcess = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults); plasmaSearchProcessing localProcess = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults);
//plasmaSearchProcessing remoteProcess = null; //plasmaSearchProcessing remoteProcess = null;
@ -208,7 +208,7 @@ public final class search {
// join and order the result // join and order the result
indexContainer localResults = indexContainer localResults =
(containers == null) ? (containers == null) ?
plasmaWordIndex.emptyContainer(null) : plasmaWordIndex.emptyContainer(null, 0) :
localProcess.localSearchJoinExclude( localProcess.localSearchJoinExclude(
containers[0].values(), containers[0].values(),
containers[1].values(), containers[1].values(),
@ -223,7 +223,7 @@ public final class search {
joincount = localResults.size(); joincount = localResults.size();
prop.putASIS("joincount", Integer.toString(joincount)); prop.putASIS("joincount", Integer.toString(joincount));
plasmaSearchPreOrder pre = new plasmaSearchPreOrder(squery, localProcess, rankingProfile, localResults); plasmaSearchPreOrder pre = new plasmaSearchPreOrder(squery, localProcess, rankingProfile, localResults);
accu = new plasmaSearchResultAccumulator(squery, localProcess, rankingProfile, pre, sb.wordIndex, plasmaSwitchboard.blueList, false); accu = new plasmaSearchResultAccumulator(squery, localProcess, rankingProfile, pre.strippedContainer(200), sb.wordIndex, plasmaSwitchboard.blueList, false);
} }
// generate compressed index for maxcounthash // generate compressed index for maxcounthash

@ -116,7 +116,7 @@ document.getElementById("Enter").value = "search again - catch up more links";
:: ::
<p>No Results. &quot;<strong>#[wrong_regex]#</strong>&quot; is no valid regular expression. Please go back to the previous page and make sure to enter a valid regular expressions for URL mask and Prefer mask.</p> <p>No Results. &quot;<strong>#[wrong_regex]#</strong>&quot; is no valid regular expression. Please go back to the previous page and make sure to enter a valid regular expressions for URL mask and Prefer mask.</p>
:: ::
<p><strong id="linkcount">#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links from <strong>#[filteredcount]#</strong> filtered links of a total number of <strong>#[totalcount]#</strong> known.</p> <p><strong id="linkcount">#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links from <strong>#[filteredcount]#</strong> filtered links of a total number of <strong>#[totalcount]#</strong> known#(globalresults)#.::, <strong>#[globalcount]#</strong> links from other YaCy peers.#(/globalresults)#</p>
:: ::
<p>Searching the web with this peer is disabled for unauthorized users. Please <a href="Status.html?login=">log in</a> as administrator to use the search function</p> <p>Searching the web with this peer is disabled for unauthorized users. Please <a href="Status.html?login=">log in</a> as administrator to use the search function</p>
#(/num-results)# #(/num-results)#
@ -164,21 +164,7 @@ document.getElementById("Enter").value = "search again - catch up more links";
</script> </script>
<!-- linklist end --> <!-- linklist end -->
#(resultbottomline)#
::
<p>The global search resulted in #[globalresults]# link contributions from other YaCy peers.</p>
::
<p></p>
::
<p>
You cannot get global search results because you are not connected to another YaCy peer.
</p>
::
<p>
You can enrich the search results by using the 'global' option; you must also switch to online mode
(by using the proxy) to contribute to the global index.
</p>
#(/resultbottomline)#
::<!-- type 1: media search --> ::<!-- type 1: media search -->
<!-- non-js variant --> <!-- non-js variant -->
<noscript> <noscript>

@ -56,6 +56,7 @@ import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
@ -151,7 +152,6 @@ public class yacysearch {
prop.put("type", 0); prop.put("type", 0);
prop.put("type_excluded", 0); prop.put("type_excluded", 0);
prop.put("type_combine", 0); prop.put("type_combine", 0);
prop.put("type_resultbottomline", 0);
prop.put("type_results", ""); prop.put("type_results", "");
prop.put("num-results", (searchAllowed) ? 0 : 6); prop.put("num-results", (searchAllowed) ? 0 : 6);
@ -197,13 +197,7 @@ public class yacysearch {
if (clustersearch) global = true; // switches search on, but search target is limited to cluster nodes if (clustersearch) global = true; // switches search on, but search target is limited to cluster nodes
// find search domain // find search domain
int contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT; int contentdomCode = plasmaSearchQuery.contentdomParser(post.get("contentdom", "text"));
String contentdomString = post.get("contentdom", "text");
if (contentdomString.equals("text")) contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT;
if (contentdomString.equals("audio")) contentdomCode = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (contentdomString.equals("video")) contentdomCode = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (contentdomString.equals("image")) contentdomCode = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (contentdomString.equals("app")) contentdomCode = plasmaSearchQuery.CONTENTDOM_APP;
// patch until better search profiles are available // patch until better search profiles are available
if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30; if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30;
@ -265,8 +259,7 @@ public class yacysearch {
// prepare search properties // prepare search properties
final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getPublicAddress() != null)); final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getPublicAddress() != null));
final boolean samesearch = env.getConfig("last-search", "").equals(querystring + contentdomString); final boolean globalsearch = (global) && (yacyonline);
final boolean globalsearch = (global) && (yacyonline) && (!samesearch);
// do the search // do the search
TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]); TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]);
@ -285,7 +278,7 @@ public class yacysearch {
"", "",
20, 20,
constraint); constraint);
plasmaSearchRankingProfile ranking = (sb.getConfig("rankingProfile", "").length() == 0) ? new plasmaSearchRankingProfile(contentdomString) : new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null)); plasmaSearchRankingProfile ranking = (sb.getConfig("rankingProfile", "").length() == 0) ? new plasmaSearchRankingProfile(contentdomCode) : new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(4 * theQuery.maximumTime / 10, theQuery.wantedResults); plasmaSearchProcessing localTiming = new plasmaSearchProcessing(4 * theQuery.maximumTime / 10, theQuery.wantedResults);
plasmaSearchProcessing remoteTiming = new plasmaSearchProcessing(6 * theQuery.maximumTime / 10, theQuery.wantedResults); plasmaSearchProcessing remoteTiming = new plasmaSearchProcessing(6 * theQuery.maximumTime / 10, theQuery.wantedResults);
@ -305,8 +298,8 @@ public class yacysearch {
// create a new search event // create a new search event
String wrongregex = null; String wrongregex = null;
plasmaSearchEvent theSearch = new plasmaSearchEvent(theQuery, ranking, localTiming, remoteTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null); plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, ranking, localTiming, remoteTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null);
plasmaSearchPreOrder preorder = theSearch.search(); indexContainer preorder = theSearch.search();
// fetch snippets // fetch snippets
serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
@ -322,7 +315,7 @@ public class yacysearch {
// log // log
serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " + serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " +
(theSearch.getLocalCount() + theSearch.getGlobalCount()) + " links found, " + (theSearch.getLocalCount() + theSearch.getGlobalCount()) + " links found, " +
preorder.filteredCount() + " links filtered, " + theSearch.filteredCount() + " links filtered, " +
accu.resultCount() + " links ordered, " + accu.resultCount() + " links ordered, " +
((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
@ -347,9 +340,10 @@ public class yacysearch {
//prop.put("references", 0); //prop.put("references", 0);
URL wordURL=null; URL wordURL=null;
prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount()); prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount());
prop.put("num-results_filteredcount", preorder.filteredCount()); prop.put("num-results_filteredcount", theSearch.filteredCount());
prop.put("num-results_orderedcount", accu.resultCount()); prop.put("num-results_orderedcount", accu.resultCount());
prop.put("num-results_globalresults", theSearch.getGlobalCount()); prop.put("num-results_globalresults", (theSearch.getGlobalCount() == 0) ? 0 : 1);
prop.put("num-results_globalresults_globalcount", theSearch.getGlobalCount());
prop.put("num-results_linkcount", 0); prop.put("num-results_linkcount", 0);
prop.put("type_results", 0); prop.put("type_results", 0);
@ -425,11 +419,7 @@ public class yacysearch {
prop.put("num-results_linkcount", Integer.toString(accu.resultCount())); prop.put("num-results_linkcount", Integer.toString(accu.resultCount()));
} }
// remember the last search expression
env.setConfig("last-search", querystring + contentdomString);
// process result of search // process result of search
prop.put("type_resultbottomline", 0);
if (filtered.size() > 0) { if (filtered.size() > 0) {
prop.put("excluded", 1); prop.put("excluded", 1);
prop.put("excluded_stopwords", filtered.toString()); prop.put("excluded_stopwords", filtered.toString());
@ -509,26 +499,8 @@ public class yacysearch {
} }
} }
if (wrongregex != null) {
prop.put("type_resultbottomline", 0);
}
else if (yacyonline) {
if (global) {
prop.put("type_resultbottomline", 1);
prop.put("type_resultbottomline_globalresults", prop.get("num-results_globalresults", "0"));
} else {
prop.put("type_resultbottomline", 0);
}
} else {
if (global) {
prop.put("type_resultbottomline", 3);
} else {
prop.put("type_resultbottomline", 0);
}
}
prop.put("type", (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 2 : 1)); prop.put("type", (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 2 : 1));
if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", contentdomString); if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", post.get("contentdom", "text"));
prop.put("input_cat", "href"); prop.put("input_cat", "href");
prop.put("input_depth", "0"); prop.put("input_depth", "0");
@ -536,7 +508,6 @@ public class yacysearch {
String hostName = (String) header.get("Host", "localhost"); String hostName = (String) header.get("Host", "localhost");
if (hostName.indexOf(":") == -1) hostName += ":" + serverCore.getPortNr(env.getConfig("port", "8080")); if (hostName.indexOf(":") == -1) hostName += ":" + serverCore.getPortNr(env.getConfig("port", "8080"));
prop.put("rssYacyImageURL", "http://" + hostName + "/env/grafics/yacy.gif"); prop.put("rssYacyImageURL", "http://" + hostName + "/env/grafics/yacy.gif");
} }
if (post.get("cat", "href").equals("image")) { if (post.get("cat", "href").equals("image")) {
@ -591,7 +562,7 @@ public class yacysearch {
prop.put("input_prefermaskfilter", prefermask); prop.put("input_prefermaskfilter", prefermask);
prop.put("input_indexof", (indexof) ? "on" : "off"); prop.put("input_indexof", (indexof) ? "on" : "off");
prop.put("input_constraint", constraint.exportB64()); prop.put("input_constraint", constraint.exportB64());
prop.put("input_contentdom", contentdomString); prop.put("input_contentdom", post.get("contentdom", "text"));
prop.put("input_contentdomCheckText", (contentdomCode == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0); prop.put("input_contentdomCheckText", (contentdomCode == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("input_contentdomCheckAudio", (contentdomCode == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0); prop.put("input_contentdomCheckAudio", (contentdomCode == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("input_contentdomCheckVideo", (contentdomCode == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0); prop.put("input_contentdomCheckVideo", (contentdomCode == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);

@ -46,14 +46,14 @@ public class indexContainer extends kelondroRowSet {
this.wordHash = wordHash; this.wordHash = wordHash;
} }
public indexContainer(String wordHash, kelondroRow rowdef) { public indexContainer(String wordHash, kelondroRow rowdef, int objectCount) {
super(rowdef, 0); super(rowdef, objectCount);
this.wordHash = wordHash; this.wordHash = wordHash;
this.lastTimeWrote = 0; this.lastTimeWrote = 0;
} }
public indexContainer topLevelClone() { public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef); indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.size());
newContainer.addAllUnique(this); newContainer.addAllUnique(this);
return newContainer; return newContainer;
} }
@ -308,7 +308,7 @@ public class indexContainer extends kelondroRowSet {
assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString(); assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString();
int keylength = small.rowdef.width(0); int keylength = small.rowdef.width(0);
assert (keylength == large.rowdef.width(0)); assert (keylength == large.rowdef.width(0));
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result indexContainer conj = new indexContainer(null, small.rowdef, 0); // start with empty search result
Iterator se = small.entries(); Iterator se = small.entries();
indexRWIEntry ie0, ie1; indexRWIEntry ie0, ie1;
long stamp = System.currentTimeMillis(); long stamp = System.currentTimeMillis();
@ -331,7 +331,7 @@ public class indexContainer extends kelondroRowSet {
assert i1.rowdef.equals(i2.rowdef) : "i1 = " + i1.rowdef.toString() + "; i2 = " + i2.rowdef.toString(); assert i1.rowdef.equals(i2.rowdef) : "i1 = " + i1.rowdef.toString() + "; i2 = " + i2.rowdef.toString();
int keylength = i1.rowdef.width(0); int keylength = i1.rowdef.width(0);
assert (keylength == i2.rowdef.width(0)); assert (keylength == i2.rowdef.width(0));
indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result indexContainer conj = new indexContainer(null, i1.rowdef, 0); // start with empty search result
if (!((i1.rowdef.getOrdering().signature().equals(i2.rowdef.getOrdering().signature())) && if (!((i1.rowdef.getOrdering().signature().equals(i2.rowdef.getOrdering().signature())) &&
(i1.rowdef.primaryKey() == i2.rowdef.primaryKey()))) return conj; // ordering must be equal (i1.rowdef.primaryKey() == i2.rowdef.primaryKey()))) return conj; // ordering must be equal
Iterator e1 = i1.entries(); Iterator e1 = i1.entries();

@ -383,10 +383,14 @@ public final class indexRAMRI implements indexRI {
return (((long) intTime) * (long) 1000) + initTime; return (((long) intTime) * (long) 1000) + initTime;
} }
public synchronized boolean hasContainer(String wordHash) { public boolean hasContainer(String wordHash) {
return cache.containsKey(wordHash); return cache.containsKey(wordHash);
} }
/**
 * Returns the number of entries the RAM cache currently holds for a word hash.
 *
 * @param wordHash the word hash whose cached container is looked up
 * @return the size of the cached container, or 0 if nothing is cached for the hash
 */
public int sizeContainer(String wordHash) {
    // cache.get yields null for an unknown hash (addEntry relies on the same
    // behavior); report "no entries" instead of failing with a NullPointerException
    indexContainer container = (indexContainer) cache.get(wordHash);
    return (container == null) ? 0 : container.size();
}
public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) { public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) {
// retrieve container // retrieve container
@ -497,7 +501,7 @@ public final class indexRAMRI implements indexRI {
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) { public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash); indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow); if (container == null) container = new indexContainer(wordHash, this.payloadrow, 1);
container.put(newEntry); container.put(newEntry);
cache.put(wordHash, container); cache.put(wordHash, container);
hashScore.incScore(wordHash); hashScore.incScore(wordHash);

@ -26,6 +26,7 @@
package de.anomic.plasma; package de.anomic.plasma;
import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
@ -39,27 +40,36 @@ import de.anomic.yacy.yacySearch;
public final class plasmaSearchEvent { public final class plasmaSearchEvent {
public static plasmaSearchEvent lastEvent = null; public static plasmaSearchEvent lastEvent = null;
private static HashMap lastEvents = new HashMap(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes
private long eventTime;
private plasmaSearchQuery query; private plasmaSearchQuery query;
private plasmaSearchRankingProfile ranking; private plasmaSearchRankingProfile ranking;
private plasmaWordIndex wordIndex; private plasmaWordIndex wordIndex;
private indexContainer rcContainers; // cache for results private indexContainer rcLocal; // cache for local results
private indexContainer rcGlobal; // cache for global results
private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
private plasmaSearchProcessing profileLocal, profileGlobal; private plasmaSearchProcessing profileLocal, profileGlobal;
private yacySearch[] primarySearchThreads, secondarySearchThreads; private yacySearch[] primarySearchThreads, secondarySearchThreads;
private TreeMap preselectedPeerHashes; private TreeMap preselectedPeerHashes;
private int localcount, globalcount; private int localcount, globalcount;
private indexContainer sortedResults;
private int lastglobal;
private int filteredCount;
public plasmaSearchEvent(plasmaSearchQuery query, private plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking, plasmaSearchRankingProfile ranking,
plasmaSearchProcessing localTiming, plasmaSearchProcessing localTiming,
plasmaSearchProcessing remoteTiming, plasmaSearchProcessing remoteTiming,
plasmaWordIndex wordIndex, plasmaWordIndex wordIndex,
TreeMap preselectedPeerHashes) { TreeMap preselectedPeerHashes) {
this.eventTime = System.currentTimeMillis(); // for lifetime check
this.wordIndex = wordIndex; this.wordIndex = wordIndex;
this.query = query; this.query = query;
this.ranking = ranking; this.ranking = ranking;
this.rcContainers = plasmaWordIndex.emptyContainer(null); this.rcLocal = null;
this.rcGlobal = plasmaWordIndex.emptyContainer(null, 0);;
this.rcAbstracts = (query.queryHashes.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches this.rcAbstracts = (query.queryHashes.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming; this.profileLocal = localTiming;
this.profileGlobal = remoteTiming; this.profileGlobal = remoteTiming;
@ -68,6 +78,133 @@ public final class plasmaSearchEvent {
this.preselectedPeerHashes = preselectedPeerHashes; this.preselectedPeerHashes = preselectedPeerHashes;
this.localcount = 0; this.localcount = 0;
this.globalcount = 0; this.globalcount = 0;
this.sortedResults = null;
this.lastglobal = 0;
long start = System.currentTimeMillis();
if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
int fetchpeers = (int) (query.maximumTime / 500L); // number of target peers; means 10 peers in 10 seconds
if (fetchpeers > 50) fetchpeers = 50;
if (fetchpeers < 30) fetchpeers = 30;
// do a global search
// the result of the fetch is then in the rcGlobal
serverLog.logFine("SEARCH_EVENT", "STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches(
plasmaSearchQuery.hashSet2hashString(query.queryHashes),
plasmaSearchQuery.hashSet2hashString(query.excludeHashes),
"",
query.prefer,
query.urlMask,
query.maxDistance,
wordIndex,
rcGlobal,
rcAbstracts,
fetchpeers,
plasmaSwitchboard.urlBlacklist,
profileGlobal,
ranking,
query.constraint,
(query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
// meanwhile do a local search
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
// use the search containers to fill up rcAbstracts locally
/*
if ((rcAbstracts != null) && (searchContainerMap != null)) {
Iterator i, ci = searchContainerMap.entrySet().iterator();
Map.Entry entry;
String wordhash;
indexContainer container;
TreeMap singleAbstract;
String mypeerhash = yacyCore.seedDB.mySeed.hash;
while (ci.hasNext()) {
entry = (Map.Entry) ci.next();
wordhash = (String) entry.getKey();
container = (indexContainer) entry.getValue();
// collect all urlhashes from the container
synchronized (rcAbstracts) {
singleAbstract = (TreeMap) rcAbstracts.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
i = container.entries();
while (i.hasNext()) singleAbstract.put(((indexEntry) i.next()).urlHash(), mypeerhash);
rcAbstracts.put(wordhash, singleAbstract);
}
}
}
*/
// join and exclude the local result
this.rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null, 0) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
(query.queryHashes.size() == 0) ?
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
// sort the local containers and truncate it to a limited count,
// so following sortings together with the global results will be fast
localcount = rcLocal.size();
plasmaSearchPreOrder firstsort = new plasmaSearchPreOrder(query, profileLocal, ranking, rcLocal);
rcLocal = firstsort.strippedContainer(200);
// wait some time to retrieve index abstracts from primary search
while (System.currentTimeMillis() < secondaryTimeout) {
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
// evaluate index abstracts and start a secondary search
if (rcAbstracts != null) prepareSecondarySearch();
// catch up global results:
// wait until primary timeout passed
while (System.currentTimeMillis() < primaryTimeout) {
if ((yacySearch.remainingWaiting(primarySearchThreads) == 0) &&
((secondarySearchThreads == null) || (yacySearch.remainingWaiting(secondarySearchThreads) == 0))) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
// finished searching
serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
} else {
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null, 0) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
(query.queryHashes.size() == 0) ?
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
this.localcount = rcLocal.size();
}
// log the event
serverLog.logFine("SEARCH_EVENT", "SEARCHRESULT: " + profileLocal.reportToString());
// set link for statistic
lastEvent = this;
// remove old events in the event cache
Iterator i = lastEvents.entrySet().iterator();
while (i.hasNext()) {
if (((plasmaSearchEvent) ((Map.Entry) i.next()).getValue()).eventTime + eventLifetime < System.currentTimeMillis()) i.remove();
}
// store this search to a cache so it can be re-used
lastEvents.put(query.id(), this);
} }
public plasmaSearchQuery getQuery() { public plasmaSearchQuery getQuery() {
@ -97,134 +234,52 @@ public final class plasmaSearchEvent {
return this.globalcount; return this.globalcount;
} }
public plasmaSearchPreOrder search() { public static plasmaSearchEvent getEvent(plasmaSearchQuery query,
// combine all threads plasmaSearchRankingProfile ranking,
plasmaSearchProcessing localTiming,
long start = System.currentTimeMillis(); plasmaSearchProcessing remoteTiming,
plasmaSearchPreOrder pre; plasmaWordIndex wordIndex,
if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) || TreeMap preselectedPeerHashes) {
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) { plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id());
int fetchpeers = (int) (query.maximumTime / 500L); // number of target peers; means 10 peers in 10 seconds if (event == null) {
if (fetchpeers > 50) fetchpeers = 50; event = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, wordIndex, preselectedPeerHashes);
if (fetchpeers < 30) fetchpeers = 30; } else {
//re-new the event time for this event, so it is not deleted next time too early
// do a global search event.eventTime = System.currentTimeMillis();
// the result of the fetch is then in the rcGlobal }
serverLog.logFine("SEARCH_EVENT", "STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS"); return event;
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2; }
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches( public indexContainer search() {
plasmaSearchQuery.hashSet2hashString(query.queryHashes), // combine the local and global (if any) result and order
plasmaSearchQuery.hashSet2hashString(query.excludeHashes), if ((rcGlobal != null) && (rcGlobal.size() > 0)) {
"", globalcount = rcGlobal.size();
query.prefer, if ((this.sortedResults == null) || (this.lastglobal != globalcount)) {
query.urlMask, indexContainer searchResult = plasmaWordIndex.emptyContainer(null, rcLocal.size() + rcGlobal.size());
query.maxDistance,
wordIndex,
rcContainers,
rcAbstracts,
fetchpeers,
plasmaSwitchboard.urlBlacklist,
profileGlobal,
ranking,
query.constraint,
(query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
// meanwhile do a local search
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
// use the search containers to fill up rcAbstracts locally
/*
if ((rcAbstracts != null) && (searchContainerMap != null)) {
Iterator i, ci = searchContainerMap.entrySet().iterator();
Map.Entry entry;
String wordhash;
indexContainer container;
TreeMap singleAbstract;
String mypeerhash = yacyCore.seedDB.mySeed.hash;
while (ci.hasNext()) {
entry = (Map.Entry) ci.next();
wordhash = (String) entry.getKey();
container = (indexContainer) entry.getValue();
// collect all urlhashes from the container
synchronized (rcAbstracts) {
singleAbstract = (TreeMap) rcAbstracts.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
i = container.entries();
while (i.hasNext()) singleAbstract.put(((indexEntry) i.next()).urlHash(), mypeerhash);
rcAbstracts.put(wordhash, singleAbstract);
}
}
}
*/
// try to pre-fetch some LURLs if there is enough time
indexContainer rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
(query.queryHashes.size() == 0) ?
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
// this is temporary debugging code to learn that the index abstracts are fetched correctly
while (System.currentTimeMillis() < secondaryTimeout) {
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
// evaluate index abstracts and start a secondary search
if (rcAbstracts != null) prepareSecondarySearch();
// catch up global results:
// wait until primary timeout passed
while (System.currentTimeMillis() < primaryTimeout) {
if ((yacySearch.remainingWaiting(primarySearchThreads) == 0) &&
((secondarySearchThreads == null) || (yacySearch.remainingWaiting(secondarySearchThreads) == 0))) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
// finished searching
serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
// combine the result and order
indexContainer searchResult = plasmaWordIndex.emptyContainer(null);
searchResult.addAllUnique(rcLocal); searchResult.addAllUnique(rcLocal);
searchResult.addAllUnique(rcContainers); searchResult.addAllUnique(rcGlobal);
searchResult.sort(); searchResult.sort();
searchResult.uniq(1000); searchResult.uniq(100);
localcount = rcLocal.size(); lastglobal = globalcount;
globalcount = rcContainers.size(); plasmaSearchPreOrder pre = new plasmaSearchPreOrder(query, profileLocal, ranking, searchResult);
pre = new plasmaSearchPreOrder(query, profileLocal, ranking, searchResult); this.filteredCount = pre.filteredCount();
} else { this.sortedResults = pre.strippedContainer(200);
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
indexContainer rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
(query.queryHashes.size() == 0) ?
0 :
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
this.localcount = rcLocal.size();
pre = new plasmaSearchPreOrder(query, profileLocal, ranking, rcLocal);
} }
} else {
// log the event if (this.sortedResults == null) {
serverLog.logFine("SEARCH_EVENT", "SEARCHRESULT: " + profileLocal.reportToString()); plasmaSearchPreOrder pre = new plasmaSearchPreOrder(query, profileLocal, ranking, rcLocal);
this.filteredCount = pre.filteredCount();
// prepare values for statistics this.sortedResults = pre.strippedContainer(200);
lastEvent = this; }
}
// return search result
return pre; return this.sortedResults;
} }
/**
 * Returns the filtered-entry count stored on this event.
 * The field is assigned in search() from plasmaSearchPreOrder.filteredCount()
 * whenever the sorted result set is (re)computed.
 *
 * @return the current value of the filteredCount field
 */
public int filteredCount() {
    return filteredCount;
}
private void prepareSecondarySearch() { private void prepareSecondarySearch() {
// catch up index abstracts and join them; then call peers again to submit their urls // catch up index abstracts and join them; then call peers again to submit their urls
System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.queryHashes.size() + " needed"); System.out.println("DEBUG-INDEXABSTRACT: " + rcAbstracts.size() + " word references catched, " + query.queryHashes.size() + " needed");
@ -282,7 +337,7 @@ public final class plasmaSearchEvent {
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls); System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words); System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch( secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, "", urls, wordIndex, rcContainers, peer, plasmaSwitchboard.urlBlacklist, words, "", urls, wordIndex, rcGlobal, peer, plasmaSwitchboard.urlBlacklist,
profileGlobal, ranking, query.constraint, preselectedPeerHashes); profileGlobal, ranking, query.constraint, preselectedPeerHashes);
} }

@ -101,7 +101,7 @@ public final class plasmaSearchPostOrder {
return (indexURLEntry) pageAcc.remove(top); return (indexURLEntry) pageAcc.remove(top);
} }
protected void addPage(indexURLEntry page, Long preranking) { protected void addPage(indexURLEntry page) {
// take out relevant information for reference computation // take out relevant information for reference computation
indexURLEntry.Components comp = page.comp(); indexURLEntry.Components comp = page.comp();
@ -110,7 +110,7 @@ public final class plasmaSearchPostOrder {
String[] descrcomps = comp.title().toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description String[] descrcomps = comp.title().toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description
// store everything // store everything
results.add(new Object[] {page, urlcomps, descrcomps, preranking}); results.add(new Object[] {page, urlcomps, descrcomps});
// add references // add references
addScoreFiltered(urlcomps); addScoreFiltered(urlcomps);
@ -137,12 +137,12 @@ public final class plasmaSearchPostOrder {
// calculate ranking // calculate ranking
if (postsort) if (postsort)
ranking = this.ranking.postRanking( ranking = this.ranking.postRanking(
((Long) resultVector[3]).longValue(),
query, query,
commonSense, commonSense,
(String[]) resultVector[1], (String[]) resultVector[1],
(String[]) resultVector[2], (String[]) resultVector[2],
page page,
i
); );
else else
ranking = ((Long) resultVector[3]).longValue(); ranking = ((Long) resultVector[3]).longValue();

@ -208,11 +208,11 @@ public final class plasmaSearchPreOrder {
return theClone; return theClone;
} }
public boolean hasNext() { private boolean hasNext() {
return pageAcc.size() > 0; return pageAcc.size() > 0;
} }
public Object[] /*{indexEntry, Long}*/ next() { private Object[] /*{indexEntry, Long}*/ next() {
String top = (String) pageAcc.firstKey(); String top = (String) pageAcc.firstKey();
//System.out.println("preorder-key: " + top); //System.out.println("preorder-key: " + top);
Long preranking; Long preranking;
@ -225,6 +225,19 @@ public final class plasmaSearchPreOrder {
return new Object[]{(indexRWIEntry) pageAcc.remove(top), preranking}; return new Object[]{(indexRWIEntry) pageAcc.remove(top), preranking};
} }
/**
 * Collects at most {@code count} entries from this pre-order into a new container.
 * Entries are taken in the order delivered by next(); since next() removes each
 * delivered entry from pageAcc, the collected entries are consumed from this object.
 *
 * @param count upper bound on the number of entries to collect; also passed to
 *              plasmaWordIndex.emptyContainer as the element count of the new container
 * @return a new container holding the collected entries; fewer than {@code count}
 *         if hasNext() reports no more entries first
 */
public indexContainer strippedContainer(int count) {
    final indexContainer stripped = plasmaWordIndex.emptyContainer(null, count);
    for (int remaining = count; remaining > 0 && hasNext(); remaining--) {
        // next() yields a pair {indexRWIEntry, Long}; only the entry is kept here
        final Object[] ranked = next();
        final indexRWIEntry rwi = (indexRWIEntry) ranked[0];
        stripped.addUnique(rwi.toKelondroEntry());
    }
    return stripped;
}
public indexRWIEntry[] getNormalizer() { public indexRWIEntry[] getNormalizer() {
return new indexRWIEntry[] {entryMin, entryMax}; return new indexRWIEntry[] {entryMin, entryMax};
} }

@ -309,7 +309,7 @@ public class plasmaSearchProcessing implements Cloneable {
// join a search result and return the joincount (number of pages after join) // join a search result and return the joincount (number of pages after join)
// since this is a conjunction we return an empty entity if any word is not known // since this is a conjunction we return an empty entity if any word is not known
if (includeContainers == null) return plasmaWordIndex.emptyContainer(null); if (includeContainers == null) return plasmaWordIndex.emptyContainer(null, 0);
// join the result // join the result
startTimer(); startTimer();
@ -319,7 +319,7 @@ public class plasmaSearchProcessing implements Cloneable {
if ((rcLocal != null) && (remaining > 0)) { if ((rcLocal != null) && (remaining > 0)) {
indexContainer.excludeContainers(rcLocal, excludeContainers, remaining); indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);
} }
if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null); if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null, 0);
setYieldTime(plasmaSearchProcessing.PROCESS_JOIN); setYieldTime(plasmaSearchProcessing.PROCESS_JOIN);
setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, rcLocal.size()); setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, rcLocal.size());

@ -140,9 +140,9 @@ public final class plasmaSearchQuery {
return keyhashes; return keyhashes;
} }
public static String hashSet2hashString(Set words) { public static String hashSet2hashString(Set hashes) {
Iterator i = words.iterator(); Iterator i = hashes.iterator();
StringBuffer sb = new StringBuffer(words.size() * yacySeedDB.commonHashLength); StringBuffer sb = new StringBuffer(hashes.size() * yacySeedDB.commonHashLength);
while (i.hasNext()) sb.append((String) i.next()); while (i.hasNext()) sb.append((String) i.next());
return new String(sb); return new String(sb);
} }
@ -216,7 +216,11 @@ public final class plasmaSearchQuery {
return new String(sb); return new String(sb);
} }
/**
 * Generates a string that identifies this search so that its results can be
 * re-used from a cache.
 * The key has the form {@code <queryHashes>-<excludeHashes>:<contentdom>}, where both
 * hash sets are serialized with hashSet2hashString.
 *
 * NOTE(review): only the include/exclude word hashes and the content domain go into
 * the key; other query attributes are not part of it. Confirm that two queries which
 * differ only in such attributes may really share cached results.
 *
 * @return the cache key for this query
 */
public String id() {
    final String included = hashSet2hashString(this.queryHashes);
    final String excluded = hashSet2hashString(this.excludeHashes);
    return included + "-" + excluded + ":" + this.contentdom;
}
public HashMap resultProfile(int searchcount, long searchtime) { public HashMap resultProfile(int searchcount, long searchtime) {
// generate statistics about search: query, time, etc // generate statistics about search: query, time, etc
HashMap r = new HashMap(); HashMap r = new HashMap();

@ -94,9 +94,8 @@ public class plasmaSearchRankingProfile {
coeff_catindexof, coeff_cathasimage, coeff_cathasaudio, coeff_cathasvideo, coeff_cathasapp, coeff_catindexof, coeff_cathasimage, coeff_cathasaudio, coeff_cathasvideo, coeff_cathasapp,
coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer; coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer;
public plasmaSearchRankingProfile(String mediatype) { public plasmaSearchRankingProfile(int mediatype) {
// set default-values // set default-values
if (mediatype == null) mediatype = "text";
coeff_domlength = 8; coeff_domlength = 8;
coeff_ybr = 8; coeff_ybr = 8;
coeff_date = 4; coeff_date = 4;
@ -121,15 +120,15 @@ public class plasmaSearchRankingProfile {
coeff_urlcompintoplist = 3; coeff_urlcompintoplist = 3;
coeff_descrcompintoplist = 2; coeff_descrcompintoplist = 2;
coeff_prefer = 15; coeff_prefer = 15;
coeff_catindexof = (mediatype.equals("text")) ? 1 : 10; coeff_catindexof = (mediatype == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 10;
coeff_cathasimage = (mediatype.equals("image")) ? 15 : 1; coeff_cathasimage = (mediatype == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 15 : 1;
coeff_cathasaudio = (mediatype.equals("audio")) ? 15 : 1; coeff_cathasaudio = (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 15 : 1;
coeff_cathasvideo = (mediatype.equals("video")) ? 15 : 1; coeff_cathasvideo = (mediatype == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 15 : 1;
coeff_cathasapp = (mediatype.equals("app")) ? 15 : 1; coeff_cathasapp = (mediatype == plasmaSearchQuery.CONTENTDOM_APP) ? 15 : 1;
} }
public plasmaSearchRankingProfile(String prefix, String profile) { public plasmaSearchRankingProfile(String prefix, String profile) {
this("text"); // set defaults this(plasmaSearchQuery.CONTENTDOM_TEXT); // set defaults
if ((profile != null) && (profile.length() > 0)) { if ((profile != null) && (profile.length() > 0)) {
//parse external form //parse external form
HashMap coeff = new HashMap(); HashMap coeff = new HashMap();
@ -326,13 +325,15 @@ public class plasmaSearchRankingProfile {
} }
*/ */
public long postRanking( public long postRanking(
long ranking,
plasmaSearchQuery query, plasmaSearchQuery query,
Set topwords, Set topwords,
String[] urlcomps, String[] urlcomps,
String[] descrcomps, String[] descrcomps,
indexURLEntry page) { indexURLEntry page,
int position) {
long ranking = (255 - position) << 8;
// for media search: prefer pages with many links // for media search: prefer pages with many links
if (query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ranking += page.limage() << coeff_cathasimage; if (query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ranking += page.limage() << coeff_cathasimage;
if (query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ranking += page.limage() << coeff_cathasaudio; if (query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ranking += page.limage() << coeff_cathasaudio;

@ -33,6 +33,7 @@ import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.TreeSet; import java.util.TreeSet;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroBitfield;
@ -51,7 +52,7 @@ public class plasmaSearchResultAccumulator {
plasmaSearchQuery theQuery, plasmaSearchQuery theQuery,
plasmaSearchProcessing process, plasmaSearchProcessing process,
plasmaSearchRankingProfile ranking, plasmaSearchRankingProfile ranking,
plasmaSearchPreOrder pre, indexContainer pre,
plasmaWordIndex wordIndex, plasmaWordIndex wordIndex,
TreeSet blueList, TreeSet blueList,
boolean overfetch) { boolean overfetch) {
@ -67,18 +68,14 @@ public class plasmaSearchResultAccumulator {
indexRWIEntry rwientry; indexRWIEntry rwientry;
indexURLEntry page; indexURLEntry page;
Long preranking;
Object[] preorderEntry;
indexURLEntry.Components comp; indexURLEntry.Components comp;
String pagetitle, pageurl, pageauthor; String pagetitle, pageurl, pageauthor;
int minEntries = process.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT); int minEntries = process.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT);
try { try {
ordering: while (pre.hasNext()) { ordering: for (int i = 0; i < pre.size(); i++) {
if ((System.currentTimeMillis() >= postorderLimitTime) || (acc.sizeFetched() >= ((overfetch) ? 4 : 1) * minEntries)) break; if ((System.currentTimeMillis() >= postorderLimitTime) || (acc.sizeFetched() >= ((overfetch) ? 4 : 1) * minEntries)) break;
preorderEntry = pre.next(); rwientry = new indexRWIEntry(pre.get(i));
rwientry = (indexRWIEntry) preorderEntry[0];
// load only urls if there was not yet a root url of that hash // load only urls if there was not yet a root url of that hash
preranking = (Long) preorderEntry[1];
// find the url entry // find the url entry
page = wordIndex.loadedURL.load(rwientry.urlHash(), rwientry); page = wordIndex.loadedURL.load(rwientry.urlHash(), rwientry);
if (page != null) { if (page != null) {
@ -105,12 +102,12 @@ public class plasmaSearchResultAccumulator {
Iterator wi = theQuery.queryHashes.iterator(); Iterator wi = theQuery.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash()); while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
} else if (theQuery.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) { } else if (theQuery.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking); if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking); else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking); else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking); else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page);
} else { } else {
acc.addPage(page, preranking); acc.addPage(page);
} }
} }
} }

@ -2621,7 +2621,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
ioLinks[1].intValue(), ioLinks[1].intValue(),
condenser.RESULT_FLAGS condenser.RESULT_FLAGS
); );
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash); indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
wordIdxContainer.add(wordIdxEntry); wordIdxContainer.add(wordIdxEntry);
tmpContainers.add(wordIdxContainer); tmpContainers.add(wordIdxContainer);
} }

@ -152,8 +152,8 @@ public final class plasmaWordIndex implements indexRI {
return entries.updated(); return entries.updated();
} }
public static indexContainer emptyContainer(String wordHash) { public static indexContainer emptyContainer(String wordHash, int elementCount) {
return new indexContainer(wordHash, indexRWIEntry.urlEntryRow); return new indexContainer(wordHash, indexRWIEntry.urlEntryRow, elementCount);
} }
public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) { public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) {
@ -392,7 +392,11 @@ public final class plasmaWordIndex implements indexRI {
} }
public indexContainer deleteContainer(String wordHash) { public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, indexRWIEntry.urlEntryRow); indexContainer c = new indexContainer(
wordHash,
indexRWIEntry.urlEntryRow,
dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash) + collections.indexSize(wordHash)
);
synchronized (dhtInCache) { synchronized (dhtInCache) {
c.addAllUnique(dhtInCache.deleteContainer(wordHash)); c.addAllUnique(dhtInCache.deleteContainer(wordHash));
} }

@ -199,6 +199,7 @@ public class serverDomains {
// checks for local/global IP range and local IP // checks for local/global IP range and local IP
public static boolean isLocal(URL url) { public static boolean isLocal(URL url) {
InetAddress hostAddress = dnsResolve(url.getHost()); InetAddress hostAddress = dnsResolve(url.getHost());
if (hostAddress == null) /* we are offline */ return false; // it is rare to be offline in intranets
return hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); return hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress();
} }

@ -449,7 +449,7 @@ public final class yacyClient {
final int words = wordhashes.length() / yacySeedDB.commonHashLength; final int words = wordhashes.length() / yacySeedDB.commonHashLength;
indexContainer[] container = new indexContainer[words]; indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) { for (int i = 0; i < words; i++) {
container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength)); container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength), timingProfile.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT));
} }
// insert results to containers // insert results to containers

Loading…
Cancel
Save