* added a search history cache that maintains search results for 10 minutes

It is necessary for the new search process that will do automatic re-searches.
A positive effect is that when a re-search is done, it can be monitored how many
results have been contributed by other peers. The message for this contribution
was moved from the end of the result page to the top.
* enhanced re-search time when a global search was done and the local index
already has a great number of results for this word
* re-organised presearch computation; must be further enhanced

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4059 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent ae86d010bb
commit a34d9b8609

@ -30,6 +30,7 @@ import java.util.Iterator;
import java.util.Map;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@ -132,7 +133,7 @@ public class Ranking_p {
final serverObjects prop = defaultValues();
plasmaSearchRankingProfile ranking =
(sb.getConfig("rankingProfile", "").length() == 0) ?
new plasmaSearchRankingProfile("text") :
new plasmaSearchRankingProfile(plasmaSearchQuery.CONTENTDOM_TEXT) :
new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
putRanking(prop, ranking, "local");
return prop;
@ -149,7 +150,7 @@ public class Ranking_p {
if (post.containsKey("ResetRanking")) {
sb.setConfig("rankingProfile", "");
plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile("text");
plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile(plasmaSearchQuery.CONTENTDOM_TEXT);
final serverObjects prop = defaultValues();
//prop.putAll(ranking.toExternalMap("local"));
putRanking(prop, ranking, "local");

@ -164,7 +164,7 @@ public final class search {
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(squery.queryHashes) + " - " + squery.wantedResults + " links");
// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile);
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(plasmaSearchQuery.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile);
plasmaSearchProcessing localProcess = new plasmaSearchProcessing(squery.maximumTime, squery.wantedResults);
//plasmaSearchProcessing remoteProcess = null;
@ -208,7 +208,7 @@ public final class search {
// join and order the result
indexContainer localResults =
(containers == null) ?
plasmaWordIndex.emptyContainer(null) :
plasmaWordIndex.emptyContainer(null, 0) :
localProcess.localSearchJoinExclude(
containers[0].values(),
containers[1].values(),
@ -223,7 +223,7 @@ public final class search {
joincount = localResults.size();
prop.putASIS("joincount", Integer.toString(joincount));
plasmaSearchPreOrder pre = new plasmaSearchPreOrder(squery, localProcess, rankingProfile, localResults);
accu = new plasmaSearchResultAccumulator(squery, localProcess, rankingProfile, pre, sb.wordIndex, plasmaSwitchboard.blueList, false);
accu = new plasmaSearchResultAccumulator(squery, localProcess, rankingProfile, pre.strippedContainer(200), sb.wordIndex, plasmaSwitchboard.blueList, false);
}
// generate compressed index for maxcounthash

@ -116,7 +116,7 @@ document.getElementById("Enter").value = "search again - catch up more links";
::
<p>No Results. &quot;<strong>#[wrong_regex]#</strong>&quot; is no valid regular expression. Please go back to the previous page and make sure to enter a valid regular expressions for URL mask and Prefer mask.</p>
::
<p><strong id="linkcount">#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links from <strong>#[filteredcount]#</strong> filtered links of a total number of <strong>#[totalcount]#</strong> known.</p>
<p><strong id="linkcount">#[linkcount]#</strong> results from <strong>#[orderedcount]#</strong> ordered links from <strong>#[filteredcount]#</strong> filtered links of a total number of <strong>#[totalcount]#</strong> known#(globalresults)#.::, <strong>#[globalcount]#</strong> links from other YaCy peers.#(/globalresults)#</p>
::
<p>Searching the web with this peer is disabled for unauthorized users. Please <a href="Status.html?login=">log in</a> as administrator to use the search function</p>
#(/num-results)#
@ -164,21 +164,7 @@ document.getElementById("Enter").value = "search again - catch up more links";
</script>
<!-- linklist end -->
#(resultbottomline)#
::
<p>The global search resulted in #[globalresults]# link contributions from other YaCy peers.</p>
::
<p></p>
::
<p>
You cannot get global search results because you are not connected to another YaCy peer.
</p>
::
<p>
You can enrich the search results by using the 'global' option; you must also switch to online mode
(by using the proxy) to contribute to the global index.
</p>
#(/resultbottomline)#
::<!-- type 1: media search -->
<!-- non-js variant -->
<noscript>

@ -56,6 +56,7 @@ import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMSetTools;
@ -151,7 +152,6 @@ public class yacysearch {
prop.put("type", 0);
prop.put("type_excluded", 0);
prop.put("type_combine", 0);
prop.put("type_resultbottomline", 0);
prop.put("type_results", "");
prop.put("num-results", (searchAllowed) ? 0 : 6);
@ -197,13 +197,7 @@ public class yacysearch {
if (clustersearch) global = true; // switches search on, but search target is limited to cluster nodes
// find search domain
int contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT;
String contentdomString = post.get("contentdom", "text");
if (contentdomString.equals("text")) contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT;
if (contentdomString.equals("audio")) contentdomCode = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (contentdomString.equals("video")) contentdomCode = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (contentdomString.equals("image")) contentdomCode = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (contentdomString.equals("app")) contentdomCode = plasmaSearchQuery.CONTENTDOM_APP;
int contentdomCode = plasmaSearchQuery.contentdomParser(post.get("contentdom", "text"));
// patch until better search profiles are available
if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30;
@ -265,8 +259,7 @@ public class yacysearch {
// prepare search properties
final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getPublicAddress() != null));
final boolean samesearch = env.getConfig("last-search", "").equals(querystring + contentdomString);
final boolean globalsearch = (global) && (yacyonline) && (!samesearch);
final boolean globalsearch = (global) && (yacyonline);
// do the search
TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]);
@ -285,7 +278,7 @@ public class yacysearch {
"",
20,
constraint);
plasmaSearchRankingProfile ranking = (sb.getConfig("rankingProfile", "").length() == 0) ? new plasmaSearchRankingProfile(contentdomString) : new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
plasmaSearchRankingProfile ranking = (sb.getConfig("rankingProfile", "").length() == 0) ? new plasmaSearchRankingProfile(contentdomCode) : new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(4 * theQuery.maximumTime / 10, theQuery.wantedResults);
plasmaSearchProcessing remoteTiming = new plasmaSearchProcessing(6 * theQuery.maximumTime / 10, theQuery.wantedResults);
@ -305,8 +298,8 @@ public class yacysearch {
// create a new search event
String wrongregex = null;
plasmaSearchEvent theSearch = new plasmaSearchEvent(theQuery, ranking, localTiming, remoteTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null);
plasmaSearchPreOrder preorder = theSearch.search();
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, ranking, localTiming, remoteTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null);
indexContainer preorder = theSearch.search();
// fetch snippets
serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
@ -322,7 +315,7 @@ public class yacysearch {
// log
serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " +
(theSearch.getLocalCount() + theSearch.getGlobalCount()) + " links found, " +
preorder.filteredCount() + " links filtered, " +
theSearch.filteredCount() + " links filtered, " +
accu.resultCount() + " links ordered, " +
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
@ -347,9 +340,10 @@ public class yacysearch {
//prop.put("references", 0);
URL wordURL=null;
prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount());
prop.put("num-results_filteredcount", preorder.filteredCount());
prop.put("num-results_filteredcount", theSearch.filteredCount());
prop.put("num-results_orderedcount", accu.resultCount());
prop.put("num-results_globalresults", theSearch.getGlobalCount());
prop.put("num-results_globalresults", (theSearch.getGlobalCount() == 0) ? 0 : 1);
prop.put("num-results_globalresults_globalcount", theSearch.getGlobalCount());
prop.put("num-results_linkcount", 0);
prop.put("type_results", 0);
@ -425,11 +419,7 @@ public class yacysearch {
prop.put("num-results_linkcount", Integer.toString(accu.resultCount()));
}
// remember the last search expression
env.setConfig("last-search", querystring + contentdomString);
// process result of search
prop.put("type_resultbottomline", 0);
if (filtered.size() > 0) {
prop.put("excluded", 1);
prop.put("excluded_stopwords", filtered.toString());
@ -509,26 +499,8 @@ public class yacysearch {
}
}
if (wrongregex != null) {
prop.put("type_resultbottomline", 0);
}
else if (yacyonline) {
if (global) {
prop.put("type_resultbottomline", 1);
prop.put("type_resultbottomline_globalresults", prop.get("num-results_globalresults", "0"));
} else {
prop.put("type_resultbottomline", 0);
}
} else {
if (global) {
prop.put("type_resultbottomline", 3);
} else {
prop.put("type_resultbottomline", 0);
}
}
prop.put("type", (theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 2 : 1));
if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", contentdomString);
if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", post.get("contentdom", "text"));
prop.put("input_cat", "href");
prop.put("input_depth", "0");
@ -536,7 +508,6 @@ public class yacysearch {
String hostName = (String) header.get("Host", "localhost");
if (hostName.indexOf(":") == -1) hostName += ":" + serverCore.getPortNr(env.getConfig("port", "8080"));
prop.put("rssYacyImageURL", "http://" + hostName + "/env/grafics/yacy.gif");
}
if (post.get("cat", "href").equals("image")) {
@ -591,7 +562,7 @@ public class yacysearch {
prop.put("input_prefermaskfilter", prefermask);
prop.put("input_indexof", (indexof) ? "on" : "off");
prop.put("input_constraint", constraint.exportB64());
prop.put("input_contentdom", contentdomString);
prop.put("input_contentdom", post.get("contentdom", "text"));
prop.put("input_contentdomCheckText", (contentdomCode == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("input_contentdomCheckAudio", (contentdomCode == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("input_contentdomCheckVideo", (contentdomCode == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);

@ -46,14 +46,14 @@ public class indexContainer extends kelondroRowSet {
this.wordHash = wordHash;
}
public indexContainer(String wordHash, kelondroRow rowdef) {
super(rowdef, 0);
public indexContainer(String wordHash, kelondroRow rowdef, int objectCount) {
super(rowdef, objectCount);
this.wordHash = wordHash;
this.lastTimeWrote = 0;
}
public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef);
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.size());
newContainer.addAllUnique(this);
return newContainer;
}
@ -308,7 +308,7 @@ public class indexContainer extends kelondroRowSet {
assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString();
int keylength = small.rowdef.width(0);
assert (keylength == large.rowdef.width(0));
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result
indexContainer conj = new indexContainer(null, small.rowdef, 0); // start with empty search result
Iterator se = small.entries();
indexRWIEntry ie0, ie1;
long stamp = System.currentTimeMillis();
@ -331,7 +331,7 @@ public class indexContainer extends kelondroRowSet {
assert i1.rowdef.equals(i2.rowdef) : "i1 = " + i1.rowdef.toString() + "; i2 = " + i2.rowdef.toString();
int keylength = i1.rowdef.width(0);
assert (keylength == i2.rowdef.width(0));
indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result
indexContainer conj = new indexContainer(null, i1.rowdef, 0); // start with empty search result
if (!((i1.rowdef.getOrdering().signature().equals(i2.rowdef.getOrdering().signature())) &&
(i1.rowdef.primaryKey() == i2.rowdef.primaryKey()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();

@ -383,10 +383,14 @@ public final class indexRAMRI implements indexRI {
return (((long) intTime) * (long) 1000) + initTime;
}
public synchronized boolean hasContainer(String wordHash) {
public boolean hasContainer(String wordHash) {
return cache.containsKey(wordHash);
}
/**
 * Returns the number of entries held in the cached container of a word.
 *
 * @param wordHash the hash of the word whose cached container is measured
 * @return the size of the cached container, or 0 if the cache holds no
 *         container for {@code wordHash}
 */
public int sizeContainer(String wordHash) {
    // a word that is not cached yields null from the lookup; callers that only
    // want a size estimate must get 0 here instead of a NullPointerException
    indexContainer container = (indexContainer) cache.get(wordHash);
    return (container == null) ? 0 : container.size();
}
public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) {
// retrieve container
@ -497,7 +501,7 @@ public final class indexRAMRI implements indexRI {
public synchronized void addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow);
if (container == null) container = new indexContainer(wordHash, this.payloadrow, 1);
container.put(newEntry);
cache.put(wordHash, container);
hashScore.incScore(wordHash);

@ -26,6 +26,7 @@
package de.anomic.plasma;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
@ -39,27 +40,36 @@ import de.anomic.yacy.yacySearch;
public final class plasmaSearchEvent {
public static plasmaSearchEvent lastEvent = null;
private static HashMap lastEvents = new HashMap(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes
private long eventTime;
private plasmaSearchQuery query;
private plasmaSearchRankingProfile ranking;
private plasmaWordIndex wordIndex;
private indexContainer rcContainers; // cache for results
private indexContainer rcLocal; // cache for local results
private indexContainer rcGlobal; // cache for global results
private Map rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
private plasmaSearchProcessing profileLocal, profileGlobal;
private yacySearch[] primarySearchThreads, secondarySearchThreads;
private TreeMap preselectedPeerHashes;
private int localcount, globalcount;
private indexContainer sortedResults;
private int lastglobal;
private int filteredCount;
public plasmaSearchEvent(plasmaSearchQuery query,
private plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchProcessing localTiming,
plasmaSearchProcessing remoteTiming,
plasmaWordIndex wordIndex,
TreeMap preselectedPeerHashes) {
this.eventTime = System.currentTimeMillis(); // for lifetime check
this.wordIndex = wordIndex;
this.query = query;
this.ranking = ranking;
this.rcContainers = plasmaWordIndex.emptyContainer(null);
this.rcLocal = null;
this.rcGlobal = plasmaWordIndex.emptyContainer(null, 0);;
this.rcAbstracts = (query.queryHashes.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
@ -68,40 +78,10 @@ public final class plasmaSearchEvent {
this.preselectedPeerHashes = preselectedPeerHashes;
this.localcount = 0;
this.globalcount = 0;
}
public plasmaSearchQuery getQuery() {
return query;
}
public plasmaSearchRankingProfile getRanking() {
return ranking;
}
public plasmaSearchProcessing getLocalTiming() {
return profileLocal;
}
public yacySearch[] getPrimarySearchThreads() {
return primarySearchThreads;
}
public yacySearch[] getSecondarySearchThreads() {
return secondarySearchThreads;
}
public int getLocalCount() {
return this.localcount;
}
public int getGlobalCount() {
return this.globalcount;
}
public plasmaSearchPreOrder search() {
// combine all threads
this.sortedResults = null;
this.lastglobal = 0;
long start = System.currentTimeMillis();
plasmaSearchPreOrder pre;
if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
int fetchpeers = (int) (query.maximumTime / 500L); // number of target peers; means 10 peers in 10 seconds
@ -121,7 +101,7 @@ public final class plasmaSearchEvent {
query.urlMask,
query.maxDistance,
wordIndex,
rcContainers,
rcGlobal,
rcAbstracts,
fetchpeers,
plasmaSwitchboard.urlBlacklist,
@ -158,10 +138,10 @@ public final class plasmaSearchEvent {
}
*/
// try to pre-fetch some LURLs if there is enough time
indexContainer rcLocal =
// join and exclude the local result
this.rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null) :
plasmaWordIndex.emptyContainer(null, 0) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
@ -170,11 +150,18 @@ public final class plasmaSearchEvent {
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
// this is temporary debugging code to learn that the index abstracts are fetched correctly
// sort the local containers and truncate it to a limited count,
// so following sortings together with the global results will be fast
localcount = rcLocal.size();
plasmaSearchPreOrder firstsort = new plasmaSearchPreOrder(query, profileLocal, ranking, rcLocal);
rcLocal = firstsort.strippedContainer(200);
// wait some time to retrieve index abstracts from primary search
while (System.currentTimeMillis() < secondaryTimeout) {
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
// evaluate index abstracts and start a secondary search
if (rcAbstracts != null) prepareSecondarySearch();
@ -188,22 +175,12 @@ public final class plasmaSearchEvent {
// finished searching
serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
// combine the result and order
indexContainer searchResult = plasmaWordIndex.emptyContainer(null);
searchResult.addAllUnique(rcLocal);
searchResult.addAllUnique(rcContainers);
searchResult.sort();
searchResult.uniq(1000);
localcount = rcLocal.size();
globalcount = rcContainers.size();
pre = new plasmaSearchPreOrder(query, profileLocal, ranking, searchResult);
} else {
Map[] searchContainerMaps = profileLocal.localSearchContainers(query, wordIndex, null);
indexContainer rcLocal =
rcLocal =
(searchContainerMaps == null) ?
plasmaWordIndex.emptyContainer(null) :
plasmaWordIndex.emptyContainer(null, 0) :
profileLocal.localSearchJoinExclude(
searchContainerMaps[0].values(),
searchContainerMaps[1].values(),
@ -212,17 +189,95 @@ public final class plasmaSearchEvent {
profileLocal.getTargetTime(plasmaSearchProcessing.PROCESS_JOIN) * query.queryHashes.size() / (query.queryHashes.size() + query.excludeHashes.size()),
query.maxDistance);
this.localcount = rcLocal.size();
pre = new plasmaSearchPreOrder(query, profileLocal, ranking, rcLocal);
}
// log the event
serverLog.logFine("SEARCH_EVENT", "SEARCHRESULT: " + profileLocal.reportToString());
// prepare values for statistics
// set link for statistic
lastEvent = this;
// return search result
return pre;
// remove old events in the event cache
Iterator i = lastEvents.entrySet().iterator();
while (i.hasNext()) {
if (((plasmaSearchEvent) ((Map.Entry) i.next()).getValue()).eventTime + eventLifetime < System.currentTimeMillis()) i.remove();
}
// store this search to a cache so it can be re-used
lastEvents.put(query.id(), this);
}
/** Returns the query object this search event was constructed with. */
public plasmaSearchQuery getQuery() {
return query;
}
/** Returns the ranking profile this search event was constructed with. */
public plasmaSearchRankingProfile getRanking() {
return ranking;
}
/** Returns the processing/timing profile used for the local part of the search. */
public plasmaSearchProcessing getLocalTiming() {
return profileLocal;
}
/**
 * Returns the primary search threads field as-is.
 * NOTE(review): these appear to be the remote search threads started for a
 * global search; whether the array is non-null for local-only searches is
 * not visible here - confirm before dereferencing.
 */
public yacySearch[] getPrimarySearchThreads() {
return primarySearchThreads;
}
/**
 * Returns the secondary search threads field as-is; the array is filled by
 * prepareSecondarySearch().
 */
public yacySearch[] getSecondarySearchThreads() {
return secondarySearchThreads;
}
/** Returns the size of the local join result as recorded by the constructor. */
public int getLocalCount() {
return this.localcount;
}
/**
 * Returns the number of globally contributed entries; this is 0 until
 * search() finds a non-empty global container and records its size.
 */
public int getGlobalCount() {
return this.globalcount;
}
/**
 * Returns a search event for the given query, re-using a cached event when
 * one with the same {@code query.id()} exists and has not outlived
 * {@link #eventLifetime}.
 *
 * A cached event is returned unchanged apart from its refreshed event time;
 * the ranking, timing, index and peer arguments are only used when a new
 * event has to be constructed. Constructing a new event performs the search
 * and registers the event in the cache under {@code query.id()}.
 *
 * @param query                 the query to execute or look up
 * @param ranking               ranking profile for a newly created event
 * @param localTiming           processing profile for the local search part
 * @param remoteTiming          processing profile for the remote search part
 * @param wordIndex             the word index to search in
 * @param preselectedPeerHashes peers to restrict a remote search to, may be null
 * @return a cached, still valid event or a freshly created one
 */
public static plasmaSearchEvent getEvent(plasmaSearchQuery query,
        plasmaSearchRankingProfile ranking,
        plasmaSearchProcessing localTiming,
        plasmaSearchProcessing remoteTiming,
        plasmaWordIndex wordIndex,
        TreeMap preselectedPeerHashes) {
    final long now = System.currentTimeMillis();
    plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id());
    // expired events are only purged while a new event is constructed, so an
    // outdated entry can still be found here; apply the same expiry test as the
    // purge does instead of reviving stale results
    final boolean expired = (event != null) && (event.eventTime + eventLifetime < now);
    if ((event == null) || expired) {
        // the constructor stores the new event under query.id(), replacing any expired one
        event = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, wordIndex, preselectedPeerHashes);
    } else {
        // renew the event time so that the event is not deleted too early next time
        event.eventTime = now;
    }
    return event;
}
/**
 * Returns the pre-ordered result container of this event, limited to 200 entries.
 *
 * While no global results are present the local result is ordered once and
 * the stored container is handed out on every later call. As soon as the
 * global container is non-empty, its size is recorded as the global count and
 * local and global results are merged and ordered again whenever that size
 * differs from the one seen at the previous ordering.
 *
 * @return the ordered and truncated result container
 */
public indexContainer search() {
    final boolean globalAvailable = (rcGlobal != null) && (rcGlobal.size() > 0);

    if (!globalAvailable) {
        // local results only: order them on the first call, reuse afterwards
        if (this.sortedResults == null) {
            final plasmaSearchPreOrder localOrder = new plasmaSearchPreOrder(query, profileLocal, ranking, rcLocal);
            this.filteredCount = localOrder.filteredCount();
            this.sortedResults = localOrder.strippedContainer(200);
        }
        return this.sortedResults;
    }

    globalcount = rcGlobal.size();
    final boolean upToDate = (this.sortedResults != null) && (this.lastglobal == globalcount);
    if (upToDate) {
        // no new global contributions since the last ordering
        return this.sortedResults;
    }

    // combine the local and global result and order the union
    final indexContainer merged = plasmaWordIndex.emptyContainer(null, rcLocal.size() + rcGlobal.size());
    merged.addAllUnique(rcLocal);
    merged.addAllUnique(rcGlobal);
    merged.sort();
    merged.uniq(100);
    lastglobal = globalcount;

    final plasmaSearchPreOrder mergedOrder = new plasmaSearchPreOrder(query, profileLocal, ranking, merged);
    this.filteredCount = mergedOrder.filteredCount();
    this.sortedResults = mergedOrder.strippedContainer(200);
    return this.sortedResults;
}
/**
 * Returns the filtered count that the pre-order reported when search() last
 * (re-)ordered the results.
 */
public int filteredCount() {
return this.filteredCount;
}
private void prepareSecondarySearch() {
@ -282,7 +337,7 @@ public final class plasmaSearchEvent {
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, "", urls, wordIndex, rcContainers, peer, plasmaSwitchboard.urlBlacklist,
words, "", urls, wordIndex, rcGlobal, peer, plasmaSwitchboard.urlBlacklist,
profileGlobal, ranking, query.constraint, preselectedPeerHashes);
}

@ -101,7 +101,7 @@ public final class plasmaSearchPostOrder {
return (indexURLEntry) pageAcc.remove(top);
}
protected void addPage(indexURLEntry page, Long preranking) {
protected void addPage(indexURLEntry page) {
// take out relevant information for reference computation
indexURLEntry.Components comp = page.comp();
@ -110,7 +110,7 @@ public final class plasmaSearchPostOrder {
String[] descrcomps = comp.title().toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description
// store everything
results.add(new Object[] {page, urlcomps, descrcomps, preranking});
results.add(new Object[] {page, urlcomps, descrcomps});
// add references
addScoreFiltered(urlcomps);
@ -137,12 +137,12 @@ public final class plasmaSearchPostOrder {
// calculate ranking
if (postsort)
ranking = this.ranking.postRanking(
((Long) resultVector[3]).longValue(),
query,
commonSense,
(String[]) resultVector[1],
(String[]) resultVector[2],
page
page,
i
);
else
ranking = ((Long) resultVector[3]).longValue();

@ -208,11 +208,11 @@ public final class plasmaSearchPreOrder {
return theClone;
}
public boolean hasNext() {
private boolean hasNext() {
return pageAcc.size() > 0;
}
public Object[] /*{indexEntry, Long}*/ next() {
private Object[] /*{indexEntry, Long}*/ next() {
String top = (String) pageAcc.firstKey();
//System.out.println("preorder-key: " + top);
Long preranking;
@ -225,6 +225,19 @@ public final class plasmaSearchPreOrder {
return new Object[]{(indexRWIEntry) pageAcc.remove(top), preranking};
}
/**
 * Builds an index container with a limited number of results.
 *
 * Entries are taken in the order in which next() delivers them; every entry
 * taken is thereby removed from this pre-order.
 *
 * @param count the maximum number of entries to put into the container
 * @return a container with at most {@code count} entries
 */
public indexContainer strippedContainer(int count) {
    final indexContainer limited = plasmaWordIndex.emptyContainer(null, count);
    for (int taken = 0; (taken < count) && hasNext(); taken++) {
        // next() yields {indexEntry, Long}; only the entry itself is kept
        final indexRWIEntry best = (indexRWIEntry) next()[0];
        limited.addUnique(best.toKelondroEntry());
    }
    return limited;
}
public indexRWIEntry[] getNormalizer() {
return new indexRWIEntry[] {entryMin, entryMax};
}

@ -309,7 +309,7 @@ public class plasmaSearchProcessing implements Cloneable {
// join a search result and return the joincount (number of pages after join)
// since this is a conjunction we return an empty entity if any word is not known
if (includeContainers == null) return plasmaWordIndex.emptyContainer(null);
if (includeContainers == null) return plasmaWordIndex.emptyContainer(null, 0);
// join the result
startTimer();
@ -319,7 +319,7 @@ public class plasmaSearchProcessing implements Cloneable {
if ((rcLocal != null) && (remaining > 0)) {
indexContainer.excludeContainers(rcLocal, excludeContainers, remaining);
}
if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null);
if (rcLocal == null) rcLocal = plasmaWordIndex.emptyContainer(null, 0);
setYieldTime(plasmaSearchProcessing.PROCESS_JOIN);
setYieldCount(plasmaSearchProcessing.PROCESS_JOIN, rcLocal.size());

@ -140,9 +140,9 @@ public final class plasmaSearchQuery {
return keyhashes;
}
public static String hashSet2hashString(Set words) {
Iterator i = words.iterator();
StringBuffer sb = new StringBuffer(words.size() * yacySeedDB.commonHashLength);
public static String hashSet2hashString(Set hashes) {
Iterator i = hashes.iterator();
StringBuffer sb = new StringBuffer(hashes.size() * yacySeedDB.commonHashLength);
while (i.hasNext()) sb.append((String) i.next());
return new String(sb);
}
@ -216,6 +216,10 @@ public final class plasmaSearchQuery {
return new String(sb);
}
/**
 * Generates a string that identifies this search, so that results can be
 * re-used from a cache.
 *
 * The identifier is composed of the query hashes, the exclude hashes and the
 * content domain only.
 *
 * @return the identifier in the form {@code <queryHashes>-<excludeHashes>:<contentdom>}
 */
public String id() {
    final String included = hashSet2hashString(this.queryHashes);
    final String excluded = hashSet2hashString(this.excludeHashes);
    return included + "-" + excluded + ":" + this.contentdom;
}
public HashMap resultProfile(int searchcount, long searchtime) {
// generate statistics about search: query, time, etc

@ -94,9 +94,8 @@ public class plasmaSearchRankingProfile {
coeff_catindexof, coeff_cathasimage, coeff_cathasaudio, coeff_cathasvideo, coeff_cathasapp,
coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer;
public plasmaSearchRankingProfile(String mediatype) {
public plasmaSearchRankingProfile(int mediatype) {
// set default-values
if (mediatype == null) mediatype = "text";
coeff_domlength = 8;
coeff_ybr = 8;
coeff_date = 4;
@ -121,15 +120,15 @@ public class plasmaSearchRankingProfile {
coeff_urlcompintoplist = 3;
coeff_descrcompintoplist = 2;
coeff_prefer = 15;
coeff_catindexof = (mediatype.equals("text")) ? 1 : 10;
coeff_cathasimage = (mediatype.equals("image")) ? 15 : 1;
coeff_cathasaudio = (mediatype.equals("audio")) ? 15 : 1;
coeff_cathasvideo = (mediatype.equals("video")) ? 15 : 1;
coeff_cathasapp = (mediatype.equals("app")) ? 15 : 1;
coeff_catindexof = (mediatype == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 10;
coeff_cathasimage = (mediatype == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 15 : 1;
coeff_cathasaudio = (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 15 : 1;
coeff_cathasvideo = (mediatype == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 15 : 1;
coeff_cathasapp = (mediatype == plasmaSearchQuery.CONTENTDOM_APP) ? 15 : 1;
}
public plasmaSearchRankingProfile(String prefix, String profile) {
this("text"); // set defaults
this(plasmaSearchQuery.CONTENTDOM_TEXT); // set defaults
if ((profile != null) && (profile.length() > 0)) {
//parse external form
HashMap coeff = new HashMap();
@ -326,12 +325,14 @@ public class plasmaSearchRankingProfile {
}
*/
public long postRanking(
long ranking,
plasmaSearchQuery query,
Set topwords,
String[] urlcomps,
String[] descrcomps,
indexURLEntry page) {
indexURLEntry page,
int position) {
long ranking = (255 - position) << 8;
// for media search: prefer pages with many links
if (query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ranking += page.limage() << coeff_cathasimage;

@ -33,6 +33,7 @@ import java.util.Date;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield;
@ -51,7 +52,7 @@ public class plasmaSearchResultAccumulator {
plasmaSearchQuery theQuery,
plasmaSearchProcessing process,
plasmaSearchRankingProfile ranking,
plasmaSearchPreOrder pre,
indexContainer pre,
plasmaWordIndex wordIndex,
TreeSet blueList,
boolean overfetch) {
@ -67,18 +68,14 @@ public class plasmaSearchResultAccumulator {
indexRWIEntry rwientry;
indexURLEntry page;
Long preranking;
Object[] preorderEntry;
indexURLEntry.Components comp;
String pagetitle, pageurl, pageauthor;
int minEntries = process.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT);
try {
ordering: while (pre.hasNext()) {
ordering: for (int i = 0; i < pre.size(); i++) {
if ((System.currentTimeMillis() >= postorderLimitTime) || (acc.sizeFetched() >= ((overfetch) ? 4 : 1) * minEntries)) break;
preorderEntry = pre.next();
rwientry = (indexRWIEntry) preorderEntry[0];
rwientry = new indexRWIEntry(pre.get(i));
// load only urls if there was not yet a root url of that hash
preranking = (Long) preorderEntry[1];
// find the url entry
page = wordIndex.loadedURL.load(rwientry.urlHash(), rwientry);
if (page != null) {
@ -105,12 +102,12 @@ public class plasmaSearchResultAccumulator {
Iterator wi = theQuery.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
} else if (theQuery.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page, preranking);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page, preranking);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page, preranking);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page, preranking);
if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addPage(page);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addPage(page);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (page.limage() > 0)) acc.addPage(page);
else if ((theQuery.contentdom == plasmaSearchQuery.CONTENTDOM_APP) && (page.lapp() > 0)) acc.addPage(page);
} else {
acc.addPage(page, preranking);
acc.addPage(page);
}
}
}

@ -2621,7 +2621,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
ioLinks[1].intValue(),
condenser.RESULT_FLAGS
);
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash);
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
wordIdxContainer.add(wordIdxEntry);
tmpContainers.add(wordIdxContainer);
}

@ -152,8 +152,8 @@ public final class plasmaWordIndex implements indexRI {
return entries.updated();
}
public static indexContainer emptyContainer(String wordHash) {
return new indexContainer(wordHash, indexRWIEntry.urlEntryRow);
public static indexContainer emptyContainer(String wordHash, int elementCount) {
return new indexContainer(wordHash, indexRWIEntry.urlEntryRow, elementCount);
}
public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) {
@ -392,7 +392,11 @@ public final class plasmaWordIndex implements indexRI {
}
public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, indexRWIEntry.urlEntryRow);
indexContainer c = new indexContainer(
wordHash,
indexRWIEntry.urlEntryRow,
dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash) + collections.indexSize(wordHash)
);
synchronized (dhtInCache) {
c.addAllUnique(dhtInCache.deleteContainer(wordHash));
}

@ -199,6 +199,7 @@ public class serverDomains {
// checks for local/global IP range and local IP
/**
 * Tells whether the host of the given URL resolves to a site-local or
 * loopback address.
 *
 * @param url the URL whose host is resolved
 * @return true if the resolved address is site-local or loopback; false
 *         otherwise, including the case that the host cannot be resolved
 */
public static boolean isLocal(URL url) {
    final InetAddress resolved = dnsResolve(url.getHost());
    if (resolved == null) {
        // we are offline; it is rare to be offline in intranets
        return false;
    }
    if (resolved.isSiteLocalAddress()) {
        return true;
    }
    return resolved.isLoopbackAddress();
}

@ -449,7 +449,7 @@ public final class yacyClient {
final int words = wordhashes.length() / yacySeedDB.commonHashLength;
indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) {
container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength));
container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength), timingProfile.getTargetCount(plasmaSearchProcessing.PROCESS_POSTSORT));
}
// insert results to containers

Loading…
Cancel
Save