fixed several search result navigation bugs

fixed bad behaviours during search result collection

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7362 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 49b5a206cd
commit 18d33b5c6d

@ -17,6 +17,9 @@ var modifiertype = "";
function search(search) {
query = search;
start = new Date();
if (query == null || query == "") {
return;
}
var self = this;
if (window.XMLHttpRequest) { // Mozilla/Safari
self.xmlHttpReq = new XMLHttpRequest();
@ -42,7 +45,7 @@ function navget(list, name) {
function preparepage(str) {
var raw = document.getElementById("raw");
if (raw != null) raw.innerHTML = str;
var rsp = eval("("+str+")");
var rsp = eval("(" + str + ")");
var firstChannel = rsp.channels[0];
searchresult = firstChannel.items;
totalResults = firstChannel.totalResults.replace(/[,.]/,"");
@ -78,10 +81,10 @@ function hideDownloadScript() {
}
function resultStart() {
var html = "<span style=\"display:block\">";
var html = "";
if (totalResults > 0) {
html += "<form><div style=\"float:left\">" + searchresult.length + " results from a total of " + totalResults + " docs in index; search time: " + ((new Date()).getTime() - start.getTime()) + " milliseconds. </div>";
html += "<div id=\"downloadbutton\" style=\"float:left\"></div></form>";
html += "<div>" + searchresult.length + " results from a total of " + totalResults + " docs in index; search time: " + ((new Date()).getTime() - start.getTime()) + " milliseconds.&nbsp;";
html += "<div id=\"downloadbutton\" style=\"inline\"></div></div>";
} else {
if (query == "") {
html += "please enter some search words<br\>or use the following predefined search queries:<br\>";
@ -90,12 +93,13 @@ function resultStart() {
html += "(<a style=\"text-decoration:underline\" href=\"/yacyinteractive.html?query=gif+filetype:gif\">gif</a>),";
html += "(<a style=\"text-decoration:underline\" href=\"/yacyinteractive.html?query=jpg+filetype:jpg\">jpg</a>)<br>";
html += "list: ";
html += "<a style=\"text-decoration:underline\" href=\"/yacyinteractive.html?query=pdf+/date+filetype:pdf\">recent pdf</a>,";
html += "<a style=\"text-decoration:underline\" href=\"/yacyinteractive.html?query=pdf+/date+filetype:pdf\">recent pdf</a><br>";
//html += "<iframe src=\"rssTerminal.html?set=LOCALINDEXING&amp;width=600px&amp;height=180px&amp;maxlines=20&amp;maxwidth=120\" ";
//html += "style=\"width:600px;height:180px;margin:0px;\" scrolling=\"no\" name=\"newsframe\"></iframe>";
} else {
html += "no results";
}
}
html += "</span>";
// add extension navigation
var extnav = "";
@ -105,7 +109,7 @@ function resultStart() {
}
}
if (extnav.length > 0) {
html += "<span style=\"display:block\">apply a <b>filter</b> by filetype:&nbsp;&nbsp;&nbsp;&nbsp;" + extnav + "</span>";
html += "<div style=\"display:block\">apply a <b>filter</b> by filetype:&nbsp;&nbsp;&nbsp;&nbsp;" + extnav + "</div>";
} else {
// check if there is a filetype constraint and offer a removal
if (modifier != "") {
@ -131,8 +135,8 @@ function resultList() {
var html = "";
if (searchresult.length > 0) {
html += "<table class=\"sortable\" id=\"sortable\" border=\"0\" cellpadding=\"0\" cellspacing=\"1\" width=\"99%\">";
html += "<tr class=\"TableHeader\" valign=\"bottom\"><td width=\"40\">Protocol</td><td width=\"60\">Host</td><td width=\"260\">Path</td><td width=\"360\">Name</td><td width=\"60\">Size</td><td width=\"75\">Date</td></tr>";
for (var i = 0; i < searchresult.length; i++) { html += resultLine("row", searchresult[i]); }
html += "<tr class=\"TableHeader\" valign=\"bottom\"><td width=\"10\">count</td><td width=\"40\">Protocol</td><td width=\"60\">Host</td><td width=\"260\">Path</td><td width=\"360\">Name</td><td width=\"60\">Size</td><td width=\"75\">Date</td></tr>";
for (var i = 0; i < searchresult.length; i++) { html += resultLine("row", searchresult[i], i + 1); }
html += "</table>";
}
return html;
@ -144,7 +148,7 @@ function resultImages() {
return html;
}
function resultLine(type, item) {
function resultLine(type, item, linenumber) {
// evaluate item
p = item.link.indexOf("//");
protocol = "";
@ -185,6 +189,7 @@ function resultLine(type, item) {
var html = "";
if (type == "row") {
html += "<tr class=\"TableCellLight\">";
html += "<td align=\"left\">" + linenumber + "</td>";
html += "<td align=\"left\">" + protocol + "</td>";
html += "<td align=\"left\"><a href=\"" + protocol + "://" + host + "/" + "\">" + host + "</a></td>";
html += "<td align=\"left\"><a href=\"" + item.link + "\">" + path + "</a></td>";

@ -93,7 +93,7 @@ public final class search {
final String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none)
// final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
// final String fwden = post.get("fwden", ""); // forward deny, a list of seed hashes. They may NOT be target of forward hopping
final int count = Math.min(100, post.getInt("count", 0)); // maximum number of wanted results
final int count = Math.min(100, post.getInt("count", 10)); // maximum number of wanted results
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
final String prefer = post.get("prefer", "");
final String contentdom = post.get("contentdom", "text");
@ -301,7 +301,6 @@ public final class search {
}
if (joincount <= 0 || abstracts.length() == 0) {
prop.put("indexcount", "");
prop.put("joincount", "0");
} else {
// attach information about index abstracts
final StringBuilder indexcount = new StringBuilder(6000);

@ -30,15 +30,16 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
#%env/templates/embeddedheader.template%#
#(/display)#
<div style="float:left">
<div style="float:left"><form class="search small" id="searchform" action="yacyinteractive.html" method="get" onkeyup="xmlhttpPost(); return false;">
<form class="search small" id="searchform" action="yacyinteractive.html" method="get" onkeyup="xmlhttpPost(); return false;">
<h2>#[promoteSearchPageGreeting]#</h2>
<div class="yacylogo">
<a href="#[promoteSearchPageGreeting.homepage]#" class="yacylogo"><img src="#[promoteSearchPageGreeting.smallImage]#" alt="yacysearch"/></a>
</div>
<fieldset class="yacys"><input id="query" name="query" type="text" value="#[query]#" size="50" maxlength="80" /></fieldset>
<!--<pre>Raw JSON String: <div id="raw"></div></pre>-->
<!--<pre>Raw JSON String: <div id="raw"></div></pre>-->
</form>
<div id="searchnavigation" style="float:left"></div>
<div id="searchnavigation"></div>
</div>
<div id="downloadscript" style="float:left"></div>
<div id="searchresults" style="float:left"></div>
<script type="text/javascript">

@ -551,7 +551,7 @@ public class yacysearch {
(System.currentTimeMillis() - timestamp) + " ms");
// prepare search statistics
theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteResourceSize();
theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
@ -615,7 +615,7 @@ public class yacysearch {
Log.logException(e);
}
int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteResourceSize();
int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount();
prop.put("num-results_offset", offset);
prop.put("num-results_itemscount", Formatter.number(0, true));
prop.put("num-results_itemsPerPage", itemsPerPage);
@ -637,7 +637,8 @@ public class yacysearch {
resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, null, originalUrlMask, navigation));
resnav.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a>&nbsp;");
}
final int numberofpages = Math.min(10,indexcount / theQuery.displayResults());
final int numberofpages = Math.min(10, 1 + ((indexcount - 1) / theQuery.displayResults()));
for (int i = 0; i < numberofpages; i++) {
if (i == thispage) {
resnav.append("<img src=\"env/grafics/navs");

@ -87,7 +87,7 @@ public class yacysearchitem {
final QueryParams theQuery = theSearch.getQuery();
// dynamically update count values
final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteResourceSize();
final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int offset = theQuery.neededResults() - theQuery.displayResults() + 1;
prop.put("offset", offset);
prop.put("itemscount", Formatter.number(Math.min((item < 0) ? theQuery.neededResults() : item + 1, totalcount)));

@ -169,8 +169,7 @@ public class yacysearchtrailer {
String aboutBody = env.getConfig("about.body", "");
String aboutHeadline = env.getConfig("about.headline", "");
if ((aboutBody.length() == 0 && aboutHeadline.length() == 0) ||
theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() +
theSearch.getRankingResult().getRemoteResourceSize() == 0) {
theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) {
prop.put("nav-about", 0);
} else {
prop.put("nav-about", 1);

@ -166,6 +166,7 @@ public final class RankingProcess extends Thread {
if (local) {
this.local_resourceSize += index.size();
} else {
assert fullResource >= 0;
this.remote_resourceSize += fullResource;
this.remote_peerCount++;
}
@ -293,26 +294,6 @@ public final class RankingProcess extends Thread {
WeakPriorityBlockingQueue<WordReferenceVars> m;
WeakPriorityBlockingQueue.Element<WordReferenceVars> rwi = null;
// check if the doubleDomCache is filled
/*
boolean doubleDomCacheFilled = false;
synchronized (this.doubleDomCache) {
final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i = this.doubleDomCache.values().iterator();
while (i.hasNext()) {
try {
m = i.next();
} catch (ConcurrentModificationException e) {
Log.logException(e);
break; // not the best solution...
}
if (m == null) continue;
if (m.isEmpty()) continue;
doubleDomCacheFilled = true;
break;
}
}
*/
// take one entry from the stack if there are entries on that stack or the feeding is not yet finished
if (!feedingIsFinished() || stack.sizeQueue() > 0) try {
//System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
@ -493,13 +474,6 @@ public final class RankingProcess extends Thread {
}
// accept url
/*
try {
this.handover.put(page.hash()); // remember that we handed over this url
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
*/
return page;
}
return null;
@ -545,11 +519,6 @@ public final class RankingProcess extends Thread {
return this.local_indexCount;
}
/**
 * Returns the current value of the {@code local_resourceSize} counter.
 * According to the original note this is the number of hits in the local
 * peer (the size of the collection in the own index).
 *
 * @return the value of {@code local_resourceSize}
 */
public int getLocalResourceSize() {
    return local_resourceSize;
}
public int getRemoteIndexCount() {
// the number of result contributions from all the remote peers
return this.remote_indexCount;

@ -105,10 +105,148 @@ public class ResultFetcher {
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false);
}
public void deployWorker(int deployCount, int neededResults) {
if (anyWorkerAlive()) return;
/**
 * Returns the current value of {@code urlRetrievalAllTime}.
 * NOTE(review): presumably the accumulated time spent on URL retrieval;
 * the unit is not visible here - confirm against the code that updates the field.
 *
 * @return the value of {@code urlRetrievalAllTime}
 */
public long getURLRetrievalTime() {
    return urlRetrievalAllTime;
}
/**
 * Returns the current value of {@code snippetComputationAllTime}.
 * NOTE(review): presumably the accumulated time spent on snippet computation;
 * the unit is not visible here - confirm against the code that updates the field.
 *
 * @return the value of {@code snippetComputationAllTime}
 */
public long getSnippetComputationTime() {
    return snippetComputationAllTime;
}
/**
 * Returns the result entry at position {@code item}, waiting for it if it
 * has not been produced yet.
 *
 * If the entry is already available (e.g. a search page is accessed a second
 * time) it is returned immediately. Otherwise workers are deployed and the
 * method polls the result stack until the entry shows up, the deadline
 * passes, or nothing more can be expected (no worker alive, ranking queue
 * empty, feeding finished). Every outcome is reported to the EventTracker.
 *
 * @param item    zero-based position of the wanted result
 * @param timeout maximum waiting time in milliseconds
 * @return the entry at {@code item}, or {@code null} if it did not become available
 */
public ResultEntry oneResult(final int item, long timeout) {
    final long deadline = System.currentTimeMillis() + timeout;
    EventTracker.update(
            EventTracker.EClass.SEARCH,
            new ProfilingGraph.searchEvent(
                    query.id(true),
                    SearchEvent.Type.ONERESULT,
                    "started, item = " + item + ", available = " + this.result.sizeAvailable(),
                    0, 0),
            false);

    // fast path: the wanted result is already in the result array
    if (item < this.result.sizeAvailable()) {
        final ResultEntry cached = this.result.element(item).getElement();
        EventTracker.update(
                EventTracker.EClass.SEARCH,
                new ProfilingGraph.searchEvent(
                        query.id(true),
                        SearchEvent.Type.ONERESULT,
                        "prefetched, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + cached.urlstring(),
                        0, 0),
                false);
        return cached;
    }

    // deploy workers to get more results
    deployWorker(Math.min(20, query.itemsPerPage), item + query.itemsPerPage);

    // wait until the snippet fetch process has produced the wanted result
    WeakPriorityBlockingQueue.Element<ResultEntry> found = null;
    while (System.currentTimeMillis() < deadline) {
        // give up early if the item can never be reached
        final boolean tooFewCandidates = this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item;
        if (tooFewCandidates && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break;

        try {
            // NOTE(review): the second argument looks like a wait time in milliseconds - confirm
            found = this.result.element(item, 50);
        } catch (InterruptedException e) {
            Log.logException(e);
        }
        if (found != null) break;

        // nobody is producing anymore and nothing is queued: stop waiting
        if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break;
    }

    if (found == null) {
        EventTracker.update(
                EventTracker.EClass.SEARCH,
                new ProfilingGraph.searchEvent(
                        query.id(true),
                        SearchEvent.Type.ONERESULT,
                        "not found, item = " + item + ", available = " + this.result.sizeAvailable(),
                        0, 0),
                false);
        return null;
    }

    final ResultEntry fetched = found.getElement();
    EventTracker.update(
            EventTracker.EClass.SEARCH,
            new ProfilingGraph.searchEvent(
                    query.id(true),
                    SearchEvent.Type.ONERESULT,
                    "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + fetched.urlstring(),
                    0, 0),
            false);
    return fetched;
}
// position of the result that the next call of nextResult() asks for
private int resultCounter = 0;

/**
 * Requests the result at the current counter position via
 * {@code oneResult} with a timeout argument of 1000 and then advances the
 * counter, whether or not a result was found.
 *
 * @return the entry delivered by {@code oneResult}, possibly {@code null}
 */
public ResultEntry nextResult() {
    final ResultEntry next = oneResult(resultCounter, 1000);
    resultCounter += 1;
    return next;
}
/**
 * Returns the image snippet at position {@code item} of the image stack,
 * pulling further results into the stack when there are too few.
 *
 * @param item zero-based position of the wanted image
 * @return the image at {@code item}, or {@code null} if the stack could not
 *         be filled far enough
 */
public MediaSnippet oneImage(final int item) {
    // look ahead: fetch one more result whenever fewer than ten spare images remain
    if (this.images.sizeAvailable() <= item + 10) {
        fillImagesCache();
    }

    // the item has been retrieved before
    if (this.images.sizeDrained() > item) {
        return this.images.element(item).getElement();
    }

    // keep filling until there are enough images or a fill attempt adds nothing
    // NOTE(review): a result without images also yields 0 and ends this loop
    while (this.images.sizeAvailable() <= item && fillImagesCache() != 0) {
        // all work happens in the loop condition
    }

    // take the specific item from the image stack if it exists
    return (this.images.sizeAvailable() <= item) ? null : this.images.element(item).getElement();
}
/**
 * Takes the next search result and puts all of its media snippets onto the
 * image stack, each weighted by its own {@code ranking}.
 *
 * @return the number of snippets put onto the stack; 0 if there was no next
 *         result or the result carried no media snippets
 */
private int fillImagesCache() {
    final ResultEntry next = nextResult();
    if (next == null) return 0;

    final List<MediaSnippet> snippets = next.mediaSnippets();
    if (snippets == null) return 0;

    int added = 0;
    for (final MediaSnippet snippet : snippets) {
        // per the original note, the stack removes its smallest element on overflow
        images.put(new ReverseElement<MediaSnippet>(snippet, snippet.ranking));
        added++;
    }
    return added;
}
/**
 * Waits until enough results are available and then returns them.
 *
 * The method polls in 20 ms steps while fewer than
 * {@code query.neededResults()} results are available, at least one worker
 * is alive and the waiting time has not elapsed. If the waiting thread is
 * interrupted, its interrupt status is restored and the wait ends at once,
 * so the caller gets whatever is available and can still see the interrupt.
 *
 * @param waitingtime maximum time to wait, in milliseconds
 * @return the first {@code min(query.neededResults(), result.sizeAvailable())}
 *         elements as delivered by {@code result.list}
 */
public ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> completeResults(final long waitingtime) {
    final long timeout = System.currentTimeMillis() + waitingtime;
    while (result.sizeAvailable() < query.neededResults() &&
           anyWorkerAlive() &&
           System.currentTimeMillis() < timeout) {
        try {
            Thread.sleep(20);
        } catch (final InterruptedException e) {
            // do not swallow the interruption: re-assert the flag and stop waiting
            Thread.currentThread().interrupt();
            break;
        }
    }
    return this.result.list(Math.min(query.neededResults(), this.result.sizeAvailable()));
}
/**
 * Computes an additional ranking score for one result entry.
 *
 * The score is the sum of several boosts, each shifted left by a
 * coefficient taken from {@code query.ranking}:
 * media link counts matching the searched content domain, matches of the
 * {@code query.prefer} pattern on url and title, occurrences of url and
 * title components in {@code topwords}, and query word hashes that appear
 * in the url or the title.
 *
 * All shifts are carried out in 64-bit arithmetic so that a large count or
 * coefficient cannot overflow an {@code int} before it is added to the score.
 *
 * @param rentry   the result entry to be scored
 * @param topwords score map queried with {@code get(word)} for each url/title component
 * @return the accumulated boost
 */
public long postRanking(
        final ResultEntry rentry,
        final StaticScore<String> topwords) {

    long r = 0;

    // for media search: prefer pages with many links
    if (query.contentdom == ContentDomain.IMAGE) r += ((long) rentry.limage()) << query.ranking.coeff_cathasimage;
    if (query.contentdom == ContentDomain.AUDIO) r += ((long) rentry.laudio()) << query.ranking.coeff_cathasaudio;
    if (query.contentdom == ContentDomain.VIDEO) r += ((long) rentry.lvideo()) << query.ranking.coeff_cathasvideo;
    if (query.contentdom == ContentDomain.APP  ) r += ((long) rentry.lapp())   << query.ranking.coeff_cathasapp;

    // the normal form of the url is needed twice below, so compute it once
    final String urlstring = rentry.url().toNormalform(true, true);

    // prefer hit with 'prefer' pattern
    if (query.prefer.matcher(urlstring).matches()) r += 256L << query.ranking.coeff_prefer;
    if (query.prefer.matcher(rentry.title()).matches()) r += 256L << query.ranking.coeff_prefer;

    // apply 'common-sense' heuristic using references
    final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
    final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
    int tc;
    for (int j = 0; j < urlcomps.length; j++) {
        tc = topwords.get(urlcomps[j]);
        // tc > 0 already guarantees a boost of at least 1
        if (tc > 0) r += ((long) tc) << query.ranking.coeff_urlcompintoplist;
    }
    for (int j = 0; j < descrcomps.length; j++) {
        tc = topwords.get(descrcomps[j]);
        if (tc > 0) r += ((long) tc) << query.ranking.coeff_descrcompintoplist;
    }

    // apply query-in-result matching
    final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
    final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
    final Iterator<byte[]> shi = query.queryHashes.iterator();
    byte[] queryhash;
    while (shi.hasNext()) {
        queryhash = shi.next();
        if (urlcomph.has(queryhash)) r += 256L << query.ranking.coeff_appurl;
        if (descrcomph.has(queryhash)) r += 256L << query.ranking.coeff_app_dc_title;
    }

    return r;
}
public void deployWorker(int deployCount, final int neededResults) {
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) return;
this.workerThreads = new Worker[/*(query.snippetCacheStrategy.mustBeOffline()) ? 1 : */deployCount];
if (this.workerThreads == null) {
this.workerThreads = new Worker[deployCount];
synchronized(this.workerThreads) {
for (int i = 0; i < workerThreads.length; i++) {
Worker worker = new Worker(i, 10000, query.snippetCacheStrategy, neededResults);
@ -116,9 +254,24 @@ public class ResultFetcher {
this.workerThreads[i] = worker;
}
}
} else {
// there are still worker threads running, but some may be dead.
// if we find dead workers, reanimate them
synchronized(this.workerThreads) {
for (int i = 0; i < this.workerThreads.length; i++) {
if (deployCount <= 0) break;
if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) {
Worker worker = new Worker(i, 10000, query.snippetCacheStrategy, neededResults);
worker.start();
this.workerThreads[i] = worker;
deployCount--;
}
}
}
}
}
boolean anyWorkerAlive() {
private boolean anyWorkerAlive() {
if (this.workerThreads == null) return false;
synchronized(this.workerThreads) {
for (int i = 0; i < this.workerThreads.length; i++) {
@ -130,14 +283,6 @@ public class ResultFetcher {
return false;
}
/**
 * Returns the current value of {@code urlRetrievalAllTime}.
 * NOTE(review): presumably the accumulated time spent on URL retrieval;
 * the unit is not visible here - confirm against the code that updates the field.
 *
 * @return the value of {@code urlRetrievalAllTime}
 */
public long getURLRetrievalTime() {
    return urlRetrievalAllTime;
}
/**
 * Returns the current value of {@code snippetComputationAllTime}.
 * NOTE(review): presumably the accumulated time spent on snippet computation;
 * the unit is not visible here - confirm against the code that updates the field.
 *
 * @return the value of {@code snippetComputationAllTime}
 */
public long getSnippetComputationTime() {
    return snippetComputationAllTime;
}
protected class Worker extends Thread {
private final long timeout; // the date until this thread should try to work
@ -169,12 +314,13 @@ public class ResultFetcher {
// check if we have enough
if (result.sizeAvailable() >= this.neededResults) {
//System.out.println("result.sizeAvailable() >= this.neededResults");
System.out.println(result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults);
break;
}
// check if we can succeed if we try to take another url
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) {
System.out.println("rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0");
break;
}
@ -182,7 +328,7 @@ public class ResultFetcher {
page = rankingProcess.takeURL(true, this.timeout - System.currentTimeMillis());
//if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
if (page == null) {
//System.out.println("page == null");
System.out.println("page == null");
break; // no more available
}
if (failedURLs.has(page.hash())) continue;
@ -294,147 +440,4 @@ public class ResultFetcher {
}
Log.logInfo("SEARCH", "sorted out urlhash " + new String(urlhash) + " during search: " + reason);
}
/**
 * Returns the result entry at position {@code item}, waiting for it if it
 * has not been produced yet.
 *
 * If the entry is already available (e.g. a search page is accessed a second
 * time) it is returned immediately. Otherwise workers are deployed with a
 * target that is {@code item + itemsPerPage} rounded down to a multiple of
 * the page size, and the method polls the result stack until the entry shows
 * up, the deadline passes, or nothing more can be expected (no worker alive,
 * ranking queue empty, feeding finished). Every outcome is reported to the
 * EventTracker.
 *
 * @param item    zero-based position of the wanted result
 * @param timeout maximum waiting time in milliseconds
 * @return the entry at {@code item}, or {@code null} if it did not become available
 */
public ResultEntry oneResult(final int item, long timeout) {
    final long deadline = System.currentTimeMillis() + timeout;
    EventTracker.update(
            EventTracker.EClass.SEARCH,
            new ProfilingGraph.searchEvent(
                    query.id(true),
                    SearchEvent.Type.ONERESULT,
                    "started, item = " + item + ", available = " + this.result.sizeAvailable(),
                    0, 0),
            false);

    // fast path: the wanted result is already in the result array
    if (item < this.result.sizeAvailable()) {
        final ResultEntry cached = this.result.element(item).getElement();
        EventTracker.update(
                EventTracker.EClass.SEARCH,
                new ProfilingGraph.searchEvent(
                        query.id(true),
                        SearchEvent.Type.ONERESULT,
                        "prefetched, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + cached.urlstring(),
                        0, 0),
                false);
        return cached;
    }

    // start worker threads to fetch urls and snippets if the item is still missing
    if (this.result.sizeAvailable() <= item) {
        final int pageSize = query.itemsPerPage;
        deployWorker(Math.min(20, pageSize), ((item + pageSize) / pageSize) * pageSize);
    }

    // wait until the snippet fetch process has produced the wanted result
    WeakPriorityBlockingQueue.Element<ResultEntry> found = null;
    while (System.currentTimeMillis() < deadline) {
        // give up early if the item can never be reached
        final boolean tooFewCandidates = this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item;
        if (tooFewCandidates && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break;

        try {
            // NOTE(review): the second argument looks like a wait time in milliseconds - confirm
            found = this.result.element(item, 50);
        } catch (InterruptedException e) {
            Log.logException(e);
        }
        if (found != null) break;

        // nobody is producing anymore and nothing is queued: stop waiting
        if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break;
    }

    if (found == null) {
        EventTracker.update(
                EventTracker.EClass.SEARCH,
                new ProfilingGraph.searchEvent(
                        query.id(true),
                        SearchEvent.Type.ONERESULT,
                        "not found, item = " + item + ", available = " + this.result.sizeAvailable(),
                        0, 0),
                false);
        return null;
    }

    final ResultEntry fetched = found.getElement();
    EventTracker.update(
            EventTracker.EClass.SEARCH,
            new ProfilingGraph.searchEvent(
                    query.id(true),
                    SearchEvent.Type.ONERESULT,
                    "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + fetched.urlstring(),
                    0, 0),
            false);
    return fetched;
}
// position of the result that the next call of nextResult() asks for
private int resultCounter = 0;

/**
 * Requests the result at the current counter position via
 * {@code oneResult} with a timeout argument of 1000 and then advances the
 * counter, whether or not a result was found.
 *
 * @return the entry delivered by {@code oneResult}, possibly {@code null}
 */
public ResultEntry nextResult() {
    final ResultEntry next = oneResult(resultCounter, 1000);
    resultCounter += 1;
    return next;
}
/**
 * Returns the image snippet at position {@code item} of the image stack,
 * pulling further results into the stack when there are too few.
 *
 * @param item zero-based position of the wanted image
 * @return the image at {@code item}, or {@code null} if the stack could not
 *         be filled far enough
 */
public MediaSnippet oneImage(final int item) {
    // look ahead: fetch one more result whenever fewer than ten spare images remain
    if (this.images.sizeAvailable() <= item + 10) {
        fillImagesCache();
    }

    // the item has been retrieved before
    if (this.images.sizeDrained() > item) {
        return this.images.element(item).getElement();
    }

    // keep filling until there are enough images or a fill attempt adds nothing
    // NOTE(review): a result without images also yields 0 and ends this loop
    while (this.images.sizeAvailable() <= item && fillImagesCache() != 0) {
        // all work happens in the loop condition
    }

    // take the specific item from the image stack if it exists
    return (this.images.sizeAvailable() <= item) ? null : this.images.element(item).getElement();
}
/**
 * Takes the next search result and puts all of its media snippets onto the
 * image stack, each weighted by its own {@code ranking}.
 *
 * @return the number of snippets put onto the stack; 0 if there was no next
 *         result or the result carried no media snippets
 */
private int fillImagesCache() {
    final ResultEntry next = nextResult();
    if (next == null) return 0;

    final List<MediaSnippet> snippets = next.mediaSnippets();
    if (snippets == null) return 0;

    int added = 0;
    for (final MediaSnippet snippet : snippets) {
        // per the original note, the stack removes its smallest element on overflow
        images.put(new ReverseElement<MediaSnippet>(snippet, snippet.ranking));
        added++;
    }
    return added;
}
/**
 * Waits until enough results are available and then returns them.
 *
 * The method polls in 20 ms steps while fewer than
 * {@code query.neededResults()} results are available, at least one worker
 * is alive and the waiting time has not elapsed. If the waiting thread is
 * interrupted, its interrupt status is restored and the wait ends at once,
 * so the caller gets whatever is available and can still see the interrupt.
 *
 * @param waitingtime maximum time to wait, in milliseconds
 * @return the first {@code min(query.neededResults(), result.sizeAvailable())}
 *         elements as delivered by {@code result.list}
 */
public ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> completeResults(final long waitingtime) {
    final long timeout = System.currentTimeMillis() + waitingtime;
    while (result.sizeAvailable() < query.neededResults() &&
           anyWorkerAlive() &&
           System.currentTimeMillis() < timeout) {
        try {
            Thread.sleep(20);
        } catch (final InterruptedException e) {
            // do not swallow the interruption: re-assert the flag and stop waiting
            Thread.currentThread().interrupt();
            break;
        }
    }
    return this.result.list(Math.min(query.neededResults(), this.result.sizeAvailable()));
}
/**
 * Computes an additional ranking score for one result entry.
 *
 * The score is the sum of several boosts, each shifted left by a
 * coefficient taken from {@code query.ranking}:
 * media link counts matching the searched content domain, matches of the
 * {@code query.prefer} pattern on url and title, occurrences of url and
 * title components in {@code topwords}, and query word hashes that appear
 * in the url or the title.
 *
 * All shifts are carried out in 64-bit arithmetic so that a large count or
 * coefficient cannot overflow an {@code int} before it is added to the score.
 *
 * @param rentry   the result entry to be scored
 * @param topwords score map queried with {@code get(word)} for each url/title component
 * @return the accumulated boost
 */
public long postRanking(
        final ResultEntry rentry,
        final StaticScore<String> topwords) {

    long r = 0;

    // for media search: prefer pages with many links
    if (query.contentdom == ContentDomain.IMAGE) r += ((long) rentry.limage()) << query.ranking.coeff_cathasimage;
    if (query.contentdom == ContentDomain.AUDIO) r += ((long) rentry.laudio()) << query.ranking.coeff_cathasaudio;
    if (query.contentdom == ContentDomain.VIDEO) r += ((long) rentry.lvideo()) << query.ranking.coeff_cathasvideo;
    if (query.contentdom == ContentDomain.APP  ) r += ((long) rentry.lapp())   << query.ranking.coeff_cathasapp;

    // the normal form of the url is needed twice below, so compute it once
    final String urlstring = rentry.url().toNormalform(true, true);

    // prefer hit with 'prefer' pattern
    if (query.prefer.matcher(urlstring).matches()) r += 256L << query.ranking.coeff_prefer;
    if (query.prefer.matcher(rentry.title()).matches()) r += 256L << query.ranking.coeff_prefer;

    // apply 'common-sense' heuristic using references
    final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
    final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
    int tc;
    for (int j = 0; j < urlcomps.length; j++) {
        tc = topwords.get(urlcomps[j]);
        // tc > 0 already guarantees a boost of at least 1
        if (tc > 0) r += ((long) tc) << query.ranking.coeff_urlcompintoplist;
    }
    for (int j = 0; j < descrcomps.length; j++) {
        tc = topwords.get(descrcomps[j]);
        if (tc > 0) r += ((long) tc) << query.ranking.coeff_descrcompintoplist;
    }

    // apply query-in-result matching
    final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
    final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
    final Iterator<byte[]> shi = query.queryHashes.iterator();
    byte[] queryhash;
    while (shi.hasNext()) {
        queryhash = shi.next();
        if (urlcomph.has(queryhash)) r += 256L << query.ranking.coeff_appurl;
        if (descrcomph.has(queryhash)) r += 256L << query.ranking.coeff_app_dc_title;
    }

    return r;
}
}

@ -250,7 +250,7 @@ public class Segment {
} catch (RowSpaceExceededException e) {
continue;
}
rankingProcess.add(container, false, sourceName, -1);
rankingProcess.add(container, true, sourceName, -1);
}
}
if (rankingProcess != null) rankingProcess.oneFeederTerminated();

Loading…
Cancel
Save