better search computation:

- increased the sort limit from 1000 to 3000 entries;
  this allows more results to be shown when strongly
  limiting constraints, such as domain navigation, are applied
- enhanced the sort process
- guard against domain navigator bugs (a host name that is already listed is not added again)
- fix in sort stack (the bottom check now compares in the correct direction for both upward and downward ordering)
- now showing all navigation pages on the first search (not only the next page)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6569 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent d126d6c1b5
commit 18172451a0

@ -591,7 +591,7 @@ public class yacysearch {
resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, originalUrlMask, null, navigation)); resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, originalUrlMask, null, navigation));
resnav.append("\"><img src=\"env/grafics/navdl.gif\" width=\"16\" height=\"16\"></a>&nbsp;"); resnav.append("\"><img src=\"env/grafics/navdl.gif\" width=\"16\" height=\"16\"></a>&nbsp;");
} }
final int numberofpages = Math.min(10, Math.min(thispage + 2, totalcount / theQuery.displayResults())); final int numberofpages = Math.min(10, Math.max(thispage + 1, totalcount / theQuery.displayResults()));
for (int i = 0; i < numberofpages; i++) { for (int i = 0; i < numberofpages; i++) {
if (i == thispage) { if (i == thispage) {
resnav.append("<img src=\"env/grafics/navs"); resnav.append("<img src=\"env/grafics/navs");

@ -201,7 +201,7 @@ public class DocumentIndex extends Segment {
public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) { public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) {
RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2); RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 2);
rankedCache.run(); rankedCache.run();
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>(); ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();

@ -67,7 +67,7 @@ public final class RankingProcess extends Thread {
private final QueryParams query; private final QueryParams query;
private final int maxentries; private final int maxentries;
private final ConcurrentHashMap<String, Integer> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final ConcurrentHashMap<String, Long> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter private final int[] flagcount; // flag counter
private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB
//private final int[] domZones; //private final int[] domZones;
@ -99,7 +99,7 @@ public final class RankingProcess extends Thread {
this.remote_indexCount = 0; this.remote_indexCount = 0;
this.remote_resourceSize = 0; this.remote_resourceSize = 0;
this.local_resourceSize = 0; this.local_resourceSize = 0;
this.urlhashes = new ConcurrentHashMap<String, Integer>(0, 0.75f, concurrency); this.urlhashes = new ConcurrentHashMap<String, Long>(0, 0.75f, concurrency);
this.misses = new TreeSet<String>(); this.misses = new TreeSet<String>();
this.flagcount = new int[32]; this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
@ -172,7 +172,8 @@ public final class RankingProcess extends Thread {
String domhash; String domhash;
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0; boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
WordReferenceVars iEntry; WordReferenceVars iEntry;
final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>(); Long r;
//final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>();
// apply all constraints // apply all constraints
try { try {
@ -225,39 +226,39 @@ public final class RankingProcess extends Thread {
} }
// accept // accept
filteredEntries.add(iEntry); //filteredEntries.add(iEntry);
// increase counter for statistics // increase counter for statistics
if (!local) this.remote_indexCount++; if (!local) this.remote_indexCount++;/*
} }
} catch (InterruptedException e) {}
// do the ranking // do the ranking
Long r; for (WordReferenceVars fEntry: filteredEntries) {
for (WordReferenceVars fEntry: filteredEntries) { */
// kick out entries that are too bad according to current findings
// kick out entries that are too bad according to current findings r = Long.valueOf(this.order.cardinal(iEntry));
r = Long.valueOf(this.order.cardinal(fEntry)); assert maxentries != 0;
assert maxentries != 0;
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue; // double-check
if (urlhashes.containsKey(iEntry.metadataHash())) continue;
// insert
if ((maxentries < 0) || (stack.size() < maxentries)) { // insert
// in case that we don't have enough yet, accept any new entry if (maxentries < 0 || stack.size() < maxentries) {
if (urlhashes.containsKey(fEntry.metadataHash())) continue; // in case that we don't have enough yet, accept any new entry
stack.push(fEntry, r); stack.push(iEntry, r);
} else { } else {
// if we already have enough entries, insert only such that are necessary to get a better result // if we already have enough entries, insert only such that are necessary to get a better result
if (stack.bottom(r.longValue())) { if (stack.bottom(r.longValue())) continue;
continue;
} // take the entry. the stack is automatically reduced
// double-check // to the maximum size by deletion of elements at the bottom
if (urlhashes.containsKey(fEntry.metadataHash())) continue; stack.push(iEntry, r);
stack.push(fEntry, r); }
} urlhashes.put(iEntry.metadataHash(), r);
}
}
} catch (InterruptedException e) {}
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true); //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime); EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime);
} }
@ -574,7 +575,7 @@ public final class RankingProcess extends Thread {
URIMetadataRow mr; URIMetadataRow mr;
DigestURI url; DigestURI url;
String hostname; String hostname;
for (int i = 0; i < rc; i++) { loop: for (int i = 0; i < rc; i++) {
mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0); mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
if (mr == null) continue; if (mr == null) continue;
url = mr.metadata().url(); url = mr.metadata().url();
@ -582,6 +583,7 @@ public final class RankingProcess extends Thread {
hostname = url.getHost(); hostname = url.getHost();
if (hostname == null) continue; if (hostname == null) continue;
if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue; if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue;
for (NavigatorEntry entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
result.add(new NavigatorEntry(hostname, hsa[i].count)); result.add(new NavigatorEntry(hostname, hsa[i].count));
} }
return result; return result;

@ -59,7 +59,7 @@ public final class SearchEvent {
public static final String NORMALIZING = "normalizing"; public static final String NORMALIZING = "normalizing";
public static final String FINALIZATION = "finalization"; public static final String FINALIZATION = "finalization";
private static final int max_results_preparation = 1000; public static final int max_results_preparation = 3000;
// class variables that may be implemented with an abstract class // class variables that may be implemented with an abstract class
private long eventTime; private long eventTime;

@ -177,10 +177,18 @@ public class SortStack<E> {
// returns true if the element with that weight would be on the bottom of the stack after inserting // returns true if the element with that weight would be on the bottom of the stack after inserting
if (this.onstack.isEmpty()) return true; if (this.onstack.isEmpty()) return true;
Long l; Long l;
synchronized (this.onstack) {
l = (this.upward) ? this.onstack.lastKey() : this.onstack.firstKey(); if (this.upward) {
synchronized (this.onstack) {
l = this.onstack.lastKey();
}
return weight > l.longValue();
} else {
synchronized (this.onstack) {
l = this.onstack.firstKey();
}
return weight < l.longValue();
} }
return weight > l.longValue();
} }
public class stackElement { public class stackElement {

Loading…
Cancel
Save