more refactoring of search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6270 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 323a8e733d
commit 61748285c3

@ -366,7 +366,7 @@ public class IndexControlRWIs_p {
URLMetadataRow entry;
String us;
long rn = -1;
while ((ranked.size() > 0) && ((entry = ranked.bestURL(false)) != null)) {
while ((ranked.size() > 0) && ((entry = ranked.takeURL(false)) != null)) {
if ((entry == null) || (entry.metadata() == null)) continue;
url = entry.metadata().url();
if (url == null) continue;
@ -480,7 +480,7 @@ public class IndexControlRWIs_p {
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(new String(keyhash), -1, sb.getRanking(), filter);
final RankingProcess ranked = new RankingProcess(sb.indexSegment, query, Integer.MAX_VALUE, 1);
ranked.execQuery();
ranked.run();
if (ranked.filteredCount() == 0) {
prop.put("searchresult", 2);

@ -65,19 +65,21 @@ public final class RankingProcess extends Thread {
private static boolean useYBR = true;
private static final int maxDoubleDomAll = 20, maxDoubleDomSpecial = 10000;
private final SortStack<WordReferenceVars> stack;
private final HashMap<String, SortStack<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
private final HashSet<String> handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final Segment indexSegment;
private final QueryParams query;
private final int maxentries;
private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize;
private final ReferenceOrder order;
private final ConcurrentHashMap<String, Integer> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter
private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB
private final Segment indexSegment;
private HashMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private final int[] domZones;
private HashMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize;
private final SortStack<WordReferenceVars> stack;
private final HashMap<String, SortStack<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
private final HashSet<String> handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
@ -114,12 +116,26 @@ public final class RankingProcess extends Thread {
}
public void run() {
// do a search concurrently
// do a search
// sort the local containers and truncate them to a limited count,
// so that subsequent sorting together with the global results will be fast
try {
execQuery();
long timer = System.currentTimeMillis();
final TermSearch<WordReference> search = this.indexSegment.termIndex().query(
query.queryHashes,
query.excludeHashes,
null,
Segment.wordReferenceFactory,
query.maxDistance);
this.localSearchInclusion = search.inclusion();
final ReferenceContainer<WordReference> index = search.joined();
serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer), false);
if (index.size() == 0) {
return;
}
add(index, true, index.size());
} catch (final Exception e) {
e.printStackTrace();
}
@ -133,26 +149,7 @@ public final class RankingProcess extends Thread {
return this.domZones;
}
public void execQuery() {
long timer = System.currentTimeMillis();
final TermSearch<WordReference> search = this.indexSegment.termIndex().query(
query.queryHashes,
query.excludeHashes,
null,
Segment.wordReferenceFactory,
query.maxDistance);
this.localSearchInclusion = search.inclusion();
final ReferenceContainer<WordReference> index = search.joined();
serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer), false);
if (index.size() == 0) {
return;
}
insertRanked(index, true, index.size());
}
public void insertRanked(final ReferenceContainer<WordReference> index, final boolean local, final int fullResource) {
public void add(final ReferenceContainer<WordReference> index, final boolean local, final int fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@ -173,14 +170,11 @@ public final class RankingProcess extends Thread {
// iterate over normalized entries and select some that are better than currently stored
timer = System.currentTimeMillis();
final Iterator<WordReferenceVars> i = decodedEntries.iterator();
WordReferenceVars iEntry;
Long r;
HostInfo hs;
String domhash;
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
while (i.hasNext()) {
iEntry = i.next();
for (WordReferenceVars iEntry: decodedEntries) {
assert (iEntry.metadataHash().length() == index.row().primaryKeyLength);
//if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue;
@ -282,7 +276,7 @@ public final class RankingProcess extends Thread {
// - root-domain guessing to prefer the root domain over other urls if search word appears in domain name
private SortStack<WordReferenceVars>.stackElement bestRWI(final boolean skipDoubleDom) {
private SortStack<WordReferenceVars>.stackElement takeRWI(final boolean skipDoubleDom) {
// returns the best entry from the current RWI list and removes it from the list
SortStack<WordReferenceVars> m;
SortStack<WordReferenceVars>.stackElement rwi;
@ -328,16 +322,19 @@ public final class RankingProcess extends Thread {
return bestEntry;
}
public URLMetadataRow bestURL(final boolean skipDoubleDom) {
public URLMetadataRow takeURL(final boolean skipDoubleDom) {
// returns the best URL entry from the current RWI list and removes it from the list
while ((stack.size() > 0) || (size() > 0)) {
if (((stack.size() == 0) && (size() == 0))) break;
final SortStack<WordReferenceVars>.stackElement obrwi = bestRWI(skipDoubleDom);
final SortStack<WordReferenceVars>.stackElement obrwi = takeRWI(skipDoubleDom);
if (obrwi == null) continue; // *** ? this happened and the thread was suspended silently. cause?
final URLMetadataRow u = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
if (u != null) {
final URLMetadataRow.Components metadata = u.metadata();
// TODO: check url constraints
// evaluate information of metadata for navigation
// author navigation:
String author = metadata.dc_creator();
@ -376,11 +373,11 @@ public final class RankingProcess extends Thread {
return null;
}
public URLMetadataRow bestURL(final boolean skipDoubleDom, long timeout) {
public URLMetadataRow takeURL(final boolean skipDoubleDom, long timeout) {
timeout += System.currentTimeMillis();
long wait = 10;
while (System.currentTimeMillis() < timeout) {
URLMetadataRow row = bestURL(skipDoubleDom);
URLMetadataRow row = takeURL(skipDoubleDom);
if (row != null) return row;
try {Thread.sleep(wait);} catch (final InterruptedException e1) {}
wait = wait * 2;
@ -391,8 +388,9 @@ public final class RankingProcess extends Thread {
public int size() {
//assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
int c = stack.size();
final Iterator<SortStack<WordReferenceVars>> i = this.doubleDomCache.values().iterator();
while (i.hasNext()) c += i.next().size();
for (SortStack<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.size();
}
return c;
}

@ -45,7 +45,7 @@ import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.logging.Log;
import de.anomic.ymage.ProfilingGraph;
public class SnippetFetcher {
public class ResultFetcher {
protected final static int workerThreadCount = 10;
@ -66,7 +66,7 @@ public class SnippetFetcher {
@SuppressWarnings("unchecked")
SnippetFetcher(
ResultFetcher(
RankingProcess rankedCache,
final QueryParams query,
final Segment indexSegment,
@ -112,7 +112,7 @@ public class SnippetFetcher {
}
}
ResultEntry obtainResultEntry(final URLMetadataRow page, final int snippetFetchMode) {
protected ResultEntry obtainResultEntry(final URLMetadataRow page, final int snippetFetchMode) {
// a search result entry needs some work to produce a result Entry:
// - check if url entry exists in LURL-db
@ -276,7 +276,7 @@ public class SnippetFetcher {
if ((query.contentdom != QueryParams.CONTENTDOM_IMAGE) && (result.size() >= query.neededResults() + fetchAhead)) break;
// get next entry
page = rankedCache.bestURL(true, 10000);
page = rankedCache.takeURL(true, 10000);
if (page == null) break;
if (result.exists(page.hash().hashCode())) continue;
if (failedURLs.get(page.hash()) != null) continue;

@ -66,7 +66,7 @@ public final class SearchEvent {
private final Segment indexSegment;
private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private SnippetFetcher snippets;
private ResultFetcher snippets;
// class variables for search abstracts
private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
@ -144,7 +144,7 @@ public final class SearchEvent {
} else {
// do a local search
this.rankedCache = new RankingProcess(indexSegment, query, max_results_preparation, 2);
this.rankedCache.execQuery();
this.rankedCache.run();
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
if (generateAbstracts) {
@ -176,7 +176,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.snippets = new SnippetFetcher(rankedCache, query, indexSegment, peers);
this.snippets = new ResultFetcher(rankedCache, query, indexSegment, peers);
// clean up events
SearchEventCache.cleanupEvents(false);
@ -400,7 +400,7 @@ public final class SearchEvent {
//assert e != null;
}
public SnippetFetcher result() {
public ResultFetcher result() {
return this.snippets;
}

@ -602,7 +602,7 @@ public final class yacyClient {
// store remote result to local result container
synchronized (containerCache) {
// insert one container into the search result buffer
containerCache.insertRanked(container[0], false, joincount); // one is enough
containerCache.add(container[0], false, joincount); // one is enough
// integrate remote topwords
final String references = result.get("references");

Loading…
Cancel
Save