small enhancements in search result computation speed

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6039 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent e0b3984805
commit 2c5554c912

@ -121,6 +121,7 @@ public class URLMetadataRow implements Metadata {
private final String snippet;
private WordReference word; // this is only used if the url is transported via remote search requests
private final long ranking; // during generation of a search result this value is set
private Components comp;
public URLMetadataRow(
final yacyURL url,
@ -176,6 +177,7 @@ public class URLMetadataRow implements Metadata {
this.snippet = null;
this.word = null;
this.ranking = 0;
this.comp = null;
}
private void encodeDate(final int col, final Date d) {
@ -206,6 +208,7 @@ public class URLMetadataRow implements Metadata {
this.snippet = null;
this.word = searchedWord;
this.ranking = ranking;
this.comp = null;
}
public URLMetadataRow(final Properties prop) {
@ -270,6 +273,7 @@ public class URLMetadataRow implements Metadata {
this.word = new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""), "de.anomic.index.indexURLEntry.indexURLEntry()"));
}
this.ranking = 0;
this.comp = null;
}
public static URLMetadataRow importEntry(final String propStr) {
@ -345,14 +349,18 @@ public class URLMetadataRow implements Metadata {
}
/**
 * Returns the {@code Components} of this row, built from the "comp" column of
 * {@code this.entry} and cached in {@code this.comp} so that later calls return
 * the same object without parsing again.
 *
 * Each of the five positional strings is trimmed; a position that is missing
 * from the parsed list is replaced by the empty string. The second constructor
 * argument is always {@code hash()}.
 *
 * NOTE(review): this hunk is rendered without +/- markers. The line
 * {@code return new Components(} looks like the pre-change line and
 * {@code this.comp = new Components(} like its replacement; only one of them
 * can be present in compilable code - confirm against the commit.
 * NOTE(review): {@code this.comp} is read and assigned here without any visible
 * synchronization - confirm whether concurrent callers are possible.
 *
 * @return the cached or freshly parsed components of this row
 */
public Components metadata() {
// avoid double computation of metadata elements
if (this.comp != null) return this.comp;
// parse elements from comp string;
// presumably FileUtils.strings splits the column content into strings decoded as UTF-8 - verify
final ArrayList<String> cl = FileUtils.strings(this.entry.getCol("comp", null), "UTF-8");
return new Components(
// the assignment below stores the new object in the cache field before it is returned
this.comp = new Components(
(cl.size() > 0) ? (cl.get(0)).trim() : "",
hash(),
(cl.size() > 1) ? (cl.get(1)).trim() : "",
(cl.size() > 2) ? (cl.get(2)).trim() : "",
(cl.size() > 3) ? (cl.get(3)).trim() : "",
(cl.size() > 4) ? (cl.get(4)).trim() : "");
return this.comp;
}
public Date moddate() {

@ -541,7 +541,7 @@ public final class plasmaSearchEvent {
if (page == null) {
if (!anyRemoteSearchAlive()) break; // we cannot expect more results
// if we did not get another entry, sleep some time and try again
try {Thread.sleep(100);} catch (final InterruptedException e1) {}
try {Thread.sleep(10);} catch (final InterruptedException e1) {}
continue;
}
if (result.exists(page.hash().hashCode())) continue;

@ -32,6 +32,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
@ -65,7 +66,7 @@ public final class plasmaSearchRankingProcess {
private final SortStack<WordReferenceVars> stack;
private final HashMap<String, SortStack<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
private final HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
private final HashSet<String> handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final plasmaSearchQuery query;
private final int maxentries;
private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize;
@ -91,7 +92,7 @@ public final class plasmaSearchRankingProcess {
this.localSearchInclusion = null;
this.stack = new SortStack<WordReferenceVars>(maxentries);
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashMap<String, String>();
this.handover = new HashSet<String>();
this.order = (query == null) ? null : new ReferenceOrder(query.ranking, query.targetlang);
this.query = query;
this.maxentries = maxentries;
@ -317,47 +318,47 @@ public final class plasmaSearchRankingProcess {
/**
 * Picks entries via {@code bestRWI(skipDoubleDom)} one at a time, loads the URL
 * metadata row for each, and returns the first row that passes the author
 * filter and the URL mask of the query.
 *
 * Per iteration:
 * - a null result from {@code bestRWI} restarts the loop;
 * - a non-empty {@code dc_creator} is hashed and counted in
 *   {@code authorNavigator}; if {@code query.authorhash} is set and differs
 *   from that hash (or the author is empty), the entry is skipped;
 * - an entry whose normal-form URL is null or does not match
 *   {@code query.urlMask} is skipped;
 * - an accepted row has its hash recorded in {@code handover} and is returned;
 * - an iteration that neither returns nor hits a {@code continue} adds the
 *   metadata hash to {@code misses}.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the old and the
 * new version of the method body are interleaved below (several statement
 * groups appear twice). Brace nesting cannot be read off this view reliably -
 * confirm against the commit before relying on the structure.
 *
 * @param skipDoubleDom passed unchanged to {@code bestRWI}
 * @return the first metadata row that passes all filters, or {@code null} when
 *         the loop ends without one
 */
public URLMetadataRow bestURL(final boolean skipDoubleDom) {
// returns from the current RWI list the best URL entry and removes this entry from the list
while ((stack.size() > 0) || (size() > 0)) {
if (((stack.size() == 0) && (size() == 0))) break;
final SortStack<WordReferenceVars>.stackElement obrwi = bestRWI(skipDoubleDom);
if (obrwi == null) continue; // *** ? this happened and the thread was suspended silently. cause?
final URLMetadataRow u = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
if (u != null) {
final URLMetadataRow.Components metadata = u.metadata();
// NOTE(review): the next six lines repeat the six above - presumably the other side of the diff; confirm
if (((stack.size() == 0) && (size() == 0))) break;
final SortStack<WordReferenceVars>.stackElement obrwi = bestRWI(skipDoubleDom);
if (obrwi == null) continue; // *** ? this happened and the thread was suspended silently. cause?
final URLMetadataRow u = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
if (u != null) {
final URLMetadataRow.Components metadata = u.metadata();
// evaluate information of metadata for navigation
// author navigation:
String author = metadata.dc_creator();
if (author != null && author.length() > 0) {
// add author to the author navigator
String authorhash = new String(Word.word2hash(author));
// NOTE(review): this debug print is active here and commented out in the repeated section further down
System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author);
// check if we already are filtering for authors
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
continue;
}
// add author to the author navigator
AuthorInfo in = this.authorNavigator.get(authorhash);
if (in == null) {
this.authorNavigator.put(authorhash, new AuthorInfo(author));
} else {
in.inc();
this.authorNavigator.put(authorhash, in);
}
} else if (this.query.authorhash != null) {
// an author filter is set but this entry has no author: skip it
continue;
}
// evaluate information of metadata for navigation
// author navigation:
String author = metadata.dc_creator();
if (author != null && author.length() > 0) {
// add author to the author navigator
String authorhash = new String(Word.word2hash(author));
//System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author);
// get the url
if (metadata.url() != null) {
String urlstring = metadata.url().toNormalform(true, true);
// String.matches compiles query.urlMask as a regular expression on every call
if (urlstring == null || !urlstring.matches(query.urlMask)) continue;
// NOTE(review): map-style put of hash -> url string; the repeated section below uses a set-style add of the hash only
this.handover.put(u.hash(), metadata.url().toNormalform(true, false)); // remember that we handed over this url
return u;
// check if we already are filtering for authors
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
continue;
}
// add author to the author navigator
AuthorInfo in = this.authorNavigator.get(authorhash);
if (in == null) {
this.authorNavigator.put(authorhash, new AuthorInfo(author));
} else {
in.inc();
this.authorNavigator.put(authorhash, in);
}
} else if (this.query.authorhash != null) {
// an author filter is set but this entry has no author: skip it
continue;
}
misses.add(obrwi.element.metadataHash());
// get the url
if (metadata.url() != null) {
String urlstring = metadata.url().toNormalform(true, true);
// String.matches compiles query.urlMask as a regular expression on every call
if (urlstring == null || !urlstring.matches(query.urlMask)) continue;
this.handover.add(u.hash()); // remember that we handed over this url
return u;
}
}
misses.add(obrwi.element.metadataHash());
}
// the loop ended without an acceptable entry
return null;
}

Loading…
Cancel
Save