From 2c5554c912223f275722c41a3c84fdf7d91f4eb2 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 9 Jun 2009 15:22:23 +0000 Subject: [PATCH] small enhancements in search result computation speed git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6039 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../metadataPrototype/URLMetadataRow.java | 10 ++- .../de/anomic/plasma/plasmaSearchEvent.java | 2 +- .../plasma/plasmaSearchRankingProcess.java | 79 ++++++++++--------- 3 files changed, 50 insertions(+), 41 deletions(-) diff --git a/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java b/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java index 2b2d75ae1..a5c96cf13 100644 --- a/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java +++ b/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java @@ -121,6 +121,7 @@ public class URLMetadataRow implements Metadata { private final String snippet; private WordReference word; // this is only used if the url is transported via remote search requests private final long ranking; // during generation of a search result this value is set + private Components comp; public URLMetadataRow( final yacyURL url, @@ -176,6 +177,7 @@ public class URLMetadataRow implements Metadata { this.snippet = null; this.word = null; this.ranking = 0; + this.comp = null; } private void encodeDate(final int col, final Date d) { @@ -206,6 +208,7 @@ public class URLMetadataRow implements Metadata { this.snippet = null; this.word = searchedWord; this.ranking = ranking; + this.comp = null; } public URLMetadataRow(final Properties prop) { @@ -270,6 +273,7 @@ public class URLMetadataRow implements Metadata { this.word = new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""), "de.anomic.index.indexURLEntry.indexURLEntry()")); } this.ranking = 0; + this.comp = null; } public static URLMetadataRow importEntry(final String propStr) { @@ -345,14 +349,18 @@ public class URLMetadataRow implements Metadata { } public Components metadata() { + // avoid double computation of metadata elements + if (this.comp != null) return this.comp; + // parse elements from comp string; final ArrayList cl = FileUtils.strings(this.entry.getCol("comp", null), "UTF-8"); - return new Components( + this.comp = new Components( (cl.size() > 0) ? (cl.get(0)).trim() : "", hash(), (cl.size() > 1) ? (cl.get(1)).trim() : "", (cl.size() > 2) ? (cl.get(2)).trim() : "", (cl.size() > 3) ? (cl.get(3)).trim() : "", (cl.size() > 4) ? (cl.get(4)).trim() : ""); + return this.comp; } public Date moddate() { diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index ff992426e..f6a53c989 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -541,7 +541,7 @@ public final class plasmaSearchEvent { if (page == null) { if (!anyRemoteSearchAlive()) break; // we cannot expect more results // if we did not get another entry, sleep some time and try again - try {Thread.sleep(100);} catch (final InterruptedException e1) {} + try {Thread.sleep(10);} catch (final InterruptedException e1) {} continue; } if (result.exists(page.hash().hashCode())) continue; diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 612bce706..d0022938b 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -65,7 +66,7 @@ public final class plasmaSearchRankingProcess { private final SortStack stack; private final HashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack - private final HashMap handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process + private final HashSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process private final plasmaSearchQuery query; private final int maxentries; private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize; @@ -91,7 +92,7 @@ public final class plasmaSearchRankingProcess { this.localSearchInclusion = null; this.stack = new SortStack(maxentries); this.doubleDomCache = new HashMap>(); - this.handover = new HashMap(); + this.handover = new HashSet(); this.order = (query == null) ? null : new ReferenceOrder(query.ranking, query.targetlang); this.query = query; this.maxentries = maxentries; @@ -317,47 +318,47 @@ public final class plasmaSearchRankingProcess { public URLMetadataRow bestURL(final boolean skipDoubleDom) { // returns from the current RWI list the best URL entry and removes this entry from the list while ((stack.size() > 0) || (size() > 0)) { - if (((stack.size() == 0) && (size() == 0))) break; - final SortStack.stackElement obrwi = bestRWI(skipDoubleDom); - if (obrwi == null) continue; // *** ? this happened and the thread was suspended silently. cause? - final URLMetadataRow u = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue()); - if (u != null) { - final URLMetadataRow.Components metadata = u.metadata(); + if (((stack.size() == 0) && (size() == 0))) break; + final SortStack.stackElement obrwi = bestRWI(skipDoubleDom); + if (obrwi == null) continue; // *** ? this happened and the thread was suspended silently. cause? + final URLMetadataRow u = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue()); + if (u != null) { + final URLMetadataRow.Components metadata = u.metadata(); - // evaluate information of metadata for navigation - // author navigation: - String author = metadata.dc_creator(); - if (author != null && author.length() > 0) { - // add author to the author navigator - String authorhash = new String(Word.word2hash(author)); - System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author); - - // check if we already are filtering for authors - if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) { - continue; - } - - // add author to the author navigator - AuthorInfo in = this.authorNavigator.get(authorhash); - if (in == null) { - this.authorNavigator.put(authorhash, new AuthorInfo(author)); - } else { - in.inc(); - this.authorNavigator.put(authorhash, in); - } - } else if (this.query.authorhash != null) { - continue; - } + // evaluate information of metadata for navigation + // author navigation: + String author = metadata.dc_creator(); + if (author != null && author.length() > 0) { + // add author to the author navigator + String authorhash = new String(Word.word2hash(author)); + //System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author); - // get the url - if (metadata.url() != null) { - String urlstring = metadata.url().toNormalform(true, true); - if (urlstring == null || !urlstring.matches(query.urlMask)) continue; - this.handover.put(u.hash(), metadata.url().toNormalform(true, false)); // remember that we handed over this url - return u; + // check if we already are filtering for authors + if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) { + continue; + } + + // add author to the author navigator + AuthorInfo in = this.authorNavigator.get(authorhash); + if (in == null) { + this.authorNavigator.put(authorhash, new AuthorInfo(author)); + } else { + in.inc(); + this.authorNavigator.put(authorhash, in); } + } else if (this.query.authorhash != null) { + continue; } - misses.add(obrwi.element.metadataHash()); + + // get the url + if (metadata.url() != null) { + String urlstring = metadata.url().toNormalform(true, true); + if (urlstring == null || !urlstring.matches(query.urlMask)) continue; + this.handover.add(u.hash()); // remember that we handed over this url + return u; + } + } + misses.add(obrwi.element.metadataHash()); } return null; }