From ce0e5b1e17a21496b37abcd464129d0d339b20b3 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 18 Oct 2012 15:09:04 +0200 Subject: [PATCH] - more refactoring / private methods - fix for usage of custom solr field names --- htroot/HostBrowser.java | 8 +- .../solr/connector/AbstractSolrConnector.java | 2 +- .../solr/connector/MirrorSolrConnector.java | 2 +- .../kelondro/data/meta/URIMetadataNode.java | 71 +++++++++- source/net/yacy/search/index/Fulltext.java | 18 +-- .../yacy/search/index/SolrConfiguration.java | 123 ++---------------- source/net/yacy/search/query/SearchEvent.java | 1 - .../net/yacy/search/query/SnippetProcess.java | 53 +++----- 8 files changed, 107 insertions(+), 171 deletions(-) diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 62c98aed2..153d84580 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -41,11 +41,11 @@ import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.crawler.retrieval.Request; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.logging.Log; import net.yacy.peers.graphics.WebStructureGraph.StructureEntry; import net.yacy.search.Switchboard; import net.yacy.search.index.Fulltext; -import net.yacy.search.index.SolrConfiguration; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -168,18 +168,18 @@ public class HostBrowser { Map> outboundHosts = new HashMap>(); int hostsize = 0; while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { - String u = (String) doc.getFieldValue(YaCySchema.sku.name()); + String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()); hostsize++; if (u.startsWith(path)) storedDocs.add(u); // collect inboundlinks to browse the host - Iterator links = SolrConfiguration.getLinks(doc, true); + Iterator links = URIMetadataNode.getLinks(doc, true); while (links.hasNext()) { u = links.next(); if (u.startsWith(path) && !storedDocs.contains(u)) inboundLinks.add(u); } // collect outboundlinks to browse to the outbound - links = SolrConfiguration.getLinks(doc, false); + links = URIMetadataNode.getLinks(doc, false); while (links.hasNext()) { u = links.next(); try { diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index 95749926a..d59e38243 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -109,7 +109,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { try { SolrDocumentList sdl = query(querystring, o, pagesize); for (SolrDocument d: sdl) { - try {queue.put((String) d.getFieldValue(YaCySchema.id.name()));} catch (InterruptedException e) {break;} + try {queue.put((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));} catch (InterruptedException e) {break;} } if (sdl.size() < pagesize) break; o += pagesize; diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index d4ec704d6..52dce31df 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -432,7 +432,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo private void addToCache(SolrDocumentList list) { if (MemoryControl.shortStatus()) clearCache(); for (final SolrDocument solrdoc: list) { - String id = (String) solrdoc.getFieldValue(YaCySchema.id.name()); + String id = (String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName()); if (id != null) { this.hitCache.put(id, EXIST); cacheHit_Insert++; diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index 0c6ae6a9f..33f8458e7 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -24,7 +24,11 @@ package net.yacy.kelondro.data.meta; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; import java.util.regex.Pattern; import net.yacy.cora.date.GenericFormatter; @@ -105,7 +109,7 @@ public class URIMetadataNode { } public String hosthash() { - String hosthash = (String) this.doc.getFieldValue(YaCySchema.host_id_s.name()); + String hosthash = (String) this.doc.getFieldValue(YaCySchema.host_id_s.getSolrFieldName()); if (hosthash == null) hosthash = ASCII.String(this.hash, 6, 6); return hosthash; } @@ -147,7 +151,7 @@ public class URIMetadataNode { if (this.lat == Double.NaN) { this.lon = 0.0d; this.lat = 0.0d; - String latlon = (String) this.doc.getFieldValue(YaCySchema.coordinate_p.name()); + String latlon = (String) this.doc.getFieldValue(YaCySchema.coordinate_p.getSolrFieldName()); if (latlon != null) { int p = latlon.indexOf(','); if (p > 0) { @@ -281,6 +285,47 @@ public class URIMetadataNode { public WordReference word() { return this.word; } + + private static List indexedList2protocolList(Collection iplist, int dimension) { + List a = new ArrayList(dimension); + for (int i = 0; i < dimension; i++) a.add("http"); + if (iplist == null) return a; + for (Object ip: iplist) a.set(Integer.parseInt(((String) ip).substring(0, 3)), ((String) ip).substring(4)); + return a; + } + + public static Iterator getLinks(SolrDocument doc, boolean inbound) { + Collection urlstub = doc.getFieldValues((inbound ? YaCySchema.inboundlinks_urlstub_txt : YaCySchema.outboundlinks_urlstub_txt).getSolrFieldName()); + Collection urlprot = urlstub == null ? null : indexedList2protocolList(doc.getFieldValues((inbound ? YaCySchema.inboundlinks_protocol_sxt : YaCySchema.outboundlinks_protocol_sxt).getSolrFieldName()), urlstub.size()); + String u; + LinkedHashSet list = new LinkedHashSet(); + if (urlprot != null && urlstub != null) { + assert urlprot.size() == urlstub.size(); + Object[] urlprota = urlprot.toArray(); + Object[] urlstuba = urlstub.toArray(); + for (int i = 0; i < urlprota.length; i++) { + u = ((String) urlprota[i]) + "://" + ((String) urlstuba[i]); + int hp = u.indexOf('#'); + if (hp > 0) u = u.substring(0, hp); + list.add(u); + } + } + return list.iterator(); + } + + public static Date getDate(SolrDocument doc, final YaCySchema key) { + Date x = doc == null ? null : (Date) doc.getFieldValue(key.getSolrFieldName()); + Date now = new Date(); + return (x == null) ? new Date(0) : x.after(now) ? now : x; + } + + public String getText() { + return getString(YaCySchema.text_t); + } + + public String getDescription() { + return getString(YaCySchema.description); + } public boolean isOlder(URIMetadataRow other) { if (other == null) return false; @@ -374,10 +419,22 @@ public class URIMetadataNode { return core.toString(); } + private DigestURI getURL(YaCySchema field) { + assert !field.isMultiValued(); + assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight; + Object x = this.doc.getFieldValue(field.getSolrFieldName()); + if (x == null) return null; + try { + return new DigestURI((String) x); + } catch (MalformedURLException e) { + return null; + } + } + private int getInt(YaCySchema field) { assert !field.isMultiValued(); assert field.getType() == SolrType.integer; - Object x = this.doc.getFieldValue(field.name()); + Object x = this.doc.getFieldValue(field.getSolrFieldName()); if (x == null) return 0; if (x instanceof Integer) return ((Integer) x).intValue(); if (x instanceof Long) return ((Long) x).intValue(); @@ -387,7 +444,7 @@ public class URIMetadataNode { private Date getDate(YaCySchema field) { assert !field.isMultiValued(); assert field.getType() == SolrType.date; - Date x = (Date) this.doc.getFieldValue(field.name()); + Date x = (Date) this.doc.getFieldValue(field.getSolrFieldName()); if (x == null) return new Date(0); Date now = new Date(); return x.after(now) ? now : x; @@ -396,7 +453,7 @@ public class URIMetadataNode { private String getString(YaCySchema field) { assert !field.isMultiValued(); assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight; - Object x = this.doc.getFieldValue(field.name()); + Object x = this.doc.getFieldValue(field.getSolrFieldName()); if (x == null) return ""; if (x instanceof ArrayList) { @SuppressWarnings("unchecked") @@ -410,7 +467,7 @@ public class URIMetadataNode { private ArrayList getStringList(YaCySchema field) { assert field.isMultiValued(); assert field.getType() == SolrType.string || field.getType() == SolrType.text_general; - Object r = this.doc.getFieldValue(field.name()); + Object r = this.doc.getFieldValue(field.getSolrFieldName()); if (r == null) return new ArrayList(0); if (r instanceof ArrayList) { return (ArrayList) r; @@ -424,7 +481,7 @@ public class URIMetadataNode { private ArrayList getIntList(YaCySchema field) { assert field.isMultiValued(); assert field.getType() == SolrType.integer; - Object r = this.doc.getFieldValue(field.name()); + Object r = this.doc.getFieldValue(field.getSolrFieldName()); if (r == null) return new ArrayList(0); if (r instanceof ArrayList) { return (ArrayList) r; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index dcab10d5b..0dad88ffa 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -78,7 +78,7 @@ public final class Fulltext implements Iterable { private final MirrorSolrConnector solr; private final SolrConfiguration solrScheme; - public Fulltext(final File path, final SolrConfiguration solrScheme) { + protected Fulltext(final File path, final SolrConfiguration solrScheme) { this.location = path; this.tablename = null; this.urlIndexFile = null; @@ -88,11 +88,7 @@ public final class Fulltext implements Iterable { this.solrScheme = solrScheme; } - public boolean connectedUrlDb() { - return this.urlIndexFile != null; - } - - public void connectUrlDb(final String tablename, final boolean useTailCache, final boolean exceed134217727) { + protected void connectUrlDb(final String tablename, final boolean useTailCache, final boolean exceed134217727) { if (this.urlIndexFile != null) return; this.tablename = tablename; this.urlIndexFile = new SplitTable(this.location, tablename, URIMetadataRow.rowdef, useTailCache, exceed134217727); @@ -242,7 +238,7 @@ public final class Fulltext implements Iterable { if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); SolrDocument sd = this.solr.get(id); Date now = new Date(); - Date sdDate = sd == null ? null : SolrConfiguration.getDate(sd, YaCySchema.last_modified); + Date sdDate = sd == null ? null : URIMetadataNode.getDate(sd, YaCySchema.last_modified); if (sdDate == null || sdDate.after(now)) sdDate = now; Date docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified); if (docDate.after(now)) docDate = now; @@ -318,7 +314,7 @@ public final class Fulltext implements Iterable { if (urlHash == null) return null; SolrDocument doc = this.solr.get(urlHash); if (doc == null) return null; - String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.name()); + String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName()); return reason == null ? null : reason.length() == 0 ? null : reason; } @@ -468,7 +464,7 @@ public final class Fulltext implements Iterable { private final boolean dom; private final HandleSet set; - public Export(final File f, final String filter, final HandleSet set, final int format, boolean dom) { + private Export(final File f, final String filter, final HandleSet set, final int format, boolean dom) { // format: 0=text, 1=html, 2=rss/xml this.f = f; this.filter = filter; @@ -603,7 +599,7 @@ public final class Fulltext implements Iterable { * @param domainSamples a map from domain hashes to hash statistics * @return a set of domain names, ordered by name of the domains */ - public TreeSet domainNameCollector(int count, final Map domainSamples) { + private TreeSet domainNameCollector(int count, final Map domainSamples) { // collect hashes from all domains // fetch urls from the database to determine the host in clear text @@ -699,7 +695,7 @@ public final class Fulltext implements Iterable { public String hostname, hosthash; public int port; public int count; - public HostStat(final String host, final int port, final String urlhashfragment, final int count) { + private HostStat(final String host, final int port, final String urlhashfragment, final int count) { assert urlhashfragment.length() == 6; this.hostname = host; this.port = port; diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 460df3c58..a364c4e91 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -34,7 +34,6 @@ import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Properties; @@ -59,7 +58,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.Bitfield; -import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; @@ -111,67 +109,42 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable return this.contains(field.name()); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final byte[] value) { - assert !key.isMultiValued(); - if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length != 0))) key.add(doc, UTF8.String(value)); - } - - protected void add(final SolrInputDocument doc, final YaCySchema key, final String value) { + private void add(final SolrInputDocument doc, final YaCySchema key, final String value) { assert !key.isMultiValued(); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final String value, final float boost) { - assert !key.isMultiValued(); - if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value, boost); - } - - protected void add(final SolrInputDocument doc, final YaCySchema key, final Date value) { + private void add(final SolrInputDocument doc, final YaCySchema key, final Date value) { assert !key.isMultiValued(); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.getTime() > 0))) key.add(doc, value); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final String[] value) { + private void add(final SolrInputDocument doc, final YaCySchema key, final String[] value) { assert key.isMultiValued(); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final Integer[] value) { + private void add(final SolrInputDocument doc, final YaCySchema key, final Integer[] value) { assert key.isMultiValued(); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final List values) { + private void add(final SolrInputDocument doc, final YaCySchema key, final List values) { assert key.isMultiValued(); if ((isEmpty() || contains(key)) && (!this.lazy || (values != null && !values.isEmpty()))) key.add(doc, values); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final int value) { + private void add(final SolrInputDocument doc, final YaCySchema key, final int value) { assert !key.isMultiValued(); if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value); } - protected void add(final SolrInputDocument doc, final YaCySchema key, final long value) { - assert !key.isMultiValued(); - if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value); - } - - protected void add(final SolrInputDocument doc, final YaCySchema key, final float value) { - assert !key.isMultiValued(); - if ((isEmpty() || contains(key)) && (!this.lazy || value != 0.0f)) key.add(doc, value); - } - - protected void add(final SolrInputDocument doc, final YaCySchema key, final double value) { - assert !key.isMultiValued(); - if ((isEmpty() || contains(key)) && (!this.lazy || value != 0.0d)) key.add(doc, value); - } - - protected void add(final SolrInputDocument doc, final YaCySchema key, final boolean value) { + private void add(final SolrInputDocument doc, final YaCySchema key, final boolean value) { assert !key.isMultiValued(); if (isEmpty() || contains(key)) key.add(doc, value); } - public static Date getDate(SolrInputDocument doc, final YaCySchema key) { + protected static Date getDate(SolrInputDocument doc, final YaCySchema key) { Date x = (Date) doc.getFieldValue(key.name()); Date now = new Date(); return (x == null) ? new Date(0) : x.after(now) ? now : x; @@ -198,7 +171,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } catch (final IOException e) {} } - public SolrInputDocument metadata2solr(final URIMetadataRow md) { + protected SolrInputDocument metadata2solr(final URIMetadataRow md) { final SolrInputDocument doc = new SolrInputDocument(); final DigestURI digestURI = DigestURI.toDigestURI(md.url()); @@ -326,7 +299,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (!text.isEmpty() && text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.'); } - public SolrInputDocument yacy2solr(final String id, final CrawlProfile profile, final ResponseHeader responseHeader, final Document document, Condenser condenser, DigestURI referrerURL, String language) { + protected SolrInputDocument yacy2solr(final String id, final CrawlProfile profile, final ResponseHeader responseHeader, final Document document, Condenser condenser, DigestURI referrerURL, String language) { // we use the SolrCell design as index scheme final SolrInputDocument doc = new SolrInputDocument(); final DigestURI digestURI = DigestURI.toDigestURI(document.dc_source()); @@ -810,14 +783,6 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable return a; } - public static List indexedList2protocolList(Collection iplist, int dimension) { - List a = new ArrayList(dimension); - for (int i = 0; i < dimension; i++) a.add("http"); - if (iplist == null) return a; - for (Object ip: iplist) a.set(Integer.parseInt(((String) ip).substring(0, 3)), ((String) ip).substring(4)); - return a; - } - /** * encode a string containing attributes from anchor rel properties binary: * bit 0: "me" contained in rel @@ -836,74 +801,6 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } return il; } - - public static Iterator getLinks(SolrDocument doc, boolean inbound) { - Collection urlstub = doc.getFieldValues((inbound ? YaCySchema.inboundlinks_urlstub_txt : YaCySchema.outboundlinks_urlstub_txt).name()); - Collection urlprot = urlstub == null ? null : indexedList2protocolList(doc.getFieldValues((inbound ? YaCySchema.inboundlinks_protocol_sxt : YaCySchema.outboundlinks_protocol_sxt).name()), urlstub.size()); - String u; - LinkedHashSet list = new LinkedHashSet(); - if (urlprot != null && urlstub != null) { - assert urlprot.size() == urlstub.size(); - Object[] urlprota = urlprot.toArray(); - Object[] urlstuba = urlstub.toArray(); - for (int i = 0; i < urlprota.length; i++) { - u = ((String) urlprota[i]) + "://" + ((String) urlstuba[i]); - int hp = u.indexOf('#'); - if (hp > 0) u = u.substring(0, hp); - list.add(u); - } - } - return list.iterator(); - } - - public static Date getDate(SolrDocument doc, final YaCySchema key) { - Date x = doc == null ? null : (Date) doc.getFieldValue(key.name()); - Date now = new Date(); - return (x == null) ? new Date(0) : x.after(now) ? now : x; - } - - public static String solrGetID(final SolrDocument solr) { - return (String) solr.getFieldValue(YaCySchema.id.getSolrFieldName()); - } - - public static DigestURI solrGetURL(final SolrDocument solr) { - try { - return new DigestURI((String) solr.getFieldValue(YaCySchema.sku.getSolrFieldName())); - } catch (final MalformedURLException e) { - return null; - } - } - - public static String solrGetTitle(final SolrDocument solr) { - return (String) solr.getFieldValue(YaCySchema.title.getSolrFieldName()); - } - - public static String solrGetText(final SolrDocument solr) { - return (String) solr.getFieldValue(YaCySchema.text_t.getSolrFieldName()); - } - - public static String solrGetAuthor(final SolrDocument solr) { - return (String) solr.getFieldValue(YaCySchema.author.getSolrFieldName()); - } - - public static String solrGetDescription(final SolrDocument solr) { - return (String) solr.getFieldValue(YaCySchema.description.getSolrFieldName()); - } - - public static Date solrGetDate(final SolrDocument solr) { - Date date = (Date) solr.getFieldValue(YaCySchema.last_modified.getSolrFieldName()); - Date now = new Date(); - return date.after(now) ? now : date; - } - - public static Collection solrGetKeywords(final SolrDocument solr) { - final Collection c = solr.getFieldValues(YaCySchema.keywords.getSolrFieldName()); - final ArrayList a = new ArrayList(); - for (final Object s: c) { - a.add((String) s); - } - return a; - } /** * register an entry as error document diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index dac3bf809..127897e0d 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -272,7 +272,6 @@ public final class SearchEvent { this.query, this.peers, this.workTables, - 5000, deleteIfSnippetFail, remote); diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index ebc06b3da..693269df0 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -54,15 +54,11 @@ import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; -import net.yacy.search.index.Fulltext; import net.yacy.search.index.Segment; import net.yacy.search.snippet.MediaSnippet; import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.TextSnippet; -import org.apache.solr.common.SolrDocument; - - public class SnippetProcess { public static Log log = new Log("SEARCH"); @@ -71,30 +67,28 @@ public class SnippetProcess { private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2); // input values - final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container + private final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container QueryParams query; private final SeedDB peers; private final WorkTables workTables; // result values - protected final LoaderDispatcher loader; + private final LoaderDispatcher loader; protected Worker[] workerThreads; - protected final WeakPriorityBlockingQueue result; - protected final WeakPriorityBlockingQueue images; // container to sort images by size - protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets - long urlRetrievalAllTime; - long snippetComputationAllTime; - int taketimeout; + private final WeakPriorityBlockingQueue result; + private final WeakPriorityBlockingQueue images; // container to sort images by size + private final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets + private long urlRetrievalAllTime; + private long snippetComputationAllTime; private final boolean deleteIfSnippetFail, remote; private boolean cleanupState; - public SnippetProcess( + protected SnippetProcess( final LoaderDispatcher loader, final RWIProcess rankedCache, final QueryParams query, final SeedDB peers, final WorkTables workTables, - final int taketimeout, final boolean deleteIfSnippetFail, final boolean remote) { assert query != null; @@ -103,7 +97,6 @@ public class SnippetProcess { this.query = query; this.peers = peers; this.workTables = workTables; - this.taketimeout = taketimeout; this.deleteIfSnippetFail = deleteIfSnippetFail; this.remote = remote; this.cleanupState = false; @@ -133,7 +126,7 @@ public class SnippetProcess { EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false); } - public void setCleanupState() { + protected void setCleanupState() { this.cleanupState = true; } @@ -145,7 +138,7 @@ public class SnippetProcess { return this.snippetComputationAllTime; } - public ResultEntry oneResult(final int item, final long timeout) { + protected ResultEntry oneResult(final int item, final long timeout) { // check if we already retrieved this item // (happens if a search pages is accessed a second time) final long finishTime = System.currentTimeMillis() + timeout; @@ -221,7 +214,7 @@ public class SnippetProcess { } private int resultCounter = 0; - public ResultEntry nextResult() { + private ResultEntry nextResult() { final ResultEntry re = oneResult(this.resultCounter, Math.max(3000, this.query.timeout - System.currentTimeMillis())); this.resultCounter++; return re; @@ -290,7 +283,7 @@ public class SnippetProcess { return this.result.list(Math.min(this.query.neededResults(), this.result.sizeAvailable())); } - public long postRanking( + private long postRanking( final ResultEntry rentry, final ScoreMap topwords) { @@ -351,7 +344,7 @@ public class SnippetProcess { } - public void deployWorker(int deployCount, final int neededResults) { + private void deployWorker(int deployCount, final int neededResults) { if (this.cleanupState || (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) || this.result.sizeAvailable() >= neededResults) { @@ -404,7 +397,7 @@ public class SnippetProcess { } } - public void stopAllWorker() { + private void stopAllWorker() { synchronized(this.workerThreads) { for (int i = 0; i < this.workerThreads.length; i++) { if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) { @@ -439,15 +432,13 @@ public class SnippetProcess { private final CacheStrategy cacheStrategy; private final int neededResults; private boolean shallrun; - private final Fulltext metadata; - public Worker(final long maxlifetime, final CacheStrategy cacheStrategy, final int neededResults) { + private Worker(final long maxlifetime, final CacheStrategy cacheStrategy, final int neededResults) { this.cacheStrategy = cacheStrategy; this.lastLifeSign = System.currentTimeMillis(); this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.neededResults = neededResults; this.shallrun = true; - this.metadata = SnippetProcess.this.rankingProcess.getQuery().getSegment().fulltext(); } @Override @@ -495,11 +486,7 @@ public class SnippetProcess { } // in case that we have an attached solr, we load also the solr document - String solrContent = null; - SolrDocument sd = page.getDocument(); - if (sd != null) { - solrContent = this.metadata.getSolrScheme().solrGetText(sd); - } + String solrContent = page.getText(); resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0 if (resultEntry == null) @@ -532,7 +519,7 @@ public class SnippetProcess { //Log.logInfo("SEARCH", "resultWorker thread " + this.id + " terminated"); } - public void pleaseStop() { + protected void pleaseStop() { this.shallrun = false; } @@ -540,12 +527,12 @@ public class SnippetProcess { * calculate the time since the worker has had the latest activity * @return time in milliseconds lasted since latest activity */ - public long busytime() { + private long busytime() { return System.currentTimeMillis() - this.lastLifeSign; } } - protected ResultEntry fetchSnippet(final URIMetadataNode page, final String solrText, final CacheStrategy cacheStrategy) { + private ResultEntry fetchSnippet(final URIMetadataNode page, final String solrText, final CacheStrategy cacheStrategy) { // Snippet Fetching can has 3 modes: // 0 - do not fetch snippets // 1 - fetch snippets offline only @@ -620,7 +607,7 @@ public class SnippetProcess { * @param urlhash * @return true if an entry was deleted, false otherwise */ - public boolean delete(final String urlhash) { + protected boolean delete(final String urlhash) { final Iterator> i = this.result.iterator(); Element entry; while (i.hasNext()) {