- more refactoring / private methods

- fix for usage of custom solr field names
pull/1/head
Michael Peter Christen 13 years ago
parent ccc3760a47
commit ce0e5b1e17

@ -41,11 +41,11 @@ import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.peers.graphics.WebStructureGraph.StructureEntry; import net.yacy.peers.graphics.WebStructureGraph.StructureEntry;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Fulltext; import net.yacy.search.index.Fulltext;
import net.yacy.search.index.SolrConfiguration;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -168,18 +168,18 @@ public class HostBrowser {
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>(); Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
int hostsize = 0; int hostsize = 0;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
String u = (String) doc.getFieldValue(YaCySchema.sku.name()); String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName());
hostsize++; hostsize++;
if (u.startsWith(path)) storedDocs.add(u); if (u.startsWith(path)) storedDocs.add(u);
// collect inboundlinks to browse the host // collect inboundlinks to browse the host
Iterator<String> links = SolrConfiguration.getLinks(doc, true); Iterator<String> links = URIMetadataNode.getLinks(doc, true);
while (links.hasNext()) { while (links.hasNext()) {
u = links.next(); u = links.next();
if (u.startsWith(path) && !storedDocs.contains(u)) inboundLinks.add(u); if (u.startsWith(path) && !storedDocs.contains(u)) inboundLinks.add(u);
} }
// collect outboundlinks to browse to the outbound // collect outboundlinks to browse to the outbound
links = SolrConfiguration.getLinks(doc, false); links = URIMetadataNode.getLinks(doc, false);
while (links.hasNext()) { while (links.hasNext()) {
u = links.next(); u = links.next();
try { try {

@ -109,7 +109,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
try { try {
SolrDocumentList sdl = query(querystring, o, pagesize); SolrDocumentList sdl = query(querystring, o, pagesize);
for (SolrDocument d: sdl) { for (SolrDocument d: sdl) {
try {queue.put((String) d.getFieldValue(YaCySchema.id.name()));} catch (InterruptedException e) {break;} try {queue.put((String) d.getFieldValue(YaCySchema.id.getSolrFieldName()));} catch (InterruptedException e) {break;}
} }
if (sdl.size() < pagesize) break; if (sdl.size() < pagesize) break;
o += pagesize; o += pagesize;

@ -432,7 +432,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
private void addToCache(SolrDocumentList list) { private void addToCache(SolrDocumentList list) {
if (MemoryControl.shortStatus()) clearCache(); if (MemoryControl.shortStatus()) clearCache();
for (final SolrDocument solrdoc: list) { for (final SolrDocument solrdoc: list) {
String id = (String) solrdoc.getFieldValue(YaCySchema.id.name()); String id = (String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName());
if (id != null) { if (id != null) {
this.hitCache.put(id, EXIST); this.hitCache.put(id, EXIST);
cacheHit_Insert++; cacheHit_Insert++;

@ -24,7 +24,11 @@ package net.yacy.kelondro.data.meta;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
@ -105,7 +109,7 @@ public class URIMetadataNode {
} }
public String hosthash() { public String hosthash() {
String hosthash = (String) this.doc.getFieldValue(YaCySchema.host_id_s.name()); String hosthash = (String) this.doc.getFieldValue(YaCySchema.host_id_s.getSolrFieldName());
if (hosthash == null) hosthash = ASCII.String(this.hash, 6, 6); if (hosthash == null) hosthash = ASCII.String(this.hash, 6, 6);
return hosthash; return hosthash;
} }
@ -147,7 +151,7 @@ public class URIMetadataNode {
if (this.lat == Double.NaN) { if (this.lat == Double.NaN) {
this.lon = 0.0d; this.lon = 0.0d;
this.lat = 0.0d; this.lat = 0.0d;
String latlon = (String) this.doc.getFieldValue(YaCySchema.coordinate_p.name()); String latlon = (String) this.doc.getFieldValue(YaCySchema.coordinate_p.getSolrFieldName());
if (latlon != null) { if (latlon != null) {
int p = latlon.indexOf(','); int p = latlon.indexOf(',');
if (p > 0) { if (p > 0) {
@ -282,6 +286,47 @@ public class URIMetadataNode {
return this.word; return this.word;
} }
/**
 * Expands a sparse list of index-prefixed protocol entries into a dense protocol list.
 * Every entry is read as three characters holding a decimal position, one further
 * character that is not inspected, and the protocol name as the remainder.
 * Positions that no entry refers to keep the default protocol "http".
 * Entries that are not strings, are shorter than four characters, have a non-numeric
 * position or a position outside [0, dimension) are skipped instead of raising an exception.
 * @param iplist the index-prefixed entries; may be null, then all positions are "http"
 * @param dimension the size of the resulting list
 * @return a list of exactly dimension protocol names
 */
private static List<String> indexedList2protocolList(Collection<Object> iplist, int dimension) {
    List<String> a = new ArrayList<String>(dimension);
    for (int i = 0; i < dimension; i++) a.add("http");
    if (iplist == null) return a;
    for (Object ip: iplist) {
        if (!(ip instanceof String)) continue;
        final String entry = (String) ip;
        // at least three position characters plus the one skipped character are required
        if (entry.length() < 4) continue;
        final int index;
        try {
            index = Integer.parseInt(entry.substring(0, 3));
        } catch (final NumberFormatException e) {
            // position prefix is not a number: keep the default for whatever slot was meant
            continue;
        }
        if (index < 0 || index >= dimension) continue;
        a.set(index, entry.substring(4));
    }
    return a;
}
/**
 * Rebuilds the link urls stored in a solr document from the url stub field and the
 * matching protocol field. Everything from the first '#' on is cut off each url
 * (unless the '#' is the very first character) and duplicates are dropped while
 * the order of first occurrence is kept.
 * @param doc the solr document to read the link fields from
 * @param inbound if true the inbound link fields are read, otherwise the outbound link fields
 * @return an iterator over the distinct urls; empty if the document has no url stub values
 */
public static Iterator<String> getLinks(SolrDocument doc, boolean inbound) {
    final YaCySchema stubField = inbound ? YaCySchema.inboundlinks_urlstub_txt : YaCySchema.outboundlinks_urlstub_txt;
    final Collection<Object> stubs = doc.getFieldValues(stubField.getSolrFieldName());
    final LinkedHashSet<String> links = new LinkedHashSet<String>();
    // without stubs there is nothing to combine; the protocol field is not even read
    if (stubs == null) return links.iterator();

    final YaCySchema protocolField = inbound ? YaCySchema.inboundlinks_protocol_sxt : YaCySchema.outboundlinks_protocol_sxt;
    final Collection<String> protocols = indexedList2protocolList(doc.getFieldValues(protocolField.getSolrFieldName()), stubs.size());
    if (protocols == null) return links.iterator();

    assert protocols.size() == stubs.size();
    final Object[] protocolArray = protocols.toArray();
    final Object[] stubArray = stubs.toArray();
    for (int k = 0; k < protocolArray.length; k++) {
        final String full = ((String) protocolArray[k]) + "://" + ((String) stubArray[k]);
        final int fragmentStart = full.indexOf('#');
        links.add(fragmentStart > 0 ? full.substring(0, fragmentStart) : full);
    }
    return links.iterator();
}
/**
 * Reads a date field from a solr document.
 * @param doc the solr document; may be null
 * @param key the schema field holding the date
 * @return the epoch date (time 0) if the document is null or the field has no value,
 *         the current time if the stored date lies in the future, the stored date otherwise
 */
public static Date getDate(SolrDocument doc, final YaCySchema key) {
    if (doc == null) return new Date(0);
    final Date stored = (Date) doc.getFieldValue(key.getSolrFieldName());
    if (stored == null) return new Date(0);
    final Date now = new Date();
    // never report a date that lies in the future
    if (stored.after(now)) return now;
    return stored;
}
/**
 * @return the content of the text_t field of this document as delivered by getString
 */
public String getText() {
    final YaCySchema textField = YaCySchema.text_t;
    return this.getString(textField);
}
/**
 * @return the content of the description field of this document as delivered by getString
 */
public String getDescription() {
    final YaCySchema descriptionField = YaCySchema.description;
    return this.getString(descriptionField);
}
public boolean isOlder(URIMetadataRow other) { public boolean isOlder(URIMetadataRow other) {
if (other == null) return false; if (other == null) return false;
final Date tmoddate = moddate(); final Date tmoddate = moddate();
@ -374,10 +419,22 @@ public class URIMetadataNode {
return core.toString(); return core.toString();
} }
/**
 * Reads a single-valued string field of this document and parses it as url.
 * @param field a single-valued field of a string or text type
 * @return the parsed url, or null if the field has no value or the value is not a well-formed url
 */
private DigestURI getURL(YaCySchema field) {
    assert !field.isMultiValued();
    assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
    final Object raw = this.doc.getFieldValue(field.getSolrFieldName());
    if (raw == null) return null;
    DigestURI url;
    try {
        url = new DigestURI((String) raw);
    } catch (MalformedURLException e) {
        // an unparsable url is reported like a missing one
        url = null;
    }
    return url;
}
private int getInt(YaCySchema field) { private int getInt(YaCySchema field) {
assert !field.isMultiValued(); assert !field.isMultiValued();
assert field.getType() == SolrType.integer; assert field.getType() == SolrType.integer;
Object x = this.doc.getFieldValue(field.name()); Object x = this.doc.getFieldValue(field.getSolrFieldName());
if (x == null) return 0; if (x == null) return 0;
if (x instanceof Integer) return ((Integer) x).intValue(); if (x instanceof Integer) return ((Integer) x).intValue();
if (x instanceof Long) return ((Long) x).intValue(); if (x instanceof Long) return ((Long) x).intValue();
@ -387,7 +444,7 @@ public class URIMetadataNode {
private Date getDate(YaCySchema field) { private Date getDate(YaCySchema field) {
assert !field.isMultiValued(); assert !field.isMultiValued();
assert field.getType() == SolrType.date; assert field.getType() == SolrType.date;
Date x = (Date) this.doc.getFieldValue(field.name()); Date x = (Date) this.doc.getFieldValue(field.getSolrFieldName());
if (x == null) return new Date(0); if (x == null) return new Date(0);
Date now = new Date(); Date now = new Date();
return x.after(now) ? now : x; return x.after(now) ? now : x;
@ -396,7 +453,7 @@ public class URIMetadataNode {
private String getString(YaCySchema field) { private String getString(YaCySchema field) {
assert !field.isMultiValued(); assert !field.isMultiValued();
assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight; assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
Object x = this.doc.getFieldValue(field.name()); Object x = this.doc.getFieldValue(field.getSolrFieldName());
if (x == null) return ""; if (x == null) return "";
if (x instanceof ArrayList) { if (x instanceof ArrayList) {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -410,7 +467,7 @@ public class URIMetadataNode {
private ArrayList<String> getStringList(YaCySchema field) { private ArrayList<String> getStringList(YaCySchema field) {
assert field.isMultiValued(); assert field.isMultiValued();
assert field.getType() == SolrType.string || field.getType() == SolrType.text_general; assert field.getType() == SolrType.string || field.getType() == SolrType.text_general;
Object r = this.doc.getFieldValue(field.name()); Object r = this.doc.getFieldValue(field.getSolrFieldName());
if (r == null) return new ArrayList<String>(0); if (r == null) return new ArrayList<String>(0);
if (r instanceof ArrayList) { if (r instanceof ArrayList) {
return (ArrayList<String>) r; return (ArrayList<String>) r;
@ -424,7 +481,7 @@ public class URIMetadataNode {
private ArrayList<Integer> getIntList(YaCySchema field) { private ArrayList<Integer> getIntList(YaCySchema field) {
assert field.isMultiValued(); assert field.isMultiValued();
assert field.getType() == SolrType.integer; assert field.getType() == SolrType.integer;
Object r = this.doc.getFieldValue(field.name()); Object r = this.doc.getFieldValue(field.getSolrFieldName());
if (r == null) return new ArrayList<Integer>(0); if (r == null) return new ArrayList<Integer>(0);
if (r instanceof ArrayList) { if (r instanceof ArrayList) {
return (ArrayList<Integer>) r; return (ArrayList<Integer>) r;

@ -78,7 +78,7 @@ public final class Fulltext implements Iterable<byte[]> {
private final MirrorSolrConnector solr; private final MirrorSolrConnector solr;
private final SolrConfiguration solrScheme; private final SolrConfiguration solrScheme;
public Fulltext(final File path, final SolrConfiguration solrScheme) { protected Fulltext(final File path, final SolrConfiguration solrScheme) {
this.location = path; this.location = path;
this.tablename = null; this.tablename = null;
this.urlIndexFile = null; this.urlIndexFile = null;
@ -88,11 +88,7 @@ public final class Fulltext implements Iterable<byte[]> {
this.solrScheme = solrScheme; this.solrScheme = solrScheme;
} }
public boolean connectedUrlDb() { protected void connectUrlDb(final String tablename, final boolean useTailCache, final boolean exceed134217727) {
return this.urlIndexFile != null;
}
public void connectUrlDb(final String tablename, final boolean useTailCache, final boolean exceed134217727) {
if (this.urlIndexFile != null) return; if (this.urlIndexFile != null) return;
this.tablename = tablename; this.tablename = tablename;
this.urlIndexFile = new SplitTable(this.location, tablename, URIMetadataRow.rowdef, useTailCache, exceed134217727); this.urlIndexFile = new SplitTable(this.location, tablename, URIMetadataRow.rowdef, useTailCache, exceed134217727);
@ -242,7 +238,7 @@ public final class Fulltext implements Iterable<byte[]> {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.get(id); SolrDocument sd = this.solr.get(id);
Date now = new Date(); Date now = new Date();
Date sdDate = sd == null ? null : SolrConfiguration.getDate(sd, YaCySchema.last_modified); Date sdDate = sd == null ? null : URIMetadataNode.getDate(sd, YaCySchema.last_modified);
if (sdDate == null || sdDate.after(now)) sdDate = now; if (sdDate == null || sdDate.after(now)) sdDate = now;
Date docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified); Date docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified);
if (docDate.after(now)) docDate = now; if (docDate.after(now)) docDate = now;
@ -318,7 +314,7 @@ public final class Fulltext implements Iterable<byte[]> {
if (urlHash == null) return null; if (urlHash == null) return null;
SolrDocument doc = this.solr.get(urlHash); SolrDocument doc = this.solr.get(urlHash);
if (doc == null) return null; if (doc == null) return null;
String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.name()); String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName());
return reason == null ? null : reason.length() == 0 ? null : reason; return reason == null ? null : reason.length() == 0 ? null : reason;
} }
@ -468,7 +464,7 @@ public final class Fulltext implements Iterable<byte[]> {
private final boolean dom; private final boolean dom;
private final HandleSet set; private final HandleSet set;
public Export(final File f, final String filter, final HandleSet set, final int format, boolean dom) { private Export(final File f, final String filter, final HandleSet set, final int format, boolean dom) {
// format: 0=text, 1=html, 2=rss/xml // format: 0=text, 1=html, 2=rss/xml
this.f = f; this.f = f;
this.filter = filter; this.filter = filter;
@ -603,7 +599,7 @@ public final class Fulltext implements Iterable<byte[]> {
* @param domainSamples a map from domain hashes to hash statistics * @param domainSamples a map from domain hashes to hash statistics
* @return a set of domain names, ordered by name of the domains * @return a set of domain names, ordered by name of the domains
*/ */
public TreeSet<String> domainNameCollector(int count, final Map<String, URLHashCounter> domainSamples) { private TreeSet<String> domainNameCollector(int count, final Map<String, URLHashCounter> domainSamples) {
// collect hashes from all domains // collect hashes from all domains
// fetch urls from the database to determine the host in clear text // fetch urls from the database to determine the host in clear text
@ -699,7 +695,7 @@ public final class Fulltext implements Iterable<byte[]> {
public String hostname, hosthash; public String hostname, hosthash;
public int port; public int port;
public int count; public int count;
public HostStat(final String host, final int port, final String urlhashfragment, final int count) { private HostStat(final String host, final int port, final String urlhashfragment, final int count) {
assert urlhashfragment.length() == 6; assert urlhashfragment.length() == 6;
this.hostname = host; this.hostname = host;
this.port = port; this.port = port;

@ -34,7 +34,6 @@ import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
@ -59,7 +58,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.Bitfield;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
@ -111,67 +109,42 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
return this.contains(field.name()); return this.contains(field.name());
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final byte[] value) { private void add(final SolrInputDocument doc, final YaCySchema key, final String value) {
assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length != 0))) key.add(doc, UTF8.String(value));
}
protected void add(final SolrInputDocument doc, final YaCySchema key, final String value) {
assert !key.isMultiValued(); assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value);
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final String value, final float boost) { private void add(final SolrInputDocument doc, final YaCySchema key, final Date value) {
assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value, boost);
}
protected void add(final SolrInputDocument doc, final YaCySchema key, final Date value) {
assert !key.isMultiValued(); assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.getTime() > 0))) key.add(doc, value); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.getTime() > 0))) key.add(doc, value);
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final String[] value) { private void add(final SolrInputDocument doc, final YaCySchema key, final String[] value) {
assert key.isMultiValued(); assert key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value);
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final Integer[] value) { private void add(final SolrInputDocument doc, final YaCySchema key, final Integer[] value) {
assert key.isMultiValued(); assert key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value);
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final List<?> values) { private void add(final SolrInputDocument doc, final YaCySchema key, final List<?> values) {
assert key.isMultiValued(); assert key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || (values != null && !values.isEmpty()))) key.add(doc, values); if ((isEmpty() || contains(key)) && (!this.lazy || (values != null && !values.isEmpty()))) key.add(doc, values);
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final int value) { private void add(final SolrInputDocument doc, final YaCySchema key, final int value) {
assert !key.isMultiValued(); assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value); if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
} }
protected void add(final SolrInputDocument doc, final YaCySchema key, final long value) { private void add(final SolrInputDocument doc, final YaCySchema key, final boolean value) {
assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
}
protected void add(final SolrInputDocument doc, final YaCySchema key, final float value) {
assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0.0f)) key.add(doc, value);
}
protected void add(final SolrInputDocument doc, final YaCySchema key, final double value) {
assert !key.isMultiValued();
if ((isEmpty() || contains(key)) && (!this.lazy || value != 0.0d)) key.add(doc, value);
}
protected void add(final SolrInputDocument doc, final YaCySchema key, final boolean value) {
assert !key.isMultiValued(); assert !key.isMultiValued();
if (isEmpty() || contains(key)) key.add(doc, value); if (isEmpty() || contains(key)) key.add(doc, value);
} }
public static Date getDate(SolrInputDocument doc, final YaCySchema key) { protected static Date getDate(SolrInputDocument doc, final YaCySchema key) {
Date x = (Date) doc.getFieldValue(key.name()); Date x = (Date) doc.getFieldValue(key.name());
Date now = new Date(); Date now = new Date();
return (x == null) ? new Date(0) : x.after(now) ? now : x; return (x == null) ? new Date(0) : x.after(now) ? now : x;
@ -198,7 +171,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
} catch (final IOException e) {} } catch (final IOException e) {}
} }
public SolrInputDocument metadata2solr(final URIMetadataRow md) { protected SolrInputDocument metadata2solr(final URIMetadataRow md) {
final SolrInputDocument doc = new SolrInputDocument(); final SolrInputDocument doc = new SolrInputDocument();
final DigestURI digestURI = DigestURI.toDigestURI(md.url()); final DigestURI digestURI = DigestURI.toDigestURI(md.url());
@ -326,7 +299,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (!text.isEmpty() && text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.'); if (!text.isEmpty() && text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.');
} }
public SolrInputDocument yacy2solr(final String id, final CrawlProfile profile, final ResponseHeader responseHeader, final Document document, Condenser condenser, DigestURI referrerURL, String language) { protected SolrInputDocument yacy2solr(final String id, final CrawlProfile profile, final ResponseHeader responseHeader, final Document document, Condenser condenser, DigestURI referrerURL, String language) {
// we use the SolrCell design as index scheme // we use the SolrCell design as index scheme
final SolrInputDocument doc = new SolrInputDocument(); final SolrInputDocument doc = new SolrInputDocument();
final DigestURI digestURI = DigestURI.toDigestURI(document.dc_source()); final DigestURI digestURI = DigestURI.toDigestURI(document.dc_source());
@ -810,14 +783,6 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
return a; return a;
} }
/**
 * Expands a sparse list of index-prefixed protocol entries into a dense protocol list.
 * Every entry is read as three characters holding a decimal position, one further
 * character that is not inspected, and the protocol name as the remainder.
 * Positions that no entry refers to keep the default protocol "http".
 * Entries that are not strings, are shorter than four characters, have a non-numeric
 * position or a position outside [0, dimension) are skipped instead of raising an exception.
 * @param iplist the index-prefixed entries; may be null, then all positions are "http"
 * @param dimension the size of the resulting list
 * @return a list of exactly dimension protocol names
 */
public static List<String> indexedList2protocolList(Collection<Object> iplist, int dimension) {
    List<String> a = new ArrayList<String>(dimension);
    for (int i = 0; i < dimension; i++) a.add("http");
    if (iplist == null) return a;
    for (Object ip: iplist) {
        if (!(ip instanceof String)) continue;
        final String entry = (String) ip;
        // at least three position characters plus the one skipped character are required
        if (entry.length() < 4) continue;
        final int index;
        try {
            index = Integer.parseInt(entry.substring(0, 3));
        } catch (final NumberFormatException e) {
            // position prefix is not a number: keep the default for whatever slot was meant
            continue;
        }
        if (index < 0 || index >= dimension) continue;
        a.set(index, entry.substring(4));
    }
    return a;
}
/** /**
* encode a string containing attributes from anchor rel properties binary: * encode a string containing attributes from anchor rel properties binary:
* bit 0: "me" contained in rel * bit 0: "me" contained in rel
@ -837,74 +802,6 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
return il; return il;
} }
/**
 * Rebuilds the link urls stored in a solr document from the url stub field and the
 * matching protocol field. Everything from the first '#' on is cut off each url
 * (unless the '#' is the very first character) and duplicates are dropped while
 * the order of first occurrence is kept.
 * The fields are addressed by their configured solr field name (getSolrFieldName),
 * like the other solrGet* accessors of this class do, not by the enum constant name.
 * @param doc the solr document to read the link fields from
 * @param inbound if true the inbound link fields are read, otherwise the outbound link fields
 * @return an iterator over the distinct urls; empty if the document has no url stub values
 */
public static Iterator<String> getLinks(SolrDocument doc, boolean inbound) {
    Collection<Object> urlstub = doc.getFieldValues((inbound ? YaCySchema.inboundlinks_urlstub_txt : YaCySchema.outboundlinks_urlstub_txt).getSolrFieldName());
    Collection<String> urlprot = urlstub == null ? null : indexedList2protocolList(doc.getFieldValues((inbound ? YaCySchema.inboundlinks_protocol_sxt : YaCySchema.outboundlinks_protocol_sxt).getSolrFieldName()), urlstub.size());
    String u;
    LinkedHashSet<String> list = new LinkedHashSet<String>();
    if (urlprot != null && urlstub != null) {
        assert urlprot.size() == urlstub.size();
        Object[] urlprota = urlprot.toArray();
        Object[] urlstuba = urlstub.toArray();
        for (int i = 0; i < urlprota.length; i++) {
            u = ((String) urlprota[i]) + "://" + ((String) urlstuba[i]);
            // strip the fragment part of the url
            int hp = u.indexOf('#');
            if (hp > 0) u = u.substring(0, hp);
            list.add(u);
        }
    }
    return list.iterator();
}
/**
 * Reads a date field from a solr document. The field is addressed by its configured
 * solr field name (getSolrFieldName), like the solrGet* accessors of this class do.
 * @param doc the solr document; may be null
 * @param key the schema field holding the date
 * @return the epoch date (time 0) if the document is null or the field has no value,
 *         the current time if the stored date lies in the future, the stored date otherwise
 */
public static Date getDate(SolrDocument doc, final YaCySchema key) {
    Date x = doc == null ? null : (Date) doc.getFieldValue(key.getSolrFieldName());
    Date now = new Date();
    // never report a date that lies in the future
    return (x == null) ? new Date(0) : x.after(now) ? now : x;
}
/**
 * @param solr the solr document to read from
 * @return the value of the id field cast to a String; null if the document has no such value
 */
public static String solrGetID(final SolrDocument solr) {
    final String fieldName = YaCySchema.id.getSolrFieldName();
    final Object value = solr.getFieldValue(fieldName);
    return (String) value;
}
/**
 * @param solr the solr document to read from
 * @return the value of the sku field parsed as url, or null if it is not a well-formed url
 */
public static DigestURI solrGetURL(final SolrDocument solr) {
    DigestURI url = null;
    try {
        final String fieldName = YaCySchema.sku.getSolrFieldName();
        final String value = (String) solr.getFieldValue(fieldName);
        url = new DigestURI(value);
    } catch (final MalformedURLException e) {
        // leave url at null to signal an unparsable value
    }
    return url;
}
/**
 * @param solr the solr document to read from
 * @return the value of the title field cast to a String; null if the document has no such value
 */
public static String solrGetTitle(final SolrDocument solr) {
    final String fieldName = YaCySchema.title.getSolrFieldName();
    final Object value = solr.getFieldValue(fieldName);
    return (String) value;
}
/**
 * @param solr the solr document to read from
 * @return the value of the text_t field cast to a String; null if the document has no such value
 */
public static String solrGetText(final SolrDocument solr) {
    final String fieldName = YaCySchema.text_t.getSolrFieldName();
    final Object value = solr.getFieldValue(fieldName);
    return (String) value;
}
/**
 * @param solr the solr document to read from
 * @return the value of the author field cast to a String; null if the document has no such value
 */
public static String solrGetAuthor(final SolrDocument solr) {
    final String fieldName = YaCySchema.author.getSolrFieldName();
    final Object value = solr.getFieldValue(fieldName);
    return (String) value;
}
/**
 * @param solr the solr document to read from
 * @return the value of the description field cast to a String; null if the document has no such value
 */
public static String solrGetDescription(final SolrDocument solr) {
    final String fieldName = YaCySchema.description.getSolrFieldName();
    final Object value = solr.getFieldValue(fieldName);
    return (String) value;
}
/**
 * Reads the last_modified date of a solr document.
 * @param solr the solr document to read from
 * @return the epoch date (time 0) if the document has no last_modified value
 *         (the same fallback that getDate uses), the current time if the stored date
 *         lies in the future, the stored date otherwise
 */
public static Date solrGetDate(final SolrDocument solr) {
    final Date date = (Date) solr.getFieldValue(YaCySchema.last_modified.getSolrFieldName());
    // a missing field must not end in a NullPointerException on date.after()
    if (date == null) return new Date(0);
    final Date now = new Date();
    return date.after(now) ? now : date;
}
/**
 * Collects the values of the keywords field of a solr document as strings.
 * @param solr the solr document to read from
 * @return a new list with all keyword values in field order; an empty list if the
 *         document has no keywords values
 */
public static Collection<String> solrGetKeywords(final SolrDocument solr) {
    final Collection<Object> c = solr.getFieldValues(YaCySchema.keywords.getSolrFieldName());
    // a missing field must not end in a NullPointerException when iterating
    if (c == null) return new ArrayList<String>(0);
    final ArrayList<String> a = new ArrayList<String>(c.size());
    for (final Object s: c) {
        a.add((String) s);
    }
    return a;
}
/** /**
* register an entry as error document * register an entry as error document
* @param digestURI * @param digestURI

@ -272,7 +272,6 @@ public final class SearchEvent {
this.query, this.query,
this.peers, this.peers,
this.workTables, this.workTables,
5000,
deleteIfSnippetFail, deleteIfSnippetFail,
remote); remote);

@ -54,15 +54,11 @@ import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.LoaderDispatcher; import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker; import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.snippet.MediaSnippet; import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet; import net.yacy.search.snippet.TextSnippet;
import org.apache.solr.common.SolrDocument;
public class SnippetProcess { public class SnippetProcess {
public static Log log = new Log("SEARCH"); public static Log log = new Log("SEARCH");
@ -71,30 +67,28 @@ public class SnippetProcess {
private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2); private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2);
// input values // input values
final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container private final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query; QueryParams query;
private final SeedDB peers; private final SeedDB peers;
private final WorkTables workTables; private final WorkTables workTables;
// result values // result values
protected final LoaderDispatcher loader; private final LoaderDispatcher loader;
protected Worker[] workerThreads; protected Worker[] workerThreads;
protected final WeakPriorityBlockingQueue<ResultEntry> result; private final WeakPriorityBlockingQueue<ResultEntry> result;
protected final WeakPriorityBlockingQueue<MediaSnippet> images; // container to sort images by size private final WeakPriorityBlockingQueue<MediaSnippet> images; // container to sort images by size
protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets private final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
long urlRetrievalAllTime; private long urlRetrievalAllTime;
long snippetComputationAllTime; private long snippetComputationAllTime;
int taketimeout;
private final boolean deleteIfSnippetFail, remote; private final boolean deleteIfSnippetFail, remote;
private boolean cleanupState; private boolean cleanupState;
public SnippetProcess( protected SnippetProcess(
final LoaderDispatcher loader, final LoaderDispatcher loader,
final RWIProcess rankedCache, final RWIProcess rankedCache,
final QueryParams query, final QueryParams query,
final SeedDB peers, final SeedDB peers,
final WorkTables workTables, final WorkTables workTables,
final int taketimeout,
final boolean deleteIfSnippetFail, final boolean deleteIfSnippetFail,
final boolean remote) { final boolean remote) {
assert query != null; assert query != null;
@ -103,7 +97,6 @@ public class SnippetProcess {
this.query = query; this.query = query;
this.peers = peers; this.peers = peers;
this.workTables = workTables; this.workTables = workTables;
this.taketimeout = taketimeout;
this.deleteIfSnippetFail = deleteIfSnippetFail; this.deleteIfSnippetFail = deleteIfSnippetFail;
this.remote = remote; this.remote = remote;
this.cleanupState = false; this.cleanupState = false;
@ -133,7 +126,7 @@ public class SnippetProcess {
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false);
} }
public void setCleanupState() { protected void setCleanupState() {
this.cleanupState = true; this.cleanupState = true;
} }
@ -145,7 +138,7 @@ public class SnippetProcess {
return this.snippetComputationAllTime; return this.snippetComputationAllTime;
} }
public ResultEntry oneResult(final int item, final long timeout) { protected ResultEntry oneResult(final int item, final long timeout) {
// check if we already retrieved this item // check if we already retrieved this item
// (happens if a search pages is accessed a second time) // (happens if a search pages is accessed a second time)
final long finishTime = System.currentTimeMillis() + timeout; final long finishTime = System.currentTimeMillis() + timeout;
@ -221,7 +214,7 @@ public class SnippetProcess {
} }
private int resultCounter = 0; private int resultCounter = 0;
public ResultEntry nextResult() { private ResultEntry nextResult() {
final ResultEntry re = oneResult(this.resultCounter, Math.max(3000, this.query.timeout - System.currentTimeMillis())); final ResultEntry re = oneResult(this.resultCounter, Math.max(3000, this.query.timeout - System.currentTimeMillis()));
this.resultCounter++; this.resultCounter++;
return re; return re;
@ -290,7 +283,7 @@ public class SnippetProcess {
return this.result.list(Math.min(this.query.neededResults(), this.result.sizeAvailable())); return this.result.list(Math.min(this.query.neededResults(), this.result.sizeAvailable()));
} }
public long postRanking( private long postRanking(
final ResultEntry rentry, final ResultEntry rentry,
final ScoreMap<String> topwords) { final ScoreMap<String> topwords) {
@ -351,7 +344,7 @@ public class SnippetProcess {
} }
public void deployWorker(int deployCount, final int neededResults) { private void deployWorker(int deployCount, final int neededResults) {
if (this.cleanupState || if (this.cleanupState ||
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) || (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) ||
this.result.sizeAvailable() >= neededResults) { this.result.sizeAvailable() >= neededResults) {
@ -404,7 +397,7 @@ public class SnippetProcess {
} }
} }
public void stopAllWorker() { private void stopAllWorker() {
synchronized(this.workerThreads) { synchronized(this.workerThreads) {
for (int i = 0; i < this.workerThreads.length; i++) { for (int i = 0; i < this.workerThreads.length; i++) {
if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) { if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) {
@ -439,15 +432,13 @@ public class SnippetProcess {
private final CacheStrategy cacheStrategy; private final CacheStrategy cacheStrategy;
private final int neededResults; private final int neededResults;
private boolean shallrun; private boolean shallrun;
private final Fulltext metadata;
public Worker(final long maxlifetime, final CacheStrategy cacheStrategy, final int neededResults) { private Worker(final long maxlifetime, final CacheStrategy cacheStrategy, final int neededResults) {
this.cacheStrategy = cacheStrategy; this.cacheStrategy = cacheStrategy;
this.lastLifeSign = System.currentTimeMillis(); this.lastLifeSign = System.currentTimeMillis();
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
this.neededResults = neededResults; this.neededResults = neededResults;
this.shallrun = true; this.shallrun = true;
this.metadata = SnippetProcess.this.rankingProcess.getQuery().getSegment().fulltext();
} }
@Override @Override
@ -495,11 +486,7 @@ public class SnippetProcess {
} }
// in case that we have an attached solr, we load also the solr document // in case that we have an attached solr, we load also the solr document
String solrContent = null; String solrContent = page.getText();
SolrDocument sd = page.getDocument();
if (sd != null) {
solrContent = this.metadata.getSolrScheme().solrGetText(sd);
}
resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0 resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) if (resultEntry == null)
@ -532,7 +519,7 @@ public class SnippetProcess {
//Log.logInfo("SEARCH", "resultWorker thread " + this.id + " terminated"); //Log.logInfo("SEARCH", "resultWorker thread " + this.id + " terminated");
} }
public void pleaseStop() { protected void pleaseStop() {
this.shallrun = false; this.shallrun = false;
} }
@ -540,12 +527,12 @@ public class SnippetProcess {
* calculate the time since the worker has had the latest activity * calculate the time since the worker has had the latest activity
* @return time in milliseconds lasted since latest activity * @return time in milliseconds lasted since latest activity
*/ */
public long busytime() { private long busytime() {
return System.currentTimeMillis() - this.lastLifeSign; return System.currentTimeMillis() - this.lastLifeSign;
} }
} }
protected ResultEntry fetchSnippet(final URIMetadataNode page, final String solrText, final CacheStrategy cacheStrategy) { private ResultEntry fetchSnippet(final URIMetadataNode page, final String solrText, final CacheStrategy cacheStrategy) {
// Snippet Fetching can has 3 modes: // Snippet Fetching can has 3 modes:
// 0 - do not fetch snippets // 0 - do not fetch snippets
// 1 - fetch snippets offline only // 1 - fetch snippets offline only
@ -620,7 +607,7 @@ public class SnippetProcess {
* @param urlhash * @param urlhash
* @return true if an entry was deleted, false otherwise * @return true if an entry was deleted, false otherwise
*/ */
public boolean delete(final String urlhash) { protected boolean delete(final String urlhash) {
final Iterator<Element<ResultEntry>> i = this.result.iterator(); final Iterator<Element<ResultEntry>> i = this.result.iterator();
Element<ResultEntry> entry; Element<ResultEntry> entry;
while (i.hasNext()) { while (i.hasNext()) {

Loading…
Cancel
Save