refactor URIMetadataNode to further unify interaction with the index

- URIMetadataNode now extends SolrDocument
- use language as stored (String), reducing byte[] <-> String conversions
- optimize debug code in transferIndex
pull/1/head
reger 11 years ago
parent 79e7947442
commit 727dfb5875

@ -108,7 +108,7 @@ public class yacydoc {
prop.putXML("dc_date", ISO8601Formatter.FORMATTER.format(entry.moddate()));
prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", entry.url().toNormalform(true));
prop.putXML("dc_language", ASCII.String(entry.language()));
prop.putXML("dc_language", entry.language());
prop.putXML("collection", Arrays.toString(entry.collections()));
prop.put("geo_lat", entry.lat());
prop.put("geo_long", entry.lon());

@ -136,7 +136,7 @@ public final class crawlReceipt {
}
// Check URL against DHT blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry.url())) {
// URL is blacklisted
log.warn("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false) + " from peer " + iam);
prop.put("delay", "9999");

@ -123,7 +123,7 @@ public final class transferURL {
}
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry.url()))) {
if (Network.log.isFine()) Network.log.fine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;

@ -32,7 +32,6 @@ import java.util.EnumMap;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
@ -117,7 +116,7 @@ public class YMarkMetadata {
metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet()));
metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount()));
metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype()));
metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language()));
metadata.put(METADATA.LANGUAGE, urlEntry.language());
metadata.put(METADATA.TITLE, urlEntry.dc_title());
metadata.put(METADATA.CREATOR, urlEntry.dc_creator());
metadata.put(METADATA.KEYWORDS, urlEntry.dc_subject());

@ -38,7 +38,6 @@ import net.yacy.cora.date.MicroDate;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.lod.vocabulary.Tagging;
@ -63,7 +62,7 @@ import org.apache.solr.common.SolrDocument;
* The purpose of this object is the migration from the old metadata structure to solr document.
* Future implementations should try to replace URIMetadata objects completely by SolrDocument objects
*/
public class URIMetadataNode {
public class URIMetadataNode extends SolrDocument {
protected byte[] hash = null;
protected String urlRaw = null, keywords = null;
@ -72,7 +71,6 @@ public class URIMetadataNode {
protected int imagec = -1, audioc = -1, videoc = -1, appc = -1;
protected double lat = Double.NaN, lon = Double.NaN;
protected long ranking = 0; // during generation of a search result this value is set
protected SolrDocument doc = null;
protected String snippet = null;
protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests
@ -80,7 +78,7 @@ public class URIMetadataNode {
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
this.doc = new SolrDocument();
super();
urlRaw = crypt.simpleDecode(prop.getProperty("url", ""));
try {
url = new DigestURL(urlRaw);
@ -98,10 +96,9 @@ public class URIMetadataNode {
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0")); if (lons == null) lons = "0.0";
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0")); if (lats == null) lats = "0.0";
this.doc.setField(CollectionSchema.title.name(), descr);
this.doc.setField(CollectionSchema.author.name(), dc_creator);
this.doc.setField(CollectionSchema.publisher_t.name(), dc_publisher);
this.setField(CollectionSchema.title.name(), descr);
this.setField(CollectionSchema.author.name(), dc_creator);
this.setField(CollectionSchema.publisher_t.name(), dc_publisher);
this.lat = Float.parseFloat(lats);
this.lon = Float.parseFloat(lons);
@ -109,32 +106,32 @@ public class URIMetadataNode {
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
this.doc.setField(CollectionSchema.last_modified.name(), formatter.parse(prop.getProperty("mod", "20000101")));
this.setField(CollectionSchema.last_modified.name(), formatter.parse(prop.getProperty("mod", "20000101")));
} catch (final ParseException e) {
this.doc.setField(CollectionSchema.last_modified.name(), new Date());
this.setField(CollectionSchema.last_modified.name(), new Date());
}
try {
this.doc.setField(CollectionSchema.load_date_dt.name(), formatter.parse(prop.getProperty("load", "20000101")));
this.setField(CollectionSchema.load_date_dt.name(), formatter.parse(prop.getProperty("load", "20000101")));
} catch (final ParseException e) {
this.doc.setField(CollectionSchema.load_date_dt.name(), new Date());
this.setField(CollectionSchema.load_date_dt.name(), new Date());
}
try {
this.doc.setField(CollectionSchema.fresh_date_dt.name(), formatter.parse(prop.getProperty("fresh", "20000101")));
this.setField(CollectionSchema.fresh_date_dt.name(), formatter.parse(prop.getProperty("fresh", "20000101")));
} catch (final ParseException e) {
this.doc.setField(CollectionSchema.fresh_date_dt.name(), new Date());
this.setField(CollectionSchema.fresh_date_dt.name(), new Date());
}
this.doc.setField(CollectionSchema.referrer_id_s.name(), prop.getProperty("referrer", ""));
this.doc.setField(CollectionSchema.md5_s.name(), prop.getProperty("md5", ""));
this.doc.setField(CollectionSchema.size_i.name(), Integer.parseInt(prop.getProperty("size", "0")));
this.doc.setField(CollectionSchema.wordcount_i.name(), Integer.parseInt(prop.getProperty("wc", "0")));
this.setField(CollectionSchema.referrer_id_s.name(), prop.getProperty("referrer", ""));
this.setField(CollectionSchema.md5_s.name(), prop.getProperty("md5", ""));
this.setField(CollectionSchema.size_i.name(), Integer.parseInt(prop.getProperty("size", "0")));
this.setField(CollectionSchema.wordcount_i.name(), Integer.parseInt(prop.getProperty("wc", "0")));
final String dt = prop.getProperty("dt", "t");
String[] mime = Response.doctype2mime(null,dt.charAt(0));
this.doc.setField(CollectionSchema.content_type.name(), mime);
this.setField(CollectionSchema.content_type.name(), mime);
final String flagsp = prop.getProperty("flags", "AAAAAA");
this.flags = (flagsp.length() > 6) ? QueryParams.empty_constraint : (new Bitfield(4, flagsp));
this.doc.setField(CollectionSchema.language_s.name(), prop.getProperty("lang", ""));
this.doc.setField(CollectionSchema.inboundlinkscount_i.name(), Integer.parseInt(prop.getProperty("llocal", "0")));
this.doc.setField(CollectionSchema.outboundlinkscount_i.name(), Integer.parseInt(prop.getProperty("lother", "0")));
this.setField(CollectionSchema.language_s.name(), prop.getProperty("lang", ""));
this.setField(CollectionSchema.inboundlinkscount_i.name(), Integer.parseInt(prop.getProperty("llocal", "0")));
this.setField(CollectionSchema.outboundlinkscount_i.name(), Integer.parseInt(prop.getProperty("lother", "0")));
this.imagec = Integer.parseInt(prop.getProperty("limage", "0"));
this.audioc = Integer.parseInt(prop.getProperty("laudio", "0"));
this.videoc = Integer.parseInt(prop.getProperty("lvideo", "0"));
@ -147,9 +144,11 @@ public class URIMetadataNode {
}
public URIMetadataNode(final SolrDocument doc) {
this.doc = doc;
super();
for (String name : doc.getFieldNames()) {
this.addField(name, doc.getFieldValue(name));
}
this.snippet = "";
this.word = null;
Float score = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
this.ranking = score == null ? 0 : (long) (1000000.0f * score.floatValue()); // solr score values are sometimes very low
this.hash = ASCII.getBytes(getString(CollectionSchema.id));
@ -174,7 +173,6 @@ public class URIMetadataNode {
* @return the content domain which classifies the content type
*/
public ContentDomain getContentDomain() {
if (this.doc == null) return this.url.getContentDomainFromExt();
String mime = mime();
if (mime == null) return this.url.getContentDomainFromExt();
ContentDomain contentDomain = Classification.getContentDomainFromMime(mime);
@ -182,16 +180,12 @@ public class URIMetadataNode {
return this.url.getContentDomainFromExt();
}
public SolrDocument getDocument() {
return this.doc;
}
public byte[] hash() {
return this.hash;
}
public String hosthash() {
String hosthash = (String) this.doc.getFieldValue(CollectionSchema.host_id_s.getSolrFieldName());
String hosthash = (String) this.getFieldValue(CollectionSchema.host_id_s.getSolrFieldName());
if (hosthash == null) hosthash = ASCII.String(this.hash, 6, 6);
return hosthash;
}
@ -233,7 +227,7 @@ public class URIMetadataNode {
if (Double.isNaN(this.lat)) {
this.lon = 0.0d;
this.lat = 0.0d;
String latlon = (String) this.doc.getFieldValue(CollectionSchema.coordinate_p.getSolrFieldName());
String latlon = (String) this.getFieldValue(CollectionSchema.coordinate_p.getSolrFieldName());
if (latlon != null) {
int p = latlon.indexOf(',');
if (p > 0) {
@ -277,10 +271,10 @@ public class URIMetadataNode {
return mime == null || mime.size() == 0 ? null : mime.get(0);
}
public byte[] language() {
public String language() {
String language = getString(CollectionSchema.language_s);
if (language == null || language.length() == 0) return ASCII.getBytes("en");
return UTF8.getBytes(language);
if (language == null || language.length() == 0) return "en";
return language;
}
public byte[] referrerHash() {
@ -430,7 +424,7 @@ public class URIMetadataNode {
}
}
protected static StringBuilder corePropList(URIMetadataNode md) {
protected StringBuilder corePropList() {
// generate a parseable string; this is a simple property-list
final StringBuilder s = new StringBuilder(300);
@ -438,33 +432,33 @@ public class URIMetadataNode {
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
s.append("hash=").append(ASCII.String(md.hash()));
s.append(",url=").append(crypt.simpleEncode(md.url().toNormalform(true)));
s.append(",descr=").append(crypt.simpleEncode(md.dc_title()));
s.append(",author=").append(crypt.simpleEncode(md.dc_creator()));
s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(md.dc_subject())));
s.append(",publisher=").append(crypt.simpleEncode(md.dc_publisher()));
s.append(",lat=").append(md.lat());
s.append(",lon=").append(md.lon());
s.append(",mod=").append(formatter.format(md.moddate()));
s.append(",load=").append(formatter.format(md.loaddate()));
s.append(",fresh=").append(formatter.format(md.freshdate()));
s.append(",referrer=").append(md.referrerHash() == null ? "" : ASCII.String(md.referrerHash()));
s.append(",md5=").append(md.md5());
s.append(",size=").append(md.size());
s.append(",wc=").append(md.wordCount());
s.append(",dt=").append(md.doctype());
s.append(",flags=").append(md.flags().exportB64());
s.append(",lang=").append(md.language() == null ? "EN" : UTF8.String(md.language()));
s.append(",llocal=").append(md.llocal());
s.append(",lother=").append(md.lother());
s.append(",limage=").append(md.limage());
s.append(",laudio=").append(md.laudio());
s.append(",lvideo=").append(md.lvideo());
s.append(",lapp=").append(md.lapp());
if (md.word() != null) {
s.append("hash=").append(ASCII.String(this.hash()));
s.append(",url=").append(crypt.simpleEncode(this.url().toNormalform(true)));
s.append(",descr=").append(crypt.simpleEncode(this.dc_title()));
s.append(",author=").append(crypt.simpleEncode(this.dc_creator()));
s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(this.dc_subject())));
s.append(",publisher=").append(crypt.simpleEncode(this.dc_publisher()));
s.append(",lat=").append(this.lat());
s.append(",lon=").append(this.lon());
s.append(",mod=").append(formatter.format(this.moddate()));
s.append(",load=").append(formatter.format(this.loaddate()));
s.append(",fresh=").append(formatter.format(this.freshdate()));
s.append(",referrer=").append(this.referrerHash() == null ? "" : ASCII.String(this.referrerHash()));
s.append(",md5=").append(this.md5());
s.append(",size=").append(this.size());
s.append(",wc=").append(this.wordCount());
s.append(",dt=").append(this.doctype());
s.append(",flags=").append(this.flags().exportB64());
s.append(",lang=").append(this.language());
s.append(",llocal=").append(this.llocal());
s.append(",lother=").append(this.lother());
s.append(",limage=").append(this.limage());
s.append(",laudio=").append(this.laudio());
s.append(",lvideo=").append(this.lvideo());
s.append(",lapp=").append(this.lapp());
if (this.word() != null) {
// append also word properties
final String wprop = md.word().toPropertyForm();
final String wprop = this.word().toPropertyForm();
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
}
return s;
@ -480,7 +474,7 @@ public class URIMetadataNode {
*/
public String toString(String snippet) {
// add information needed for remote transport
final StringBuilder core = corePropList(this);
final StringBuilder core = corePropList();
if (core == null)
return null;
@ -501,7 +495,7 @@ public class URIMetadataNode {
*/
@Override
public String toString() {
final StringBuilder core = corePropList(this);
final StringBuilder core = corePropList();
if (core == null) return null;
core.insert(0, '{');
core.append('}');
@ -511,7 +505,7 @@ public class URIMetadataNode {
private int getInt(CollectionSchema field) {
assert !field.isMultiValued();
assert field.getType() == SolrType.num_integer;
Object x = this.doc.getFieldValue(field.getSolrFieldName());
Object x = this.getFieldValue(field.getSolrFieldName());
if (x == null) return 0;
if (x instanceof Integer) return ((Integer) x).intValue();
if (x instanceof Long) return ((Long) x).intValue();
@ -521,7 +515,7 @@ public class URIMetadataNode {
private Date getDate(CollectionSchema field) {
assert !field.isMultiValued();
assert field.getType() == SolrType.date;
Date x = (Date) this.doc.getFieldValue(field.getSolrFieldName());
Date x = (Date) this.getFieldValue(field.getSolrFieldName());
if (x == null) return new Date(0);
Date now = new Date();
return x.after(now) ? now : x;
@ -530,7 +524,7 @@ public class URIMetadataNode {
private String getString(CollectionSchema field) {
assert !field.isMultiValued();
assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
Object x = this.doc.getFieldValue(field.getSolrFieldName());
Object x = this.getFieldValue(field.getSolrFieldName());
if (x == null) return "";
if (x instanceof ArrayList) {
@SuppressWarnings("unchecked")
@ -544,7 +538,7 @@ public class URIMetadataNode {
private ArrayList<String> getStringList(CollectionSchema field) {
assert field.isMultiValued();
assert field.getType() == SolrType.string || field.getType() == SolrType.text_general;
Object r = this.doc.getFieldValue(field.getSolrFieldName());
Object r = this.getFieldValue(field.getSolrFieldName());
if (r == null) return new ArrayList<String>(0);
if (r instanceof ArrayList) {
return (ArrayList<String>) r;
@ -558,7 +552,7 @@ public class URIMetadataNode {
private ArrayList<Integer> getIntList(CollectionSchema field) {
assert field.isMultiValued();
assert field.getType() == SolrType.num_integer;
Object r = this.doc.getFieldValue(field.getSolrFieldName());
Object r = this.getFieldValue(field.getSolrFieldName());
if (r == null) return new ArrayList<Integer>(0);
if (r instanceof ArrayList) {
return (ArrayList<Integer>) r;

@ -58,7 +58,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private final Bitfield flags;
private long lastModified;
private final byte[] language;
private final String language;
public final byte[] urlHash;
private String hostHash = null;
private final char type;
@ -108,7 +108,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final int posinphrase, // position of word in its phrase
final int posofphrase, // number of the phrase where word appears
final long lastmodified, // last-modified time of the document where word appears
byte[] language, // (guessed) language of document
String language, // (guessed) language of document
final char doctype, // type of document
final int outlinksSame, // outlinks to same domain
final int outlinksOther, // outlinks to other domain
@ -143,7 +143,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.flags = e.flags();
//this.freshUntil = e.freshUntil();
this.lastModified = e.lastModified();
this.language = e.getLanguage();
this.language = ASCII.String(e.getLanguage());
this.urlHash = e.urlhash();
this.type = e.getType();
this.hitcount = e.hitcount();
@ -229,7 +229,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
@Override
public byte[] getLanguage() {
return this.language;
return ASCII.getBytes(this.language);
}
@Override
@ -291,7 +291,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.posofphrase, // number of the phrase where word appears
this.lastModified, // last-modified time of the document where word appears
System.currentTimeMillis(), // update time;
this.language, // (guessed) language of document
ASCII.getBytes(this.language), // (guessed) language of document
this.type, // type of document
this.llocal, // outlinks to same domain
this.lother, // outlinks to other domain

@ -1362,6 +1362,7 @@ public final class Protocol {
final int timeout) {
// check if we got all necessary urls in the urlCache (only for debugging)
if (Network.log.isFine()) {
Iterator<WordReference> eenum;
Reference entry;
for ( final ReferenceContainer<WordReference> ic : indexes ) {
@ -1369,7 +1370,6 @@ public final class Protocol {
while ( eenum.hasNext() ) {
entry = eenum.next();
if ( !urlRefs.has(entry.urlhash()) ) {
if ( Network.log.isFine() ) {
Network.log.fine("DEBUG transferIndex: to-send url hash '"
+ ASCII.String(entry.urlhash())
+ "' is not contained in urlCache");

@ -53,7 +53,6 @@ import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.ListManager;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.FileUtils;
@ -457,10 +456,6 @@ public class Blacklist {
return ret;
}
public final boolean isListed(final BlacklistType blacklistType, final URIMetadataNode entry) {
return isListed(blacklistType, entry.url());
}
/**
* Checks whether the given entry is listed in given blacklist type.
* @param blacklistType The used blacklist

@ -47,7 +47,6 @@ import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
@ -279,7 +278,7 @@ public final class SearchEvent {
this.snippetFetchAlive = new AtomicInteger(0);
this.addRunning = true;
this.receivedRemoteReferences = new AtomicInteger(0);
this.order = new ReferenceOrder(this.query.ranking, UTF8.getBytes(this.query.targetlang));
this.order = new ReferenceOrder(this.query.ranking, this.query.targetlang);
this.urlhashes = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 100);
this.taggingPredicates = new HashMap<String, String>();
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
@ -897,7 +896,7 @@ public final class SearchEvent {
}
if (this.query.modifier.language != null) {
if (!this.query.modifier.language.equals(UTF8.String(iEntry.language()))) {
if (!this.query.modifier.language.equals(iEntry.language())) {
if (log.isFine()) log.fine("dropped Node: language");
continue pollloop;
}
@ -1083,7 +1082,7 @@ public final class SearchEvent {
// check modifier constraint (language)
// TODO: : page.language() never null but defaults to "en" (may cause false drop of result)
if (this.query.modifier.language != null && !this.query.modifier.language.equals(ASCII.String(page.language()))) {
if (this.query.modifier.language != null && !this.query.modifier.language.equals(page.language())) {
if (log.isFine()) log.fine("dropped RWI: language constraint = " + this.query.modifier.language);
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
@ -1165,7 +1164,7 @@ public final class SearchEvent {
// TODO: vocabulary is only valid and available in local Solr index (considere to auto-switch to Searchdom.LOCAL)
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
tagloop: for (Tagging.Metatag tag : this.query.metatags) {
SolrDocument sdoc = page.getDocument();
SolrDocument sdoc = page;
if (sdoc != null) {
Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_SUFFIX);
if (tagvalues != null && tagvalues.contains(tag.getObject())) {
@ -1462,7 +1461,7 @@ public final class SearchEvent {
ResultEntry ms = oneResult(item, timeout);
// check if the match was made in the url or in the image links
if (ms != null) {
SolrDocument doc = ms.getNode().getDocument();
SolrDocument doc = ms.getNode();
Collection<Object> alt = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
Collection<Object> img = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
Collection<Object> prt = doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName());

@ -33,6 +33,7 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.util.ByteBuffer;
@ -55,9 +56,9 @@ public class ReferenceOrder {
private WordReferenceVars min, max;
private final ConcurrentScoreMap<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking;
private final byte[] language;
private final String language;
public ReferenceOrder(final RankingProfile profile, final byte[] language) {
public ReferenceOrder(final RankingProfile profile, final String language) {
this.min = null;
this.max = null;
this.ranking = profile;
@ -256,7 +257,7 @@ public class ReferenceOrder {
+ ((flags.get(Condenser.flag_cat_hasaudio)) ? 255 << this.ranking.coeff_cathasaudio : 0)
+ ((flags.get(Condenser.flag_cat_hasvideo)) ? 255 << this.ranking.coeff_cathasvideo : 0)
+ ((flags.get(Condenser.flag_cat_hasapp)) ? 255 << this.ranking.coeff_cathasapp : 0)
+ ((ByteBuffer.equals(t.getLanguage(), this.language)) ? 255 << this.ranking.coeff_language : 0);
+ ((ByteBuffer.equals(t.getLanguage(), ASCII.getBytes(this.language))) ? 255 << this.ranking.coeff_language : 0);
//if (searchWords != null) r += (yacyURL.probablyWordURL(t.urlHash(), searchWords) != null) ? 256 << ranking.coeff_appurl : 0;
@ -289,7 +290,7 @@ public class ReferenceOrder {
+ ((flags.get(Condenser.flag_cat_hasaudio)) ? 255 << this.ranking.coeff_cathasaudio : 0)
+ ((flags.get(Condenser.flag_cat_hasvideo)) ? 255 << this.ranking.coeff_cathasvideo : 0)
+ ((flags.get(Condenser.flag_cat_hasapp)) ? 255 << this.ranking.coeff_cathasapp : 0)
+ ((ByteBuffer.equals(t.language(), this.language)) ? 255 << this.ranking.coeff_language : 0);
+ ((this.language.equals(t.language())) ? 255 << this.ranking.coeff_language : 0);
return r; // the higher the number the better the ranking.
}

@ -48,7 +48,6 @@ import java.util.regex.Pattern;
import net.yacy.cora.document.analysis.EnhancedTextProfileSignature;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
@ -306,7 +305,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if ((allAttr || contains(CollectionSchema.referrer_id_s)) && md.referrerHash() != null) add(doc, CollectionSchema.referrer_id_s, ASCII.String(md.referrerHash()));
if (allAttr || contains(CollectionSchema.md5_s)) add(doc, CollectionSchema.md5_s, md.md5());
if (allAttr || contains(CollectionSchema.publisher_t)) add(doc, CollectionSchema.publisher_t, md.dc_publisher());
if ((allAttr || contains(CollectionSchema.language_s)) && md.language() != null) add(doc, CollectionSchema.language_s, UTF8.String(md.language()));
if (allAttr || contains(CollectionSchema.language_s)) add(doc, CollectionSchema.language_s, md.language());
if (allAttr || contains(CollectionSchema.size_i)) add(doc, CollectionSchema.size_i, md.size());
if (allAttr || contains(CollectionSchema.audiolinkscount_i)) add(doc, CollectionSchema.audiolinkscount_i, md.laudio());
if (allAttr || contains(CollectionSchema.videolinkscount_i)) add(doc, CollectionSchema.videolinkscount_i, md.lvideo());

@ -70,7 +70,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
final List<MediaSnippet> mediaSnippets,
final long snippetComputationTime) {
this.urlentry = urlentry;
this.urlentry.getDocument().setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
this.indexSegment = indexSegment;
this.alternative_urlstring = null;
this.alternative_urlname = null;

Loading…
Cancel
Save