diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list
index 1093b9da8..6f336a8c3 100644
--- a/defaults/solr.keys.list
+++ b/defaults/solr.keys.list
@@ -292,9 +292,6 @@ publisher_t
## the language used in the document; starts with primary language
language_txt
-## an external ranking value
-ranking_i
-
## the size of the raw source
size_i
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index 06fe81c0a..a3b639597 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -31,6 +31,7 @@ import java.io.IOException;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Protocol;
@@ -115,7 +116,7 @@ public final class crawlReceipt {
}
// generating a new loaded URL entry
- final URIMetadataRow entry = URIMetadataRow.importEntry(propStr);
+ final URIMetadata entry = URIMetadataRow.importEntry(propStr);
if (entry == null) {
if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (entry null) from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "3600");
diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java
index cf50c88f3..633c08dd4 100644
--- a/htroot/yacy/transferURL.java
+++ b/htroot/yacy/transferURL.java
@@ -33,6 +33,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.EventChannel;
@@ -87,7 +88,7 @@ public final class transferURL {
final int sizeBefore = sb.index.urlMetadata().size();
// read the urls from the other properties and store
String urls;
- URIMetadataRow lEntry;
+ URIMetadata lEntry;
for (int i = 0; i < urlc; i++) {
serverCore.checkInterruption();
diff --git a/source/de/anomic/crawler/ResultURLs.java b/source/de/anomic/crawler/ResultURLs.java
index 460107638..2593d40b1 100644
--- a/source/de/anomic/crawler/ResultURLs.java
+++ b/source/de/anomic/crawler/ResultURLs.java
@@ -223,7 +223,7 @@ public final class ResultURLs {
public static void main(final String[] args) {
try {
final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/");
- final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0d, 0.0d, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), UTF8.getBytes("de"), 0, 0, 0, 0, 0, 0);
+ final URIMetadata urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0d, 0.0d, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), UTF8.getBytes("de"), 0, 0, 0, 0, 0, 0);
final EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING;
System.out.println("valid test:\n=======");
// add
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadata.java b/source/net/yacy/kelondro/data/meta/URIMetadata.java
index 49dc6a0e7..52a4b11a3 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadata.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadata.java
@@ -26,6 +26,7 @@ import java.util.Date;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.order.Bitfield;
+import de.anomic.crawler.retrieval.Request;
public interface URIMetadata extends URIReference {
@@ -82,4 +83,6 @@ public interface URIMetadata extends URIReference {
public byte[] referrerHash();
+ public Request toBalancerEntry(final String initiatorHash);
+
}
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
index 5593fd602..29e98cfa8 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
@@ -1,7 +1,7 @@
/**
* URIMetadataNode
* Copyright 2012 by Michael Peter Christen
- * First released 3.4.2012 at http://yacy.net
+ * First released 10.8.2012 at http://yacy.net
*
* This file is part of YaCy Content Integration
*
@@ -9,12 +9,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@@ -22,432 +22,260 @@
package net.yacy.kelondro.data.meta;
-import net.yacy.cora.lod.Node;
-import net.yacy.cora.lod.vocabulary.Rdf;
-import net.yacy.kelondro.data.word.WordReferenceVars;
+import java.net.MalformedURLException;
+import java.util.Date;
+import java.util.List;
+import java.util.regex.Pattern;
+import net.yacy.cora.date.GenericFormatter;
+import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.UTF8;
+import net.yacy.cora.lod.vocabulary.Tagging;
+import net.yacy.document.Condenser;
+import net.yacy.kelondro.data.word.WordReference;
+import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.order.Base64Order;
+import net.yacy.kelondro.order.Bitfield;
+import net.yacy.search.index.YaCySchema;
-public class URIMetadataNode /*extends URIReferenceNode implements URIMetadata*/ {
+import org.apache.solr.common.SolrDocument;
- private final Node entry;
+import de.anomic.crawler.retrieval.Request;
+import de.anomic.crawler.retrieval.Response;
+import de.anomic.tools.crypt;
+
+/**
+ * This is the URIMetadata implementation backed by a Solr document.
+ * Its purpose is to ease the migration from the old metadata structure to Solr documents.
+ * Future implementations should try to replace URIMetadata objects completely with SolrDocument objects.
+ */
+public class URIMetadataNode implements URIMetadata {
+
+ private final byte[] hash;
+ private final String urlRaw, keywords;
+ private DigestURI url;
+ Bitfield flags;
+ private final int imagec, audioc, videoc, appc;
+ private final double lon, lat;
+ private long ranking; // during generation of a search result this value is set
+ private final SolrDocument doc;
private final String snippet;
- private final WordReferenceVars word; // this is only used if the url is transported via remote search requests
- private final long ranking; // during generation of a search result this value is set
-
- public URIMetadataNode() {
- // create a dummy entry, good to produce poison objects
- this.entry = new Node(Rdf.Description);
- this.snippet = null;
- this.word = null;
- this.ranking = 0;
- }
-/*
- public URIMetadataNode(
- final DigestURI url,
- final String dc_title,
- final String dc_creator,
- final String dc_subject,
- final String dc_publisher,
- final float lon, final float lat, // decimal degrees as in WGS84; if unknown both values may be 0.0f;
- final Date mod,
- final Date load,
- final Date fresh,
- final String referrer,
- final byte[] md5,
- final long size,
- final int wc,
- final char dt,
- final Bitfield flags,
- final byte[] lang,
- final int llocal,
- final int lother,
- final int laudio,
- final int limage,
- final int lvideo,
- final int lapp) {
- // create new entry
- this.entry = new Node();
- this.entry.setSubject(UTF8.getBytes(url.toNormalform(true, false)));
- this.entry.setObject(YaCyMetadata.hash, url.hash());
- this.entry.setObject(DublinCore.Title, UTF8.getBytes(dc_title));
- this.entry.setObject(DublinCore.Creator, UTF8.getBytes(dc_creator));
- this.entry.setObject(DublinCore.Subject, UTF8.getBytes(dc_subject));
- this.entry.setObject(DublinCore.Publisher, UTF8.getBytes(dc_publisher));
- this.entry.setObject(Geo.Lat, ASCII.getBytes(Float.toString(lat)));
- this.entry.setObject(Geo.Long, ASCII.getBytes(Float.toString(lon)));
-
-
- encodeDate(col_mod, mod);
- encodeDate(col_load, load);
- encodeDate(col_fresh, fresh);
- this.entry.setCol(col_referrer, (referrer == null) ? null : UTF8.getBytes(referrer));
- this.entry.setCol(col_md5, md5);
- this.entry.setCol(col_size, size);
- this.entry.setCol(col_wc, wc);
- this.entry.setCol(col_dt, new byte[]{(byte) dt});
- this.entry.setCol(col_flags, flags.bytes());
- this.entry.setCol(col_lang, lang);
- this.entry.setCol(col_llocal, llocal);
- this.entry.setCol(col_lother, lother);
- this.entry.setCol(col_limage, limage);
- this.entry.setCol(col_laudio, laudio);
- this.entry.setCol(col_lvideo, lvideo);
- this.entry.setCol(col_lapp, lapp);
- //System.out.println("===DEBUG=== " + load.toString() + ", " + decodeDate(col_load).toString());
- this.snippet = null;
- this.word = null;
- this.ranking = 0;
- this.comp = null;
- }
-
- private byte[] encodeDate(final Date d) {
- // calculates the number of days since 1.1.1970 and returns this as 4-byte array
- // 86400000 is the number of milliseconds in one day
- return NaturalOrder.encodeLong(d.getTime() / 86400000L, 4);
- }
-
- private Date decodeDate(final int col) {
- final long t = this.entry.getColLong(col);
- }
-
- public static byte[] encodeComp(
- final DigestURI url,
- final String dc_title,
- final String dc_creator,
- final String dc_subject,
- final String dc_publisher,
- final float lat,
- final float lon) {
- final CharBuffer s = new CharBuffer(360);
- s.append(url.toNormalform(false, true)).appendLF();
- s.append(dc_title).appendLF();
- if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
- s.appendLF();
- if (dc_subject.length() > 120) s.append(dc_subject, 0, 120); else s.append(dc_subject);
- s.appendLF();
- if (dc_publisher.length() > 80) s.append(dc_publisher, 0, 80); else s.append(dc_publisher);
- s.appendLF();
- if (lon == 0.0f && lat == 0.0f) s.appendLF(); else s.append(Float.toString(lat)).append(',').append(Float.toString(lon)).appendLF();
- return UTF8.getBytes(s.toString());
- }
-
- public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) {
- this.entry = entry;
- this.snippet = null;
- this.word = searchedWord;
- this.ranking = ranking;
- this.comp = null;
- }
+ private WordReference word; // this is only used if the url is transported via remote search requests
- public URIMetadataRow(final Properties prop) {
- // generates an plasmaLURLEntry using the properties from the argument
- // the property names must correspond to the one from toString
- //System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
- DigestURI url;
+ public URIMetadataNode(final SolrDocument doc) {
+ this.doc = doc;
+ this.snippet = "";
+ this.word = null;
+ this.ranking = Long.MIN_VALUE;
+ this.hash = ASCII.getBytes(getString(YaCySchema.id));
+ this.urlRaw = getString(YaCySchema.sku);
try {
- url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), ASCII.getBytes(prop.getProperty("hash")));
- } catch (final MalformedURLException e) {
- url = null;
+ this.url = new DigestURI(this.urlRaw, this.hash);
+ } catch (MalformedURLException e) {
+ Log.logException(e);
+ this.url = null;
}
- String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
- String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
- String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
- String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
- String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
- String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
-
- this.entry = rowdef.newEntry();
- this.entry.setCol(col_hash, url.hash()); // FIXME potential null pointer access
- this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher, Float.parseFloat(lats), Float.parseFloat(lons)));
- // create new formatters to make concurrency possible
- final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
-
- try {
- encodeDate(col_mod, formatter.parse(prop.getProperty("mod", "20000101")));
- } catch (final ParseException e) {
- encodeDate(col_mod, new Date());
- }
- try {
- encodeDate(col_load, formatter.parse(prop.getProperty("load", "20000101")));
- } catch (final ParseException e) {
- encodeDate(col_load, new Date());
- }
- try {
- encodeDate(col_fresh, formatter.parse(prop.getProperty("fresh", "20000101")));
- } catch (final ParseException e) {
- encodeDate(col_fresh, new Date());
- }
- this.entry.setCol(col_referrer, UTF8.getBytes(prop.getProperty("referrer", "")));
- this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", "")));
- this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
- this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
- final String dt = prop.getProperty("dt", "t");
- this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'});
- final String flags = prop.getProperty("flags", "AAAAAA");
- this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes());
- this.entry.setCol(col_lang, UTF8.getBytes(prop.getProperty("lang", "uk")));
- this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
- this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
- this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));
- this.entry.setCol(col_laudio, Integer.parseInt(prop.getProperty("laudio", "0")));
- this.entry.setCol(col_lvideo, Integer.parseInt(prop.getProperty("lvideo", "0")));
- this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0")));
- this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
- this.word = null;
- if (prop.containsKey("word")) throw new kelondroException("old database structure is not supported");
- if (prop.containsKey("wi")) {
- this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
- }
- this.ranking = 0;
- this.comp = null;
+        // the flags bitfield is derived from the values below, so they are pre-loaded from the Solr document
+        this.keywords = getString(YaCySchema.keywords);
+        this.imagec = getInt(YaCySchema.imagescount_i);
+        this.audioc = getInt(YaCySchema.audiolinkscount_i);
+        this.videoc = getInt(YaCySchema.videolinkscount_i);
+        this.appc = getInt(YaCySchema.applinkscount_i); // application links have their own counter; do not reuse the video counter
+        this.lon = getDouble(YaCySchema.lon_coordinate);
+        this.lat = getDouble(YaCySchema.lat_coordinate);
+        this.flags = new Bitfield();
+        if (this.keywords != null && this.keywords.indexOf("indexof") >= 0) this.flags.set(Condenser.flag_cat_indexof, true);
+        if (this.lon != 0.0d || this.lat != 0.0d) this.flags.set(Condenser.flag_cat_haslocation, true);
+        if (this.imagec > 0) this.flags.set(Condenser.flag_cat_hasimage, true);
+        if (this.audioc > 0) this.flags.set(Condenser.flag_cat_hasaudio, true);
+        if (this.videoc > 0) this.flags.set(Condenser.flag_cat_hasvideo, true);
+        if (this.appc > 0) this.flags.set(Condenser.flag_cat_hasapp, true);
+    }
+
+ public URIMetadataNode(final SolrDocument doc, final WordReference searchedWord, final long ranking) {
+ this(doc);
+ this.word = searchedWord;
+ this.ranking = ranking;
}
- public static URIMetadataRow importEntry(final String propStr) {
- if (propStr == null || (propStr.length() > 0 && propStr.charAt(0) != '{') || !propStr.endsWith("}")) {
- return null;
- }
- try {
- return new URIMetadataRow(MapTools.s2p(propStr.substring(1, propStr.length() - 1)));
- } catch (final kelondroException e) {
- // wrong format
- return null;
- }
+    private int getInt(YaCySchema field) {
+        // a null field value is mapped to 0
+        final Integer value = (Integer) this.doc.getFieldValue(field.name());
+        return value == null ? 0 : value.intValue();
}
- private StringBuilder corePropList() {
- // generate a parseable string; this is a simple property-list
- final Components metadata = metadata();
- final StringBuilder s = new StringBuilder(300);
- if (metadata == null) return null;
- //System.out.println("author=" + comp.author());
-
- // create new formatters to make concurrency possible
- final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
-
- try {
- s.append("hash=").append(ASCII.String(hash()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",url=").append(crypt.simpleEncode(metadata.url().toNormalform(false, true)));
- assert (s.toString().indexOf(0) < 0);
- s.append(",descr=").append(crypt.simpleEncode(metadata.dc_title()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",tags=").append(crypt.simpleEncode(metadata.dc_subject()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",lat=").append(metadata.lat());
- assert (s.toString().indexOf(0) < 0);
- s.append(",lon=").append(metadata.lon());
- assert (s.toString().indexOf(0) < 0);
- s.append(",mod=").append(formatter.format(moddate()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",load=").append(formatter.format(loaddate()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",fresh=").append(formatter.format(freshdate()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",referrer=").append(referrerHash() == null ? "" : ASCII.String(referrerHash()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",md5=").append(md5());
- assert (s.toString().indexOf(0) < 0);
- s.append(",size=").append(size());
- assert (s.toString().indexOf(0) < 0);
- s.append(",wc=").append(wordCount());
- assert (s.toString().indexOf(0) < 0);
- s.append(",dt=").append(doctype());
- assert (s.toString().indexOf(0) < 0);
- s.append(",flags=").append(flags().exportB64());
- assert (s.toString().indexOf(0) < 0);
- s.append(",lang=").append(language() == null ? "EN" : UTF8.String(language()));
- assert (s.toString().indexOf(0) < 0);
- s.append(",llocal=").append(llocal());
- assert (s.toString().indexOf(0) < 0);
- s.append(",lother=").append(lother());
- assert (s.toString().indexOf(0) < 0);
- s.append(",limage=").append(limage());
- assert (s.toString().indexOf(0) < 0);
- s.append(",laudio=").append(laudio());
- assert (s.toString().indexOf(0) < 0);
- s.append(",lvideo=").append(lvideo());
- assert (s.toString().indexOf(0) < 0);
- s.append(",lapp=").append(lapp());
- assert (s.toString().indexOf(0) < 0);
+    private long getLong(YaCySchema field) {
+        // a null field value is mapped to 0
+        final Long value = (Long) this.doc.getFieldValue(field.name());
+        return value == null ? 0L : value.longValue();
+    }
- if (this.word != null) {
- // append also word properties
- final String wprop = this.word.toPropertyForm();
- s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
- }
- assert (s.toString().indexOf(0) < 0);
- return s;
+    private double getDouble(YaCySchema field) {
+        // a null field value is mapped to 0.0
+        final Double value = (Double) this.doc.getFieldValue(field.name());
+        return value == null ? 0.0d : value.doubleValue();
+    }
- } catch (final Throwable e) {
- // serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
- // if (moddate == null) serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
- // if (loaddate == null) serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
- Log.logException(e);
- return null;
- }
+    private Date getDate(YaCySchema field) {
+        // a null field value is mapped to the epoch start, new Date(0)
+        final Date value = (Date) this.doc.getFieldValue(field.name());
+        return value == null ? new Date(0) : value;
}
- public Row.Entry toRowEntry() {
- return this.entry;
+    private String getString(YaCySchema field) {
+        // a null field value is mapped to the empty string, so callers never see null
+        final String value = (String) this.doc.getFieldValue(field.name());
+        return value == null ? "" : value;
}
+ @Override
public byte[] hash() {
- // return a url-hash, based on the md5 algorithm
- // the result is a String of 12 bytes within a 72-bit space
- // (each byte has an 6-bit range)
- // that should be enough for all web pages on the world
- return this.entry.getPrimaryKeyBytes();
+ return this.hash;
}
- public long ranking() {
- return this.ranking;
+ @Override
+ public String hosthash() {
+ return (String) this.doc.getFieldValue(YaCySchema.host_id_s.name());
}
- public boolean matches(final Pattern matcher) {
- return this.metadata().matches(matcher);
+ @Override
+ public Date moddate() {
+ return getDate(YaCySchema.last_modified);
}
-
+
+ @Override
public DigestURI url() {
- return this.metadata().url();
+ return this.url;
}
-
- public String dc_title() {
- return this.metadata().dc_title();
+
+ @Override
+ public boolean matches(Pattern matcher) {
+ return matcher.matcher(this.urlRaw.toLowerCase()).matches();
}
-
- public String dc_creator() {
- return this.metadata().dc_creator();
+
+    @Override
+    public String dc_title() {
+        // the title field is read as a list of strings; only its first entry is reported, "" if there is none
+        @SuppressWarnings("unchecked")
+        List<String> titles = (List<String>) this.doc.getFieldValue(YaCySchema.title.name());
+        return (titles == null || titles.isEmpty()) ? "" : titles.get(0);
}
-
- public String dc_publisher() {
- return this.metadata().dc_publisher();
+
+ @Override
+ public String dc_creator() {
+ return getString(YaCySchema.author);
}
-
- public String dc_subject() {
- return this.metadata().dc_subject();
+
+ @Override
+ public String dc_publisher() {
+ return getString(YaCySchema.publisher_t);
}
- public float lat() {
- return this.metadata().lat();
+ @Override
+ public String dc_subject() {
+ return this.keywords;
}
- public float lon() {
- return this.metadata().lon();
+ @Override
+ public double lat() {
+ return this.lat;
}
-
- private Components metadata() {
- // avoid double computation of metadata elements
- if (this.comp != null) return this.comp;
- // parse elements from comp field;
- final byte[] c = this.entry.getColBytes(col_comp, true);
- final List cl = ByteBuffer.split(c, (byte) 10);
- this.comp = new Components(
- (cl.size() > 0) ? UTF8.String(cl.get(0)) : "",
- hash(),
- (cl.size() > 1) ? UTF8.String(cl.get(1)) : "",
- (cl.size() > 2) ? UTF8.String(cl.get(2)) : "",
- (cl.size() > 3) ? UTF8.String(cl.get(3)) : "",
- (cl.size() > 4) ? UTF8.String(cl.get(4)) : "",
- (cl.size() > 5) ? UTF8.String(cl.get(5)) : "");
- return this.comp;
+
+ @Override
+ public double lon() {
+ return this.lon;
}
- public Date moddate() {
- return decodeDate(col_mod);
+ @Override
+ public long ranking() {
+ return this.ranking;
}
+ @Override
public Date loaddate() {
- return decodeDate(col_load);
+ return getDate(YaCySchema.load_date_dt);
}
+ @Override
public Date freshdate() {
- return decodeDate(col_fresh);
- }
-
- public byte[] referrerHash() {
- // return the creator's hash or null if there is none
- // FIXME: There seem to be some malformed entries in the databasees like "null\0\0\0\0\0\0\0\0"
- final byte[] r = this.entry.getColBytes(col_referrer, true);
- if (r != null) {
- int i = r.length;
- while (i > 0) {
- if (r[--i] == 0) return null;
- }
- }
- return r;
+ return getDate(YaCySchema.fresh_date_dt);
}
+ @Override
public String md5() {
- // returns the md5 in hex representation
- return Digest.encodeHex(this.entry.getColBytes(col_md5, true));
+ return getString(YaCySchema.md5_s);
}
+ @Override
public char doctype() {
- return (char) this.entry.getColByte(col_dt);
+ return Response.docType(getString(YaCySchema.content_type));
}
+ @Override
public byte[] language() {
- byte[] b = this.entry.getColBytes(col_lang, true);
- if (b == null || b[0] == (byte)'[') {
- String tld = this.metadata().url.getTLD();
- if (tld.length() < 2 || tld.length() > 2) return ASCII.getBytes("en");
- return ASCII.getBytes(tld);
- }
- return b;
+        final Object language = this.doc.getFirstValue(YaCySchema.language_txt.name()); // a String[] cast fails: multi-valued fields arrive as a Collection
+        if (language == null) return ASCII.getBytes("en"); // no language stored: fall back to English
+        return UTF8.getBytes(language.toString());
}
+ @Override
public int size() {
- return (int) this.entry.getColLong(col_size);
+ return getInt(YaCySchema.size_i);
}
+ @Override
public Bitfield flags() {
- return new Bitfield(this.entry.getColBytes(col_flags, true));
+ return this.flags;
}
+ @Override
public int wordCount() {
- return (int) this.entry.getColLong(col_wc);
+ return getInt(YaCySchema.wordcount_i);
}
+ @Override
public int llocal() {
- return (int) this.entry.getColLong(col_llocal);
+ return getInt(YaCySchema.inboundlinkscount_i);
}
+ @Override
public int lother() {
- return (int) this.entry.getColLong(col_lother);
+ return getInt(YaCySchema.outboundlinkscount_i);
}
+ @Override
public int limage() {
- return (int) this.entry.getColLong(col_limage);
+ return this.imagec;
}
+ @Override
public int laudio() {
- return (int) this.entry.getColLong(col_laudio);
+ return this.audioc;
}
+ @Override
public int lvideo() {
- return (int) this.entry.getColLong(col_lvideo);
+ return this.videoc;
}
+ @Override
public int lapp() {
- return (int) this.entry.getColLong(col_lapp);
+ return this.appc;
}
+ @Override
public String snippet() {
- // the snippet may appear here if the url was transported in a remote search
- // it will not be saved anywhere, but can only be requested here
return this.snippet;
}
- public WordReferenceVars word() {
+ @Override
+ public WordReference word() {
return this.word;
}
- public boolean isOlder(final URIMetadata other) {
+ @Override
+ public boolean isOlder(URIMetadata other) {
if (other == null) return false;
final Date tmoddate = moddate();
final Date omoddate = other.moddate();
@@ -461,7 +289,84 @@ public class URIMetadataNode /*extends URIReferenceNode implements URIMetadata*/
return false;
}
- public String toString(final String snippet) {
+ private StringBuilder corePropList() {
+ // generate a parseable string; this is a simple property-list
+ final StringBuilder s = new StringBuilder(300);
+ //System.out.println("author=" + comp.author());
+
+ // create new formatters to make concurrency possible
+ final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
+
+ try {
+ s.append("hash=").append(ASCII.String(hash()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",url=").append(crypt.simpleEncode(url().toNormalform(false, true)));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",descr=").append(crypt.simpleEncode(dc_title()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",author=").append(crypt.simpleEncode(dc_creator()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(dc_subject())));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",publisher=").append(crypt.simpleEncode(dc_publisher()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",lat=").append(lat());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",lon=").append(lon());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",mod=").append(formatter.format(moddate()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",load=").append(formatter.format(loaddate()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",fresh=").append(formatter.format(freshdate()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",referrer=").append(referrerHash() == null ? "" : ASCII.String(referrerHash()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",md5=").append(md5());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",size=").append(size());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",wc=").append(wordCount());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",dt=").append(doctype());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",flags=").append(flags().exportB64());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",lang=").append(language() == null ? "EN" : UTF8.String(language()));
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",llocal=").append(llocal());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",lother=").append(lother());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",limage=").append(limage());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",laudio=").append(laudio());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",lvideo=").append(lvideo());
+ assert (s.toString().indexOf(0) < 0);
+ s.append(",lapp=").append(lapp());
+ assert (s.toString().indexOf(0) < 0);
+
+ if (this.word != null) {
+ // append also word properties
+ final String wprop = this.word.toPropertyForm();
+ s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
+ }
+ assert (s.toString().indexOf(0) < 0);
+ return s;
+
+ } catch (final Throwable e) {
+ Log.logException(e);
+ return null;
+ }
+ }
+
+ /**
+ * the toString format must be completely identical to URIMetadataRow because that is used
+ * to transport the data over p2p connections.
+ */
+ @Override
+ public String toString(String snippet) {
// add information needed for remote transport
final StringBuilder core = corePropList();
if (core == null)
@@ -476,12 +381,20 @@ public class URIMetadataNode /*extends URIReferenceNode implements URIMetadata*/
//return "{" + core + ",snippet=" + crypt.simpleEncode(snippet) + "}";
}
+    @Override
+    public byte[] referrerHash() {
+        // getFirstValue copes with single values as well as the Collection used for multi-valued fields; null means no referrer
+        final Object referrer = this.doc.getFirstValue(YaCySchema.referrer_id_txt.name());
+        return referrer == null ? null : ASCII.getBytes(referrer.toString());
+    }
+
+ @Override
public Request toBalancerEntry(final String initiatorHash) {
return new Request(
ASCII.getBytes(initiatorHash),
- metadata().url(),
+ url(),
referrerHash(),
- metadata().dc_title(),
+ dc_title(),
moddate(),
null,
0,
@@ -489,74 +402,4 @@ public class URIMetadataNode /*extends URIReferenceNode implements URIMetadata*/
0,
0);
}
-
- @Override
- public String toString() {
- final StringBuilder core = corePropList();
- if (core == null) return null;
-
- core.insert(0, "{");
- core.append("}");
-
- return core.toString();
- //return "{" + core + "}";
- }
-
- private class Components {
- private DigestURI url;
- private String urlRaw;
- private byte[] urlHash;
- private final String dc_title, dc_creator, dc_subject, dc_publisher;
- private final String latlon; // a comma-separated tuple as "," where the coordinates are given as WGS84 spatial coordinates in decimal degrees
-
- public Components(
- final String urlRaw,
- final byte[] urlhash,
- final String title,
- final String author,
- final String tags,
- final String publisher,
- final String latlon) {
- this.url = null;
- this.urlRaw = urlRaw;
- this.urlHash = urlhash;
- this.dc_title = title;
- this.dc_creator = author;
- this.dc_subject = tags;
- this.dc_publisher = publisher;
- this.latlon = latlon;
- }
- public boolean matches(final Pattern matcher) {
- if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
- if (this.url != null) return matcher.matcher(this.url.toNormalform(true, true).toLowerCase()).matches();
- return false;
- }
- public DigestURI url() {
- if (this.url == null) {
- try {
- this.url = new DigestURI(this.urlRaw, this.urlHash);
- } catch (final MalformedURLException e) {
- this.url = null;
- }
- this.urlRaw = null;
- this.urlHash = null;
- }
- return this.url;
- }
- public String dc_title() { return this.dc_title; }
- public String dc_creator() { return this.dc_creator; }
- public String dc_publisher() { return this.dc_publisher; }
- public String dc_subject() { return this.dc_subject; }
- public float lat() {
- if (this.latlon == null || this.latlon.isEmpty()) return 0.0f;
- final int p = this.latlon.indexOf(',');
- return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(0, p));
- }
- public float lon() {
- if (this.latlon == null || this.latlon.isEmpty()) return 0.0f;
- final int p = this.latlon.indexOf(',');
- return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(p + 1));
- }
- }
- */
}
\ No newline at end of file
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
index 48fbaea3c..7374f4121 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
@@ -30,7 +30,6 @@ import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.Date;
import java.util.List;
-import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
@@ -38,6 +37,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.vocabulary.Tagging;
+import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.Row;
@@ -104,7 +104,7 @@ public class URIMetadataRow implements URIMetadata {
private final Row.Entry entry;
private final String snippet;
- private WordReferenceVars word; // this is only used if the url is transported via remote search requests
+ private WordReference word; // this is only used if the url is transported via remote search requests
private final long ranking; // during generation of a search result this value is set
private Components comp;
@@ -167,12 +167,6 @@ public class URIMetadataRow implements URIMetadata {
this.comp = null;
}
- @Override
- public Map toMap() {
- // TODO to be implemented
- return null;
- }
-
private void encodeDate(final int col, final Date d) {
// calculates the number of days since 1.1.1970 and returns this as 4-byte array
// 86400000 is the number of milliseconds in one day
@@ -211,7 +205,7 @@ public class URIMetadataRow implements URIMetadata {
return UTF8.getBytes(s0);
}
- public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) {
+ public URIMetadataRow(final Row.Entry entry, final WordReference searchedWord, final long ranking) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
@@ -284,7 +278,7 @@ public class URIMetadataRow implements URIMetadata {
this.comp = null;
}
- public static URIMetadataRow importEntry(final String propStr) {
+ public static URIMetadata importEntry(final String propStr) {
if (propStr == null || (!propStr.isEmpty() && propStr.charAt(0) != '{') || !propStr.endsWith("}")) {
return null;
}
@@ -560,7 +554,7 @@ public class URIMetadataRow implements URIMetadata {
}
@Override
- public WordReferenceVars word() {
+ public WordReference word() {
return this.word;
}
diff --git a/source/net/yacy/kelondro/data/meta/URIReference.java b/source/net/yacy/kelondro/data/meta/URIReference.java
index 0616f2689..d701bb49a 100644
--- a/source/net/yacy/kelondro/data/meta/URIReference.java
+++ b/source/net/yacy/kelondro/data/meta/URIReference.java
@@ -9,12 +9,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see .
@@ -23,7 +23,6 @@
package net.yacy.kelondro.data.meta;
import java.util.Date;
-import java.util.Map;
import java.util.regex.Pattern;
public interface URIReference {
@@ -40,7 +39,7 @@ public interface URIReference {
* @return
*/
public String hosthash();
-
+
/**
* The modification date of the URIReference is given if
* the record was created first and is defined with the
@@ -48,26 +47,20 @@ public interface URIReference {
* @return the modification date of this record
*/
public Date moddate();
-
+
/**
* The DigestURI is the payload of the URIReference
* @return the url as DigestURI with assigned URL hash according to the record hash
*/
public DigestURI url();
-
+
/**
* check if the url matches agains a given matcher
* @param matcher
* @return true if the url() matches
*/
public boolean matches(final Pattern matcher);
-
- /**
- * transform the record into a map which can be stored
- * @return
- */
- public Map toMap();
-
+
/**
* produce a visible representation of the record
* @return a string for the url()
diff --git a/source/net/yacy/kelondro/data/meta/URIReferenceNode.java b/source/net/yacy/kelondro/data/meta/URIReferenceNode.java
index eab4cfdf4..cc840c2bc 100644
--- a/source/net/yacy/kelondro/data/meta/URIReferenceNode.java
+++ b/source/net/yacy/kelondro/data/meta/URIReferenceNode.java
@@ -9,12 +9,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see .
@@ -26,7 +26,6 @@ import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
-import java.util.Map;
import java.util.regex.Pattern;
import net.yacy.cora.date.ISO8601Formatter;
@@ -36,14 +35,14 @@ public class URIReferenceNode extends HashMap implements URIRefe
private static final long serialVersionUID = -1580155759116466570L;
- private byte[] hash;
+ private final byte[] hash;
public URIReferenceNode(DigestURI uri, Date date) {
this.hash = uri.hash();
this.put(MetadataVocabulary.url.name(), ASCII.getBytes(uri.toNormalform(true, false)));
this.put(MetadataVocabulary.moddate.name(), ASCII.getBytes(ISO8601Formatter.FORMATTER.format(date)));
}
-
+
@Override
public byte[] hash() {
return this.hash;
@@ -56,7 +55,7 @@ public class URIReferenceNode extends HashMap implements URIRefe
this.hostHash = ASCII.String(this.hash, 6, 6);
return this.hostHash;
}
-
+
@Override
public Date moddate() {
byte[] x = this.get(MetadataVocabulary.moddate.name());
@@ -84,9 +83,4 @@ public class URIReferenceNode extends HashMap implements URIRefe
return matcher.matcher(ASCII.String(x)).matches();
}
- @Override
- public Map toMap() {
- return this;
- }
-
}
diff --git a/source/net/yacy/kelondro/data/word/WordReference.java b/source/net/yacy/kelondro/data/word/WordReference.java
index f30812afe..fb3983c02 100644
--- a/source/net/yacy/kelondro/data/word/WordReference.java
+++ b/source/net/yacy/kelondro/data/word/WordReference.java
@@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
-//
+//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@@ -48,17 +48,19 @@ public interface WordReference extends Reference {
public char getType();
public int wordsintitle();
-
+
public int llocal();
-
+
public int lother();
-
+
public int urllength();
-
+
public int urlcomps();
-
+
public Bitfield flags();
-
+
public double termFrequency();
+ public String hosthash();
+
}
diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java
index 7bc43b1ff..554d8f9a9 100644
--- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java
+++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java
@@ -431,4 +431,10 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
}
+ @Override
+ public String hosthash() { // implements the hosthash() accessor that this change adds to the WordReference interface
+ return ASCII.String(this.urlhash(), 6, 6); // same (6, 6) slice of the url hash that WordReferenceVars.hosthash() takes (presumably offset 6, length 6 - confirm); not cached here, unlike in WordReferenceVars
+ }
+
+
}
diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java
index bdd623836..91910542d 100644
--- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java
+++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java
@@ -285,6 +285,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return this.urlHash;
}
+ @Override
public String hosthash() {
if (this.hostHash != null) return this.hostHash;
this.hostHash = ASCII.String(this.urlHash, 6, 6);
diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java
index 4bfb9a867..c421b8d8e 100644
--- a/source/net/yacy/kelondro/table/SplitTable.java
+++ b/source/net/yacy/kelondro/table/SplitTable.java
@@ -265,6 +265,7 @@ public class SplitTable implements Index, Iterable {
}
public static void delete(final File path, final String tablename) {
+ if (path == null || tablename == null) return;
final File tabledir = new File(path, tablename);
if (!(tabledir.exists())) return;
if ((!(tabledir.isDirectory()))) {
diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java
index 34d6295f8..1480d00ec 100644
--- a/source/net/yacy/peers/Protocol.java
+++ b/source/net/yacy/peers/Protocol.java
@@ -690,7 +690,7 @@ public final class Protocol
// insert results to containers
int term = count;
- for ( final URIMetadataRow urlEntry : result.links ) {
+ for ( final URIMetadata urlEntry : result.links ) {
if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish)
}
@@ -890,7 +890,7 @@ public final class Protocol
public Map indexcount; //
public long searchtime; // time that the peer actually spent to create the result
public String[] references; // search hints, the top-words
- public List links; // LURLs of search
+ public List links; // LURLs of search
public Map indexabstract; // index abstracts, a collection of url-hashes per word
public SearchResult(
@@ -1015,14 +1015,14 @@ public final class Protocol
}
}
this.references = resultMap.get("references").split(",");
- this.links = new ArrayList(this.urlcount);
+ this.links = new ArrayList(this.urlcount);
for ( int n = 0; n < this.urlcount; n++ ) {
// get one single search result
final String resultLine = resultMap.get("resource" + n);
if ( resultLine == null ) {
continue;
}
- final URIMetadataRow urlEntry = URIMetadataRow.importEntry(resultLine);
+ final URIMetadata urlEntry = URIMetadataRow.importEntry(resultLine);
if ( urlEntry == null ) {
continue;
}
@@ -1226,7 +1226,7 @@ public final class Protocol
} // all url's known
// extract the urlCache from the result
- final URIMetadata[] urls = new URIMetadataRow[uhs.length];
+ final URIMetadata[] urls = new URIMetadata[uhs.length];
for ( int i = 0; i < uhs.length; i++ ) {
urls[i] = urlCache.get(ASCII.getBytes(uhs[i]));
if ( urls[i] == null ) {
@@ -1540,7 +1540,7 @@ public final class Protocol
new RankingProfile(Classification.ContentDomain.TEXT), // rankingProfile,
null // constraint);
);
- for ( final URIMetadataRow link : result.links ) {
+ for ( final URIMetadata link : result.links ) {
System.out.println(link.url().toNormalform(true, false));
System.out.println(link.snippet());
}
diff --git a/source/net/yacy/search/index/DocumentReference.java b/source/net/yacy/search/index/DocumentReference.java
index 43a3eb9f8..34501d6bf 100644
--- a/source/net/yacy/search/index/DocumentReference.java
+++ b/source/net/yacy/search/index/DocumentReference.java
@@ -49,10 +49,6 @@ public class DocumentReference {
this.data = null;
}
- public void store(final URIReference entry) {
- this.data.put(entry.hash(), entry.toMap());
- }
-
public URIReference load(final WeakPriorityBlockingQueue.Element obrwi) {
return null;
}
diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java
index 122ad7ad5..c8ed8363b 100644
--- a/source/net/yacy/search/index/MetadataRepository.java
+++ b/source/net/yacy/search/index/MetadataRepository.java
@@ -49,8 +49,9 @@ import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadata;
+import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.meta.URIMetadataRow;
-import net.yacy.kelondro.data.word.WordReferenceVars;
+import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
@@ -61,6 +62,7 @@ import net.yacy.search.Switchboard;
import net.yacy.search.solr.EmbeddedSolrConnector;
import org.apache.lucene.util.Version;
+import org.apache.solr.common.SolrDocument;
public final class MetadataRepository implements /*Metadata,*/ Iterable {
@@ -190,36 +192,34 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable
* @param obrwi
* @return
*/
- public URIMetadata load(WordReferenceVars wre, long weight) {
+ public URIMetadata load(WordReference wre, long weight) {
if (wre == null) return null; // all time was already wasted in takeRWI to get another element
- final byte[] urlHash = wre.urlhash();
- if (urlHash == null) return null;
- if (this.urlIndexFile != null) try {
- final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
- if (entry == null) return null;
- return new URIMetadataRow(entry, wre, weight);
- } catch (final IOException e) {
- Log.logException(e);
- }
- /*
- try {
- SolrDocument doc = this.solr.get(ASCII.String(urlHash));
- } catch (IOException e) {
- Log.logException(e);
- }
- */
- return null;
+ return load(wre.urlhash(), wre, weight);
}
public URIMetadata load(final byte[] urlHash) {
if (urlHash == null) return null;
+ return load(urlHash, null, 0);
+ }
+
+ private URIMetadata load(final byte[] urlHash, WordReference wre, long weight) { // shared lookup behind both public load(...) variants; wre and weight are passed through to the created entry
+ if (urlHash == null) return null; // restores the null-hash guard that load(WordReferenceVars, long) applied before this refactoring; wre.urlhash() is no longer checked by the caller
+ // get the metadata from the old metadata index; a miss or an IOException falls through to the Solr lookup below
if (this.urlIndexFile != null) try {
final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
- if (entry == null) return null;
- return new URIMetadataRow(entry, null, 0);
+ if (entry != null) return new URIMetadataRow(entry, wre, weight);
} catch (final IOException e) {
- return null;
+ Log.logException(e);
}
+
+ // get the metadata from Solr (NOTE(review): this.solr is dereferenced without a null check, unlike urlIndexFile - confirm it is always initialized)
+ try {
+ SolrDocument doc = this.solr.get(ASCII.String(urlHash));
+ if (doc != null) return new URIMetadataNode(doc, wre, weight);
+ } catch (IOException e) {
+ Log.logException(e);
+ }
+
return null;
}
diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java
index 6ebad68f6..c0eb9b6f7 100644
--- a/source/net/yacy/search/index/SolrConfiguration.java
+++ b/source/net/yacy/search/index/SolrConfiguration.java
@@ -105,11 +105,11 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
private boolean contains(YaCySchema field) {
return this.contains(field.name());
}
-
+
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final byte[] value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length != 0))) solrdoc.addSolr(key, UTF8.String(value));
}
-
+
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final String value) {
if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) solrdoc.addSolr(key, value);
}
@@ -149,7 +149,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
protected void addSolr(final SolrDoc solrdoc, final YaCySchema key, final boolean value) {
if (isEmpty() || contains(key)) solrdoc.addSolr(key, value);
}
-
+
/**
* save configuration to file and update enum SolrFields
* @throws IOException
@@ -170,7 +170,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
} catch (final IOException e) {}
}
-
+
public SolrDoc metadata2solr(final URIMetadata md) {
final SolrDoc solrdoc = new SolrDoc();
final DigestURI digestURI = new DigestURI(md.url());
@@ -190,18 +190,18 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (allAttr || contains(YaCySchema.content_type)) addSolr(solrdoc, YaCySchema.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype()));
if (allAttr || contains(YaCySchema.last_modified)) addSolr(solrdoc, YaCySchema.last_modified, md.moddate());
if (allAttr || contains(YaCySchema.text_t)) addSolr(solrdoc, YaCySchema.text_t, ""); // not delivered in metadata
- if (allAttr || contains(YaCySchema.wordcount_i)) addSolr(solrdoc, YaCySchema.wordcount_i, md.wordCount());
+ if (allAttr || contains(YaCySchema.wordcount_i)) addSolr(solrdoc, YaCySchema.wordcount_i, md.wordCount());
if (allAttr || contains(YaCySchema.keywords)) {
String keywords = md.dc_subject();
Bitfield flags = md.flags();
if (flags.get(Condenser.flag_cat_indexof)) {
if (keywords == null || keywords.isEmpty()) keywords = "indexof"; else {
- if (keywords.indexOf(',') > 0) keywords += ", indexof"; else keywords += " indexof";
+ if (keywords.indexOf(',') > 0) keywords += ", indexof"; else keywords += " indexof";
}
}
addSolr(solrdoc, YaCySchema.keywords, keywords);
}
-
+
// path elements of link
final String path = digestURI.getPath();
if (path != null && (allAttr || contains(YaCySchema.paths_txt))) {
@@ -229,12 +229,11 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (allAttr || contains(YaCySchema.md5_s)) addSolr(solrdoc, YaCySchema.md5_s, md.md5());
if (allAttr || contains(YaCySchema.publisher_t)) addSolr(solrdoc, YaCySchema.publisher_t, md.dc_publisher());
if ((allAttr || contains(YaCySchema.language_txt)) && md.language() != null) addSolr(solrdoc, YaCySchema.language_txt,new String[]{UTF8.String(md.language())});
- if (allAttr || contains(YaCySchema.ranking_i)) addSolr(solrdoc, YaCySchema.ranking_i, md.ranking());
if (allAttr || contains(YaCySchema.size_i)) addSolr(solrdoc, YaCySchema.size_i, md.size());
if (allAttr || contains(YaCySchema.audiolinkscount_i)) addSolr(solrdoc, YaCySchema.audiolinkscount_i, md.laudio());
if (allAttr || contains(YaCySchema.videolinkscount_i)) addSolr(solrdoc, YaCySchema.videolinkscount_i, md.lvideo());
if (allAttr || contains(YaCySchema.applinkscount_i)) addSolr(solrdoc, YaCySchema.applinkscount_i, md.lapp());
-
+
return solrdoc;
}
@@ -585,7 +584,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
if (allAttr || contains(YaCySchema.httpstatus_i)) addSolr(solrdoc, YaCySchema.httpstatus_i, header == null ? 200 : header.getStatusCode());
- // fields that are additionally in URIMetadataRow
+ // fields that are additionally in URIMetadataRow
if (allAttr || contains(YaCySchema.load_date_dt)) addSolr(solrdoc, YaCySchema.load_date_dt, metadata.loaddate());
if (allAttr || contains(YaCySchema.fresh_date_dt)) addSolr(solrdoc, YaCySchema.fresh_date_dt, metadata.freshdate());
if (allAttr || contains(YaCySchema.host_id_s)) addSolr(solrdoc, YaCySchema.host_id_s, metadata.hosthash());
@@ -593,12 +592,11 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
//if (allAttr || contains(SolrField.md5_s)) addSolr(solrdoc, SolrField.md5_s, new byte[0]);
if (allAttr || contains(YaCySchema.publisher_t)) addSolr(solrdoc, YaCySchema.publisher_t, yacydoc.dc_publisher());
if ((allAttr || contains(YaCySchema.language_txt)) && metadata.language() != null) addSolr(solrdoc, YaCySchema.language_txt,new String[]{UTF8.String(metadata.language())});
- if (allAttr || contains(YaCySchema.ranking_i)) addSolr(solrdoc, YaCySchema.ranking_i, metadata.ranking());
if (allAttr || contains(YaCySchema.size_i)) addSolr(solrdoc, YaCySchema.size_i, metadata.size());
if (allAttr || contains(YaCySchema.audiolinkscount_i)) addSolr(solrdoc, YaCySchema.audiolinkscount_i, yacydoc.getAudiolinks().size());
if (allAttr || contains(YaCySchema.videolinkscount_i)) addSolr(solrdoc, YaCySchema.videolinkscount_i, yacydoc.getVideolinks().size());
if (allAttr || contains(YaCySchema.applinkscount_i)) addSolr(solrdoc, YaCySchema.applinkscount_i, yacydoc.getApplinks().size());
-
+
return solrdoc;
}
diff --git a/source/net/yacy/search/index/YaCySchema.java b/source/net/yacy/search/index/YaCySchema.java
index 31cd03e9e..8380f7dc8 100644
--- a/source/net/yacy/search/index/YaCySchema.java
+++ b/source/net/yacy/search/index/YaCySchema.java
@@ -131,7 +131,6 @@ public enum YaCySchema implements Schema {
md5_s(SolrType.string, true, true, "the md5 of the raw source"),// String md5();
publisher_t(SolrType.text_general, true, true, "the name of the publisher of the document"),// String dc_publisher();
language_txt(SolrType.string, true, true, "the language used in the document; starts with primary language"),// byte[] language();
- ranking_i(SolrType.integer, true, true, "an external ranking value"),// long ranking();
size_i(SolrType.integer, true, true, "the size of the raw source"),// int size();
audiolinkscount_i(SolrType.integer, true, true, "number of links to audio resources"),// int laudio();
videolinkscount_i(SolrType.integer, true, true, "number of links to video resources"),// int lvideo();
diff --git a/source/net/yacy/search/ranking/ReferenceOrder.java b/source/net/yacy/search/ranking/ReferenceOrder.java
index 4bb72cba3..549d07bb2 100644
--- a/source/net/yacy/search/ranking/ReferenceOrder.java
+++ b/source/net/yacy/search/ranking/ReferenceOrder.java
@@ -213,7 +213,7 @@ public class ReferenceOrder {
* @param t
* @return a ranking: the higher the number, the better is the ranking
*/
- public long cardinal(final WordReferenceVars t) {
+ public long cardinal(final WordReference t) {
//return Long.MAX_VALUE - preRanking(ranking, iEntry, this.entryMin, this.entryMax, this.searchWords);
// the normalizedEntry must be a normalized indexEntry
final Bitfield flags = t.flags();
@@ -254,7 +254,7 @@ public class ReferenceOrder {
+ ((flags.get(Condenser.flag_cat_hasaudio)) ? 255 << this.ranking.coeff_cathasaudio : 0)
+ ((flags.get(Condenser.flag_cat_hasvideo)) ? 255 << this.ranking.coeff_cathasvideo : 0)
+ ((flags.get(Condenser.flag_cat_hasapp)) ? 255 << this.ranking.coeff_cathasapp : 0)
- + ((ByteBuffer.equals(t.language, this.language)) ? 255 << this.ranking.coeff_language : 0)
+ + ((ByteBuffer.equals(t.getLanguage(), this.language)) ? 255 << this.ranking.coeff_language : 0)
+ ((DigestURI.probablyRootURL(t.urlhash())) ? 15 << this.ranking.coeff_urllength : 0);
//if (searchWords != null) r += (yacyURL.probablyWordURL(t.urlHash(), searchWords) != null) ? 256 << ranking.coeff_appurl : 0;
diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java
index 126f58c41..23b8c1a87 100644
--- a/source/net/yacy/search/snippet/ResultEntry.java
+++ b/source/net/yacy/search/snippet/ResultEntry.java
@@ -36,6 +36,8 @@ import net.yacy.document.Condenser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.word.Word;
+import net.yacy.kelondro.data.word.WordReference;
+import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
@@ -188,10 +190,12 @@ public class ResultEntry implements Comparable, Comparator