|
|
|
@ -46,9 +46,8 @@ import net.yacy.kelondro.order.Bitfield;
|
|
|
|
|
import net.yacy.kelondro.order.Digest;
|
|
|
|
|
import net.yacy.kelondro.order.NaturalOrder;
|
|
|
|
|
import net.yacy.kelondro.util.ByteBuffer;
|
|
|
|
|
import net.yacy.kelondro.util.kelondroException;
|
|
|
|
|
import net.yacy.kelondro.util.MapTools;
|
|
|
|
|
|
|
|
|
|
import net.yacy.kelondro.util.kelondroException;
|
|
|
|
|
import de.anomic.crawler.retrieval.Request;
|
|
|
|
|
import de.anomic.search.QueryParams;
|
|
|
|
|
import de.anomic.tools.crypt;
|
|
|
|
@ -173,7 +172,7 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private Date decodeDate(final int col) {
|
|
|
|
|
long t = this.entry.getColLong(col);
|
|
|
|
|
final long t = this.entry.getColLong(col);
|
|
|
|
|
/*if (t < 14600) */return new Date(86400000L * t); // time was stored as number of days since epoch
|
|
|
|
|
/*
|
|
|
|
|
if (t < 350400) return new Date(3600000L * t); // hours since epoch
|
|
|
|
@ -229,7 +228,7 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher, Float.parseFloat(lats), Float.parseFloat(lons)));
|
|
|
|
|
|
|
|
|
|
// create new formatters to make concurrency possible
|
|
|
|
|
GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
|
|
|
|
|
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
encodeDate(col_mod, formatter.parse(prop.getProperty("mod", "20000101")));
|
|
|
|
@ -250,7 +249,7 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", "")));
|
|
|
|
|
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
|
|
|
|
|
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
|
|
|
|
|
String dt = prop.getProperty("dt", "t");
|
|
|
|
|
final String dt = prop.getProperty("dt", "t");
|
|
|
|
|
this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'});
|
|
|
|
|
final String flags = prop.getProperty("flags", "AAAAAA");
|
|
|
|
|
this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes());
|
|
|
|
@ -285,13 +284,13 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
|
|
|
|
|
private StringBuilder corePropList() {
|
|
|
|
|
// generate a parseable string; this is a simple property-list
|
|
|
|
|
final Components metadata = this.metadata();
|
|
|
|
|
final Components metadata = metadata();
|
|
|
|
|
final StringBuilder s = new StringBuilder(300);
|
|
|
|
|
if (metadata == null) return null;
|
|
|
|
|
//System.out.println("author=" + comp.author());
|
|
|
|
|
|
|
|
|
|
// create new formatters to make concurrency possible
|
|
|
|
|
GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
|
|
|
|
|
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
s.append("hash=").append(ASCII.String(hash()));
|
|
|
|
@ -345,12 +344,12 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
|
|
|
|
|
if (this.word != null) {
|
|
|
|
|
// append also word properties
|
|
|
|
|
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(word.toPropertyForm()));
|
|
|
|
|
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(this.word.toPropertyForm()));
|
|
|
|
|
}
|
|
|
|
|
assert (s.toString().indexOf(0) < 0);
|
|
|
|
|
return s;
|
|
|
|
|
|
|
|
|
|
} catch (final Exception e) {
|
|
|
|
|
} catch (final Throwable e) {
|
|
|
|
|
// serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
|
|
|
|
|
// if (moddate == null) serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
|
|
|
|
|
// if (loaddate == null) serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
|
|
|
|
@ -379,8 +378,8 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
// avoid double computation of metadata elements
|
|
|
|
|
if (this.comp != null) return this.comp;
|
|
|
|
|
// parse elements from comp field;
|
|
|
|
|
byte[] c = this.entry.getColBytes(col_comp, true);
|
|
|
|
|
List<byte[]> cl = ByteBuffer.split(c, (byte) 10);
|
|
|
|
|
final byte[] c = this.entry.getColBytes(col_comp, true);
|
|
|
|
|
final List<byte[]> cl = ByteBuffer.split(c, (byte) 10);
|
|
|
|
|
this.comp = new Components(
|
|
|
|
|
(cl.size() > 0) ? UTF8.String(cl.get(0)) : "",
|
|
|
|
|
hash(),
|
|
|
|
@ -407,7 +406,7 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
public byte[] referrerHash() {
|
|
|
|
|
// return the creator's hash or null if there is none
|
|
|
|
|
// FIXME: There seem to be some malformed entries in the databases like "null\0\0\0\0\0\0\0\0"
|
|
|
|
|
final byte[] r = entry.getColBytes(col_referrer, true);
|
|
|
|
|
final byte[] r = this.entry.getColBytes(col_referrer, true);
|
|
|
|
|
if (r != null) {
|
|
|
|
|
int i = r.length;
|
|
|
|
|
while (i > 0) {
|
|
|
|
@ -419,11 +418,11 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
|
|
|
|
|
public String md5() {
|
|
|
|
|
// returns the md5 in hex representation
|
|
|
|
|
return Digest.encodeHex(entry.getColBytes(col_md5, true));
|
|
|
|
|
return Digest.encodeHex(this.entry.getColBytes(col_md5, true));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public char doctype() {
|
|
|
|
|
return (char) entry.getColByte(col_dt);
|
|
|
|
|
return (char) this.entry.getColByte(col_dt);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public byte[] language() {
|
|
|
|
@ -469,11 +468,11 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
public String snippet() {
|
|
|
|
|
// the snippet may appear here if the url was transported in a remote search
|
|
|
|
|
// it will not be saved anywhere, but can only be requested here
|
|
|
|
|
return snippet;
|
|
|
|
|
return this.snippet;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public WordReferenceVars word() {
|
|
|
|
|
return word;
|
|
|
|
|
return this.word;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public boolean isOlder(final URIMetadata other) {
|
|
|
|
@ -560,7 +559,7 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
this.dc_publisher = publisher;
|
|
|
|
|
this.latlon = latlon;
|
|
|
|
|
}
|
|
|
|
|
public boolean matches(Pattern matcher) {
|
|
|
|
|
public boolean matches(final Pattern matcher) {
|
|
|
|
|
if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
|
|
|
|
|
if (this.url != null) return matcher.matcher(this.url.toNormalform(true, true).toLowerCase()).matches();
|
|
|
|
|
return false;
|
|
|
|
@ -569,7 +568,7 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
if (this.url == null) {
|
|
|
|
|
try {
|
|
|
|
|
this.url = new DigestURI(this.urlRaw, this.urlHash);
|
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
this.url = null;
|
|
|
|
|
}
|
|
|
|
|
this.urlRaw = null;
|
|
|
|
@ -582,14 +581,14 @@ public class URIMetadataRow implements URIMetadata {
|
|
|
|
|
/**
 * @return the current value of the {@code dc_publisher} field
 */
public String dc_publisher() {
    return this.dc_publisher;
}
|
|
|
|
|
/**
 * @return the current value of the {@code dc_subject} field
 */
public String dc_subject() {
    return this.dc_subject;
}
|
|
|
|
|
public float lat() {
|
|
|
|
|
if (latlon == null || latlon.length() == 0) return 0.0f;
|
|
|
|
|
int p = latlon.indexOf(',');
|
|
|
|
|
return p < 0 ? 0.0f : Float.parseFloat(latlon.substring(0, p));
|
|
|
|
|
if (this.latlon == null || this.latlon.length() == 0) return 0.0f;
|
|
|
|
|
final int p = this.latlon.indexOf(',');
|
|
|
|
|
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(0, p));
|
|
|
|
|
}
|
|
|
|
|
public float lon() {
|
|
|
|
|
if (latlon == null || latlon.length() == 0) return 0.0f;
|
|
|
|
|
int p = latlon.indexOf(',');
|
|
|
|
|
return p < 0 ? 0.0f : Float.parseFloat(latlon.substring(p + 1));
|
|
|
|
|
if (this.latlon == null || this.latlon.length() == 0) return 0.0f;
|
|
|
|
|
final int p = this.latlon.indexOf(',');
|
|
|
|
|
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(p + 1));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|