|
|
|
@ -68,8 +68,7 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
|
|
|
|
|
// Instance state of the metadata node.
private static final long serialVersionUID = -256046934741561968L;
|
|
|
|
|
|
|
|
|
|
// hash bytes of this entry; the constructors assign it from url.hash() or from the Solr id field
protected byte[] hash = null;
|
|
|
|
|
// raw URL string as read from the "url" property or from the sku field
protected String urlRaw = null, keywords = null;
|
|
|
|
|
// NOTE(review): keywords is already declared on the urlRaw line above; this second
// declaration looks like the replacement line of a patch - confirm which one should remain
protected String keywords = null;
|
|
|
|
|
// parsed URL; the constructors set it to null when the raw URL is malformed
protected DigestURL url = null;
|
|
|
|
|
protected Bitfield flags = null;
|
|
|
|
|
// presumably per-media-type counters, with -1 meaning "not yet determined" - TODO confirm
protected int imagec = -1, audioc = -1, videoc = -1, appc = -1;
|
|
|
|
@ -83,14 +82,12 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
// the property names must correspond to the ones used in toString
|
|
|
|
|
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
|
|
|
|
|
super();
|
|
|
|
|
urlRaw = crypt.simpleDecode(prop.getProperty("url", ""));
|
|
|
|
|
final String urlRaw = crypt.simpleDecode(prop.getProperty("url", ""));
|
|
|
|
|
try {
|
|
|
|
|
url = new DigestURL(urlRaw);
|
|
|
|
|
this.hash = url.hash();
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
|
this.url = null;
|
|
|
|
|
this.hash = null;
|
|
|
|
|
}
|
|
|
|
|
String descr = crypt.simpleDecode(prop.getProperty("descr", "")); if (descr == null) descr = "";
|
|
|
|
|
String dc_creator = crypt.simpleDecode(prop.getProperty("author", "")); if (dc_creator == null) dc_creator = "";
|
|
|
|
@ -156,10 +153,10 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
this.snippet = "";
|
|
|
|
|
Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
|
|
|
|
|
this.score = scorex == null ? 0.0f : scorex.floatValue();
|
|
|
|
|
this.hash = ASCII.getBytes(getString(CollectionSchema.id));
|
|
|
|
|
this.urlRaw = getString(CollectionSchema.sku);
|
|
|
|
|
final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ?
|
|
|
|
|
final String urlRaw = getString(CollectionSchema.sku);
|
|
|
|
|
try {
|
|
|
|
|
this.url = new DigestURL(this.urlRaw, this.hash);
|
|
|
|
|
this.url = new DigestURL(urlRaw, hash);
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
|
this.url = null;
|
|
|
|
@ -186,12 +183,12 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the hash of this node as a byte array.
 * NOTE(review): the body holds two return statements, one reading the
 * {@code hash} field and one calling {@code url.hash()}; the second can never
 * run after the first. This looks like the before/after lines of a patch
 * merged together - confirm which one is intended. The constructors may set
 * {@code url} to null on a malformed URL, so {@code url.hash()} presumably
 * needs a non-null url - verify.
 * @return the hash bytes
 */
public byte[] hash() {
|
|
|
|
|
return this.hash;
|
|
|
|
|
return this.url.hash();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the host hash of this node. The value stored in the
 * {@code host_id_s} Solr field is used when present; otherwise it is cut out
 * of the url hash via {@code ASCII.String(bytes, 6, 6)} (presumably offset 6,
 * length 6 - TODO confirm the argument order).
 * NOTE(review): there are two fallback lines, one based on the {@code hash}
 * field and one on {@code url.hash()}; they look like the before/after lines
 * of a patch - confirm which one should remain.
 * @return the host hash string
 */
public String hosthash() {
|
|
|
|
|
// prefer the host id stored with the document
String hosthash = (String) this.getFieldValue(CollectionSchema.host_id_s.getSolrFieldName());
|
|
|
|
|
if (hosthash == null) hosthash = ASCII.String(this.hash, 6, 6);
|
|
|
|
|
if (hosthash == null) hosthash = ASCII.String(this.url.hash(), 6, 6);
|
|
|
|
|
return hosthash;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -204,7 +201,7 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tests the lower-cased URL string of this node against the given pattern.
 * Uses {@code matches()}, which (assuming {@code java.util.regex}) requires
 * the whole string to match, not just a part of it.
 * NOTE(review): the body holds two return statements, one based on
 * {@code urlRaw} and one on {@code url.toString()}; the second can never run
 * after the first. Confirm which one is intended.
 * @param matcher the pattern to apply (despite its name this is a Pattern, not a Matcher)
 * @return true if the lower-cased URL matches the pattern
 */
public boolean matches(Pattern matcher) {
|
|
|
|
|
return matcher.matcher(this.urlRaw.toLowerCase()).matches();
|
|
|
|
|
return matcher.matcher(this.url.toString().toLowerCase()).matches();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public String dc_title() {
|
|
|
|
@ -332,8 +329,11 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
return ASCII.getBytes(referrer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NOTE(review): the size() header below has no body or closing brace of its own;
// it looks like the pre-rename signature of filesize() left over from a patch - confirm
@Override
|
|
|
|
|
public int size() {
|
|
|
|
|
/**
|
|
|
|
|
* Returns the size in bytes of the original url document, read from the size_i field.
|
|
|
|
|
* @return file size of the document behind the url
|
|
|
|
|
*/
|
|
|
|
|
public int filesize() {
|
|
|
|
|
return getInt(CollectionSchema.size_i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -499,7 +499,7 @@ public class URIMetadataNode extends SolrDocument {
|
|
|
|
|
s.append(",fresh=").append(formatter.format(this.freshdate()));
|
|
|
|
|
s.append(",referrer=").append(this.referrerHash() == null ? "" : ASCII.String(this.referrerHash()));
|
|
|
|
|
s.append(",md5=").append(this.md5());
|
|
|
|
|
s.append(",size=").append(this.size());
|
|
|
|
|
s.append(",size=").append(this.filesize());
|
|
|
|
|
s.append(",wc=").append(this.wordCount());
|
|
|
|
|
s.append(",dt=").append(this.doctype());
|
|
|
|
|
s.append(",flags=").append(this.flags().exportB64());
|
|
|
|
|