From c6f634a4f2dec33570fb9ca807c2bbb2a9765701 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 21 Dec 2014 03:45:54 +0100 Subject: [PATCH 1/3] remove redundant caching of urlhash in URIMetadataNode (is already cached in underlying DigestURL .url) upd pom keyword for maven-antrun-plugin --- pom.xml | 4 ++-- .../kelondro/data/meta/URIMetadataNode.java | 19 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 623addb58..b403a717b 100644 --- a/pom.xml +++ b/pom.xml @@ -141,14 +141,14 @@ compile-htroot compile - + - + run diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index c7fd11199..a6ffe5440 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -68,8 +68,7 @@ public class URIMetadataNode extends SolrDocument { private static final long serialVersionUID = -256046934741561968L; - protected byte[] hash = null; - protected String urlRaw = null, keywords = null; + protected String keywords = null; protected DigestURL url = null; protected Bitfield flags = null; protected int imagec = -1, audioc = -1, videoc = -1, appc = -1; @@ -83,14 +82,12 @@ public class URIMetadataNode extends SolrDocument { // the property names must correspond to the one from toString //System.out.println("DEBUG-ENTRY: prop=" + prop.toString()); super(); - urlRaw = crypt.simpleDecode(prop.getProperty("url", "")); + final String urlRaw = crypt.simpleDecode(prop.getProperty("url", "")); try { url = new DigestURL(urlRaw); - this.hash = url.hash(); } catch (final MalformedURLException e) { ConcurrentLog.logException(e); this.url = null; - this.hash = null; } String descr = crypt.simpleDecode(prop.getProperty("descr", "")); if (descr == null) descr = ""; String dc_creator = crypt.simpleDecode(prop.getProperty("author", "")); if (dc_creator == null) dc_creator = ""; @@ -156,10 +153,10 @@ public
class URIMetadataNode extends SolrDocument { this.snippet = ""; Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result this.score = scorex == null ? 0.0f : scorex.floatValue(); - this.hash = ASCII.getBytes(getString(CollectionSchema.id)); - this.urlRaw = getString(CollectionSchema.sku); + final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we thrust this id ? + final String urlRaw = getString(CollectionSchema.sku); try { - this.url = new DigestURL(this.urlRaw, this.hash); + this.url = new DigestURL(urlRaw, hash); } catch (final MalformedURLException e) { ConcurrentLog.logException(e); this.url = null; @@ -186,12 +183,12 @@ public class URIMetadataNode extends SolrDocument { } public byte[] hash() { - return this.hash; + return this.url.hash(); } public String hosthash() { String hosthash = (String) this.getFieldValue(CollectionSchema.host_id_s.getSolrFieldName()); - if (hosthash == null) hosthash = ASCII.String(this.hash, 6, 6); + if (hosthash == null) hosthash = ASCII.String(this.url.hash(), 6, 6); return hosthash; } @@ -204,7 +201,7 @@ public class URIMetadataNode extends SolrDocument { } public boolean matches(Pattern matcher) { - return matcher.matcher(this.urlRaw.toLowerCase()).matches(); + return matcher.matcher(this.url.toString().toLowerCase()).matches(); } public String dc_title() { From 198102304bec01b04eb4ddfca846792f0c2a1d96 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 21 Dec 2014 06:05:35 +0100 Subject: [PATCH 2/3] refactor size() -> filesize() of URIMetadataNode (harmonize with ResultEntry and to not get confused with Collection.size()) --- htroot/ViewFile.java | 2 +- htroot/api/yacydoc.java | 2 +- .../net/yacy/kelondro/data/meta/URIMetadataNode.java | 11 +++++++---- source/net/yacy/search/snippet/ResultEntry.java | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 
7debb3c53..7bcc55b71 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -156,7 +156,7 @@ public class ViewFile { url = urlEntry.url(); descr = urlEntry.dc_title(); //urlEntry.wordCount(); - size = urlEntry.size(); + size = urlEntry.filesize(); pre = urlEntry.flags().get(Condenser.flag_cat_indexof); prop.put("moar", 1); prop.putHTML("moar_search", post.get("search","")); diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index 33b14ad67..5e12e67d4 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -117,7 +117,7 @@ public class yacydoc { prop.putXML("yacy_loaddate", entry.loaddate().toString()); prop.putXML("yacy_referrer_hash", (le == null) ? "" : ASCII.String(le.hash())); prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(true)); - prop.put("yacy_size", entry.size()); + prop.put("yacy_size", entry.filesize()); prop.put("yacy_words", entry.wordCount()); prop.put("yacy_citations", sb.index.connectedCitation() ? sb.index.urlCitation().count(entry.hash()) : 0); prop.put("yacy_inbound", entry.llocal()); diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index a6ffe5440..644763ff7 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -153,7 +153,7 @@ public class URIMetadataNode extends SolrDocument { this.snippet = ""; Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result this.score = scorex == null ? 0.0f : scorex.floatValue(); - final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we thrust this id ? + final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ? 
final String urlRaw = getString(CollectionSchema.sku); try { this.url = new DigestURL(urlRaw, hash); @@ -329,8 +329,11 @@ public class URIMetadataNode extends SolrDocument { return ASCII.getBytes(referrer); } - @Override - public int size() { + /** + * gives the size in byte of the original url document + * @return filesize of url + */ + public int filesize() { return getInt(CollectionSchema.size_i); } @@ -496,7 +499,7 @@ public class URIMetadataNode extends SolrDocument { s.append(",fresh=").append(formatter.format(this.freshdate())); s.append(",referrer=").append(this.referrerHash() == null ? "" : ASCII.String(this.referrerHash())); s.append(",md5=").append(this.md5()); - s.append(",size=").append(this.size()); + s.append(",size=").append(this.filesize()); s.append(",wc=").append(this.wordCount()); s.append(",dt=").append(this.doctype()); s.append(",flags=").append(this.flags().exportB64()); diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java index e1f361ffb..4b9ad2e6c 100644 --- a/source/net/yacy/search/snippet/ResultEntry.java +++ b/source/net/yacy/search/snippet/ResultEntry.java @@ -158,7 +158,7 @@ public class ResultEntry implements Comparable, Comparator Date: Sun, 21 Dec 2014 14:02:06 +0100 Subject: [PATCH 3/3] fix refactored size() -> filesize() in YMarkMetadata --- source/net/yacy/data/ymark/YMarkMetadata.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/net/yacy/data/ymark/YMarkMetadata.java b/source/net/yacy/data/ymark/YMarkMetadata.java index d999bf938..c68faa8c4 100644 --- a/source/net/yacy/data/ymark/YMarkMetadata.java +++ b/source/net/yacy/data/ymark/YMarkMetadata.java @@ -115,7 +115,7 @@ public class YMarkMetadata { final EnumMap metadata = new EnumMap(METADATA.class); final URIMetadataNode urlEntry = this.indexSegment.fulltext().getMetadata(this.uri.hash()); if (urlEntry != null) { - metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size())); + 
metadata.put(METADATA.SIZE, String.valueOf(urlEntry.filesize())); metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate())); metadata.put(METADATA.LOADDATE, ISO8601Formatter.FORMATTER.format(urlEntry.loaddate())); metadata.put(METADATA.MODDATE, ISO8601Formatter.FORMATTER.format(urlEntry.moddate()));