diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index 5b658437a..5a8fd21ef 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -197,14 +197,13 @@ public class Bookmarks { // try to get the bookmark from the LURL database final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash)); if (urlentry != null) try { - final URIMetadataRow.Components metadata = urlentry.metadata(); - final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE)); + final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE)); prop.put("mode_edit", "0"); // create mode - prop.put("mode_url", metadata.url().toNormalform(false, true)); - prop.putHTML("mode_title", metadata.dc_title()); - prop.putHTML("mode_description", (document == null) ? metadata.dc_title(): document.dc_title()); - prop.putHTML("mode_author", metadata.dc_creator()); - prop.putHTML("mode_tags", (document == null) ? metadata.dc_subject() : document.dc_subject(',')); + prop.put("mode_url", urlentry.url().toNormalform(false, true)); + prop.putHTML("mode_title", urlentry.dc_title()); + prop.putHTML("mode_description", (document == null) ? urlentry.dc_title(): document.dc_title()); + prop.putHTML("mode_author", urlentry.dc_creator()); + prop.putHTML("mode_tags", (document == null) ? urlentry.dc_subject() : document.dc_subject(',')); prop.putHTML("mode_path",""); prop.put("mode_public", "0"); prop.put("mode_feed", "0"); //TODO: check if it IS a feed diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 9cdceff83..519f8cd0a 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -180,7 +180,6 @@ public class CrawlResults { String urlstr, urltxt; Seed initiatorSeed, executorSeed; URIMetadataRow urle; - URIMetadataRow.Components metadata; int cnt = 0; final Iterator> i = ResultURLs.results(tabletype); @@ -193,11 +192,9 @@ public class CrawlResults { Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey()); urlstr = null; urltxt = null; - metadata = null; continue; } - metadata = urle.metadata(); - urlstr = metadata.url().toNormalform(false, true); + urlstr = urle.url().toNormalform(false, true); urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL initiatorSeed = entry.getValue() == null || entry.getValue().initiatorHash == null ? null : sb.peers.getConnected(ASCII.String(entry.getValue().initiatorHash)); @@ -236,11 +233,11 @@ public class CrawlResults { prop.put("table_indexed_" + cnt + "_showTitle", (showTitle) ? 
"1" : "0"); prop.put("table_indexed_" + cnt + "_showTitle_available", "1"); - if (metadata == null || metadata.dc_title() == null || metadata.dc_title().trim().length() == 0) + if (urle.dc_title() == null || urle.dc_title().trim().length() == 0) prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "0"); else { prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "1"); - prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", metadata.dc_title()); + prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", urle.dc_title()); } prop.put("table_indexed_" + cnt + "_showTitle_available_urlHash", entry.getKey()); @@ -250,13 +247,13 @@ public class CrawlResults { if (showCountry && urle != null) { prop.put("table_indexed_" + cnt + "_showCountry", "1"); - prop.put("table_indexed_" + cnt + "_showCountry_country", metadata.url().getLocale().getCountry()); + prop.put("table_indexed_" + cnt + "_showCountry_country", urle.url().getLocale().getCountry()); } else prop.put("table_indexed_" + cnt + "_showCountry", "0"); if (showIP && urle != null) { prop.put("table_indexed_" + cnt + "_showIP", "1"); - prop.put("table_indexed_" + cnt + "_showIP_ip", metadata.url().getInetAddress().getHostAddress()); + prop.put("table_indexed_" + cnt + "_showIP_ip", urle.url().getInetAddress().getHostAddress()); } else prop.put("table_indexed_" + cnt + "_showIP", "0"); diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index adf2ee61c..8132b40f1 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -427,7 +427,7 @@ public class IndexControlRWIs_p final URIMetadataRow e = segment.urlMetadata().load(b); segment.urlMetadata().remove(b); if ( e != null ) { - url = e.metadata().url(); + url = e.url(); pw.println(url.getHost() + "/" + url.getFile()); for ( final String supportedBlacklistType : supportedBlacklistTypes ) { if ( ListManager.listSetContains( @@ -463,7 +463,7 @@ public class IndexControlRWIs_p final URIMetadataRow e = segment.urlMetadata().load(b); segment.urlMetadata().remove(b); if ( e != null ) { - url = e.metadata().url(); + url = e.url(); pw.println(url.getHost() + "/.*"); for ( final String supportedBlacklistType : supportedBlacklistTypes ) { if ( ListManager.listSetContains( @@ -530,10 +530,7 @@ public class IndexControlRWIs_p String us; long rn = -1; while ( !ranked.isEmpty() && (entry = ranked.takeURL(false, 1000)) != null ) { - if ( (entry == null) || (entry.metadata() == null) ) { - continue; - } - url = entry.metadata().url(); + url = entry.url(); if ( url == null ) { continue; } diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java index b150c5d68..3c6f21d2e 100644 --- a/htroot/IndexControlURLs_p.java +++ b/htroot/IndexControlURLs_p.java @@ -158,7 +158,7 @@ public class IndexControlURLs_p { if (entry == null) { prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted."); } else { - urlstring = entry.metadata().url().toNormalform(false, true); + urlstring = entry.url().toNormalform(false, true); prop.put("urlstring", ""); sb.urlRemove(segment, urlhash.getBytes()); prop.putHTML("result", "Removed URL " + urlstring); @@ -210,7 +210,7 @@ public class IndexControlURLs_p { if (entry == null) { prop.putHTML("result", "No Entry for URL hash " + urlhash); } else { - prop.putHTML("urlstring", entry.metadata().url().toNormalform(false, true)); + prop.putHTML("urlstring", entry.url().toNormalform(false, true)); prop.putAll(genUrlProfile(segment, 
entry, urlhash)); prop.put("statistics", 0); } @@ -333,21 +333,20 @@ public class IndexControlURLs_p { prop.put("genUrlProfile_urlhash", urlhash); return prop; } - final URIMetadataRow.Components metadata = entry.metadata(); final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash()); - if (metadata == null || metadata.url() == null) { + if (entry.url() == null) { prop.put("genUrlProfile", "1"); prop.put("genUrlProfile_urlhash", urlhash); return prop; } prop.put("genUrlProfile", "2"); - prop.putHTML("genUrlProfile_urlNormalform", metadata.url().toNormalform(false, true)); + prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(false, true)); prop.put("genUrlProfile_urlhash", urlhash); - prop.put("genUrlProfile_urlDescr", metadata.dc_title()); + prop.put("genUrlProfile_urlDescr", entry.dc_title()); prop.put("genUrlProfile_moddate", entry.moddate().toString()); prop.put("genUrlProfile_loaddate", entry.loaddate().toString()); prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1); - prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "" : le.metadata().url().toNormalform(false, true)); + prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true)); prop.put("genUrlProfile_referrer_hash", (le == null) ? "" : ASCII.String(le.hash())); prop.put("genUrlProfile_doctype", String.valueOf(entry.doctype())); prop.put("genUrlProfile_language", entry.language()); diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index fb97741d0..08df8813b 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -117,14 +117,13 @@ public class ViewFile { // get the urlEntry that belongs to the url hash if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(ASCII.getBytes(urlHash))) != null) { // get the url that belongs to the entry - final URIMetadataRow.Components metadata = urlEntry.metadata(); - if ((metadata == null) || (metadata.url() == null)) { + if (urlEntry == null || urlEntry.url() == null) { prop.put("error", "3"); prop.put("viewMode", VIEW_MODE_NO_TEXT); return prop; } - url = metadata.url(); - descr = metadata.dc_title(); + url = urlEntry.url(); + descr = urlEntry.dc_title(); //urlEntry.wordCount(); size = urlEntry.size(); pre = urlEntry.flags().get(Condenser.flag_cat_indexof); diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index 71565d4bc..5bbbf33f4 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -87,29 +87,28 @@ public class yacydoc { final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes()); if (entry == null) return prop; - final URIMetadataRow.Components metadata = entry.metadata(); - if (metadata.url() == null) { + if (entry.url() == null) { return prop; } final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? 
null : segment.urlMetadata().load(entry.referrerHash()); - prop.putXML("dc_title", metadata.dc_title()); - prop.putXML("dc_creator", metadata.dc_creator()); + prop.putXML("dc_title", entry.dc_title()); + prop.putXML("dc_creator", entry.dc_creator()); prop.putXML("dc_description", ""); // this is the fulltext part in the surrogate - prop.putXML("dc_subject", metadata.dc_subject()); - prop.putXML("dc_publisher", metadata.dc_publisher()); + prop.putXML("dc_subject", entry.dc_subject()); + prop.putXML("dc_publisher", entry.dc_publisher()); prop.putXML("dc_contributor", ""); prop.putXML("dc_date", ISO8601Formatter.FORMATTER.format(entry.moddate())); prop.putXML("dc_type", String.valueOf(entry.doctype())); - prop.putXML("dc_identifier", metadata.url().toNormalform(false, true)); + prop.putXML("dc_identifier", entry.url().toNormalform(false, true)); prop.putXML("dc_language", ASCII.String(entry.language())); - prop.put("geo_lat", metadata.lat()); - prop.put("geo_long", metadata.lon()); + prop.put("geo_lat", entry.lat()); + prop.put("geo_long", entry.lon()); - prop.put("yacy_urlhash", metadata.url().hash()); + prop.put("yacy_urlhash", entry.url().hash()); prop.putXML("yacy_loaddate", entry.loaddate().toString()); prop.putXML("yacy_referrer_hash", (le == null) ? "" : ASCII.String(le.hash())); - prop.putXML("yacy_referrer_url", (le == null) ? "" : le.metadata().url().toNormalform(false, true)); + prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true)); prop.put("yacy_size", entry.size()); prop.put("yacy_words",entry.wordCount()); diff --git a/htroot/api/ymarks/add_ymark.java b/htroot/api/ymarks/add_ymark.java index ce61e9267..c34520514 100644 --- a/htroot/api/ymarks/add_ymark.java +++ b/htroot/api/ymarks/add_ymark.java @@ -35,7 +35,7 @@ public class add_ymark { if(post.containsKey("urlHash")) { final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING); - final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).metadata().url(); + final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).url(); final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt()); final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING); try { diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index a4567cfa4..fabcabe00 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -33,8 +33,8 @@ import net.yacy.cora.document.ASCII; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; -import net.yacy.peers.Seed; import net.yacy.peers.Protocol; +import net.yacy.peers.Seed; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; @@ -125,15 +125,14 @@ public final class crawlReceipt { return prop; } - final URIMetadataRow.Components metadata = entry.metadata(); - if (metadata.url() == null) { + if (entry.url() == null) { if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); prop.put("delay", "3600"); return prop; } // check if the entry is in our network domain - final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url()); + final String urlRejectReason = 
sb.crawlStacker.urlInAcceptedDomain(entry.url()); if (urlRejectReason != null) { if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); prop.put("delay", "9999"); @@ -145,7 +144,7 @@ public final class crawlReceipt { sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry); ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS); sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done - if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + metadata.url().toNormalform(false, true)); + if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true)); // ready for more prop.put("delay", "10"); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index fa8ab98e2..944c96d00 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -34,10 +34,10 @@ import net.yacy.cora.document.RSSMessage; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; -import net.yacy.peers.Seed; import net.yacy.peers.EventChannel; -import net.yacy.peers.Protocol; import net.yacy.peers.Network; +import net.yacy.peers.Protocol; +import net.yacy.peers.Seed; import net.yacy.repository.Blacklist; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; @@ -108,8 +108,7 @@ public final class transferURL { } // check if entry is well-formed - final URIMetadataRow.Components metadata = lEntry.metadata(); - if (metadata == null || metadata.url() == null) { + if (lEntry.url() == null) { Network.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls); blocked++; continue; @@ -123,28 +122,28 @@ public final class transferURL { } // check if the entry is blacklisted - if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()))) { - if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName); + if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) { + if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); lEntry = null; blocked++; continue; } // check if the entry is in our network domain - final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url()); + final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(lEntry.url()); if (urlRejectReason != null) { - if (Network.log.isFine()) Network.log.logFine("transferURL: blocked URL '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName); + if (Network.log.isFine()) Network.log.logFine("transferURL: blocked URL '" + lEntry.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName); lEntry = null; blocked++; continue; } // write entry to database - if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.metadata().url().toNormalform(true, false)); + if (Network.log.isFine()) 
Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false)); try { sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry); ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER); - if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName); + if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); received++; } catch (final IOException e) { Log.logException(e); diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java index eb1493f04..e01f091fb 100644 --- a/htroot/yacy/urls.java +++ b/htroot/yacy/urls.java @@ -112,7 +112,6 @@ public class urls { final int count = urlhashes.length() / 12; int c = 0; URIMetadataRow entry; - URIMetadataRow.Components metadata; DigestURI referrer; for (int i = 0; i < count; i++) { entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1)))); @@ -120,12 +119,11 @@ public class urls { // find referrer, if there is one referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash()); // create RSS entry - metadata = entry.metadata(); - prop.put("item_" + c + "_title", metadata.dc_title()); - prop.putXML("item_" + c + "_link", metadata.url().toNormalform(true, false)); + prop.put("item_" + c + "_title", entry.dc_title()); + prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false)); prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false)); - prop.putXML("item_" + c + "_description", metadata.dc_title()); - prop.put("item_" + c + "_author", metadata.dc_creator()); + prop.putXML("item_" + c + "_description", entry.dc_title()); + prop.put("item_" + c + "_author", entry.dc_creator()); prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.moddate())); prop.put("item_" + c + "_guid", ASCII.String(entry.hash())); c++; diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 41cd4e58b..82da766b3 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -534,18 +534,17 @@ public class yacysearch { final String recommendHash = post.get("recommendref", ""); // urlhash final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(recommendHash)); if (urlentry != null) { - final URIMetadataRow.Components metadata = urlentry.metadata(); Document[] documents = null; try { - documents = sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE); + documents = sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE); } catch (final IOException e) { } catch (final Parser.Failure e) { } if (documents != null) { // create a news message final Map map = new HashMap(); - map.put("url", metadata.url().toNormalform(false, true).replace(',', '|')); - map.put("title", metadata.dc_title().replace(',', ' ')); + map.put("url", urlentry.url().toNormalform(false, true).replace(',', '|')); + map.put("title", urlentry.dc_title().replace(',', ' ')); map.put("description", documents[0].dc_title().replace(',', ' ')); map.put("author", documents[0].dc_creator()); map.put("tags", documents[0].dc_subject(' ')); @@ -564,9 +563,8 @@ public class yacysearch { final String bookmarkHash = 
post.get("bookmarkref", ""); // urlhash final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(bookmarkHash)); if (urlentry != null) { - final URIMetadataRow.Components metadata = urlentry.metadata(); try { - sb.tables.bookmarks.createBookmark(sb.loader, metadata.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search"); + sb.tables.bookmarks.createBookmark(sb.loader, urlentry.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search"); } catch (final Throwable e) { } } diff --git a/source/de/anomic/crawler/ResultURLs.java b/source/de/anomic/crawler/ResultURLs.java index 3a96a6237..0eeb89c79 100644 --- a/source/de/anomic/crawler/ResultURLs.java +++ b/source/de/anomic/crawler/ResultURLs.java @@ -115,7 +115,7 @@ public final class ResultURLs { try { final ScoreMap domains = getDomains(stackType); if (domains != null) { - domains.inc(e.metadata().url().getHost()); + domains.inc(e.url().getHost()); } } catch (final Exception ex) { System.out.println("INTERNAL ERROR in newEntry/3: " + ex.toString()); diff --git a/source/de/anomic/data/ymark/YMarkMetadata.java b/source/de/anomic/data/ymark/YMarkMetadata.java index 68b1a3752..cd05270a6 100644 --- a/source/de/anomic/data/ymark/YMarkMetadata.java +++ b/source/de/anomic/data/ymark/YMarkMetadata.java @@ -81,7 +81,7 @@ public class YMarkMetadata { public YMarkMetadata(final byte[] urlHash, final Segments indexSegment) { this.document = null; this.indexSegment = indexSegment; - this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).metadata().url(); + this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).url(); } public YMarkMetadata(final Document document) { @@ -115,14 +115,10 @@ public class YMarkMetadata { metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount())); metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype())); metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language())); - - final URIMetadataRow.Components meta = urlEntry.metadata(); - if (meta != null) { - metadata.put(METADATA.TITLE, meta.dc_title()); - metadata.put(METADATA.CREATOR, meta.dc_creator()); - metadata.put(METADATA.KEYWORDS, meta.dc_subject()); - metadata.put(METADATA.PUBLISHER, meta.dc_publisher()); - } + metadata.put(METADATA.TITLE, urlEntry.dc_title()); + metadata.put(METADATA.CREATOR, urlEntry.dc_creator()); + metadata.put(METADATA.KEYWORDS, urlEntry.dc_subject()); + metadata.put(METADATA.PUBLISHER, urlEntry.dc_publisher()); } return metadata; } diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index d0f5e9286..1022a7c51 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -378,7 +378,39 @@ public class URIMetadataRow implements URIMetadata { return this.ranking; } - public Components metadata() { + public boolean matches(final Pattern matcher) { + return this.metadata().matches(matcher); + } + + public DigestURI url() { + return this.metadata().url(); + } + + public String dc_title() { + return this.metadata().dc_title(); + } + + public String dc_creator() { + return this.metadata().dc_creator(); + } + + public String dc_publisher() { + return this.metadata().dc_publisher(); + } + + public String dc_subject() { + return this.metadata().dc_subject(); + } + + public float lat() { + return this.metadata().lat(); + } + + public float lon() { + return this.metadata().lon(); + } + + private 
Components metadata() { // avoid double computation of metadata elements if (this.comp != null) return this.comp; // parse elements from comp field; @@ -545,7 +577,7 @@ public class URIMetadataRow implements URIMetadata { //return "{" + core + "}"; } - public class Components { + private class Components { private DigestURI url; private String urlRaw; private byte[] urlHash; diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 0d937d451..2d45b118b 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -693,14 +693,10 @@ public final class Protocol if ( urlEntry.hash().length != 12 ) { continue; // bad url hash } - final URIMetadataRow.Components metadata = urlEntry.metadata(); - if ( metadata == null ) { - continue; - } - if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, metadata.url()) ) { + if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) { if ( Network.log.isInfo() ) { Network.log.logInfo("remote search: filtered blacklisted url " - + metadata.url() + + urlEntry.url() + " from peer " + target.getName()); } @@ -708,11 +704,11 @@ public final class Protocol } final String urlRejectReason = - Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(metadata.url()); + Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(urlEntry.url()); if ( urlRejectReason != null ) { if ( Network.log.isInfo() ) { Network.log.logInfo("remote search: rejected url '" - + metadata.url() + + urlEntry.url() + "' (" + urlRejectReason + ") from peer " @@ -740,7 +736,7 @@ public final class Protocol + " does not belong to word-attached-hash " + ASCII.String(entry.urlhash()) + "; url = " - + metadata.url() + + urlEntry.url() + " from peer " + target.getName()); continue; // spammed @@ -1530,7 +1526,7 @@ public final class Protocol null // constraint); ); for ( final URIMetadataRow link : result.links ) { - System.out.println(link.metadata().url().toNormalform(true, false)); + System.out.println(link.url().toNormalform(true, false)); System.out.println(link.snippet()); } } catch ( final IOException e ) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index fa21f75f7..8235d3ca3 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -108,7 +108,6 @@ import net.yacy.gui.Tray; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; -import net.yacy.kelondro.data.meta.URIMetadataRow.Components; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.RowSpaceExceededException; @@ -1214,9 +1213,7 @@ public final class Switchboard extends serverSwitch { if (urlhash.length == 0) return null; final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash); if (le != null) { - final Components metadata = le.metadata(); - if (metadata == null) return null; - return metadata.url(); + return le.url(); } return this.crawlQueues.getURL(urlhash); } @@ -2290,11 +2287,11 @@ public final class Switchboard extends serverSwitch { final long t = System.currentTimeMillis(); final Map response = Protocol.crawlReceipt(Switchboard.this.peers.mySeed(), this.initiatorPeer, "crawl", "fill", "indexed", this.reference, ""); if (response == null) { - Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.metadata().url().toNormalform(false, true) + "' to " + 
this.initiatorPeer.getName() + " FAILED, send time = " + (System.currentTimeMillis() - t)); + Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " FAILED, send time = " + (System.currentTimeMillis() - t)); return; } final String delay = response.get("delay"); - Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.metadata().url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " success, delay = " + delay + ", send time = " + (System.currentTimeMillis() - t)); + Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " success, delay = " + delay + ", send time = " + (System.currentTimeMillis() - t)); } } diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index 9b4241a00..e74c07d82 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -41,7 +41,6 @@ import net.yacy.document.LibraryProvider; import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; -import net.yacy.kelondro.data.meta.URIMetadataRow.Components; import net.yacy.kelondro.logging.Log; import net.yacy.search.query.QueryParams; import net.yacy.search.query.RWIProcess; @@ -230,13 +229,8 @@ public class DocumentIndex extends Segment // search is running; retrieve results URIMetadataRow row; final ArrayList files = new ArrayList(); - Components metadata; while ( (row = rankedCache.takeURL(false, 1000)) != null ) { - metadata = row.metadata(); - if ( metadata == null ) { - continue; - } - files.add(metadata.url()); + files.add(row.url()); count--; if ( count == 0 ) { break; diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java index 345569d6b..ebc18dfd1 100644 --- a/source/net/yacy/search/index/MetadataRepository.java +++ b/source/net/yacy/search/index/MetadataRepository.java @@ -222,6 +222,7 @@ public final class MetadataRepository implements Iterable { } } + @Override public Iterator iterator() { return keys(true, null); } @@ -254,6 +255,7 @@ public final class MetadataRepository implements Iterable { this.error = false; } + @Override public kiter clone(final Object secondHash) { try { return new kiter(this.up, (String) secondHash); @@ -262,12 +264,14 @@ public final class MetadataRepository implements Iterable { } } + @Override public final boolean hasNext() { if (this.error) return false; if (this.iter == null) return false; return this.iter.hasNext(); } + @Override public final URIMetadataRow next() { Row.Entry e = null; if (this.iter == null) { return null; } @@ -276,6 +280,7 @@ public final class MetadataRepository implements Iterable { return new URIMetadataRow(e, null, 0); } + @Override public final void remove() { this.iter.remove(); } @@ -372,6 +377,7 @@ public final class MetadataRepository implements Iterable { this.crawlStacker = crawlStacker; } + @Override public void run() { try { Log.logInfo("URLDBCLEANER", "UrldbCleaner-Thread startet"); @@ -394,30 +400,24 @@ public final class MetadataRepository implements Iterable { } else if (entry.hash() == null) { if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + 
"%): " + "hash == null"); } else { - final URIMetadataRow.Components metadata = entry.metadata(); this.totalSearchedUrls++; - if (metadata == null) { - if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", "corrupted entry for hash = " + ASCII.String(entry.hash())); - remove(entry.hash()); - continue; - } - if (metadata.url() == null) { + if (entry.url() == null) { if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + "URL == null"); remove(entry.hash()); continue; } - if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, metadata.url()) || - this.blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()) || - (this.crawlStacker.urlInAcceptedDomain(metadata.url()) != null)) { - this.lastBlacklistedUrl = metadata.url().toNormalform(true, true); + if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) || + this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) || + (this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) { + this.lastBlacklistedUrl = entry.url().toNormalform(true, true); this.lastBlacklistedHash = ASCII.String(entry.hash()); - if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + " " + metadata.url().toNormalform(false, true)); + if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + " " + entry.url().toNormalform(false, true)); remove(entry.hash()); if (this.blacklistedUrls % 100 == 0) { Log.logInfo("URLDBCLEANER", "Deleted " + this.blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + this.lastBlacklistedUrl); } } - this.lastUrl = metadata.url().toNormalform(true, true); + this.lastUrl = entry.url().toNormalform(true, true); this.lastHash = ASCII.String(entry.hash()); } } @@ -502,6 +502,7 @@ public final class MetadataRepository implements Iterable { if ((dom) && (format == 2)) dom = false; } + @Override public void run() { try { final File parentf = this.f.getParentFile(); @@ -531,26 +532,24 @@ public final class MetadataRepository implements Iterable { } else { final Iterator i = entries(); // iterates indexURLEntry objects URIMetadataRow entry; - URIMetadataRow.Components metadata; String url; while (i.hasNext()) { entry = i.next(); if (this.set != null && !this.set.has(entry.hash())) continue; - metadata = entry.metadata(); - url = metadata.url().toNormalform(true, false); + url = entry.url().toNormalform(true, false); if (!url.matches(this.filter)) continue; if (this.format == 0) { pw.println(url); } if (this.format == 1) { - pw.println("" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + "
"); + pw.println("" + CharacterCoding.unicode2xml(entry.dc_title(), true) + "
"); } if (this.format == 2) { pw.println(""); - pw.println("" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + ""); + pw.println("" + CharacterCoding.unicode2xml(entry.dc_title(), true) + ""); pw.println("" + MultiProtocolURI.escape(url) + ""); - if (metadata.dc_creator().length() > 0) pw.println("" + CharacterCoding.unicode2xml(metadata.dc_creator(), true) + ""); - if (metadata.dc_subject().length() > 0) pw.println("" + CharacterCoding.unicode2xml(metadata.dc_subject(), true) + ""); + if (entry.dc_creator().length() > 0) pw.println("" + CharacterCoding.unicode2xml(entry.dc_creator(), true) + ""); + if (entry.dc_subject().length() > 0) pw.println("" + CharacterCoding.unicode2xml(entry.dc_subject(), true) + ""); pw.println("" + entry.moddate().toString() + ""); pw.println("" + entry.size() + ""); pw.println("" + ASCII.String(entry.hash()) + ""); @@ -635,8 +634,8 @@ public final class MetadataRepository implements Iterable { for (final URLHashCounter hs: domainSamples.values()) { if (hs == null) continue; urlref = this.load(hs.urlhashb); - if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue; - set.add(urlref.metadata().url().getHost()); + if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue; + set.add(urlref.url().getHost()); count--; if (count == 0) break; } @@ -671,12 +670,10 @@ public final class MetadataRepository implements Iterable { for (final Map.Entry e: domainSamples.entrySet()) { hosthashScore.inc(ASCII.String(e.getValue().urlhashb, 6, 6), e.getValue().count); } - URIMetadataRow.Components comps; DigestURI url; for (final Map.Entry e: domainSamples.entrySet()) { urlref = this.load(e.getValue().urlhashb); - comps = urlref.metadata(); - url = comps.url(); + url = urlref.url(); hostMap.put(e.getKey(), new HostStat(url.getHost(), url.getPort(), e.getKey(), hosthashScore.get(e.getKey()))); } return hostMap; @@ -693,16 +690,14 @@ public final class MetadataRepository implements Iterable { count += 10; // make some more to prevent that we have to do this again after deletions too soon. 
if (count < 0 || domainScore.sizeSmaller(count)) count = domainScore.size(); this.statsDump = new ArrayList(); - URIMetadataRow.Components comps; DigestURI url; while (j.hasNext()) { urlhash = j.next(); if (urlhash == null) continue; urlref = this.load(ASCII.getBytes(urlhash)); - if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue; + if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue; if (this.statsDump == null) return new ArrayList().iterator(); // some other operation has destroyed the object - comps = urlref.metadata(); - url = comps.url(); + url = urlref.url(); this.statsDump.add(new HostStat(url.getHost(), url.getPort(), urlhash.substring(6), domainScore.get(urlhash))); count--; if (count == 0) break; diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 0779736b9..3378fb8ed 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -422,12 +422,11 @@ public class Segment { // determine the url string final URIMetadataRow entry = urlMetadata().load(urlhash); if (entry == null) return 0; - final URIMetadataRow.Components metadata = entry.metadata(); - if (metadata == null || metadata.url() == null) return 0; + if (entry.url() == null) return 0; try { // parse the resource - final Document document = Document.mergeDocuments(metadata.url(), null, loader.loadDocuments(loader.request(metadata.url(), true, false), cacheStrategy, 10000, Integer.MAX_VALUE)); + final Document document = Document.mergeDocuments(entry.url(), null, loader.loadDocuments(loader.request(entry.url(), true, false), cacheStrategy, 10000, Integer.MAX_VALUE)); if (document == null) { // delete just the url entry urlMetadata().remove(urlhash); @@ -496,7 +495,7 @@ public class Segment { if (ue == null) { urlHashs.put(entry.urlhash()); } else { - url = ue.metadata().url(); + url = ue.url(); if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { urlHashs.put(entry.urlhash()); } diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index a4374a734..3482fc8a3 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -563,18 +563,9 @@ public final class RWIProcess extends Thread continue; } - // prepare values for constraint check - final URIMetadataRow.Components metadata = page.metadata(); - - // check errors - if ( metadata == null ) { - this.sortout++; - continue; // rare case where the url is corrupted - } - if ( !this.query.urlMask_isCatchall ) { // check url mask - if ( !metadata.matches(this.query.urlMask) ) { + if ( !page.matches(this.query.urlMask) ) { this.sortout++; continue; } @@ -588,14 +579,14 @@ public final class RWIProcess extends Thread } // check for more errors - if ( metadata.url() == null ) { + if ( page.url() == null ) { this.sortout++; continue; // rare case where the url is corrupted } - final String pageurl = metadata.url().toNormalform(true, true); - final String pageauthor = metadata.dc_creator(); - final String pagetitle = metadata.dc_title().toLowerCase(); + final String pageurl = page.url().toNormalform(true, true); + final String pageauthor = page.dc_creator(); + final String pagetitle = page.dc_title().toLowerCase(); // check exclusion if ( (QueryParams.anymatch(pagetitle, this.query.excludeHashes)) @@ -620,7 +611,7 @@ public final class 
RWIProcess extends Thread // check location constraint if ( (this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_haslocation)) - && (metadata.lat() == 0.0f || metadata.lon() == 0.0f) ) { + && (page.lat() == 0.0f || page.lon() == 0.0f) ) { this.sortout++; continue; } @@ -654,7 +645,7 @@ public final class RWIProcess extends Thread } // namespace navigation - String pagepath = metadata.url().getPath(); + String pagepath = page.url().getPath(); if ( (p = pagepath.indexOf(':')) >= 0 ) { pagepath = pagepath.substring(0, p); p = pagepath.lastIndexOf('/'); @@ -665,17 +656,17 @@ public final class RWIProcess extends Thread } // protocol navigation - final String protocol = metadata.url().getProtocol(); + final String protocol = page.url().getProtocol(); this.protocolNavigator.inc(protocol); // file type navigation - final String fileext = metadata.url().getFileExtension(); + final String fileext = page.url().getFileExtension(); if ( fileext.length() > 0 ) { this.filetypeNavigator.inc(fileext); } // check Scanner - if ( !Scanner.acceptURL(metadata.url()) ) { + if ( !Scanner.acceptURL(page.url()) ) { this.sortout++; continue; } @@ -785,7 +776,7 @@ public final class RWIProcess extends Thread } urlhash = this.hostResolver.get(hosthash); row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash); - hostname = row == null ? null : row.metadata().url().getHost(); + hostname = row == null ? null : row.url().getHost(); if ( hostname != null ) { result.set(hostname, this.hostNavigator.get(hosthash)); } diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index f665d85c2..e22f6c55f 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -365,7 +365,6 @@ public class SnippetProcess { private final long timeout; // the date until this thread should try to work private long lastLifeSign; // when the last time the run()-loop was executed - private final int id; private final CacheStrategy cacheStrategy; private final int neededResults; private final Pattern snippetPattern; @@ -373,7 +372,6 @@ public class SnippetProcess { private final SolrConnector solr; public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) { - this.id = id; this.cacheStrategy = cacheStrategy; this.lastLifeSign = System.currentTimeMillis(); this.snippetPattern = snippetPattern; @@ -481,15 +479,14 @@ public class SnippetProcess { // find the url entry long startTime = System.currentTimeMillis(); - final URIMetadataRow.Components metadata = page.metadata(); - if (metadata == null) return null; + if (page == null) return null; final long dbRetrievalTime = System.currentTimeMillis() - startTime; if (cacheStrategy == null) { final TextSnippet snippet = new TextSnippet( null, solrText, - metadata, + page, this.snippetFetchWordHashes, null, ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))), @@ -506,7 +503,7 @@ public class SnippetProcess { final TextSnippet snippet = new TextSnippet( this.loader, solrText, - metadata, + page, this.snippetFetchWordHashes, cacheStrategy, ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))), @@ -514,7 +511,7 @@ public class SnippetProcess { Integer.MAX_VALUE, !this.query.isLocal()); final long snippetComputationTime = System.currentTimeMillis() - startTime; - 
Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); + Log.logInfo("SEARCH", "text snippet load time for " + page.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); if (!snippet.getErrorCode().fail()) { // we loaded the file and found the snippet @@ -526,16 +523,16 @@ public class SnippetProcess { } else { // problems with snippet fetch final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); - if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), metadata.url(), this.query.queryHashes, reason); - Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason); + if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason); + Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason); return null; } } else { // attach media information startTime = System.currentTimeMillis(); - final List mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal()); + final List mediaSnippets = MediaSnippet.retrieveMediaSnippets(page.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal()); final long snippetComputationTime = System.currentTimeMillis() - startTime; - Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime); + Log.logInfo("SEARCH", "media snippet load time for " + page.url() + ": " + snippetComputationTime); if (mediaSnippets != null && !mediaSnippets.isEmpty()) { // found media snippets, return entry @@ -545,8 +542,8 @@ public class SnippetProcess { } else { // problems with snippet fetch final String reason = "no media snippet"; - if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), metadata.url(), this.query.queryHashes, reason); - Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason); + if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason); + Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason); return null; } } diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java index 0afcac18d..456dec424 100644 --- a/source/net/yacy/search/snippet/ResultEntry.java +++ b/source/net/yacy/search/snippet/ResultEntry.java @@ -51,7 +51,6 @@ public class ResultEntry implements Comparable, Comparator, Comparator mediaSnippets, final long dbRetrievalTime, final long snippetComputationTime) { this.urlentry = urlentry; - this.urlcomps = urlentry.metadata(); this.alternative_urlstring = null; this.alternative_urlname = null; this.textSnippet = textSnippet; this.mediaSnippets = mediaSnippets; this.dbRetrievalTime = dbRetrievalTime; this.snippetComputationTime = snippetComputationTime; - final String host = urlcomps.url().getHost(); + final String host = 
urlentry.url().getHost(); if (host != null && host.endsWith(".yacyh")) { // translate host into current IP int p = host.indexOf('.'); final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6)); final Seed seed = peers.getConnected(hash); - final String filename = urlcomps.url().getFile(); + final String filename = urlentry.url().getFile(); String address = null; if ((seed == null) || ((address = seed.getPublicAddress()) == null)) { // seed is not known from here @@ -90,7 +88,7 @@ public class ResultEntry implements Comparable, Comparator, Comparator 0) alternative_urlname = alternative_urlname.substring(0, p); + this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename; + this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename; + if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p); } } @Override public int hashCode() { - return ByteArray.hashCode(urlentry.hash()); + return ByteArray.hashCode(this.urlentry.hash()); } @Override public boolean equals(final Object obj) { @@ -113,37 +111,37 @@ public class ResultEntry implements Comparable, Comparator, Comparator, Comparator, Comparator, Comparator, Comparator, Comparator, Comparator, Comparator
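
Reviewer note, not part of the patch: the same call-site pattern repeats in every file touched above, so a minimal before/after sketch of that pattern may help when reading the hunks. It assumes a method context with a URIMetadataRow named entry already loaded from segment.urlMetadata(), as in the touched classes; nothing here is new API beyond what the diff itself introduces.

    // Before this change (old call-site shape; null-checking the Components holder was the caller's job):
    //     final URIMetadataRow.Components metadata = entry.metadata();
    //     if (metadata == null || metadata.url() == null) return;
    //     final DigestURI url   = metadata.url();
    //     final String    title = metadata.dc_title();

    // After this change (URIMetadataRow delegates internally to its now-private Components):
    if (entry.url() == null) return;           // rare corrupted-entry case, handled as before
    final DigestURI url   = entry.url();
    final String    title = entry.dc_title();

The lazy parsing of the comp row is unchanged: the private metadata() accessor still caches its Components on first use, so the refactoring only narrows the public surface of URIMetadataRow, which is why the separate "metadata == null" checks could be dropped at the call sites.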