Removed handling of Components objects for URIMetadataRow.

This is a preparation to replace these rows with nodes from the node store.
pull/1/head
Michael Christen 13 years ago
parent 66ab51f89d
commit 9e5894c784
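
In short: the public URIMetadataRow.Components accessor is gone. URIMetadataRow now exposes url(), dc_title(), dc_creator(), dc_subject(), dc_publisher(), lat(), lon() and matches() directly, delegating to a private, lazily parsed Components instance, so call sites no longer fetch and null-check the Components object themselves. A minimal sketch of a typical call-site migration (the helper method and its name are assumed for illustration, not taken from the diff):

    // Before this commit (guarded two-step lookup):
    //   final URIMetadataRow.Components metadata = entry.metadata();
    //   if (metadata == null || metadata.url() == null) return null;
    //   return metadata.url().toNormalform(false, true) + " " + metadata.dc_title();

    // After this commit the row itself is the facade; one guard remains:
    static String describeEntry(final URIMetadataRow entry) {
        if (entry.url() == null) return null;
        return entry.url().toNormalform(false, true) + " " + entry.dc_title();
    }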

@ -197,14 +197,13 @@ public class Bookmarks {
// try to get the bookmark from the LURL database
final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
if (urlentry != null) try {
final URIMetadataRow.Components metadata = urlentry.metadata();
final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE));
final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE));
prop.put("mode_edit", "0"); // create mode
prop.put("mode_url", metadata.url().toNormalform(false, true));
prop.putHTML("mode_title", metadata.dc_title());
prop.putHTML("mode_description", (document == null) ? metadata.dc_title(): document.dc_title());
prop.putHTML("mode_author", metadata.dc_creator());
prop.putHTML("mode_tags", (document == null) ? metadata.dc_subject() : document.dc_subject(','));
prop.put("mode_url", urlentry.url().toNormalform(false, true));
prop.putHTML("mode_title", urlentry.dc_title());
prop.putHTML("mode_description", (document == null) ? urlentry.dc_title(): document.dc_title());
prop.putHTML("mode_author", urlentry.dc_creator());
prop.putHTML("mode_tags", (document == null) ? urlentry.dc_subject() : document.dc_subject(','));
prop.putHTML("mode_path","");
prop.put("mode_public", "0");
prop.put("mode_feed", "0"); //TODO: check if it IS a feed

@ -180,7 +180,6 @@ public class CrawlResults {
String urlstr, urltxt;
Seed initiatorSeed, executorSeed;
URIMetadataRow urle;
URIMetadataRow.Components metadata;
int cnt = 0;
final Iterator<Map.Entry<String, InitExecEntry>> i = ResultURLs.results(tabletype);
@ -193,11 +192,9 @@ public class CrawlResults {
Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
urlstr = null;
urltxt = null;
metadata = null;
continue;
}
metadata = urle.metadata();
urlstr = metadata.url().toNormalform(false, true);
urlstr = urle.url().toNormalform(false, true);
urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL
initiatorSeed = entry.getValue() == null || entry.getValue().initiatorHash == null ? null : sb.peers.getConnected(ASCII.String(entry.getValue().initiatorHash));
@ -236,11 +233,11 @@ public class CrawlResults {
prop.put("table_indexed_" + cnt + "_showTitle", (showTitle) ? "1" : "0");
prop.put("table_indexed_" + cnt + "_showTitle_available", "1");
if (metadata == null || metadata.dc_title() == null || metadata.dc_title().trim().length() == 0)
if (urle.dc_title() == null || urle.dc_title().trim().length() == 0)
prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "0");
else {
prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "1");
prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", metadata.dc_title());
prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", urle.dc_title());
}
prop.put("table_indexed_" + cnt + "_showTitle_available_urlHash", entry.getKey());
@ -250,13 +247,13 @@ public class CrawlResults {
if (showCountry && urle != null) {
prop.put("table_indexed_" + cnt + "_showCountry", "1");
prop.put("table_indexed_" + cnt + "_showCountry_country", metadata.url().getLocale().getCountry());
prop.put("table_indexed_" + cnt + "_showCountry_country", urle.url().getLocale().getCountry());
} else
prop.put("table_indexed_" + cnt + "_showCountry", "0");
if (showIP && urle != null) {
prop.put("table_indexed_" + cnt + "_showIP", "1");
prop.put("table_indexed_" + cnt + "_showIP_ip", metadata.url().getInetAddress().getHostAddress());
prop.put("table_indexed_" + cnt + "_showIP_ip", urle.url().getInetAddress().getHostAddress());
} else
prop.put("table_indexed_" + cnt + "_showIP", "0");

@ -427,7 +427,7 @@ public class IndexControlRWIs_p
final URIMetadataRow e = segment.urlMetadata().load(b);
segment.urlMetadata().remove(b);
if ( e != null ) {
url = e.metadata().url();
url = e.url();
pw.println(url.getHost() + "/" + url.getFile());
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
if ( ListManager.listSetContains(
@ -463,7 +463,7 @@ public class IndexControlRWIs_p
final URIMetadataRow e = segment.urlMetadata().load(b);
segment.urlMetadata().remove(b);
if ( e != null ) {
url = e.metadata().url();
url = e.url();
pw.println(url.getHost() + "/.*");
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
if ( ListManager.listSetContains(
@ -530,10 +530,7 @@ public class IndexControlRWIs_p
String us;
long rn = -1;
while ( !ranked.isEmpty() && (entry = ranked.takeURL(false, 1000)) != null ) {
if ( (entry == null) || (entry.metadata() == null) ) {
continue;
}
url = entry.metadata().url();
url = entry.url();
if ( url == null ) {
continue;
}

@ -158,7 +158,7 @@ public class IndexControlURLs_p {
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
urlstring = entry.metadata().url().toNormalform(false, true);
urlstring = entry.url().toNormalform(false, true);
prop.put("urlstring", "");
sb.urlRemove(segment, urlhash.getBytes());
prop.putHTML("result", "Removed URL " + urlstring);
@ -210,7 +210,7 @@ public class IndexControlURLs_p {
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash);
} else {
prop.putHTML("urlstring", entry.metadata().url().toNormalform(false, true));
prop.putHTML("urlstring", entry.url().toNormalform(false, true));
prop.putAll(genUrlProfile(segment, entry, urlhash));
prop.put("statistics", 0);
}
@ -333,21 +333,20 @@ public class IndexControlURLs_p {
prop.put("genUrlProfile_urlhash", urlhash);
return prop;
}
final URIMetadataRow.Components metadata = entry.metadata();
final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash());
if (metadata == null || metadata.url() == null) {
if (entry.url() == null) {
prop.put("genUrlProfile", "1");
prop.put("genUrlProfile_urlhash", urlhash);
return prop;
}
prop.put("genUrlProfile", "2");
prop.putHTML("genUrlProfile_urlNormalform", metadata.url().toNormalform(false, true));
prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(false, true));
prop.put("genUrlProfile_urlhash", urlhash);
prop.put("genUrlProfile_urlDescr", metadata.dc_title());
prop.put("genUrlProfile_urlDescr", entry.dc_title());
prop.put("genUrlProfile_moddate", entry.moddate().toString());
prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);
prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.metadata().url().toNormalform(false, true));
prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.url().toNormalform(false, true));
prop.put("genUrlProfile_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
prop.put("genUrlProfile_doctype", String.valueOf(entry.doctype()));
prop.put("genUrlProfile_language", entry.language());

@ -117,14 +117,13 @@ public class ViewFile {
// get the urlEntry that belongs to the url hash
if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(ASCII.getBytes(urlHash))) != null) {
// get the url that belongs to the entry
final URIMetadataRow.Components metadata = urlEntry.metadata();
if ((metadata == null) || (metadata.url() == null)) {
if (urlEntry == null || urlEntry.url() == null) {
prop.put("error", "3");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
url = metadata.url();
descr = metadata.dc_title();
url = urlEntry.url();
descr = urlEntry.dc_title();
//urlEntry.wordCount();
size = urlEntry.size();
pre = urlEntry.flags().get(Condenser.flag_cat_indexof);

@ -87,29 +87,28 @@ public class yacydoc {
final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes());
if (entry == null) return prop;
final URIMetadataRow.Components metadata = entry.metadata();
if (metadata.url() == null) {
if (entry.url() == null) {
return prop;
}
final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash());
prop.putXML("dc_title", metadata.dc_title());
prop.putXML("dc_creator", metadata.dc_creator());
prop.putXML("dc_title", entry.dc_title());
prop.putXML("dc_creator", entry.dc_creator());
prop.putXML("dc_description", ""); // this is the fulltext part in the surrogate
prop.putXML("dc_subject", metadata.dc_subject());
prop.putXML("dc_publisher", metadata.dc_publisher());
prop.putXML("dc_subject", entry.dc_subject());
prop.putXML("dc_publisher", entry.dc_publisher());
prop.putXML("dc_contributor", "");
prop.putXML("dc_date", ISO8601Formatter.FORMATTER.format(entry.moddate()));
prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", metadata.url().toNormalform(false, true));
prop.putXML("dc_identifier", entry.url().toNormalform(false, true));
prop.putXML("dc_language", ASCII.String(entry.language()));
prop.put("geo_lat", metadata.lat());
prop.put("geo_long", metadata.lon());
prop.put("geo_lat", entry.lat());
prop.put("geo_long", entry.lon());
prop.put("yacy_urlhash", metadata.url().hash());
prop.put("yacy_urlhash", entry.url().hash());
prop.putXML("yacy_loaddate", entry.loaddate().toString());
prop.putXML("yacy_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
prop.putXML("yacy_referrer_url", (le == null) ? "" : le.metadata().url().toNormalform(false, true));
prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true));
prop.put("yacy_size", entry.size());
prop.put("yacy_words",entry.wordCount());

@ -35,7 +35,7 @@ public class add_ymark {
if(post.containsKey("urlHash")) {
final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).metadata().url();
final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).url();
final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt());
final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
try {

@ -33,8 +33,8 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Seed;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
@ -125,15 +125,14 @@ public final class crawlReceipt {
return prop;
}
final URIMetadataRow.Components metadata = entry.metadata();
if (metadata.url() == null) {
if (entry.url() == null) {
if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "3600");
return prop;
}
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url());
if (urlRejectReason != null) {
if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "9999");
@ -145,7 +144,7 @@ public final class crawlReceipt {
sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry);
ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + metadata.url().toNormalform(false, true));
if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));
// ready for more
prop.put("delay", "10");

@ -34,10 +34,10 @@ import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Seed;
import net.yacy.peers.EventChannel;
import net.yacy.peers.Protocol;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
@ -108,8 +108,7 @@ public final class transferURL {
}
// check if entry is well-formed
final URIMetadataRow.Components metadata = lEntry.metadata();
if (metadata == null || metadata.url() == null) {
if (lEntry.url() == null) {
Network.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
@ -123,28 +122,28 @@ public final class transferURL {
}
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName);
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(lEntry.url());
if (urlRejectReason != null) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked URL '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName);
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked URL '" + lEntry.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}
// write entry to database
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.metadata().url().toNormalform(true, false));
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
try {
sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry);
ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
received++;
} catch (final IOException e) {
Log.logException(e);

@ -112,7 +112,6 @@ public class urls {
final int count = urlhashes.length() / 12;
int c = 0;
URIMetadataRow entry;
URIMetadataRow.Components metadata;
DigestURI referrer;
for (int i = 0; i < count; i++) {
entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
@ -120,12 +119,11 @@ public class urls {
// find referrer, if there is one
referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash());
// create RSS entry
metadata = entry.metadata();
prop.put("item_" + c + "_title", metadata.dc_title());
prop.putXML("item_" + c + "_link", metadata.url().toNormalform(true, false));
prop.put("item_" + c + "_title", entry.dc_title());
prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));
prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
prop.putXML("item_" + c + "_description", metadata.dc_title());
prop.put("item_" + c + "_author", metadata.dc_creator());
prop.putXML("item_" + c + "_description", entry.dc_title());
prop.put("item_" + c + "_author", entry.dc_creator());
prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.moddate()));
prop.put("item_" + c + "_guid", ASCII.String(entry.hash()));
c++;

@ -534,18 +534,17 @@ public class yacysearch {
final String recommendHash = post.get("recommendref", ""); // urlhash
final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(recommendHash));
if (urlentry != null) {
final URIMetadataRow.Components metadata = urlentry.metadata();
Document[] documents = null;
try {
documents = sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE);
documents = sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE);
} catch (final IOException e) {
} catch (final Parser.Failure e) {
}
if (documents != null) {
// create a news message
final Map<String, String> map = new HashMap<String, String>();
map.put("url", metadata.url().toNormalform(false, true).replace(',', '|'));
map.put("title", metadata.dc_title().replace(',', ' '));
map.put("url", urlentry.url().toNormalform(false, true).replace(',', '|'));
map.put("title", urlentry.dc_title().replace(',', ' '));
map.put("description", documents[0].dc_title().replace(',', ' '));
map.put("author", documents[0].dc_creator());
map.put("tags", documents[0].dc_subject(' '));
@ -564,9 +563,8 @@ public class yacysearch {
final String bookmarkHash = post.get("bookmarkref", ""); // urlhash
final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(bookmarkHash));
if (urlentry != null) {
final URIMetadataRow.Components metadata = urlentry.metadata();
try {
sb.tables.bookmarks.createBookmark(sb.loader, metadata.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search");
sb.tables.bookmarks.createBookmark(sb.loader, urlentry.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search");
} catch (final Throwable e) {
}
}

@ -115,7 +115,7 @@ public final class ResultURLs {
try {
final ScoreMap<String> domains = getDomains(stackType);
if (domains != null) {
domains.inc(e.metadata().url().getHost());
domains.inc(e.url().getHost());
}
} catch (final Exception ex) {
System.out.println("INTERNAL ERROR in newEntry/3: " + ex.toString());

@ -81,7 +81,7 @@ public class YMarkMetadata {
public YMarkMetadata(final byte[] urlHash, final Segments indexSegment) {
this.document = null;
this.indexSegment = indexSegment;
this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).metadata().url();
this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).url();
}
public YMarkMetadata(final Document document) {
@ -115,14 +115,10 @@ public class YMarkMetadata {
metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount()));
metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype()));
metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language()));
final URIMetadataRow.Components meta = urlEntry.metadata();
if (meta != null) {
metadata.put(METADATA.TITLE, meta.dc_title());
metadata.put(METADATA.CREATOR, meta.dc_creator());
metadata.put(METADATA.KEYWORDS, meta.dc_subject());
metadata.put(METADATA.PUBLISHER, meta.dc_publisher());
}
metadata.put(METADATA.TITLE, urlEntry.dc_title());
metadata.put(METADATA.CREATOR, urlEntry.dc_creator());
metadata.put(METADATA.KEYWORDS, urlEntry.dc_subject());
metadata.put(METADATA.PUBLISHER, urlEntry.dc_publisher());
}
return metadata;
}

@ -378,7 +378,39 @@ public class URIMetadataRow implements URIMetadata {
return this.ranking;
}
public Components metadata() {
public boolean matches(final Pattern matcher) {
return this.metadata().matches(matcher);
}
public DigestURI url() {
return this.metadata().url();
}
public String dc_title() {
return this.metadata().dc_title();
}
public String dc_creator() {
return this.metadata().dc_creator();
}
public String dc_publisher() {
return this.metadata().dc_publisher();
}
public String dc_subject() {
return this.metadata().dc_subject();
}
public float lat() {
return this.metadata().lat();
}
public float lon() {
return this.metadata().lon();
}
private Components metadata() {
// avoid double computation of metadata elements
if (this.comp != null) return this.comp;
// parse elements from comp field;
@ -545,7 +577,7 @@ public class URIMetadataRow implements URIMetadata {
//return "{" + core + "}";
}
public class Components {
private class Components {
private DigestURI url;
private String urlRaw;
private byte[] urlHash;
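
The two hunks above are the heart of the commit: metadata() becomes private (the Components instance survives only as a lazily parsed cache, per the "avoid double computation" comment), the Components class itself loses its public visibility, and the new delegate accessors keep the external API stable so the backing row storage can later be swapped for nodes from the node store. A condensed sketch of the resulting class shape (the parse step is a simplified assumption):

    public class URIMetadataRow implements URIMetadata {
        private Components comp = null; // lazily parsed cache, never exposed

        public DigestURI url()   { return metadata().url(); }
        public String dc_title() { return metadata().dc_title(); }
        // ... dc_creator(), dc_publisher(), dc_subject(), lat(), lon(), matches()

        private Components metadata() {
            if (this.comp != null) return this.comp; // avoid double computation
            this.comp = parseComponents();           // hypothetical: parse the row's comp field
            return this.comp;
        }

        private class Components { /* url, dc_* fields, as in the diff */ }
    }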

@ -693,14 +693,10 @@ public final class Protocol
if ( urlEntry.hash().length != 12 ) {
continue; // bad url hash
}
final URIMetadataRow.Components metadata = urlEntry.metadata();
if ( metadata == null ) {
continue;
}
if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, metadata.url()) ) {
if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) {
if ( Network.log.isInfo() ) {
Network.log.logInfo("remote search: filtered blacklisted url "
+ metadata.url()
+ urlEntry.url()
+ " from peer "
+ target.getName());
}
@ -708,11 +704,11 @@ public final class Protocol
}
final String urlRejectReason =
Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(metadata.url());
Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(urlEntry.url());
if ( urlRejectReason != null ) {
if ( Network.log.isInfo() ) {
Network.log.logInfo("remote search: rejected url '"
+ metadata.url()
+ urlEntry.url()
+ "' ("
+ urlRejectReason
+ ") from peer "
@ -740,7 +736,7 @@ public final class Protocol
+ " does not belong to word-attached-hash "
+ ASCII.String(entry.urlhash())
+ "; url = "
+ metadata.url()
+ urlEntry.url()
+ " from peer "
+ target.getName());
continue; // spammed
@ -1530,7 +1526,7 @@ public final class Protocol
null // constraint);
);
for ( final URIMetadataRow link : result.links ) {
System.out.println(link.metadata().url().toNormalform(true, false));
System.out.println(link.url().toNormalform(true, false));
System.out.println(link.snippet());
}
} catch ( final IOException e ) {

@ -108,7 +108,6 @@ import net.yacy.gui.Tray;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.meta.URIMetadataRow.Components;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -1214,9 +1213,7 @@ public final class Switchboard extends serverSwitch {
if (urlhash.length == 0) return null;
final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash);
if (le != null) {
final Components metadata = le.metadata();
if (metadata == null) return null;
return metadata.url();
return le.url();
}
return this.crawlQueues.getURL(urlhash);
}
@ -2290,11 +2287,11 @@ public final class Switchboard extends serverSwitch {
final long t = System.currentTimeMillis();
final Map<String, String> response = Protocol.crawlReceipt(Switchboard.this.peers.mySeed(), this.initiatorPeer, "crawl", "fill", "indexed", this.reference, "");
if (response == null) {
Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.metadata().url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " FAILED, send time = " + (System.currentTimeMillis() - t));
Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " FAILED, send time = " + (System.currentTimeMillis() - t));
return;
}
final String delay = response.get("delay");
Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.metadata().url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " success, delay = " + delay + ", send time = " + (System.currentTimeMillis() - t));
Switchboard.this.log.logInfo("Sending crawl receipt for '" + this.reference.url().toNormalform(false, true) + "' to " + this.initiatorPeer.getName() + " success, delay = " + delay + ", send time = " + (System.currentTimeMillis() - t));
}
}

@ -41,7 +41,6 @@ import net.yacy.document.LibraryProvider;
import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.meta.URIMetadataRow.Components;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
@ -230,13 +229,8 @@ public class DocumentIndex extends Segment
// search is running; retrieve results
URIMetadataRow row;
final ArrayList<DigestURI> files = new ArrayList<DigestURI>();
Components metadata;
while ( (row = rankedCache.takeURL(false, 1000)) != null ) {
metadata = row.metadata();
if ( metadata == null ) {
continue;
}
files.add(metadata.url());
files.add(row.url());
count--;
if ( count == 0 ) {
break;

@ -222,6 +222,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
}
}
@Override
public Iterator<byte[]> iterator() {
return keys(true, null);
}
@ -254,6 +255,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
this.error = false;
}
@Override
public kiter clone(final Object secondHash) {
try {
return new kiter(this.up, (String) secondHash);
@ -262,12 +264,14 @@ public final class MetadataRepository implements Iterable<byte[]> {
}
}
@Override
public final boolean hasNext() {
if (this.error) return false;
if (this.iter == null) return false;
return this.iter.hasNext();
}
@Override
public final URIMetadataRow next() {
Row.Entry e = null;
if (this.iter == null) { return null; }
@ -276,6 +280,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
return new URIMetadataRow(e, null, 0);
}
@Override
public final void remove() {
this.iter.remove();
}
@ -372,6 +377,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
this.crawlStacker = crawlStacker;
}
@Override
public void run() {
try {
Log.logInfo("URLDBCLEANER", "UrldbCleaner-Thread startet");
@ -394,30 +400,24 @@ public final class MetadataRepository implements Iterable<byte[]> {
} else if (entry.hash() == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + "hash == null");
} else {
final URIMetadataRow.Components metadata = entry.metadata();
this.totalSearchedUrls++;
if (metadata == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", "corrupted entry for hash = " + ASCII.String(entry.hash()));
remove(entry.hash());
continue;
}
if (metadata.url() == null) {
if (entry.url() == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + "URL == null");
remove(entry.hash());
continue;
}
if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, metadata.url()) ||
this.blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()) ||
(this.crawlStacker.urlInAcceptedDomain(metadata.url()) != null)) {
this.lastBlacklistedUrl = metadata.url().toNormalform(true, true);
if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) ||
this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) ||
(this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) {
this.lastBlacklistedUrl = entry.url().toNormalform(true, true);
this.lastBlacklistedHash = ASCII.String(entry.hash());
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + " " + metadata.url().toNormalform(false, true));
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + " " + entry.url().toNormalform(false, true));
remove(entry.hash());
if (this.blacklistedUrls % 100 == 0) {
Log.logInfo("URLDBCLEANER", "Deleted " + this.blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + this.lastBlacklistedUrl);
}
}
this.lastUrl = metadata.url().toNormalform(true, true);
this.lastUrl = entry.url().toNormalform(true, true);
this.lastHash = ASCII.String(entry.hash());
}
}
@ -502,6 +502,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
if ((dom) && (format == 2)) dom = false;
}
@Override
public void run() {
try {
final File parentf = this.f.getParentFile();
@ -531,26 +532,24 @@ public final class MetadataRepository implements Iterable<byte[]> {
} else {
final Iterator<URIMetadataRow> i = entries(); // iterates indexURLEntry objects
URIMetadataRow entry;
URIMetadataRow.Components metadata;
String url;
while (i.hasNext()) {
entry = i.next();
if (this.set != null && !this.set.has(entry.hash())) continue;
metadata = entry.metadata();
url = metadata.url().toNormalform(true, false);
url = entry.url().toNormalform(true, false);
if (!url.matches(this.filter)) continue;
if (this.format == 0) {
pw.println(url);
}
if (this.format == 1) {
pw.println("<a href=\"" + url + "\">" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + "</a><br>");
pw.println("<a href=\"" + url + "\">" + CharacterCoding.unicode2xml(entry.dc_title(), true) + "</a><br>");
}
if (this.format == 2) {
pw.println("<item>");
pw.println("<title>" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + "</title>");
pw.println("<title>" + CharacterCoding.unicode2xml(entry.dc_title(), true) + "</title>");
pw.println("<link>" + MultiProtocolURI.escape(url) + "</link>");
if (metadata.dc_creator().length() > 0) pw.println("<author>" + CharacterCoding.unicode2xml(metadata.dc_creator(), true) + "</author>");
if (metadata.dc_subject().length() > 0) pw.println("<description>" + CharacterCoding.unicode2xml(metadata.dc_subject(), true) + "</description>");
if (entry.dc_creator().length() > 0) pw.println("<author>" + CharacterCoding.unicode2xml(entry.dc_creator(), true) + "</author>");
if (entry.dc_subject().length() > 0) pw.println("<description>" + CharacterCoding.unicode2xml(entry.dc_subject(), true) + "</description>");
pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
pw.println("<yacy:size>" + entry.size() + "</yacy:size>");
pw.println("<guid isPermaLink=\"false\">" + ASCII.String(entry.hash()) + "</guid>");
@ -635,8 +634,8 @@ public final class MetadataRepository implements Iterable<byte[]> {
for (final URLHashCounter hs: domainSamples.values()) {
if (hs == null) continue;
urlref = this.load(hs.urlhashb);
if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
set.add(urlref.metadata().url().getHost());
if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue;
set.add(urlref.url().getHost());
count--;
if (count == 0) break;
}
@ -671,12 +670,10 @@ public final class MetadataRepository implements Iterable<byte[]> {
for (final Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
hosthashScore.inc(ASCII.String(e.getValue().urlhashb, 6, 6), e.getValue().count);
}
URIMetadataRow.Components comps;
DigestURI url;
for (final Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
urlref = this.load(e.getValue().urlhashb);
comps = urlref.metadata();
url = comps.url();
url = urlref.url();
hostMap.put(e.getKey(), new HostStat(url.getHost(), url.getPort(), e.getKey(), hosthashScore.get(e.getKey())));
}
return hostMap;
@ -693,16 +690,14 @@ public final class MetadataRepository implements Iterable<byte[]> {
count += 10; // make some more to prevent that we have to do this again after deletions too soon.
if (count < 0 || domainScore.sizeSmaller(count)) count = domainScore.size();
this.statsDump = new ArrayList<HostStat>();
URIMetadataRow.Components comps;
DigestURI url;
while (j.hasNext()) {
urlhash = j.next();
if (urlhash == null) continue;
urlref = this.load(ASCII.getBytes(urlhash));
if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue;
if (this.statsDump == null) return new ArrayList<HostStat>().iterator(); // some other operation has destroyed the object
comps = urlref.metadata();
url = comps.url();
url = urlref.url();
this.statsDump.add(new HostStat(url.getHost(), url.getPort(), urlhash.substring(6), domainScore.get(urlhash)));
count--;
if (count == 0) break;

@ -422,12 +422,11 @@ public class Segment {
// determine the url string
final URIMetadataRow entry = urlMetadata().load(urlhash);
if (entry == null) return 0;
final URIMetadataRow.Components metadata = entry.metadata();
if (metadata == null || metadata.url() == null) return 0;
if (entry.url() == null) return 0;
try {
// parse the resource
final Document document = Document.mergeDocuments(metadata.url(), null, loader.loadDocuments(loader.request(metadata.url(), true, false), cacheStrategy, 10000, Integer.MAX_VALUE));
final Document document = Document.mergeDocuments(entry.url(), null, loader.loadDocuments(loader.request(entry.url(), true, false), cacheStrategy, 10000, Integer.MAX_VALUE));
if (document == null) {
// delete just the url entry
urlMetadata().remove(urlhash);
@ -496,7 +495,7 @@ public class Segment {
if (ue == null) {
urlHashs.put(entry.urlhash());
} else {
url = ue.metadata().url();
url = ue.url();
if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
urlHashs.put(entry.urlhash());
}

@ -563,18 +563,9 @@ public final class RWIProcess extends Thread
continue;
}
// prepare values for constraint check
final URIMetadataRow.Components metadata = page.metadata();
// check errors
if ( metadata == null ) {
this.sortout++;
continue; // rare case where the url is corrupted
}
if ( !this.query.urlMask_isCatchall ) {
// check url mask
if ( !metadata.matches(this.query.urlMask) ) {
if ( !page.matches(this.query.urlMask) ) {
this.sortout++;
continue;
}
@ -588,14 +579,14 @@ public final class RWIProcess extends Thread
}
// check for more errors
if ( metadata.url() == null ) {
if ( page.url() == null ) {
this.sortout++;
continue; // rare case where the url is corrupted
}
final String pageurl = metadata.url().toNormalform(true, true);
final String pageauthor = metadata.dc_creator();
final String pagetitle = metadata.dc_title().toLowerCase();
final String pageurl = page.url().toNormalform(true, true);
final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase();
// check exclusion
if ( (QueryParams.anymatch(pagetitle, this.query.excludeHashes))
@ -620,7 +611,7 @@ public final class RWIProcess extends Thread
// check location constraint
if ( (this.query.constraint != null)
&& (this.query.constraint.get(Condenser.flag_cat_haslocation))
&& (metadata.lat() == 0.0f || metadata.lon() == 0.0f) ) {
&& (page.lat() == 0.0f || page.lon() == 0.0f) ) {
this.sortout++;
continue;
}
@ -654,7 +645,7 @@ public final class RWIProcess extends Thread
}
// namespace navigation
String pagepath = metadata.url().getPath();
String pagepath = page.url().getPath();
if ( (p = pagepath.indexOf(':')) >= 0 ) {
pagepath = pagepath.substring(0, p);
p = pagepath.lastIndexOf('/');
@ -665,17 +656,17 @@ public final class RWIProcess extends Thread
}
// protocol navigation
final String protocol = metadata.url().getProtocol();
final String protocol = page.url().getProtocol();
this.protocolNavigator.inc(protocol);
// file type navigation
final String fileext = metadata.url().getFileExtension();
final String fileext = page.url().getFileExtension();
if ( fileext.length() > 0 ) {
this.filetypeNavigator.inc(fileext);
}
// check Scanner
if ( !Scanner.acceptURL(metadata.url()) ) {
if ( !Scanner.acceptURL(page.url()) ) {
this.sortout++;
continue;
}
@ -785,7 +776,7 @@ public final class RWIProcess extends Thread
}
urlhash = this.hostResolver.get(hosthash);
row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash);
hostname = row == null ? null : row.metadata().url().getHost();
hostname = row == null ? null : row.url().getHost();
if ( hostname != null ) {
result.set(hostname, this.hostNavigator.get(hosthash));
}

@ -365,7 +365,6 @@ public class SnippetProcess {
private final long timeout; // the date until this thread should try to work
private long lastLifeSign; // when the last time the run()-loop was executed
private final int id;
private final CacheStrategy cacheStrategy;
private final int neededResults;
private final Pattern snippetPattern;
@ -373,7 +372,6 @@ public class SnippetProcess {
private final SolrConnector solr;
public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) {
this.id = id;
this.cacheStrategy = cacheStrategy;
this.lastLifeSign = System.currentTimeMillis();
this.snippetPattern = snippetPattern;
@ -481,15 +479,14 @@ public class SnippetProcess {
// find the url entry
long startTime = System.currentTimeMillis();
final URIMetadataRow.Components metadata = page.metadata();
if (metadata == null) return null;
if (page == null) return null;
final long dbRetrievalTime = System.currentTimeMillis() - startTime;
if (cacheStrategy == null) {
final TextSnippet snippet = new TextSnippet(
null,
solrText,
metadata,
page,
this.snippetFetchWordHashes,
null,
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
@ -506,7 +503,7 @@ public class SnippetProcess {
final TextSnippet snippet = new TextSnippet(
this.loader,
solrText,
metadata,
page,
this.snippetFetchWordHashes,
cacheStrategy,
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
@ -514,7 +511,7 @@ public class SnippetProcess {
Integer.MAX_VALUE,
!this.query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
Log.logInfo("SEARCH", "text snippet load time for " + page.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
if (!snippet.getErrorCode().fail()) {
// we loaded the file and found the snippet
@ -526,16 +523,16 @@ public class SnippetProcess {
} else {
// problems with snippet fetch
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), metadata.url(), this.query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
return null;
}
} else {
// attach media information
startTime = System.currentTimeMillis();
final List<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal());
final List<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(page.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);
Log.logInfo("SEARCH", "media snippet load time for " + page.url() + ": " + snippetComputationTime);
if (mediaSnippets != null && !mediaSnippets.isEmpty()) {
// found media snippets, return entry
@ -545,8 +542,8 @@ public class SnippetProcess {
} else {
// problems with snippet fetch
final String reason = "no media snippet";
if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), metadata.url(), this.query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
return null;
}
}

@ -51,7 +51,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
// payload objects
private final URIMetadataRow urlentry;
private final URIMetadataRow.Components urlcomps; // buffer for components
private String alternative_urlstring;
private String alternative_urlname;
private final TextSnippet textSnippet;
@ -67,20 +66,19 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
final List<MediaSnippet> mediaSnippets,
final long dbRetrievalTime, final long snippetComputationTime) {
this.urlentry = urlentry;
this.urlcomps = urlentry.metadata();
this.alternative_urlstring = null;
this.alternative_urlname = null;
this.textSnippet = textSnippet;
this.mediaSnippets = mediaSnippets;
this.dbRetrievalTime = dbRetrievalTime;
this.snippetComputationTime = snippetComputationTime;
final String host = urlcomps.url().getHost();
final String host = urlentry.url().getHost();
if (host != null && host.endsWith(".yacyh")) {
// translate host into current IP
int p = host.indexOf('.');
final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
final Seed seed = peers.getConnected(hash);
final String filename = urlcomps.url().getFile();
final String filename = urlentry.url().getFile();
String address = null;
if ((seed == null) || ((address = seed.getPublicAddress()) == null)) {
// seed is not known from here
@ -90,7 +88,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
("yacyshare " +
filename.replace('?', ' ') +
" " +
urlcomps.dc_title()), null).keySet()),
urlentry.dc_title()), null).keySet()),
urlentry.hash());
} catch (IOException e) {
Log.logException(e);
@ -98,14 +96,14 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
indexSegment.urlMetadata().remove(urlentry.hash()); // clean up
throw new RuntimeException("index void");
}
alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;
if ((p = alternative_urlname.indexOf('?')) > 0) alternative_urlname = alternative_urlname.substring(0, p);
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;
if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
}
}
@Override
public int hashCode() {
return ByteArray.hashCode(urlentry.hash());
return ByteArray.hashCode(this.urlentry.hash());
}
@Override
public boolean equals(final Object obj) {
@ -113,37 +111,37 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
if (obj == null) return false;
if (!(obj instanceof ResultEntry)) return false;
ResultEntry other = (ResultEntry) obj;
return Base64Order.enhancedCoder.equal(urlentry.hash(), other.urlentry.hash());
return Base64Order.enhancedCoder.equal(this.urlentry.hash(), other.urlentry.hash());
}
public byte[] hash() {
return urlentry.hash();
return this.urlentry.hash();
}
public DigestURI url() {
return urlcomps.url();
return this.urlentry.url();
}
public Bitfield flags() {
return urlentry.flags();
return this.urlentry.flags();
}
public String urlstring() {
return (alternative_urlstring == null) ? urlcomps.url().toNormalform(false, true) : alternative_urlstring;
return (this.alternative_urlstring == null) ? this.urlentry.url().toNormalform(false, true) : this.alternative_urlstring;
}
public String urlname() {
return (alternative_urlname == null) ? MultiProtocolURI.unescape(urlcomps.url().toNormalform(false, true)) : alternative_urlname;
return (this.alternative_urlname == null) ? MultiProtocolURI.unescape(this.urlentry.url().toNormalform(false, true)) : this.alternative_urlname;
}
public String title() {
return urlcomps.dc_title();
return this.urlentry.dc_title();
}
public String publisher() {
// dc:publisher
return urlcomps.dc_publisher();
return this.urlentry.dc_publisher();
}
public String creator() {
// dc:creator, the author
return urlcomps.dc_creator();
return this.urlentry.dc_creator();
}
public String subject() {
// dc:subject, keywords
return urlcomps.dc_subject();
return this.urlentry.dc_subject();
}
public TextSnippet textSnippet() {
return this.textSnippet;
@ -152,31 +150,31 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
return this.mediaSnippets;
}
public Date modified() {
return urlentry.moddate();
return this.urlentry.moddate();
}
public int filesize() {
return urlentry.size();
return this.urlentry.size();
}
public int limage() {
return urlentry.limage();
return this.urlentry.limage();
}
public int laudio() {
return urlentry.laudio();
return this.urlentry.laudio();
}
public int lvideo() {
return urlentry.lvideo();
return this.urlentry.lvideo();
}
public int lapp() {
return urlentry.lapp();
return this.urlentry.lapp();
}
public float lat() {
return urlentry.metadata().lat();
return this.urlentry.lat();
}
public float lon() {
return urlentry.metadata().lon();
return this.urlentry.lon();
}
public WordReferenceVars word() {
final Reference word = urlentry.word();
final Reference word = this.urlentry.word();
assert word instanceof WordReferenceVars;
return (WordReferenceVars) word;
}
@ -188,14 +186,16 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
}
public String resource() {
// generate transport resource
if ((textSnippet == null) || (!textSnippet.exists())) {
return urlentry.toString();
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
return this.urlentry.toString();
}
return urlentry.toString(textSnippet.getLineRaw());
return this.urlentry.toString(this.textSnippet.getLineRaw());
}
@Override
public int compareTo(ResultEntry o) {
return Base64Order.enhancedCoder.compare(this.urlentry.hash(), o.urlentry.hash());
}
@Override
public int compare(ResultEntry o1, ResultEntry o2) {
return Base64Order.enhancedCoder.compare(o1.urlentry.hash(), o2.urlentry.hash());
}

@ -144,7 +144,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
public TextSnippet(
final LoaderDispatcher loader,
final String solrText,
final URIMetadataRow.Components comp,
final URIMetadataRow row,
final HandleSet queryhashes,
final CacheStrategy cacheStrategy,
final boolean pre,
@ -152,7 +152,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
final int maxDocLen,
final boolean reindexing) {
// heise = "0OQUNU3JSs05"
final DigestURI url = comp.url();
final DigestURI url = row.url();
if (queryhashes.isEmpty()) {
//System.out.println("found no queryhashes for URL retrieve " + url);
init(url.hash(), null, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given");
@ -185,7 +185,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
// if then no sentences are found, we fail-over to get the content from the re-loaded document
if (sentences == null) {
final Document document = loadDocument(loader, comp, queryhashes, cacheStrategy, url, reindexing, source);
final Document document = loadDocument(loader, row, queryhashes, cacheStrategy, url, reindexing, source);
if (document == null) {
return;
}
@ -251,7 +251,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
private Document loadDocument(
final LoaderDispatcher loader,
final URIMetadataRow.Components comp,
final URIMetadataRow row,
final HandleSet queryhashes,
final CacheStrategy cacheStrategy,
final DigestURI url,
@ -266,12 +266,12 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
// first try to get the snippet from metadata
String loc;
final Request request = loader.request(url, true, reindexing);
final boolean inCache = de.anomic.http.client.Cache.has(comp.url());
final boolean inCache = de.anomic.http.client.Cache.has(row.url());
final boolean noCacheUsage = url.isFile() || url.isSMB() || cacheStrategy == null;
if (containsAllHashes(loc = comp.dc_title(), queryhashes) ||
containsAllHashes(loc = comp.dc_creator(), queryhashes) ||
containsAllHashes(loc = comp.dc_subject(), queryhashes) ||
containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
if (containsAllHashes(loc = row.dc_title(), queryhashes) ||
containsAllHashes(loc = row.dc_creator(), queryhashes) ||
containsAllHashes(loc = row.dc_subject(), queryhashes) ||
containsAllHashes(loc = row.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
// try to create the snippet from information given in the url
if (inCache) response = loader == null ? null : loader.load(request, CacheStrategy.CACHEONLY, true);
Document document = null;
@ -391,10 +391,12 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return l.toString().trim();
}
@Override
public int compareTo(final TextSnippet o) {
return Base64Order.enhancedCoder.compare(this.urlhash, o.urlhash);
}
@Override
public int compare(final TextSnippet o1, final TextSnippet o2) {
return o1.compareTo(o2);
}
