diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 8c63cbe49..8dcd1b5a9 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -249,7 +249,6 @@ public class Crawler_p { null, "CRAWLING-ROOT", new Date(), - null, pe.handle(), 0, 0, @@ -303,7 +302,6 @@ public class Crawler_p { null, "", new Date(), - null, pe.handle(), 0, 0, @@ -386,7 +384,6 @@ public class Crawler_p { null, e.getValue(), new Date(), - null, profile.handle(), 0, 0, diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java index 4b6ea43b4..b5e5d6ef7 100644 --- a/htroot/IndexCreateWWWGlobalQueue_p.java +++ b/htroot/IndexCreateWWWGlobalQueue_p.java @@ -106,7 +106,7 @@ public class IndexCreateWWWGlobalQueue_p { prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); - prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); + prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) ); prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name()); prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index 097ec71b2..5fba1df3b 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -127,7 +127,7 @@ public class IndexCreateWWWLocalQueue_p { case INITIATOR: value = (entry.initiator() == null || entry.initiator().length == 0) ? 
"proxy" : new String(entry.initiator()); break; - case MODIFIED: value = daydate(entry.loaddate()); break; + case MODIFIED: value = daydate(entry.appdate()); break; default: value = null; } @@ -177,7 +177,7 @@ public class IndexCreateWWWLocalQueue_p { prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); - prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); + prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) ); prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name()); prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); diff --git a/htroot/IndexCreateWWWRemoteQueue_p.java b/htroot/IndexCreateWWWRemoteQueue_p.java index 821add7ae..02efe8145 100644 --- a/htroot/IndexCreateWWWRemoteQueue_p.java +++ b/htroot/IndexCreateWWWRemoteQueue_p.java @@ -103,7 +103,7 @@ public class IndexCreateWWWRemoteQueue_p { prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? 
"unknown" : profileEntry.name())); prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth()); - prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.loaddate()) ); + prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.appdate()) ); prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name()); prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toString()); prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash()); diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index 134d40712..2459ba783 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -181,7 +181,6 @@ public class QuickCrawlLink_p { null, (title==null)?"CRAWLING-ROOT":title, new Date(), - null, pe.handle(), 0, 0, diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 7fb777695..6be2bb7f3 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -186,8 +186,10 @@ public class ViewFile { } catch (IOException e) { Log.logException(e); } - if (response != null) resource = response.getContent(); - responseHeader = response.getResponseHeader(); + if (response != null) { + resource = response.getContent(); + responseHeader = response.getResponseHeader(); + } } if (responseHeader == null) responseHeader = Cache.getResponseHeader(url); diff --git a/htroot/api/queues_p.java b/htroot/api/queues_p.java index a32633ee8..56397b1f0 100755 --- a/htroot/api/queues_p.java +++ b/htroot/api/queues_p.java @@ -105,7 +105,7 @@ public class queues_p { prop.put(tableName + "_" + showNum + "_profile", urle.profileHandle()); prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? 
"proxy" : initiator.getName())); prop.put(tableName + "_" + showNum + "_depth", urle.depth()); - prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate())); + prop.put(tableName + "_" + showNum + "_modified", daydate(urle.appdate())); prop.putXML(tableName + "_" + showNum + "_anchor", urle.name()); prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true)); prop.put(tableName + "_" + showNum + "_hash", urle.url().hash()); diff --git a/htroot/rct_p.java b/htroot/rct_p.java index 4e6c47398..a5549c117 100644 --- a/htroot/rct_p.java +++ b/htroot/rct_p.java @@ -83,7 +83,6 @@ public class rct_p { url, (referrer == null) ? null : referrer.hash(), "REMOTE-CRAWLING", - null, loaddate, sb.crawler.defaultRemoteProfile.handle(), 0, diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index 5a2111837..7a78283e5 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -448,7 +448,6 @@ public class CrawlQueues { url, (referrer == null) ? 
null : referrer.hash(), item.getDescription(), - null, loaddate, sb.crawler.defaultRemoteProfile.handle(), 0, diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 60b919404..c1b792d11 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -155,7 +155,7 @@ public final class CrawlStacker { public void enqueueEntry(final Request entry) { // DEBUG - if (log.isFinest()) log.logFinest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + new String(entry.initiator()) + ", name=" + entry.name() + ", load=" + entry.loaddate() + ", depth=" + entry.depth()); + if (log.isFinest()) log.logFinest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + new String(entry.initiator()) + ", name=" + entry.name() + ", appdate=" + entry.appdate() + ", depth=" + entry.depth()); if (prefetchHost(entry.url().getHost())) { try { diff --git a/source/de/anomic/crawler/retrieval/Request.java b/source/de/anomic/crawler/retrieval/Request.java index 39ac3cd0a..aae032a1c 100755 --- a/source/de/anomic/crawler/retrieval/Request.java +++ b/source/de/anomic/crawler/retrieval/Request.java @@ -44,30 +44,28 @@ public class Request extends WorkflowJob { public final static Row rowdef = new Row( "String urlhash-" + Word.commonHashLength + ", " + // the url's hash "String initiator-" + Word.commonHashLength + ", " + // the crawling initiator - "String urlstring-256, " + // the url as string + "String urlstring-256, " + // the url as string "String refhash-" + Word.commonHashLength + ", " + // the url's referrer hash - "String urlname-80, " + // the name of the url, from anchor tag name - "Cardinal appdate-8 {b256}, " + // the time when the url was first time appeared + "String urlname-80, " + // the name of the url, from anchor tag name + "Cardinal appdate-8 {b256}, " + // the time when the url was first time appeared "String profile-" + 
Word.commonHashLength + ", " + // the name of the prefetch profile handle - "Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0 - "Cardinal parentbr-3 {b256}, " + // number of anchors of the parent - "Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors - "byte[] flags-4, " + // flags - "String handle-4, " + // extra handle - "Cardinal loaddate-8 {b256}," + // time when the file was loaded - "Cardinal serverdate-8 {b256}," + // time when that the server returned as document date - "Cardinal modifiedSince-8 {b256}", // time that was given to server as ifModifiedSince + "Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0 + "Cardinal parentbr-3 {b256}, " + // number of anchors of the parent + "Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors + "byte[] flags-4, " + // flags + "String handle-4, " + // extra handle + "Cardinal loaddate-8 {b256}," + // NOT USED + "Cardinal lastmodified-8 {b256}," + // NOT USED + "Cardinal modifiedSince-8 {b256}", // time that was given to server as ifModifiedSince Base64Order.enhancedCoder ); private byte[] initiator; // the initiator hash, is NULL or "" if it is the own proxy; // if this is generated by a crawl, the own peer hash in entered private byte[] refhash; // the url's referrer hash - private DigestURI url; // the url as string + private DigestURI url; // the url as string private String name; // the name of the url, from anchor tag name private long appdate; // the time when the url was first time appeared - private long loaddate; // the time when the url was loaded - private long serverdate; // the document date from the target server private long imsdate; // the time of a ifModifiedSince request private String profileHandle; // the name of the fetch profile private int depth; // the prefetch depth so far, starts at 0 @@ -84,7 +82,7 @@ public class Request extends WorkflowJob { * @param referrerhash */ public Request(final DigestURI url, final byte[] 
referrerhash) { - this(null, url, referrerhash, null, null, null, null, 0, 0, 0); + this(null, url, referrerhash, null, null, null, 0, 0, 0); } /** @@ -107,7 +105,6 @@ public class Request extends WorkflowJob { final byte[] referrerhash, final String name, final Date appdate, - final Date loaddate, final String profileHandle, final int depth, final int anchors, @@ -122,14 +119,12 @@ public class Request extends WorkflowJob { this.refhash = referrerhash; this.name = (name == null) ? "" : name; this.appdate = (appdate == null) ? 0 : appdate.getTime(); - this.loaddate = (loaddate == null) ? 0 : loaddate.getTime(); this.profileHandle = profileHandle; // must not be null this.depth = depth; this.anchors = anchors; this.forkfactor = forkfactor; this.flags = new Bitfield(rowdef.width(10)); this.handle = 0; - this.serverdate = 0; this.imsdate = 0; this.statusMessage = "loaded(args)"; this.initialHash = url.hashCode(); @@ -156,8 +151,8 @@ public class Request extends WorkflowJob { this.forkfactor = (int) entry.getColLong(9); this.flags = new Bitfield(entry.getColBytes(10, true)); this.handle = Integer.parseInt(entry.getColString(11, null), 16); - this.loaddate = entry.getColLong(12); - this.serverdate = entry.getColLong(13); + //this.loaddate = entry.getColLong(12); + //this.lastmodified = entry.getColLong(13); this.imsdate = entry.getColLong(14); this.statusMessage = "loaded(kelondroRow.Entry)"; this.initialHash = url.hashCode(); @@ -187,8 +182,8 @@ public class Request extends WorkflowJob { public Row.Entry toRow() { final byte[] appdatestr = NaturalOrder.encodeLong(appdate, rowdef.width(5)); - final byte[] loaddatestr = NaturalOrder.encodeLong(loaddate, rowdef.width(12)); - final byte[] serverdatestr = NaturalOrder.encodeLong(serverdate, rowdef.width(13)); + final byte[] loaddatestr = NaturalOrder.encodeLong(0 /*loaddate*/, rowdef.width(12)); + final byte[] serverdatestr = NaturalOrder.encodeLong(0 /*lastmodified*/, rowdef.width(13)); final byte[] imsdatestr = 
NaturalOrder.encodeLong(imsdate, rowdef.width(14)); // store the hash in the hash cache byte[] namebytes; @@ -245,17 +240,17 @@ public class Request extends WorkflowJob { // the date when the url appeared first return new Date(this.appdate); } - + /* public Date loaddate() { // the date when the url was loaded return new Date(this.loaddate); } - public Date serverdate() { + public Date lastmodified() { // the date that the server returned as document date - return new Date(this.serverdate); + return new Date(this.lastmodified); } - + */ public Date imsdate() { // the date that the client (browser) send as ifModifiedSince in proxy mode return new Date(this.imsdate); diff --git a/source/de/anomic/crawler/retrieval/Response.java b/source/de/anomic/crawler/retrieval/Response.java index 174cc2968..7ea57e023 100755 --- a/source/de/anomic/crawler/retrieval/Response.java +++ b/source/de/anomic/crawler/retrieval/Response.java @@ -55,8 +55,8 @@ public class Response { // the class objects private final Request request; - private final RequestHeader requestHeader; - private final ResponseHeader responseHeader; + private final RequestHeader requestHeader; + private final ResponseHeader responseHeader; private final String responseStatus; private final CrawlProfile.entry profile; private byte[] content; @@ -201,6 +201,7 @@ public class Response { docDate = responseHeader.lastModified(); if (docDate == null) docDate = responseHeader.date(); } + if (docDate == null && request != null) docDate = request.appdate(); if (docDate == null) docDate = new Date(DateFormatter.correctedUTCTime()); return docDate; diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java index 6f1aad1b7..52bea8324 100644 --- a/source/de/anomic/data/SitemapParser.java +++ b/source/de/anomic/data/SitemapParser.java @@ -282,7 +282,6 @@ public class SitemapParser extends DefaultHandler { null, // this.siteMapURL.toString(), this.nextURL, new Date(), - null, 
this.crawlingProfile.handle(), 0, 0, diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index bfa4f6f6b..330dd3b1d 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -269,7 +269,6 @@ public class bookmarksDB { null, "CRAWLING-ROOT", new Date(), - null, pe.handle(), 0, 0, diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index 4c4589cd6..ee4549c03 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -387,8 +387,7 @@ public final class HTTPDProxyHandler { url, requestHeader.referer() == null ? null : requestHeader.referer().hash(), "", - new Date(), - new Date(), + cachedResponseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), 0, 0, @@ -510,8 +509,7 @@ public final class HTTPDProxyHandler { url, requestHeader.referer() == null ? null : requestHeader.referer().hash(), "", - new Date(), - new Date(), + responseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), 0, 0, diff --git a/source/de/anomic/search/DocumentIndex.java b/source/de/anomic/search/DocumentIndex.java index 97a228f87..9a6712bc3 100644 --- a/source/de/anomic/search/DocumentIndex.java +++ b/source/de/anomic/search/DocumentIndex.java @@ -134,6 +134,7 @@ public class DocumentIndex extends Segment { url, null, new Date(url.lastModified()), + new Date(), url.length(), document, condenser diff --git a/source/de/anomic/search/Segment.java b/source/de/anomic/search/Segment.java index 03abc651e..aeee8a76b 100644 --- a/source/de/anomic/search/Segment.java +++ b/source/de/anomic/search/Segment.java @@ -241,7 +241,8 @@ public class Segment { public URIMetadataRow storeDocument( final DigestURI url, final DigestURI referrerURL, - final Date docDate, + final Date modDate, + final Date loadDate, final long sourcesize, final Document document, final Condenser 
condenser @@ -295,16 +296,16 @@ public class Segment { } // create a new loaded URL db entry - final long ldate = System.currentTimeMillis(); + assert modDate.getTime() <= loadDate.getTime() : "modDate = " + modDate + ", loadDate = " + loadDate; final URIMetadataRow newEntry = new URIMetadataRow( url, // URL dc_title, // document description document.dc_creator(), // author document.dc_subject(' '), // tags "", // ETag - docDate, // modification date - new Date(), // loaded date - new Date(ldate + Math.max(0, ldate - docDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula + modDate, // modification date + loadDate, // loaded date + new Date(loadDate.getTime() + Math.max(0, loadDate.getTime() - modDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula (referrerURL == null) ? null : new String(referrerURL.hash()), // referer hash new byte[0], // md5 (int) sourcesize, // size @@ -328,7 +329,7 @@ public class Segment { // STORE PAGE INDEX INTO WORD INDEX DB final int words = addPageIndex( url, // document url - docDate, // document mod date + modDate, // document mod date document, // document content condenser, // document condenser language, // document language diff --git a/source/de/anomic/search/Segments.java b/source/de/anomic/search/Segments.java index 793c84068..ea191c7f9 100644 --- a/source/de/anomic/search/Segments.java +++ b/source/de/anomic/search/Segments.java @@ -204,7 +204,8 @@ public class Segments implements Iterable { final String segmentName, final DigestURI url, final DigestURI referrerURL, - final Date docDate, + final Date modDate, + final Date loadDate, final long sourcesize, final Document document, final Condenser condenser @@ -212,7 +213,8 @@ public class Segments implements Iterable { return segment(segmentName).storeDocument( url, referrerURL, - docDate, + modDate, + loadDate, sourcesize, document, condenser diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 
d27d0da8b..948432129 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -1332,8 +1332,7 @@ public final class Switchboard extends serverSwitch { surrogate.getIdentifier(true), null, "", - new Date(), - new Date(), + surrogate.getDate(), this.crawler.defaultSurrogateProfile.handle(), 0, 0, @@ -1670,7 +1669,7 @@ public final class Switchboard extends serverSwitch { in.queueEntry.updateStatus(Response.QUEUE_STATE_PARSING); // debug - if (log.isFinest()) log.logFinest("PARSE "+ in.queueEntry.toString()); + if (log.isFinest()) log.logFinest("PARSE "+ in.queueEntry); Document document = null; try { @@ -1731,10 +1730,7 @@ public final class Switchboard extends serverSwitch { return null; } - final long parsingEndTime = System.currentTimeMillis(); - - // get the document date - final Date docDate = response.lastModified(); + final long parsingEndTime = System.currentTimeMillis(); // put anchors on crawl stack final long stackStartTime = System.currentTimeMillis(); @@ -1767,8 +1763,7 @@ public final class Switchboard extends serverSwitch { new DigestURI(u, null), response.url().hash(), nextEntry.getValue(), - null, - docDate, + new Date(), response.profile().handle(), response.depth() + 1, 0, @@ -1860,6 +1855,7 @@ public final class Switchboard extends serverSwitch { queueEntry.url(), referrerURL, queueEntry.lastModified(), + new Date(), queueEntry.size(), document, condenser); @@ -2125,7 +2121,6 @@ public final class Switchboard extends serverSwitch { (name == null) ? 
"" : name, new Date(), null, - null, 0, 0, 0); diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 44feff07b..2397931ec 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -780,7 +780,6 @@ public class yacySeed implements Cloneable { // name final String peerName = this.dna.get(yacySeed.NAME); if (peerName == null) return "no peer name given"; - if (peerName.equalsIgnoreCase("VegaYacyB")) return "bad peer VegaYacyB [ " + this.hash + " ]"; // hack for wrong "VegaYacyB" peers dna.put(yacySeed.NAME, checkPeerName(peerName)); // type diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 37899527f..f3166f145 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -81,42 +81,24 @@ public class URIMetadataRow implements URIMetadata { /* =========================================================================== * Constants to access the various columns of an URL entry * =========================================================================== */ - /** the url's hash */ - private static final int col_hash = 0; - /** components: the url, description, author and tags. As 5th element, an ETag is possible */ - private static final int col_comp = 1; - /** components: the url, description, author and tags. 
As 5th element, an ETag is possible */ - private static final int col_mod = 2; - /** time when the url was loaded */ - private static final int col_load = 3; - /** time until this url is fresh */ - private static final int col_fresh = 4; - /** time when the url was loaded */ - private static final int col_referrer = 5; - /** the md5 of the url content (to identify changes) */ - private static final int col_md5 = 6; - /** size of file in bytes */ - private static final int col_size = 7; - /** size of file by number of words; for video and audio: seconds */ - private static final int col_wc = 8; - /** doctype, taken from extension or any other heuristic */ - private static final int col_dt = 9; - /** flags; any stuff (see Word-Entity definition) */ - private static final int col_flags = 10; - /** language */ - private static final int col_lang = 11; - /** of outlinks to same domain; for video and image: width */ - private static final int col_llocal = 12; - /** of outlinks to outside domain; for video and image: height */ - private static final int col_lother = 13; - /** of embedded image links */ - private static final int col_limage = 14; - /** of embedded audio links; for audio: track number; for video: number of audio tracks */ - private static final int col_laudio = 15; - /** of embedded video links */ - private static final int col_lvideo = 16; - /** of embedded links to applications */ - private static final int col_lapp = 17; + private static final int col_hash = 0; // the url's hash + private static final int col_comp = 1; // components: the url, description, author and tags. 
As 5th element, an ETag is possible + private static final int col_mod = 2; // the modified-date time from the server (servertime in row) + private static final int col_load = 3; // time when the url was loaded + private static final int col_fresh = 4; // time until this url is fresh + private static final int col_referrer = 5; // a referrer of the url (there may be several, but this is the one that was actually referring to this one) + private static final int col_md5 = 6; // the md5 of the url content (to identify changes) + private static final int col_size = 7; // size of file in bytes + private static final int col_wc = 8; // size of file by number of words; for video and audio: seconds + private static final int col_dt = 9; // doctype, taken from extension or any other heuristic + private static final int col_flags = 10; // flags; any stuff (see Word-Entity definition) + private static final int col_lang = 11; // language + private static final int col_llocal = 12; // # of outlinks to same domain; for video and image: width + private static final int col_lother = 13; // # of outlinks to outside domain; for video and image: height + private static final int col_limage = 14; // # of embedded image links + private static final int col_laudio = 15; // # of embedded audio links; for audio: track number; for video: number of audio tracks + private static final int col_lvideo = 16; // # of embedded video links + private static final int col_lapp = 17; // # of embedded links to applications private final Row.Entry entry; private final String snippet; @@ -522,8 +504,7 @@ public class URIMetadataRow implements URIMetadata { metadata().url(), referrerHash(), metadata().dc_title(), - null, - loaddate(), + moddate(), null, 0, 0, diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java index a3fd651b6..fa3f9ca15 100644 --- a/source/net/yacy/kelondro/index/Row.java +++ b/source/net/yacy/kelondro/index/Row.java @@ -68,7 +68,7 @@ public
final class Row { os+= this.row[i].cellwidth; } this.objectsize = os; - this.primaryKeyLength = row[0].cellwidth; + this.primaryKeyLength = this.row[0].cellwidth; } public Row(final String structure, final ByteOrder objectOrder) { @@ -102,7 +102,7 @@ public final class Row { os += this.row[i].cellwidth; } this.objectsize = os; - this.primaryKeyLength = row[0].cellwidth; + this.primaryKeyLength = this.row[0].cellwidth; } public final ByteOrder getOrdering() { @@ -150,8 +150,8 @@ public final class Row { public final Entry newEntry(final byte[] rowinstance) { if (rowinstance == null) return null; //assert (rowinstance[0] != 0); - if (!(this.objectOrder.wellformed(rowinstance, 0, row[0].cellwidth))) { - Log.logWarning("kelondroRow", "row not well-formed: rowinstance[0] = " + new String(rowinstance, 0, row[0].cellwidth) + " / " + NaturalOrder.arrayList(rowinstance, 0, row[0].cellwidth)); + if (!(this.objectOrder.wellformed(rowinstance, 0, this.primaryKeyLength))) { + Log.logWarning("kelondroRow", "row not well-formed: rowinstance[0] = " + new String(rowinstance, 0, this.primaryKeyLength) + " / " + NaturalOrder.arrayList(rowinstance, 0, this.primaryKeyLength)); return null; } return new Entry(rowinstance, false); @@ -160,14 +160,14 @@ public final class Row { public final Entry newEntry(final Entry oldrow, final int fromColumn) { if (oldrow == null) return null; assert (oldrow.getColBytes(0, false)[0] != 0); - assert (this.objectOrder.wellformed(oldrow.getColBytes(0, false), 0, row[0].cellwidth)); + assert (this.objectOrder.wellformed(oldrow.getColBytes(0, false), 0, this.primaryKeyLength)); return new Entry(oldrow, fromColumn, false); } public final Entry newEntry(final byte[] rowinstance, final int start, final boolean clone) { if (rowinstance == null) return null; //assert (rowinstance[0] != 0); - assert (this.objectOrder.wellformed(rowinstance, start, row[0].cellwidth)) : "rowinstance = " + new String(rowinstance); + assert (this.objectOrder.wellformed(rowinstance, 
start, this.primaryKeyLength)) : "rowinstance = " + new String(rowinstance); // this method offers the option to clone the content // this is necessary if it is known that the underlying byte array may change and therefore // the reference to the byte array does not contain the original content @@ -177,7 +177,7 @@ public final class Row { public final Entry newEntry(final byte[][] cells) { if (cells == null) return null; assert (cells[0][0] != 0); - assert (this.objectOrder.wellformed(cells[0], 0, row[0].cellwidth)); + assert (this.objectOrder.wellformed(cells[0], 0, this.primaryKeyLength)); return new Entry(cells); } @@ -189,7 +189,7 @@ public final class Row { public final EntryIndex newEntryIndex(final byte[] rowinstance, final int index) { if (rowinstance == null) return null; assert (rowinstance[0] != 0); - assert (this.objectOrder.wellformed(rowinstance, 0, row[0].cellwidth)); + assert (this.objectOrder.wellformed(rowinstance, 0, this.primaryKeyLength)); return new EntryIndex(rowinstance, index); } diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java index 9b5910469..0036e1fe8 100644 --- a/source/net/yacy/kelondro/table/Table.java +++ b/source/net/yacy/kelondro/table/Table.java @@ -574,16 +574,22 @@ public class Table implements ObjectIndex, Iterable { private void removeInFile(final int i) throws IOException, RowSpaceExceededException { assert i >= 0; - final byte[] p = new byte[rowdef.objectsize]; - if (table == null) { - if (i == index.size() - 1) { - file.cleanLast(); + final byte[] p = new byte[this.rowdef.objectsize]; + if (this.table == null) { + if (i == this.index.size() - 1) { + this.file.cleanLast(); } else { - file.cleanLast(p, 0); - file.put(i, p, 0); - final byte[] k = new byte[rowdef.primaryKeyLength]; - System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength); - index.put(k, i); + while (this.file.size() > 0) { + this.file.cleanLast(p, 0); + if (!(this.rowdef.objectOrder.wellformed(p, 0, 
this.rowdef.primaryKeyLength))) { + continue; + } + this.file.put(i, p, 0); + final byte[] k = new byte[this.rowdef.primaryKeyLength]; + System.arraycopy(p, 0, k, 0, this.rowdef.primaryKeyLength); + this.index.put(k, i); + break; + } } } else { if (i == index.size() - 1) { diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 77d9bc063..59cfc67fb 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -151,7 +151,6 @@ public final class LoaderDispatcher { null, "", new Date(), - new Date(), (forText) ? ((global) ? sb.crawler.defaultTextSnippetGlobalProfile.handle() :