diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 8c63cbe49..8dcd1b5a9 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -249,7 +249,6 @@ public class Crawler_p {
null,
"CRAWLING-ROOT",
new Date(),
- null,
pe.handle(),
0,
0,
@@ -303,7 +302,6 @@ public class Crawler_p {
null,
"",
new Date(),
- null,
pe.handle(),
0,
0,
@@ -386,7 +384,6 @@ public class Crawler_p {
null,
e.getValue(),
new Date(),
- null,
profile.handle(),
0,
0,
diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java
index 4b6ea43b4..b5e5d6ef7 100644
--- a/htroot/IndexCreateWWWGlobalQueue_p.java
+++ b/htroot/IndexCreateWWWGlobalQueue_p.java
@@ -106,7 +106,7 @@ public class IndexCreateWWWGlobalQueue_p {
prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
- prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
+ prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) );
prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java
index 097ec71b2..5fba1df3b 100644
--- a/htroot/IndexCreateWWWLocalQueue_p.java
+++ b/htroot/IndexCreateWWWLocalQueue_p.java
@@ -127,7 +127,7 @@ public class IndexCreateWWWLocalQueue_p {
case INITIATOR:
value = (entry.initiator() == null || entry.initiator().length == 0) ? "proxy" : new String(entry.initiator());
break;
- case MODIFIED: value = daydate(entry.loaddate()); break;
+ case MODIFIED: value = daydate(entry.appdate()); break;
default: value = null;
}
@@ -177,7 +177,7 @@ public class IndexCreateWWWLocalQueue_p {
prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
- prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
+ prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) );
prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
diff --git a/htroot/IndexCreateWWWRemoteQueue_p.java b/htroot/IndexCreateWWWRemoteQueue_p.java
index 821add7ae..02efe8145 100644
--- a/htroot/IndexCreateWWWRemoteQueue_p.java
+++ b/htroot/IndexCreateWWWRemoteQueue_p.java
@@ -103,7 +103,7 @@ public class IndexCreateWWWRemoteQueue_p {
prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth());
- prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.loaddate()) );
+ prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.appdate()) );
prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name());
prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toString());
prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash());
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index 134d40712..2459ba783 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -181,7 +181,6 @@ public class QuickCrawlLink_p {
null,
(title==null)?"CRAWLING-ROOT":title,
new Date(),
- null,
pe.handle(),
0,
0,
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 7fb777695..6be2bb7f3 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -186,8 +186,10 @@ public class ViewFile {
} catch (IOException e) {
Log.logException(e);
}
- if (response != null) resource = response.getContent();
- responseHeader = response.getResponseHeader();
+ if (response != null) {
+ resource = response.getContent();
+ responseHeader = response.getResponseHeader();
+ }
}
if (responseHeader == null) responseHeader = Cache.getResponseHeader(url);
diff --git a/htroot/api/queues_p.java b/htroot/api/queues_p.java
index a32633ee8..56397b1f0 100755
--- a/htroot/api/queues_p.java
+++ b/htroot/api/queues_p.java
@@ -105,7 +105,7 @@ public class queues_p {
prop.put(tableName + "_" + showNum + "_profile", urle.profileHandle());
prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put(tableName + "_" + showNum + "_depth", urle.depth());
- prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate()));
+ prop.put(tableName + "_" + showNum + "_modified", daydate(urle.appdate()));
prop.putXML(tableName + "_" + showNum + "_anchor", urle.name());
prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
diff --git a/htroot/rct_p.java b/htroot/rct_p.java
index 4e6c47398..a5549c117 100644
--- a/htroot/rct_p.java
+++ b/htroot/rct_p.java
@@ -83,7 +83,6 @@ public class rct_p {
url,
(referrer == null) ? null : referrer.hash(),
"REMOTE-CRAWLING",
- null,
loaddate,
sb.crawler.defaultRemoteProfile.handle(),
0,
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index 5a2111837..7a78283e5 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -448,7 +448,6 @@ public class CrawlQueues {
url,
(referrer == null) ? null : referrer.hash(),
item.getDescription(),
- null,
loaddate,
sb.crawler.defaultRemoteProfile.handle(),
0,
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 60b919404..c1b792d11 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -155,7 +155,7 @@ public final class CrawlStacker {
public void enqueueEntry(final Request entry) {
// DEBUG
- if (log.isFinest()) log.logFinest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + new String(entry.initiator()) + ", name=" + entry.name() + ", load=" + entry.loaddate() + ", depth=" + entry.depth());
+ if (log.isFinest()) log.logFinest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + new String(entry.initiator()) + ", name=" + entry.name() + ", appdate=" + entry.appdate() + ", depth=" + entry.depth());
if (prefetchHost(entry.url().getHost())) {
try {
diff --git a/source/de/anomic/crawler/retrieval/Request.java b/source/de/anomic/crawler/retrieval/Request.java
index 39ac3cd0a..aae032a1c 100755
--- a/source/de/anomic/crawler/retrieval/Request.java
+++ b/source/de/anomic/crawler/retrieval/Request.java
@@ -44,30 +44,28 @@ public class Request extends WorkflowJob {
public final static Row rowdef = new Row(
"String urlhash-" + Word.commonHashLength + ", " + // the url's hash
"String initiator-" + Word.commonHashLength + ", " + // the crawling initiator
- "String urlstring-256, " + // the url as string
+ "String urlstring-256, " + // the url as string
"String refhash-" + Word.commonHashLength + ", " + // the url's referrer hash
- "String urlname-80, " + // the name of the url, from anchor tag name
- "Cardinal appdate-8 {b256}, " + // the time when the url was first time appeared
+ "String urlname-80, " + // the name of the url, from anchor tag name
+ "Cardinal appdate-8 {b256}, " + // the time when the url first appeared
"String profile-" + Word.commonHashLength + ", " + // the name of the prefetch profile handle
- "Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0
- "Cardinal parentbr-3 {b256}, " + // number of anchors of the parent
- "Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors
- "byte[] flags-4, " + // flags
- "String handle-4, " + // extra handle
- "Cardinal loaddate-8 {b256}," + // time when the file was loaded
- "Cardinal serverdate-8 {b256}," + // time when that the server returned as document date
- "Cardinal modifiedSince-8 {b256}", // time that was given to server as ifModifiedSince
+ "Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0
+ "Cardinal parentbr-3 {b256}, " + // number of anchors of the parent
+ "Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors
+ "byte[] flags-4, " + // flags
+ "String handle-4, " + // extra handle
+ "Cardinal loaddate-8 {b256}," + // NOT USED
+ "Cardinal lastmodified-8 {b256}," + // NOT USED
+ "Cardinal modifiedSince-8 {b256}", // time that was given to server as ifModifiedSince
Base64Order.enhancedCoder
);
private byte[] initiator; // the initiator hash, is NULL or "" if it is the own proxy;
// if this is generated by a crawl, the own peer hash in entered
private byte[] refhash; // the url's referrer hash
- private DigestURI url; // the url as string
+ private DigestURI url; // the url as string
private String name; // the name of the url, from anchor tag name
private long appdate; // the time when the url was first time appeared
- private long loaddate; // the time when the url was loaded
- private long serverdate; // the document date from the target server
private long imsdate; // the time of a ifModifiedSince request
private String profileHandle; // the name of the fetch profile
private int depth; // the prefetch depth so far, starts at 0
@@ -84,7 +82,7 @@ public class Request extends WorkflowJob {
* @param referrerhash
*/
public Request(final DigestURI url, final byte[] referrerhash) {
- this(null, url, referrerhash, null, null, null, null, 0, 0, 0);
+ this(null, url, referrerhash, null, null, null, 0, 0, 0);
}
/**
@@ -107,7 +105,6 @@ public class Request extends WorkflowJob {
final byte[] referrerhash,
final String name,
final Date appdate,
- final Date loaddate,
final String profileHandle,
final int depth,
final int anchors,
@@ -122,14 +119,12 @@ public class Request extends WorkflowJob {
this.refhash = referrerhash;
this.name = (name == null) ? "" : name;
this.appdate = (appdate == null) ? 0 : appdate.getTime();
- this.loaddate = (loaddate == null) ? 0 : loaddate.getTime();
this.profileHandle = profileHandle; // must not be null
this.depth = depth;
this.anchors = anchors;
this.forkfactor = forkfactor;
this.flags = new Bitfield(rowdef.width(10));
this.handle = 0;
- this.serverdate = 0;
this.imsdate = 0;
this.statusMessage = "loaded(args)";
this.initialHash = url.hashCode();
@@ -156,8 +151,8 @@ public class Request extends WorkflowJob {
this.forkfactor = (int) entry.getColLong(9);
this.flags = new Bitfield(entry.getColBytes(10, true));
this.handle = Integer.parseInt(entry.getColString(11, null), 16);
- this.loaddate = entry.getColLong(12);
- this.serverdate = entry.getColLong(13);
+ //this.loaddate = entry.getColLong(12);
+ //this.lastmodified = entry.getColLong(13);
this.imsdate = entry.getColLong(14);
this.statusMessage = "loaded(kelondroRow.Entry)";
this.initialHash = url.hashCode();
@@ -187,8 +182,8 @@ public class Request extends WorkflowJob {
public Row.Entry toRow() {
final byte[] appdatestr = NaturalOrder.encodeLong(appdate, rowdef.width(5));
- final byte[] loaddatestr = NaturalOrder.encodeLong(loaddate, rowdef.width(12));
- final byte[] serverdatestr = NaturalOrder.encodeLong(serverdate, rowdef.width(13));
+ final byte[] loaddatestr = NaturalOrder.encodeLong(0 /*loaddate*/, rowdef.width(12));
+ final byte[] serverdatestr = NaturalOrder.encodeLong(0 /*lastmodified*/, rowdef.width(13));
final byte[] imsdatestr = NaturalOrder.encodeLong(imsdate, rowdef.width(14));
// store the hash in the hash cache
byte[] namebytes;
@@ -245,17 +240,17 @@ public class Request extends WorkflowJob {
// the date when the url appeared first
return new Date(this.appdate);
}
-
+ /*
public Date loaddate() {
// the date when the url was loaded
return new Date(this.loaddate);
}
- public Date serverdate() {
+ public Date lastmodified() {
// the date that the server returned as document date
- return new Date(this.serverdate);
+ return new Date(this.lastmodified);
}
-
+ */
public Date imsdate() {
// the date that the client (browser) send as ifModifiedSince in proxy mode
return new Date(this.imsdate);
diff --git a/source/de/anomic/crawler/retrieval/Response.java b/source/de/anomic/crawler/retrieval/Response.java
index 174cc2968..7ea57e023 100755
--- a/source/de/anomic/crawler/retrieval/Response.java
+++ b/source/de/anomic/crawler/retrieval/Response.java
@@ -55,8 +55,8 @@ public class Response {
// the class objects
private final Request request;
- private final RequestHeader requestHeader;
- private final ResponseHeader responseHeader;
+ private final RequestHeader requestHeader;
+ private final ResponseHeader responseHeader;
private final String responseStatus;
private final CrawlProfile.entry profile;
private byte[] content;
@@ -201,6 +201,7 @@ public class Response {
docDate = responseHeader.lastModified();
if (docDate == null) docDate = responseHeader.date();
}
+ if (docDate == null && request != null) docDate = request.appdate();
if (docDate == null) docDate = new Date(DateFormatter.correctedUTCTime());
return docDate;
diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java
index 6f1aad1b7..52bea8324 100644
--- a/source/de/anomic/data/SitemapParser.java
+++ b/source/de/anomic/data/SitemapParser.java
@@ -282,7 +282,6 @@ public class SitemapParser extends DefaultHandler {
null, // this.siteMapURL.toString(),
this.nextURL,
new Date(),
- null,
this.crawlingProfile.handle(),
0,
0,
diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java
index bfa4f6f6b..330dd3b1d 100644
--- a/source/de/anomic/data/bookmarksDB.java
+++ b/source/de/anomic/data/bookmarksDB.java
@@ -269,7 +269,6 @@ public class bookmarksDB {
null,
"CRAWLING-ROOT",
new Date(),
- null,
pe.handle(),
0,
0,
diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java
index 4c4589cd6..ee4549c03 100644
--- a/source/de/anomic/http/server/HTTPDProxyHandler.java
+++ b/source/de/anomic/http/server/HTTPDProxyHandler.java
@@ -387,8 +387,7 @@ public final class HTTPDProxyHandler {
url,
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
"",
- new Date(),
- new Date(),
+ cachedResponseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
0,
0,
@@ -510,8 +509,7 @@ public final class HTTPDProxyHandler {
url,
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
"",
- new Date(),
- new Date(),
+ responseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
0,
0,
diff --git a/source/de/anomic/search/DocumentIndex.java b/source/de/anomic/search/DocumentIndex.java
index 97a228f87..9a6712bc3 100644
--- a/source/de/anomic/search/DocumentIndex.java
+++ b/source/de/anomic/search/DocumentIndex.java
@@ -134,6 +134,7 @@ public class DocumentIndex extends Segment {
url,
null,
new Date(url.lastModified()),
+ new Date(),
url.length(),
document,
condenser
diff --git a/source/de/anomic/search/Segment.java b/source/de/anomic/search/Segment.java
index 03abc651e..aeee8a76b 100644
--- a/source/de/anomic/search/Segment.java
+++ b/source/de/anomic/search/Segment.java
@@ -241,7 +241,8 @@ public class Segment {
public URIMetadataRow storeDocument(
final DigestURI url,
final DigestURI referrerURL,
- final Date docDate,
+ final Date modDate,
+ final Date loadDate,
final long sourcesize,
final Document document,
final Condenser condenser
@@ -295,16 +296,16 @@ public class Segment {
}
// create a new loaded URL db entry
- final long ldate = System.currentTimeMillis();
+ assert modDate.getTime() <= loadDate.getTime() : "modDate = " + modDate + ", loadDate = " + loadDate;
final URIMetadataRow newEntry = new URIMetadataRow(
url, // URL
dc_title, // document description
document.dc_creator(), // author
document.dc_subject(' '), // tags
"", // ETag
- docDate, // modification date
- new Date(), // loaded date
- new Date(ldate + Math.max(0, ldate - docDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula
+ modDate, // modification date
+ loadDate, // loaded date
+ new Date(loadDate.getTime() + Math.max(0, loadDate.getTime() - modDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula
(referrerURL == null) ? null : new String(referrerURL.hash()), // referer hash
new byte[0], // md5
(int) sourcesize, // size
@@ -328,7 +329,7 @@ public class Segment {
// STORE PAGE INDEX INTO WORD INDEX DB
final int words = addPageIndex(
url, // document url
- docDate, // document mod date
+ modDate, // document mod date
document, // document content
condenser, // document condenser
language, // document language
diff --git a/source/de/anomic/search/Segments.java b/source/de/anomic/search/Segments.java
index 793c84068..ea191c7f9 100644
--- a/source/de/anomic/search/Segments.java
+++ b/source/de/anomic/search/Segments.java
@@ -204,7 +204,8 @@ public class Segments implements Iterable {
final String segmentName,
final DigestURI url,
final DigestURI referrerURL,
- final Date docDate,
+ final Date modDate,
+ final Date loadDate,
final long sourcesize,
final Document document,
final Condenser condenser
@@ -212,7 +213,8 @@ public class Segments implements Iterable {
return segment(segmentName).storeDocument(
url,
referrerURL,
- docDate,
+ modDate,
+ loadDate,
sourcesize,
document,
condenser
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index d27d0da8b..948432129 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -1332,8 +1332,7 @@ public final class Switchboard extends serverSwitch {
surrogate.getIdentifier(true),
null,
"",
- new Date(),
- new Date(),
+ surrogate.getDate(),
this.crawler.defaultSurrogateProfile.handle(),
0,
0,
@@ -1670,7 +1669,7 @@ public final class Switchboard extends serverSwitch {
in.queueEntry.updateStatus(Response.QUEUE_STATE_PARSING);
// debug
- if (log.isFinest()) log.logFinest("PARSE "+ in.queueEntry.toString());
+ if (log.isFinest()) log.logFinest("PARSE "+ in.queueEntry);
Document document = null;
try {
@@ -1731,10 +1730,7 @@ public final class Switchboard extends serverSwitch {
return null;
}
- final long parsingEndTime = System.currentTimeMillis();
-
- // get the document date
- final Date docDate = response.lastModified();
+ final long parsingEndTime = System.currentTimeMillis();
// put anchors on crawl stack
final long stackStartTime = System.currentTimeMillis();
@@ -1767,8 +1763,7 @@ public final class Switchboard extends serverSwitch {
new DigestURI(u, null),
response.url().hash(),
nextEntry.getValue(),
- null,
- docDate,
+ new Date(),
response.profile().handle(),
response.depth() + 1,
0,
@@ -1860,6 +1855,7 @@ public final class Switchboard extends serverSwitch {
queueEntry.url(),
referrerURL,
queueEntry.lastModified(),
+ new Date(),
queueEntry.size(),
document,
condenser);
@@ -2125,7 +2121,6 @@ public final class Switchboard extends serverSwitch {
(name == null) ? "" : name,
new Date(),
null,
- null,
0,
0,
0);
diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java
index 44feff07b..2397931ec 100644
--- a/source/de/anomic/yacy/yacySeed.java
+++ b/source/de/anomic/yacy/yacySeed.java
@@ -780,7 +780,6 @@ public class yacySeed implements Cloneable {
// name
final String peerName = this.dna.get(yacySeed.NAME);
if (peerName == null) return "no peer name given";
- if (peerName.equalsIgnoreCase("VegaYacyB")) return "bad peer VegaYacyB [ " + this.hash + " ]"; // hack for wrong "VegaYacyB" peers
dna.put(yacySeed.NAME, checkPeerName(peerName));
// type
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
index 37899527f..f3166f145 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
@@ -81,42 +81,24 @@ public class URIMetadataRow implements URIMetadata {
/* ===========================================================================
* Constants to access the various columns of an URL entry
* =========================================================================== */
- /** the url's hash */
- private static final int col_hash = 0;
- /** components: the url, description, author and tags. As 5th element, an ETag is possible */
- private static final int col_comp = 1;
- /** components: the url, description, author and tags. As 5th element, an ETag is possible */
- private static final int col_mod = 2;
- /** time when the url was loaded */
- private static final int col_load = 3;
- /** time until this url is fresh */
- private static final int col_fresh = 4;
- /** time when the url was loaded */
- private static final int col_referrer = 5;
- /** the md5 of the url content (to identify changes) */
- private static final int col_md5 = 6;
- /** size of file in bytes */
- private static final int col_size = 7;
- /** size of file by number of words; for video and audio: seconds */
- private static final int col_wc = 8;
- /** doctype, taken from extension or any other heuristic */
- private static final int col_dt = 9;
- /** flags; any stuff (see Word-Entity definition) */
- private static final int col_flags = 10;
- /** language */
- private static final int col_lang = 11;
- /** of outlinks to same domain; for video and image: width */
- private static final int col_llocal = 12;
- /** of outlinks to outside domain; for video and image: height */
- private static final int col_lother = 13;
- /** of embedded image links */
- private static final int col_limage = 14;
- /** of embedded audio links; for audio: track number; for video: number of audio tracks */
- private static final int col_laudio = 15;
- /** of embedded video links */
- private static final int col_lvideo = 16;
- /** of embedded links to applications */
- private static final int col_lapp = 17;
+ private static final int col_hash = 0; // the url's hash
+ private static final int col_comp = 1; // components: the url, description, author and tags. As 5th element, an ETag is possible
+ private static final int col_mod = 2; // the modified-date time from the server (servertime in row)
+ private static final int col_load = 3; // time when the url was loaded
+ private static final int col_fresh = 4; // time until this url is fresh
+ private static final int col_referrer = 5; // a referrer of the url (there may be several, but this is the one that was actually referring to this one)
+ private static final int col_md5 = 6; // the md5 of the url content (to identify changes)
+ private static final int col_size = 7; // size of file in bytes
+ private static final int col_wc = 8; // size of file by number of words; for video and audio: seconds
+ private static final int col_dt = 9; // doctype, taken from extension or any other heuristic
+ private static final int col_flags = 10; // flags; any stuff (see Word-Entity definition)
+ private static final int col_lang = 11; // language
+ private static final int col_llocal = 12; // # of outlinks to same domain; for video and image: width
+ private static final int col_lother = 13; // # of outlinks to outside domain; for video and image: height
+ private static final int col_limage = 14; // # of embedded image links
+ private static final int col_laudio = 15; // # of embedded audio links; for audio: track number; for video: number of audio tracks
+ private static final int col_lvideo = 16; // # of embedded video links
+ private static final int col_lapp = 17; // # of embedded links to applications
private final Row.Entry entry;
private final String snippet;
@@ -522,8 +504,7 @@ public class URIMetadataRow implements URIMetadata {
metadata().url(),
referrerHash(),
metadata().dc_title(),
- null,
- loaddate(),
+ moddate(),
null,
0,
0,
diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java
index a3fd651b6..fa3f9ca15 100644
--- a/source/net/yacy/kelondro/index/Row.java
+++ b/source/net/yacy/kelondro/index/Row.java
@@ -68,7 +68,7 @@ public final class Row {
os+= this.row[i].cellwidth;
}
this.objectsize = os;
- this.primaryKeyLength = row[0].cellwidth;
+ this.primaryKeyLength = this.row[0].cellwidth;
}
public Row(final String structure, final ByteOrder objectOrder) {
@@ -102,7 +102,7 @@ public final class Row {
os += this.row[i].cellwidth;
}
this.objectsize = os;
- this.primaryKeyLength = row[0].cellwidth;
+ this.primaryKeyLength = this.row[0].cellwidth;
}
public final ByteOrder getOrdering() {
@@ -150,8 +150,8 @@ public final class Row {
public final Entry newEntry(final byte[] rowinstance) {
if (rowinstance == null) return null;
//assert (rowinstance[0] != 0);
- if (!(this.objectOrder.wellformed(rowinstance, 0, row[0].cellwidth))) {
- Log.logWarning("kelondroRow", "row not well-formed: rowinstance[0] = " + new String(rowinstance, 0, row[0].cellwidth) + " / " + NaturalOrder.arrayList(rowinstance, 0, row[0].cellwidth));
+ if (!(this.objectOrder.wellformed(rowinstance, 0, this.primaryKeyLength))) {
+ Log.logWarning("kelondroRow", "row not well-formed: rowinstance[0] = " + new String(rowinstance, 0, this.primaryKeyLength) + " / " + NaturalOrder.arrayList(rowinstance, 0, this.primaryKeyLength));
return null;
}
return new Entry(rowinstance, false);
@@ -160,14 +160,14 @@ public final class Row {
public final Entry newEntry(final Entry oldrow, final int fromColumn) {
if (oldrow == null) return null;
assert (oldrow.getColBytes(0, false)[0] != 0);
- assert (this.objectOrder.wellformed(oldrow.getColBytes(0, false), 0, row[0].cellwidth));
+ assert (this.objectOrder.wellformed(oldrow.getColBytes(0, false), 0, this.primaryKeyLength));
return new Entry(oldrow, fromColumn, false);
}
public final Entry newEntry(final byte[] rowinstance, final int start, final boolean clone) {
if (rowinstance == null) return null;
//assert (rowinstance[0] != 0);
- assert (this.objectOrder.wellformed(rowinstance, start, row[0].cellwidth)) : "rowinstance = " + new String(rowinstance);
+ assert (this.objectOrder.wellformed(rowinstance, start, this.primaryKeyLength)) : "rowinstance = " + new String(rowinstance);
// this method offers the option to clone the content
// this is necessary if it is known that the underlying byte array may change and therefore
// the reference to the byte array does not contain the original content
@@ -177,7 +177,7 @@ public final class Row {
public final Entry newEntry(final byte[][] cells) {
if (cells == null) return null;
assert (cells[0][0] != 0);
- assert (this.objectOrder.wellformed(cells[0], 0, row[0].cellwidth));
+ assert (this.objectOrder.wellformed(cells[0], 0, this.primaryKeyLength));
return new Entry(cells);
}
@@ -189,7 +189,7 @@ public final class Row {
public final EntryIndex newEntryIndex(final byte[] rowinstance, final int index) {
if (rowinstance == null) return null;
assert (rowinstance[0] != 0);
- assert (this.objectOrder.wellformed(rowinstance, 0, row[0].cellwidth));
+ assert (this.objectOrder.wellformed(rowinstance, 0, this.primaryKeyLength));
return new EntryIndex(rowinstance, index);
}
diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java
index 9b5910469..0036e1fe8 100644
--- a/source/net/yacy/kelondro/table/Table.java
+++ b/source/net/yacy/kelondro/table/Table.java
@@ -574,16 +574,22 @@ public class Table implements ObjectIndex, Iterable {
private void removeInFile(final int i) throws IOException, RowSpaceExceededException {
assert i >= 0;
- final byte[] p = new byte[rowdef.objectsize];
- if (table == null) {
- if (i == index.size() - 1) {
- file.cleanLast();
+ final byte[] p = new byte[this.rowdef.objectsize];
+ if (this.table == null) {
+ if (i == this.index.size() - 1) {
+ this.file.cleanLast();
} else {
- file.cleanLast(p, 0);
- file.put(i, p, 0);
- final byte[] k = new byte[rowdef.primaryKeyLength];
- System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
- index.put(k, i);
+ while (this.file.size() > 0) {
+ this.file.cleanLast(p, 0);
+ if (!(this.rowdef.objectOrder.wellformed(p, 0, this.rowdef.primaryKeyLength))) {
+ continue;
+ }
+ this.file.put(i, p, 0);
+ final byte[] k = new byte[this.rowdef.primaryKeyLength];
+ System.arraycopy(p, 0, k, 0, this.rowdef.primaryKeyLength);
+ this.index.put(k, i);
+ break;
+ }
}
} else {
if (i == index.size() - 1) {
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 77d9bc063..59cfc67fb 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -151,7 +151,6 @@ public final class LoaderDispatcher {
null,
"",
new Date(),
- new Date(),
(forText) ?
((global) ?
sb.crawler.defaultTextSnippetGlobalProfile.handle() :