|
|
|
@ -44,30 +44,28 @@ public class Request extends WorkflowJob {
|
|
|
|
|
// Column layout of the Row that a Request is stored in.
// Columns are addressed by position elsewhere in this class (for example
// rowdef.width(5) is used for appdate and rowdef.width(10) for flags), so the
// order of the entries below is significant.
// NOTE(review): several entries appear twice in this listing (urlstring, urlname,
// appdate, and depth..modifiedSince); confirm which set is the current one.
public final static Row rowdef = new Row(
|
|
|
|
|
"String urlhash-" + Word.commonHashLength + ", " + // the url's hash
|
|
|
|
|
"String initiator-" + Word.commonHashLength + ", " + // the crawling initiator
|
|
|
|
|
"String urlstring-256, " + // the url as string
|
|
|
|
|
"String urlstring-256, " + // the url as string
|
|
|
|
|
"String refhash-" + Word.commonHashLength + ", " + // the url's referrer hash
|
|
|
|
|
"String urlname-80, " + // the name of the url, from anchor tag <a>name</a>
|
|
|
|
|
"Cardinal appdate-8 {b256}, " + // the time when the url first appeared
|
|
|
|
|
"String urlname-80, " + // the name of the url, from anchor tag <a>name</a>
|
|
|
|
|
"Cardinal appdate-8 {b256}, " + // the time when the url first appeared
|
|
|
|
|
"String profile-" + Word.commonHashLength + ", " + // the name of the prefetch profile handle
|
|
|
|
|
"Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0
|
|
|
|
|
"Cardinal parentbr-3 {b256}, " + // number of anchors of the parent
|
|
|
|
|
"Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors
|
|
|
|
|
"byte[] flags-4, " + // flags
|
|
|
|
|
"String handle-4, " + // extra handle
|
|
|
|
|
"Cardinal loaddate-8 {b256}," + // time when the file was loaded
|
|
|
|
|
"Cardinal serverdate-8 {b256}," + // time that the server returned as the document date
|
|
|
|
|
"Cardinal modifiedSince-8 {b256}", // time that was given to the server as ifModifiedSince
|
|
|
|
|
"Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0
|
|
|
|
|
"Cardinal parentbr-3 {b256}, " + // number of anchors of the parent
|
|
|
|
|
"Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors
|
|
|
|
|
"byte[] flags-4, " + // flags
|
|
|
|
|
"String handle-4, " + // extra handle
|
|
|
|
|
"Cardinal loaddate-8 {b256}," + // NOT USED
|
|
|
|
|
"Cardinal lastmodified-8 {b256}," + // NOT USED
|
|
|
|
|
"Cardinal modifiedSince-8 {b256}", // time that was given to the server as ifModifiedSince
|
|
|
|
|
Base64Order.enhancedCoder
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Per-request state. The date fields are stored as long values obtained from
// Date.getTime() and are wrapped back into Date objects by the accessor methods.
// NOTE(review): 'url' is declared twice in this listing; confirm only one declaration is current.
private byte[] initiator; // the initiator hash; is NULL or "" if it is the own proxy;
|
|
|
|
|
// if this is generated by a crawl, the own peer hash is entered
|
|
|
|
|
private byte[] refhash; // the url's referrer hash
|
|
|
|
|
private DigestURI url; // the url (held as a DigestURI, not as a plain string)
|
|
|
|
|
private DigestURI url; // the url (held as a DigestURI, not as a plain string)
|
|
|
|
|
private String name; // the name of the url, from anchor tag <a>name</a>; a null argument is stored as ""
|
|
|
|
|
private long appdate; // the time when the url first appeared; 0 if no date was given
|
|
|
|
|
private long loaddate; // the time when the url was loaded; 0 if no date was given
|
|
|
|
|
private long serverdate; // the document date from the target server
|
|
|
|
|
private long imsdate; // the time of an ifModifiedSince request
|
|
|
|
|
private String profileHandle; // the name of the fetch profile; must not be null
|
|
|
|
|
private int depth; // the prefetch depth so far, starts at 0
|
|
|
|
@ -84,7 +82,7 @@ public class Request extends WorkflowJob {
|
|
|
|
|
* @param referrerhash
|
|
|
|
|
*/
|
|
|
|
|
public Request(final DigestURI url, final byte[] referrerhash) {
// Convenience constructor: delegates to the full constructor, passing only
// url and referrerhash; every other argument is given as null or 0.
// NOTE(review): two this(...) calls with different argument counts appear in
// this listing; an explicit constructor invocation must be the first statement,
// so only one of them can be current - confirm which.
|
|
|
|
|
this(null, url, referrerhash, null, null, null, null, 0, 0, 0);
|
|
|
|
|
this(null, url, referrerhash, null, null, null, 0, 0, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -107,7 +105,6 @@ public class Request extends WorkflowJob {
|
|
|
|
|
final byte[] referrerhash,
|
|
|
|
|
final String name,
|
|
|
|
|
final Date appdate,
|
|
|
|
|
final Date loaddate,
|
|
|
|
|
final String profileHandle,
|
|
|
|
|
final int depth,
|
|
|
|
|
final int anchors,
|
|
|
|
@ -122,14 +119,12 @@ public class Request extends WorkflowJob {
|
|
|
|
|
this.refhash = referrerhash;
|
|
|
|
|
this.name = (name == null) ? "" : name;
|
|
|
|
|
this.appdate = (appdate == null) ? 0 : appdate.getTime();
|
|
|
|
|
this.loaddate = (loaddate == null) ? 0 : loaddate.getTime();
|
|
|
|
|
this.profileHandle = profileHandle; // must not be null
|
|
|
|
|
this.depth = depth;
|
|
|
|
|
this.anchors = anchors;
|
|
|
|
|
this.forkfactor = forkfactor;
|
|
|
|
|
this.flags = new Bitfield(rowdef.width(10));
|
|
|
|
|
this.handle = 0;
|
|
|
|
|
this.serverdate = 0;
|
|
|
|
|
this.imsdate = 0;
|
|
|
|
|
this.statusMessage = "loaded(args)";
|
|
|
|
|
this.initialHash = url.hashCode();
|
|
|
|
@ -156,8 +151,8 @@ public class Request extends WorkflowJob {
|
|
|
|
|
this.forkfactor = (int) entry.getColLong(9);
|
|
|
|
|
this.flags = new Bitfield(entry.getColBytes(10, true));
|
|
|
|
|
this.handle = Integer.parseInt(entry.getColString(11, null), 16);
|
|
|
|
|
this.loaddate = entry.getColLong(12);
|
|
|
|
|
this.serverdate = entry.getColLong(13);
|
|
|
|
|
//this.loaddate = entry.getColLong(12);
|
|
|
|
|
//this.lastmodified = entry.getColLong(13);
|
|
|
|
|
this.imsdate = entry.getColLong(14);
|
|
|
|
|
this.statusMessage = "loaded(kelondroRow.Entry)";
|
|
|
|
|
this.initialHash = url.hashCode();
|
|
|
|
@ -187,8 +182,8 @@ public class Request extends WorkflowJob {
|
|
|
|
|
|
|
|
|
|
public Row.Entry toRow() {
|
|
|
|
|
final byte[] appdatestr = NaturalOrder.encodeLong(appdate, rowdef.width(5));
|
|
|
|
|
final byte[] loaddatestr = NaturalOrder.encodeLong(loaddate, rowdef.width(12));
|
|
|
|
|
final byte[] serverdatestr = NaturalOrder.encodeLong(serverdate, rowdef.width(13));
|
|
|
|
|
final byte[] loaddatestr = NaturalOrder.encodeLong(0 /*loaddate*/, rowdef.width(12));
|
|
|
|
|
final byte[] serverdatestr = NaturalOrder.encodeLong(0 /*lastmodified*/, rowdef.width(13));
|
|
|
|
|
final byte[] imsdatestr = NaturalOrder.encodeLong(imsdate, rowdef.width(14));
|
|
|
|
|
// store the hash in the hash cache
|
|
|
|
|
byte[] namebytes;
|
|
|
|
@ -245,17 +240,17 @@ public class Request extends WorkflowJob {
|
|
|
|
|
// the date when the url appeared first
|
|
|
|
|
return new Date(this.appdate);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
public Date loaddate() {
|
|
|
|
|
// the date when the url was loaded
|
|
|
|
|
return new Date(this.loaddate);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public Date serverdate() {
|
|
|
|
|
public Date lastmodified() {
|
|
|
|
|
// the date that the server returned as document date
|
|
|
|
|
return new Date(this.serverdate);
|
|
|
|
|
return new Date(this.lastmodified);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
public Date imsdate() {
|
|
|
|
|
// the date that the client (browser) sent as ifModifiedSince in proxy mode
|
|
|
|
|
return new Date(this.imsdate);
|
|
|
|
|