diff --git a/source/de/anomic/crawler/retrieval/Request.java b/source/de/anomic/crawler/retrieval/Request.java
index 6769ca80b..7d8328072 100755
--- a/source/de/anomic/crawler/retrieval/Request.java
+++ b/source/de/anomic/crawler/retrieval/Request.java
@@ -38,45 +38,64 @@ import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.workflow.WorkflowJob;
-public class Request extends WorkflowJob {
+public class Request extends WorkflowJob
+{
// row definition for balancer-related NURL-entries
- public final static Row rowdef = new Row(
- "String urlhash-" + Word.commonHashLength + ", " + // the url's hash
- "String initiator-" + Word.commonHashLength + ", " + // the crawling initiator
- "String urlstring-256, " + // the url as string
- "String refhash-" + Word.commonHashLength + ", " + // the url's referrer hash
- "String urlname-80, " + // the name of the url, from anchor tag name
- "Cardinal appdate-8 {b256}, " + // the date of the resource; either file date or first appearance
- "String profile-" + Word.commonHashLength + ", " + // the name of the prefetch profile handle
- "Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0
- "Cardinal parentbr-3 {b256}, " + // number of anchors of the parent
- "Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors
- "byte[] flags-4, " + // flags
- "Cardinal handle-4 {b256}, " + // handle (NOT USED)
- "Cardinal loaddate-8 {b256}, " + // NOT USED
- "Cardinal lastmodified-8 {b256}, " + // NOT USED
- "Cardinal size-8 {b256}", // size of resource in bytes (if known) or 0 if not known
- Base64Order.enhancedCoder
- );
+ public final static Row rowdef = new Row("String urlhash-" + Word.commonHashLength + ", " + // the url's hash
+ "String initiator-"
+ + Word.commonHashLength
+ + ", "
+ + // the crawling initiator
+ "String urlstring-256, "
+ + // the url as string
+ "String refhash-"
+ + Word.commonHashLength
+ + ", "
+ + // the url's referrer hash
+ "String urlname-80, "
+ + // the name of the url, from anchor tag name
+ "Cardinal appdate-8 {b256}, "
+ + // the date of the resource; either file date or first appearance
+ "String profile-"
+ + Word.commonHashLength
+ + ", "
+ + // the name of the prefetch profile handle
+ "Cardinal depth-2 {b256}, "
+ + // the prefetch depth so far, starts at 0
+ "Cardinal parentbr-3 {b256}, "
+ + // number of anchors of the parent
+ "Cardinal forkfactor-4 {b256}, "
+ + // sum of anchors of all ancestors
+ "byte[] flags-4, "
+ + // flags
+ "Cardinal handle-4 {b256}, "
+ + // handle (NOT USED)
+ "Cardinal loaddate-8 {b256}, "
+ + // NOT USED
+ "Cardinal lastmodified-8 {b256}, "
+ + // NOT USED
+ "Cardinal size-8 {b256}", // size of resource in bytes (if known) or 0 if not known
+ Base64Order.enhancedCoder);
- private byte[] initiator; // the initiator hash, is NULL or "" if it is the own proxy;
- // if this is generated by a crawl, the own peer hash in entered
- private byte[] refhash; // the url's referrer hash
- private DigestURI url; // the url as string
- private String name; // the name of the url, from anchor tag name
- private long appdate; // the time when the url was first time appeared.
- private String profileHandle; // the name of the fetch profile
- private int depth; // the prefetch depth so far, starts at 0
- private int anchors; // number of anchors of the parent
- private int forkfactor; // sum of anchors of all ancestors
+ private byte[] initiator; // the initiator hash, is NULL or "" if it is the own proxy;
+    // if this is generated by a crawl, the own peer hash is entered
+ private byte[] refhash; // the url's referrer hash
+ private DigestURI url; // the url as string
+ private String name; // the name of the url, from anchor tag name
+ private long appdate; // the time when the url was first time appeared.
+ private String profileHandle; // the name of the fetch profile
+ private int depth; // the prefetch depth so far, starts at 0
+ private int anchors; // number of anchors of the parent
+ private int forkfactor; // sum of anchors of all ancestors
private Bitfield flags;
- private long size; // size of resource in bytes (if known) or 0 if not known
- private String statusMessage;
- private int initialHash; // to provide a object hash that does not change even if the url changes because of redirection
+ private long size; // size of resource in bytes (if known) or 0 if not known
+ private String statusMessage;
+    private int initialHash; // to provide an object hash that does not change even if the url changes because of redirection
/**
* convenience method for 'full' request object
+ *
* @param url
* @param referrerhash
*/
@@ -85,9 +104,8 @@ public class Request extends WorkflowJob {
}
/**
- * A Request Entry is a object that is created to provide
- * all information to load a specific resource.
- *
+ * A Request Entry is an object that is created to provide all information to load a specific resource.
+ *
* @param initiator the hash of the initiator peer
* @param url the {@link URL} to crawl
* @param referrer the hash of the referrer URL
@@ -99,35 +117,36 @@ public class Request extends WorkflowJob {
* @param forkfactor sum of anchors of all ancestors
*/
public Request(
- final byte[] initiator,
- final DigestURI url,
- final byte[] referrerhash,
- final String name,
- final Date appdate,
- final String profileHandle,
- final int depth,
- final int anchors,
- final int forkfactor,
- final long size
- ) {
+ final byte[] initiator,
+ final DigestURI url,
+ final byte[] referrerhash,
+ final String name,
+ final Date appdate,
+ final String profileHandle,
+ final int depth,
+ final int anchors,
+ final int forkfactor,
+ final long size) {
// create new entry and store it into database
assert url != null;
- assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle + " != " + Word.commonHashLength;
+ assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle
+ + " != "
+ + Word.commonHashLength;
url.removeRef(); // remove anchor reference
- this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator);
- this.url = url;
- this.refhash = referrerhash;
- this.name = (name == null) ? "" : name;
- this.appdate = (appdate == null) ? 0 : appdate.getTime();
+ this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator);
+ this.url = url;
+ this.refhash = referrerhash;
+ this.name = (name == null) ? "" : name;
+ this.appdate = (appdate == null) ? 0 : appdate.getTime();
this.profileHandle = profileHandle; // must not be null
- this.depth = depth;
- this.anchors = anchors;
- this.forkfactor = forkfactor;
- this.flags = new Bitfield(rowdef.width(10));
+ this.depth = depth;
+ this.anchors = anchors;
+ this.forkfactor = forkfactor;
+ this.flags = new Bitfield(rowdef.width(10));
this.statusMessage = "loaded(args)";
- this.initialHash = url.hashCode();
- this.status = WorkflowJob.STATUS_INITIATED;
- this.size = size;
+ this.initialHash = url.hashCode();
+ this.status = WorkflowJob.STATUS_INITIATED;
+ this.size = size;
}
public Request(final Row.Entry entry) throws IOException {
@@ -136,27 +155,35 @@ public class Request extends WorkflowJob {
}
private void insertEntry(final Row.Entry entry) throws IOException {
- final String urlstring = entry.getColUTF8(2);
- if (urlstring == null) throw new IOException ("url string is null");
- this.initiator = entry.getColBytes(1, true);
- this.initiator = (this.initiator == null) ? null : ((this.initiator.length == 0) ? null : this.initiator);
- this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes());
- this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true);
- this.name = (entry.empty(4)) ? "" : entry.getColUTF8(4).trim();
- this.appdate = entry.getColLong(5);
- this.profileHandle = (entry.empty(6)) ? null : entry.getColASCII(6).trim();
- this.depth = (int) entry.getColLong(7);
- this.anchors = (int) entry.getColLong(8);
- this.forkfactor = (int) entry.getColLong(9);
- this.flags = new Bitfield(entry.getColBytes(10, true));
- //this.loaddate = entry.getColLong(12);
- //this.lastmodified = entry.getColLong(13);
- this.size = entry.getColLong(14);
- this.statusMessage = "loaded(kelondroRow.Entry)";
- this.initialHash = this.url.hashCode();
+ try {
+ final String urlstring = entry.getColUTF8(2);
+ if ( urlstring == null ) {
+ throw new IOException("url string is null");
+ }
+ this.initiator = entry.getColBytes(1, true);
+ this.initiator =
+ (this.initiator == null) ? null : ((this.initiator.length == 0) ? null : this.initiator);
+ this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes());
+ this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true);
+ this.name = (entry.empty(4)) ? "" : entry.getColUTF8(4).trim();
+ this.appdate = entry.getColLong(5);
+ this.profileHandle = (entry.empty(6)) ? null : entry.getColASCII(6).trim();
+ this.depth = (int) entry.getColLong(7);
+ this.anchors = (int) entry.getColLong(8);
+ this.forkfactor = (int) entry.getColLong(9);
+ this.flags = new Bitfield(entry.getColBytes(10, true));
+ //this.loaddate = entry.getColLong(12);
+ //this.lastmodified = entry.getColLong(13);
+ this.size = entry.getColLong(14);
+ this.statusMessage = "loaded(kelondroRow.Entry)";
+ this.initialHash = this.url.hashCode();
+ } catch ( Throwable e ) {
+            throw new IOException(e.getMessage(), e);
+ }
return;
}
+ @Override
public int hashCode() {
// overloads Object.hashCode()
return this.initialHash;
@@ -179,7 +206,8 @@ public class Request extends WorkflowJob {
final byte[] sizestr = NaturalOrder.encodeLong(this.size, rowdef.width(14));
// store the hash in the hash cache
final byte[] namebytes = UTF8.getBytes(this.name);
- final byte[][] entry = new byte[][] {
+ final byte[][] entry =
+ new byte[][] {
this.url.hash(),
this.initiator,
this.url.toString().getBytes(),
@@ -194,7 +222,8 @@ public class Request extends WorkflowJob {
NaturalOrder.encodeLong(0, rowdef.width(11)),
loaddatestr,
serverdatestr,
- sizestr};
+ sizestr
+ };
return rowdef.newEntry(entry);
}
@@ -227,6 +256,7 @@ public class Request extends WorkflowJob {
// the date when the url appeared first
return new Date(this.appdate);
}
+
/*
public Date loaddate() {
// the date when the url was loaded
@@ -255,7 +285,9 @@ public class Request extends WorkflowJob {
public String profileHandle() {
// the handle of the crawl profile
- assert this.profileHandle.length() == Word.commonHashLength : this.profileHandle + " != " + Word.commonHashLength;
+ assert this.profileHandle.length() == Word.commonHashLength : this.profileHandle
+ + " != "
+ + Word.commonHashLength;
return this.profileHandle;
}