Michael Christen 13 years ago
parent d35bdc2df6
commit 6e66c9d7f1

@ -38,45 +38,64 @@ import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.workflow.WorkflowJob; import net.yacy.kelondro.workflow.WorkflowJob;
public class Request extends WorkflowJob { public class Request extends WorkflowJob
{
// row definition for balancer-related NURL-entries // row definition for balancer-related NURL-entries
public final static Row rowdef = new Row( public final static Row rowdef = new Row("String urlhash-" + Word.commonHashLength + ", " + // the url's hash
"String urlhash-" + Word.commonHashLength + ", " + // the url's hash "String initiator-"
"String initiator-" + Word.commonHashLength + ", " + // the crawling initiator + Word.commonHashLength
"String urlstring-256, " + // the url as string + ", "
"String refhash-" + Word.commonHashLength + ", " + // the url's referrer hash + // the crawling initiator
"String urlname-80, " + // the name of the url, from anchor tag <a>name</a> "String urlstring-256, "
"Cardinal appdate-8 {b256}, " + // the date of the resource; either file date or first appearance + // the url as string
"String profile-" + Word.commonHashLength + ", " + // the name of the prefetch profile handle "String refhash-"
"Cardinal depth-2 {b256}, " + // the prefetch depth so far, starts at 0 + Word.commonHashLength
"Cardinal parentbr-3 {b256}, " + // number of anchors of the parent + ", "
"Cardinal forkfactor-4 {b256}, " + // sum of anchors of all ancestors + // the url's referrer hash
"byte[] flags-4, " + // flags "String urlname-80, "
"Cardinal handle-4 {b256}, " + // handle (NOT USED) + // the name of the url, from anchor tag <a>name</a>
"Cardinal loaddate-8 {b256}, " + // NOT USED "Cardinal appdate-8 {b256}, "
"Cardinal lastmodified-8 {b256}, " + // NOT USED + // the date of the resource; either file date or first appearance
"Cardinal size-8 {b256}", // size of resource in bytes (if known) or 0 if not known "String profile-"
Base64Order.enhancedCoder + Word.commonHashLength
); + ", "
+ // the name of the prefetch profile handle
"Cardinal depth-2 {b256}, "
+ // the prefetch depth so far, starts at 0
"Cardinal parentbr-3 {b256}, "
+ // number of anchors of the parent
"Cardinal forkfactor-4 {b256}, "
+ // sum of anchors of all ancestors
"byte[] flags-4, "
+ // flags
"Cardinal handle-4 {b256}, "
+ // handle (NOT USED)
"Cardinal loaddate-8 {b256}, "
+ // NOT USED
"Cardinal lastmodified-8 {b256}, "
+ // NOT USED
"Cardinal size-8 {b256}", // size of resource in bytes (if known) or 0 if not known
Base64Order.enhancedCoder);
// the initiator hash; null if it is the own proxy (empty hashes are normalized to null on assignment);
// if this is generated by a crawl, the own peer hash is entered
private byte[] initiator;
// the url's referrer hash
private byte[] refhash;
// the url to load
private DigestURI url;
// the name of the url, from anchor tag <a>name</a>
private String name;
// the time when the url appeared for the first time, in milliseconds
private long appdate;
// the name of the fetch profile
private String profileHandle;
// the prefetch depth so far, starts at 0
private int depth;
// number of anchors of the parent
private int anchors;
// sum of anchors of all ancestors
private int forkfactor;
// flag bits; the width is taken from the flags column of the row definition
private Bitfield flags;
// size of resource in bytes (if known) or 0 if not known
private long size;
// records how this object was populated ("loaded(args)" or "loaded(kelondroRow.Entry)")
private String statusMessage;
// to provide an object hash that does not change even if the url changes because of redirection
private int initialHash;
/** /**
* convenience method for 'full' request object * convenience method for 'full' request object
*
* @param url * @param url
* @param referrerhash * @param referrerhash
*/ */
@ -85,9 +104,8 @@ public class Request extends WorkflowJob {
} }
/** /**
* A Request Entry is a object that is created to provide * A Request Entry is a object that is created to provide all information to load a specific resource.
* all information to load a specific resource. *
*
* @param initiator the hash of the initiator peer * @param initiator the hash of the initiator peer
* @param url the {@link URL} to crawl * @param url the {@link URL} to crawl
* @param referrer the hash of the referrer URL * @param referrer the hash of the referrer URL
@ -99,35 +117,36 @@ public class Request extends WorkflowJob {
* @param forkfactor sum of anchors of all ancestors * @param forkfactor sum of anchors of all ancestors
*/ */
public Request( public Request(
final byte[] initiator, final byte[] initiator,
final DigestURI url, final DigestURI url,
final byte[] referrerhash, final byte[] referrerhash,
final String name, final String name,
final Date appdate, final Date appdate,
final String profileHandle, final String profileHandle,
final int depth, final int depth,
final int anchors, final int anchors,
final int forkfactor, final int forkfactor,
final long size final long size) {
) {
// create new entry and store it into database // create new entry and store it into database
assert url != null; assert url != null;
assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle + " != " + Word.commonHashLength; assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle
+ " != "
+ Word.commonHashLength;
url.removeRef(); // remove anchor reference url.removeRef(); // remove anchor reference
this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator); this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator);
this.url = url; this.url = url;
this.refhash = referrerhash; this.refhash = referrerhash;
this.name = (name == null) ? "" : name; this.name = (name == null) ? "" : name;
this.appdate = (appdate == null) ? 0 : appdate.getTime(); this.appdate = (appdate == null) ? 0 : appdate.getTime();
this.profileHandle = profileHandle; // must not be null this.profileHandle = profileHandle; // must not be null
this.depth = depth; this.depth = depth;
this.anchors = anchors; this.anchors = anchors;
this.forkfactor = forkfactor; this.forkfactor = forkfactor;
this.flags = new Bitfield(rowdef.width(10)); this.flags = new Bitfield(rowdef.width(10));
this.statusMessage = "loaded(args)"; this.statusMessage = "loaded(args)";
this.initialHash = url.hashCode(); this.initialHash = url.hashCode();
this.status = WorkflowJob.STATUS_INITIATED; this.status = WorkflowJob.STATUS_INITIATED;
this.size = size; this.size = size;
} }
public Request(final Row.Entry entry) throws IOException { public Request(final Row.Entry entry) throws IOException {
@ -136,27 +155,35 @@ public class Request extends WorkflowJob {
} }
private void insertEntry(final Row.Entry entry) throws IOException { private void insertEntry(final Row.Entry entry) throws IOException {
final String urlstring = entry.getColUTF8(2); try {
if (urlstring == null) throw new IOException ("url string is null"); final String urlstring = entry.getColUTF8(2);
this.initiator = entry.getColBytes(1, true); if ( urlstring == null ) {
this.initiator = (this.initiator == null) ? null : ((this.initiator.length == 0) ? null : this.initiator); throw new IOException("url string is null");
this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes()); }
this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true); this.initiator = entry.getColBytes(1, true);
this.name = (entry.empty(4)) ? "" : entry.getColUTF8(4).trim(); this.initiator =
this.appdate = entry.getColLong(5); (this.initiator == null) ? null : ((this.initiator.length == 0) ? null : this.initiator);
this.profileHandle = (entry.empty(6)) ? null : entry.getColASCII(6).trim(); this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes());
this.depth = (int) entry.getColLong(7); this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true);
this.anchors = (int) entry.getColLong(8); this.name = (entry.empty(4)) ? "" : entry.getColUTF8(4).trim();
this.forkfactor = (int) entry.getColLong(9); this.appdate = entry.getColLong(5);
this.flags = new Bitfield(entry.getColBytes(10, true)); this.profileHandle = (entry.empty(6)) ? null : entry.getColASCII(6).trim();
//this.loaddate = entry.getColLong(12); this.depth = (int) entry.getColLong(7);
//this.lastmodified = entry.getColLong(13); this.anchors = (int) entry.getColLong(8);
this.size = entry.getColLong(14); this.forkfactor = (int) entry.getColLong(9);
this.statusMessage = "loaded(kelondroRow.Entry)"; this.flags = new Bitfield(entry.getColBytes(10, true));
this.initialHash = this.url.hashCode(); //this.loaddate = entry.getColLong(12);
//this.lastmodified = entry.getColLong(13);
this.size = entry.getColLong(14);
this.statusMessage = "loaded(kelondroRow.Entry)";
this.initialHash = this.url.hashCode();
} catch ( Throwable e ) {
throw new IOException(e.getMessage());
}
return; return;
} }
@Override
public int hashCode() { public int hashCode() {
// overloads Object.hashCode() // overloads Object.hashCode()
return this.initialHash; return this.initialHash;
@ -179,7 +206,8 @@ public class Request extends WorkflowJob {
final byte[] sizestr = NaturalOrder.encodeLong(this.size, rowdef.width(14)); final byte[] sizestr = NaturalOrder.encodeLong(this.size, rowdef.width(14));
// store the hash in the hash cache // store the hash in the hash cache
final byte[] namebytes = UTF8.getBytes(this.name); final byte[] namebytes = UTF8.getBytes(this.name);
final byte[][] entry = new byte[][] { final byte[][] entry =
new byte[][] {
this.url.hash(), this.url.hash(),
this.initiator, this.initiator,
this.url.toString().getBytes(), this.url.toString().getBytes(),
@ -194,7 +222,8 @@ public class Request extends WorkflowJob {
NaturalOrder.encodeLong(0, rowdef.width(11)), NaturalOrder.encodeLong(0, rowdef.width(11)),
loaddatestr, loaddatestr,
serverdatestr, serverdatestr,
sizestr}; sizestr
};
return rowdef.newEntry(entry); return rowdef.newEntry(entry);
} }
@ -227,6 +256,7 @@ public class Request extends WorkflowJob {
// the date when the url appeared first // the date when the url appeared first
return new Date(this.appdate); return new Date(this.appdate);
} }
/* /*
public Date loaddate() { public Date loaddate() {
// the date when the url was loaded // the date when the url was loaded
@ -255,7 +285,9 @@ public class Request extends WorkflowJob {
public String profileHandle() { public String profileHandle() {
// the handle of the crawl profile // the handle of the crawl profile
assert this.profileHandle.length() == Word.commonHashLength : this.profileHandle + " != " + Word.commonHashLength; assert this.profileHandle.length() == Word.commonHashLength : this.profileHandle
+ " != "
+ Word.commonHashLength;
return this.profileHandle; return this.profileHandle;
} }

Loading…
Cancel
Save