diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java
index 5bd0fe0fa..ba7d831c8 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/IndexCreate_p.java
@@ -60,7 +60,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlProfile;
@@ -205,7 +204,7 @@ public class IndexCreate_p {
prop.put("error_reasonString", reasonString);
plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(crawlingStartURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash,
- crawlingStartURL.getHost(), reasonString, new bitfield(indexRWIEntryOld.urlFlagLength));
+ crawlingStartURL.getHost(), reasonString, new bitfield());
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
}
@@ -283,7 +282,7 @@ public class IndexCreate_p {
c++;
} else {
plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(nexturlURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash,
- (String) e.getValue(), rejectReason, new bitfield(indexRWIEntryOld.urlFlagLength));
+ (String) e.getValue(), rejectReason, new bitfield());
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
}
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index 88a0d10eb..5409041e5 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -50,7 +50,6 @@ import java.io.IOException;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlNURL;
@@ -157,7 +156,7 @@ public final class crawlReceipt {
} else {
try {
plasmaCrawlNURL.Entry en = switchboard.urlPool.noticeURL.getEntry(receivedUrlhash);
- plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield(indexRWIEntryOld.urlFlagLength));
+ plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield());
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
switchboard.urlPool.noticeURL.remove(receivedUrlhash);
diff --git a/source/de/anomic/index/indexRWIEntryOld.java b/source/de/anomic/index/indexRWIEntryOld.java
index 1461ad77e..0d56ac0f1 100644
--- a/source/de/anomic/index/indexRWIEntryOld.java
+++ b/source/de/anomic/index/indexRWIEntryOld.java
@@ -36,35 +36,14 @@ import de.anomic.yacy.yacySeedDB;
public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
// this object stores attributes to URL references inside RWI collections
-
- // statics for value lengths
- public static final int urlStringLength = 256;// not too short for links without parameters
- public static final int urlDescrLength = 80; // The headline of a web page (meta-tag or
)
- public static final int urlNameLength = 40; // the tag content between and
- public static final int urldescrtagsLength = 320;// the url, the description and tags in one string
- public static final int urlErrorLength = 80; // a reason description for unavailable urls
- public static final int urlDateLength = 4; // any date, shortened
- public static final int urlCopyCountLength = 2; // counter for numbers of copies of this index
- public static final int urlFlagLength = 2; // any stuff
- public static final int urlLanguageLength = 2; // taken from TLD suffix as quick-hack
- public static final int urlDoctypeLength = 1; // taken from extension
- public static final int urlSizeLength = 6; // the source size, from cache
- public static final int urlWordCountLength = 3; // the number of words, from condenser
- public static final int urlCrawlProfileHandleLength = 4; // name of the prefetch profile
- public static final int urlCrawlDepthLength = 2; // prefetch depth, first is '0'
- public static final int urlParentBranchesLength = 3; // number of anchors of the parent
- public static final int urlForkFactorLength = 4; // sum of anchors of all ancestors
- public static final int urlRetryLength = 2; // number of load retries
- public static final int urlHostLength = 8; // the host as struncated name
- public static final int urlHandleLength = 4; // a handle
- public static final int urlQualityLength = 3; // taken from heuristic
+
public static kelondroRow urlEntryRow = new kelondroRow(new kelondroColumn[]{
new kelondroColumn("h", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacySeedDB.commonHashLength, "urlhash"),
- new kelondroColumn("q", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, urlQualityLength, "quality"),
+ new kelondroColumn("q", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 3, "quality"),
new kelondroColumn("a", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 3, "lastModified"),
new kelondroColumn("c", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "hitcount"),
- new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, urlLanguageLength, "language"),
+ new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, 2, "language"),
new kelondroColumn("d", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "doctype"),
new kelondroColumn("f", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "localflag"),
new kelondroColumn("t", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posintext"),
@@ -118,7 +97,7 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
// - boolean: appearance attributes: title, appears in header, anchor-descr, image-tag, hervorhebungen, meta-tags, word in link etc
// - boolean: URL attributes
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
- if ((language == null) || (language.length() != urlLanguageLength)) language = "uk";
+ if ((language == null) || (language.length() != urlEntryRow.width(col_language))) language = "uk";
this.entry = urlEntryRow.newEntry();
this.entry.setCol(col_urlhash, urlHash, null);
this.entry.setCol(col_quality, quality);
diff --git a/source/de/anomic/index/indexURLEntryOld.java b/source/de/anomic/index/indexURLEntryOld.java
index 4e0ca13d0..ae0feeb8f 100644
--- a/source/de/anomic/index/indexURLEntryOld.java
+++ b/source/de/anomic/index/indexURLEntryOld.java
@@ -40,21 +40,21 @@ import de.anomic.tools.crypt;
import de.anomic.yacy.yacySeedDB;
public class indexURLEntryOld implements indexURLEntry {
-
+
public static final kelondroRow rowdef = new kelondroRow(
"String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
- "String urlstring-" + indexRWIEntryOld.urlStringLength + ", " + // the url as string
- "String urldescr-" + indexRWIEntryOld.urlDescrLength + ", " + // the description of the url
- "Cardinal moddate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // last-modified from the httpd
- "Cardinal loaddate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // time when the url was loaded
+ "String urlstring-256, " + // the url as string
+ "String urldescr-80, " + // the description of the url
+ "Cardinal moddate-4 {b64e}, " + // last-modified from the httpd
+ "Cardinal loaddate-4 {b64e}, " + // time when the url was loaded
"String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
- "Cardinal copycount-" + indexRWIEntryOld.urlCopyCountLength + " {b64e}, " + //
- "byte[] flags-" + indexRWIEntryOld.urlFlagLength + ", " + // flags
- "Cardinal quality-" + indexRWIEntryOld.urlQualityLength + " {b64e}, " + //
- "String language-" + indexRWIEntryOld.urlLanguageLength + ", " + //
- "byte[] doctype-" + indexRWIEntryOld.urlDoctypeLength + ", " + //
- "Cardinal size-" + indexRWIEntryOld.urlSizeLength + " {b64e}, " + // size of file in bytes
- "Cardinal wc-" + indexRWIEntryOld.urlWordCountLength + " {b64e}"); // word count
+ "Cardinal copycount-2 {b64e}, " + // not used
+ "byte[] flags-2, " + // flags
+ "Cardinal quality-3 {b64e}, " + // deprecated
+ "String language-2, " + // language key; mainly the TLD
+ "byte[] doctype-1, " + // document type, stored as a single byte
+ "Cardinal size-6 {b64e}, " + // size of file in bytes
+ "Cardinal wc-3 {b64e}"); // word count
private URL url;
private String descr;
@@ -176,8 +176,8 @@ public class indexURLEntryOld implements indexURLEntry {
}
public kelondroRow.Entry toRowEntry() throws IOException {
- final String moddatestr = kelondroBase64Order.enhancedCoder.encodeLong(moddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
- final String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
+ final String moddatestr = kelondroBase64Order.enhancedCoder.encodeLong(moddate.getTime() / 86400000, rowdef.width(3));
+ final String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, rowdef.width(4));
final byte[][] entry = new byte[][] {
urlHash.getBytes(),
@@ -186,13 +186,13 @@ public class indexURLEntryOld implements indexURLEntry {
moddatestr.getBytes(),
loaddatestr.getBytes(),
referrerHash.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(copyCount, indexRWIEntryOld.urlCopyCountLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(copyCount, rowdef.width(6)).getBytes(),
flags.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(quality, indexRWIEntryOld.urlQualityLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(quality, rowdef.width(8)).getBytes(),
language.getBytes(),
new byte[] { (byte) doctype },
- kelondroBase64Order.enhancedCoder.encodeLong(size, indexRWIEntryOld.urlSizeLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(wordCount, indexRWIEntryOld.urlWordCountLength).getBytes()};
+ kelondroBase64Order.enhancedCoder.encodeLong(size, rowdef.width(11)).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(wordCount, rowdef.width(12)).getBytes()};
return rowdef.newEntry(entry);
}
@@ -288,9 +288,7 @@ public class indexURLEntryOld implements indexURLEntry {
.append(",size=").append(size).append(",wc=").append(
wordCount).append(",cc=").append(copyCount).append(
",local=").append(((local()) ? "true" : "false"))
- .append(",q=").append(
- kelondroBase64Order.enhancedCoder.encodeLong(
- quality, indexRWIEntryOld.urlQualityLength))
+ .append(",q=0")
.append(",dt=").append(doctype).append(",lang=").append(
language).append(",url=").append(
crypt.simpleEncode(url.toString())).append(
diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java
index 113d87b33..1b88d2057 100644
--- a/source/de/anomic/kelondro/kelondroRecords.java
+++ b/source/de/anomic/kelondro/kelondroRecords.java
@@ -1179,6 +1179,8 @@ public class kelondroRecords {
byte[] key = nn.getKey();
if ((key == null) ||
((key.length == 1) && (key[0] == (byte) 0x80)) || // the NUL pointer ('lost' chain terminator)
+ (key.length < 3) ||
+ ((key.length > 3) && (key[2] == 0) && (key[3] == 0)) ||
((key.length > 3) && (key[0] == (byte) 0x80) && (key[1] == 0) && (key[2] == 0) && (key[3] == 0)) ||
((key.length > 0) && (key[0] == 0)) // a 'lost' pointer within a deleted-chain
) {
diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java
index 16347829d..3a5b0359e 100644
--- a/source/de/anomic/kelondro/kelondroRow.java
+++ b/source/de/anomic/kelondro/kelondroRow.java
@@ -162,6 +162,7 @@ public class kelondroRow {
}
public Entry(byte[] rowinstance, int start, int length) {
+ assert objectsize == length;
this.rowinstance = new byte[objectsize];
int ll = Math.min(objectsize, length);
System.arraycopy(rowinstance, start, this.rowinstance, 0, ll);
@@ -169,15 +170,20 @@ public class kelondroRow {
}
public Entry(byte[][] cols) {
+ assert row.length == cols.length; // only checked when assertions are enabled (-ea); the loop below still tolerates a shorter cols array
rowinstance = new byte[objectsize];
int ll;
+ int cs, cw; // start offset and width (in bytes) of the current column inside rowinstance
for (int i = 0; i < row.length; i++) {
+ cs = colstart[i]; // looked up once per column instead of once per byte
+ cw = row[i].cellwidth(); // looked up once per column instead of once per byte
if ((i >= cols.length) || (cols[i] == null)) {
- for (int j = 0; j < row[i].cellwidth(); j++) this.rowinstance[colstart[i] + j] = 0;
+ for (int j = 0; j < cw; j++) this.rowinstance[cs + j] = 0; // missing or null column: the whole cell becomes zero bytes
} else {
- ll = Math.min(cols[i].length, row[i].cellwidth());
- System.arraycopy(cols[i], 0, rowinstance, colstart[i], ll);
- for (int j = ll; j < row[i].cellwidth(); j++) this.rowinstance[colstart[i] + j] = 0;
+ //assert cols[i].length <= cw : "i = " + i + ", cols[i].length = " + cols[i].length + ", cw = " + cw;
+ ll = Math.min(cols[i].length, cw); // input longer than the cell is silently cut off at cw bytes
+ System.arraycopy(cols[i], 0, rowinstance, cs, ll); // copy the (possibly shortened) value to the start of the cell
+ for (int j = ll; j < cw; j++) this.rowinstance[cs + j] = 0; // zero-pad the rest of the cell
}
}
}
@@ -252,6 +258,7 @@ public class kelondroRow {
System.arraycopy(cell, 0, rowinstance, offset, cell.length);
while (length-- > cell.length) rowinstance[offset + length] = 0;
} else {
+ //assert cell.length == length;
System.arraycopy(cell, 0, rowinstance, offset, length);
}
}
diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
index 95caa46c3..1e4c806df 100644
--- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
+++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
@@ -51,7 +51,6 @@ import java.io.File;
import java.io.IOException;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
@@ -298,7 +297,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
yacyCore.seedDB.mySeed.hash,
this.name,
(failreason==null)?"Unknown reason":failreason,
- new bitfield(indexRWIEntryOld.urlFlagLength)
+ new bitfield()
);
// store the entry
diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java
index 0bb32a489..52039c7d9 100644
--- a/source/de/anomic/plasma/plasmaCrawlEURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlEURL.java
@@ -54,7 +54,6 @@ import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroRow;
@@ -133,20 +132,22 @@ public class plasmaCrawlEURL extends indexURL {
* ======================================================================= */
private LinkedList rejectedStack = new LinkedList(); // strings: url
+ public final static kelondroRow rowdef = new kelondroRow(
+ "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
+ "String executor-" + yacySeedDB.commonHashLength + ", " + // the crawling executor
+ "String urlstring-256, " + // the url as string
+ "String urlname-40, " + // the name of the url, from anchor tag name
+ "Cardinal appdate-4 {b64e}, " + // the time when the url was first time appeared
+ "Cardinal loaddate-4 {b64e}, " + // the time when the url was last time tried to load
+ "Cardinal retrycount-2 {b64e}, " + // number of load retries
+ "String failcause-80, " + // string describing load failure
+ "byte[] flags-2"); // extra space
+
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
super();
- kelondroRow rowdef = new kelondroRow(
- "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
- "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
- "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
- "String executor-" + yacySeedDB.commonHashLength + ", " + // the crawling executor
- "String urlstring-" + indexRWIEntryOld.urlStringLength + ", " + // the url as string
- "String urlname-" + indexRWIEntryOld.urlNameLength + ", " + // the name of the url, from anchor tag name
- "Cardinal appdate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // the time when the url was first time appeared
- "Cardinal loaddate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // the time when the url was last time tried to load
- "Cardinal retrycount-" + indexRWIEntryOld.urlRetryLength + " {b64e}, " + // number of load retries
- "String failcause-" + indexRWIEntryOld.urlErrorLength + ", " + // string describing load failure
- "byte[] flags-" + indexRWIEntryOld.urlFlagLength); // extra space
+
if (newdb) {
String newCacheName = "urlErr3.table";
@@ -291,8 +292,8 @@ public class plasmaCrawlEURL extends indexURL {
// stores the values from the object variables into the database
if (this.stored) return;
if (this.hash == null) return;
- String initdatestr = kelondroBase64Order.enhancedCoder.encodeLong(initdate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
- String trydatestr = kelondroBase64Order.enhancedCoder.encodeLong(trydate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
+ String initdatestr = kelondroBase64Order.enhancedCoder.encodeLong(initdate.getTime() / 86400000, rowdef.width(6));
+ String trydatestr = kelondroBase64Order.enhancedCoder.encodeLong(trydate.getTime() / 86400000, rowdef.width(7));
// store the hash in the hash cache
try {
@@ -306,7 +307,7 @@ public class plasmaCrawlEURL extends indexURL {
this.name.getBytes(),
initdatestr.getBytes(),
trydatestr.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, indexRWIEntryOld.urlRetryLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, rowdef.width(8)).getBytes(),
this.failreason.getBytes(),
this.flags.getBytes()
};
diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java
index 9e07210df..7b31da4cc 100644
--- a/source/de/anomic/plasma/plasmaCrawlNURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlNURL.java
@@ -51,7 +51,6 @@ import java.util.HashSet;
import java.util.Iterator;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroException;
@@ -66,7 +65,7 @@ import de.anomic.tools.bitfield;
import de.anomic.yacy.yacySeedDB;
public class plasmaCrawlNURL extends indexURL {
-
+
public static final int STACK_TYPE_NULL = 0; // do not stack
public static final int STACK_TYPE_CORE = 1; // put on local stack
public static final int STACK_TYPE_LIMIT = 2; // put on global stack
@@ -80,18 +79,19 @@ public class plasmaCrawlNURL extends indexURL {
* column length definition for the {@link plasmaURL#urlIndexFile} DB
*/
public final static kelondroRow rowdef = new kelondroRow(
- "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
- "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
- "String urlstring-" + indexRWIEntryOld.urlStringLength + ", " + // the url as string
- "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
- "String urlname-" + indexRWIEntryOld.urlNameLength + ", " + // the name of the url, from anchor tag name
- "Cardinal appdate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // the time when the url was first time appeared
- "String profile-" + indexRWIEntryOld.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
- "Cardinal depth-" + indexRWIEntryOld.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
- "Cardinal parentbr-" + indexRWIEntryOld.urlParentBranchesLength + " {b64e}, " + // number of anchors of the parent
- "Cardinal forkfactor-" + indexRWIEntryOld.urlForkFactorLength + " {b64e}, " + // sum of anchors of all ancestors
- "byte[] flags-" + indexRWIEntryOld.urlFlagLength + ", " + // flags
- "String handle-" + indexRWIEntryOld.urlHandleLength); // extra handle
+ "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
+ "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
+ "String urlstring-256, " + // the url as string
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "String urlname-40, " + // the name of the url, from anchor tag name
+ "Cardinal appdate-4 {b64e}, " + // the time when the url was first time appeared
+ "String profile-" + plasmaCrawlProfile.crawlProfileHandleLength + ", " + // the prefetch profile handle; width taken from plasmaCrawlProfile, which generates the handles
+ "Cardinal depth-2 {b64e}, " + // the prefetch depth so far, starts at 0
+ "Cardinal parentbr-3 {b64e}, " + // number of anchors of the parent
+ "Cardinal forkfactor-4 {b64e}, " + // sum of anchors of all ancestors
+ "byte[] flags-2, " + // flags
+ "String handle-4" // extra handle
+ );
private final plasmaCrawlBalancer coreStack; // links found by crawling to depth-1
private final plasmaCrawlBalancer limitStack; // links found by crawling at target depth
@@ -259,7 +259,7 @@ public class plasmaCrawlNURL extends indexURL {
private static String normalizeHandle(int h) {
String d = Integer.toHexString(h);
- while (d.length() < indexRWIEntryOld.urlHandleLength) d = "0" + d;
+ while (d.length() < rowdef.width(11)) d = "0" + d; // left-pad the hex string with '0' up to the width of rowdef column 11, the "handle" column
return d;
}
@@ -481,7 +481,7 @@ public class plasmaCrawlNURL extends indexURL {
this.depth = depth;
this.anchors = anchors;
this.forkfactor = forkfactor;
- this.flags = new bitfield(indexRWIEntryOld.urlFlagLength);
+ this.flags = new bitfield(rowdef.width(10));
this.handle = 0;
this.stored = false;
}
@@ -535,7 +535,7 @@ public class plasmaCrawlNURL extends indexURL {
public void store() {
// stores the values from the object variables into the database
if (this.stored) return;
- String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
+ String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, rowdef.width(5));
// store the hash in the hash cache
try {
// even if the entry exists, we simply overwrite it
@@ -547,9 +547,9 @@ public class plasmaCrawlNURL extends indexURL {
this.name.getBytes("UTF-8"),
loaddatestr.getBytes(),
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexRWIEntryOld.urlCrawlDepthLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, indexRWIEntryOld.urlParentBranchesLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, indexRWIEntryOld.urlForkFactorLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.depth, rowdef.width(7)).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, rowdef.width(8)).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, rowdef.width(9)).getBytes(),
this.flags.getBytes(),
normalizeHandle(this.handle).getBytes()
};
diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java
index b8bfeffbf..e9eb1d787 100644
--- a/source/de/anomic/plasma/plasmaCrawlProfile.java
+++ b/source/de/anomic/plasma/plasmaCrawlProfile.java
@@ -48,7 +48,6 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroException;
@@ -63,12 +62,14 @@ public class plasmaCrawlProfile {
private int bufferkb;
private long preloadTime;
+ public static final int crawlProfileHandleLength = 4; // length, in characters, of a crawl (prefetch) profile handle
+
public plasmaCrawlProfile(File file, int bufferkb, long preloadTime) {
this.profileTableFile = file;
this.bufferkb = bufferkb;
this.preloadTime = preloadTime;
profileTableFile.getParentFile().mkdirs();
- kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, indexRWIEntryOld.urlCrawlProfileHandleLength, 2000, '#');
+ kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, crawlProfileHandleLength, 2000, '#');
profileTable = new kelondroMap(dyn);
domsCache = new HashMap();
}
@@ -94,7 +95,7 @@ public class plasmaCrawlProfile {
if (profileTable != null) try { profileTable.close(); } catch (IOException e) {}
if (!(profileTableFile.delete())) throw new RuntimeException("cannot delete crawl profile database");
profileTableFile.getParentFile().mkdirs();
- kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, indexRWIEntryOld.urlCrawlProfileHandleLength, 2000, '#');
+ kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, crawlProfileHandleLength, 2000, '#');
profileTable = new kelondroMap(dyn);
}
@@ -256,7 +257,7 @@ public class plasmaCrawlProfile {
boolean storeHTCache, boolean storeTXCache,
boolean localIndexing, boolean remoteIndexing,
boolean xsstopw, boolean xdstopw, boolean xpstopw) {
- String handle = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(0, indexRWIEntryOld.urlCrawlProfileHandleLength);
+ String handle = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(0, crawlProfileHandleLength);
mem = new HashMap();
mem.put("handle", handle);
mem.put("name", name);
diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java
index 27fc0d9ed..8ea724ec9 100644
--- a/source/de/anomic/plasma/plasmaCrawlStacker.java
+++ b/source/de/anomic/plasma/plasmaCrawlStacker.java
@@ -60,7 +60,6 @@ import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.data.robotsParser;
import de.anomic.http.httpc;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
@@ -492,7 +491,7 @@ public final class plasmaCrawlStacker {
this.depth = depth;
this.anchors = anchors;
this.forkfactor = forkfactor;
- this.flags = new bitfield(indexRWIEntryOld.urlFlagLength);
+ this.flags = new bitfield();
this.handle = 0;
} catch (Exception e) {
e.printStackTrace();
@@ -571,11 +570,11 @@ public final class plasmaCrawlStacker {
//.append("flags: ").append((flags==null) ? "null" : flags.toString())
;
return str.toString();
- }
+ }
public byte[][] getBytes() {
// stores the values from the object variables into the database
- String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
+ String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, plasmaCrawlNURL.rowdef.width(5));
// store the hash in the hash cache
// even if the entry exists, we simply overwrite it
@@ -589,19 +588,19 @@ public final class plasmaCrawlStacker {
this.name.getBytes("UTF-8"),
loaddatestr.getBytes(),
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexRWIEntryOld.urlCrawlDepthLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, indexRWIEntryOld.urlParentBranchesLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, indexRWIEntryOld.urlForkFactorLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.depth, plasmaCrawlNURL.rowdef.width(7)).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, plasmaCrawlNURL.rowdef.width(8)).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, plasmaCrawlNURL.rowdef.width(9)).getBytes(),
this.flags.getBytes(),
normalizeHandle(this.handle).getBytes()
- };
+ };
} catch (UnsupportedEncodingException e) { /* ignore this */ }
return entry;
}
private String normalizeHandle(int h) {
String d = Integer.toHexString(h);
- while (d.length() < indexRWIEntryOld.urlHandleLength) d = "0" + d;
+ while (d.length() < plasmaCrawlNURL.rowdef.width(11)) d = "0" + d; // left-pad the hex string with '0' up to the width of column 11 ("handle") of plasmaCrawlNURL.rowdef
return d;
}
}
@@ -1059,7 +1058,7 @@ public final class plasmaCrawlStacker {
yacyCore.seedDB.mySeed.hash,
this.theMsg.name,
rejectReason,
- new bitfield(indexRWIEntryOld.urlFlagLength)
+ new bitfield()
);
ee.store();
sb.urlPool.errorURL.stackPushEntry(ee);
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 572120f86..07746041b 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1497,7 +1497,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (document == null) return;
} catch (ParserException e) {
this.log.logInfo("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage());
- addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorPeerHash, entry.anchorName(), e.getErrorCode(), new bitfield(indexRWIEntryOld.urlFlagLength));
+ addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorPeerHash, entry.anchorName(), e.getErrorCode(), new bitfield());
if (document != null) {
document.close();
document = null;
@@ -1764,7 +1764,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
} else {
log.logFine("Not Indexed Resource '" + entry.normalizedURLString() + "': process case=" + processCase);
- addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new bitfield(indexRWIEntryOld.urlFlagLength));
+ addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new bitfield());
}
} catch (Exception ee) {
if (ee instanceof InterruptedException) throw (InterruptedException)ee;
@@ -1776,7 +1776,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) {
yacyClient.crawlReceipt(initiatorPeer, "crawl", "exception", ee.getMessage(), null, "");
}
- addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new bitfield(indexRWIEntryOld.urlFlagLength));
+ addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new bitfield());
}
} else {
@@ -1784,7 +1784,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
checkInterruption();
log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason);
- addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, noIndexReason, new bitfield(indexRWIEntryOld.urlFlagLength));
+ addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, noIndexReason, new bitfield());
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) {
yacyClient.crawlReceipt(initiatorPeer, "crawl", "rejected", noIndexReason, null, "");
}
diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
index a6f28f456..16fa24d26 100644
--- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
@@ -51,7 +51,6 @@ import java.util.ArrayList;
import java.util.Date;
import de.anomic.index.indexURL;
-import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
@@ -78,18 +77,18 @@ public class plasmaSwitchboardQueue {
initQueueStack();
}
-
+
+ public static final kelondroRow rowdef = new kelondroRow(
+ "String url-256, " + // the url
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "Cardinal modifiedsince-11 {b64e}, " + // from ifModifiedSince
+ "byte[] flags-1, " + // flags
+ "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
+ "Cardinal depth-2 {b64e}, " + // the prefetch depth so far, starts at 0
+ "String profile-" + plasmaCrawlProfile.crawlProfileHandleLength + ", " + // the name of the prefetch profile handle
+ "String urldescr-80"); // description of the url; the queue writes the entry's anchor name here
+
private void initQueueStack() {
- kelondroRow rowdef = new kelondroRow(
- "String url-" + indexRWIEntryOld.urlStringLength + ", " + // the url
- "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
- "Cardinal modifiedsince-11" + " {b64e}, " + // from ifModifiedSince
- "byte[] flags-1" + ", " + // flags
- "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
- "Cardinal depth-" + indexRWIEntryOld.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
- "String profile-" + indexRWIEntryOld.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
- "String urldescr-" + indexRWIEntryOld.urlDescrLength); //
-
sbQueueStack = kelondroStack.open(sbQueueStackPath, rowdef);
}
@@ -110,7 +109,7 @@ public class plasmaSwitchboardQueue {
kelondroBase64Order.enhancedCoder.encodeLong((entry.ifModifiedSince == null) ? 0 : entry.ifModifiedSince.getTime(), 11).getBytes(),
new byte[]{entry.flags},
(entry.initiator == null) ? indexURL.dummyHash.getBytes() : entry.initiator.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, indexRWIEntryOld.urlCrawlDepthLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, rowdef.width(5)).getBytes(),
(entry.profileHandle == null) ? indexURL.dummyHash.getBytes() : entry.profileHandle.getBytes(),
(entry.anchorName == null) ? "-".getBytes("UTF-8") : entry.anchorName.getBytes("UTF-8")
}));
diff --git a/source/de/anomic/tools/bitfield.java b/source/de/anomic/tools/bitfield.java
index b417145ab..7138ddbf1 100644
--- a/source/de/anomic/tools/bitfield.java
+++ b/source/de/anomic/tools/bitfield.java
@@ -44,6 +44,10 @@ public class bitfield {
private byte[] bb;
+ public bitfield() { // creates an empty bitfield
+ this(0); // delegates to bitfield(int), which allocates a byte array of the given length (here: zero bytes)
+ }
+
public bitfield(int bytelength) {
this.bb= new byte[bytelength];
for (int i = 0 ; i < bytelength; i++) bb[i] = 0;
@@ -58,20 +62,29 @@ public class bitfield {
return (byte) ((64 | ((a + 16) | (1< 5) || (pos < 0)) throw new RuntimeException("atom position out of bounds: " + pos);
return (byte) (((a + 16) & (0xff ^ (1< bb.length)) throw new RuntimeException("position out of bounds: " + pos);
+ if (pos < 0) throw new RuntimeException("position out of bounds: " + pos);
+ if (slot >= bb.length) { // valid slots are 0..bb.length-1, so slot == bb.length must grow the array as well
+ // extend capacity
+ byte[] nb = new byte[slot + 1];
+ System.arraycopy(bb, 0, nb, 0, bb.length);
+ for (int i = bb.length; i < nb.length; i++) nb[i] = 0;
+ bb = nb;
+ nb = null;
+ }
bb[slot] = (value) ? setAtom(bb[slot], pos % 6) : unsetAtom(bb[slot], pos % 6);
}
public boolean get(int pos) {
int slot = pos / 6;
- if ((pos < 0) || (slot > bb.length)) throw new RuntimeException("position out of bounds: " + pos);
+ if (pos < 0) throw new RuntimeException("position out of bounds: " + pos);
+ if (slot >= bb.length) return false; // bits beyond the allocated bytes read as unset; >= because slot == bb.length is already past the last byte
return (bb[slot] & (1<<(pos%6))) > 0;