diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java
index 111169f3a..42a36bc6d 100644
--- a/htroot/Bookmarks.java
+++ b/htroot/Bookmarks.java
@@ -55,8 +55,8 @@ import de.anomic.data.bookmarksDB;
import de.anomic.data.listManager;
import de.anomic.data.bookmarksDB.Tag;
import de.anomic.http.httpHeader;
+import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@@ -147,10 +147,10 @@ public class Bookmarks {
bookmarksDB.Bookmark bookmark = switchboard.bookmarksDB.getBookmark(urlHash);
if (bookmark == null) {
// try to get the bookmark from the LURL database
- plasmaCrawlLURLEntry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null);
+ indexURLEntry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null);
plasmaParserDocument document = null;
if (urlentry != null) {
- plasmaCrawlLURLEntry.Components comp = urlentry.comp();
+ indexURLEntry.Components comp = urlentry.comp();
document = switchboard.snippetCache.retrieveDocument(comp.url(), true);
prop.put("mode_edit", 0); // create mode
prop.put("mode_url", comp.url().toNormalform());
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 73d44636f..86f3b8561 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -57,11 +57,11 @@ import java.util.TreeMap;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
@@ -161,7 +161,7 @@ public class IndexControl_p {
int i = 0;
urlx = new String[index.size()];
while (en.hasNext()) {
- urlx[i++] = ((indexEntry) en.next()).urlHash();
+ urlx[i++] = ((indexRWIEntry) en.next()).urlHash();
}
index = null;
}
@@ -218,7 +218,7 @@ public class IndexControl_p {
}
if (post.containsKey("urlhashdelete")) {
- plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
+ indexURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
if (entry == null) {
prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
@@ -263,10 +263,10 @@ public class IndexControl_p {
Iterator urlIter = index.entries();
HashMap knownURLs = new HashMap();
HashSet unknownURLEntries = new HashSet();
- indexEntry iEntry;
- plasmaCrawlLURLEntry lurl;
+ indexRWIEntry iEntry;
+ indexURLEntry lurl;
while (urlIter.hasNext()) {
- iEntry = (indexEntry) urlIter.next();
+ iEntry = (indexRWIEntry) urlIter.next();
lurl = switchboard.urlPool.loadedURL.load(iEntry.urlHash(), null);
if (lurl == null) {
unknownURLEntries.add(iEntry.urlHash());
@@ -320,7 +320,7 @@ public class IndexControl_p {
URL url = new URL(urlstring);
urlhash = indexURL.urlHash(url);
prop.put("urlhash", urlhash);
- plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
+ indexURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
if (entry == null) {
prop.put("urlstring", "unknown url: " + urlstring);
prop.put("urlhash", "");
@@ -334,7 +334,7 @@ public class IndexControl_p {
}
if (post.containsKey("urlhashsearch")) {
- plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
+ indexURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
if (entry == null) {
prop.put("result", "No Entry for URL hash " + urlhash);
} else {
@@ -348,12 +348,12 @@ public class IndexControl_p {
try {
final Iterator entryIt = switchboard.urlPool.loadedURL.entries(true, true, urlhash);
StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:
");
- plasmaCrawlLURLEntry entry;
+ indexURLEntry entry;
int i = 0;
int rows = 0, cols = 0;
prop.put("urlhashsimilar", 1);
while (entryIt.hasNext() && i < 256) {
- entry = (plasmaCrawlLURLEntry) entryIt.next();
+ entry = (indexURLEntry) entryIt.next();
prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash());
cols++;
if (cols==8) {
@@ -400,16 +400,16 @@ public class IndexControl_p {
return prop;
}
- public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLURLEntry entry, String urlhash) {
+ public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, indexURLEntry entry, String urlhash) {
serverObjects prop = new serverObjects();
if (entry == null) {
prop.put("genUrlProfile", 1);
prop.put("genUrlProfile_urlhash", urlhash);
return prop;
}
- plasmaCrawlLURLEntry.Components comp = entry.comp();
+ indexURLEntry.Components comp = entry.comp();
String referrer = null;
- plasmaCrawlLURLEntry le = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null);
+ indexURLEntry le = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null);
if (le == null) {
referrer = "";
} else {
@@ -453,11 +453,11 @@ public class IndexControl_p {
int i = 0;
final TreeMap tm = new TreeMap();
- indexEntry xi;
+ indexRWIEntry xi;
while (en.hasNext()) {
- xi = (indexEntry) en.next();
+ xi = (indexRWIEntry) en.next();
uh = new String[]{xi.urlHash(), Integer.toString(xi.posintext())};
- plasmaCrawlLURLEntry le = switchboard.urlPool.loadedURL.load(uh[0], null);
+ indexURLEntry le = switchboard.urlPool.loadedURL.load(uh[0], null);
if (le == null) {
tm.put(uh[0], uh);
} else {
diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java
index bca1de11f..5bd0fe0fa 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/IndexCreate_p.java
@@ -60,6 +60,7 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlProfile;
@@ -204,7 +205,7 @@ public class IndexCreate_p {
prop.put("error_reasonString", reasonString);
plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(crawlingStartURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash,
- crawlingStartURL.getHost(), reasonString, new bitfield(indexURL.urlFlagLength));
+ crawlingStartURL.getHost(), reasonString, new bitfield(indexRWIEntryOld.urlFlagLength));
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
}
@@ -282,7 +283,7 @@ public class IndexCreate_p {
c++;
} else {
plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(nexturlURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash,
- (String) e.getValue(), rejectReason, new bitfield(indexURL.urlFlagLength));
+ (String) e.getValue(), rejectReason, new bitfield(indexRWIEntryOld.urlFlagLength));
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
}
diff --git a/htroot/IndexMonitor.java b/htroot/IndexMonitor.java
index 3a015f938..97568020e 100644
--- a/htroot/IndexMonitor.java
+++ b/htroot/IndexMonitor.java
@@ -43,22 +43,33 @@
// javac -classpath .:../Classes Settings_p.java
// if the shell's current path is HTROOT
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+
import de.anomic.http.httpHeader;
+import de.anomic.index.indexURLEntry;
+import de.anomic.net.URL;
+import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
+import de.anomic.server.logging.serverLog;
+import de.anomic.tools.nxTools;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
public class IndexMonitor {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
- plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+ plasmaSwitchboard sb = (plasmaSwitchboard) env;
serverObjects prop = new serverObjects();
- int showIndexedCount = 40;
- boolean si = false;
- boolean se = false;
+ int lines = 40;
+ boolean showInit = false;
+ boolean showExec = false;
if (post == null) {
@@ -67,20 +78,20 @@ public class IndexMonitor {
}
// find process number
- int process;
+ int tabletype;
try {
- process = Integer.parseInt(post.get("process", "0"));
+ tabletype = Integer.parseInt(post.get("process", "0"));
} catch (NumberFormatException e) {
- process = 0;
+ tabletype = 0;
}
// check if authorization is needed and/or given
- if (((process > 0) && (process < 6)) ||
+ if (((tabletype > 0) && (tabletype < 6)) ||
(post.containsKey("clearlist")) ||
(post.containsKey("deleteentry"))) {
String authorization = ((String) header.get("Authorization", "xxxxxx"));
if (authorization.length() != 0) {
- if (! switchboard.verifyAuthentication(header, true)){
+ if (! sb.verifyAuthentication(header, true)){
// force log-in (again, because wrong password was given)
prop.put("AUTHENTICATE", "admin log-in");
return prop;
@@ -94,33 +105,102 @@ public class IndexMonitor {
// custom number of lines
if (post.containsKey("count")) {
- showIndexedCount = Integer.parseInt(post.get("count", "40"));
+ lines = Integer.parseInt(post.get("count", "40"));
}
// do the commands
- if (post.containsKey("clearlist")) switchboard.urlPool.loadedURL.clearStack(process);
+ if (post.containsKey("clearlist")) sb.urlPool.loadedURL.clearStack(tabletype);
if (post.containsKey("deleteentry")) {
String hash = post.get("hash", null);
if (hash != null) {
// delete from database
- switchboard.urlPool.loadedURL.remove(hash);
+ sb.urlPool.loadedURL.remove(hash);
}
}
if (post.containsKey("moreIndexed")) {
- showIndexedCount = Integer.parseInt(post.get("showIndexed", "40"));
+ lines = Integer.parseInt(post.get("showIndexed", "40"));
}
- if (post.get("si") != null) si = true;
- if (post.get("se") != null) se = true;
+ if (post.get("si") != null) showInit = true;
+ if (post.get("se") != null) showExec = true;
// create table
- if (process == 0) {
+ if (tabletype == 0) {
prop.put("table", 2);
+ } else if (sb.urlPool.loadedURL.getStackSize(tabletype) == 0) {
+ prop.put("table", 0);
} else {
- prop.putAll(switchboard.urlPool.loadedURL.genTableProps(process, showIndexedCount, si, se, "unknown", null, "IndexMonitor.html", true));
+ prop.put("table", 1);
+ if (lines > sb.urlPool.loadedURL.getStackSize(tabletype)) lines = sb.urlPool.loadedURL.getStackSize(tabletype);
+ if (lines == sb.urlPool.loadedURL.getStackSize(tabletype)) {
+ prop.put("table_size", 0);
+ } else {
+ prop.put("table_size", 1);
+ prop.put("table_size_count", lines);
+ }
+ prop.put("table_size_all", sb.urlPool.loadedURL.getStackSize(tabletype));
+ prop.put("table_feedbackpage", "IndexMonitor.html");
+ prop.put("table_tabletype", tabletype);
+ prop.put("table_showInit", (showInit) ? 1 : 0);
+ prop.put("table_showExec", (showExec) ? 1 : 0);
+
+ boolean dark = true;
+ String urlHash, initiatorHash, executorHash;
+ String cachepath, urlstr, urltxt;
+ yacySeed initiatorSeed, executorSeed;
+ indexURLEntry urle;
+
+ // needed for getCachePath(url)
+ final plasmaHTCache cacheManager = sb.getCacheManager();
+
+ int i, cnt = 0;
+ for (i = sb.urlPool.loadedURL.getStackSize(tabletype) - 1; i >= (sb.urlPool.loadedURL.getStackSize(tabletype) - lines); i--) {
+ initiatorHash = sb.urlPool.loadedURL.getInitiatorHash(tabletype, i);
+ executorHash = sb.urlPool.loadedURL.getExecutorHash(tabletype, i);
+// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps initiatorHash=" + initiatorHash + " executorHash=" + executorHash);
+ urlHash = sb.urlPool.loadedURL.getUrlHash(tabletype, i);
+// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps urlHash=" + urlHash);
+ try {
+ urle = sb.urlPool.loadedURL.load(urlHash, null);
+ indexURLEntry.Components comp = urle.comp();
+// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps urle=" + urle.toString());
+ initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash);
+ executorSeed = yacyCore.seedDB.getConnected(executorHash);
+
+ urlstr = comp.url().toNormalform();
+ urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL
+ cachepath = cacheManager.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(cacheManager.cachePath.toString().length() + 1);
+
+ prop.put("table_indexed_" + cnt + "_dark", (dark) ? 1 : 0);
+ prop.put("table_indexed_" + cnt + "_feedbackpage", "IndexMonitor.html");
+ prop.put("table_indexed_" + cnt + "_tabletype", tabletype);
+ prop.put("table_indexed_" + cnt + "_urlhash", urlHash);
+ prop.put("table_indexed_" + cnt + "_showInit", (showInit) ? 1 : 0);
+ prop.put("table_indexed_" + cnt + "_showInit_initiatorSeed", (initiatorSeed == null) ? "unknown" : initiatorSeed.getName());
+ prop.put("table_indexed_" + cnt + "_showExec", (showExec) ? 1 : 0);
+ prop.put("table_indexed_" + cnt + "_showExec_executorSeed", (executorSeed == null) ? "unknown" : executorSeed.getName());
+ prop.put("table_indexed_" + cnt + "_moddate", daydate(urle.moddate()));
+ prop.put("table_indexed_" + cnt + "_wordcount", urle.wordCount());
+ prop.put("table_indexed_" + cnt + "_urldescr", comp.descr());
+ prop.put("table_indexed_" + cnt + "_url", (cachepath == null) ? "-not-cached-" : "" + urltxt + "");
+ dark = !dark;
+ cnt++;
+ } catch (Exception e) {
+ serverLog.logSevere("PLASMA", "genTableProps", e);
+ }
+ }
+ prop.put("table_indexed", cnt);
}
- prop.put("process", process);
- // return rewrite properties
- return prop;
+ prop.put("process", tabletype);
+ // return rewrite properties
+ return prop;
}
+    // SimpleDateFormat is not thread-safe: only use this instance inside the synchronized daydate().
+    private static final SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
+    /** Formats the given date as yyyy/MM/dd (US locale); returns "" when date is null. */
+    private static synchronized String daydate(Date date) {
+        // NOTE(review): respond() is static; presumably it is invoked by concurrent request threads,
+        // which is why access to the shared formatter is serialized here - confirm against the server core.
+        if (date == null) return "";
+        return dayFormatter.format(date);
+    }
}
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 7302d7465..47d428683 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -54,7 +54,7 @@ import java.util.Enumeration;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
@@ -106,7 +106,7 @@ public class ViewFile {
String viewMode = post.get("viewMode","sentences");
// getting the urlEntry that belongs to the url hash
- plasmaCrawlLURLEntry urlEntry = null;
+ indexURLEntry urlEntry = null;
urlEntry = sb.urlPool.loadedURL.load(urlHash, null);
if (urlEntry == null) {
prop.put("error",2);
@@ -115,7 +115,7 @@ public class ViewFile {
}
// gettin the url that belongs to the entry
- plasmaCrawlLURLEntry.Components comp = urlEntry.comp();
+ indexURLEntry.Components comp = urlEntry.comp();
if ((comp == null) || (comp.url() == null)) {
prop.put("error",3);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java
index a4db30398..8e347ddde 100644
--- a/htroot/htdocsdefault/dir.java
+++ b/htroot/htdocsdefault/dir.java
@@ -61,10 +61,10 @@ import de.anomic.data.userDB;
import de.anomic.http.httpHeader;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
@@ -362,7 +362,7 @@ public class dir {
try {
final URL url = new URL(urlstring);
final plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(("yacyshare. " + phrase + ". " + descr).getBytes()));
- final plasmaCrawlLURLEntry newEntry = switchboard.urlPool.loadedURL.newEntry(
+ final indexURLEntry newEntry = switchboard.urlPool.loadedURL.newEntry(
url,
"YaCyShare: " + descr,
yacyCore.seedDB.mySeed.getName(),
diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java
index b60545195..fad5b9b39 100644
--- a/htroot/yacy/crawlOrder.java
+++ b/htroot/yacy/crawlOrder.java
@@ -50,8 +50,8 @@ import java.util.Date;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -249,7 +249,7 @@ public final class crawlOrder {
// case where we have already the url loaded;
reason = reasonString;
// send lurl-Entry as response
- plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(indexURL.urlHash(url), null);
+ indexURLEntry entry = switchboard.urlPool.loadedURL.load(indexURL.urlHash(url), null);
if (entry == null) {
response = "rejected";
lurl = "";
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index 1973adbb9..88a0d10eb 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -50,8 +50,9 @@ import java.io.IOException;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
+import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaCrawlEURL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@@ -124,12 +125,12 @@ public final class crawlReceipt {
prop.put("delay", "3600");
} else if (result.equals("fill")) {
// generating a new loaded URL entry
- plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.newEntry(propStr);
+ indexURLEntry entry = switchboard.urlPool.loadedURL.newEntry(propStr);
if (entry == null) {
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (entry null) for hash " + receivedUrlhash + " from peer " + iam +
"\n\tURL properties: "+ propStr);
} else {
- plasmaCrawlLURLEntry.Components comp = entry.comp();
+ indexURLEntry.Components comp = entry.comp();
if (comp.url() == null) {
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + receivedUrlhash + " from peer " + iam +
"\n\tURL properties: "+ propStr);
@@ -156,7 +157,7 @@ public final class crawlReceipt {
} else {
try {
plasmaCrawlNURL.Entry en = switchboard.urlPool.noticeURL.getEntry(receivedUrlhash);
- plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield(indexURL.urlFlagLength));
+ plasmaCrawlEURL.Entry ee = switchboard.urlPool.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield(indexRWIEntryOld.urlFlagLength));
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
switchboard.urlPool.noticeURL.remove(receivedUrlhash);
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index d09819441..7ae96f71b 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -54,7 +54,7 @@ import java.util.Set;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexURL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
@@ -249,10 +249,10 @@ public final class search {
StringBuffer links = new StringBuffer();
String resource = "";
//plasmaIndexEntry pie;
- plasmaCrawlLURLEntry urlentry;
+ indexURLEntry urlentry;
plasmaSnippetCache.Snippet snippet;
while ((acc.hasMoreElements()) && (i < squery.wantedResults)) {
- urlentry = (plasmaCrawlLURLEntry) acc.nextElement();
+ urlentry = (indexURLEntry) acc.nextElement();
if (includesnippet) {
snippet = sb.snippetCache.retrieveSnippet(urlentry.comp().url(), squery.queryHashes, false, 260, 1000);
} else {
diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java
index fb5b17266..2fa8ea4fd 100644
--- a/htroot/yacy/transferRWI.java
+++ b/htroot/yacy/transferRWI.java
@@ -51,8 +51,8 @@ import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.http.httpHeader;
-import de.anomic.index.indexEntry;
-import de.anomic.index.indexURLEntry;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
@@ -146,7 +146,7 @@ public final class transferRWI {
int p;
String wordHash;
String urlHash;
- indexEntry iEntry;
+ indexRWIEntry iEntry;
int wordhashesSize = v.size();
final HashSet unknownURL = new HashSet();
final HashSet knownURL = new HashSet();
@@ -162,7 +162,7 @@ public final class transferRWI {
if (p > 0) {
wordHash = estring.substring(0, p);
wordhashes[received] = wordHash;
- iEntry = new indexURLEntry(estring.substring(p));
+ iEntry = new indexRWIEntryOld(estring.substring(p));
urlHash = iEntry.urlHash();
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(plasmaURLPattern.BLACKLIST_DHT, urlHash))) {
int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java
index a09388ff7..6984bf679 100644
--- a/htroot/yacy/transferURL.java
+++ b/htroot/yacy/transferURL.java
@@ -48,7 +48,7 @@
import java.io.IOException;
import de.anomic.http.httpHeader;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
@@ -90,7 +90,7 @@ public final class transferURL {
final int sizeBefore = sb.urlPool.loadedURL.size();
// read the urls from the other properties and store
String urls;
- plasmaCrawlLURLEntry lEntry;
+ indexURLEntry lEntry;
for (int i = 0; i < urlc; i++) {
serverCore.checkInterruption();
urls = (String) post.get("url" + i);
@@ -102,7 +102,7 @@ public final class transferURL {
yacyCore.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls);
// TODO: should we send back an error message???
} else {
- plasmaCrawlLURLEntry.Components comp = lEntry.comp();
+ indexURLEntry.Components comp = lEntry.comp();
if (comp.url() == null) {
yacyCore.log.logWarning("transferURL: received invalid URL (url null) from peer " + otherPeerName + "\n\tURL Property: " + urls);
// TODO: should we send back an error message???
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 1721351fd..7c1034043 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -54,10 +54,10 @@ import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.net.URL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSearchImages;
import de.anomic.plasma.plasmaSearchPreOrder;
@@ -189,9 +189,9 @@ public class yacysearch {
return prop;
}
final String recommendHash = post.get("recommendref", ""); // urlhash
- plasmaCrawlLURLEntry urlentry = sb.urlPool.loadedURL.load(recommendHash, null);
+ indexURLEntry urlentry = sb.urlPool.loadedURL.load(recommendHash, null);
if (urlentry != null) {
- plasmaCrawlLURLEntry.Components comp = urlentry.comp();
+ indexURLEntry.Components comp = urlentry.comp();
plasmaParserDocument document;
document = sb.snippetCache.retrieveDocument(comp.url(), true);
if (document != null) {
diff --git a/source/de/anomic/index/indexCachedRI.java b/source/de/anomic/index/indexCachedRI.java
index 90b6748e5..1fdf34efb 100644
--- a/source/de/anomic/index/indexCachedRI.java
+++ b/source/de/anomic/index/indexCachedRI.java
@@ -91,7 +91,7 @@ public class indexCachedRI implements indexRI {
return new indexContainer(wordHash, payloadrow);
}
- public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean intern) {
+ public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean intern) {
// add the entry
if (intern) {
riIntern.addEntry(wordHash, entry, updateTime, true);
diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java
index c0e9218ed..6db01166f 100644
--- a/source/de/anomic/index/indexCollectionRI.java
+++ b/source/de/anomic/index/indexCollectionRI.java
@@ -152,7 +152,7 @@ public class indexCollectionRI implements indexRI {
}
}
- public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
+ public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow());
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java
index 2608a3c39..1fc7afbc9 100644
--- a/source/de/anomic/index/indexContainer.java
+++ b/source/de/anomic/index/indexContainer.java
@@ -81,18 +81,18 @@ public class indexContainer extends kelondroRowSet {
return wordHash;
}
- public int add(indexEntry entry) {
+ public int add(indexRWIEntry entry) {
this.addUnique(entry.toKelondroEntry());
return 1;
}
- public int add(indexEntry entry, long updateTime) {
+ public int add(indexRWIEntry entry, long updateTime) {
this.add(entry);
this.lastTimeWrote = updateTime;
return 1;
}
- public int add(indexEntry[] entries, long updateTime) {
+ public int add(indexRWIEntry[] entries, long updateTime) {
for (int i = 0; i < entries.length; i++) this.add(entries[i], updateTime);
return entries.length;
}
@@ -106,7 +106,7 @@ public class indexContainer extends kelondroRowSet {
Iterator i = c.entries();
while (i.hasNext()) {
try {
- if (addi((indexEntry) i.next())) x++;
+ if (addi((indexRWIEntry) i.next())) x++;
} catch (ConcurrentModificationException e) {
e.printStackTrace();
}
@@ -117,13 +117,13 @@ public class indexContainer extends kelondroRowSet {
return x;
}
- private boolean addi(indexEntry entry) {
+ private boolean addi(indexRWIEntry entry) {
// returns true if the new entry was added, false if it already existed
kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry());
if (oldEntryRow == null) {
return true;
} else {
- indexEntry oldEntry = new indexURLEntry(oldEntryRow); // FIXME: see if cloning is necessary
+ indexRWIEntry oldEntry = new indexRWIEntryOld(oldEntryRow); // FIXME: see if cloning is necessary
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back
return false;
@@ -133,16 +133,16 @@ public class indexContainer extends kelondroRowSet {
}
}
- public indexEntry get(String urlHash) {
+ public indexRWIEntry get(String urlHash) {
kelondroRow.Entry entry = this.get(urlHash.getBytes());
if (entry == null) return null;
- return new indexURLEntry(entry);
+ return new indexRWIEntryOld(entry);
}
- public indexEntry remove(String urlHash) {
+ public indexRWIEntry remove(String urlHash) {
kelondroRow.Entry entry = this.remove(urlHash.getBytes());
if (entry == null) return null;
- return new indexURLEntry(entry);
+ return new indexRWIEntryOld(entry);
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
@@ -178,7 +178,7 @@ public class indexContainer extends kelondroRowSet {
public Object next() {
kelondroRow.Entry rentry = (kelondroRow.Entry) rowEntryIterator.next();
if (rentry == null) return null;
- return new indexURLEntry(rentry);
+ return new indexRWIEntryOld(rentry);
}
public void remove() {
@@ -288,10 +288,10 @@ public class indexContainer extends kelondroRowSet {
assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString();
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result
Iterator se = small.entries();
- indexEntry ie0, ie1;
+ indexRWIEntry ie0, ie1;
long stamp = System.currentTimeMillis();
while ((se.hasNext()) && ((System.currentTimeMillis() - stamp) < time)) {
- ie0 = (indexEntry) se.next();
+ ie0 = (indexRWIEntry) se.next();
ie1 = large.get(ie0.urlHash());
if (ie1 != null) {
// this is a hit. Calculate word distance:
@@ -312,25 +312,25 @@ public class indexContainer extends kelondroRowSet {
Iterator e2 = i2.entries();
int c;
if ((e1.hasNext()) && (e2.hasNext())) {
- indexEntry ie1;
- indexEntry ie2;
- ie1 = (indexEntry) e1.next();
- ie2 = (indexEntry) e2.next();
+ indexRWIEntry ie1;
+ indexRWIEntry ie2;
+ ie1 = (indexRWIEntry) e1.next();
+ ie2 = (indexRWIEntry) e2.next();
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = i1.order().compare(ie1.urlHash(), ie2.urlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
- if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
+ if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break;
} else if (c > 0) {
- if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break;
+ if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
} else {
// we have found the same urls in different searches!
ie1.combineDistance(ie2);
if (ie1.worddistance() <= maxDistance) conj.add(ie1);
- if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
- if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break;
+ if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break;
+ if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
}
}
}
diff --git a/source/de/anomic/index/indexEntryAttribute.java b/source/de/anomic/index/indexEntryAttribute.java
index 229fc0ca4..2156cad4b 100644
--- a/source/de/anomic/index/indexEntryAttribute.java
+++ b/source/de/anomic/index/indexEntryAttribute.java
@@ -35,10 +35,6 @@ import de.anomic.yacy.yacySeedDB;
public class indexEntryAttribute {
- // the size of a word hash
- public static final int wordHashLength = yacySeedDB.commonHashLength; // 12
- public static final int urlHashLength = yacySeedDB.commonHashLength; // 12
-
// doctypes:
public static final char DT_PDFPS = 'p';
public static final char DT_TEXT = 't';
@@ -86,7 +82,7 @@ public class indexEntryAttribute {
// create a word hash
public static String word2hash(String word) {
- return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(word.toLowerCase())).substring(0, indexEntryAttribute.wordHashLength);
+ return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(word.toLowerCase())).substring(0, yacySeedDB.commonHashLength);
}
// doctype calculation
diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java
index 0858ef8ba..ad00d3f28 100644
--- a/source/de/anomic/index/indexRAMRI.java
+++ b/source/de/anomic/index/indexRAMRI.java
@@ -81,7 +81,7 @@ public final class indexRAMRI implements indexRI {
this.indexArrayFileName = dumpname;
this.payloadrow = payloadrow;
this.bufferStructureBasis = new kelondroRow(
- "byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " +
+ "byte[] wordhash-" + yacySeedDB.commonHashLength + ", " +
"Cardinal occ-4 {b256}, " +
"Cardinal time-8 {b256}, " +
"byte[] urlprops-" + payloadrow.objectsize());
@@ -114,7 +114,7 @@ public final class indexRAMRI implements indexRI {
String wordHash;
indexContainer container;
long updateTime;
- indexEntry iEntry;
+ indexRWIEntry iEntry;
kelondroRow.Entry row = dumpArray.row().newEntry();
// write wCache
@@ -131,7 +131,7 @@ public final class indexRAMRI implements indexRI {
if (container != null) {
Iterator ci = container.entries();
while (ci.hasNext()) {
- iEntry = (indexEntry) ci.next();
+ iEntry = (indexRWIEntry) ci.next();
row.setCol(0, wordHash.getBytes());
row.setCol(1, kelondroNaturalOrder.encodeLong(container.size(), 4));
row.setCol(2, kelondroNaturalOrder.encodeLong(updateTime, 8));
@@ -169,7 +169,7 @@ public final class indexRAMRI implements indexRI {
Iterator i = dumpArray.contentRows(-1);
String wordHash;
//long creationTime;
- indexEntry wordEntry;
+ indexRWIEntry wordEntry;
kelondroRow.Entry row;
//Runtime rt = Runtime.getRuntime();
while (i.hasNext()) {
@@ -178,7 +178,7 @@ public final class indexRAMRI implements indexRI {
if ((row == null) || (row.empty(0)) || (row.empty(3))) continue;
wordHash = row.getColString(0, "UTF-8");
//creationTime = kelondroRecords.bytes2long(row[2]);
- wordEntry = new indexURLEntry(row.getColBytes(3));
+ wordEntry = new indexRWIEntryOld(row.getColBytes(3));
// store to cache
addEntry(wordHash, wordEntry, startTime, false);
urlCount++;
@@ -437,10 +437,10 @@ public final class indexRAMRI implements indexRI {
return null;
}
- public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
+ public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow);
- indexEntry[] entries = new indexEntry[] { newEntry };
+ indexRWIEntry[] entries = new indexRWIEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);
hashScore.incScore(wordHash);
diff --git a/source/de/anomic/index/indexRI.java b/source/de/anomic/index/indexRI.java
index 43187cb02..9618e0303 100644
--- a/source/de/anomic/index/indexRI.java
+++ b/source/de/anomic/index/indexRI.java
@@ -44,7 +44,7 @@ public interface indexRI {
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
- public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase);
+ public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtCase);
public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase);
public void close(int waitingSeconds);
diff --git a/source/de/anomic/index/indexEntry.java b/source/de/anomic/index/indexRWIEntry.java
similarity index 79%
rename from source/de/anomic/index/indexEntry.java
rename to source/de/anomic/index/indexRWIEntry.java
index 8fc17aa22..f4e7caa84 100644
--- a/source/de/anomic/index/indexEntry.java
+++ b/source/de/anomic/index/indexRWIEntry.java
@@ -1,4 +1,4 @@
-// indexEntry.java
+// indexRWIEntry.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 20.05.2006 on http://www.anomic.de
//
@@ -28,7 +28,7 @@ package de.anomic.index;
import de.anomic.kelondro.kelondroRow;
-public interface indexEntry {
+public interface indexRWIEntry {
public Object clone();
public String toPropertyForm(boolean displayFormat);
@@ -48,13 +48,13 @@ public interface indexEntry {
public char getType();
public boolean isLocal();
- public void combineDistance(indexEntry oe);
+ public void combineDistance(indexRWIEntry oe);
public int worddistance();
- public void min(indexEntry other);
- public void max(indexEntry other);
- public void normalize(indexEntry min, indexEntry max);
- public indexEntry generateNormalized(indexEntry min, indexEntry max);
- public boolean isNewer(indexEntry other);
- public boolean isOlder(indexEntry other);
+ public void min(indexRWIEntry other);
+ public void max(indexRWIEntry other);
+ public void normalize(indexRWIEntry min, indexRWIEntry max);
+ public indexRWIEntry generateNormalized(indexRWIEntry min, indexRWIEntry max);
+ public boolean isNewer(indexRWIEntry other);
+ public boolean isOlder(indexRWIEntry other);
}
diff --git a/source/de/anomic/index/indexRWIEntryOld.java b/source/de/anomic/index/indexRWIEntryOld.java
new file mode 100644
index 000000000..1461ad77e
--- /dev/null
+++ b/source/de/anomic/index/indexRWIEntryOld.java
@@ -0,0 +1,323 @@
+// indexRWIEntryOld.java
+// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
+// first published 21.07.2006 on http://www.anomic.de
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+package de.anomic.index;
+
+import de.anomic.kelondro.kelondroColumn;
+import de.anomic.kelondro.kelondroRow;
+import de.anomic.kelondro.kelondroRow.Entry;
+import de.anomic.plasma.plasmaWordIndex;
+import de.anomic.yacy.yacySeedDB;
+
+public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
+
+ // this object stores attributes to URL references inside RWI collections
+
+ // statics for value lengths (field widths, in characters/bytes, used when building row definitions)
+ public static final int urlStringLength = 256;// not too short for links without parameters
+ public static final int urlDescrLength = 80; // The headline of a web page (meta-tag or <h1>)
+ public static final int urlNameLength = 40; // the tag content between <a> and </a>
+ public static final int urldescrtagsLength = 320;// the url, the description and tags in one string
+ public static final int urlErrorLength = 80; // a reason description for unavailable urls
+ public static final int urlDateLength = 4; // any date, shortened
+ public static final int urlCopyCountLength = 2; // counter for numbers of copies of this index
+ public static final int urlFlagLength = 2; // any stuff
+ public static final int urlLanguageLength = 2; // taken from TLD suffix as quick-hack
+ public static final int urlDoctypeLength = 1; // taken from extension
+ public static final int urlSizeLength = 6; // the source size, from cache
+ public static final int urlWordCountLength = 3; // the number of words, from condenser
+ public static final int urlCrawlProfileHandleLength = 4; // name of the prefetch profile
+ public static final int urlCrawlDepthLength = 2; // prefetch depth, first is '0'
+ public static final int urlParentBranchesLength = 3; // number of anchors of the parent
+ public static final int urlForkFactorLength = 4; // sum of anchors of all ancestors
+ public static final int urlRetryLength = 2; // number of load retries
+ public static final int urlHostLength = 8; // the host as truncated name
+ public static final int urlHandleLength = 4; // a handle
+ public static final int urlQualityLength = 3; // taken from heuristic
+
+ public static kelondroRow urlEntryRow = new kelondroRow(new kelondroColumn[]{
+ new kelondroColumn("h", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacySeedDB.commonHashLength, "urlhash"),
+ new kelondroColumn("q", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, urlQualityLength, "quality"),
+ new kelondroColumn("a", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 3, "lastModified"),
+ new kelondroColumn("c", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "hitcount"),
+ new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, urlLanguageLength, "language"),
+ new kelondroColumn("d", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "doctype"),
+ new kelondroColumn("f", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "localflag"),
+ new kelondroColumn("t", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posintext"),
+ new kelondroColumn("r", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posinphrase"),
+ new kelondroColumn("o", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posofphrase"),
+ new kelondroColumn("i", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "worddistance"),
+ new kelondroColumn("w", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "wordcount"),
+ new kelondroColumn("p", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "phrasecount")
+ });
+
+ private static final int col_urlhash = 0;
+ private static final int col_quality = 1;
+ private static final int col_lastModified = 2;
+ private static final int col_hitcount = 3;
+ private static final int col_language = 4;
+ private static final int col_doctype = 5;
+ private static final int col_localflag = 6;
+ private static final int col_posintext = 7;
+ private static final int col_posinphrase = 8;
+ private static final int col_posofphrase = 9;
+ private static final int col_worddistance = 10;
+ private static final int col_wordcount = 11;
+ private static final int col_phrasecount = 12;
+
+
+ private kelondroRow.Entry entry;
+
+ public indexRWIEntryOld(String urlHash,
+ int urlLength, // byte-length of complete URL
+ int urlComps, // number of path components
+ int titleLength, // length of description/length (longer are better?)
+ int hitcount, //*how often this word appears in the text
+ int wordcount, //*total number of words
+ int phrasecount, //*total number of phrases
+ int posintext, //*position of word in all words
+ int posinphrase, //*position of word in its phrase
+ int posofphrase, //*number of the phrase where word appears
+ int worddistance, //*word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultaneous search). If stored, this shows that the result was obtained by remote search
+ int sizeOfPage, // # of bytes of the page
+ long lastmodified, //*last-modified time of the document where word appears
+ long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
+ int quality, //*the entropy value
+ String language, //*(guessed) language of document
+ char doctype, //*type of document
+ int outlinksSame, // outlinks to same domain
+ int outlinksOther,// outlinks to other domain
+ boolean local //*flag shows that this index was generated locally; otherwise it is from a remote peer
+ ) {
+
+ // more needed attributes:
+ // - boolean: appearance attributes: title, appears in header, anchor-descr, image-tag, emphasized text, meta-tags, word in link etc
+ // - boolean: URL attributes
+ assert (urlHash.length() == 12) : "urlhash = " + urlHash;
+ if ((language == null) || (language.length() != urlLanguageLength)) language = "uk";
+ this.entry = urlEntryRow.newEntry();
+ this.entry.setCol(col_urlhash, urlHash, null);
+ this.entry.setCol(col_quality, quality);
+ this.entry.setCol(col_lastModified, lastmodified);
+ this.entry.setCol(col_hitcount, hitcount);
+ this.entry.setCol(col_language, language, null);
+ this.entry.setCol(col_doctype, (byte) doctype);
+ this.entry.setCol(col_localflag, (byte) ((local) ? indexEntryAttribute.LT_LOCAL : indexEntryAttribute.LT_GLOBAL));
+ this.entry.setCol(col_posintext, posintext);
+ this.entry.setCol(col_posinphrase, posinphrase);
+ this.entry.setCol(col_posofphrase, posofphrase);
+ this.entry.setCol(col_worddistance, worddistance);
+ this.entry.setCol(col_wordcount, wordcount);
+ this.entry.setCol(col_phrasecount, phrasecount);
+ //System.out.println("DEBUG-NEWENTRY " + toPropertyForm());
+ }
+
+ public indexRWIEntryOld(String urlHash, String code) {
+ // the code is the external form of the row minus the leading urlHash entry
+ this.entry = urlEntryRow.newEntry((urlHash + code).getBytes());
+ }
+
+ public indexRWIEntryOld(String external) {
+ this.entry = urlEntryRow.newEntry(external);
+ }
+
+ public indexRWIEntryOld(byte[] row) {
+ this.entry = urlEntryRow.newEntry(row);
+ }
+
+ public indexRWIEntryOld(kelondroRow.Entry rentry) {
+ // FIXME: see if cloning is necessary
+ this.entry = rentry;
+ }
+
+ public Object clone() {
+ byte[] b = new byte[urlEntryRow.objectsize()];
+ System.arraycopy(entry.bytes(), 0, b, 0, urlEntryRow.objectsize());
+ return new indexRWIEntryOld(b);
+ }
+
+ public String toPropertyForm(boolean displayFormat) {
+ return entry.toPropertyForm(true, displayFormat, displayFormat);
+ }
+
+ public Entry toKelondroEntry() {
+ return this.entry;
+ }
+
+ public String urlHash() {
+ return this.entry.getColString(col_urlhash, null);
+ }
+
+ public int quality() {
+ return (int) this.entry.getColLong(col_quality);
+ }
+
+ public int virtualAge() {
+ return plasmaWordIndex.microDateDays(lastModified());
+ }
+
+ public long lastModified() {
+ return (int) this.entry.getColLong(col_lastModified);
+ }
+
+ public int hitcount() {
+ return (int) this.entry.getColLong(col_hitcount);
+ }
+
+ public int posintext() {
+ return (int) this.entry.getColLong(col_posintext);
+ }
+
+ public int posinphrase() {
+ return (int) this.entry.getColLong(col_posinphrase);
+ }
+
+ public int posofphrase() {
+ return (int) this.entry.getColLong(col_posofphrase);
+ }
+
+ public int wordcount() {
+ return (int) this.entry.getColLong(col_wordcount);
+ }
+
+ public int phrasecount() {
+ return (int) this.entry.getColLong(col_phrasecount);
+ }
+
+ public String getLanguage() {
+ return this.entry.getColString(col_language, null);
+ }
+
+ public char getType() {
+ return (char) this.entry.getColByte(col_doctype);
+ }
+
+ public boolean isLocal() {
+ return this.entry.getColByte(col_localflag) == indexEntryAttribute.LT_LOCAL;
+ }
+
+ public static indexRWIEntryOld combineDistance(indexRWIEntryOld ie1, indexRWIEntry ie2) {
+ // merges the position data of ie2 into ie1: ie1 is modified in place and returned, ie2 is only read
+ ie1.entry.setCol(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext()));
+ ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext()));
+ ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? Math.min(ie1.posinphrase(), ie2.posinphrase()) : 0 /*unknown*/); // same phrase: keep the smaller in-phrase position (previously the phrase number was stored here by mistake)
+ ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase()));
+ ie1.entry.setCol(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2);
+ return ie1;
+ }
+
+ public void combineDistance(indexRWIEntry oe) {
+ combineDistance(this, oe);
+ }
+
+ public int worddistance() {
+ return (int) this.entry.getColLong(col_worddistance);
+ }
+
+ public static final void min(indexRWIEntryOld t, indexRWIEntry other) {
+ if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
+ if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount());
+ if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount());
+ if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext());
+ if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
+ if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
+ if (t.worddistance() > other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance());
+ if (t.lastModified() > other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified());
+ if (t.quality() > other.quality()) t.entry.setCol(col_quality, other.quality());
+ }
+
+ public static final void max(indexRWIEntryOld t, indexRWIEntry other) {
+ if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
+ if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount());
+ if (t.phrasecount() < other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount());
+ if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext());
+ if (t.posinphrase() < other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
+ if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
+ if (t.worddistance() < other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance());
+ if (t.lastModified() < other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified());
+ if (t.quality() < other.quality()) t.entry.setCol(col_quality, other.quality());
+ }
+
+
+ public void min(indexRWIEntry other) {
+ min(this, other);
+ }
+
+ public void max(indexRWIEntry other) {
+ max(this, other);
+ }
+
+ static void normalize(indexRWIEntryOld t, indexRWIEntry min, indexRWIEntry max) {
+ assert (t.urlHash().length() == 12) : "turlhash = " + t.urlHash();
+ assert (min.urlHash().length() == 12) : "minurlhash = " + min.urlHash();
+ assert (max.urlHash().length() == 12) : "maxurlhash = " + max.urlHash();
+ if (1 + max.worddistance() - min.worddistance() == 0) System.out.println("min = " + min.toPropertyForm(true) + "\nmax=" + max.toPropertyForm(true));
+ //System.out.println("Normalize:\nentry = " + t.toPropertyForm(true));
+ //System.out.println("min = " + min.toPropertyForm(true));
+ //System.out.println("max = " + max.toPropertyForm(true));
+ t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount()));
+ t.entry.setCol(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount()));
+ t.entry.setCol(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount()));
+ t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext()));
+ t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase()));
+ t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase()));
+ t.entry.setCol(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); // FIXME: a division by zero occurs here (denominator is 0 when min.worddistance() == max.worddistance() + 1), which can only happen if the min/max normalization did not work.
+ t.entry.setCol(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified()));
+ t.entry.setCol(col_quality , (t.quality() == 0) ? 0 : 1 + 255 * (t.quality() - min.quality() ) / (1 + max.quality() - min.quality()));
+ //System.out.println("out = " + t.toPropertyForm(true));
+ }
+
+ public void normalize(indexRWIEntry min, indexRWIEntry max) {
+ normalize(this, min, max);
+ }
+
+ public indexRWIEntry generateNormalized(indexRWIEntry min, indexRWIEntry max) {
+ assert (this.urlHash().length() == 12) : "this.urlhash = " + this.urlHash();
+ indexRWIEntryOld e = (indexRWIEntryOld) this.clone();
+ e.normalize(min, max);
+ return e;
+ }
+
+ public boolean isNewer(indexRWIEntry other) {
+ if (other == null) return true;
+ if (this.lastModified() > other.lastModified()) return true;
+ if (this.lastModified() == other.lastModified()) {
+ if (this.quality() > other.quality()) return true;
+ }
+ return false;
+ }
+
+ public boolean isOlder(indexRWIEntry other) {
+ if (other == null) return false;
+ if (this.lastModified() < other.lastModified()) return true;
+ if (this.lastModified() == other.lastModified()) {
+ if (this.quality() < other.quality()) return true;
+ }
+ return false;
+ }
+
+}
diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java
index f67d0265b..fc2e09979 100644
--- a/source/de/anomic/index/indexURL.java
+++ b/source/de/anomic/index/indexURL.java
@@ -50,29 +50,6 @@ public class indexURL {
// day formatter for entry export
public static final SimpleDateFormat shortDayFormatter = new SimpleDateFormat("yyyyMMdd");
- // statics for value lengths
- public static final int urlHashLength = yacySeedDB.commonHashLength; // 12
- public static final int urlStringLength = 256;// not too short for links without parameters
- public static final int urlDescrLength = 80; // The headline of a web page (meta-tag or )
- public static final int urlNameLength = 40; // the tag content between and
- public static final int urldescrtagsLength = 320;// the url, the description and tags in one string
- public static final int urlErrorLength = 80; // a reason description for unavailable urls
- public static final int urlDateLength = 4; // any date, shortened
- public static final int urlCopyCountLength = 2; // counter for numbers of copies of this index
- public static final int urlFlagLength = 2; // any stuff
- public static final int urlQualityLength = 3; // taken from heuristic
- public static final int urlLanguageLength = 2; // taken from TLD suffix as quick-hack
- public static final int urlDoctypeLength = 1; // taken from extension
- public static final int urlSizeLength = 6; // the source size, from cache
- public static final int urlWordCountLength = 3; // the number of words, from condenser
- public static final int urlCrawlProfileHandleLength = 4; // name of the prefetch profile
- public static final int urlCrawlDepthLength = 2; // prefetch depth, first is '0'
- public static final int urlParentBranchesLength = 3; // number of anchors of the parent
- public static final int urlForkFactorLength = 4; // sum of anchors of all ancestors
- public static final int urlRetryLength = 2; // number of load retries
- public static final int urlHostLength = 8; // the host as struncated name
- public static final int urlHandleLength = 4; // a handle
-
private static final String[] TLD_NorthAmericaOceania={
// primary english-speaking countries
// english-speaking countries from central america are also included
@@ -397,7 +374,7 @@ public class indexURL {
static {
// create a dummy hash
dummyHash = "";
- for (int i = 0; i < urlHashLength; i++) dummyHash += "-";
+ for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-";
// assign TLD-ids and names
insertTLDProps(TLD_EuropaRussia, 0);
@@ -602,13 +579,13 @@ public class indexURL {
public static final String oldurlHash(URL url) {
if (url == null) return null;
- String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform())).substring(0, urlHashLength);
+ String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform())).substring(0, yacySeedDB.commonHashLength);
return hash;
}
public static final String oldurlHash(String url) throws MalformedURLException {
if ((url == null) || (url.length() < 10)) return null;
- String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(new URL(url).toNormalform())).substring(0, urlHashLength);
+ String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(new URL(url).toNormalform())).substring(0, yacySeedDB.commonHashLength);
return hash;
}
@@ -618,10 +595,10 @@ public class indexURL {
TreeMap doms = new TreeMap();
synchronized(inputContainer) {
Iterator i = inputContainer.entries();
- indexEntry iEntry;
+ indexRWIEntry iEntry;
String dom, paths;
while (i.hasNext()) {
- iEntry = (indexEntry) i.next();
+ iEntry = (indexRWIEntry) i.next();
if ((excludeContainer != null) && (excludeContainer.get(iEntry.urlHash()) != null)) continue; // do not include urls that are in excludeContainer
dom = iEntry.urlHash().substring(6);
if ((paths = (String) doms.get(dom)) == null) {
diff --git a/source/de/anomic/plasma/plasmaCrawlLURLEntry.java b/source/de/anomic/index/indexURLEntry.java
similarity index 90%
rename from source/de/anomic/plasma/plasmaCrawlLURLEntry.java
rename to source/de/anomic/index/indexURLEntry.java
index fd079efb3..6531210ff 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURLEntry.java
+++ b/source/de/anomic/index/indexURLEntry.java
@@ -1,6 +1,6 @@
-// plasmaCrawlLURLEntry.java
+// indexURLEntry.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
-// first published 13.10.2006 on http://www.anomic.de
+// first published 2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
@@ -24,7 +24,8 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-package de.anomic.plasma;
+
+package de.anomic.index;
import java.io.IOException;
import java.net.MalformedURLException;
@@ -32,9 +33,9 @@ import java.util.Date;
import de.anomic.kelondro.kelondroRow;
import de.anomic.net.URL;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
-public interface plasmaCrawlLURLEntry {
+public interface indexURLEntry {
public kelondroRow.Entry toRowEntry() throws IOException;
public String hash();
@@ -48,8 +49,8 @@ public interface plasmaCrawlLURLEntry {
public int size();
public int wordCount();
public String snippet();
- public indexEntry word();
- public boolean isOlder(plasmaCrawlLURLEntry other);
+ public indexRWIEntry word();
+ public boolean isOlder(indexURLEntry other);
public String toString(String snippet);
public String toString();
@@ -82,4 +83,4 @@ public interface plasmaCrawlLURLEntry {
public String ETag() { return this.ETag; }
}
-}
+}
\ No newline at end of file
diff --git a/source/de/anomic/plasma/plasmaCrawlLURLNewEntry.java b/source/de/anomic/index/indexURLEntryNew.java
similarity index 92%
rename from source/de/anomic/plasma/plasmaCrawlLURLNewEntry.java
rename to source/de/anomic/index/indexURLEntryNew.java
index 9e1dd758b..2cc9dcf34 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURLNewEntry.java
+++ b/source/de/anomic/index/indexURLEntryNew.java
@@ -1,4 +1,4 @@
-package de.anomic.plasma;
+package de.anomic.index;
import java.io.IOException;
import java.net.MalformedURLException;
@@ -7,9 +7,6 @@ import java.util.Date;
import java.util.Properties;
import java.util.ArrayList;
-import de.anomic.index.indexEntry;
-import de.anomic.index.indexURL;
-import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroRow;
@@ -20,8 +17,10 @@ import de.anomic.tools.crypt;
import de.anomic.tools.bitfield;
import de.anomic.tools.nxTools;
-public class plasmaCrawlLURLNewEntry implements plasmaCrawlLURLEntry {
+public class indexURLEntryNew implements indexURLEntry {
+ // this object stores attributes for URL entries
+
public static final kelondroRow rowdef = new kelondroRow(
"String hash-12, " + // the url's hash
"String comp-360, " + // components: the url, description, author and tags. As 5th element, an ETag is possible
@@ -37,16 +36,16 @@ public class plasmaCrawlLURLNewEntry implements plasmaCrawlLURLEntry {
"String lang-2, " + // language
"Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width
"Cardinal lother-2 {b256}, " + // # of outlinks to outside domain; for video and image: height
- "Cardinal limage-2 {b256}, " + // # of embedded image links
+ "Cardinal limage-2 {b256}, " + // # of embedded image links
"Cardinal laudio-2 {b256}, " + // # of embedded audio links; for audio: track number; for video: number of audio tracks
"Cardinal lvideo-2 {b256}, " + // # of embedded video links
- "Cardinal lapp-2 {b256}"); // # of embedded links to applications
+ "Cardinal lapp-2 {b256}"); // # of embedded links to applications
private kelondroRow.Entry entry;
private String snippet;
- private indexEntry word; // this is only used if the url is transported via remote search requests
+ private indexRWIEntry word; // this is only used if the url is transported via remote search requests
- public plasmaCrawlLURLNewEntry(
+ public indexURLEntryNew(
URL url,
String descr,
String author,
@@ -106,13 +105,13 @@ public class plasmaCrawlLURLNewEntry implements plasmaCrawlLURLEntry {
return s.toString().getBytes();
}
- public plasmaCrawlLURLNewEntry(kelondroRow.Entry entry, indexEntry searchedWord) {
+ public indexURLEntryNew(kelondroRow.Entry entry, indexRWIEntry searchedWord) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
}
- public plasmaCrawlLURLNewEntry(Properties prop){
+ public indexURLEntryNew(Properties prop){
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
@@ -159,12 +158,12 @@ public class plasmaCrawlLURLNewEntry implements plasmaCrawlLURLEntry {
this.entry.setCol("lvideo", Integer.parseInt(prop.getProperty("lvideo", "0")));
this.entry.setCol("lapp", Integer.parseInt(prop.getProperty("lapp", "0")));
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
- this.word = (prop.containsKey("word")) ? new indexURLEntry(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))) : null;
+ this.word = (prop.containsKey("word")) ? new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))) : null;
}
private StringBuffer corePropList() {
// generate a parseable string; this is a simple property-list
- plasmaCrawlLURLEntry.Components comp = this.comp();
+ indexURLEntry.Components comp = this.comp();
final StringBuffer s = new StringBuffer(300);
try {
s.append("hash=").append(hash());
@@ -217,9 +216,9 @@ public class plasmaCrawlLURLNewEntry implements plasmaCrawlLURLEntry {
return this.entry.getColString("hash", "", null);
}
- public plasmaCrawlLURLEntry.Components comp() {
+ public indexURLEntry.Components comp() {
ArrayList cl = nxTools.strings(this.entry.getCol("comp", null), "UTF-8");
- return new de.anomic.plasma.plasmaCrawlLURLEntry.Components(
+ return new indexURLEntry.Components(
(cl.size() > 0) ? (String) cl.get(0) : "",
(cl.size() > 1) ? (String) cl.get(1) : "",
(cl.size() > 2) ? (String) cl.get(2) : "",
@@ -299,11 +298,11 @@ public class plasmaCrawlLURLNewEntry implements plasmaCrawlLURLEntry {
return snippet;
}
- public indexEntry word() {
+ public indexRWIEntry word() {
return word;
}
- public boolean isOlder(plasmaCrawlLURLEntry other) {
+ public boolean isOlder(indexURLEntry other) {
if (other == null) return false;
Date tmoddate = moddate();
Date omoddate = other.moddate();
diff --git a/source/de/anomic/plasma/plasmaCrawlLURLOldEntry.java b/source/de/anomic/index/indexURLEntryOld.java
similarity index 84%
rename from source/de/anomic/plasma/plasmaCrawlLURLOldEntry.java
rename to source/de/anomic/index/indexURLEntryOld.java
index 84fb66f66..4e0ca13d0 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURLOldEntry.java
+++ b/source/de/anomic/index/indexURLEntryOld.java
@@ -24,39 +24,37 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-package de.anomic.plasma;
+package de.anomic.index;
import java.io.IOException;
import java.util.Date;
import java.util.Properties;
import de.anomic.http.httpc;
-import de.anomic.index.indexEntry;
-import de.anomic.index.indexURL;
-import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroRow;
import de.anomic.net.URL;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
import de.anomic.tools.crypt;
+import de.anomic.yacy.yacySeedDB;
-public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
+public class indexURLEntryOld implements indexURLEntry {
public static final kelondroRow rowdef = new kelondroRow(
- "String urlhash-" + indexURL.urlHashLength + ", " + // the url's hash
- "String urlstring-" + indexURL.urlStringLength + ", " + // the url as string
- "String urldescr-" + indexURL.urlDescrLength + ", " + // the description of the url
- "Cardinal moddate-" + indexURL.urlDateLength + " {b64e}, " + // last-modified from the httpd
- "Cardinal loaddate-" + indexURL.urlDateLength + " {b64e}, " + // time when the url was loaded
- "String refhash-" + indexURL.urlHashLength + ", " + // the url's referrer hash
- "Cardinal copycount-" + indexURL.urlCopyCountLength + " {b64e}, " + //
- "byte[] flags-" + indexURL.urlFlagLength + ", " + // flags
- "Cardinal quality-" + indexURL.urlQualityLength + " {b64e}, " + //
- "String language-" + indexURL.urlLanguageLength + ", " + //
- "byte[] doctype-" + indexURL.urlDoctypeLength + ", " + //
- "Cardinal size-" + indexURL.urlSizeLength + " {b64e}, " + // size of file in bytes
- "Cardinal wc-" + indexURL.urlWordCountLength + " {b64e}"); // word count
+ "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
+ "String urlstring-" + indexRWIEntryOld.urlStringLength + ", " + // the url as string
+ "String urldescr-" + indexRWIEntryOld.urlDescrLength + ", " + // the description of the url
+ "Cardinal moddate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // last-modified from the httpd
+ "Cardinal loaddate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // time when the url was loaded
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "Cardinal copycount-" + indexRWIEntryOld.urlCopyCountLength + " {b64e}, " + //
+ "byte[] flags-" + indexRWIEntryOld.urlFlagLength + ", " + // flags
+ "Cardinal quality-" + indexRWIEntryOld.urlQualityLength + " {b64e}, " + //
+ "String language-" + indexRWIEntryOld.urlLanguageLength + ", " + //
+ "byte[] doctype-" + indexRWIEntryOld.urlDoctypeLength + ", " + //
+ "Cardinal size-" + indexRWIEntryOld.urlSizeLength + " {b64e}, " + // size of file in bytes
+ "Cardinal wc-" + indexRWIEntryOld.urlWordCountLength + " {b64e}"); // word count
private URL url;
private String descr;
@@ -72,9 +70,9 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
private int size;
private int wordCount;
private String snippet;
- private indexEntry word; // this is only used if the url is transported via remote search requests
+ private indexRWIEntry word; // this is only used if the url is transported via remote search requests
- public plasmaCrawlLURLOldEntry(
+ public indexURLEntryOld(
URL url,
String descr,
String author,
@@ -114,7 +112,7 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
this.word = null;
}
- public plasmaCrawlLURLOldEntry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException {
+ public indexURLEntryOld(kelondroRow.Entry entry, indexRWIEntry searchedWord) throws IOException {
try {
this.urlHash = entry.getColString(0, null);
this.url = new URL(entry.getColString(1, "UTF-8"));
@@ -138,7 +136,7 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
}
}
- public plasmaCrawlLURLOldEntry(Properties prop) {
+ public indexURLEntryOld(Properties prop) {
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
@@ -161,7 +159,7 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
this.snippet = prop.getProperty("snippet", "");
if (snippet.length() == 0) snippet = null;
else snippet = crypt.simpleDecode(snippet, null);
- this.word = (prop.containsKey("word")) ? new indexURLEntry(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))) : null;
+ this.word = (prop.containsKey("word")) ? new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))) : null;
} catch (Exception e) {
serverLog.logSevere("PLASMA",
"INTERNAL ERROR in plasmaLURL.entry/2:"
@@ -178,8 +176,8 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
}
public kelondroRow.Entry toRowEntry() throws IOException {
- final String moddatestr = kelondroBase64Order.enhancedCoder.encodeLong(moddate.getTime() / 86400000, indexURL.urlDateLength);
- final String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexURL.urlDateLength);
+ final String moddatestr = kelondroBase64Order.enhancedCoder.encodeLong(moddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
+ final String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
final byte[][] entry = new byte[][] {
urlHash.getBytes(),
@@ -188,13 +186,13 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
moddatestr.getBytes(),
loaddatestr.getBytes(),
referrerHash.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(copyCount, indexURL.urlCopyCountLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(copyCount, indexRWIEntryOld.urlCopyCountLength).getBytes(),
flags.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(quality, indexURL.urlQualityLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(quality, indexRWIEntryOld.urlQualityLength).getBytes(),
language.getBytes(),
new byte[] { (byte) doctype },
- kelondroBase64Order.enhancedCoder.encodeLong(size, indexURL.urlSizeLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(wordCount, indexURL.urlWordCountLength).getBytes()};
+ kelondroBase64Order.enhancedCoder.encodeLong(size, indexRWIEntryOld.urlSizeLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(wordCount, indexRWIEntryOld.urlWordCountLength).getBytes()};
return rowdef.newEntry(entry);
}
@@ -264,11 +262,11 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
return snippet;
}
- public indexEntry word() {
+ public indexRWIEntry word() {
return word;
}
- public boolean isOlder(plasmaCrawlLURLEntry other) {
+ public boolean isOlder(indexURLEntry other) {
if (other == null) return false;
if (moddate.before(other.moddate())) return true;
if (moddate.equals(other.moddate())) {
@@ -292,7 +290,7 @@ public class plasmaCrawlLURLOldEntry implements plasmaCrawlLURLEntry {
",local=").append(((local()) ? "true" : "false"))
.append(",q=").append(
kelondroBase64Order.enhancedCoder.encodeLong(
- quality, indexURL.urlQualityLength))
+ quality, indexRWIEntryOld.urlQualityLength))
.append(",dt=").append(doctype).append(",lang=").append(
language).append(",url=").append(
crypt.simpleEncode(url.toString())).append(
diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
index 6960ea857..95caa46c3 100644
--- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
+++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
@@ -51,6 +51,7 @@ import java.io.File;
import java.io.IOException;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
@@ -297,7 +298,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
yacyCore.seedDB.mySeed.hash,
this.name,
(failreason==null)?"Unknown reason":failreason,
- new bitfield(indexURL.urlFlagLength)
+ new bitfield(indexRWIEntryOld.urlFlagLength)
);
// store the entry
diff --git a/source/de/anomic/plasma/dbImport/AssortmentImporter.java b/source/de/anomic/plasma/dbImport/AssortmentImporter.java
index 86183dde1..20a5640eb 100644
--- a/source/de/anomic/plasma/dbImport/AssortmentImporter.java
+++ b/source/de/anomic/plasma/dbImport/AssortmentImporter.java
@@ -5,7 +5,7 @@ import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexURLEntry;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexAssortment;
@@ -63,7 +63,7 @@ public class AssortmentImporter extends AbstractImporter implements dbImporter{
// initializing the import assortment db
this.log.logInfo("Initializing source assortment file");
try {
- this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath, indexURLEntry.urlEntryRow, assortmentNr, this.cacheSize/1024, preloadTime, this.log);
+ this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath, indexRWIEntryOld.urlEntryRow, assortmentNr, this.cacheSize/1024, preloadTime, this.log);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
diff --git a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java
index 6528a8bd9..4026e022a 100644
--- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java
+++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java
@@ -7,10 +7,10 @@ import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaCrawlLURL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.server.serverDate;
@@ -134,13 +134,13 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
// loop throug the entities of the container and get the
// urlhash
Iterator importWordIdxEntries = newContainer.entries();
- indexEntry importWordIdxEntry;
+ indexRWIEntry importWordIdxEntry;
while (importWordIdxEntries.hasNext()) {
// testing if import process was aborted
if (isAborted()) break;
// getting next word index entry
- importWordIdxEntry = (indexEntry) importWordIdxEntries.next();
+ importWordIdxEntry = (indexRWIEntry) importWordIdxEntries.next();
String urlHash = importWordIdxEntry.urlHash();
entityUrls.add(urlHash);
}
@@ -162,7 +162,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
// we need to import the url
// getting the url entry
- plasmaCrawlLURLEntry urlEntry = this.importUrlDB.load(urlHash, null);
+ indexURLEntry urlEntry = this.importUrlDB.load(urlHash, null);
if (urlEntry != null) {
/* write it into the home url db */
diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java
index 81280bea0..ccde8689f 100644
--- a/source/de/anomic/plasma/plasmaCrawlBalancer.java
+++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java
@@ -48,10 +48,10 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
-import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroStack;
+import de.anomic.yacy.yacySeedDB;
public class plasmaCrawlBalancer {
@@ -59,7 +59,7 @@ public class plasmaCrawlBalancer {
private HashMap domainStacks;
public plasmaCrawlBalancer(File stackFile) {
- stack = kelondroStack.open(stackFile, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength));
+ stack = kelondroStack.open(stackFile, new kelondroRow("byte[] urlhash-" + yacySeedDB.commonHashLength));
domainStacks = new HashMap();
}
diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java
index 74dc6c6f8..0bb32a489 100644
--- a/source/de/anomic/plasma/plasmaCrawlEURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlEURL.java
@@ -54,12 +54,14 @@ import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.tools.bitfield;
+import de.anomic.yacy.yacySeedDB;
public class plasmaCrawlEURL extends indexURL {
@@ -134,17 +136,17 @@ public class plasmaCrawlEURL extends indexURL {
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
super();
kelondroRow rowdef = new kelondroRow(
- "String urlhash-" + urlHashLength + ", " + // the url's hash
- "String refhash-" + urlHashLength + ", " + // the url's referrer hash
- "String initiator-" + urlHashLength + ", " + // the crawling initiator
- "String executor-" + urlHashLength + ", " + // the crawling executor
- "String urlstring-" + urlStringLength + ", " + // the url as string
- "String urlname-" + urlNameLength + ", " + // the name of the url, from anchor tag name
- "Cardinal appdate-" + urlDateLength + " {b64e}, " + // the time when the url was first time appeared
- "Cardinal loaddate-" + urlDateLength + " {b64e}, " + // the time when the url was last time tried to load
- "Cardinal retrycount-" + urlRetryLength + " {b64e}, " + // number of load retries
- "String failcause-" + urlErrorLength + ", " + // string describing load failure
- "byte[] flags-" + urlFlagLength); // extra space
+ "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
+ "String executor-" + yacySeedDB.commonHashLength + ", " + // the crawling executor
+ "String urlstring-" + indexRWIEntryOld.urlStringLength + ", " + // the url as string
+ "String urlname-" + indexRWIEntryOld.urlNameLength + ", " + // the name of the url, from anchor tag name
+ "Cardinal appdate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // the time when the url was first time appeared
+ "Cardinal loaddate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // the time when the url was last time tried to load
+ "Cardinal retrycount-" + indexRWIEntryOld.urlRetryLength + " {b64e}, " + // number of load retries
+ "String failcause-" + indexRWIEntryOld.urlErrorLength + ", " + // string describing load failure
+ "byte[] flags-" + indexRWIEntryOld.urlFlagLength); // extra space
if (newdb) {
String newCacheName = "urlErr3.table";
@@ -164,9 +166,9 @@ public class plasmaCrawlEURL extends indexURL {
public synchronized Entry newEntry(URL url, String referrer, String initiator, String executor,
String name, String failreason, bitfield flags) {
- if ((referrer == null) || (referrer.length() < urlHashLength)) referrer = dummyHash;
- if ((initiator == null) || (initiator.length() < urlHashLength)) initiator = dummyHash;
- if ((executor == null) || (executor.length() < urlHashLength)) executor = dummyHash;
+ if ((referrer == null) || (referrer.length() < yacySeedDB.commonHashLength)) referrer = dummyHash;
+ if ((initiator == null) || (initiator.length() < yacySeedDB.commonHashLength)) initiator = dummyHash;
+ if ((executor == null) || (executor.length() < yacySeedDB.commonHashLength)) executor = dummyHash;
if (failreason == null) failreason = "unknown";
return new Entry(url, referrer, initiator, executor, name, failreason, flags);
}
@@ -289,8 +291,8 @@ public class plasmaCrawlEURL extends indexURL {
// stores the values from the object variables into the database
if (this.stored) return;
if (this.hash == null) return;
- String initdatestr = kelondroBase64Order.enhancedCoder.encodeLong(initdate.getTime() / 86400000, urlDateLength);
- String trydatestr = kelondroBase64Order.enhancedCoder.encodeLong(trydate.getTime() / 86400000, urlDateLength);
+ String initdatestr = kelondroBase64Order.enhancedCoder.encodeLong(initdate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
+ String trydatestr = kelondroBase64Order.enhancedCoder.encodeLong(trydate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
// store the hash in the hash cache
try {
@@ -304,7 +306,7 @@ public class plasmaCrawlEURL extends indexURL {
this.name.getBytes(),
initdatestr.getBytes(),
trydatestr.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, urlRetryLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, indexRWIEntryOld.urlRetryLength).getBytes(),
this.failreason.getBytes(),
this.flags.getBytes()
};
diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java
index 459f229f6..a592cf8c2 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlLURL.java
@@ -55,17 +55,18 @@ package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
-import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
-import java.util.Locale;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
+import de.anomic.index.indexURLEntryNew;
+import de.anomic.index.indexURLEntryOld;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroBase64Order;
@@ -74,12 +75,9 @@ import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
-import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
-import de.anomic.tools.nxTools;
-import de.anomic.yacy.yacyCore;
-import de.anomic.yacy.yacySeed;
+import de.anomic.yacy.yacySeedDB;
public final class plasmaCrawlLURL extends indexURL {
@@ -101,11 +99,11 @@ public final class plasmaCrawlLURL extends indexURL {
try {
if (newdb) {
- urlIndexFile = new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb * 0x400, preloadTime, plasmaCrawlLURLNewEntry.rowdef, kelondroBase64Order.enhancedCoder);
+ urlIndexFile = new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb * 0x400, preloadTime, indexURLEntryNew.rowdef, kelondroBase64Order.enhancedCoder);
} else {
File oldLURLDB = new File(plasmaPath, "urlHash.db");
oldLURLDB.getParentFile().mkdirs();
- urlIndexFile = new kelondroCache(new kelondroTree(oldLURLDB, bufferkb / 2 * 0x400, preloadTime, plasmaCrawlLURLOldEntry.rowdef), bufferkb / 2 * 0x400, true, false);
+ urlIndexFile = new kelondroCache(new kelondroTree(oldLURLDB, bufferkb / 2 * 0x400, preloadTime, indexURLEntryOld.rowdef), bufferkb / 2 * 0x400, true, false);
}
} catch (IOException e) {
e.printStackTrace();
@@ -121,7 +119,7 @@ public final class plasmaCrawlLURL extends indexURL {
gcrawlResultStack = new LinkedList();
}
- public synchronized void stack(plasmaCrawlLURLEntry e, String initiatorHash, String executorHash, int stackType) {
+ public synchronized void stack(indexURLEntry e, String initiatorHash, String executorHash, int stackType) {
if (e == null) { return; }
try {
if (initiatorHash == null) { initiatorHash = dummyHash; }
@@ -159,7 +157,7 @@ public final class plasmaCrawlLURL extends indexURL {
return 0;
}
- public synchronized plasmaCrawlLURLEntry load(String urlHash, indexEntry searchedWord) {
+ public synchronized indexURLEntry load(String urlHash, indexRWIEntry searchedWord) {
// generates an plasmaLURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.
@@ -171,17 +169,17 @@ public final class plasmaCrawlLURL extends indexURL {
kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes());
if (entry == null) return null;
if (newdb)
- return new plasmaCrawlLURLNewEntry(entry, searchedWord);
+ return new indexURLEntryNew(entry, searchedWord);
else
- return new plasmaCrawlLURLOldEntry(entry, searchedWord);
+ return new indexURLEntryOld(entry, searchedWord);
} catch (IOException e) {
return null;
}
}
- public synchronized void store(plasmaCrawlLURLEntry entry) throws IOException {
+ public synchronized void store(indexURLEntry entry) throws IOException {
// Check if there is a more recent Entry already in the DB
- plasmaCrawlLURLEntry oldEntry;
+ indexURLEntry oldEntry;
try {
if (exists(entry.hash())) {
oldEntry = load(entry.hash(), null);
@@ -202,18 +200,18 @@ public final class plasmaCrawlLURL extends indexURL {
urlIndexFile.put(entry.toRowEntry(), entry.loaddate());
}
- public synchronized plasmaCrawlLURLEntry newEntry(String propStr) {
+ public synchronized indexURLEntry newEntry(String propStr) {
if (propStr.startsWith("{") && propStr.endsWith("}")) {
if (newdb)
- return new plasmaCrawlLURLNewEntry(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)));
+ return new indexURLEntryNew(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)));
else
- return new plasmaCrawlLURLOldEntry(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)));
+ return new indexURLEntryOld(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)));
} else {
return null;
}
}
- public synchronized plasmaCrawlLURLEntry newEntry(
+ public synchronized indexURLEntry newEntry(
URL url,
String descr,
String author,
@@ -236,10 +234,10 @@ public final class plasmaCrawlLURL extends indexURL {
int lvideo,
int lapp) {
if (newdb)
- return new plasmaCrawlLURLNewEntry(url, descr, author, tags, ETag, mod, load, fresh, referrer, md5,
+ return new indexURLEntryNew(url, descr, author, tags, ETag, mod, load, fresh, referrer, md5,
size, wc, dt, flags, lang, llocal, lother, laudio, limage, lvideo, lapp);
else
- return new plasmaCrawlLURLOldEntry(url, descr, author, tags, ETag, mod, load, fresh, referrer, md5,
+ return new indexURLEntryOld(url, descr, author, tags, ETag, mod, load, fresh, referrer, md5,
size, wc, dt, flags, lang, llocal, lother, laudio, limage, lvideo, lapp);
}
@@ -257,36 +255,36 @@ public final class plasmaCrawlLURL extends indexURL {
public synchronized String getUrlHash(int stack, int pos) {
switch (stack) {
- case 1: return ((String) externResultStack.get(pos)).substring(0, urlHashLength);
- case 2: return ((String) searchResultStack.get(pos)).substring(0, urlHashLength);
- case 3: return ((String) transfResultStack.get(pos)).substring(0, urlHashLength);
- case 4: return ((String) proxyResultStack.get(pos)).substring(0, urlHashLength);
- case 5: return ((String) lcrawlResultStack.get(pos)).substring(0, urlHashLength);
- case 6: return ((String) gcrawlResultStack.get(pos)).substring(0, urlHashLength);
+ case 1: return ((String) externResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
+ case 2: return ((String) searchResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
+ case 3: return ((String) transfResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
+ case 4: return ((String) proxyResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
+ case 5: return ((String) lcrawlResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
+ case 6: return ((String) gcrawlResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
}
return null;
}
public synchronized String getInitiatorHash(int stack, int pos) {
switch (stack) {
- case 1: return ((String) externResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2);
- case 2: return ((String) searchResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2);
- case 3: return ((String) transfResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2);
- case 4: return ((String) proxyResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2);
- case 5: return ((String) lcrawlResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2);
- case 6: return ((String) gcrawlResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2);
+ case 1: return ((String) externResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
+ case 2: return ((String) searchResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
+ case 3: return ((String) transfResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
+ case 4: return ((String) proxyResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
+ case 5: return ((String) lcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
+ case 6: return ((String) gcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
}
return null;
}
public synchronized String getExecutorHash(int stack, int pos) {
switch (stack) {
- case 1: return ((String) externResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3);
- case 2: return ((String) searchResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3);
- case 3: return ((String) transfResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3);
- case 4: return ((String) proxyResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3);
- case 5: return ((String) lcrawlResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3);
- case 6: return ((String) gcrawlResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3);
+ case 1: return ((String) externResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
+ case 2: return ((String) searchResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
+ case 3: return ((String) transfResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
+ case 4: return ((String) proxyResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
+ case 5: return ((String) lcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
+ case 6: return ((String) gcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
}
return null;
}
@@ -341,88 +339,10 @@ public final class plasmaCrawlLURL extends indexURL {
return false;
}
}
-
- private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
- private static String daydate(Date date) {
- if (date == null) {
- return "";
- } else {
- return dayFormatter.format(date);
- }
- }
- public serverObjects genTableProps(int tabletype, int lines, boolean showInit, boolean showExec, String dfltInit, String dfltExec, String feedbackpage, boolean makeLink) {
-/* serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps tabletype=" + tabletype + " lines=" + lines +
- " showInit=" + showInit + " showExec=" + showExec +
- " dfltInit=" + dfltInit + " dfltExec=" + dfltExec +
- " feedbackpage=" + feedbackpage + " makeLink=" + makeLink); */
- final serverObjects prop = new serverObjects();
- if (getStackSize(tabletype) == 0) {
- prop.put("table", 0);
- return prop;
- }
- prop.put("table", 1);
- if (lines > getStackSize(tabletype)) lines = getStackSize(tabletype);
- if (lines == getStackSize(tabletype)) {
- prop.put("table_size", 0);
- } else {
- prop.put("table_size", 1);
- prop.put("table_size_count", lines);
- }
- prop.put("table_size_all", getStackSize(tabletype));
- prop.put("table_feedbackpage", feedbackpage);
- prop.put("table_tabletype", tabletype);
- prop.put("table_showInit", (showInit) ? 1 : 0);
- prop.put("table_showExec", (showExec) ? 1 : 0);
-
- boolean dark = true;
- String urlHash, initiatorHash, executorHash;
- String cachepath, urlstr, urltxt;
- yacySeed initiatorSeed, executorSeed;
- plasmaCrawlLURLEntry urle;
-
- // needed for getCachePath(url)
- final plasmaSwitchboard switchboard = plasmaSwitchboard.getSwitchboard();
- final plasmaHTCache cacheManager = switchboard.getCacheManager();
-
- int i, cnt = 0;
- for (i = getStackSize(tabletype) - 1; i >= (getStackSize(tabletype) - lines); i--) {
- initiatorHash = getInitiatorHash(tabletype, i);
- executorHash = getExecutorHash(tabletype, i);
-// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps initiatorHash=" + initiatorHash + " executorHash=" + executorHash);
- urlHash = getUrlHash(tabletype, i);
-// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps urlHash=" + urlHash);
- try {
- urle = load(urlHash, null);
- plasmaCrawlLURLEntry.Components comp = urle.comp();
-// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps urle=" + urle.toString());
- initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash);
- executorSeed = yacyCore.seedDB.getConnected(executorHash);
-
- urlstr = comp.url().toNormalform();
- urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL
- cachepath = cacheManager.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(cacheManager.cachePath.toString().length() + 1);
-
- prop.put("table_indexed_" + cnt + "_dark", (dark) ? 1 : 0);
- prop.put("table_indexed_" + cnt + "_feedbackpage", feedbackpage);
- prop.put("table_indexed_" + cnt + "_tabletype", tabletype);
- prop.put("table_indexed_" + cnt + "_urlhash", urlHash);
- prop.put("table_indexed_" + cnt + "_showInit", (showInit) ? 1 : 0);
- prop.put("table_indexed_" + cnt + "_showInit_initiatorSeed", (initiatorSeed == null) ? dfltInit : initiatorSeed.getName());
- prop.put("table_indexed_" + cnt + "_showExec", (showExec) ? 1 : 0);
- prop.put("table_indexed_" + cnt + "_showExec_executorSeed", (executorSeed == null) ? dfltExec : executorSeed.getName());
- prop.put("table_indexed_" + cnt + "_moddate", daydate(urle.moddate()));
- prop.put("table_indexed_" + cnt + "_wordcount", urle.wordCount());
- prop.put("table_indexed_" + cnt + "_urldescr", comp.descr());
- prop.put("table_indexed_" + cnt + "_url", (cachepath == null) ? "-not-cached-" : ((makeLink) ? ("" + urltxt + "") : urlstr));
- dark = !dark;
- cnt++;
- } catch (Exception e) {
- serverLog.logSevere("PLASMA", "genTableProps", e);
- }
- }
- prop.put("table_indexed", cnt);
- return prop;
+ public Iterator entries(boolean up, boolean rotating, String firstHash) throws IOException {
+ // enumerates entry elements
+ return new kiter(up, rotating, firstHash);
}
public class kiter implements Iterator {
@@ -445,9 +365,9 @@ public final class plasmaCrawlLURL extends indexURL {
if (e == null) return null;
try {
if (newdb)
- return new plasmaCrawlLURLNewEntry(e, null);
+ return new indexURLEntryNew(e, null);
else
- return new plasmaCrawlLURLOldEntry(e, null);
+ return new indexURLEntryOld(e, null);
} catch (IOException ex) {
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0, null));
}
@@ -459,11 +379,6 @@ public final class plasmaCrawlLURL extends indexURL {
}
- public Iterator entries(boolean up, boolean rotating, String firstHash) throws IOException {
- // enumerates entry elements
- return new kiter(up, rotating, firstHash);
- }
-
/**
* Uses an Iteration over urlHash.db to detect malformed URL-Entries.
* Damaged URL-Entries will be marked in a HashSet and removed at the end of the function.
@@ -578,8 +493,8 @@ public final class plasmaCrawlLURL extends indexURL {
}
}
- plasmaCrawlLURLEntry entry = (plasmaCrawlLURLEntry) eiter.next();
- plasmaCrawlLURLEntry.Components comp = entry.comp();
+ indexURLEntry entry = (indexURLEntry) eiter.next();
+ indexURLEntry.Components comp = entry.comp();
totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, comp.url()) ||
plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, comp.url())) {
@@ -650,7 +565,7 @@ public final class plasmaCrawlLURL extends indexURL {
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), new File(args[2]), 1, 0, false);
final Iterator enu = urls.entries(true, false, null);
while (enu.hasNext()) {
- System.out.println(((plasmaCrawlLURLEntry) enu.next()).toString());
+ System.out.println(((indexURLEntry) enu.next()).toString());
}
} catch (Exception e) {
e.printStackTrace();
diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java
index a3b88c45c..9e07210df 100644
--- a/source/de/anomic/plasma/plasmaCrawlNURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlNURL.java
@@ -51,6 +51,7 @@ import java.util.HashSet;
import java.util.Iterator;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroException;
@@ -62,6 +63,7 @@ import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
+import de.anomic.yacy.yacySeedDB;
public class plasmaCrawlNURL extends indexURL {
@@ -78,18 +80,18 @@ public class plasmaCrawlNURL extends indexURL {
* column length definition for the {@link plasmaURL#urlIndexFile} DB
*/
public final static kelondroRow rowdef = new kelondroRow(
- "String urlhash-" + urlHashLength + ", " + // the url's hash
- "String initiator-" + urlHashLength + ", " + // the crawling initiator
- "String urlstring-" + urlStringLength + ", " + // the url as string
- "String refhash-" + urlHashLength + ", " + // the url's referrer hash
- "String urlname-" + urlNameLength + ", " + // the name of the url, from anchor tag name
- "Cardinal appdate-" + urlDateLength + " {b64e}, " + // the time when the url was first time appeared
- "String profile-" + urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
- "Cardinal depth-" + urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
- "Cardinal parentbr-" + urlParentBranchesLength + " {b64e}, " + // number of anchors of the parent
- "Cardinal forkfactor-" + urlForkFactorLength + " {b64e}, " + // sum of anchors of all ancestors
- "byte[] flags-" + urlFlagLength + ", " + // flags
- "String handle-" + urlHandleLength); // extra handle
+ "String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
+ "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
+ "String urlstring-" + indexRWIEntryOld.urlStringLength + ", " + // the url as string
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "String urlname-" + indexRWIEntryOld.urlNameLength + ", " + // the name of the url, from anchor tag name
+ "Cardinal appdate-" + indexRWIEntryOld.urlDateLength + " {b64e}, " + // the time when the url was first time appeared
+ "String profile-" + indexRWIEntryOld.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
+ "Cardinal depth-" + indexRWIEntryOld.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
+ "Cardinal parentbr-" + indexRWIEntryOld.urlParentBranchesLength + " {b64e}, " + // number of anchors of the parent
+ "Cardinal forkfactor-" + indexRWIEntryOld.urlForkFactorLength + " {b64e}, " + // sum of anchors of all ancestors
+ "byte[] flags-" + indexRWIEntryOld.urlFlagLength + ", " + // flags
+ "String handle-" + indexRWIEntryOld.urlHandleLength); // extra handle
private final plasmaCrawlBalancer coreStack; // links found by crawling to depth-1
private final plasmaCrawlBalancer limitStack; // links found by crawling at target depth
@@ -128,7 +130,7 @@ public class plasmaCrawlNURL extends indexURL {
limitStack = new plasmaCrawlBalancer(limitStackFile);
overhangStack = new plasmaCrawlBalancer(overhangStackFile);
remoteStack = new plasmaCrawlBalancer(remoteStackFile);
- kelondroRow rowdef = new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength);
+ kelondroRow rowdef = new kelondroRow("byte[] urlhash-" + yacySeedDB.commonHashLength);
imageStack = kelondroStack.open(imageStackFile, rowdef);
movieStack = kelondroStack.open(movieStackFile, rowdef);
musicStack = kelondroStack.open(musicStackFile, rowdef);
@@ -257,7 +259,7 @@ public class plasmaCrawlNURL extends indexURL {
private static String normalizeHandle(int h) {
String d = Integer.toHexString(h);
- while (d.length() < urlHandleLength) d = "0" + d;
+ while (d.length() < indexRWIEntryOld.urlHandleLength) d = "0" + d;
return d;
}
@@ -479,7 +481,7 @@ public class plasmaCrawlNURL extends indexURL {
this.depth = depth;
this.anchors = anchors;
this.forkfactor = forkfactor;
- this.flags = new bitfield(urlFlagLength);
+ this.flags = new bitfield(indexRWIEntryOld.urlFlagLength);
this.handle = 0;
this.stored = false;
}
@@ -533,7 +535,7 @@ public class plasmaCrawlNURL extends indexURL {
public void store() {
// stores the values from the object variables into the database
if (this.stored) return;
- String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, urlDateLength);
+ String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
// store the hash in the hash cache
try {
// even if the entry exists, we simply overwrite it
@@ -545,9 +547,9 @@ public class plasmaCrawlNURL extends indexURL {
this.name.getBytes("UTF-8"),
loaddatestr.getBytes(),
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.depth, urlCrawlDepthLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, urlParentBranchesLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, urlForkFactorLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexRWIEntryOld.urlCrawlDepthLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, indexRWIEntryOld.urlParentBranchesLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, indexRWIEntryOld.urlForkFactorLength).getBytes(),
this.flags.getBytes(),
normalizeHandle(this.handle).getBytes()
};
diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java
index e14c3dee0..b8bfeffbf 100644
--- a/source/de/anomic/plasma/plasmaCrawlProfile.java
+++ b/source/de/anomic/plasma/plasmaCrawlProfile.java
@@ -48,7 +48,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
-import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroException;
@@ -68,7 +68,7 @@ public class plasmaCrawlProfile {
this.bufferkb = bufferkb;
this.preloadTime = preloadTime;
profileTableFile.getParentFile().mkdirs();
- kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, indexURL.urlCrawlProfileHandleLength, 2000, '#');
+ kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, indexRWIEntryOld.urlCrawlProfileHandleLength, 2000, '#');
profileTable = new kelondroMap(dyn);
domsCache = new HashMap();
}
@@ -94,7 +94,7 @@ public class plasmaCrawlProfile {
if (profileTable != null) try { profileTable.close(); } catch (IOException e) {}
if (!(profileTableFile.delete())) throw new RuntimeException("cannot delete crawl profile database");
profileTableFile.getParentFile().mkdirs();
- kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, indexURL.urlCrawlProfileHandleLength, 2000, '#');
+ kelondroDyn dyn = kelondroDyn.open(profileTableFile, bufferkb * 1024, preloadTime, indexRWIEntryOld.urlCrawlProfileHandleLength, 2000, '#');
profileTable = new kelondroMap(dyn);
}
@@ -256,7 +256,7 @@ public class plasmaCrawlProfile {
boolean storeHTCache, boolean storeTXCache,
boolean localIndexing, boolean remoteIndexing,
boolean xsstopw, boolean xdstopw, boolean xpstopw) {
- String handle = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(0, indexURL.urlCrawlProfileHandleLength);
+ String handle = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(0, indexRWIEntryOld.urlCrawlProfileHandleLength);
mem = new HashMap();
mem.put("handle", handle);
mem.put("name", name);
diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java
index a4d3bebf1..27fc0d9ed 100644
--- a/source/de/anomic/plasma/plasmaCrawlStacker.java
+++ b/source/de/anomic/plasma/plasmaCrawlStacker.java
@@ -60,6 +60,8 @@ import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.data.robotsParser;
import de.anomic.http.httpc;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroException;
@@ -391,7 +393,7 @@ public final class plasmaCrawlStacker {
checkInterruption();
String nexturlhash = indexURL.urlHash(nexturl);
String dbocc = this.sb.urlPool.exists(nexturlhash);
- plasmaCrawlLURLEntry oldEntry = null;
+ indexURLEntry oldEntry = null;
oldEntry = this.sb.urlPool.loadedURL.load(nexturlhash, null);
boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
if ((dbocc != null) && (!(recrawl))) {
@@ -490,7 +492,7 @@ public final class plasmaCrawlStacker {
this.depth = depth;
this.anchors = anchors;
this.forkfactor = forkfactor;
- this.flags = new bitfield(indexURL.urlFlagLength);
+ this.flags = new bitfield(indexRWIEntryOld.urlFlagLength);
this.handle = 0;
} catch (Exception e) {
e.printStackTrace();
@@ -573,7 +575,7 @@ public final class plasmaCrawlStacker {
public byte[][] getBytes() {
// stores the values from the object variables into the database
- String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexURL.urlDateLength);
+ String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, indexRWIEntryOld.urlDateLength);
// store the hash in the hash cache
// even if the entry exists, we simply overwrite it
@@ -587,9 +589,9 @@ public final class plasmaCrawlStacker {
this.name.getBytes("UTF-8"),
loaddatestr.getBytes(),
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexURL.urlCrawlDepthLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, indexURL.urlParentBranchesLength).getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, indexURL.urlForkFactorLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexRWIEntryOld.urlCrawlDepthLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.anchors, indexRWIEntryOld.urlParentBranchesLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong(this.forkfactor, indexRWIEntryOld.urlForkFactorLength).getBytes(),
this.flags.getBytes(),
normalizeHandle(this.handle).getBytes()
};
@@ -599,7 +601,7 @@ public final class plasmaCrawlStacker {
private String normalizeHandle(int h) {
String d = Integer.toHexString(h);
- while (d.length() < indexURL.urlHandleLength) d = "0" + d;
+ while (d.length() < indexRWIEntryOld.urlHandleLength) d = "0" + d;
return d;
}
}
@@ -1057,7 +1059,7 @@ public final class plasmaCrawlStacker {
yacyCore.seedDB.mySeed.hash,
this.theMsg.name,
rejectReason,
- new bitfield(indexURL.urlFlagLength)
+ new bitfield(indexRWIEntryOld.urlFlagLength)
);
ee.store();
sb.urlPool.errorURL.stackPushEntry(ee);
diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java
index 82873c20d..5fce764b7 100644
--- a/source/de/anomic/plasma/plasmaDHTChunk.java
+++ b/source/de/anomic/plasma/plasmaDHTChunk.java
@@ -48,7 +48,8 @@ import java.util.HashSet;
import java.util.Iterator;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.serverCodings;
@@ -200,8 +201,8 @@ public class plasmaDHTChunk {
Iterator indexContainerIterator = wordIndex.indexContainerSet(hash, resourceLevel, true, maxcount).iterator();
indexContainer container;
Iterator urlIter;
- indexEntry iEntry;
- plasmaCrawlLURLEntry lurl;
+ indexRWIEntry iEntry;
+ indexURLEntry lurl;
int refcount = 0;
int wholesize;
@@ -227,7 +228,7 @@ public class plasmaDHTChunk {
urlIter = container.entries();
// iterate over indexes to fetch url entries and store them in the urlCache
while ((urlIter.hasNext()) && (maxcount > refcount) && (System.currentTimeMillis() < timeout)) {
- iEntry = (indexEntry) urlIter.next();
+ iEntry = (indexRWIEntry) urlIter.next();
lurl = lurls.load(iEntry.urlHash(), iEntry);
if ((lurl == null) || (lurl.comp().url() == null)) {
//yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + container.getWordHash());
@@ -243,7 +244,7 @@ public class plasmaDHTChunk {
// remove all remaining; we have enough
while (urlIter.hasNext()) {
- iEntry = (indexEntry) urlIter.next();
+ iEntry = (indexRWIEntry) urlIter.next();
urlIter.remove();
}
@@ -285,7 +286,7 @@ public class plasmaDHTChunk {
public synchronized String deleteTransferIndexes() {
Iterator urlIter;
- indexEntry iEntry;
+ indexRWIEntry iEntry;
HashSet urlHashes;
String count = "0";
@@ -299,7 +300,7 @@ public class plasmaDHTChunk {
urlHashes = new HashSet(this.indexContainers[i].size());
urlIter = this.indexContainers[i].entries();
while (urlIter.hasNext()) {
- iEntry = (indexEntry) urlIter.next();
+ iEntry = (indexRWIEntry) urlIter.next();
urlHashes.add(iEntry.urlHash());
}
String wordHash = indexContainers[i].getWordHash();
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index 496cd430e..701fa0318 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -90,6 +90,7 @@ import de.anomic.server.serverThread;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.enumerateFiles;
import de.anomic.yacy.yacySeed;
+import de.anomic.yacy.yacySeedDB;
public final class plasmaHTCache {
@@ -173,7 +174,7 @@ public final class plasmaHTCache {
// open the response header database
File dbfile = new File(this.cachePath, "responseHeader.db");
try {
- this.responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, preloadTime, indexURL.urlHashLength, 150, '#'));
+ this.responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, preloadTime, yacySeedDB.commonHashLength, 150, '#'));
} catch (IOException e) {
this.log.logSevere("the request header database could not be opened: " + e.getMessage());
System.exit(0);
@@ -717,7 +718,7 @@ public final class plasmaHTCache {
if (hexHash.indexOf('.') >= 0) return null;
try {
String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.decodeHex(hexHash));
- if (hash.length() == indexURL.urlHashLength) return hash;
+ if (hash.length() == yacySeedDB.commonHashLength) return hash;
return null;
} catch (Exception e) {
//log.logWarning("getHash: " + e.getMessage(), e);
diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java
index 07345cf12..39def7504 100644
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@@ -51,7 +51,8 @@ import java.util.Set;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.server.logging.serverLog;
@@ -379,8 +380,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
//if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty
//if (searchResult.size() == 0) return acc; // case that we have nothing to do
- indexEntry entry;
- plasmaCrawlLURLEntry page;
+ indexRWIEntry entry;
+ indexURLEntry page;
Long preranking;
Object[] preorderEntry;
int minEntries = profileLocal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT);
@@ -388,7 +389,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
while (preorder.hasNext()) {
if ((System.currentTimeMillis() >= postorderLimitTime) && (acc.sizeFetched() >= minEntries)) break;
preorderEntry = preorder.next();
- entry = (indexEntry) preorderEntry[0];
+ entry = (indexRWIEntry) preorderEntry[0];
// load only urls if there was not yet a root url of that hash
preranking = (Long) preorderEntry[1];
// find the url entry
@@ -425,11 +426,11 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
preorder.remove(true, true);
// start url-fetch
- indexEntry entry;
+ indexRWIEntry entry;
try {
while (preorder.hasNext()) {
if (System.currentTimeMillis() >= timeout) break;
- entry = (indexEntry) (preorder.next()[0]);
+ entry = (indexRWIEntry) (preorder.next()[0]);
// find and fetch the url entry
urlStore.load(entry.urlHash(), entry);
}
diff --git a/source/de/anomic/plasma/plasmaSearchImages.java b/source/de/anomic/plasma/plasmaSearchImages.java
index 78a834304..1b80f4ba2 100644
--- a/source/de/anomic/plasma/plasmaSearchImages.java
+++ b/source/de/anomic/plasma/plasmaSearchImages.java
@@ -48,6 +48,7 @@ import java.util.Map;
import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterImageEntry;
+import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverDate;
@@ -101,7 +102,7 @@ public final class plasmaSearchImages {
public plasmaSearchImages(plasmaSnippetCache sc, long maxTime, plasmaSearchResult sres, int depth) {
long start = System.currentTimeMillis();
this.images = new TreeSet();
- plasmaCrawlLURLEntry urlentry;
+ indexURLEntry urlentry;
while (sres.hasMoreElements()) {
urlentry = sres.nextElement();
addAll(new plasmaSearchImages(sc, serverDate.remainingTime(start, maxTime, 10), urlentry.comp().url(), depth));
diff --git a/source/de/anomic/plasma/plasmaSearchPreOrder.java b/source/de/anomic/plasma/plasmaSearchPreOrder.java
index 4985859f4..5b50aa0c9 100644
--- a/source/de/anomic/plasma/plasmaSearchPreOrder.java
+++ b/source/de/anomic/plasma/plasmaSearchPreOrder.java
@@ -50,7 +50,7 @@ import java.util.Map;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBinSearch;
import de.anomic.server.serverCodings;
@@ -61,7 +61,7 @@ public final class plasmaSearchPreOrder {
public static kelondroBinSearch[] ybrTables = null; // block-rank tables
private static boolean useYBR = true;
- private indexEntry entryMin, entryMax;
+ private indexRWIEntry entryMin, entryMax;
private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry
private plasmaSearchQuery query;
private plasmaSearchRankingProfile ranking;
@@ -79,7 +79,7 @@ public final class plasmaSearchPreOrder {
this.ranking = ranking;
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
- indexEntry iEntry;
+ indexRWIEntry iEntry;
// first pass: find min/max to obtain limits for normalization
Iterator i = container.entries();
@@ -88,9 +88,9 @@ public final class plasmaSearchPreOrder {
this.entryMax = null;
while (i.hasNext()) {
if (System.currentTimeMillis() > limitTime) break;
- iEntry = (indexEntry) i.next();
- if (this.entryMin == null) this.entryMin = (indexEntry) iEntry.clone(); else this.entryMin.min(iEntry);
- if (this.entryMax == null) this.entryMax = (indexEntry) iEntry.clone(); else this.entryMax.max(iEntry);
+ iEntry = (indexRWIEntry) i.next();
+ if (this.entryMin == null) this.entryMin = (indexRWIEntry) iEntry.clone(); else this.entryMin.min(iEntry);
+ if (this.entryMax == null) this.entryMax = (indexRWIEntry) iEntry.clone(); else this.entryMax.max(iEntry);
count++;
}
@@ -98,7 +98,7 @@ public final class plasmaSearchPreOrder {
i = container.entries();
this.pageAcc = new TreeMap();
for (int j = 0; j < count; j++) {
- iEntry = (indexEntry) i.next();
+ iEntry = (indexRWIEntry) i.next();
pageAcc.put(serverCodings.encodeHex(Long.MAX_VALUE - this.ranking.preRanking(iEntry.generateNormalized(this.entryMin, this.entryMax), query.words("")), 16) + iEntry.urlHash(), iEntry);
}
}
@@ -110,13 +110,13 @@ public final class plasmaSearchPreOrder {
HashSet doubleDoms = new HashSet();
Iterator i = pageAcc.entrySet().iterator();
Map.Entry entry;
- indexEntry iEntry;
+ indexRWIEntry iEntry;
String hashpart;
boolean isWordRootURL;
while (i.hasNext()) {
if (pageAcc.size() <= query.wantedResults) break;
entry = (Map.Entry) i.next();
- iEntry = (indexEntry) entry.getValue();
+ iEntry = (indexRWIEntry) entry.getValue();
hashpart = iEntry.urlHash().substring(6);
isWordRootURL = indexURL.isWordRootURL(iEntry.urlHash(), query.words(""));
if ((!(isWordRootURL)) &&
@@ -192,11 +192,11 @@ public final class plasmaSearchPreOrder {
e.printStackTrace();
preranking = new Long(0);
}
- return new Object[]{(indexEntry) pageAcc.remove(top), preranking};
+ return new Object[]{(indexRWIEntry) pageAcc.remove(top), preranking};
}
- public indexEntry[] getNormalizer() {
- return new indexEntry[] {entryMin, entryMax};
+ public indexRWIEntry[] getNormalizer() {
+ return new indexRWIEntry[] {entryMin, entryMax};
}
public static int ybr_p(String urlHash) {
diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java
index cd4874827..03a7230cc 100644
--- a/source/de/anomic/plasma/plasmaSearchQuery.java
+++ b/source/de/anomic/plasma/plasmaSearchQuery.java
@@ -51,6 +51,7 @@ import de.anomic.htmlFilter.htmlFilterAbstractScraper;
import de.anomic.index.indexEntryAttribute;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.serverCharBuffer;
+import de.anomic.yacy.yacySeedDB;
public final class plasmaSearchQuery {
@@ -120,16 +121,16 @@ public final class plasmaSearchQuery {
public static Set hashes2Set(String query) {
if (query == null) return new HashSet();
- final HashSet keyhashes = new HashSet(query.length() / indexEntryAttribute.wordHashLength);
- for (int i = 0; i < (query.length() / indexEntryAttribute.wordHashLength); i++) {
- keyhashes.add(query.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength));
+ final HashSet keyhashes = new HashSet(query.length() / yacySeedDB.commonHashLength);
+ for (int i = 0; i < (query.length() / yacySeedDB.commonHashLength); i++) {
+ keyhashes.add(query.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength));
}
return keyhashes;
}
public static String hashSet2hashString(Set words) {
Iterator i = words.iterator();
- StringBuffer sb = new StringBuffer(words.size() * indexEntryAttribute.wordHashLength);
+ StringBuffer sb = new StringBuffer(words.size() * yacySeedDB.commonHashLength);
while (i.hasNext()) sb.append((String) i.next());
return new String(sb);
}
diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
index b628ed45b..2ad91fc4f 100644
--- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
@@ -46,8 +46,9 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
public class plasmaSearchRankingProfile {
@@ -164,7 +165,7 @@ public class plasmaSearchRankingProfile {
return new String(ext);
}
- public long preRanking(indexEntry normalizedEntry, String searchedWord) {
+ public long preRanking(indexRWIEntry normalizedEntry, String searchedWord) {
// the normalizedEntry must be a normalized indexEntry
long ranking = 0;
ranking += normalizedEntry.quality() << ((Integer) coeff.get(ENTROPY)).intValue();
@@ -191,13 +192,13 @@ public class plasmaSearchRankingProfile {
Set topwords,
String[] urlcomps,
String[] descrcomps,
- plasmaCrawlLURLEntry page) {
+ indexURLEntry page) {
// apply pre-calculated order attributes
long ranking = preranking;
// prefer hit with 'prefer' pattern
- plasmaCrawlLURLEntry.Components comp = page.comp();
+ indexURLEntry.Components comp = page.comp();
if (comp.url().toNormalform().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();
if (comp.descr().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();
diff --git a/source/de/anomic/plasma/plasmaSearchResult.java b/source/de/anomic/plasma/plasmaSearchResult.java
index 0878c2350..8e5265fe0 100644
--- a/source/de/anomic/plasma/plasmaSearchResult.java
+++ b/source/de/anomic/plasma/plasmaSearchResult.java
@@ -54,6 +54,7 @@ import java.util.TreeMap;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.net.URL;
import de.anomic.server.serverCodings;
@@ -99,16 +100,16 @@ public final class plasmaSearchResult {
return pageAcc.size() > 0;
}
- public plasmaCrawlLURLEntry nextElement() {
+ public indexURLEntry nextElement() {
Object top = pageAcc.firstKey();
//System.out.println("postorder-key: " + ((String) top));
- return (plasmaCrawlLURLEntry) pageAcc.remove(top);
+ return (indexURLEntry) pageAcc.remove(top);
}
- protected void addResult(plasmaCrawlLURLEntry page, Long preranking) {
+ protected void addResult(indexURLEntry page, Long preranking) {
// take out relevant information for reference computation
- plasmaCrawlLURLEntry.Components comp = page.comp();
+ indexURLEntry.Components comp = page.comp();
if ((comp.url() == null) || (comp.descr() == null)) return;
String[] urlcomps = htmlFilterContentScraper.urlComps(comp.url().toNormalform()); // word components of the url
String[] descrcomps = comp.descr().toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description
@@ -131,12 +132,12 @@ public final class plasmaSearchResult {
for (int i = 0; i < references.length; i++) commonSense.add(references[i]);
Object[] resultVector;
- plasmaCrawlLURLEntry page;
+ indexURLEntry page;
long ranking;
for (int i = 0; i < results.size(); i++) {
// take out values from result array
resultVector = (Object[]) results.get(i);
- page = (plasmaCrawlLURLEntry) resultVector[0];
+ page = (indexURLEntry) resultVector[0];
// calculate ranking
if (postsort)
@@ -172,7 +173,7 @@ public final class plasmaSearchResult {
// first scan all entries and find all urls that are referenced
while (i.hasNext()) {
entry = (Map.Entry) i.next();
- path = urlPath(((plasmaCrawlLURLEntry) entry.getValue()).comp().url());
+ path = urlPath(((indexURLEntry) entry.getValue()).comp().url());
paths.put(path, entry.getKey());
//if (path != null) path = shortenPath(path);
//if (path != null) paths.put(path, entry.getKey());
@@ -183,7 +184,7 @@ public final class plasmaSearchResult {
String shorten;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
- path = urlPath(((plasmaCrawlLURLEntry) entry.getValue()).comp().url());
+ path = urlPath(((indexURLEntry) entry.getValue()).comp().url());
shorten = shortenPath(path);
// scan all subpaths of the url
while (shorten != null) {
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 503570692..76e798362 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -58,6 +58,7 @@ import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.net.URL;
import de.anomic.plasma.cache.IResourceInfo;
@@ -630,12 +631,12 @@ public class plasmaSnippetCache {
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, long maxTime) {
// fetch snippets
int i = 0;
- plasmaCrawlLURLEntry urlentry;
+ indexURLEntry urlentry;
String urlstring;
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
while ((acc.hasMoreElements()) && (i < fetchcount) && (System.currentTimeMillis() < limitTime)) {
urlentry = acc.nextElement();
- plasmaCrawlLURLEntry.Components comp = urlentry.comp();
+ indexURLEntry.Components comp = urlentry.comp();
if (comp.url().getHost().endsWith(".yacyh")) continue;
urlstring = comp.url().toNormalform();
if ((urlstring.matches(urlmask)) &&
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 457fff15c..572120f86 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -132,9 +132,10 @@ import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
@@ -1429,14 +1430,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
private plasmaParserDocument parseResource(plasmaSwitchboardQueue.Entry entry, String initiatorHash) throws InterruptedException, ParserException {
- plasmaParserDocument document = null;
-
+
// the mimetype of this entry
String mimeType = entry.getMimeType();
String charset = entry.getCharacterEncoding();
// the parser logger
- serverLog parserLogger = parser.getLogger();
+ //serverLog parserLogger = parser.getLogger();
// parse the document
return parseResource(entry.url(), mimeType, charset, entry.cacheFile());
@@ -1497,7 +1497,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (document == null) return;
} catch (ParserException e) {
this.log.logInfo("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage());
- addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorPeerHash, entry.anchorName(), e.getErrorCode(), new bitfield(indexURL.urlFlagLength));
+ addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorPeerHash, entry.anchorName(), e.getErrorCode(), new bitfield(indexRWIEntryOld.urlFlagLength));
if (document != null) {
document.close();
document = null;
@@ -1574,7 +1574,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
checkInterruption();
// create a new loaded URL db entry
- plasmaCrawlLURLEntry newEntry = urlPool.loadedURL.newEntry(
+ indexURLEntry newEntry = urlPool.loadedURL.newEntry(
entry.url(), // URL
docDescription, // document description
"", // author
@@ -1660,7 +1660,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String language = indexEntryAttribute.language(entry.url());
char doctype = indexEntryAttribute.docType(document.getMimeType());
- plasmaCrawlLURLEntry.Components comp = newEntry.comp();
+ indexURLEntry.Components comp = newEntry.comp();
int urlLength = comp.url().toNormalform().length();
int urlComps = htmlFilterContentScraper.urlComps(comp.url().toNormalform()).length;
@@ -1673,7 +1673,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String word = (String) wentry.getKey();
wordStat = (plasmaCondenser.wordStatProp) wentry.getValue();
String wordHash = indexEntryAttribute.word2hash(word);
- indexEntry wordIdxEntry = new indexURLEntry(
+ indexRWIEntry wordIdxEntry = new indexRWIEntryOld(
urlHash,
urlLength, urlComps,
wordStat.count,
@@ -1764,7 +1764,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
} else {
log.logFine("Not Indexed Resource '" + entry.normalizedURLString() + "': process case=" + processCase);
- addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new bitfield(indexURL.urlFlagLength));
+ addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new bitfield(indexRWIEntryOld.urlFlagLength));
}
} catch (Exception ee) {
if (ee instanceof InterruptedException) throw (InterruptedException)ee;
@@ -1776,7 +1776,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) {
yacyClient.crawlReceipt(initiatorPeer, "crawl", "exception", ee.getMessage(), null, "");
}
- addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new bitfield(indexURL.urlFlagLength));
+ addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new bitfield(indexRWIEntryOld.urlFlagLength));
}
} else {
@@ -1784,7 +1784,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
checkInterruption();
log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason);
- addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, noIndexReason, new bitfield(indexURL.urlFlagLength));
+ addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, noIndexReason, new bitfield(indexRWIEntryOld.urlFlagLength));
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) {
yacyClient.crawlReceipt(initiatorPeer, "crawl", "rejected", noIndexReason, null, "");
}
@@ -1991,7 +1991,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String lurl = (String) page.get("lurl");
if ((lurl != null) && (lurl.length() != 0)) {
String propStr = crypt.simpleDecode(lurl, (String) page.get("key"));
- plasmaCrawlLURLEntry entry = urlPool.loadedURL.newEntry(propStr);
+ indexURLEntry entry = urlPool.loadedURL.newEntry(propStr);
urlPool.loadedURL.store(entry);
urlPool.loadedURL.stack(entry, yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1); // *** ueberfluessig/doppelt?
urlPool.noticeURL.remove(entry.hash());
@@ -2070,7 +2070,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
prop.put("type_globalresults", acc.globalContributions);
int i = 0;
int p;
- plasmaCrawlLURLEntry urlentry;
+ indexURLEntry urlentry;
String urlstring, urlname, filename, urlhash;
String host, hash, address;
yacySeed seed;
@@ -2081,7 +2081,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 1000;
while ((acc.hasMoreElements()) && (i < query.wantedResults) && (System.currentTimeMillis() < targetTime)) {
urlentry = acc.nextElement();
- plasmaCrawlLURLEntry.Components comp = urlentry.comp();
+ indexURLEntry.Components comp = urlentry.comp();
urlhash = urlentry.hash();
assert (urlhash != null);
assert (urlhash.length() == 12) : "urlhash = " + urlhash;
@@ -2218,9 +2218,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// finally, delete the url entry
// determine the url string
- plasmaCrawlLURLEntry entry = urlPool.loadedURL.load(urlhash, null);
+ indexURLEntry entry = urlPool.loadedURL.load(urlhash, null);
if (entry == null) return 0;
- plasmaCrawlLURLEntry.Components comp = entry.comp();
+ indexURLEntry.Components comp = entry.comp();
if (comp.url() == null) return 0;
InputStream resourceContent = null;
diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
index 584d1ff53..b7d4893e1 100644
--- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
@@ -51,6 +51,8 @@ import java.util.ArrayList;
import java.util.Date;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow;
@@ -79,14 +81,14 @@ public class plasmaSwitchboardQueue {
private void initQueueStack() {
kelondroRow rowdef = new kelondroRow(
- "String url-" + indexURL.urlStringLength + ", " + // the url
- "String refhash-" + indexURL.urlHashLength + ", " + // the url's referrer hash
- "Cardinal modifiedsince-11" + " {b64e}, " + // from ifModifiedSince
- "byte[] flags-1" + ", " + // flags
- "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
- "Cardinal depth-" + indexURL.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
- "String profile-" + indexURL.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
- "String urldescr-" + indexURL.urlDescrLength); //
+ "String url-" + yacySeedDB.commonHashLength + ", " + // the url
+ "String refhash-" + yacySeedDB.commonHashLength + ", " + // the url's referrer hash
+ "Cardinal modifiedsince-11" + " {b64e}, " + // from ifModifiedSince
+ "byte[] flags-1" + ", " + // flags
+ "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
+ "Cardinal depth-" + indexRWIEntryOld.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
+ "String profile-" + indexRWIEntryOld.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
+ "String urldescr-" + indexRWIEntryOld.urlDescrLength); //
sbQueueStack = kelondroStack.open(sbQueueStackPath, rowdef);
}
@@ -108,7 +110,7 @@ public class plasmaSwitchboardQueue {
kelondroBase64Order.enhancedCoder.encodeLong((entry.ifModifiedSince == null) ? 0 : entry.ifModifiedSince.getTime(), 11).getBytes(),
new byte[]{entry.flags},
(entry.initiator == null) ? indexURL.dummyHash.getBytes() : entry.initiator.getBytes(),
- kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, indexURL.urlCrawlDepthLength).getBytes(),
+ kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, indexRWIEntryOld.urlCrawlDepthLength).getBytes(),
(entry.profileHandle == null) ? indexURL.dummyHash.getBytes() : entry.profileHandle.getBytes(),
(entry.anchorName == null) ? "-".getBytes("UTF-8") : entry.anchorName.getBytes("UTF-8")
}));
@@ -333,7 +335,7 @@ public class plasmaSwitchboardQueue {
public URL referrerURL() {
if (referrerURL == null) {
if ((referrerHash == null) || (referrerHash.equals(indexURL.dummyHash))) return null;
- plasmaCrawlLURLEntry entry = lurls.load(referrerHash, null);
+ indexURLEntry entry = lurls.load(referrerHash, null);
if (entry == null) referrerURL = null; else referrerURL = entry.comp().url();
}
return referrerURL;
diff --git a/source/de/anomic/plasma/plasmaURLPool.java b/source/de/anomic/plasma/plasmaURLPool.java
index f861d748a..08ecb0e65 100644
--- a/source/de/anomic/plasma/plasmaURLPool.java
+++ b/source/de/anomic/plasma/plasmaURLPool.java
@@ -48,6 +48,7 @@ import java.io.File;
import java.io.IOException;
import de.anomic.index.indexURL;
+import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
public class plasmaURLPool {
@@ -83,7 +84,7 @@ public class plasmaURLPool {
plasmaCrawlNURL.Entry ne = noticeURL.getEntry(urlhash);
if (ne != null) return ne.url();
} catch (IOException e) {}
- plasmaCrawlLURLEntry le = loadedURL.load(urlhash, null);
+ indexURLEntry le = loadedURL.load(urlhash, null);
if (le != null) return le.comp().url();
plasmaCrawlEURL.Entry ee = errorURL.getEntry(urlhash);
if (ee != null) return ee.url();
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 8ab5079c4..805dc49ea 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -40,10 +40,11 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexCollectionRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexRAMRI;
import de.anomic.index.indexRI;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
@@ -60,7 +61,7 @@ public final class plasmaWordIndex implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
- private static final kelondroRow payloadrow = indexURLEntry.urlEntryRow;
+ private static final kelondroRow payloadrow = indexRWIEntryOld.urlEntryRow;
private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
@@ -201,7 +202,7 @@ public final class plasmaWordIndex implements indexRI {
return new indexContainer(wordHash, payloadrow);
}
- public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
+ public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) {
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;
@@ -318,7 +319,7 @@ public final class plasmaWordIndex implements indexRI {
Iterator i = condenser.words();
Map.Entry wentry;
String word;
- indexEntry ientry;
+ indexRWIEntry ientry;
plasmaCondenser.wordStatProp wprop;
String wordHash;
int urlLength = url.toString().length();
@@ -330,7 +331,7 @@ public final class plasmaWordIndex implements indexRI {
wprop = (plasmaCondenser.wordStatProp) wentry.getValue();
// if ((s.length() > 4) && (c > 1)) System.out.println("# " + s + ":" + c);
wordHash = indexEntryAttribute.word2hash(word);
- ientry = new indexURLEntry(urlHash,
+ ientry = new indexRWIEntryOld(urlHash,
urlLength, urlComps, (document == null) ? urlLength : document.getMainLongTitle().length(),
wprop.count,
condenser.RESULT_SIMI_WORDS,
@@ -685,11 +686,11 @@ public final class plasmaWordIndex implements indexRI {
// the combined container will fit, read the container
try {
Iterator entries = entity.elements(true);
- indexEntry entry;
+ indexRWIEntry entry;
while (entries.hasNext()) {
- entry = (indexEntry) entries.next();
+ entry = (indexRWIEntry) entries.next();
// System.out.println("ENTRY = " + entry.getUrlHash());
- container.add(new indexEntry[]{entry}, System.currentTimeMillis());
+ container.add(new indexRWIEntry[]{entry}, System.currentTimeMillis());
}
// we have read all elements, now delete the entity
entity.deleteComplete();
@@ -723,11 +724,11 @@ public final class plasmaWordIndex implements indexRI {
try {
Iterator entries = entity.elements(true);
- indexEntry entry;
+ indexRWIEntry entry;
while (entries.hasNext()) {
- entry = (indexEntry) entries.next();
+ entry = (indexRWIEntry) entries.next();
// System.out.println("ENTRY = " + entry.getUrlHash());
- container.add(new indexEntry[] { entry }, System.currentTimeMillis());
+ container.add(new indexRWIEntry[] { entry }, System.currentTimeMillis());
}
// we have read all elements, now delete the entity
entity.deleteComplete();
@@ -775,7 +776,7 @@ public final class plasmaWordIndex implements indexRI {
public void run() {
serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread started");
indexContainer container = null;
- indexEntry entry = null;
+ indexRWIEntry entry = null;
URL url = null;
HashSet urlHashs = new HashSet();
try {
@@ -787,9 +788,9 @@ public final class plasmaWordIndex implements indexRI {
wordHashNow = container.getWordHash();
while (containerIterator.hasNext() && run) {
waiter();
- entry = (indexEntry) containerIterator.next();
+ entry = (indexRWIEntry) containerIterator.next();
// System.out.println("Wordhash: "+wordHash+" UrlHash: "+entry.getUrlHash());
- plasmaCrawlLURLEntry ue = lurl.load(entry.urlHash(), null);
+ indexURLEntry ue = lurl.load(entry.urlHash(), null);
if (ue == null) {
urlHashs.add(entry.urlHash());
} else {
diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java
index 10215e064..85356539a 100644
--- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java
+++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java
@@ -57,15 +57,15 @@ import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
-import de.anomic.index.indexEntryAttribute;
-import de.anomic.index.indexURLEntry;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroColumn;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.server.logging.serverLog;
+import de.anomic.yacy.yacySeedDB;
public final class plasmaWordIndexAssortment {
@@ -89,7 +89,7 @@ public final class plasmaWordIndexAssortment {
private kelondroRow bufferStructure(int assortmentCapacity) {
kelondroColumn[] structure = new kelondroColumn[3 + assortmentCapacity];
- structure[0] = new kelondroColumn("byte[] wordhash-" + indexEntryAttribute.wordHashLength);
+ structure[0] = new kelondroColumn("byte[] wordhash-" + yacySeedDB.commonHashLength);
structure[1] = new kelondroColumn("Cardinal occ-4 {b256}");
structure[2] = new kelondroColumn("Cardinal time-8 {b256}");
kelondroColumn p = new kelondroColumn("byte[] urlprops-" + payloadrow.objectsize());
@@ -98,7 +98,7 @@ public final class plasmaWordIndexAssortment {
}
private int assortmentCapacity(int rowsize) {
- return (rowsize - indexEntryAttribute.wordHashLength - 12) / payloadrow.objectsize();
+ return (rowsize - yacySeedDB.commonHashLength - 12) / payloadrow.objectsize();
}
public plasmaWordIndexAssortment(File storagePath, kelondroRow payloadrow, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException {
@@ -133,9 +133,9 @@ public final class plasmaWordIndexAssortment {
row.setCol(1, 1);
row.setCol(2, newContainer.updated());
Iterator entries = newContainer.entries();
- indexEntry entry;
+ indexRWIEntry entry;
for (int i = 0; i < assortmentLength; i++) {
- entry = (indexEntry) entries.next();
+ entry = (indexRWIEntry) entries.next();
row.setCol(3 + i, entry.toKelondroEntry().bytes());
}
kelondroRow.Entry oldrow = null;
@@ -221,7 +221,7 @@ public final class plasmaWordIndexAssortment {
indexContainer container = new indexContainer(wordHash, payloadrow);
int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) {
- container.add(new indexEntry[] { new indexURLEntry(row.getColBytes(3 + i)) }, updateTime);
+ container.add(new indexRWIEntry[] { new indexRWIEntryOld(row.getColBytes(3 + i)) }, updateTime);
}
return container;
}
diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
index c26ab24e9..983f1307e 100644
--- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
+++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
@@ -54,7 +54,7 @@ import java.util.Set;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRI;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroMergeIterator;
@@ -168,7 +168,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
c = new indexContainer(newContainer.getWordHash(), payloadrow);
for (int k = 0; k < j; k++) {
if (i.hasNext()) {
- c.add((indexEntry) i.next(), newContainer.updated());
+ c.add((indexRWIEntry) i.next(), newContainer.updated());
} else {
storeForced(c);
return;
@@ -178,7 +178,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
}
}
- public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
+ public indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, payloadrow);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
@@ -223,7 +223,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
c = new indexContainer(newContainer.getWordHash(), payloadrow);
for (int k = 0; k <= j; k++) {
assert (i.hasNext());
- c.add((indexEntry) i.next(), newContainer.updated());
+ c.add((indexRWIEntry) i.next(), newContainer.updated());
}
try {
storeForced(c);
@@ -306,9 +306,9 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
if (buffer != null) {
// sort out url hashes that shall be deleted
Iterator bi = buffer.entries();
- indexEntry entry;
+ indexRWIEntry entry;
while (bi.hasNext()) {
- entry = (indexEntry) bi.next();
+ entry = (indexRWIEntry) bi.next();
if (urlHashes.remove(entry.urlHash())) bi.remove();
}
record.add(buffer, -1);
diff --git a/source/de/anomic/plasma/plasmaWordIndexFile.java b/source/de/anomic/plasma/plasmaWordIndexFile.java
index 4eb5785c0..68ed2691f 100644
--- a/source/de/anomic/plasma/plasmaWordIndexFile.java
+++ b/source/de/anomic/plasma/plasmaWordIndexFile.java
@@ -49,13 +49,13 @@ import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
-import de.anomic.index.indexURL;
-import de.anomic.index.indexURLEntry;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.server.logging.serverLog;
+import de.anomic.yacy.yacySeedDB;
public final class plasmaWordIndexFile {
@@ -91,7 +91,7 @@ public final class plasmaWordIndexFile {
long cacheSize = theLocation.length();
if (cacheSize > 1048576) cacheSize = 1048576;
return kelondroTree.open(theLocation, cacheSize, 0,
- new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength + ", byte[] ba-" + (indexURLEntry.urlEntryRow.objectsize() - indexURL.urlHashLength)));
+ new kelondroRow("byte[] urlhash-" + yacySeedDB.commonHashLength + ", byte[] ba-" + (indexRWIEntryOld.urlEntryRow.objectsize() - yacySeedDB.commonHashLength)));
}
public static File wordHash2path(File databaseRoot, String hash) {
@@ -128,23 +128,23 @@ public final class plasmaWordIndexFile {
} catch (IOException e) {}
}
- public indexEntry getEntry(String urlhash) throws IOException {
+ public indexRWIEntry getEntry(String urlhash) throws IOException {
kelondroRow.Entry n = theIndex.get(urlhash.getBytes());
if (n == null) return null;
- return new indexURLEntry(n.getColString(0, null), n.getColString(1, null));
+ return new indexRWIEntryOld(n.getColString(0, null), n.getColString(1, null));
}
public boolean contains(String urlhash) throws IOException {
return (theIndex.get(urlhash.getBytes()) != null);
}
- public boolean contains(indexEntry entry) throws IOException {
+ public boolean contains(indexRWIEntry entry) throws IOException {
return (theIndex.get(entry.urlHash().getBytes()) != null);
}
- public boolean addEntry(indexEntry entry) throws IOException {
+ public boolean addEntry(indexRWIEntry entry) throws IOException {
if (entry == null) return false;
- indexEntry oldEntry = getEntry(entry.urlHash());
+ indexRWIEntry oldEntry = getEntry(entry.urlHash());
if ((oldEntry != null) && (entry.isOlder(oldEntry))) { // A more recent Entry is already in this entity
return false;
}
@@ -163,7 +163,7 @@ public final class plasmaWordIndexFile {
if (container != null) {
Iterator i = container.entries();
while (i.hasNext()) {
- if (addEntry((indexEntry) i.next())) count++;
+ if (addEntry((indexRWIEntry) i.next())) count++;
}
}
@@ -228,7 +228,7 @@ public final class plasmaWordIndexFile {
public Object next() {
if (i == null) return null;
kelondroRow.Entry n = (kelondroRow.Entry) i.next();
- return new indexURLEntry(n.getColString(0, null), n.getColString(1, null));
+ return new indexRWIEntryOld(n.getColString(0, null), n.getColString(1, null));
}
public void remove() {
throw new UnsupportedOperationException();
@@ -248,7 +248,7 @@ public final class plasmaWordIndexFile {
long timeout = (time == -1) ? Long.MAX_VALUE : System.currentTimeMillis() + time;
try {
while ((i.hasNext()) && (System.currentTimeMillis() < timeout)) {
- addEntry((indexEntry) i.next());
+ addEntry((indexRWIEntry) i.next());
}
} catch (kelondroException e) {
serverLog.logSevere("PLASMA", "plasmaWordIndexEntity.merge: " + e.getMessage());
diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java
index 961033617..720c01ac4 100644
--- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java
+++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java
@@ -51,7 +51,7 @@ import java.util.Set;
import java.util.TreeSet;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRI;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRow;
@@ -235,10 +235,10 @@ public class plasmaWordIndexFileCluster implements indexRI {
if (exists(wordHash)) {
plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
indexContainer container = new indexContainer(wordHash, payloadrow);
- indexEntry entry;
+ indexRWIEntry entry;
Iterator i = entity.elements(true);
while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) {
- entry = (indexEntry) i.next();
+ entry = (indexRWIEntry) i.next();
if ((urlselection == null) || (urlselection.contains(entry.urlHash()))) container.add(entry);
}
return container;
@@ -302,7 +302,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
} else return 0;
}
- public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
+ public indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, payloadrow);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 3bea9a0dd..76b117276 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -55,14 +55,14 @@ import java.util.TreeMap;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
@@ -491,33 +491,33 @@ public final class yacyClient {
//System.out.println("***result count " + results);
// create containers
- final int words = wordhashes.length() / indexEntryAttribute.wordHashLength;
+ final int words = wordhashes.length() / yacySeedDB.commonHashLength;
indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) {
- container[i] = new indexContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength), indexURLEntry.urlEntryRow);
+ container[i] = new indexContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength), indexRWIEntryOld.urlEntryRow);
}
// insert results to containers
- plasmaCrawlLURLEntry urlEntry;
+ indexURLEntry urlEntry;
String[] urls = new String[results];
for (int n = 0; n < results; n++) {
// get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n));
if (urlEntry == null) continue;
assert (urlEntry.hash().length() == 12) : "urlEntry.hash() = " + urlEntry.hash();
- plasmaCrawlLURLEntry.Components comp = urlEntry.comp();
+ indexURLEntry.Components comp = urlEntry.comp();
if (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, comp.url())) continue; // block with backlist
urlManager.store(urlEntry);
urlManager.stack(urlEntry, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2);
// save the url entry
- final indexEntry entry;
+ final indexRWIEntry entry;
if (urlEntry.word() == null) {
// the old way to define words
int urlLength = comp.url().toNormalform().length();
int urlComps = htmlFilterContentScraper.urlComps(comp.url().toNormalform()).length;
- entry = new indexURLEntry(
+ entry = new indexRWIEntryOld(
urlEntry.hash(),
urlLength,
urlComps,
@@ -545,7 +545,7 @@ public final class yacyClient {
}
// add the url entry to the word indexes
for (int m = 0; m < words; m++) {
- container[m].add(new indexEntry[]{entry}, System.currentTimeMillis());
+ container[m].add(new indexRWIEntry[]{entry}, System.currentTimeMillis());
}
// store url hash for statistics
urls[n] = urlEntry.hash();
@@ -869,7 +869,7 @@ public final class yacyClient {
-er crawlt, Ergebnis erscheint aber unter falschem initiator
*/
- public static HashMap crawlReceipt(yacySeed targetSeed, String process, String result, String reason, plasmaCrawlLURLEntry entry, String wordhashes) {
+ public static HashMap crawlReceipt(yacySeed targetSeed, String process, String result, String reason, indexURLEntry entry, String wordhashes) {
if (targetSeed == null) { return null; }
if (yacyCore.seedDB.mySeed == null) { return null; }
if (yacyCore.seedDB.mySeed == targetSeed) { return null; }
@@ -943,11 +943,11 @@ public final class yacyClient {
// check if we got all necessary urls in the urlCache (only for debugging)
Iterator eenum;
- indexEntry entry;
+ indexRWIEntry entry;
for (int i = 0; i < indexes.length; i++) {
eenum = indexes[i].entries();
while (eenum.hasNext()) {
- entry = (indexEntry) eenum.next();
+ entry = (indexRWIEntry) eenum.next();
if (urlCache.get(entry.urlHash()) == null) {
yacyCore.log.logFine("DEBUG transferIndex: to-send url hash '" + entry.urlHash() + "' is not contained in urlCache");
}
@@ -988,9 +988,9 @@ public final class yacyClient {
if (uhs.length == 0) { return resultObj; } // all url's known
// extract the urlCache from the result
- plasmaCrawlLURLEntry[] urls = new plasmaCrawlLURLEntry[uhs.length];
+ indexURLEntry[] urls = new indexURLEntry[uhs.length];
for (int i = 0; i < uhs.length; i++) {
- urls[i] = (plasmaCrawlLURLEntry) urlCache.get(uhs[i]);
+ urls[i] = (indexURLEntry) urlCache.get(uhs[i]);
if (urls[i] == null) {
yacyCore.log.logFine("DEBUG transferIndex: requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
}
@@ -1051,11 +1051,11 @@ public final class yacyClient {
int indexcount = 0;
final StringBuffer entrypost = new StringBuffer(indexes.length*73);
Iterator eenum;
- indexEntry entry;
+ indexRWIEntry entry;
for (int i = 0; i < indexes.length; i++) {
eenum = indexes[i].entries();
while (eenum.hasNext()) {
- entry = (indexEntry) eenum.next();
+ entry = (indexRWIEntry) eenum.next();
entrypost.append(indexes[i].getWordHash())
.append(entry.toPropertyForm(false))
.append(serverCore.crlfString);
@@ -1099,7 +1099,7 @@ public final class yacyClient {
}
}
- private static HashMap transferURL(yacySeed targetSeed, plasmaCrawlLURLEntry[] urls, boolean gzipBody, int timeout) {
+ private static HashMap transferURL(yacySeed targetSeed, indexURLEntry[] urls, boolean gzipBody, int timeout) {
// this post a message to the remote message board
final String address = targetSeed.getAddress();
if (address == null) { return null; }
diff --git a/source/yacy.java b/source/yacy.java
index b648e9e2d..08dcfd6d2 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -71,10 +71,11 @@ import de.anomic.http.httpd;
import de.anomic.http.httpdFileHandler;
import de.anomic.http.httpdProxyHandler;
import de.anomic.index.indexContainer;
-import de.anomic.index.indexEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexEntryAttribute;
-import de.anomic.index.indexURL;
+import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
+import de.anomic.index.indexURLEntryOld;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMap;
@@ -83,8 +84,6 @@ import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLURL;
-import de.anomic.plasma.plasmaCrawlLURLEntry;
-import de.anomic.plasma.plasmaCrawlLURLOldEntry;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPool;
@@ -623,7 +622,7 @@ public final class yacy {
kelondroMScoreCluster hs = new kelondroMScoreCluster();
while (ef.hasMoreElements()) {
f = (File) ef.nextElement();
- h = f.getName().substring(0, indexURL.urlHashLength);
+ h = f.getName().substring(0, yacySeedDB.commonHashLength);
hs.addScore(h, (int) f.length());
}
@@ -740,12 +739,12 @@ public final class yacy {
// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxContainer.entries();
- indexEntry iEntry;
+ indexRWIEntry iEntry;
while (wordIdxEntries.hasNext()) {
- iEntry = (indexEntry) wordIdxEntries.next();
+ iEntry = (indexRWIEntry) wordIdxEntries.next();
String urlHash = iEntry.urlHash();
if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
- plasmaCrawlLURLEntry urlEntry = currentUrlDB.load(urlHash, null);
+ indexURLEntry urlEntry = currentUrlDB.load(urlHash, null);
urlCounter++;
minimizedUrlDB.store(urlEntry);
if (urlCounter % 500 == 0) {
@@ -965,11 +964,11 @@ public final class yacy {
long start = System.currentTimeMillis();
if (source.equals("lurl")) {
Iterator eiter = pool.loadedURL.entries(true, false, null);
- plasmaCrawlLURLEntry entry;
+ indexURLEntry entry;
while (eiter.hasNext()) {
try {
- entry = (plasmaCrawlLURLEntry) eiter.next();
- plasmaCrawlLURLEntry.Components comp = entry.comp();
+ entry = (indexURLEntry) eiter.next();
+ indexURLEntry.Components comp = entry.comp();
if ((entry != null) && (comp.url() != null)) doms.put(comp.url().getHost(), null);
} catch (Exception e) {
// here a MalformedURLException may occur
@@ -1077,10 +1076,10 @@ public final class yacy {
if (source.equals("lurl")) {
Iterator eiter = pool.loadedURL.entries(true, false, null);
- plasmaCrawlLURLEntry entry;
+ indexURLEntry entry;
while (eiter.hasNext()) {
- entry = (plasmaCrawlLURLEntry) eiter.next();
- plasmaCrawlLURLEntry.Components comp = entry.comp();
+ entry = (indexURLEntry) eiter.next();
+ indexURLEntry.Components comp = entry.comp();
if ((entry != null) && (comp.url() != null)) {
if (html) {
bos.write(("" + comp.descr() + "
").getBytes("UTF-8"));
@@ -1135,7 +1134,7 @@ public final class yacy {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, true, 1000, true, 1000, true, 10000);
kelondroTree oldindex = null;
try {
- oldindex = new kelondroTree(urlHash, 1000, -1, plasmaCrawlLURLOldEntry.rowdef);
+ oldindex = new kelondroTree(urlHash, 1000, -1, indexURLEntryOld.rowdef);
} catch (IOException e) {
System.out.println("ERROR: CANNOT OPEN OLD INDEX: " + e.getMessage());
}
@@ -1145,9 +1144,9 @@ public final class yacy {
int tc = oldindex.size(), c = 0;
Iterator eiter = oldindex.contentRows(-1);
kelondroRow.Entry oldrow;
- plasmaCrawlLURLEntry oldentry;
- plasmaCrawlLURLEntry newentry;
- plasmaCrawlLURLEntry.Components comp;
+ indexURLEntry oldentry;
+ indexURLEntry newentry;
+ indexURLEntry.Components comp;
byte[] dummymd5 = new byte[0];
while (eiter.hasNext()) {
try {
@@ -1158,7 +1157,7 @@ public final class yacy {
oldrow = null;
}
if (oldrow != null) try {
- oldentry = new plasmaCrawlLURLOldEntry(oldrow, null);
+ oldentry = new indexURLEntryOld(oldrow, null);
comp = oldentry.comp();
newentry = pool.loadedURL.newEntry(
comp.url(),
@@ -1236,7 +1235,7 @@ public final class yacy {
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, true, 8*1024*1024, 3000, log, sps.getConfigBool("useCollectionIndex", false));
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false);
} else if (resource.equals("assortments")) {
- plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, indexURLEntry.urlEntryRow, 16*1024*1024, 3000, log);
+ plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, indexRWIEntryOld.urlEntryRow, 16*1024*1024, 3000, log);
indexContainerIterator = assortmentCluster.wordContainers(wordChunkStartHash, true, false);
} /*else if (resource.startsWith("assortment")) {
int a = Integer.parseInt(resource.substring(10));