- refactoring of plasmaCrawlLURL.Entry to prepare new Entry format

- added test migration method to migrate the old LURL to a new LURL
the new LURL will be split into different tables, one for each month
this solves several problems:
- the biggest table in YaCy is split into different parts and can
  also be managed in filesystems that are limited to 2GB
- the oldest entries can easily be identified, used for re-crawl, and
  deleted
- The complete database can be limited to a specific size (as wanted many times)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2755 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 130cc76927
commit a5dd0d41af

@ -56,7 +56,7 @@ import de.anomic.data.listManager;
import de.anomic.data.bookmarksDB.Tag;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@ -147,7 +147,7 @@ public class Bookmarks {
bookmarksDB.Bookmark bookmark = switchboard.bookmarksDB.getBookmark(urlHash);
if (bookmark == null) {
// try to get the bookmark from the LURL database
plasmaCrawlLURL.Entry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null);
plasmaCrawlLURLEntry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null);
plasmaParserDocument document = null;
if(urlentry != null){
document = switchboard.snippetCache.retrieveDocument(urlentry.url(), true);

@ -61,7 +61,7 @@ import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
@ -218,7 +218,7 @@ public class IndexControl_p {
}
if (post.containsKey("urlhashdelete")) {
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
if (entry == null) {
prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
@ -265,7 +265,7 @@ public class IndexControl_p {
HashMap knownURLs = new HashMap();
HashSet unknownURLEntries = new HashSet();
indexEntry iEntry;
plasmaCrawlLURL.Entry lurl;
plasmaCrawlLURLEntry lurl;
while (urlIter.hasNext()) {
iEntry = (indexEntry) urlIter.next();
lurl = switchboard.urlPool.loadedURL.load(iEntry.urlHash(), null);
@ -321,7 +321,7 @@ public class IndexControl_p {
URL url = new URL(urlstring);
urlhash = indexURL.urlHash(url);
prop.put("urlhash", urlhash);
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
if (entry == null) {
prop.put("urlstring", "unknown url: " + urlstring);
prop.put("urlhash", "");
@ -335,7 +335,7 @@ public class IndexControl_p {
}
if (post.containsKey("urlhashsearch")) {
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
if (entry == null) {
prop.put("result", "No Entry for URL hash " + urlhash);
} else {
@ -351,12 +351,12 @@ public class IndexControl_p {
try {
final Iterator entryIt = switchboard.urlPool.loadedURL.entries(true, true, urlhash);
StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:<br>");
plasmaCrawlLURL.Entry entry;
plasmaCrawlLURLEntry entry;
int i = 0;
int rows = 0, cols = 0;
prop.put("urlhashsimilar", 1);
while (entryIt.hasNext() && i < 256) {
entry = (plasmaCrawlLURL.Entry) entryIt.next();
entry = (plasmaCrawlLURLEntry) entryIt.next();
prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash());
cols++;
if (cols==8) {
@ -403,7 +403,7 @@ public class IndexControl_p {
return prop;
}
public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLURL.Entry entry, String urlhash) {
public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLURLEntry entry, String urlhash) {
serverObjects prop = new serverObjects();
if (entry == null) {
prop.put("genUrlProfile", 1);
@ -412,7 +412,7 @@ public class IndexControl_p {
}
URL url = entry.url();
String referrer = null;
plasmaCrawlLURL.Entry le = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null);
plasmaCrawlLURLEntry le = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null);
if (le == null) {
referrer = "<unknown>";
} else {
@ -463,7 +463,7 @@ public class IndexControl_p {
while (en.hasNext()) {
xi = (indexEntry) en.next();
uh = new String[]{xi.urlHash(), Integer.toString(xi.posintext())};
plasmaCrawlLURL.Entry le = switchboard.urlPool.loadedURL.load(uh[0], null);
plasmaCrawlLURLEntry le = switchboard.urlPool.loadedURL.load(uh[0], null);
if (le == null) {
tm.put(uh[0], uh);
} else {

@ -55,13 +55,13 @@ import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.cache.IResourceInfo;
import de.anomic.plasma.crawler.plasmaCrawlerException;
import de.anomic.plasma.parser.ParserException;
import de.anomic.plasma.plasmaCrawlLURL.Entry;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -107,7 +107,7 @@ public class ViewFile {
String viewMode = post.get("viewMode","sentences");
// getting the urlEntry that belongs to the url hash
Entry urlEntry = null;
plasmaCrawlLURLEntry urlEntry = null;
urlEntry = sb.urlPool.loadedURL.load(urlHash, null);
if (urlEntry == null) {
prop.put("error",2);

@ -64,7 +64,7 @@ import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
@ -358,7 +358,7 @@ public class dir {
try {
final URL url = new URL(urlstring);
final plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(("yacyshare. " + phrase + ". " + descr).getBytes()));
final plasmaCrawlLURL.Entry newEntry = switchboard.urlPool.loadedURL.newEntry(
final plasmaCrawlLURLEntry newEntry = switchboard.urlPool.loadedURL.newEntry(
url, "YaCyShare: " + descr, new Date(), new Date(),
"AAAAAAAAAAAA", /*referrer*/
0, /*copycount*/

@ -51,7 +51,7 @@ import java.util.Date;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -249,7 +249,7 @@ public final class crawlOrder {
// case where we have already the url loaded;
reason = reasonString;
// send lurl-Entry as response
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(indexURL.urlHash(url), null);
plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.load(indexURL.urlHash(url), null);
if (entry == null) {
response = "rejected";
lurl = "";

@ -51,7 +51,7 @@ import java.io.IOException;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@ -124,7 +124,7 @@ public final class crawlReceipt {
prop.put("delay", "3600");
} else if (result.equals("fill")) {
// generating a new loaded URL entry
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.newEntry(propStr, true);
plasmaCrawlLURLEntry entry = switchboard.urlPool.loadedURL.newEntry(propStr, true);
if ((entry == null)||(entry.url()==null)) {
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT for hash " + receivedUrlhash + " from peer " + iam +
"\n\tURL properties: "+ propStr);

@ -54,7 +54,7 @@ import java.util.Set;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
@ -244,10 +244,10 @@ public final class search {
StringBuffer links = new StringBuffer();
String resource = "";
//plasmaIndexEntry pie;
plasmaCrawlLURL.Entry urlentry;
plasmaCrawlLURLEntry urlentry;
plasmaSnippetCache.Snippet snippet;
while ((acc.hasMoreElements()) && (i < squery.wantedResults)) {
urlentry = acc.nextElement();
urlentry = (plasmaCrawlLURLEntry) acc.nextElement();
if (includesnippet) {
snippet = sb.snippetCache.retrieveSnippet(urlentry.url(), squery.queryHashes, false, 260, 1000);
} else {

@ -48,7 +48,7 @@
import java.io.IOException;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
@ -90,7 +90,7 @@ public final class transferURL {
final int sizeBefore = sb.urlPool.loadedURL.size();
// read the urls from the other properties and store
String urls;
plasmaCrawlLURL.Entry lEntry;
plasmaCrawlLURLEntry lEntry;
for (int i = 0; i < urlc; i++) {
serverCore.checkInterruption();
urls = (String) post.get("url" + i);

@ -57,7 +57,7 @@ import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSearchImages;
import de.anomic.plasma.plasmaSearchPreOrder;
@ -189,7 +189,7 @@ public class yacysearch {
return prop;
}
final String recommendHash = post.get("recommendref", ""); // urlhash
plasmaCrawlLURL.Entry urlentry = sb.urlPool.loadedURL.load(recommendHash, null);
plasmaCrawlLURLEntry urlentry = sb.urlPool.loadedURL.load(recommendHash, null);
if (urlentry != null) {
plasmaParserDocument document = sb.snippetCache.retrieveDocument(urlentry.url(), true);
if (document != null) {

@ -13,6 +13,7 @@ import java.util.Iterator;
import java.util.Random;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroNaturalOrder;
@ -186,6 +187,10 @@ public class dbtest {
File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, preload, testRow, kelondroBase64Order.enhancedCoder);
}
if (dbe.equals("kelondroFlexSplitTable")) {
File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexSplitTable(tablepath, new File(tablename).getName(), buffer, preload, testRow, kelondroBase64Order.enhancedCoder);
}
if (dbe.equals("mysql")) {
table = new dbTable("mysql", testRow);
}
@ -513,6 +518,10 @@ final class dbTable implements kelondroIndex {
}
}
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);
}
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
try {

@ -47,7 +47,7 @@ import de.anomic.yacy.yacySeedDB;
public class indexURL {
// day formatter for entry export
protected static final SimpleDateFormat shortDayFormatter = new SimpleDateFormat("yyyyMMdd");
public static final SimpleDateFormat shortDayFormatter = new SimpleDateFormat("yyyyMMdd");
// statics for value lengths
public static final int urlHashLength = yacySeedDB.commonHashLength; // 12
@ -428,15 +428,6 @@ public class indexURL {
}
}
public void store(kelondroRow.Entry entry, boolean cached) throws IOException {
if ((cached) && (urlIndexCache != null))
synchronized (urlIndexCache) {
urlIndexCache.put(entry);
}
else
urlIndexFile.put(entry);
}
public void flushCacheSome() {
if (urlIndexCache == null) return;
if (urlIndexCache.size() == 0) return;

@ -446,7 +446,7 @@ public class kelondroCollectionIndex {
indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
indexEntry.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(indexEntry);
throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "', the row has been fixed");
throw new kelondroException(array.filename, "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "', the row has been fixed");
}
int chunkcountInArray = collection.size();
if (chunkcountInArray != chunkcount) {

@ -209,6 +209,11 @@ public class kelondroColumn {
public String toString() {
StringBuffer s = new StringBuffer();
switch (celltype) {
case celltype_undefined:
s.append(nickname);
s.append('-');
s.append(cellwidth);
break;
case celltype_boolean:
s.append("boolean ");
s.append(nickname);

@ -27,6 +27,7 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex {
@ -137,6 +138,10 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return super.get(i);
}
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);
}
public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
int i = index.geti(row.getColBytes(0));
if (i < 0) {

@ -51,6 +51,7 @@
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
public interface kelondroIndex {
@ -60,6 +61,7 @@ public interface kelondroIndex {
public kelondroRow row() throws IOException;
public kelondroRow.Entry get(byte[] key) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;
public kelondroRow.Entry remove(byte[] key) throws IOException;
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException;
public void close() throws IOException;

@ -26,6 +26,8 @@
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.TreeMap;
@ -59,6 +61,10 @@ public class kelondroRAMIndex implements kelondroIndex {
return (kelondroRow.Entry) index.get(key);
}
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);
}
public synchronized Entry put(Entry row) {
return (kelondroRow.Entry) index.put(row.getColBytes(0), row);
}

@ -976,7 +976,7 @@ public class kelondroRecords {
return USAGE.FREEC;
}
private final void dispose(Handle h) throws IOException {
private synchronized final void dispose(Handle h) throws IOException {
// delete element with handle h
// this element is then connected to the deleted-chain and can be
// re-used change counter
@ -1052,7 +1052,7 @@ public class kelondroRecords {
if (markedDeleted.contains(h)) {
// loop detection
this.theLogger.severe("KELONDRO WARNING " + this.filename + ": FREE-Queue contains loops");
return markedDeleted;
return markedDeleted; // TODO: automatic fix
}
markedDeleted.add(h);
seekp = seekpos(h);

@ -25,6 +25,7 @@
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
@ -76,6 +77,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
return entry;
}
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);
}
public kelondroRow.Entry put(kelondroRow.Entry entry) {
long handle = profile.startWrite();
int index = -1;

@ -47,6 +47,7 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
public class kelondroSplittedTree implements kelondroIndex {
@ -109,6 +110,10 @@ public class kelondroSplittedTree implements kelondroIndex {
return ktfs[partition(key)].get(key);
}
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);
}
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
return ktfs[partition(row.getColBytes(0))].put(row);
}

@ -50,6 +50,7 @@ import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
@ -404,8 +405,12 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
return (lc.equals(childn.handle()));
}
// Associates the specified value with the specified key in this map
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);
}
public kelondroRow.Entry put(kelondroRow.Entry newrow) throws IOException {
// Associates the specified value with the specified key in this map
kelondroRow.Entry result = null;
//writeLock.stay(2000, 1000);
if (newrow.columns() != row().columns()) throw new IllegalArgumentException("put: wrong row length " + newrow.columns() + "; must be " + row().columns());

@ -9,6 +9,7 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.server.serverDate;
@ -155,7 +156,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
// we need to import the url
// getting the url entry
plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.load(urlHash, null);
plasmaCrawlLURLEntry urlEntry = this.importUrlDB.load(urlHash, null);
if (urlEntry != null) {
/* write it into the home url db */

@ -61,14 +61,11 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Properties;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
import de.anomic.index.indexEntry;
import de.anomic.index.indexURL;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRAMIndex;
import de.anomic.kelondro.kelondroRow;
@ -78,7 +75,6 @@ import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -95,31 +91,15 @@ public final class plasmaCrawlLURL extends indexURL {
private final LinkedList lcrawlResultStack; // 5 - local index: result of local crawling
private final LinkedList gcrawlResultStack; // 6 - local index: triggered external
//public static Set damagedURLS = Collections.synchronizedSet(new HashSet());
public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
super();
kelondroRow rowdef = new kelondroRow(
"String urlhash-" + urlHashLength + ", " + // the url's hash
"String urlstring-" + urlStringLength + ", " + // the url as string
"String urldescr-" + urlDescrLength + ", " + // the description of the url
"Cardinal moddate-" + urlDateLength + " {b64e}, " + // last-modified from the httpd
"Cardinal loaddate-" + urlDateLength + " {b64e}, " + // time when the url was loaded
"String refhash-" + urlHashLength + ", " + // the url's referrer hash
"Cardinal copycount-" + urlCopyCountLength + " {b64e}, " + //
"byte[] flags-" + urlFlagLength + ", " + // flags
"Cardinal quality-" + urlQualityLength + " {b64e}, " + //
"String language-" + urlLanguageLength + ", " + //
"byte[] doctype-" + urlDoctypeLength + ", " + //
"Cardinal size-" + urlSizeLength + " {b64e}, " + // size of file in bytes
"Cardinal wc-" + urlWordCountLength + " {b64e}"); // word count
File cacheFile = new File(cachePath, "urlHash.db");
cacheFile.getParentFile().mkdirs();
try {
urlIndexFile = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef);
urlIndexCache = new kelondroRAMIndex(kelondroNaturalOrder.naturalOrder, rowdef);
urlIndexFile = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef);
urlIndexCache = new kelondroRAMIndex(kelondroNaturalOrder.naturalOrder, plasmaCrawlLURLOldEntry.rowdef);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
@ -134,19 +114,19 @@ public final class plasmaCrawlLURL extends indexURL {
gcrawlResultStack = new LinkedList();
}
public synchronized void stack(Entry e, String initiatorHash, String executorHash, int stackType) {
public synchronized void stack(plasmaCrawlLURLEntry e, String initiatorHash, String executorHash, int stackType) {
if (e == null) { return; }
try {
if (initiatorHash == null) { initiatorHash = dummyHash; }
if (executorHash == null) { executorHash = dummyHash; }
switch (stackType) {
case 0: break;
case 1: externResultStack.add(e.urlHash + initiatorHash + executorHash); break;
case 2: searchResultStack.add(e.urlHash + initiatorHash + executorHash); break;
case 3: transfResultStack.add(e.urlHash + initiatorHash + executorHash); break;
case 4: proxyResultStack.add(e.urlHash + initiatorHash + executorHash); break;
case 5: lcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break;
case 6: gcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break;
case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break;
case 2: searchResultStack.add(e.hash() + initiatorHash + executorHash); break;
case 3: transfResultStack.add(e.hash() + initiatorHash + executorHash); break;
case 4: proxyResultStack.add(e.hash() + initiatorHash + executorHash); break;
case 5: lcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
case 6: gcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
}
return;
} catch (Exception ex) {
@ -159,7 +139,7 @@ public final class plasmaCrawlLURL extends indexURL {
gcrawlResultStack.add(urlHash + initiatorHash + executorHash);
}
public Entry load(String urlHash, indexEntry searchedWord) {
public plasmaCrawlLURLEntry load(String urlHash, indexEntry searchedWord) {
// generates an plasmaLURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.
@ -171,19 +151,18 @@ public final class plasmaCrawlLURL extends indexURL {
try {
if (entry == null) entry = urlIndexFile.get(urlHash.getBytes());
if (entry == null) return null;
return new Entry(entry, searchedWord);
return new plasmaCrawlLURLOldEntry(entry, searchedWord);
} catch (IOException e) {
return null;
}
}
public void store(Entry entry, boolean cached) throws IOException {
public void store(plasmaCrawlLURLEntry entry, boolean cached) throws IOException {
// Check if there is a more recent Entry already in the DB
if (entry.stored) return;
Entry oldEntry;
plasmaCrawlLURLEntry oldEntry;
try {
if (exists(entry.urlHash)) {
oldEntry = load(entry.urlHash, null);
if (exists(entry.hash())) {
oldEntry = load(entry.hash(), null);
} else {
oldEntry = null;
}
@ -194,40 +173,32 @@ public final class plasmaCrawlLURL extends indexURL {
// the fetched oldEntry is better, so return its properties instead of the new ones
// this.urlHash = oldEntry.urlHash; // unnecessary, should be the same
// this.url = oldEntry.url; // unnecessary, should be the same
entry.descr = oldEntry.descr;
entry.moddate = oldEntry.moddate;
entry.loaddate = oldEntry.loaddate;
entry.referrerHash = oldEntry.referrerHash;
entry.copyCount = oldEntry.copyCount;
entry.flags = oldEntry.flags;
entry.quality = oldEntry.quality;
entry.language = oldEntry.language;
entry.doctype = oldEntry.doctype;
entry.size = oldEntry.size;
entry.wordCount = oldEntry.wordCount;
// this.snippet // not read from db
// this.word // not read from db
entry.stored = true;
entry = oldEntry;
return; // this did not need to be stored, but is updated
}
super.store(entry.toRowEntry(), cached);
entry.stored = true;
if ((cached) && (urlIndexCache != null)) {
synchronized (urlIndexCache) {
urlIndexCache.put(entry.toRowEntry());
}
} else {
urlIndexFile.put(entry.toRowEntry(), entry.loaddate());
}
}
public synchronized Entry newEntry(String propStr, boolean setGlobal) {
public synchronized plasmaCrawlLURLEntry newEntry(String propStr, boolean setGlobal) {
if (propStr.startsWith("{") && propStr.endsWith("}")) {
return new Entry(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)), setGlobal);
return new plasmaCrawlLURLOldEntry(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)), setGlobal);
} else {
return null;
}
}
public synchronized Entry newEntry(URL url, String descr, Date moddate, Date loaddate,
public synchronized plasmaCrawlLURLEntry newEntry(URL url, String descr, Date moddate, Date loaddate,
String referrerHash, int copyCount, boolean localNeed,
int quality, String language, char doctype,
int size, int wordCount) {
Entry e = new Entry(url, descr, moddate, loaddate, referrerHash, copyCount, localNeed, quality, language, doctype, size, wordCount);
plasmaCrawlLURLEntry e = new plasmaCrawlLURLOldEntry(url, descr, moddate, loaddate, referrerHash, copyCount, localNeed, quality, language, doctype, size, wordCount);
return e;
}
@ -365,7 +336,7 @@ public final class plasmaCrawlLURL extends indexURL {
String urlHash, initiatorHash, executorHash;
String cachepath, urlstr, urltxt;
yacySeed initiatorSeed, executorSeed;
plasmaCrawlLURL.Entry urle;
plasmaCrawlLURLEntry urle;
URL url;
// needed for getCachePath(url)
@ -412,317 +383,6 @@ public final class plasmaCrawlLURL extends indexURL {
return prop;
}
public class Entry {
private URL url;
private String descr;
private Date moddate;
private Date loaddate;
private String urlHash;
private String referrerHash;
private int copyCount;
private String flags;
private int quality;
private String language;
private char doctype;
private int size;
private int wordCount;
private String snippet;
private indexEntry word; // this is only used if the url is transported via remote search requests
private boolean stored;
// more needed attributes:
// - author / copyright owner
// - keywords
// - phrasecount, total number of phrases
// - boolean: URL attributes (see Word-Entity definition)
// - boolean: appearance of bold and/or italics
// - ETag: for re-crawl decision upon HEAD request
// - int: # of outlinks to same domain
// - int: # of outlinks to outside domain
// - int: # of keywords
// - int: # der auf der Seite vorhandenen Links zu image, audio, video, applications
public Entry(URL url, String descr, Date moddate, Date loaddate, String referrerHash, int copyCount, boolean localNeed, int quality, String language, char doctype, int size, int wordCount) {
// create new entry and store it into database
this.urlHash = urlHash(url);
this.url = url;
this.descr = (descr == null) ? this.url.toString() : descr;
this.moddate = moddate;
this.loaddate = loaddate;
this.referrerHash = (referrerHash == null) ? dummyHash : referrerHash;
this.copyCount = copyCount; // the number of remote (global) copies of this object without this one
this.flags = (localNeed) ? "L " : " ";
this.quality = quality;
this.language = (language == null) ? "uk" : language;
this.doctype = doctype;
this.size = size;
this.wordCount = wordCount;
this.snippet = null;
this.word = null;
this.stored = false;
}
public Entry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException {
try {
this.urlHash = entry.getColString(0, null);
this.url = new URL(entry.getColString(1, "UTF-8").trim());
this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim();
this.moddate = new Date(86400000 * entry.getColLong(3));
this.loaddate = new Date(86400000 * entry.getColLong(4));
this.referrerHash = (entry.empty(5)) ? dummyHash : entry.getColString(5, "UTF-8");
this.copyCount = (int) entry.getColLong(6);
this.flags = entry.getColString(7, "UTF-8");
this.quality = (int) entry.getColLong(8);
this.language = entry.getColString(9, "UTF-8");
this.doctype = (char) entry.getColByte(10);
this.size = (int) entry.getColLong(11);
this.wordCount = (int) entry.getColLong(12);
this.snippet = null;
this.word = searchedWord;
this.stored = false;
return;
} catch (Exception e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR in plasmaLURL.entry/1: " + e.toString(), e);
throw new IOException("plasmaLURL.entry/1: " + e.toString());
}
}
public Entry(Properties prop, boolean setGlobal) {
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
this.urlHash = prop.getProperty("hash", dummyHash);
try {
//byte[][] entry = urlHashCache.get(urlHash.getBytes());
//if (entry == null) {
this.referrerHash = prop.getProperty("referrer", dummyHash);
this.moddate = shortDayFormatter.parse(prop.getProperty("mod", "20000101"));
//System.out.println("DEBUG: moddate = " + moddate + ", prop=" + prop.getProperty("mod"));
this.loaddate = shortDayFormatter.parse(prop.getProperty("load", "20000101"));
this.copyCount = Integer.parseInt(prop.getProperty("cc", "0"));
this.flags = ((prop.getProperty("local", "true").equals("true")) ? "L " : " ");
if (setGlobal) this.flags = "G ";
this.url = new URL(crypt.simpleDecode(prop.getProperty("url", ""), null));
this.descr = crypt.simpleDecode(prop.getProperty("descr", ""), null);
if (this.descr == null) this.descr = this.url.toString();
this.quality = (int) kelondroBase64Order.enhancedCoder.decodeLong(prop.getProperty("q", ""));
this.language = prop.getProperty("lang", "uk");
this.doctype = prop.getProperty("dt", "t").charAt(0);
this.size = Integer.parseInt(prop.getProperty("size", "0"));
this.wordCount = Integer.parseInt(prop.getProperty("wc", "0"));
this.snippet = prop.getProperty("snippet", "");
if (snippet.length() == 0) snippet = null; else snippet = crypt.simpleDecode(snippet, null);
this.word = (prop.containsKey("word")) ? new indexURLEntry(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word",""))) : null;
this.stored = false;
//}
} catch (Exception e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR in plasmaLURL.entry/2:" +
"\nProperties: " + ((prop==null)?null:prop.toString()) +
((prop.containsKey("word")) ? "\nWord: " + kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word","")) : "") +
"\nErrorMsg: " + e.toString(), e);
}
}
/**
 * Serializes this entry into a kelondroRow.Entry for storage in the URL
 * index file. Dates are stored as days since the epoch (millis / 86400000),
 * numeric fields are Base64-encoded with fixed field widths.
 * NOTE(review): all getBytes() calls use the platform default charset —
 * presumably only ASCII-safe content is expected here; confirm before
 * relying on this for non-ASCII descriptions/URLs.
 *
 * @return the row entry representing this URL
 * @throws IOException if the row entry cannot be created
 */
public kelondroRow.Entry toRowEntry() throws IOException {
    // dates are reduced to day granularity for compact storage
    final String moddatestr = kelondroBase64Order.enhancedCoder.encodeLong(moddate.getTime() / 86400000, urlDateLength);
    final String loaddatestr = kelondroBase64Order.enhancedCoder.encodeLong(loaddate.getTime() / 86400000, urlDateLength);
    final byte[][] entry = new byte[][] {
        urlHash.getBytes(),
        url.toString().getBytes(),
        descr.getBytes(), // null? -- NOTE(review): would NPE if descr is null; the Properties ctor falls back to url.toString(), but other ctors are not visible here
        moddatestr.getBytes(),
        loaddatestr.getBytes(),
        referrerHash.getBytes(),
        kelondroBase64Order.enhancedCoder.encodeLong(copyCount, urlCopyCountLength).getBytes(),
        flags.getBytes(),
        kelondroBase64Order.enhancedCoder.encodeLong(quality, urlQualityLength).getBytes(),
        language.getBytes(),
        new byte[] {(byte) doctype},
        kelondroBase64Order.enhancedCoder.encodeLong(size, urlSizeLength).getBytes(),
        kelondroBase64Order.enhancedCoder.encodeLong(wordCount, urlWordCountLength).getBytes(),
    };
    return urlIndexFile.row().newEntry(entry);
}
/**
 * Returns the url-hash of this entry, based on the md5 algorithm.
 * The result is a String of 12 bytes within a 72-bit space
 * (each byte has a 6-bit range) — enough for all web pages in the world.
 */
public String hash() {
    return this.urlHash;
}
/** Returns the URL of this entry. */
public URL url() {
    return url;
}
/** Returns the document description (title/descr field). */
public String descr() {
    return descr;
}
/** Returns the document modification date. */
public Date moddate() {
    return moddate;
}
/** Returns the date when this URL was loaded. */
public Date loaddate() {
    return loaddate;
}
/** Returns the creator's (referrer's) url-hash. */
public String referrerHash() {
    return referrerHash;
}
/** Returns the single-character document type code. */
public char doctype() {
    return doctype;
}
/** Returns the number of copies of this object in the global index. */
public int copyCount() {
    return copyCount;
}
/**
 * Returns true if the url was created locally; such entries are needed
 * for the own word index. Locality is encoded as 'L' in the first
 * character of the flag string.
 */
public boolean local() {
    return (flags != null) && (flags.charAt(0) == 'L');
}
/** Returns the quality value of this entry. */
public int quality() {
    return quality;
}
/** Returns the two-letter language code of the document. */
public String language() {
    return language;
}
/** Returns the document size in bytes. */
public int size() {
    return size;
}
/** Returns the number of words in the document. */
public int wordCount() {
    return wordCount;
}
/**
 * Returns the snippet, if any. The snippet may appear here if the url was
 * transported in a remote search; it is not saved anywhere, but can only
 * be requested here.
 */
public String snippet() {
    return snippet;
}
/** Returns the associated word index entry, or null if none was transported. */
public indexEntry word() {
    return word;
}
/**
 * Decides whether this entry is older than the given one. The comparison
 * uses the modification date first; ties are broken by the load date, and
 * finally by the quality value (lower quality counts as older).
 *
 * @param other the entry to compare against; null is never "newer"
 * @return true if this entry is strictly older than {@code other}
 */
public boolean isOlder (Entry other) {
    if (other == null) return false;
    final int byModDate = moddate.compareTo(other.moddate());
    if (byModDate != 0) return byModDate < 0;
    final int byLoadDate = loaddate.compareTo(other.loaddate());
    if (byLoadDate != 0) return byLoadDate < 0;
    return quality < other.quality();
}
/**
 * Generates a parseable string for this entry; this is a simple
 * property-list whose keys must match what the Entry(Properties, boolean)
 * constructor reads. URL and description are transport-encoded via
 * crypt.simpleEncode.
 *
 * @return the property list, or null if any field could not be formatted
 *         (errors are deliberately swallowed — legacy behavior)
 */
private StringBuffer corePropList() {
    final StringBuffer corePropStr = new StringBuffer(300);
    try {
        corePropStr
        .append("hash=") .append(urlHash)
        .append(",referrer=").append(referrerHash)
        .append(",mod=") .append(shortDayFormatter.format(moddate))
        .append(",load=") .append(shortDayFormatter.format(loaddate))
        .append(",size=") .append(size)
        .append(",wc=") .append(wordCount)
        .append(",cc=") .append(copyCount)
        .append(",local=") .append(((local()) ? "true" : "false"))
        .append(",q=") .append(kelondroBase64Order.enhancedCoder.encodeLong(quality, urlQualityLength))
        .append(",dt=") .append(doctype)
        .append(",lang=") .append(language)
        .append(",url=") .append(crypt.simpleEncode(url.toString()))
        .append(",descr=") .append(crypt.simpleEncode(descr));
        if (this.word != null) {
            // append also word properties
            corePropStr.append(",word=").append(kelondroBase64Order.enhancedCoder.encodeString(word.toPropertyForm(false)));
        }
        return corePropStr;
    } catch (Exception e) {
        // a null moddate/loaddate (possible after a failed Properties parse)
        // ends up here; the caller treats null as "no string available"
        // serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
        // if (moddate == null) serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
        // if (loaddate == null) serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
        // e.printStackTrace();
        return null;
    }
}
/*
public String toString(int posintext, int posinphrase, int posofphrase) {
// add information needed for remote transport
final StringBuffer core = corePropList();
if (core == null) return null;
core.ensureCapacity(core.length() + 200);
core.insert(0,"{")
.append(",posintext=").append(posintext)
.append(",posinphrase=").append(posinphrase)
.append(",posofphraseint=").append(posofphrase)
.append("}");
return core.toString();
}
*/
/**
 * Returns this entry as a property string with the given snippet appended;
 * this adds the information needed for remote transport.
 *
 * @param snippet the snippet to transport with the entry; null is treated
 *                as an empty snippet
 * @return the property string, or null if the core properties could not
 *         be generated
 */
public String toString(String snippet) {
    // FIX: guard against a null snippet — the old code threw a NPE at
    // snippet.length() before any useful work was done
    if (snippet == null) snippet = "";
    final StringBuffer core = corePropList();
    if (core == null) return null;
    // reserve room for the encoded snippet to avoid repeated reallocation
    core.ensureCapacity(core.length() + snippet.length()*2);
    core.insert(0,"{");
    core.append(",snippet=").append(crypt.simpleEncode(snippet));
    core.append("}");
    return core.toString();
}
/**
 * Returns this object as a parseable property String.<br>
 * This e.g. looks like this:
 * <pre>{hash=jmqfMk7Y3NKw,referrer=------------,mod=20050610,load=20051003,size=51666,wc=1392,cc=0,local=true,q=AEn,dt=h,lang=uk,url=b|aHR0cDovL3d3dy50cmFuc3BhcmVuY3kub3JnL3N1cnZleXMv,descr=b|S25vd2xlZGdlIENlbnRyZTogQ29ycnVwdGlvbiBTdXJ2ZXlzIGFuZCBJbmRpY2Vz}</pre>
 *
 * @return the property string wrapped in braces, or null if the core
 *         properties could not be generated
 */
public String toString() {
    final StringBuffer core = corePropList();
    if (core == null) return null;
    return core.insert(0, "{").append("}").toString();
}
/**
 * Prints a human-readable dump of this entry to stdout (debugging /
 * command-line tooling aid). The output order is fixed and ends with a
 * blank line.
 */
public void print() {
    System.out.println("URL : " + url);
    System.out.println("Description : " + descr);
    System.out.println("Modified : " + httpc.dateString(moddate));
    System.out.println("Loaded : " + httpc.dateString(loaddate));
    System.out.println("Size : " + size + " bytes, " + wordCount + " words");
    System.out.println("Referrer Hash : " + referrerHash);
    System.out.println("Quality : " + quality);
    System.out.println("Language : " + language);
    System.out.println("DocType : " + doctype);
    System.out.println();
}
} // class Entry
public class kiter implements Iterator {
// enumerates entry elements
Iterator i;
@ -742,7 +402,7 @@ public final class plasmaCrawlLURL extends indexURL {
kelondroRow.Entry e = (kelondroRow.Entry) i.next();
if (e == null) return null;
try {
return new Entry(e, null);
return new plasmaCrawlLURLOldEntry(e, null);
} catch (IOException ex) {
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0, null));
}
@ -873,7 +533,7 @@ public final class plasmaCrawlLURL extends indexURL {
}
}
plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next();
plasmaCrawlLURLEntry entry = (plasmaCrawlLURLEntry) eiter.next();
totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, entry.url()) ||
plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, entry.url())) {
@ -944,7 +604,7 @@ public final class plasmaCrawlLURL extends indexURL {
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1, 0, false);
final Iterator enu = urls.entries(true, false, null);
while (enu.hasNext()) {
((Entry) enu.next()).print();
((plasmaCrawlLURLEntry) enu.next()).print();
}
} catch (Exception e) {
e.printStackTrace();

@ -385,7 +385,7 @@ public final class plasmaCrawlStacker {
checkInterruption();
String nexturlhash = indexURL.urlHash(nexturl);
String dbocc = this.sb.urlPool.exists(nexturlhash);
plasmaCrawlLURL.Entry oldEntry = null;
plasmaCrawlLURLEntry oldEntry = null;
oldEntry = this.sb.urlPool.loadedURL.load(nexturlhash, null);
boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
if ((dbocc != null) && (!(recrawl))) {

@ -199,7 +199,7 @@ public class plasmaDHTChunk {
indexContainer container;
Iterator urlIter;
indexEntry iEntry;
plasmaCrawlLURL.Entry lurl;
plasmaCrawlLURLEntry lurl;
int refcount = 0;
int wholesize;
@ -281,11 +281,11 @@ public class plasmaDHTChunk {
}
public synchronized int deleteTransferIndexes() {
public synchronized String deleteTransferIndexes() {
Iterator urlIter;
indexEntry iEntry;
HashSet urlHashes;
int count = 0;
String count = "0";
for (int i = 0; i < this.indexContainers.length; i++) {
// delete entries separately
@ -301,7 +301,7 @@ public class plasmaDHTChunk {
urlHashes.add(iEntry.urlHash());
}
String wordHash = indexContainers[i].getWordHash();
count += wordIndex.removeEntries(this.indexContainers[i].getWordHash(), urlHashes, true);
count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes, true);
if (log.isFine())
log.logFine("Deleted partial index (" + c + " URLs) for word " + wordHash + "; " + this.wordIndex.indexSize(wordHash) + " entries left");
this.indexContainers[i] = null;

@ -222,7 +222,7 @@ public class plasmaDHTFlush extends Thread {
// deleting transfered words from index
if (this.delete) {
this.status = "Running: Deleting chunk " + iteration;
int urlReferences = oldDHTChunk.deleteTransferIndexes();
String urlReferences = oldDHTChunk.deleteTransferIndexes();
this.log.logFine("Deleted from " + oldDHTChunk.containerSize() + " transferred RWIs locally " + urlReferences + " URL references");
}
oldDHTChunk = null;

@ -370,7 +370,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
//if (searchResult.size() == 0) return acc; // case that we have nothing to do
indexEntry entry;
plasmaCrawlLURL.Entry page;
plasmaCrawlLURLEntry page;
Long preranking;
Object[] preorderEntry;
int minEntries = profileLocal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT);

@ -101,7 +101,7 @@ public final class plasmaSearchImages {
public plasmaSearchImages(plasmaSnippetCache sc, long maxTime, plasmaSearchResult sres, int depth) {
long start = System.currentTimeMillis();
this.images = new TreeSet();
plasmaCrawlLURL.Entry urlentry;
plasmaCrawlLURLEntry urlentry;
while (sres.hasMoreElements()) {
urlentry = sres.nextElement();
addAll(new plasmaSearchImages(sc, serverDate.remainingTime(start, maxTime, 10), urlentry.url(), depth));

@ -185,7 +185,13 @@ public final class plasmaSearchPreOrder {
public Object[] /*{indexEntry, Long}*/ next() {
String top = (String) pageAcc.firstKey();
//System.out.println("preorder-key: " + top);
Long preranking = new Long(Long.MAX_VALUE - Long.parseLong(top.substring(0, 16), 16)); // java.lang.NumberFormatException: For input string: "8000000000020b17" ???
Long preranking;
try {
preranking = new Long(Long.MAX_VALUE - Long.parseLong(top.substring(0, 16), 16)); // java.lang.NumberFormatException: For input string: "8000000000020b17" ???
} catch (NumberFormatException e) {
e.printStackTrace();
preranking = new Long(0);
}
return new Object[]{(indexEntry) pageAcc.remove(top), preranking};
}

@ -191,7 +191,7 @@ public class plasmaSearchRankingProfile {
Set topwords,
String[] urlcomps,
String[] descrcomps,
plasmaCrawlLURL.Entry page) {
plasmaCrawlLURLEntry page) {
// apply pre-calculated order attributes
long ranking = preranking;

@ -99,13 +99,13 @@ public final class plasmaSearchResult {
return pageAcc.size() > 0;
}
public plasmaCrawlLURL.Entry nextElement() {
public plasmaCrawlLURLEntry nextElement() {
Object top = pageAcc.firstKey();
//System.out.println("postorder-key: " + ((String) top));
return (plasmaCrawlLURL.Entry) pageAcc.remove(top);
return (plasmaCrawlLURLEntry) pageAcc.remove(top);
}
protected void addResult(plasmaCrawlLURL.Entry page, Long preranking) {
protected void addResult(plasmaCrawlLURLEntry page, Long preranking) {
// take out relevant information for reference computation
URL url = page.url();
@ -132,12 +132,12 @@ public final class plasmaSearchResult {
for (int i = 0; i < references.length; i++) commonSense.add(references[i]);
Object[] resultVector;
plasmaCrawlLURL.Entry page;
plasmaCrawlLURLEntry page;
long ranking;
for (int i = 0; i < results.size(); i++) {
// take out values from result array
resultVector = (Object[]) results.get(i);
page = (plasmaCrawlLURL.Entry) resultVector[0];
page = (plasmaCrawlLURLEntry) resultVector[0];
// calculate ranking
if (postsort)
@ -173,7 +173,7 @@ public final class plasmaSearchResult {
// first scan all entries and find all urls that are referenced
while (i.hasNext()) {
entry = (Map.Entry) i.next();
path = urlPath(((plasmaCrawlLURL.Entry) entry.getValue()).url());
path = urlPath(((plasmaCrawlLURLEntry) entry.getValue()).url());
paths.put(path, entry.getKey());
//if (path != null) path = shortenPath(path);
//if (path != null) paths.put(path, entry.getKey());
@ -184,7 +184,7 @@ public final class plasmaSearchResult {
String shorten;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
path = urlPath(((plasmaCrawlLURL.Entry) entry.getValue()).url());
path = urlPath(((plasmaCrawlLURLEntry) entry.getValue()).url());
shorten = shortenPath(path);
// scan all subpaths of the url
while (shorten != null) {

@ -629,7 +629,7 @@ public class plasmaSnippetCache {
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, long maxTime) {
// fetch snippets
int i = 0;
plasmaCrawlLURL.Entry urlentry;
plasmaCrawlLURLEntry urlentry;
String urlstring;
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
while ((acc.hasMoreElements()) && (i < fetchcount) && (System.currentTimeMillis() < limitTime)) {

@ -1011,7 +1011,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// possibly delete entries from last chunk
if ((this.dhtTransferChunk != null) &&
(this.dhtTransferChunk.getStatus() == plasmaDHTChunk.chunkStatus_COMPLETE)) {
int deletedURLs = this.dhtTransferChunk.deleteTransferIndexes();
String deletedURLs = this.dhtTransferChunk.deleteTransferIndexes();
this.log.logFine("Deleted from " + this.dhtTransferChunk.containers().length + " transferred RWIs locally, removed " + deletedURLs + " URL references");
this.dhtTransferChunk = null;
}
@ -1556,7 +1556,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
checkInterruption();
// create a new loaded URL db entry
plasmaCrawlLURL.Entry newEntry = urlPool.loadedURL.newEntry(
plasmaCrawlLURLEntry newEntry = urlPool.loadedURL.newEntry(
entry.url(), // URL
docDescription, // document description
docDate, // modification date
@ -1965,7 +1965,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String lurl = (String) page.get("lurl");
if ((lurl != null) && (lurl.length() != 0)) {
String propStr = crypt.simpleDecode(lurl, (String) page.get("key"));
plasmaCrawlLURL.Entry entry = urlPool.loadedURL.newEntry(propStr, true);
plasmaCrawlLURLEntry entry = urlPool.loadedURL.newEntry(propStr, true);
urlPool.loadedURL.store(entry, false);
urlPool.loadedURL.stack(entry, yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1); // *** ueberfluessig/doppelt?
urlPool.noticeURL.remove(entry.hash());
@ -2045,7 +2045,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int i = 0;
int p;
URL url;
plasmaCrawlLURL.Entry urlentry;
plasmaCrawlLURLEntry urlentry;
String urlstring, urlname, filename, urlhash;
String host, hash, address, descr = "";
yacySeed seed;
@ -2192,7 +2192,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// finally, delete the url entry
// determine the url string
plasmaCrawlLURL.Entry entry = urlPool.loadedURL.load(urlhash, null);
plasmaCrawlLURLEntry entry = urlPool.loadedURL.load(urlhash, null);
if (entry == null) return 0;
URL url = entry.url();

@ -333,7 +333,7 @@ public class plasmaSwitchboardQueue {
public URL referrerURL() {
if (referrerURL == null) {
if ((referrerHash == null) || (referrerHash.equals(indexURL.dummyHash))) return null;
plasmaCrawlLURL.Entry entry = lurls.load(referrerHash, null);
plasmaCrawlLURLEntry entry = lurls.load(referrerHash, null);
if (entry == null) referrerURL = null; else referrerURL = entry.url();
}
return referrerURL;

@ -83,7 +83,7 @@ public class plasmaURLPool {
plasmaCrawlNURL.Entry ne = noticeURL.getEntry(urlhash);
if (ne != null) return ne.url();
} catch (IOException e) {}
plasmaCrawlLURL.Entry le = loadedURL.load(urlhash, null);
plasmaCrawlLURLEntry le = loadedURL.load(urlhash, null);
if (le != null) return le.url();
plasmaCrawlEURL.Entry ee = errorURL.getEntry(urlhash);
if (ee != null) return ee.url();

@ -484,13 +484,25 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
int removed = 0;
removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
//if (removed == urlHashes.size()) return removed;
if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
//if (removed == urlHashes.size()) return removed;
}
removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
//if (removed == urlHashes.size()) return removed;
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
return removed;
}
/**
 * Removes the given url-hashes for a word from every index layer and
 * reports the per-layer removal counts as a comma-separated list, in the
 * order: dht-in cache, dht-out cache, collection index (or "0" when the
 * collection index is disabled), assortment cluster, backend.
 *
 * @param wordHash       the word whose references are removed
 * @param urlHashes      the url-hashes to remove
 * @param deleteComplete passed through to the underlying removeEntries calls
 * @return the comma-separated removal counts
 */
public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
    final StringBuilder removed = new StringBuilder();
    removed.append(dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete)).append(", ");
    removed.append(dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete)).append(", ");
    if (useCollectionIndex) {
        removed.append(collections.removeEntries(wordHash, urlHashes, deleteComplete)).append(", ");
    } else {
        removed.append("0, ");
    }
    removed.append(assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete)).append(", ");
    removed.append(backend.removeEntries(wordHash, urlHashes, deleteComplete));
    return removed.toString();
}
@ -772,7 +784,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
waiter();
entry = (indexEntry) containerIterator.next();
// System.out.println("Wordhash: "+wordHash+" UrlHash: "+entry.getUrlHash());
plasmaCrawlLURL.Entry ue = lurl.load(entry.urlHash(), null);
plasmaCrawlLURLEntry ue = lurl.load(entry.urlHash(), null);
if (ue == null) {
urlHashs.add(entry.urlHash());
} else {

@ -62,6 +62,7 @@ import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
@ -496,7 +497,7 @@ public final class yacyClient {
}
// insert results to containers
plasmaCrawlLURL.Entry urlEntry;
plasmaCrawlLURLEntry urlEntry;
String[] urls = new String[results];
for (int n = 0; n < results; n++) {
// get one single search result
@ -862,7 +863,7 @@ public final class yacyClient {
-er crawlt, Ergebnis erscheint aber unter falschem initiator
*/
public static HashMap crawlReceipt(yacySeed targetSeed, String process, String result, String reason, plasmaCrawlLURL.Entry entry, String wordhashes) {
public static HashMap crawlReceipt(yacySeed targetSeed, String process, String result, String reason, plasmaCrawlLURLEntry entry, String wordhashes) {
if (targetSeed == null) { return null; }
if (yacyCore.seedDB.mySeed == null) { return null; }
if (yacyCore.seedDB.mySeed == targetSeed) { return null; }
@ -981,9 +982,9 @@ public final class yacyClient {
if (uhs.length == 0) { return resultObj; } // all url's known
// extract the urlCache from the result
plasmaCrawlLURL.Entry[] urls = new plasmaCrawlLURL.Entry[uhs.length];
plasmaCrawlLURLEntry[] urls = new plasmaCrawlLURLEntry[uhs.length];
for (int i = 0; i < uhs.length; i++) {
urls[i] = (plasmaCrawlLURL.Entry) urlCache.get(uhs[i]);
urls[i] = (plasmaCrawlLURLEntry) urlCache.get(uhs[i]);
if (urls[i] == null) {
yacyCore.log.logFine("DEBUG transferIndex: requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
}
@ -1092,7 +1093,7 @@ public final class yacyClient {
}
}
private static HashMap transferURL(yacySeed targetSeed, plasmaCrawlLURL.Entry[] urls, boolean gzipBody, int timeout) {
private static HashMap transferURL(yacySeed targetSeed, plasmaCrawlLURLEntry[] urls, boolean gzipBody, int timeout) {
// this post a message to the remote message board
final String address = targetSeed.getAddress();
if (address == null) { return null; }

@ -75,11 +75,15 @@ import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMap;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlLURLEntry;
import de.anomic.plasma.plasmaCrawlLURLOldEntry;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPool;
@ -730,7 +734,7 @@ public final class yacy {
iEntry = (indexEntry) wordIdxEntries.next();
String urlHash = iEntry.urlHash();
if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
plasmaCrawlLURL.Entry urlEntry = currentUrlDB.load(urlHash, null);
plasmaCrawlLURLEntry urlEntry = currentUrlDB.load(urlHash, null);
urlCounter++;
minimizedUrlDB.store(urlEntry, false);
if (urlCounter % 500 == 0) {
@ -950,10 +954,10 @@ public final class yacy {
long start = System.currentTimeMillis();
if (source.equals("lurl")) {
Iterator eiter = pool.loadedURL.entries(true, false, null);
plasmaCrawlLURL.Entry entry;
plasmaCrawlLURLEntry entry;
while (eiter.hasNext()) {
try {
entry = (plasmaCrawlLURL.Entry) eiter.next();
entry = (plasmaCrawlLURLEntry) eiter.next();
if ((entry != null) && (entry.url() != null)) doms.put(entry.url().getHost(), null);
} catch (Exception e) {
// here a MalformedURLException may occur
@ -1061,9 +1065,9 @@ public final class yacy {
if (source.equals("lurl")) {
Iterator eiter = pool.loadedURL.entries(true, false, null);
plasmaCrawlLURL.Entry entry;
plasmaCrawlLURLEntry entry;
while (eiter.hasNext()) {
entry = (plasmaCrawlLURL.Entry) eiter.next();
entry = (plasmaCrawlLURLEntry) eiter.next();
if ((entry != null) && (entry.url() != null)) {
if (html) {
bos.write(("<a href=\"" + entry.url() + "\">" + entry.descr() + "</a><br>").getBytes("UTF-8"));
@ -1114,6 +1118,27 @@ public final class yacy {
}
}
/**
 * Test migration: copies all entries from the old LURL database into a new
 * kelondroFlexSplitTable, which splits the URL table into separate parts
 * keyed by the entry's load date (enabling month-wise tables, see commit
 * message). Errors are printed to stderr and abort the migration.
 *
 * @param homePath the YaCy application root containing DATA/PLASMADB
 */
private static void migratelurls(String homePath) {
    File root = new File(homePath);
    try {
        plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, false, 1000, false, 1000, false, 10000);
        // NOTE(review): "DATA//INDEX" contains a doubled slash — presumably
        // harmless on common filesystems, but looks like a typo; confirm.
        kelondroFlexSplitTable fsp = new kelondroFlexSplitTable(new File(root, "DATA//INDEX/PUBLIC/TEXT"), "urls", 1000, -1, plasmaCrawlLURLOldEntry.rowdef, kelondroNaturalOrder.naturalOrder);
        Iterator eiter = pool.loadedURL.entries(true, false, null);
        plasmaCrawlLURLEntry entry;
        while (eiter.hasNext()) {
            entry = (plasmaCrawlLURLEntry) eiter.next();
            // skip null/broken entries; the load date selects the target split table
            if ((entry != null) && (entry.url() != null)) {
                fsp.put(entry.toRowEntry(), entry.loaddate());
            }
        }
        // NOTE(review): only the pool is closed here; fsp is never closed —
        // verify whether kelondroFlexSplitTable requires an explicit close.
        pool.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
private static String[] shift(String[] args, int pos, int count) {
String[] newargs = new String[args.length - count];
System.arraycopy(args, 0, newargs, 0, pos);
@ -1365,6 +1390,8 @@ public final class yacy {
if (args.length == 2) applicationRoot= args[1];
String outfile = "urllist_" + source + "_" + System.currentTimeMillis() + ((html) ? ".html" : ".txt");
urllist(applicationRoot, source, html, outfile);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-migratelurls"))) {
migratelurls(applicationRoot);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urldbcleanup"))) {
// generate a url list and save it in a file
if (args.length == 2) applicationRoot= args[1];

Loading…
Cancel
Save