*) new import function for IndexImport_p.html

- can be used to import the crawling queue (noticeUrlDB + stacks) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1518 6c8d7289-2bf4-0310-a012-ef5d649a1542
19 years ago · 50d85657b8
parent 4fa2be73c3
commit 50d85657b8
10 changed files with 361 additions and 72 deletions
--- a/htroot/IndexImport_p.html
+++ b/htroot/IndexImport_p.html
@ -30,7 +30,8 @@
 	<td>Import&nbsp;Type:</td>
 	<td title="the path to the database that should be imported"><select name="importType" size="1">
    		<option value="plasmaDB">PLASMA DB Import</option>
-    		<option value="assortment">Assortment File Import</option>
+    		<option value="assortment">Assortment File Import</option>
+    		<option value="NURL">Crawling Queue Import</option>
  		</select>
  	</td>
  	<td title="the cache size that should be used for the import db">Cache Size</td>
@ -69,7 +70,7 @@
 <td class="small" >Status</td>
 <td class="small" >%</td>
 <td class="small" >Elapsed<br>Time</td>
-<td class="small" >Estimated<br>Time</td>
+<td class="small" >Time<br>Left</td>
 <td class="small" >Import Status</td>
 <td class="small" >Abort Import</td>
 <td class="small" >Pause Import</td>
@ -120,7 +121,7 @@
    <tr class="TableCellLight">
      <td class="small">#[type]#</td>
      <td class="small" title="#[fullName]#">#[shortName]#</td>
-      <td class="small"><font color="#(runningStatus)#red::green::red#(/runningStatus)#">#(runningStatus)#Finished::<b>Error:</b> #[errorMsg]#::Paused#(/runningStatus)#</font></td>
+      <td class="small"><font color="#(runningStatus)#green::red::blue#(/runningStatus)#">#(runningStatus)#Finished::<b>Error:</b> #[errorMsg]#::Paused#(/runningStatus)#</font></td>
 	  <td class="small" align="right">#[percent]#</td>
      <td class="small" align="right">#[elapsed]#</td>
      <td class="small" align="right"><tt>#[status]#</tt></td>
@ -189,7 +190,8 @@ You need to have at least the following directories and files in this path:
 		<td>No</td>
 		<td>The assortment file that should be imported.<br>
 			<b>Attention:</b> The assortment file must have the postfix "[0-9]{3}\.db".
-			If you would like to import an assortment file from the <tt>PLASMADB\ACLUSTER\ABKP</tt></td>
+			If you would like to import an assortment file from the <tt>PLASMADB\ACLUSTER\ABKP</tt>
+			you have to rename it first.</td>
 	</tr>
 </table>
 </p>
@ -199,7 +201,6 @@ Please note that the imported words are useless if the destination peer doesn't
 the URLs the imported words belongs to.
 </p>

-<!--
 <p><h3>Crawling Queue Import:</h3></p>
 <p>
 	<b>Example Path:</b> <tt>E:\PLASMADB\</tt>
@ -240,7 +241,7 @@ You need to have at least the following directories and files in this path:
 	<tr><td><tt>urlNoticeRemote0.stack</tt></td></tr>		
 </table>
 </p>
-->
+
 #%env/templates/footer.template%#
 </body>
 </html>
--- a/htroot/IndexImport_p.java
+++ b/htroot/IndexImport_p.java
@ -171,13 +171,13 @@ public final class IndexImport_p {
            prop.put("running.jobs_" + i + "_shortName",shortName);
            
            // specifies if the importer is still running
-            prop.put("running.jobs_" + i + "_stopped",         currThread.isStopped() ? 1:0);
+            prop.put("running.jobs_" + i + "_stopped",         currThread.isStopped() ? 0:1);
            
            // specifies if the importer was paused
            prop.put("running.jobs_" + i + "_paused",          currThread.isPaused() ? 1:0);
            
            // setting the status
-            prop.put("running.jobs_" + i + "_runningStatus",          currThread.isPaused() ? 2 : currThread.isStopped() ? 1 : 0);
+            prop.put("running.jobs_" + i + "_runningStatus",          currThread.isPaused() ? 2 : currThread.isStopped() ? 0 : 1);
            
            // other information
            prop.put("running.jobs_" + i + "_percent",         Integer.toString(currThread.getProcessingStatusPercent()));
@ -202,9 +202,9 @@ public final class IndexImport_p {
            prop.put("finished.jobs_" + i + "_type", currThread.getJobType());
            prop.put("finished.jobs_" + i + "_fullName", fullName);
            prop.put("finished.jobs_" + i + "_shortName", shortName);
-            if (error != null) {
-                prop.put("finished.jobs_" + i + "_runningStatus", 2);
-                prop.put("finished.jobs_" + i + "_runningStatus_errorMsg", error);
+            if (error != null) { 
+                prop.put("finished.jobs_" + i + "_runningStatus", 1);
+                prop.put("finished.jobs_" + i + "_runningStatus_errorMsg", error.replaceAll("\n", "<br>"));
            } else {
                prop.put("finished.jobs_" + i + "_runningStatus", 0);
            }
--- a/source/de/anomic/plasma/dbImport/AbstractImporter.java
+++ b/source/de/anomic/plasma/dbImport/AbstractImporter.java
@ -31,9 +31,13 @@ public abstract class AbstractImporter extends Thread implements dbImporter{
    }    
    
    public void init(File theImportPath) {
+        if (theImportPath == null) throw new NullPointerException("The Import path must not be null.");
        this.importPath = theImportPath;      
        
+        // getting a job id from the import manager
        this.jobID = this.sb.dbImportManager.getJobID();
+        
+        // initializing the logger and setting a more verbose thread name
        this.log = new serverLog("IMPORT_" + this.jobType + "_" + this.jobID);
        this.setName("IMPORT_" + this.jobType + "_" + this.sb.dbImportManager.getJobID());
    }
@ -83,7 +87,7 @@ public abstract class AbstractImporter extends Thread implements dbImporter{
    }    
    
    public boolean isStopped() {
-        return this.isAlive();
+        return !this.isAlive();
    }
    
    public int getJobID() {
@ -95,7 +99,7 @@ public abstract class AbstractImporter extends Thread implements dbImporter{
    }    
    
    public long getElapsedTime() {
-        return System.currentTimeMillis()-this.globalStart;
+        return isStopped()?this.globalEnd-this.globalStart:System.currentTimeMillis()-this.globalStart;
    }

    public String getJobType() {
--- a/source/de/anomic/plasma/dbImport/dbImportManager.java
+++ b/source/de/anomic/plasma/dbImport/dbImportManager.java
@ -62,6 +62,8 @@ public class dbImportManager {
            newImporter = new plasmaDbImporter(this.sb);
        } else if (type.equalsIgnoreCase("ASSORTMENT")) {
            newImporter = new plasmaWordIndexAssortmentImporter(this.sb);
+        } else if (type.equalsIgnoreCase("NURL")) {
+            newImporter = new plasmaCrawlNURLImporter(this.sb);
        }
        return newImporter;
    }
@ -71,10 +73,13 @@ public class dbImportManager {
     * e.g. on server shutdown
     */
    public void close() {
+        /* clear the finished thread list */
+        this.finishedJobs.clear();
+        
        /* waiting for all threads to finish */
-        int threadCount  = runningJobs.activeCount();    
+        int threadCount  = this.runningJobs.activeCount();    
        Thread[] threadList = new Thread[threadCount];     
-        threadCount = runningJobs.enumerate(threadList);
+        threadCount = this.runningJobs.enumerate(threadList);
        
        if (threadCount == 0) return;
        
--- a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java
+++ b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java
@ -0,0 +1,212 @@
+package de.anomic.plasma.dbImport;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.TreeMap;
+
+import de.anomic.plasma.plasmaCrawlNURL;
+import de.anomic.plasma.plasmaCrawlProfile;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaCrawlNURL.Entry;
+
+public class plasmaCrawlNURLImporter extends AbstractImporter implements
+        dbImporter {
+
+    private HashSet importProfileHandleCache = new HashSet();
+    private plasmaCrawlProfile importProfileDB;
+    private plasmaCrawlNURL importNurlDB;
+    private int importStartSize;
+    private int urlCount = 0;
+    private int profileCount = 0;
+    
+    public plasmaCrawlNURLImporter(plasmaSwitchboard theSb) {
+        super(theSb);
+        this.jobType="NURL";
+    }
+
+    public long getEstimatedTime() {
+        return (this.urlCount==0)?0:((this.importStartSize*getElapsedTime())/(this.urlCount))-getElapsedTime();
+    }
+
+    public String getJobName() {
+        return this.importPath.toString();
+    }
+
+    public int getProcessingStatusPercent() {
+        return (this.urlCount)/((this.importStartSize<100)?1:(this.importStartSize)/100);
+    }
+
+    public String getStatus() {
+        StringBuffer theStatus = new StringBuffer();
+        
+        theStatus.append("#URLs=").append(this.urlCount).append("\n");
+        theStatus.append("#Profiles=").append(this.profileCount);
+        
+        return theStatus.toString();
+    }
+
+    public void init(File theImportPath, int theCacheSize) {
+        super.init(theImportPath);
+        this.cacheSize = theCacheSize;
+        
+        File noticeUrlDbFile = new File(this.importPath,"urlNotice1.db");
+        File profileDbFile = new File(this.importPath, "crawlProfiles0.db");
+        
+        String errorMsg = null;
+        if (!this.importPath.exists()) 
+            errorMsg = "The import path '" + this.importPath + "' does not exist.";
+        else if (!this.importPath.isDirectory()) 
+            errorMsg = "The import path '" + this.importPath + "' is not a directory.";
+        else if (!this.importPath.canRead()) 
+            errorMsg = "The import path '" + this.importPath + "' is not readable.";
+        else if (!this.importPath.canWrite()) 
+            errorMsg = "The import path '" + this.importPath + "' is not writeable.";
+        
+        else if (!noticeUrlDbFile.exists()) 
+            errorMsg = "The noticeUrlDB file '" + noticeUrlDbFile + "' does not exist.";
+        else if (noticeUrlDbFile.isDirectory()) 
+            errorMsg = "The noticeUrlDB file '" + noticeUrlDbFile + "' is not a file.";
+        else if (!noticeUrlDbFile.canRead()) 
+            errorMsg = "The noticeUrlDB file '" + noticeUrlDbFile + "' is not readable.";
+        else if (!noticeUrlDbFile.canWrite()) 
+            errorMsg = "The noticeUrlDB file '" + noticeUrlDbFile + "' is not writeable.";   
+        
+        else if (!profileDbFile.exists()) 
+            errorMsg = "The profileDB file '" + profileDbFile + "' does not exist.";
+        else if (profileDbFile.isDirectory()) 
+            errorMsg = "The profileDB file '" + profileDbFile + "' is not a file.";
+        else if (!profileDbFile.canRead()) 
+            errorMsg = "The profileDB file '" + profileDbFile + "' is not readable.";
+//        else if (!profileDbFile.canWrite()) 
+//            errorMsg = "The profileDB file '" + profileDbFile + "' is not writeable.";                
+        
+        if (errorMsg != null) {
+            this.log.logSevere(errorMsg);
+            throw new IllegalArgumentException(errorMsg);
+        }   
+        
+        // init noticeUrlDB
+        this.log.logInfo("Initializing the source noticeUrlDB");
+        this.importNurlDB =  new plasmaCrawlNURL(this.importPath, this.cacheSize*(3/4));
+        this.importStartSize = this.importNurlDB.size();
+        int stackSize = this.importNurlDB.stackSize();
+        
+        // init profile DB
+        this.log.logInfo("Initializing the source profileDB");
+        this.importProfileDB = new plasmaCrawlProfile(profileDbFile,this.cacheSize*(1/3));
+    }
+
+    public void run() {
+        try {   
+            // waiting on init thread to finish
+            this.importNurlDB.waitOnInitThread();
+            
+            // the stack types we want to import
+            int[] stackTypes = new int[] {plasmaCrawlNURL.STACK_TYPE_CORE,
+                                          plasmaCrawlNURL.STACK_TYPE_LIMIT,
+                                          plasmaCrawlNURL.STACK_TYPE_REMOTE,
+                                          -1};
+            
+            // looping through the various stacks
+            for (int i=0; i< stackTypes.length; i++) {
+                if (stackTypes[i] != -1) {
+                    this.log.logInfo("Starting to import stacktype '" + stackTypes[i] + "' containing '" + this.importNurlDB.stackSize(stackTypes[i]) + "' entries.");
+                } else {
+                    this.log.logInfo("Starting to import '" + this.importNurlDB.size() + "' entries not available in any stack.");
+                }
+                
+                // getting an interator and loop through the URL entries
+                Iterator iter = (stackTypes[i] == -1)?this.importNurlDB.urlHashes("------------", true):null;
+                while (true) {
+                    
+                    String nextHash = null;
+                    Entry urlEntry = null;
+                    
+                    try {                        
+                        if (stackTypes[i] != -1) {
+                            if (this.importNurlDB.stackSize(stackTypes[i]) == 0) break;
+                            
+                            this.urlCount++;
+                            urlEntry = this.importNurlDB.pop(stackTypes[i]);
+                            nextHash = urlEntry.hash();
+                        } else {
+                            if (!iter.hasNext()) break;
+                            
+                            this.urlCount++;
+                            nextHash = (String)iter.next();                            
+                            urlEntry = this.importNurlDB.getEntry(nextHash);                
+                        }
+                    } catch (IOException e) {
+                        this.log.logWarning("Unable to import entry: " + e.toString());
+                        
+                        if ((stackTypes[i] != -1) &&(this.importNurlDB.stackSize(stackTypes[i]) == 0)) break;
+                        continue;
+                    }
+                    
+                    // getting a handler to the crawling profile the url belongs to
+                    try {
+                        String profileHandle = urlEntry.profileHandle();
+                        if (profileHandle == null) {
+                            this.log.logWarning("Profile handle of url entry '" + nextHash + "' unknown.");
+                            continue;
+                        }
+                        
+                        // if we havn't imported the profile until yet we need to do it now
+                        if (!this.importProfileHandleCache.contains(profileHandle)) {
+                            
+                            // testing if the profile is already known
+                            plasmaCrawlProfile.entry profileEntry = this.sb.profiles.getEntry(profileHandle);
+                            
+                            // if not we need to import it
+                            if (profileEntry == null) {
+                                // copy and store the source profile entry into the destination db
+                                plasmaCrawlProfile.entry sourceEntry = this.importProfileDB.getEntry(profileHandle);
+                                if (sourceEntry != null) {
+                                    this.profileCount++;
+                                    this.importProfileHandleCache.add(profileHandle);
+                                    this.sb.profiles.newEntry((TreeMap)((TreeMap)sourceEntry.map()).clone());
+                                } else {
+                                    this.log.logWarning("Profile '" + profileHandle + "' of url entry '" + nextHash + "' unknown.");
+                                    continue;
+                                }
+                            }                        
+                        }
+                        
+                        // if the url does not alredy exists in the destination stack we insert it now
+                        if (!this.sb.urlPool.noticeURL.existsInStack(nextHash)) {
+                            this.sb.urlPool.noticeURL.newEntry(urlEntry,(stackTypes[i] != -1)?stackTypes[i]:plasmaCrawlNURL.STACK_TYPE_CORE);
+                        }
+                        
+                        // removing hash from the import db
+                    } finally {
+                        this.importNurlDB.remove(nextHash);
+                    }
+                    
+                    if (this.urlCount % 100 == 0) {
+                        this.log.logFine(this.urlCount + " URLs and '" + this.profileCount + "' profile entries processed so far.");
+                    }                 
+                    if (this.isAborted()) break; 
+                }
+                this.log.logInfo("Finished to import stacktype '" + stackTypes[i] + "'");
+            }
+            
+            int size = this.importNurlDB.size();
+            int stackSize = this.importNurlDB.stackSize();
+            
+            // TODO: what todo with nurlDB entries that do not exist in any stack?
+            
+        } catch (Exception e) {
+            this.error = e.toString();     
+            this.log.logSevere("Import process had detected an error",e);
+        } finally { 
+            this.log.logInfo("Import process finished.");
+            this.globalEnd = System.currentTimeMillis();
+            this.sb.dbImportManager.finishedJobs.add(this);
+            this.importNurlDB.close();
+            this.importProfileDB.close();
+        }
+    }
+    
+}
--- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java
+++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java
@ -3,8 +3,6 @@ package de.anomic.plasma.dbImport;
 import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
-import java.util.Vector;
-
 import de.anomic.plasma.plasmaCrawlLURL;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.plasmaWordIndex;
@ -29,8 +27,8 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
    private long urlCounter = 0, wordCounter = 0, entryCounter = 0;
    

-    public plasmaDbImporter(plasmaSwitchboard sb) {
-        super(sb);
+    public plasmaDbImporter(plasmaSwitchboard theSb) {
+        super(theSb);
        this.jobType = "PLASMADB";
    }
    
@ -49,12 +47,12 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
        return theStatus.toString();
    }
    
-    public void init(File theImportPath, int cacheSize) {
+    public void init(File theImportPath, int theCacheSize) {
        super.init(theImportPath);
            
        this.homeWordIndex = this.sb.wordIndex;
        this.homeUrlDB = this.sb.urlPool.loadedURL;
-        this.cacheSize = cacheSize;
+        this.cacheSize = theCacheSize;
        if (this.cacheSize < 2*1024*1024) this.cacheSize = 8*1024*1024;
        
        if (this.homeWordIndex.getRoot().equals(this.importPath)) {
@ -94,12 +92,13 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
        // thid seems to be better:
        // (this.importStartSize-this.importWordIndex.size())*100/((this.importStartSize==0)?1:this.importStartSize);
        // but maxint (2,147,483,647) could be exceeded when WordIndexes reach 20M entries
-        return (this.importStartSize-this.importWordIndex.size())/((this.importStartSize<100)?1:(this.importStartSize)/100);
+        //return (this.importStartSize-this.importWordIndex.size())/((this.importStartSize<100)?1:(this.importStartSize)/100);
+        return (int)(this.wordCounter)/((this.importStartSize<100)?1:(this.importStartSize)/100);
    }

    
    public long getEstimatedTime() {
-        return (this.wordCounter==0)?0:this.importWordIndex.size()*((System.currentTimeMillis()-this.globalStart)/this.wordCounter);
+        return (this.wordCounter==0)?0:((this.importStartSize*getElapsedTime())/this.wordCounter)-getElapsedTime();
    }
    
    public void importWordsDB() {
@ -112,14 +111,14 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
            
            // iterate over all words from import db

-            Iterator importWordHashIterator = this.importWordIndex.wordHashes(wordChunkStartHash, true, false);
+            Iterator importWordHashIterator = this.importWordIndex.wordHashes(this.wordChunkStartHash, true, false);
            while (!isAborted() && importWordHashIterator.hasNext()) {
                
-                plasmaWordIndexEntryContainer newContainer;
+                plasmaWordIndexEntryContainer newContainer = null;
                try {
-                    wordCounter++;
-                    wordHash = (String) importWordHashIterator.next();
-                    newContainer = importWordIndex.getContainer(wordHash, true, -1);
+                    this.wordCounter++;
+                    this.wordHash = (String) importWordHashIterator.next();
+                    newContainer = this.importWordIndex.getContainer(this.wordHash, true, -1);
                    
                    if (newContainer.size() == 0) continue;
                    
@ -132,22 +131,22 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
                        if (isAborted()) break;

                        // getting next word index entry
-                        entryCounter++;
+                        this.entryCounter++;
                        importWordIdxEntry = (plasmaWordIndexEntry) importWordIdxEntries.next();
                        String urlHash = importWordIdxEntry.getUrlHash();                    
                        if ((this.importUrlDB.exists(urlHash)) && (!this.homeUrlDB.exists(urlHash))) try {
                            // importing the new url
                            plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.getEntry(urlHash, importWordIdxEntry);                       
-                            urlCounter++;
+                            this.urlCounter++;
                            this.homeUrlDB.newEntry(urlEntry);
                            
-                            if (urlCounter % 500 == 0) {
-                                this.log.logFine(urlCounter + " URLs processed so far.");
+                            if (this.urlCounter % 500 == 0) {
+                                this.log.logFine(this.urlCounter + " URLs processed so far.");
                            }
                        } catch (IOException e) {}
                        
-                        if (entryCounter % 500 == 0) {
-                            this.log.logFine(entryCounter + " word entries and " + wordCounter + " word entities processed so far.");
+                        if (this.entryCounter % 500 == 0) {
+                            this.log.logFine(this.entryCounter + " word entries and " + this.wordCounter + " word entities processed so far.");
                        }
                    }
                    
@ -155,45 +154,45 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
                    if (isAborted()) break;
                    
                    // importing entity container to home db
-                    homeWordIndex.addEntries(newContainer, true);
+                    this.homeWordIndex.addEntries(newContainer, false);
                                        
                    // delete complete index entity file
-                    importWordIndex.deleteIndex(wordHash);                 
+                    this.importWordIndex.deleteIndex(this.wordHash);                 
                    
                    // print out some statistical information
-                    if (wordCounter%500 == 0) {
-                        wordChunkEndHash = wordHash;
-                        wordChunkEnd = System.currentTimeMillis();
-                        long duration = wordChunkEnd - wordChunkStart;
-                        log.logInfo(wordCounter + " word entities imported " +
-                                "[" + wordChunkStartHash + " .. " + wordChunkEndHash + "] " +
+                    if (this.wordCounter%500 == 0) {
+                        this.wordChunkEndHash = this.wordHash;
+                        this.wordChunkEnd = System.currentTimeMillis();
+                        long duration = this.wordChunkEnd - this.wordChunkStart;
+                        this.log.logInfo(this.wordCounter + " word entities imported " +
+                                "[" + this.wordChunkStartHash + " .. " + this.wordChunkEndHash + "] " +
                                this.getProcessingStatusPercent() + "%\n" + 
                                "Speed: "+ 500*1000/duration + " word entities/s" +
                                " | Elapsed time: " + serverDate.intervalToString(getElapsedTime()) +
                                " | Estimated time: " + serverDate.intervalToString(getEstimatedTime()) + "\n" + 
-                                "Home Words = " + homeWordIndex.size() + 
-                                " | Import Words = " + importWordIndex.size());
-                        wordChunkStart = wordChunkEnd;
-                        wordChunkStartHash = wordChunkEndHash;
+                                "Home Words = " + this.homeWordIndex.size() + 
+                                " | Import Words = " + this.importWordIndex.size());
+                        this.wordChunkStart = this.wordChunkEnd;
+                        this.wordChunkStartHash = this.wordChunkEndHash;
                    }                    
                    
                } catch (Exception e) {
-                    log.logSevere("Import of word entity '" + wordHash + "' failed.",e);
+                    this.log.logSevere("Import of word entity '" + this.wordHash + "' failed.",e);
                } finally {
+                    if (newContainer != null) newContainer.clear();
                }
            }
            
-            this.log.logInfo("Home word index contains " + homeWordIndex.size() + " words and " + homeUrlDB.size() + " URLs.");
-            this.log.logInfo("Import word index contains " + importWordIndex.size() + " words and " + importUrlDB.size() + " URLs.");
-            
-            this.log.logInfo("DB-IMPORT FINISHED");
+            this.log.logInfo("Home word index contains " + this.homeWordIndex.size() + " words and " + this.homeUrlDB.size() + " URLs.");
+            this.log.logInfo("Import word index contains " + this.importWordIndex.size() + " words and " + this.importUrlDB.size() + " URLs.");
        } catch (Exception e) {
            this.log.logSevere("Database import failed.",e);
            e.printStackTrace();
            this.error = e.toString();
        } finally {
-            if (importUrlDB != null) try { importUrlDB.close(); } catch (Exception e){}
-            if (importWordIndex != null) try { importWordIndex.close(5000); } catch (Exception e){}
+            this.log.logInfo("Import process finished.");
+            if (this.importUrlDB != null) try { this.importUrlDB.close(); } catch (Exception e){}
+            if (this.importWordIndex != null) try { this.importWordIndex.close(5000); } catch (Exception e){}
        }
    }    
    
--- a/source/de/anomic/plasma/dbImport/plasmaWordIndexAssortmentImporter.java
+++ b/source/de/anomic/plasma/dbImport/plasmaWordIndexAssortmentImporter.java
@ -4,7 +4,6 @@ import java.io.File;
 import java.util.Iterator;

 import de.anomic.plasma.plasmaSwitchboard;
-import de.anomic.plasma.plasmaWordIndex;
 import de.anomic.plasma.plasmaWordIndexAssortment;
 import de.anomic.plasma.plasmaWordIndexEntryContainer;

@ -22,27 +21,33 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
        this.jobType = "ASSORTMENT";
    }
    
-    public void init(File importAssortmentFile, int cacheSize) {
-        super.init(importAssortmentFile);
-        this.importAssortmentFile = importAssortmentFile;
-        this.cacheSize = cacheSize;
-        if (this.cacheSize < 2*1024*1024) this.cacheSize = 8*1024*1024;
+    public void init(File theImportAssortmentFile, int theCacheSize) {
+        super.init(theImportAssortmentFile);
+        this.importAssortmentFile = theImportAssortmentFile;
+        this.cacheSize = theCacheSize;
+        if (this.cacheSize < 2*1024*1024) this.cacheSize = 2*1024*1024;
        
        String errorMsg = null;
-        if (!importAssortmentFile.getName().matches("indexAssortment0[0-6][0-9]\\.db")) errorMsg = "AssortmentFile '" + importAssortmentFile + "' has an invalid name.";
-        if (!importAssortmentFile.exists()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' does not exist.";
-        else if (importAssortmentFile.isDirectory()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is a directory.";
-        else if (!importAssortmentFile.canRead()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is not readable.";
-        else if (!importAssortmentFile.canWrite()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is not writeable.";
+        if (!this.importAssortmentFile.getName().matches("indexAssortment0[0-6][0-9]\\.db")) 
+            errorMsg = "AssortmentFile '" + this.importAssortmentFile + "' has an invalid name.";
+        if (!this.importAssortmentFile.exists()) 
+            errorMsg = "AssortmentFile '" + this.importAssortmentFile + "' does not exist.";
+        else if (this.importAssortmentFile.isDirectory()) 
+            errorMsg = "AssortmentFile '" + this.importAssortmentFile + "' is a directory.";
+        else if (!this.importAssortmentFile.canRead()) 
+            errorMsg = "AssortmentFile '" + this.importAssortmentFile + "' is not readable.";
+        else if (!this.importAssortmentFile.canWrite()) 
+            errorMsg = "AssortmentFile '" + this.importAssortmentFile + "' is not writeable.";
        
        
+        // getting the assortment length 
        File importAssortmentPath = null;
        int assortmentNr = -1;
        try {
-            importAssortmentPath = new File(importAssortmentFile.getParent());
-            assortmentNr = Integer.valueOf(importAssortmentFile.getName().substring("indexAssortment".length(),"indexAssortment".length()+3)).intValue();
+            importAssortmentPath = new File(this.importAssortmentFile.getParent());
+            assortmentNr = Integer.valueOf(this.importAssortmentFile.getName().substring("indexAssortment".length(),"indexAssortment".length()+3)).intValue();
            if (assortmentNr <1 || assortmentNr > 64) {
-                errorMsg = "AssortmentFile '" + importAssortmentFile + "' has an invalid name.";
+                errorMsg = "AssortmentFile '" + this.importAssortmentFile + "' has an invalid name.";
            }
        } catch (NumberFormatException e) {
            errorMsg = "Unable to parse the assortment file number.";
@ -53,14 +58,14 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
            throw new IllegalStateException(errorMsg);
        }

-        
+        // initializing the import assortment db
        this.log.logInfo("Initializing source assortment file");
        this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr,8*1024*1024, this.log);
        this.importStartSize = this.assortmentFile.size();
    }
    
    public long getEstimatedTime() {
-        return (this.wordEntityCount==0)?0:this.assortmentFile.size()*((System.currentTimeMillis()-this.globalStart)/this.wordEntityCount);
+        return (this.wordEntityCount==0)?0:((this.assortmentFile.size()*getElapsedTime())/(this.wordEntityCount))-getElapsedTime();
    }

    public String getJobName() {
@ -82,12 +87,18 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
    
    public void run() {
        try {            
+            // getting a content iterator
            Iterator contentIter = this.assortmentFile.content();
            while (contentIter.hasNext()) {
                this.wordEntityCount++;                
                
+                // getting next entry as byte array
                byte[][] row = (byte[][]) contentIter.next();
+                
+                // getting the word hash
                String hash = new String(row[0]);
+                
+                // creating a word entry container
                plasmaWordIndexEntryContainer container;
                try {
                    container = this.assortmentFile.row2container(hash, row);
@ -99,7 +110,7 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
                this.wordEntryCount += container.size();
                
                // importing entity container to home db
-                this.sb.wordIndex.addEntries(container, true);
+                this.sb.wordIndex.addEntries(container, false);
                
                if (this.wordEntityCount % 500 == 0) {
                    this.log.logFine(this.wordEntityCount + " word entities processed so far.");
@ -111,8 +122,9 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
            }
        } catch (Exception e) {
            this.error = e.toString();     
-            this.log.logSevere("Error detected",e);
+            this.log.logSevere("Import process had detected an error",e);
        } finally {
+            this.log.logInfo("Import process finished.");
            this.globalEnd = System.currentTimeMillis();
            this.sb.dbImportManager.finishedJobs.add(this);
            this.assortmentFile.close();
--- a/source/de/anomic/plasma/plasmaCrawlNURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlNURL.java
@ -99,6 +99,7 @@ public class plasmaCrawlNURL extends plasmaURL {
    private final HashSet stackIndex;           // to find out if a specific link is already on any stack
    private File cacheStacksPath;
    private int bufferkb;
+    initStackIndex initThead;
    
    public plasmaCrawlNURL(File cacheStacksPath, int bufferkb) {
        super();
@ -145,7 +146,17 @@ public class plasmaCrawlNURL extends plasmaURL {

        // init stack Index
        stackIndex = new HashSet();
-        new initStackIndex().start();
+        (initThead = new initStackIndex()).start();
+    }
+    
+    /**
+     * Blocks the calling thread until the stack index initialization thread
+     * referenced by <code>initThead</code> has terminated.
+     * Returns immediately if no such thread is referenced (anymore).
+     * If the wait is interrupted, the interrupt status of the calling thread
+     * is restored and the method returns early.
+     */
+    public void waitOnInitThread() {
+        // Work on a snapshot of the field: it is set to null concurrently once
+        // the initialization is done, so checking and dereferencing the field
+        // itself could fail between the null check and the join() call.
+        final initStackIndex runningInit = this.initThead;
+        if (runningInit == null) return;
+        try {
+            runningInit.join();
+        } catch (InterruptedException e) {
+            // do not swallow the interruption, let the caller observe it
+            Thread.currentThread().interrupt();
+        }
+    }
    
    private void openHashCache() {
@ -229,6 +240,7 @@ public class plasmaCrawlNURL extends plasmaURL {
            } catch (Exception e) {
                musicStack = kelondroStack.reset(musicStack);
            }
+            plasmaCrawlNURL.this.initThead = null;
        }
    }

@ -277,6 +289,22 @@ public class plasmaCrawlNURL extends plasmaURL {
        push(stackMode, url.getHost(), e.hash);
        return e;
    }
+    
+    /**
+     * Creates a new entry from the values of an existing one by delegating
+     * to the full <code>newEntry(...)</code> variant.
+     *
+     * @param oldEntry  the entry whose values are taken over; may be null
+     * @param stackMode passed through as the last argument of the delegate call
+     * @return the result of the delegate call, or null if oldEntry is null
+     */
+    public synchronized Entry newEntry(Entry oldEntry, int stackMode) {
+        if (oldEntry != null) {
+            return newEntry(oldEntry.initiator(), oldEntry.url(), oldEntry.loaddate(),
+                            oldEntry.referrerHash(), oldEntry.name(), oldEntry.profileHandle(),
+                            oldEntry.depth(), oldEntry.anchors, oldEntry.forkfactor,
+                            stackMode);
+        }
+        // nothing to copy from
+        return null;
+    }

    private void push(int stackType, String domain, String hash) {
        try {
--- a/source/de/anomic/plasma/plasmaCrawlProfile.java
+++ b/source/de/anomic/plasma/plasmaCrawlProfile.java
@ -150,6 +150,30 @@ public class plasmaCrawlProfile {
        } catch (IOException e) {}
    }
    
+    /**
+     * Creates a profile entry from the given map and stores it in the
+     * profile table under the handle of the new entry.
+     * If storing fails, resetDatabase() is called and the store operation is
+     * retried once; if the retry fails with an IOException, both failures are
+     * printed and the process is terminated via System.exit.
+     *
+     * @param mem the map the new entry is constructed from
+     * @return the newly created entry
+     */
+    public entry newEntry(Map mem) {
+        entry ne = new entry(mem);
+        try {
+            profileTable.set(ne.handle(), ne.map());
+        } catch (kelondroException e) {
+            resetDatabase();
+            try {
+                profileTable.set(ne.handle(), ne.map());
+            } catch (IOException ee) {
+                // report the initial failure and the failure of the retry;
+                // the latter is the one that actually causes the shutdown
+                e.printStackTrace();
+                ee.printStackTrace();
+                System.exit(0);
+            }
+        } catch (IOException e) {
+            resetDatabase();
+            try {
+                profileTable.set(ne.handle(), ne.map());
+            } catch (IOException ee) {
+                // report the initial failure and the failure of the retry;
+                // the latter is the one that actually causes the shutdown
+                e.printStackTrace();
+                ee.printStackTrace();
+                System.exit(0);
+            }
+        }
+        return ne;
+    }
+    
    public entry newEntry(String name, String startURL, String generalFilter, String specificFilter,
                           int generalDepth, int specificDepth,
                           boolean crawlingQ,
--- a/source/de/anomic/server/serverCore.java
+++ b/source/de/anomic/server/serverCore.java
@ -853,6 +853,10 @@ public final class serverCore extends serverAbstractThread implements serverThre
            }          
        }
        
+        /**
+         * @return the value of the <code>start</code> field, i.e. the same
+         *         timestamp that getTime() subtracts from the current time
+         *         in milliseconds
+         */
+        public long getRequestStartTime() {
+            return this.start;
+        }
+        
        public long getTime() {
+            // milliseconds elapsed between the stored start timestamp and now
            return System.currentTimeMillis() - this.start;
        }