refactoring:

moved importer classes to crawler and plasma package

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4770 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter
parent ee81ff4ef4
commit fbb712c669

@@ -24,14 +24,7 @@
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableCellLight">
<td>Import&nbsp;Type:</td>
<td title="the path to the database that should be imported">
<select name="importType" size="1">
<!-- Options not availible because of missing support for Assortment DB's
<option value="plasmaDB">PLASMA DB Import</option>
<option value="assortment">Assortment File Import</option>-->
<option value="NURL">Crawling Queue Import</option>
</select>
</td>
<td title="the path to the database that should be imported"></td>
<td title="the cache size that should be used for the import db">Cache Size</td>
<td>
<select name="cacheSize" size="1">

@@ -51,9 +51,10 @@
import java.io.PrintStream;
import java.util.Date;
import de.anomic.crawler.NoticeURLImporter;
import de.anomic.crawler.Importer;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.dbImport.dbImporter;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
@@ -71,27 +72,12 @@ public final class IndexImport_p {
if (post != null) {
if (post.containsKey("startIndexDbImport")) {
try {
String importType = (String) post.get("importType");
int cacheSize = post.getInt("cacheSize", 0);
boolean startImport = true;
// // check if there is an already running thread with the same import path
// Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2];
// activeCount = plasmaDbImporter.runningJobs.enumerate(importThreads);
//
// for (int i=0; i < activeCount; i++) {
// plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i];
// if (currThread.getJobName().equals(new File(importPath))) {
// prop.put("error",2);
// startImport = false;
// }
// }
//
boolean startImport = true;
if (startImport) {
dbImporter importerThread = switchboard.dbImportManager.getNewImporter(importType);
Importer importerThread = new NoticeURLImporter(switchboard.plasmaPath, switchboard.crawlQueues, switchboard.profilesActiveCrawls, switchboard.dbImportManager);
if (importerThread != null) {
importerThread.init(switchboard, cacheSize);
importerThread.setJobID(switchboard.dbImportManager.generateUniqueJobID());
importerThread.startIt();
}
prop.put("LOCATION","");
@@ -119,7 +105,7 @@ public final class IndexImport_p {
) {
// getting the job nr of the thread
String jobID = (String) post.get("jobNr");
dbImporter importer = switchboard.dbImportManager.getImporterByID(Integer.valueOf(jobID).intValue());
Importer importer = switchboard.dbImportManager.getImporterByID(Integer.valueOf(jobID).intValue());
if (importer != null) {
if (post.containsKey("stopIndexDbImport")) {
try {
@@ -145,11 +131,11 @@ public final class IndexImport_p {
/*
* Loop over all currently running jobs
*/
dbImporter[] importThreads = switchboard.dbImportManager.getRunningImporter();
Importer[] importThreads = switchboard.dbImportManager.getRunningImporter();
activeCount = importThreads.length;
for (int i=0; i < activeCount; i++) {
dbImporter currThread = importThreads[i];
Importer currThread = importThreads[i];
// get import type
prop.put("running.jobs_" + i + "_type", currThread.getJobType());
@@ -183,9 +169,9 @@ public final class IndexImport_p {
/*
* Loop over all finished jobs
*/
dbImporter[] finishedJobs = switchboard.dbImportManager.getFinishedImporter();
Importer[] finishedJobs = switchboard.dbImportManager.getFinishedImporter();
for (int i=0; i<finishedJobs.length; i++) {
dbImporter currThread = finishedJobs[i];
Importer currThread = finishedJobs[i];
String error = currThread.getError();
String fullName = currThread.getJobName().toString();
String shortName = (fullName.length()>30)?fullName.substring(0,12) + "..." + fullName.substring(fullName.length()-22,fullName.length()):fullName;

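The net effect in IndexImport_p is that the factory lookup (getNewImporter) plus the separate init() step is collapsed into constructor injection. A minimal sketch of the resulting call site, assembled from the hunks above (not additional code from the commit):

    // Hedged sketch: starting an NURL import after this refactoring. All
    // dependencies are passed explicitly; the caller assigns the job ID.
    Importer importerThread = new NoticeURLImporter(
            switchboard.plasmaPath,            // path of the crawler DB to import
            switchboard.crawlQueues,           // destination crawl queues
            switchboard.profilesActiveCrawls,  // active crawl profiles to merge into
            switchboard.dbImportManager);      // manager collecting finished jobs
    importerThread.setJobID(switchboard.dbImportManager.generateUniqueJobID());
    importerThread.startIt();
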
@@ -37,12 +37,12 @@ import java.util.regex.PatternSyntaxException;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.SitemapImporter;
import de.anomic.crawler.ZURL;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.dbImport.SitemapImporter;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -340,10 +340,9 @@ public class WatchCrawler_p {
storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw);
// create a new sitemap importer
SitemapImporter importerThread = (SitemapImporter) sb.dbImportManager.getNewImporter("sitemap");
SitemapImporter importerThread = new SitemapImporter(sb, sb.dbImportManager, new yacyURL(sitemapURLStr, null), pe);
if (importerThread != null) {
importerThread.init(sb, 0);
importerThread.initSitemap(new yacyURL(sitemapURLStr, null), pe);
importerThread.setJobID(sb.dbImportManager.generateUniqueJobID());
importerThread.startIt();
}
} catch (Exception e) {

@@ -1,45 +1,30 @@
package de.anomic.plasma.dbImport;
package de.anomic.crawler;
import java.util.HashMap;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.logging.serverLog;
public abstract class AbstractImporter extends Thread implements dbImporter{
public abstract class AbstractImporter extends Thread implements Importer {
protected int jobID = -1;
protected String jobType;
protected serverLog log;
protected boolean stopped = false;
protected boolean paused = false;
protected int cacheSize;
protected long globalStart = System.currentTimeMillis();
protected long globalEnd;
protected long globalPauseLast;
protected long globalPauseDuration;
protected String error;
protected plasmaSwitchboard sb;
AbstractImporter(String theJobType, plasmaSwitchboard switchboard) {
super(switchboard.dbImportManager.runningJobs,"");
public AbstractImporter(String theJobType) {
this.jobType = theJobType;
this.sb = switchboard;
// initializing the logger and setting a more verbose thread name
this.log = new serverLog("IMPORT_" + this.jobType + "_" + this.jobID);
this.setName("IMPORT_" + this.jobType + "_" + this.jobID);
}
public String getError() {
return this.error;
}
/**
* @see dbImporter#init(HashMap)
*/
public void init() {
// initializing the logger and setting a more verbose thread name
this.log = new serverLog("IMPORT_" + this.jobType + "_" + this.jobID);
this.setName("IMPORT_" + this.jobType + "_" + this.jobID);
}
public void startIt() {

@@ -1,8 +1,6 @@
package de.anomic.plasma.dbImport;
package de.anomic.crawler;
import de.anomic.plasma.plasmaSwitchboard;
public interface dbImporter {
public interface Importer {
// functions to pause and continue importing
public boolean isPaused();
@@ -23,6 +21,5 @@ public interface dbImporter {
public String getJobType();
public String getError();
public String getStatus();
public void init(plasmaSwitchboard switchboard, int cacheSize) throws ImporterException;
public void startIt();
}

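Pieced together from this hunk and from call sites elsewhere in the diff, the renamed interface now looks roughly as follows; this is a reconstruction, and members outside the visible hunks (such as the pause/continue setters hinted at by the comment above) are omitted:

    package de.anomic.crawler;

    // Approximate shape of the interface after the move. The coupled
    // init(plasmaSwitchboard, int) method is gone, so implementations are
    // configured entirely through their constructors.
    public interface Importer {
        public boolean isPaused();
        public void stopIt();
        public void startIt();
        public int getJobID();
        public void setJobID(int id);
        public String getJobType();
        public String getJobName();
        public int getProcessingStatusPercent();
        public long getElapsedTime();
        public long getEstimatedTime();
        public String getError();
        public String getStatus();
    }
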
@@ -1,4 +1,4 @@
package de.anomic.plasma.dbImport;
package de.anomic.crawler;
public class ImporterException extends Exception {

@@ -1,22 +1,19 @@
package de.anomic.plasma.dbImport;
package de.anomic.crawler;
import java.util.Vector;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.logging.serverLog;
public class dbImportManager {
public class ImporterManager {
public final Vector<dbImporter> finishedJobs = new Vector<dbImporter>();
public final Vector<Importer> finishedJobs = new Vector<Importer>();
public final ThreadGroup runningJobs = new ThreadGroup("ImporterThreads");
public int currMaxJobNr = 0;
private plasmaSwitchboard sb;
public dbImportManager(plasmaSwitchboard theSb) {
this.sb = theSb;
public ImporterManager() {
}
private int generateUniqueJobID() {
public int generateUniqueJobID() {
int jobID;
synchronized(this.runningJobs) {
jobID = this.currMaxJobNr;
@@ -25,27 +22,27 @@ public class dbImportManager {
return jobID;
}
public dbImporter[] getRunningImporter() {
public Importer[] getRunningImporter() {
Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
int activeCount = this.runningJobs.enumerate(importThreads);
dbImporter[] importers = new dbImporter[activeCount];
Importer[] importers = new Importer[activeCount];
for (int i=0; i<activeCount; i++) {
importers[i] = (dbImporter) importThreads[i];
importers[i] = (Importer) importThreads[i];
}
return importers;
}
public dbImporter[] getFinishedImporter() {
return (dbImporter[]) this.finishedJobs.toArray(new dbImporter[this.finishedJobs.size()]);
public Importer[] getFinishedImporter() {
return (Importer[]) this.finishedJobs.toArray(new Importer[this.finishedJobs.size()]);
}
public dbImporter getImporterByID(int jobID) {
public Importer getImporterByID(int jobID) {
Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
int activeCount = this.runningJobs.enumerate(importThreads);
for (int i=0; i < activeCount; i++) {
dbImporter currThread = (dbImporter) importThreads[i];
Importer currThread = (Importer) importThreads[i];
if (currThread.getJobID() == jobID) {
return currThread;
}
@@ -53,25 +50,6 @@ public class dbImportManager {
return null;
}
public dbImporter getNewImporter(String type) {
if (type == null) return null;
if (type.length() == 0) return null;
// create a new importer thread
dbImporter newImporter = null;
if (type.equalsIgnoreCase("NURL")) {
newImporter = new plasmaCrawlNURLImporter(this.sb);
} else if (type.equalsIgnoreCase("sitemap")) {
newImporter = new SitemapImporter(this.sb);
}
// assign a job ID to it
newImporter.setJobID(this.generateUniqueJobID());
// return the newly created importer
return newImporter;
}
/**
* Can be used to close all still running importer threads
* e.g. on server shutdown
@@ -94,7 +72,7 @@ public class dbImportManager {
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
Thread currentThread = threadList[currentThreadIdx];
if (currentThread.isAlive()) {
((dbImporter)currentThread).stopIt();
((Importer)currentThread).stopIt();
}
}

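getRunningImporter() still discovers jobs by enumerating the runningJobs ThreadGroup and casting each Thread back to Importer; note that the removed AbstractImporter constructor was what attached new threads to that group via super(switchboard.dbImportManager.runningJobs, ""), so any replacement registration lies outside the visible hunks. A usage sketch of the slimmed-down manager, with invented local variables:

    // The manager no longer builds importers (getNewImporter was deleted);
    // callers construct the thread themselves and use the manager only for
    // job IDs and end-of-job bookkeeping.
    ImporterManager manager = new ImporterManager();
    try {
        Importer job = new NoticeURLImporter(plasmaPath, crawlQueues, activeCrawls, manager);
        job.setJobID(manager.generateUniqueJobID());
        job.startIt();
    } catch (ImporterException e) {
        // the constructor validates the import path and fails fast
    }
    for (Importer running : manager.getRunningImporter()) {
        System.out.println(running.getJobType() + ": " + running.getStatus());
    }
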
@@ -1,4 +1,4 @@
package de.anomic.plasma.dbImport;
package de.anomic.crawler;
import java.io.File;
import java.io.IOException;
@@ -6,12 +6,9 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.NoticedURL;
import de.anomic.plasma.plasmaSwitchboard;
public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImporter {
public class NoticeURLImporter extends AbstractImporter implements Importer {
private File plasmaPath = null;
private HashSet<String> importProfileHandleCache = new HashSet<String>();
@@ -20,45 +17,24 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
private int importStartSize;
private int urlCount = 0;
private int profileCount = 0;
private CrawlQueues crawlQueues;
private CrawlProfile activeCrawls;
private ImporterManager dbImportManager;
public plasmaCrawlNURLImporter(plasmaSwitchboard theSb) {
super("NURL",theSb);
}
public long getEstimatedTime() {
return (this.urlCount==0)?0:((this.importStartSize*getElapsedTime())/(this.urlCount))-getElapsedTime();
}
public String getJobName() {
return this.plasmaPath.toString();
}
public int getProcessingStatusPercent() {
return (this.urlCount)/((this.importStartSize<100)?1:(this.importStartSize)/100);
}
public String getStatus() {
StringBuffer theStatus = new StringBuffer();
theStatus.append("#URLs=").append(this.urlCount).append("\n");
theStatus.append("#Profiles=").append(this.profileCount);
return theStatus.toString();
}
public void init(plasmaSwitchboard sb, int cacheSize) throws ImporterException {
super.init();
public NoticeURLImporter(File crawlerPath, CrawlQueues crawlQueues, CrawlProfile activeCrawls, ImporterManager dbImportManager) throws ImporterException {
super("NURL");
this.crawlQueues = crawlQueues;
this.activeCrawls = activeCrawls;
this.dbImportManager = dbImportManager;
// TODO: we need more errorhandling here
this.plasmaPath = sb.plasmaPath;
this.cacheSize = cacheSize;
if (this.cacheSize < 2*1024*1024) this.cacheSize = 8*1024*1024;
// TODO: we need more error handling here
this.plasmaPath = crawlerPath;
File noticeUrlDbFile = new File(plasmaPath,"urlNotice1.db");
File profileDbFile = new File(plasmaPath, plasmaSwitchboard.DBFILE_ACTIVE_CRAWL_PROFILES);
String errorMsg = null;
if (!plasmaPath.exists())
errorMsg = "The import path '" + plasmaPath+ "' does not exist.";
errorMsg = "The import path '" + plasmaPath + "' does not exist.";
else if (!plasmaPath.isDirectory())
errorMsg = "The import path '" + plasmaPath + "' is not a directory.";
else if (!plasmaPath.canRead())
@@ -100,6 +76,27 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
this.importProfileDB = new CrawlProfile(profileDbFile);
}
public long getEstimatedTime() {
return (this.urlCount==0)?0:((this.importStartSize*getElapsedTime())/(this.urlCount))-getElapsedTime();
}
public String getJobName() {
return this.plasmaPath.toString();
}
public int getProcessingStatusPercent() {
return (this.urlCount)/((this.importStartSize<100)?1:(this.importStartSize)/100);
}
public String getStatus() {
StringBuffer theStatus = new StringBuffer();
theStatus.append("#URLs=").append(this.urlCount).append("\n");
theStatus.append("#Profiles=").append(this.profileCount);
return theStatus.toString();
}
@SuppressWarnings("unchecked")
public void run() {
try {
@@ -161,7 +158,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
if (!this.importProfileHandleCache.contains(profileHandle)) {
// testing if the profile is already known
CrawlProfile.entry profileEntry = this.sb.profilesActiveCrawls.getEntry(profileHandle);
CrawlProfile.entry profileEntry = this.activeCrawls.getEntry(profileHandle);
// if not we need to import it
if (profileEntry == null) {
@@ -170,7 +167,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
if (sourceEntry != null) {
this.profileCount++;
this.importProfileHandleCache.add(profileHandle);
this.sb.profilesActiveCrawls.newEntry((HashMap<String, String>) sourceEntry.map().clone());
this.activeCrawls.newEntry((HashMap<String, String>) sourceEntry.map().clone());
} else {
this.log.logWarning("Profile '" + profileHandle + "' of url entry '" + nextHash + "' unknown.");
continue;
@@ -179,8 +176,8 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
}
// if the url does not alredy exists in the destination stack we insert it now
if (!this.sb.crawlQueues.noticeURL.existsInStack(nextHash)) {
this.sb.crawlQueues.noticeURL.push((stackTypes[stackType] != -1) ? stackTypes[stackType] : NoticedURL.STACK_TYPE_CORE, nextEntry);
if (!this.crawlQueues.noticeURL.existsInStack(nextHash)) {
this.crawlQueues.noticeURL.push((stackTypes[stackType] != -1) ? stackTypes[stackType] : NoticedURL.STACK_TYPE_CORE, nextEntry);
}
// removing hash from the import db
@@ -207,7 +204,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
} finally {
this.log.logInfo("Import process finished.");
this.globalEnd = System.currentTimeMillis();
this.sb.dbImportManager.finishedJobs.add(this);
this.dbImportManager.finishedJobs.add(this);
this.importNurlDB.close();
this.importProfileDB.close();
}

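The progress estimate in getEstimatedTime() above is a plain linear extrapolation: if urlCount of the initial importStartSize entries took getElapsedTime() so far, the total time scales proportionally and the remainder is returned. A worked example with invented values:

    // Worked example of the getEstimatedTime() arithmetic (all values invented):
    long importStartSize = 10000; // entries in the source DB when the import began
    long urlCount        = 2500;  // entries imported so far
    long elapsed         = 60000; // milliseconds elapsed so far
    long estimated = (importStartSize * elapsed) / urlCount - elapsed;
    // (10000 * 60000) / 2500 - 60000 = 240000 - 60000 = 180000 ms remaining
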
@@ -1,4 +1,4 @@
//AbstractParser.java
//SitemapImporter.java
//------------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@anomic.de
@@ -42,24 +42,34 @@
//the intact and unchanged copyright notice.
//Contributions and changes to the program code must be marked as such.
package de.anomic.plasma.dbImport;
package de.anomic.crawler;
import java.util.HashMap;
import de.anomic.crawler.CrawlProfile;
import de.anomic.data.SitemapParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.yacy.yacyURL;
public class SitemapImporter extends AbstractImporter implements dbImporter {
public class SitemapImporter extends AbstractImporter implements Importer {
private SitemapParser parser = null;
private yacyURL sitemapURL = null;
private ImporterManager superviser;
public SitemapImporter(plasmaSwitchboard switchboard) {
super("sitemap",switchboard);
}
public SitemapImporter(plasmaSwitchboard sb, ImporterManager importManager, yacyURL sitemapURL, CrawlProfile.entry profileEntry) throws ImporterException {
super("sitemap");
this.superviser = importManager;
try {
// getting the sitemap URL
this.sitemapURL = sitemapURL;
// creating the sitemap parser
this.parser = new SitemapParser(sb, this.sitemapURL, profileEntry);
} catch (Exception e) {
throw new ImporterException("Unable to initialize Importer",e);
}
}
public long getEstimatedTime() {
long t = getElapsedTime();
int p = getProcessingStatusPercent();
@@ -67,14 +77,14 @@ public class SitemapImporter extends AbstractImporter implements dbImporter {
}
/**
* @see dbImporter#getJobName()
* @see Importer#getJobName()
*/
public String getJobName() {
return this.sitemapURL.toString();
}
/**
* @see dbImporter#getProcessingStatusPercent()
* @see Importer#getProcessingStatusPercent()
*/
public int getProcessingStatusPercent() {
if (this.parser == null) return 0;
@@ -87,7 +97,7 @@ public class SitemapImporter extends AbstractImporter implements dbImporter {
}
/**
* @see dbImporter#getStatus()
* @see Importer#getStatus()
*/
public String getStatus() {
StringBuffer theStatus = new StringBuffer();
@@ -96,33 +106,13 @@ public class SitemapImporter extends AbstractImporter implements dbImporter {
return theStatus.toString();
}
/**
* @see dbImporter#init(HashMap)
* @see AbstractImporter#init(HashMap)
*/
public void init(plasmaSwitchboard switchboard, int cacheSize) throws ImporterException {
super.init();
}
public void initSitemap(yacyURL sitemapURL, CrawlProfile.entry profileEntry) throws ImporterException {
try {
// getting the sitemap URL
this.sitemapURL = sitemapURL;
// creating the sitemap parser
this.parser = new SitemapParser(this.sb,this.sitemapURL, profileEntry);
} catch (Exception e) {
throw new ImporterException("Unable to initialize Importer",e);
}
}
public void run() {
try {
this.parser.parse();
} finally {
this.globalEnd = System.currentTimeMillis();
this.sb.dbImportManager.finishedJobs.add(this);
this.superviser.finishedJobs.add(this);
}
}
}

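Construction and parser setup are merged here: the constructor stores the sitemap URL and immediately builds the SitemapParser, so the separate init()/initSitemap() steps disappear. Inside the surrounding try/catch in WatchCrawler_p, the start sequence reduces to the sketch below; the importerThread != null check kept in that hunk is now vestigial, since new either succeeds or throws.

    // Sketch of the post-refactoring start sequence, per the WatchCrawler_p hunk:
    SitemapImporter importerThread = new SitemapImporter(
            sb, sb.dbImportManager, new yacyURL(sitemapURLStr, null), pe);
    importerThread.setJobID(sb.dbImportManager.generateUniqueJobID());
    importerThread.startIt();
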
@@ -110,11 +110,6 @@ public class SitemapParser extends DefaultHandler {
*/
private CrawlProfile.entry crawlingProfile = null;
/**
* Reference to the plasmaswitchboard.
*/
private plasmaSwitchboard switchboard = null;
/**
* Name of the current XML element
*/
@@ -154,13 +149,11 @@
* last modification date of the {@link #nextURL}
*/
private Date lastMod = null;
private plasmaSwitchboard sb;
public SitemapParser(plasmaSwitchboard sb, yacyURL sitemap, CrawlProfile.entry theCrawlingProfile) {
if (sb == null)
throw new NullPointerException("The switchboard must not be null");
if (sitemap == null)
throw new NullPointerException("The sitemap URL must not be null");
this.switchboard = sb;
assert sitemap != null;
this.sb = sb;
this.siteMapURL = sitemap;
if (theCrawlingProfile == null) {
@@ -281,10 +274,10 @@
// check if the url is known and needs to be recrawled
if (this.lastMod != null) {
String dbocc = this.switchboard.urlExists(nexturlhash);
String dbocc = this.sb.urlExists(nexturlhash);
if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) {
// the url was already loaded. we need to check the date
indexURLReference oldEntry = switchboard.wordIndex.getURL(nexturlhash, null, 0);
indexURLReference oldEntry = this.sb.wordIndex.getURL(nexturlhash, null, 0);
if (oldEntry != null) {
Date modDate = oldEntry.moddate();
// check if modDate is null
@@ -296,9 +289,9 @@
// URL needs to crawled
String error = null;
error = this.switchboard.crawlStacker.stackCrawl(url,
error = this.sb.crawlStacker.stackCrawl(url,
null, // this.siteMapURL.toString(),
this.switchboard.wordIndex.seedDB.mySeed().hash, this.nextURL, new Date(),
this.sb.wordIndex.seedDB.mySeed().hash, this.nextURL, new Date(),
0, this.crawlingProfile);
if (error != null) {
@@ -306,9 +299,9 @@
this.logger.logInfo("The URL '" + this.nextURL + "' can not be crawled. Reason: " + error);
// insert URL into the error DB
ZURL.Entry ee = this.switchboard.crawlQueues.errorURL.newEntry(
ZURL.Entry ee = this.sb.crawlQueues.errorURL.newEntry(
new CrawlEntry(
switchboard.wordIndex.seedDB.mySeed().hash,
sb.wordIndex.seedDB.mySeed().hash,
new yacyURL(this.nextURL, null),
"",
"",
@@ -317,12 +310,12 @@
0,
0,
0),
this.switchboard.wordIndex.seedDB.mySeed().hash,
this.sb.wordIndex.seedDB.mySeed().hash,
new Date(),
1,
error);
ee.store();
this.switchboard.crawlQueues.errorURL.push(ee);
this.sb.crawlQueues.errorURL.push(ee);
} catch (MalformedURLException e) {/* ignore this */
}
} else {
@@ -353,7 +346,7 @@
}
private CrawlProfile.entry createProfile(String domainName, yacyURL sitemapURL) {
return this.switchboard.profilesActiveCrawls.newEntry(domainName, sitemapURL,
return this.sb.profilesActiveCrawls.newEntry(domainName, sitemapURL,
// crawlingFilter
".*", ".*",
// Depth

@@ -57,26 +57,9 @@ public final class indexRepositoryReference {
kelondroIndex urlIndexFile;
private Export exportthread = null; // will habe a export thread assigned if exporter is running
public indexRepositoryReference(File indexSecondaryRoot, String networkName) {
public indexRepositoryReference(File indexSecondaryPath) {
super();
File indexSecondaryPath = new File(indexSecondaryRoot, networkName);
File indexSecondaryTextLocation = new File(indexSecondaryPath, "TEXT");
if (!indexSecondaryTextLocation.exists()) {
// patch old index locations; the secondary path is patched in plasmaCrawlLURL
File oldSecondaryPath = new File(new File(indexSecondaryRoot, "PUBLIC"), "TEXT");
File oldSecondaryTextLocation = new File(new File(indexSecondaryRoot, "PUBLIC"), "TEXT");
if (oldSecondaryPath.exists() && oldSecondaryTextLocation.exists()) {
// move the text folder from the old location to the new location
assert !indexSecondaryTextLocation.exists();
indexSecondaryTextLocation.mkdirs();
if (oldSecondaryTextLocation.renameTo(indexSecondaryTextLocation)) {
if (!oldSecondaryPath.delete()) oldSecondaryPath.deleteOnExit();
} else {
indexSecondaryTextLocation = oldSecondaryTextLocation; // emergency case: stay with old directory
}
}
}
File indexSecondaryTextLocation = new File(indexSecondaryPath, "TEXT");
urlIndexFile = new kelondroSplitTable(indexSecondaryTextLocation, "urls", indexURLReference.rowdef, false);
}

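The constructor now takes the final per-network directory instead of composing it from a root and a network name, and the old-location TEXT migration block is dropped here (a similar patch routine remains in plasmaWordIndex, below). Callers adapt as in the yacy.java hunks further down:

    // Before this commit:
    //   new indexRepositoryReference(indexSecondaryRoot, networkName)
    // After it, the caller composes the path itself:
    indexRepositoryReference currentUrlDB =
            new indexRepositoryReference(new File(indexSecondaryRoot, networkName));
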
@@ -1,22 +1,18 @@
package de.anomic.plasma.dbImport;
package de.anomic.plasma;
import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.crawler.AbstractImporter;
import de.anomic.crawler.Importer;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexURLReference;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.server.serverDate;
public class plasmaDbImporter extends AbstractImporter implements dbImporter {
private File importPrimaryPath, importSecondaryPath;
public class plasmaDbImporter extends AbstractImporter implements Importer {
/**
* the source word index (the DB to import)
@@ -36,21 +32,22 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
private long urlCounter = 0, wordCounter = 0, entryCounter = 0, notBoundEntryCounter = 0;
public plasmaDbImporter(plasmaSwitchboard sb, plasmaWordIndex homeWI, plasmaWordIndex importWI) {
super("PLASMADB",sb);
public plasmaDbImporter(plasmaWordIndex homeWI, plasmaWordIndex importWI) {
super("PLASMADB");
this.homeWordIndex = homeWI;
this.importWordIndex = importWI;
this.importStartSize = this.importWordIndex.size();
}
/**
* @see dbImporter#getJobName()
* @see Importer#getJobName()
*/
public String getJobName() {
return this.importPrimaryPath.toString();
return this.importWordIndex.getLocation(true).toString();
}
/**
* @see dbImporter#getStatus()
* @see Importer#getStatus()
*/
public String getStatus() {
StringBuffer theStatus = new StringBuffer();
@@ -64,46 +61,6 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
return theStatus.toString();
}
//public void init(File thePrimaryPath, File theSecondaryPath, int theCacheSize, long preloadTime) {
/**
* @throws ImporterException
* @see dbImporter#init(HashMap)
*/
public void init(plasmaSwitchboard db, int cacheSize) throws ImporterException {
super.init();
// TODO: we need more errorhandling here
this.importPrimaryPath = sb.indexPrimaryPath;
this.importSecondaryPath = sb.indexSecondaryPath;
this.cacheSize = cacheSize;
if (this.cacheSize < 2*1024*1024) this.cacheSize = 8*1024*1024;
// configure import DB
String errorMsg = null;
if (!this.importPrimaryPath.exists()) errorMsg = "Primary Import directory does not exist.";
if (!this.importPrimaryPath.canRead()) errorMsg = "Primary Import directory is not readable.";
if (!this.importPrimaryPath.canWrite()) errorMsg = "Primary Import directory is not writeable";
if (!this.importPrimaryPath.isDirectory()) errorMsg = "Primary Import directory is not a directory.";
if (errorMsg != null) {
this.log.logSevere(errorMsg + "\nName: " + this.importPrimaryPath.getAbsolutePath());
throw new IllegalArgumentException(errorMsg);
}
if (!this.importSecondaryPath.exists()) errorMsg = "Secondary Import directory does not exist.";
if (!this.importSecondaryPath.canRead()) errorMsg = "Secondary Import directory is not readable.";
if (!this.importSecondaryPath.canWrite()) errorMsg = "Secondary Import directory is not writeable";
if (!this.importSecondaryPath.isDirectory()) errorMsg = "Secondary Import directory is not a directory.";
if (errorMsg != null) {
this.log.logSevere(errorMsg + "\nName: " + this.importSecondaryPath.getAbsolutePath());
throw new IllegalArgumentException(errorMsg);
}
this.log.logFine("Initializing source word index db.");
this.importWordIndex = new plasmaWordIndex(sb.getConfig("network.unit.name", ""), this.log, this.importPrimaryPath, this.importSecondaryPath);
this.importStartSize = this.importWordIndex.size();
}
public void run() {
try {
importWordsDB();
@@ -114,7 +71,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
}
/**
* @see dbImporter#getProcessingStatusPercent()
* @see Importer#getProcessingStatusPercent()
*/
public int getProcessingStatusPercent() {
// thid seems to be better:
@@ -125,7 +82,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
}
/**
* @see dbImporter#getElapsedTime()
* @see Importer#getElapsedTime()
*/
public long getEstimatedTime() {
return (this.wordCounter==0)?0:((this.importStartSize*getElapsedTime())/this.wordCounter)-getElapsedTime();
@@ -135,7 +92,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
this.log.logInfo("STARTING DB-IMPORT");
try {
this.log.logInfo("Importing DB from '" + this.importPrimaryPath.getAbsolutePath() + "'/'" + this.importSecondaryPath.getAbsolutePath() + "'");
this.log.logInfo("Importing DB from '" + this.importWordIndex.getLocation(true).getAbsolutePath() + "'");
this.log.logInfo("Home word index contains " + homeWordIndex.size() + " words and " + homeWordIndex.countURL() + " URLs.");
this.log.logInfo("Import word index contains " + this.importWordIndex.size() + " words and " + this.importWordIndex.countURL() + " URLs.");
@@ -267,7 +224,5 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
if (this.importWordIndex != null) try { this.importWordIndex.close(); } catch (Exception e){}
}
}
}

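plasmaDbImporter loses its path fields and the whole init() routine: it now expects the source word index to be opened by the caller and reports its location via plasmaWordIndex.getLocation(). A hedged sketch of the resulting setup; variable names are invented and the actual call site is not part of this diff:

    // Open the source index explicitly, then hand both indexes to the importer.
    plasmaWordIndex importWordIndex = new plasmaWordIndex(
            sb.getConfig("network.unit.name", ""), log,
            importPrimaryRoot, importSecondaryRoot);
    plasmaDbImporter job = new plasmaDbImporter(sb.wordIndex, importWordIndex);
    job.setJobID(sb.dbImportManager.generateUniqueJobID());
    job.startIt();
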
@@ -118,6 +118,7 @@ import de.anomic.crawler.RobotsTxt;
import de.anomic.crawler.CrawlStacker;
import de.anomic.crawler.ProtocolLoader;
import de.anomic.crawler.ZURL;
import de.anomic.crawler.ImporterManager;
import de.anomic.data.URLLicense;
import de.anomic.data.blogBoard;
import de.anomic.data.blogBoardComments;
@@ -144,7 +145,6 @@ import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroMapTable;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.dbImport.dbImportManager;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverBusyThread;
@@ -198,58 +198,57 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
public static wikiParser wikiParser = null;
// storage management
public File htCachePath;
public File plasmaPath;
public File indexPrimaryPath, indexSecondaryPath;
public File listsPath;
public File htDocsPath;
public File rankingPath;
public File workPath;
public File releasePath;
public HashMap<String, String> rankingPermissions;
public plasmaWordIndex wordIndex;
public CrawlQueues crawlQueues;
public ResultURLs crawlResults;
public plasmaSwitchboardQueue sbQueue;
public CrawlStacker crawlStacker;
public messageBoard messageDB;
public wikiBoard wikiDB;
public blogBoard blogDB;
public blogBoardComments blogCommentDB;
public static RobotsTxt robots = null;
public CrawlProfile profilesActiveCrawls, profilesPassiveCrawls;
public CrawlProfile.entry defaultProxyProfile;
public CrawlProfile.entry defaultRemoteProfile;
public CrawlProfile.entry defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
public boolean rankingOn;
public plasmaRankingDistribution rankingOwnDistribution;
public plasmaRankingDistribution rankingOtherDistribution;
public HashMap<String, Object[]> outgoingCookies, incomingCookies;
public kelondroMapTable facilityDB;
public plasmaParser parser;
public volatile long proxyLastAccess, localSearchLastAccess, remoteSearchLastAccess;
public yacyCore yc;
public userDB userDB;
public bookmarksDB bookmarksDB;
public plasmaWebStructure webStructure;
public dbImportManager dbImportManager;
public plasmaDHTFlush transferIdxThread = null;
private plasmaDHTChunk dhtTransferChunk = null;
public ArrayList<plasmaSearchQuery> localSearches; // array of search result properties as HashMaps
public ArrayList<plasmaSearchQuery> remoteSearches; // array of search result properties as HashMaps
public File htCachePath;
public File plasmaPath;
public File listsPath;
public File htDocsPath;
public File rankingPath;
public File workPath;
public File releasePath;
public HashMap<String, String> rankingPermissions;
public plasmaWordIndex wordIndex;
public CrawlQueues crawlQueues;
public ResultURLs crawlResults;
public plasmaSwitchboardQueue sbQueue;
public CrawlStacker crawlStacker;
public messageBoard messageDB;
public wikiBoard wikiDB;
public blogBoard blogDB;
public blogBoardComments blogCommentDB;
public static RobotsTxt robots = null;
public CrawlProfile profilesActiveCrawls, profilesPassiveCrawls;
public CrawlProfile.entry defaultProxyProfile;
public CrawlProfile.entry defaultRemoteProfile;
public CrawlProfile.entry defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
public boolean rankingOn;
public plasmaRankingDistribution rankingOwnDistribution;
public plasmaRankingDistribution rankingOtherDistribution;
public HashMap<String, Object[]> outgoingCookies, incomingCookies;
public kelondroMapTable facilityDB;
public plasmaParser parser;
public volatile long proxyLastAccess, localSearchLastAccess, remoteSearchLastAccess;
public yacyCore yc;
public userDB userDB;
public bookmarksDB bookmarksDB;
public plasmaWebStructure webStructure;
public ImporterManager dbImportManager;
public plasmaDHTFlush transferIdxThread = null;
private plasmaDHTChunk dhtTransferChunk = null;
public ArrayList<plasmaSearchQuery> localSearches; // array of search result properties as HashMaps
public ArrayList<plasmaSearchQuery> remoteSearches; // array of search result properties as HashMaps
public HashMap<String, TreeSet<Long>> localSearchTracker, remoteSearchTracker; // mappings from requesting host to a TreeSet of Long(access time)
public long lastseedcheckuptime = -1;
public long indexedPages = 0;
public long lastindexedPages = 0;
public double requestedQueries = 0d;
public double lastrequestedQueries = 0d;
public int totalPPM = 0;
public double totalQPM = 0d;
public TreeMap<String, String> clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used
public boolean acceptLocalURLs, acceptGlobalURLs;
public URLLicense licensedURLs;
public Timer moreMemory;
public long lastseedcheckuptime = -1;
public long indexedPages = 0;
public long lastindexedPages = 0;
public double requestedQueries = 0d;
public double lastrequestedQueries = 0d;
public int totalPPM = 0;
public double totalQPM = 0d;
public TreeMap<String, String> clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used
public boolean acceptLocalURLs, acceptGlobalURLs;
public URLLicense licensedURLs;
public Timer moreMemory;
public serverProcessor<indexingQueueEntry> indexingDocumentProcessor;
public serverProcessor<indexingQueueEntry> indexingCondensementProcessor;
@@ -921,13 +920,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
this.acceptGlobalURLs = "global.any".indexOf(getConfig("network.unit.domain", "global")) >= 0;
this.acceptLocalURLs = "local.any".indexOf(getConfig("network.unit.domain", "global")) >= 0;
// load values from configs
// load values from configs
this.plasmaPath = getConfigPath(PLASMA_PATH, PLASMA_PATH_DEFAULT);
this.log.logConfig("Plasma DB Path: " + this.plasmaPath.toString());
this.indexPrimaryPath = getConfigPath(INDEX_PRIMARY_PATH, INDEX_PATH_DEFAULT);
this.log.logConfig("Index Primary Path: " + this.indexPrimaryPath.toString());
this.indexSecondaryPath = (getConfig(INDEX_SECONDARY_PATH, "").length() == 0) ? indexPrimaryPath : new File(getConfig(INDEX_SECONDARY_PATH, ""));
this.log.logConfig("Index Secondary Path: " + this.indexSecondaryPath.toString());
File indexPrimaryPath = getConfigPath(INDEX_PRIMARY_PATH, INDEX_PATH_DEFAULT);
this.log.logConfig("Index Primary Path: " + indexPrimaryPath.toString());
File indexSecondaryPath = (getConfig(INDEX_SECONDARY_PATH, "").length() == 0) ? indexPrimaryPath : new File(getConfig(INDEX_SECONDARY_PATH, ""));
this.log.logConfig("Index Secondary Path: " + indexSecondaryPath.toString());
this.listsPath = getConfigPath(LISTS_PATH, LISTS_PATH_DEFAULT);
this.log.logConfig("Lists Path: " + this.listsPath.toString());
this.htDocsPath = getConfigPath(HTDOCS_PATH, HTDOCS_PATH_DEFAULT);
@@ -1277,7 +1276,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/security/news/foren/go.shtml?read=1&msg_id=7301419&forum_id=72721"), query, true);
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260);
this.dbImportManager = new dbImportManager(this);
this.dbImportManager = new ImporterManager();
log.logConfig("Finished Switchboard Initialization");
}

@@ -82,13 +82,13 @@ public final class plasmaWordIndex implements indexRI {
final indexRepositoryReference referenceURL;
public yacySeedDB seedDB;
public yacyNewsPool newsPool;
private File primaryRoot, secondaryRoot;
public plasmaWordIndex(String networkName, serverLog log, File indexPrimaryRoot, File indexSecondaryRoot) {
this.log = log;
File indexPrimaryPath = new File(indexPrimaryRoot, networkName);
File indexPrimaryTextLocation = new File(indexPrimaryPath, "TEXT");
this.primaryRoot = new File(indexPrimaryRoot, networkName);
this.secondaryRoot = new File(indexSecondaryRoot, networkName);
File indexPrimaryTextLocation = new File(this.primaryRoot, "TEXT");
if (!indexPrimaryTextLocation.exists()) {
// patch old index locations; the secondary path is patched in plasmaCrawlLURL
File oldPrimaryPath = new File(new File(indexPrimaryRoot, "PUBLIC"), "TEXT");
@@ -116,10 +116,10 @@ public final class plasmaWordIndex implements indexRI {
this.collections = new indexCollectionRI(textindexcollections, "collection", maxCollectionPartition, indexRWIRowEntry.urlEntryRow);
// create LURL-db
referenceURL = new indexRepositoryReference(indexSecondaryRoot, networkName);
referenceURL = new indexRepositoryReference(this.secondaryRoot);
// create or init seed cache
File networkRoot = new File(indexPrimaryPath, "NETWORK");
File networkRoot = new File(this.primaryRoot, "NETWORK");
networkRoot.mkdirs();
File mySeedFile = new File(networkRoot, "mySeed.txt");
File oldSeedFile = new File(new File(indexPrimaryRoot.getParentFile(), "YACYDB"), "mySeed.txt");
@@ -133,7 +133,10 @@ public final class plasmaWordIndex implements indexRI {
// create or init news database
newsPool = new yacyNewsPool(networkRoot);
}
public File getLocation(boolean primary) {
return (primary) ? this.primaryRoot : this.secondaryRoot;
}
public void putURL(indexURLReference entry) throws IOException {

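The new getLocation(primary) accessor backs the changes above: plasmaDbImporter.getJobName() and its import log line can report the index location without the importer keeping File fields of its own.

    // getLocation() exposes the per-network roots computed in the constructor.
    File primaryRoot   = wordIndex.getLocation(true);   // <indexPrimaryRoot>/<networkName>
    File secondaryRoot = wordIndex.getLocation(false);  // <indexSecondaryRoot>/<networkName>
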
@@ -635,10 +635,10 @@ public final class yacy {
log.logInfo("STARTING URL CLEANUP");
// db containing all currently loades urls
indexRepositoryReference currentUrlDB = new indexRepositoryReference(indexSecondaryRoot, networkName);
indexRepositoryReference currentUrlDB = new indexRepositoryReference(new File(indexSecondaryRoot, networkName));
// db used to hold all neede urls
indexRepositoryReference minimizedUrlDB = new indexRepositoryReference(indexRoot2, networkName);
indexRepositoryReference minimizedUrlDB = new indexRepositoryReference(new File(indexRoot2, networkName));
int cacheMem = (int)(serverMemory.max() - serverMemory.total());
if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
@@ -817,7 +817,7 @@ public final class yacy {
File root = homePath;
File indexroot = new File(root, "DATA/INDEX");
try {serverLog.configureLogging(homePath, new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
indexRepositoryReference currentUrlDB = new indexRepositoryReference(indexroot, networkName);
indexRepositoryReference currentUrlDB = new indexRepositoryReference(new File(indexroot, networkName));
currentUrlDB.deadlinkCleaner(null);
currentUrlDB.close();
}
