diff --git a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java index 535eeb04c..1f4d738e9 100644 --- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java @@ -77,7 +77,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter { this.log.logFine("Initializing source word index db."); this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false)); this.log.logFine("Initializing import URL db."); - this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2, false); + this.importUrlDB = new plasmaCrawlLURL(this.importPath, (this.cacheSize/2)/1024, preloadTime / 2, false); this.importStartSize = this.importWordIndex.size(); } @@ -154,9 +154,10 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter { continue; } else { // we need to import the url - try { - // getting the url entry - plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.load(urlHash, null); + + // getting the url entry + plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.load(urlHash, null); + if (urlEntry != null) { /* write it into the home url db */ this.homeUrlDB.store(urlEntry, false); @@ -166,7 +167,8 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter { if (this.urlCounter % 500 == 0) { this.log.logFine(this.urlCounter + " URLs processed so far."); } - } catch (IOException e) { + + } else { unknownUrlBuffer.add(urlHash); notBoundEntryCounter++; newContainer.remove(urlHash);