From b7e7808ea64e16cee2d0c9bbbd9c2a55a97ad941 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Tue, 12 Sep 2006 08:23:47 +0000
Subject: [PATCH] wordmigration now works also for new index database if the new database is switched on, no 'too big' messages appear, all the WORDS files can be completely migrated

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2553 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/plasma/plasmaWordIndex.java | 67 ++++++++++++++++++++
 source/yacy.java                             |  8 +++-
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 30d2dcf8b..ed12c3737 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -652,6 +652,73 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
         }
     }
 
+    /**
+     * Migrates the old single-word index file of the given word hash into the
+     * collection index.
+     *
+     * All entries of the old file are read into a container which is passed to
+     * collections.addEntries(). The old file is removed only after that call has
+     * returned, so an exception while storing does not lose the entries.
+     *
+     * @param wordhash hash of the word whose old index file shall be migrated
+     * @return a status String ("not available", "database corrupted; deleted" or
+     *         a "storing failed" message), the container returned by
+     *         collections.addEntries() if that is not null, or otherwise an
+     *         Integer holding the entry count of the old file
+     * @throws IOException propagated from the old index file or the collection index
+     */
+    public Object migrateWords2index(String wordhash) throws IOException {
+        File db = plasmaWordIndexFile.wordHash2path(oldDatabaseRoot, wordhash);
+        if (!(db.exists())) return "not available";
+        plasmaWordIndexFile entity = null;
+        try {
+            entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
+            int size = entity.size();
+            indexContainer container = new indexRowSetContainer(wordhash);
+
+            try {
+                Iterator entries = entity.elements(true);
+                while (entries.hasNext()) {
+                    indexEntry entry = (indexEntry) entries.next();
+                    container.add(new indexEntry[] { entry }, System.currentTimeMillis());
+                }
+            } catch (kelondroException e) {
+                // the old database is corrupted, we simply give it up and delete it
+                try { entity.close(); } catch (Exception ee) { /* best effort */ }
+                entity = null;
+                try { db.delete(); } catch (Exception ee) { /* best effort */ }
+                return "database corrupted; deleted";
+            }
+
+            // store the entries BEFORE the old file is removed: if storing fails,
+            // the old file is still in place and can be migrated again later
+            indexContainer feedback;
+            try {
+                feedback = collections.addEntries(container, container.updated(), false);
+            } catch (kelondroException e) {
+                return "storing failed; old database kept: " + e.getMessage();
+            }
+
+            // the entries have been handed over, now delete the entity
+            try {
+                entity.deleteComplete();
+                entity.close();
+            } catch (kelondroException e) {
+                // regular removal failed, fall back to deleting the file directly
+                try { entity.close(); } catch (Exception ee) { /* best effort */ }
+                try { db.delete(); } catch (Exception ee) { /* best effort */ }
+            }
+            entity = null;
+
+            // NOTE(review): a non-null feedback is returned unchanged, as before; the
+            // old file is removed in that case too - confirm the caller re-stores it
+            if (feedback != null) return feedback;
+            return new Integer(size);
+        } finally {
+            if (entity != null) try { entity.close(); } catch (Exception e) { /* best effort */ }
+        }
+    }
+
     // The Cleaner class was provided as "UrldbCleaner" by Hydrox
     // see http://www.yacy-forum.de/viewtopic.php?p=18093#18093
     public Cleaner makeCleaner(plasmaCrawlLURL lurl, String startHash) {
diff --git a/source/yacy.java b/source/yacy.java
index 9de684fa6..b3871d93e 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -651,7 +651,8 @@ public final class yacy {
             File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
             serverLog log = new serverLog("WORDMIGRATION");
             log.logInfo("STARTING MIGRATION");
-            plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log, sps.getConfigBool("useCollectionIndex", false));
+            boolean useCollectionIndex = sps.getConfigBool("useCollectionIndex", false);
+            plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log, useCollectionIndex);
             enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
             String wordhash;
             File wordfile;
@@ -661,7 +662,10 @@ public final class yacy {
                 wordfile = (File) words.nextElement();
                 wordhash = wordfile.getName().substring(0, 12);
                 // System.out.println("NOW: " + wordhash);
-                migrationStatus = wordIndexCache.migrateWords2Assortment(wordhash);
+                if (useCollectionIndex)
+                    migrationStatus = wordIndexCache.migrateWords2index(wordhash);
+                else
+                    migrationStatus = wordIndexCache.migrateWords2Assortment(wordhash);
                 if (migrationStatus instanceof Integer) {
                     int migrationCount = ((Integer) migrationStatus).intValue();
                     if (migrationCount == 0)