*) The URL cache size used by minimizeUrlDB can now be configured via "-cache <MB>" (default is 4 MB)

*) Moved exception stack-trace output from printStackTrace() to the logging engine

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2028 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
hydrox 19 years ago
parent 33f7886a92
commit 49f3b56526

@ -683,7 +683,7 @@ public final class yacy {
log.logInfo("SKIPPED " + wordhash + ": " + migrationStatus);
}
} catch (Exception e) {
e.printStackTrace();
log.logSevere("Exception", e);
}
log.logInfo("FINISHED MIGRATION JOB, WAIT FOR DUMP");
wordIndexCache.close(60);
@ -767,9 +767,9 @@ public final class yacy {
}
}
} catch (Error e) {
e.printStackTrace();
log.logWarning("Error", e);
} catch (Exception e) {
e.printStackTrace();
log.logWarning("Exception", e);
} finally {
log.logInfo("ASSORTMENT-IMPORT FINISHED");
if (homeWordIndex != null) try { homeWordIndex.close(5000); } catch (Exception e){/* nothing todo here */}
@ -925,7 +925,6 @@ public final class yacy {
log.logInfo("DB-IMPORT FINISHED");
} catch (Exception e) {
log.logSevere("Database import failed.",e);
e.printStackTrace();
} finally {
if (homeUrlDB != null) try { homeUrlDB.close(); } catch (Exception e){}
if (importUrlDB != null) try { importUrlDB.close(); } catch (Exception e){}
@ -934,24 +933,26 @@ public final class yacy {
}
}
public static void minimizeUrlDB(String homePath) {
public static void minimizeUrlDB(String homePath, int dbcache) {
// run with "java -classpath classes yacy -minimizeUrlDB"
try {serverLog.configureLogging(new File(homePath, "yacy.logging"));} catch (Exception e) {}
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
serverLog log = new serverLog("URL-CLEANUP");
try {
serverLog log = new serverLog("URL-CLEANUP");
log.logInfo("STARTING URL CLEANUP");
// db containing all currently loades urls
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304);
int cache = dbcache * 1024 * 1024;
log.logFine("URLDB-Caches: "+cache+" bytes");
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), cache);
// db used to hold all neede urls
plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.temp.db"), 4194304);
plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.temp.db"), cache);
Runtime rt = Runtime.getRuntime();
int cacheMem = (int)(rt.maxMemory()-rt.totalMemory())-5*1024*1024;
plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, cacheMem, log);
Iterator wordHashIterator = wordIndex.wordHashes("------------", plasmaWordIndex.RL_WORDFILES, true);
Iterator wordHashIterator = wordIndex.wordHashes("------------", plasmaWordIndex.RL_WORDFILES, false);
String wordhash;
long urlCounter = 0, wordCounter = 0;
@ -999,7 +1000,7 @@ public final class yacy {
} catch (Exception e) {
e.printStackTrace();
log.logSevere("Exception", e);
} finally {
if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
}
@ -1016,7 +1017,7 @@ public final class yacy {
log.logInfo("FINISHED URL CLEANUP, WAIT FOR DUMP");
log.logInfo("TERMINATED URL CLEANUP");
} catch (IOException e) {
e.printStackTrace();
log.logSevere("IOException", e);
}
}
@ -1283,6 +1284,7 @@ public final class yacy {
private static void urldbcleanup(String homePath) {
File root = new File(homePath);
File dbroot = new File(root, "DATA/PLASMADB");
serverLog log = new serverLog("URLDBCLEANUP");
HashSet damagedURLS = new HashSet();
try {
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304);
@ -1296,7 +1298,7 @@ public final class yacy {
damagedURLS.add(m.substring(m.length() - 12));
}
try { Thread.sleep(1000); } catch (InterruptedException e) { }
System.out.println("URLs vorher: " + currentUrlDB.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());
log.logInfo("URLs vorher: " + currentUrlDB.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());
Iterator eiter2 = damagedURLS.iterator();
String urlHash;
@ -1326,15 +1328,15 @@ public final class yacy {
if (res.statusCode == 200) {
entry[1] = newUrl.toString().getBytes();
currentUrlDB.urlHashCache.put(entry);
System.out.println("UrlDB-Entry with urlHash '" + urlHash + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
log.logInfo("UrlDB-Entry with urlHash '" + urlHash + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
} else {
currentUrlDB.remove(urlHash);
System.out.println("UrlDB-Entry with urlHash '" + urlHash + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + res.status);
log.logInfo("UrlDB-Entry with urlHash '" + urlHash + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + res.status);
}
}
} catch (Exception e) {
currentUrlDB.remove(urlHash);
System.out.println("UrlDB-Entry with urlHash '" + urlHash + "' removed\n\tURL: " + oldUrlStr + "\n\tExecption: " + e.getMessage());
log.logInfo("UrlDB-Entry with urlHash '" + urlHash + "' removed\n\tURL: " + oldUrlStr + "\n\tExecption: " + e.getMessage());
} finally {
if (theHttpc != null) try {
theHttpc.close();
@ -1343,10 +1345,10 @@ public final class yacy {
}
}
System.out.println("URLs nachher: " + currentUrlDB.size() + " kaputte URLs: " + damagedURLS.size());
log.logInfo("URLs nachher: " + currentUrlDB.size() + " kaputte URLs: " + damagedURLS.size());
currentUrlDB.close();
} catch (IOException e) {
e.printStackTrace();
log.logSevere("IOException", e);
}
}
@ -1410,7 +1412,7 @@ public final class yacy {
}
log.logInfo("Total number of Hashs: " + counter + ". Last found Hash: " + wordHash);
} catch (IOException e) {
e.printStackTrace();
log.logSevere("IOException", e);
}
if (WordIndex != null) {
WordIndex.close(60);
@ -1504,8 +1506,13 @@ public final class yacy {
} else if ((args.length >= 1) && (args[0].equals("-minimizeUrlDB"))) {
// migrate words from DATA/PLASMADB/WORDS path to assortment cache, if possible
// attention: this may run long and should not be interrupted!
int dbcache = 4;
if (args.length >= 3 && args[1].equals("-cache")) {
dbcache = Integer.parseInt(args[2]);
args = shift(args, 1, 2);
}
if (args.length == 2) applicationRoot= args[1];
minimizeUrlDB(applicationRoot);
minimizeUrlDB(applicationRoot, dbcache);
} else if ((args.length >= 1) && (args[0].equals("-importDB"))) {
// attention: this may run long and should not be interrupted!
String importRoot = null;

Loading…
Cancel
Save