Extended the RWIHashList DHT control method:

It is now possible to select only assortments or only files in WORDS.
Selecting words only from the RAM cache is not yet possible.


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1305 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 5449193167
commit 537a819824

@ -77,6 +77,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaURLPool;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexAssortmentCluster;
import de.anomic.plasma.plasmaWordIndexCache;
import de.anomic.plasma.plasmaWordIndexClassicDB;
import de.anomic.plasma.plasmaWordIndexEntity;
@ -1262,7 +1263,7 @@ public final class yacy {
}
}
private static void RWIHashList(String homePath, String targetName) {
private static void RWIHashList(String homePath, String targetName, String resource) {
serverLog log = new serverLog("HASHLIST");
File homeDBroot = new File(new File(homePath), "DATA/PLASMADB");
String wordChunkStartHash = "------------";
@ -1272,8 +1273,19 @@ public final class yacy {
File file = new File(root, targetName + ".txt");
try {
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
plasmaWordIndex WordIndex = new plasmaWordIndex(homeDBroot, 8*1024*1024, log);
Iterator WordHashIterator = WordIndex.wordHashes(wordChunkStartHash, true, true);
Iterator WordHashIterator = null;
if (resource.equals("all")) {
plasmaWordIndex WordIndex = new plasmaWordIndex(homeDBroot, 8*1024*1024, log);
WordHashIterator = WordIndex.wordHashes(wordChunkStartHash, true, false);
}
if (resource.equals("assortments")) {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log);
WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true);
}
if (resource.equals("words")) {
plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(homeDBroot, log);
WordHashIterator = fileDB.wordHashes(wordChunkStartHash, true);
}
int counter = 0;
while (WordHashIterator.hasNext()) {
counter++;
@ -1285,10 +1297,11 @@ public final class yacy {
}
}
} catch (IOException e) {
e.printStackTrace();
}
e.printStackTrace();
}
}
/**
* Main-method which is started by java. Checks for special arguments or
* starts up the application.
@ -1393,9 +1406,11 @@ public final class yacy {
urldbcleanup(applicationRoot);
} else if ((args.length >= 1) && (args[0].equals("-rwihashlist"))) {
// generate a url list and save it in a file
if (args.length == 2) applicationRoot= args[1];
String domain = "all";
if (args.length >= 2) domain= args[1];
if (args.length == 3) applicationRoot= args[2];
String outfile = "rwihashlist_" + System.currentTimeMillis();
RWIHashList(applicationRoot, outfile);
RWIHashList(applicationRoot, outfile, domain);
} else {
if (args.length == 1) applicationRoot= args[0];
startup(applicationRoot, startupMemFree, startupMemTotal);

Loading…
Cancel
Save