added generation of domain-list

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1112 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 7ad4353fc6
commit bfe51c7228

@ -494,7 +494,7 @@ public class kelondroRecords {
try {
parentNode.setOHHandle(referenceInParent, null);
parentNode.commit(CP_NONE);
throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. The bad node has been auto-fixed");
logWarning("INTERNAL ERROR, Node/init in " + filename + ": node handle index " + handle.index + " exceeds size. The bad node has been auto-fixed");
} catch (IOException ee) {
throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. It was tried to fix the bad node, but failed with an IOException: " + ee.getMessage());
}

@ -58,7 +58,7 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Properties;
@ -716,25 +716,31 @@ public final class plasmaCrawlLURL extends plasmaURL {
}
} // class Entry
public class kenum implements Enumeration {
public class kiter implements Iterator {
// enumerates entry elements
kelondroTree.rowIterator i;
public kenum(boolean up, boolean rotating) throws IOException {
public kiter(boolean up, boolean rotating) throws IOException {
i = urlHashCache.rows(up, rotating);
}
public boolean hasMoreElements() {
public boolean hasNext() {
return i.hasNext();
}
public Object nextElement() {
return new Entry(new String(((byte[][])i.next())[0]));
public Object next() {
byte[] e = ((byte[][])i.next())[0];
if (e == null) return null; else return new Entry(new String(e));
}
public void remove() {
i.remove();
}
}
public Enumeration elements(boolean up, boolean rotating) throws IOException {
public Iterator entries(boolean up, boolean rotating) throws IOException {
// enumerates entry elements
return new kenum(up, rotating);
return new kiter(up, rotating);
}
public static void main(String[] args) {
@ -748,9 +754,9 @@ public final class plasmaCrawlLURL extends plasmaURL {
if (args[0].equals("-l")) try {
// arg 1 is path to URLCache
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1);
final Enumeration enu = urls.elements(true, false);
while (enu.hasMoreElements()) {
((Entry) enu.nextElement()).print();
final Iterator enu = urls.entries(true, false);
while (enu.hasNext()) {
((Entry) enu.next()).print();
}
} catch (Exception e) {
e.printStackTrace();

@ -69,6 +69,7 @@ import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaURLPool;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexCache;
import de.anomic.plasma.plasmaWordIndexClassicDB;
@ -1054,6 +1055,25 @@ public final class yacy {
}
}
/**
 * Collects the host names of all entries in the loaded-URL database and
 * writes them to a file.
 *
 * The URL pool is opened from <code>DATA/PLASMADB</code> below
 * <code>homePath</code>. Every entry delivered by
 * <code>pool.loadedURL.entries(true, false)</code> that is non-null and has
 * a non-null URL contributes its host to a set, so each host is stored once.
 * The set is then handed to <code>serverFileUtils.saveSet</code> together
 * with the target file and the CRLF string (presumably the separator
 * written between entries -- confirm against saveSet).
 *
 * An IOException is not propagated; its stack trace is printed instead.
 *
 * @param homePath   application root directory; the database directory and
 *                   the target file are both resolved relative to it
 * @param targetName name of the output file, created below homePath
 */
private static void domlist(String homePath, String targetName) {
    File root = new File(homePath);
    try {
        // NOTE(review): the meaning of the three numeric arguments is defined by
        // plasmaURLPool and not visible here (presumably buffer/cache sizes).
        plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000);
        try {
            Iterator eiter = pool.loadedURL.entries(true, false);
            HashSet doms = new HashSet();
            plasmaCrawlLURL.Entry entry;
            URL url;
            while (eiter.hasNext()) {
                // the iterator may deliver null for rows without a key, so guard before use
                entry = (plasmaCrawlLURL.Entry) eiter.next();
                if (entry == null) continue;
                url = entry.url();
                if (url != null) doms.add(url.getHost());
            }
            serverFileUtils.saveSet(new File(root, targetName), doms, new String(serverCore.crlf));
        } finally {
            // close the pool even when iterating or saving fails,
            // so the database files are not left open
            pool.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
* Main-method which is started by java. Checks for special arguments or
* starts up the application.
@ -1121,9 +1141,14 @@ public final class yacy {
cleanwordlist(args[1], minlength, maxlength);
} else if ((args.length >= 1) && (args[0].equals("-transfercr"))) {
// transfer a single cr file to a remote peer
String targetaddress = args[1];
String crfile = args[2];
transferCR(targetaddress, crfile);
String targetaddress = args[1];
String crfile = args[2];
transferCR(targetaddress, crfile);
} else if ((args.length >= 1) && (args[0].equals("-domlist"))) {
// generate a domain list and save it in a file
if (args.length == 2) applicationRoot= args[1];
String outfile = "domlist_" + System.currentTimeMillis() + ".txt";
domlist(applicationRoot, outfile);
} else {
if (args.length == 1) applicationRoot= args[0];
startup(applicationRoot, startupMemFree, startupMemTotal);

Loading…
Cancel
Save