added generation of domain-list

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1112 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 7ad4353fc6
commit bfe51c7228

@ -494,7 +494,7 @@ public class kelondroRecords {
try { try {
parentNode.setOHHandle(referenceInParent, null); parentNode.setOHHandle(referenceInParent, null);
parentNode.commit(CP_NONE); parentNode.commit(CP_NONE);
throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. The bad node has been auto-fixed"); logWarning("INTERNAL ERROR, Node/init in " + filename + ": node handle index " + handle.index + " exceeds size. The bad node has been auto-fixed");
} catch (IOException ee) { } catch (IOException ee) {
throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. It was tried to fix the bad node, but failed with an IOException: " + ee.getMessage()); throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. It was tried to fix the bad node, but failed with an IOException: " + ee.getMessage());
} }

@ -58,7 +58,7 @@ import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
import java.util.Enumeration; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.Locale; import java.util.Locale;
import java.util.Properties; import java.util.Properties;
@ -716,25 +716,31 @@ public final class plasmaCrawlLURL extends plasmaURL {
} }
} // class Entry } // class Entry
public class kenum implements Enumeration { public class kiter implements Iterator {
// enumerates entry elements // enumerates entry elements
kelondroTree.rowIterator i; kelondroTree.rowIterator i;
public kenum(boolean up, boolean rotating) throws IOException { public kiter(boolean up, boolean rotating) throws IOException {
i = urlHashCache.rows(up, rotating); i = urlHashCache.rows(up, rotating);
} }
public boolean hasMoreElements() { public boolean hasNext() {
return i.hasNext(); return i.hasNext();
} }
public Object nextElement() { public Object next() {
return new Entry(new String(((byte[][])i.next())[0])); byte[] e = ((byte[][])i.next())[0];
if (e == null) return null; else return new Entry(new String(e));
} }
public void remove() {
i.remove();
}
} }
public Enumeration elements(boolean up, boolean rotating) throws IOException { public Iterator entries(boolean up, boolean rotating) throws IOException {
// enumerates entry elements // enumerates entry elements
return new kenum(up, rotating); return new kiter(up, rotating);
} }
public static void main(String[] args) { public static void main(String[] args) {
@ -748,9 +754,9 @@ public final class plasmaCrawlLURL extends plasmaURL {
if (args[0].equals("-l")) try { if (args[0].equals("-l")) try {
// arg 1 is path to URLCache // arg 1 is path to URLCache
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1); final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1);
final Enumeration enu = urls.elements(true, false); final Iterator enu = urls.entries(true, false);
while (enu.hasMoreElements()) { while (enu.hasNext()) {
((Entry) enu.nextElement()).print(); ((Entry) enu.next()).print();
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();

@ -69,6 +69,7 @@ import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaURLPool;
import de.anomic.plasma.plasmaWordIndex; import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexCache; import de.anomic.plasma.plasmaWordIndexCache;
import de.anomic.plasma.plasmaWordIndexClassicDB; import de.anomic.plasma.plasmaWordIndexClassicDB;
@ -1054,6 +1055,25 @@ public final class yacy {
} }
} }
/**
 * Collects the host names of all entries in the loaded-URL database and
 * writes them as a set to a file.
 *
 * The database is opened from <code>DATA/PLASMADB</code> below the given
 * home path. Entries that are <code>null</code> or have no URL are skipped.
 * An IOException is reported via printStackTrace and not propagated.
 *
 * @param homePath   application root directory
 * @param targetName name of the output file, resolved relative to homePath
 */
private static void domlist(String homePath, String targetName) {
    File root = new File(homePath);
    try {
        plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000);
        try {
            Iterator eiter = pool.loadedURL.entries(true, false);
            HashSet doms = new HashSet();
            plasmaCrawlLURL.Entry entry;
            URL url;
            while (eiter.hasNext()) {
                // the iterator may deliver null for rows without a key
                entry = (plasmaCrawlLURL.Entry) eiter.next();
                if (entry == null) continue;
                url = entry.url();
                if (url != null) doms.add(url.getHost());
            }
            serverFileUtils.saveSet(new File(root, targetName), doms, new String(serverCore.crlf));
        } finally {
            // always release the database, even if iteration or saving failed
            pool.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/** /**
* Main-method which is started by java. Checks for special arguments or * Main-method which is started by java. Checks for special arguments or
* starts up the application. * starts up the application.
@ -1124,6 +1144,11 @@ public final class yacy {
String targetaddress = args[1]; String targetaddress = args[1];
String crfile = args[2]; String crfile = args[2];
transferCR(targetaddress, crfile); transferCR(targetaddress, crfile);
} else if ((args.length >= 1) && (args[0].equals("-domlist"))) {
// generate a list of the domains (hosts) of all loaded URLs and save it in a file
if (args.length == 2) applicationRoot= args[1];
String outfile = "domlist_" + System.currentTimeMillis() + ".txt";
domlist(applicationRoot, outfile);
} else { } else {
if (args.length == 1) applicationRoot= args[0]; if (args.length == 1) applicationRoot= args[0];
startup(applicationRoot, startupMemFree, startupMemTotal); startup(applicationRoot, startupMemFree, startupMemTotal);

Loading…
Cancel
Save