diff --git a/source/yacy.java b/source/yacy.java
index 33a336d61..25d7e5ed7 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -40,12 +40,14 @@
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
+import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -1087,7 +1089,7 @@ public final class yacy {
}
}
- private static void domlist(String homePath, String targetName) {
+ private static void domlist(String homePath, boolean html, String targetName) {
File root = new File(homePath);
try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000);
@@ -1098,13 +1100,63 @@ public final class yacy {
entry = (plasmaCrawlLURL.Entry) eiter.next();
if ((entry != null) && (entry.url() != null)) doms.add(entry.url().getHost());
}
- serverFileUtils.saveSet(new File(root, targetName), doms, new String(serverCore.crlf));
+
+ // output file
+ if (html) {
+ File file = new File(root, targetName);
+ BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
+ Iterator i = doms.iterator();
+ String key;
+ while (i.hasNext()) {
+ key = i.next().toString();
+ bos.write(("" + key + "
").getBytes());
+ bos.write(serverCore.crlf);
+ }
+ bos.close();
+ } else {
+ // plain text list
+ serverFileUtils.saveSet(new File(root, targetName), doms, new String(serverCore.crlf));
+ }
+ pool.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+    /**
+     * Writes one line per entry of the loaded-URL database to a file.
+     * <p>
+     * The database is opened from <code>DATA/PLASMADB</code> below the given
+     * home path and the output file is created directly below the home path.
+     * Entries that are <code>null</code> or have no URL are skipped. Every
+     * line is terminated with <code>serverCore.crlf</code> and encoded as
+     * UTF-8 so the result does not depend on the platform default charset.
+     * <p>
+     * An <code>IOException</code> is not propagated; its stack trace is
+     * printed and the method returns.
+     *
+     * @param homePath   application root directory
+     * @param html       if <code>true</code>, each entry is written as an HTML
+     *                   anchor (URL as link target, entry description as link
+     *                   text) followed by a line break tag; otherwise only the
+     *                   plain URL is written
+     * @param targetName name of the output file, relative to <code>homePath</code>
+     */
+    private static void urllist(String homePath, boolean html, String targetName) {
+        File root = new File(homePath);
+        try {
+            plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000);
+            Iterator eiter = pool.loadedURL.entries(true, false);
+            plasmaCrawlLURL.Entry entry;
+            File file = new File(root, targetName);
+            BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
+            try {
+                String line;
+                while (eiter.hasNext()) {
+                    entry = (plasmaCrawlLURL.Entry) eiter.next();
+                    if ((entry == null) || (entry.url() == null)) continue;
+                    if (html) {
+                        // NOTE(review): url and description are emitted unescaped;
+                        // markup characters in crawled titles end up verbatim in the page.
+                        line = "<a href=\"" + entry.url() + "\">" + entry.descr() + "</a><br>";
+                    } else {
+                        line = entry.url().toString();
+                    }
+                    bos.write(line.getBytes("UTF-8"));
+                    bos.write(serverCore.crlf);
+                }
+            } finally {
+                // close (and thereby flush) the file even if iterating or writing failed
+                bos.close();
+            }
             pool.close();
         } catch (IOException e) {
             e.printStackTrace();
         }
     }
+ private static String[] shift(String[] args, int pos, int count) {
+ String[] newargs = new String[args.length - count];
+ System.arraycopy(args, 0, newargs, 0, pos);
+ System.arraycopy(args, pos + count, newargs, pos, args.length - pos - count);
+ return newargs;
+ }
+
/**
* Main-method which is started by java. Checks for special arguments or
* starts up the application.
@@ -1177,9 +1229,24 @@ public final class yacy {
transferCR(targetaddress, crfile);
} else if ((args.length >= 1) && (args[0].equals("-domlist"))) {
// generate a url list and save it in a file
+ boolean html = false;
+ if (args.length >= 3 && args[1].equals("-format")) {
+ if (args[2].equals("html")) html = true;
+ args = shift(args, 1, 2);
+ }
+ if (args.length == 2) applicationRoot= args[1];
+ String outfile = "domlist_" + System.currentTimeMillis() + ((html) ? ".html" : ".txt");
+ domlist(applicationRoot, html, outfile);
+ } else if ((args.length >= 1) && (args[0].equals("-urllist"))) {
+ // generate a url list and save it in a file
+ boolean html = false;
+ if (args.length >= 3 && args[1].equals("-format")) {
+ if (args[2].equals("html")) html = true;
+ args = shift(args, 1, 2);
+ }
if (args.length == 2) applicationRoot= args[1];
- String outfile = "domlist_" + System.currentTimeMillis() + ".txt";
- domlist(applicationRoot, outfile);
+ String outfile = "urllist_" + System.currentTimeMillis() + ((html) ? ".html" : ".txt");
+ urllist(applicationRoot, html, outfile);
} else {
if (args.length == 1) applicationRoot= args[0];
startup(applicationRoot, startupMemFree, startupMemTotal);