* added option to generate url-lists as plain text file or in html

* modified generation of dom-lists so that they can also be generated as html
these options can be called as:
java -classpath classes yacy -domlist -format html
java -classpath classes yacy -domlist -format html .
java -classpath classes yacy -domlist -format text .
java -classpath classes yacy -urllist -format html .
java -classpath classes yacy -urllist -format text .
the -format <type> option can be omitted; text is the default
a home path can be specified or omitted at the end of the parameters

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1178 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 13fdebc50d
commit 0e88ba997e

@ -40,12 +40,14 @@
// done inside the copyright notive above. A re-distribution must contain // done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice. // the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such. // Contributions and changes to the program code must be marked as such.
import java.io.BufferedOutputStream;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.BufferedWriter; import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
@ -1087,7 +1089,7 @@ public final class yacy {
} }
} }
private static void domlist(String homePath, String targetName) { private static void domlist(String homePath, boolean html, String targetName) {
File root = new File(homePath); File root = new File(homePath);
try { try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000); plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000);
@ -1098,13 +1100,63 @@ public final class yacy {
entry = (plasmaCrawlLURL.Entry) eiter.next(); entry = (plasmaCrawlLURL.Entry) eiter.next();
if ((entry != null) && (entry.url() != null)) doms.add(entry.url().getHost()); if ((entry != null) && (entry.url() != null)) doms.add(entry.url().getHost());
} }
// output file
if (html) {
File file = new File(root, targetName);
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
Iterator i = doms.iterator();
String key;
while (i.hasNext()) {
key = i.next().toString();
bos.write(("<a href=\"http://" + key + "\">" + key + "</a><br>").getBytes());
bos.write(serverCore.crlf);
}
bos.close();
} else {
// plain text list
serverFileUtils.saveSet(new File(root, targetName), doms, new String(serverCore.crlf)); serverFileUtils.saveSet(new File(root, targetName), doms, new String(serverCore.crlf));
}
pool.close();
} catch (IOException e) {
e.printStackTrace();
}
}
private static void urllist(String homePath, boolean html, String targetName) {
File root = new File(homePath);
try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000);
Iterator eiter = pool.loadedURL.entries(true, false);
plasmaCrawlLURL.Entry entry;
File file = new File(root, targetName);
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
while (eiter.hasNext()) {
entry = (plasmaCrawlLURL.Entry) eiter.next();
if ((entry != null) && (entry.url() != null)) {
if (html) {
bos.write(("<a href=\"" + entry.url() + "\">" + entry.descr() + "</a><br>").getBytes());
bos.write(serverCore.crlf);
} else {
bos.write(entry.url().toString().getBytes());
bos.write(serverCore.crlf);
}
}
}
bos.close();
pool.close(); pool.close();
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
/**
 * Returns a new array that contains all elements of args except the
 * count elements starting at index pos. The given array is not modified.
 *
 * @param args  the source array
 * @param pos   index of the first element to leave out
 * @param count number of consecutive elements to leave out
 * @return a new array of length args.length - count
 */
private static String[] shift(String[] args, int pos, int count) {
    final String[] remaining = new String[args.length - count];
    final int tailStart = pos + count;
    // elements in front of the removed range keep their index
    System.arraycopy(args, 0, remaining, 0, pos);
    // elements behind the removed range move down by count positions
    System.arraycopy(args, tailStart, remaining, pos, args.length - tailStart);
    return remaining;
}
/** /**
* Main-method which is started by java. Checks for special arguments or * Main-method which is started by java. Checks for special arguments or
* starts up the application. * starts up the application.
@ -1177,9 +1229,24 @@ public final class yacy {
transferCR(targetaddress, crfile); transferCR(targetaddress, crfile);
} else if ((args.length >= 1) && (args[0].equals("-domlist"))) { } else if ((args.length >= 1) && (args[0].equals("-domlist"))) {
// generate a url list and save it in a file // generate a url list and save it in a file
boolean html = false;
if (args.length >= 3 && args[1].equals("-format")) {
if (args[2].equals("html")) html = true;
args = shift(args, 1, 2);
}
if (args.length == 2) applicationRoot= args[1];
String outfile = "domlist_" + System.currentTimeMillis() + ((html) ? ".html" : ".txt");
domlist(applicationRoot, html, outfile);
} else if ((args.length >= 1) && (args[0].equals("-urllist"))) {
// generate a url list and save it in a file
boolean html = false;
if (args.length >= 3 && args[1].equals("-format")) {
if (args[2].equals("html")) html = true;
args = shift(args, 1, 2);
}
if (args.length == 2) applicationRoot= args[1]; if (args.length == 2) applicationRoot= args[1];
String outfile = "domlist_" + System.currentTimeMillis() + ".txt"; String outfile = "urllist_" + System.currentTimeMillis() + ((html) ? ".html" : ".txt");
domlist(applicationRoot, outfile); urllist(applicationRoot, html, outfile);
} else { } else {
if (args.length == 1) applicationRoot= args[0]; if (args.length == 1) applicationRoot= args[0];
startup(applicationRoot, startupMemFree, startupMemTotal); startup(applicationRoot, startupMemFree, startupMemTotal);

Loading…
Cancel
Save