- fixed bug with too much RAM in crawler queue

- fixed dir bug
- better calculation of TF for join
- better waiting-on-result logic

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4424 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent a8a5df4a51
commit acf771d5e1

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5 javacTarget=1.5
# Release Configuration # Release Configuration
releaseVersion=0.564 releaseVersion=0.565
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -40,6 +40,7 @@
package de.anomic.http; package de.anomic.http;
import java.io.File;
import java.util.LinkedList; import java.util.LinkedList;
import de.anomic.server.serverAbstractSwitch; import de.anomic.server.serverAbstractSwitch;
@ -50,7 +51,7 @@ public final class httpdSwitchboard extends serverAbstractSwitch implements serv
private final LinkedList<Object> cacheStack; private final LinkedList<Object> cacheStack;
public httpdSwitchboard(String rootPath, String initPath, String configPath, boolean applyPro) { public httpdSwitchboard(File rootPath, String initPath, String configPath, boolean applyPro) {
super(rootPath, initPath, configPath, applyPro); super(rootPath, initPath, configPath, applyPro);
cacheStack = new LinkedList<Object>(); cacheStack = new LinkedList<Object>();
} }

@ -325,7 +325,7 @@ public class indexContainer extends kelondroRowSet {
assert (ie0.urlHash().length() == keylength) : "ie0.urlHash() = " + ie0.urlHash(); assert (ie0.urlHash().length() == keylength) : "ie0.urlHash() = " + ie0.urlHash();
assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash(); assert (ie1.urlHash().length() == keylength) : "ie1.urlHash() = " + ie1.urlHash();
// this is a hit. Calculate word distance: // this is a hit. Calculate word distance:
ie0.combineDistance(ie1); ie0.join(ie1);
if (ie0.worddistance() <= maxDistance) conj.add(ie0); if (ie0.worddistance() <= maxDistance) conj.add(ie0);
} }
} }
@ -360,7 +360,7 @@ public class indexContainer extends kelondroRowSet {
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
} else { } else {
// we have found the same urls in different searches! // we have found the same urls in different searches!
ie1.combineDistance(ie2); ie1.join(ie2);
if (ie1.worddistance() <= maxDistance) conj.add(ie1); if (ie1.worddistance() <= maxDistance) conj.add(ie1);
if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break; if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break;
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
@ -435,7 +435,7 @@ public class indexContainer extends kelondroRowSet {
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;
} else { } else {
// we have found the same urls in different searches! // we have found the same urls in different searches!
ie1.combineDistance(ie2); ie1.join(ie2);
e1.remove(); e1.remove();
if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break; if (e1.hasNext()) ie1 = (indexRWIEntry) e1.next(); else break;
if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break; if (e2.hasNext()) ie2 = (indexRWIEntry) e2.next(); else break;

@ -87,7 +87,7 @@ public interface indexRWIEntry {
public String toString(); public String toString();
public void combineDistance(indexRWIEntry oe); public void join(indexRWIEntry oe);
public int worddistance(); public int worddistance();

@ -263,18 +263,22 @@ public final class indexRWIRowEntry implements indexRWIEntry {
return toPropertyForm(); return toPropertyForm();
} }
public static indexRWIEntry combineDistance(indexRWIRowEntry ie1, indexRWIEntry ie2) { public static indexRWIEntry join(indexRWIRowEntry ie1, indexRWIEntry ie2) {
// returns a modified entry of the first argument // returns a modified entry of the first argument
// combine the distance
ie1.entry.setCol(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext())); ie1.entry.setCol(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext()));
ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext())); ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext()));
ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/); ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? Math.min(ie1.posinphrase(), ie2.posinphrase()) : 0 /*unknown*/);
ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase())); ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase()));
ie1.entry.setCol(col_wordsInText, (ie1.wordsintext() + ie2.wordsintext()) / 2);
// combine term frequency
ie1.entry.setCol(col_wordsInText, ie1.wordsintext() + ie2.wordsintext());
return ie1; return ie1;
} }
public void combineDistance(indexRWIEntry oe) { public void join(indexRWIEntry oe) {
combineDistance(this, oe); join(this, oe);
} }
public int worddistance() { public int worddistance() {

@ -54,7 +54,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
this.phrasesintext = e.phrasesintext(); this.phrasesintext = e.phrasesintext();
this.posintext = e.posintext(); this.posintext = e.posintext();
this.posinphrase = e.posinphrase(); this.posinphrase = e.posinphrase();
this.posofphrase= e.posofphrase(); this.posofphrase = e.posofphrase();
this.quality = e.quality(); this.quality = e.quality();
this.urlcomps = e.urlcomps(); this.urlcomps = e.urlcomps();
this.urllength = e.urllength(); this.urllength = e.urllength();
@ -65,7 +65,15 @@ public class indexRWIVarEntry implements indexRWIEntry {
this.termFrequency = e.termFrequency(); this.termFrequency = e.termFrequency();
} }
public void combineDistance(indexRWIEntry oe) { public void join(indexRWIEntry oe) {
// combine the distance
this.worddistance = this.worddistance() + oe.worddistance() + Math.abs(this.posintext() - oe.posintext());
this.posintext = Math.min(this.posintext(), oe.posintext());
this.posinphrase = (this.posofphrase() == oe.posofphrase()) ? Math.min(this.posinphrase(), oe.posinphrase()) : 0;
this.posofphrase = Math.min(this.posofphrase(), oe.posofphrase());
// combine term frequency
this.wordsintext = this.wordsintext() + oe.wordsintext();
} }
public kelondroBitfield flags() { public kelondroBitfield flags() {

@ -74,10 +74,10 @@ public class plasmaCrawlNURL {
public plasmaCrawlNURL(File cachePath) { public plasmaCrawlNURL(File cachePath) {
super(); super();
coreStack = new plasmaCrawlBalancer(cachePath, "urlNoticeCoreStack", true); coreStack = new plasmaCrawlBalancer(cachePath, "urlNoticeCoreStack", false);
limitStack = new plasmaCrawlBalancer(cachePath, "urlNoticeLimitStack", false); limitStack = new plasmaCrawlBalancer(cachePath, "urlNoticeLimitStack", false);
//overhangStack = new plasmaCrawlBalancer(overhangStackFile); //overhangStack = new plasmaCrawlBalancer(overhangStackFile);
remoteStack = new plasmaCrawlBalancer(cachePath, "urlNoticeRemoteStack", true); remoteStack = new plasmaCrawlBalancer(cachePath, "urlNoticeRemoteStack", false);
} }
public void close() { public void close() {

@ -103,6 +103,7 @@ public final class plasmaSearchEvent {
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
this.workerThreads = null; this.workerThreads = null;
this.localSearchThread = null;
this.resultList = new ArrayList<ResultEntry>(10); // this is the result set which is filled up with search results, enriched with snippets this.resultList = new ArrayList<ResultEntry>(10); // this is the result set which is filled up with search results, enriched with snippets
//this.resultListLock = 0; // no locked elements until now //this.resultListLock = 0; // no locked elements until now
this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed. this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed.
@ -407,6 +408,17 @@ public final class plasmaSearchEvent {
return false; return false;
} }
/**
 * Counts the entries of {@code primarySearchThreads} that are no longer running.
 * An entry is counted when its slot is {@code null} or when the thread in that
 * slot reports {@code isAlive() == false}. Only the primary search threads are
 * inspected.
 *
 * @return the number of counted slots; 0 when the array is {@code null} or empty
 */
private int countFinishedRemoteSearch() {
    // nothing to inspect when no primary search threads are registered
    if (this.primarySearchThreads == null) return 0;
    int finished = 0;
    for (int slot = 0; slot < this.primarySearchThreads.length; slot++) {
        if (this.primarySearchThreads[slot] == null) {
            // an empty slot counts as finished
            finished++;
        } else if (!this.primarySearchThreads[slot].isAlive()) {
            // the thread in this slot is not running
            finished++;
        }
    }
    return finished;
}
public plasmaSearchQuery getQuery() { public plasmaSearchQuery getQuery() {
return query; return query;
} }
@ -563,19 +575,12 @@ public final class plasmaSearchEvent {
public ResultEntry oneResult(int item) { public ResultEntry oneResult(int item) {
// first sleep a while to give accumulation threads a chance to work // first sleep a while to give accumulation threads a chance to work
if (anyWorkerAlive()) { while (((localSearchThread != null) && (localSearchThread.isAlive())) ||
long sleeptime = Math.min(600, this.eventTime + (this.query.maximumTime / this.query.displayResults() * ((item % this.query.displayResults()) + 1)) - System.currentTimeMillis()); ((this.primarySearchThreads != null) && (this.primarySearchThreads.length > item) && (anyWorkerAlive()) &&
if (this.resultList.size() <= item + 10) sleeptime = Math.min(sleeptime + 300, 600); ((this.resultList.size() <= item) || (countFinishedRemoteSearch() <= item)))) {
if (sleeptime > 0) try {Thread.sleep(sleeptime);} catch (InterruptedException e) {} try {Thread.sleep(100);} catch (InterruptedException e) {}
//System.out.println("+++DEBUG-oneResult+++ (1) sleeping " + sleeptime);
// then sleep until any result is available (that should not happen)
while ((this.resultList.size() <= item) && (anyWorkerAlive())) {
try {Thread.sleep(100);} catch (InterruptedException e) {}
//System.out.println("+++DEBUG-oneResult+++ (2) sleeping " + 100);
}
} }
// finally, if there is something, return the result // finally, if there is something, return the result
synchronized (this.resultList) { synchronized (this.resultList) {
// check if we have enough entries // check if we have enough entries
@ -610,7 +615,7 @@ public final class plasmaSearchEvent {
boolean secondarySearchStartet = false; boolean secondarySearchStartet = false;
private void prepareSecondarySearch() { private void prepareSecondarySearch() {
if (secondarySearchStartet) return; // dont do this twice if (secondarySearchStartet) return; // don't do this twice
if ((rcAbstracts == null) || (rcAbstracts.size() != query.queryHashes.size())) return; // secondary search not possible (yet) if ((rcAbstracts == null) || (rcAbstracts.size() != query.queryHashes.size())) return; // secondary search not possible (yet)
this.secondarySearchStartet = true; this.secondarySearchStartet = true;

@ -877,7 +877,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
private static plasmaSwitchboard sb; private static plasmaSwitchboard sb;
public plasmaSwitchboard(String rootPath, String initPath, String configPath, boolean applyPro) { public plasmaSwitchboard(File rootPath, String initPath, String configPath, boolean applyPro) {
super(rootPath, initPath, configPath, applyPro); super(rootPath, initPath, configPath, applyPro);
serverProfiling.startSystemProfiling(); serverProfiling.startSystemProfiling();
sb=this; sb=this;

@ -186,7 +186,7 @@ public final class serverLog {
Logger.getLogger(appName).isLoggable(Level.FINEST); Logger.getLogger(appName).isLoggable(Level.FINEST);
} }
public static final void configureLogging(String homePath, File loggingConfigFile) throws SecurityException, FileNotFoundException, IOException { public static final void configureLogging(File homePath, File loggingConfigFile) throws SecurityException, FileNotFoundException, IOException {
FileInputStream fileIn = null; FileInputStream fileIn = null;
try { try {
System.out.println("STARTUP: Trying to load logging configuration from file " + loggingConfigFile.toString()); System.out.println("STARTUP: Trying to load logging configuration from file " + loggingConfigFile.toString());

@ -58,7 +58,7 @@ public abstract class serverAbstractSwitch implements serverSwitch {
// configuration management // configuration management
private File configFile; private File configFile;
private String configComment; private String configComment;
private String rootPath; private File rootPath;
protected serverLog log; protected serverLog log;
protected int serverJobs; protected int serverJobs;
protected long maxTrackingTime; protected long maxTrackingTime;
@ -69,7 +69,7 @@ public abstract class serverAbstractSwitch implements serverSwitch {
private TreeMap<String, serverSwitchAction> switchActions; private TreeMap<String, serverSwitchAction> switchActions;
protected HashMap<String, TreeMap<Long, String>> accessTracker; // mappings from requesting host to an ArrayList of serverTrack-entries protected HashMap<String, TreeMap<Long, String>> accessTracker; // mappings from requesting host to an ArrayList of serverTrack-entries
public serverAbstractSwitch(String rootPath, String initPath, String configPath, boolean applyPro) { public serverAbstractSwitch(File rootPath, String initPath, String configPath, boolean applyPro) {
// we initialize the switchboard with a property file, // we initialize the switchboard with a property file,
// but maintain these properties then later in a new 'config' file // but maintain these properties then later in a new 'config' file
// to reset all changed configs, the config file must // to reset all changed configs, the config file must
@ -526,7 +526,7 @@ public abstract class serverAbstractSwitch implements serverSwitch {
public abstract serverObjects action(String actionName, serverObjects actionInput); public abstract serverObjects action(String actionName, serverObjects actionInput);
public String getRootPath() { public File getRootPath() {
return rootPath; return rootPath;
} }

@ -26,9 +26,11 @@
package de.anomic.server; package de.anomic.server;
import java.io.File;
public class serverPlainSwitch extends serverAbstractSwitch implements serverSwitch { public class serverPlainSwitch extends serverAbstractSwitch implements serverSwitch {
public serverPlainSwitch(String rootPath, String initPath, String configPath, boolean applyPro) { public serverPlainSwitch(File rootPath, String initPath, String configPath, boolean applyPro) {
super(rootPath, initPath, configPath, applyPro); super(rootPath, initPath, configPath, applyPro);
} }

@ -60,7 +60,7 @@ import de.anomic.server.logging.serverLog;
public interface serverSwitch { public interface serverSwitch {
// the root path for the application // the root path for the application
public String getRootPath(); public File getRootPath();
// a logger for this switchboard // a logger for this switchboard
public void setLog(serverLog log); public void setLog(serverLog log);

@ -43,6 +43,7 @@
package de.anomic.yacy; package de.anomic.yacy;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.ArrayList; import java.util.ArrayList;
@ -1092,7 +1093,7 @@ public final class yacyClient {
public static void main(String[] args) { public static void main(String[] args) {
System.out.println("yacyClient Test"); System.out.println("yacyClient Test");
try { try {
final plasmaSwitchboard sb = new plasmaSwitchboard(args[0], "httpProxy.init", "DATA/SETTINGS/httpProxy.conf", false); final plasmaSwitchboard sb = new plasmaSwitchboard(new File(args[0]), "httpProxy.init", "DATA/SETTINGS/httpProxy.conf", false);
/*final yacyCore core =*/ new yacyCore(sb); /*final yacyCore core =*/ new yacyCore(sb);
yacyCore.peerActions.loadSeedLists(); yacyCore.peerActions.loadSeedLists();
final yacySeed target = yacyCore.seedDB.getConnected(args[1]); final yacySeed target = yacyCore.seedDB.getConnected(args[1]);

@ -384,6 +384,7 @@ public final class yacyVersion implements Comparator<yacyVersion>, Comparable<ya
//byte[] script = ("cd " + plasmaSwitchboard.getSwitchboard().getRootPath() + ";while [ -e ../yacy.running ]; do sleep 1;done;tar xfz " + release + ";cp -Rf yacy/* ../../;rm -Rf yacy;cd ../../;startYACY.sh").getBytes(); //byte[] script = ("cd " + plasmaSwitchboard.getSwitchboard().getRootPath() + ";while [ -e ../yacy.running ]; do sleep 1;done;tar xfz " + release + ";cp -Rf yacy/* ../../;rm -Rf yacy;cd ../../;startYACY.sh").getBytes();
try { try {
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard(); plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
String apphome = sb.getRootPath().toString();
serverLog.logInfo("UPDATE", "INITIATED"); serverLog.logInfo("UPDATE", "INITIATED");
String script = String script =
"#!/bin/sh" + serverCore.LF_STRING + "#!/bin/sh" + serverCore.LF_STRING +
@ -394,14 +395,14 @@ public final class yacyVersion implements Comparator<yacyVersion>, Comparable<ya
"while [ -f ../yacy.running ]; do" + serverCore.LF_STRING + "while [ -f ../yacy.running ]; do" + serverCore.LF_STRING +
"sleep 1" + serverCore.LF_STRING + "sleep 1" + serverCore.LF_STRING +
"done" + serverCore.LF_STRING + "done" + serverCore.LF_STRING +
"cp -Rf yacy/* ../../" + serverCore.LF_STRING + "cp -Rf yacy/* " + apphome + serverCore.LF_STRING +
"rm -Rf yacy" + serverCore.LF_STRING + "rm -Rf yacy" + serverCore.LF_STRING +
"else" + serverCore.LF_STRING + "else" + serverCore.LF_STRING +
"while [ -f ../yacy.running ]; do" + serverCore.LF_STRING + "while [ -f ../yacy.running ]; do" + serverCore.LF_STRING +
"sleep 1" + serverCore.LF_STRING + "sleep 1" + serverCore.LF_STRING +
"done" + serverCore.LF_STRING + "done" + serverCore.LF_STRING +
"fi" + serverCore.LF_STRING + "fi" + serverCore.LF_STRING +
"cd ../../" + serverCore.LF_STRING + "cd " + apphome + serverCore.LF_STRING +
"nohup ./startYACY.sh > /dev/null" + serverCore.LF_STRING; "nohup ./startYACY.sh > /dev/null" + serverCore.LF_STRING;
File scriptFile = new File(sb.getRootPath(), "DATA/RELEASE/update.sh"); File scriptFile = new File(sb.getRootPath(), "DATA/RELEASE/update.sh");
serverSystem.deployScript(scriptFile, script); serverSystem.deployScript(scriptFile, script);

@ -51,7 +51,7 @@ public class migration {
//SVN constants //SVN constants
public static final int USE_WORK_DIR=1389; //wiki & messages in DATA/WORK public static final int USE_WORK_DIR=1389; //wiki & messages in DATA/WORK
public static final int TAGDB_WITH_TAGHASH=1635; //tagDB keys are tagHashes instead of plain tagname. public static final int TAGDB_WITH_TAGHASH=1635; //tagDB keys are tagHashes instead of plain tagname.
public static final int NEW_OVERLAYS=3675; public static final int NEW_OVERLAYS=4422;
public static void main(String[] args) { public static void main(String[] args) {
} }

@ -164,7 +164,7 @@ public final class yacy {
* @param homePath Root-path where all information is to be found. * @param homePath Root-path where all information is to be found.
* @param startupFree free memory at startup time, to be used later for statistics * @param startupFree free memory at startup time, to be used later for statistics
*/ */
private static void startup(String homePath, long startupMemFree, long startupMemTotal) { private static void startup(File homePath, long startupMemFree, long startupMemTotal) {
int oldRev=0; int oldRev=0;
int newRev=0; int newRev=0;
@ -183,7 +183,7 @@ public final class yacy {
} }
// ensure that there is a DATA directory, if not, create one and if that fails warn and die // ensure that there is a DATA directory, if not, create one and if that fails warn and die
File f = new File(homePath); if (!(f.exists())) f.mkdirs(); File f = homePath; if (!(f.exists())) f.mkdirs();
f = new File(homePath, "DATA/"); if (!(f.exists())) f.mkdirs(); f = new File(homePath, "DATA/"); if (!(f.exists())) f.mkdirs();
if (!(f.exists())) { if (!(f.exists())) {
System.err.println("Error creating DATA-directory in " + homePath.toString() + " . Please check your write-permission for this folder. YaCy will now terminate."); System.err.println("Error creating DATA-directory in " + homePath.toString() + " . Please check your write-permission for this folder. YaCy will now terminate.");
@ -198,7 +198,7 @@ public final class yacy {
System.out.println("could not copy yacy.logging"); System.out.println("could not copy yacy.logging");
} }
try{ try{
serverLog.configureLogging(homePath,new File(homePath, "DATA/LOG/yacy.logging")); serverLog.configureLogging(homePath, new File(homePath, "DATA/LOG/yacy.logging"));
} catch (IOException e) { } catch (IOException e) {
System.out.println("could not find logging properties in homePath=" + homePath); System.out.println("could not find logging properties in homePath=" + homePath);
e.printStackTrace(); e.printStackTrace();
@ -263,7 +263,7 @@ public final class yacy {
sb.setConfig("version", Double.toString(version)); sb.setConfig("version", Double.toString(version));
sb.setConfig("vString", yacyVersion.combined2prettyVersion(Double.toString(version))); sb.setConfig("vString", yacyVersion.combined2prettyVersion(Double.toString(version)));
sb.setConfig("vdate", (vDATE.startsWith("@")) ? serverDate.formatShortDay() : vDATE); sb.setConfig("vdate", (vDATE.startsWith("@")) ? serverDate.formatShortDay() : vDATE);
sb.setConfig("applicationRoot", homePath); sb.setConfig("applicationRoot", homePath.toString());
serverLog.logConfig("STARTUP", "YACY Version: " + version + ", Built " + sb.getConfig("vdate", "00000000")); serverLog.logConfig("STARTUP", "YACY Version: " + version + ", Built " + sb.getConfig("vdate", "00000000"));
yacyVersion.latestRelease = version; yacyVersion.latestRelease = version;
@ -451,7 +451,7 @@ public final class yacy {
* @param homePath Root-path where all the information is to be found. * @param homePath Root-path where all the information is to be found.
* @return Properties read from the configurationfile. * @return Properties read from the configurationfile.
*/ */
private static Properties configuration(String mes, String homePath) { private static Properties configuration(String mes, File homePath) {
serverLog.logConfig(mes, "Application Root Path: " + homePath.toString()); serverLog.logConfig(mes, "Application Root Path: " + homePath.toString());
// read data folder // read data folder
@ -480,7 +480,7 @@ public final class yacy {
// YaCy is running in the same runtime. we can shutdown via interrupt // YaCy is running in the same runtime. we can shutdown via interrupt
sb.terminate(); sb.terminate();
} else { } else {
String applicationRoot = System.getProperty("user.dir").replace('\\', '/'); File applicationRoot = new File(System.getProperty("user.dir").replace('\\', '/'));
shutdown(applicationRoot); shutdown(applicationRoot);
} }
} }
@ -491,7 +491,7 @@ public final class yacy {
* *
* @param homePath Root-path where all the information is to be found. * @param homePath Root-path where all the information is to be found.
*/ */
static void shutdown(String homePath) { static void shutdown(File homePath) {
// start up // start up
System.out.println(copyright); System.out.println(copyright);
System.out.println(hline); System.out.println(hline);
@ -544,7 +544,7 @@ public final class yacy {
* *
* @param homePath Root-Path where all the information is to be found. * @param homePath Root-Path where all the information is to be found.
*/ */
private static void genWordstat(String homePath) { private static void genWordstat(File homePath) {
// start up // start up
System.out.println(copyright); System.out.println(copyright);
System.out.println(hline); System.out.println(hline);
@ -587,12 +587,12 @@ public final class yacy {
* @param homePath path to the YaCy directory * @param homePath path to the YaCy directory
* @param dbcache cache size in MB * @param dbcache cache size in MB
*/ */
public static void minimizeUrlDB(String homePath) { public static void minimizeUrlDB(File homePath) {
// run with "java -classpath classes yacy -minimizeUrlDB" // run with "java -classpath classes yacy -minimizeUrlDB"
try {serverLog.configureLogging(homePath,new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {} try {serverLog.configureLogging(homePath, new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File indexPrimaryRoot = new File(new File(homePath), "DATA/INDEX"); File indexPrimaryRoot = new File(homePath, "DATA/INDEX");
File indexSecondaryRoot = new File(new File(homePath), "DATA/INDEX"); File indexSecondaryRoot = new File(homePath, "DATA/INDEX");
File indexRoot2 = new File(new File(homePath), "DATA/INDEX2"); File indexRoot2 = new File(homePath, "DATA/INDEX2");
serverLog log = new serverLog("URL-CLEANUP"); serverLog log = new serverLog("URL-CLEANUP");
try { try {
log.logInfo("STARTING URL CLEANUP"); log.logInfo("STARTING URL CLEANUP");
@ -777,24 +777,24 @@ public final class yacy {
* *
* @param homePath Root-Path where all information is to be found. * @param homePath Root-Path where all information is to be found.
*/ */
private static void urldbcleanup(String homePath) { private static void urldbcleanup(File homePath) {
File root = new File(homePath); File root = homePath;
File indexroot = new File(root, "DATA/INDEX"); File indexroot = new File(root, "DATA/INDEX");
try {serverLog.configureLogging(homePath,new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {} try {serverLog.configureLogging(homePath, new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(indexroot, 10000); plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(indexroot, 10000);
currentUrlDB.urldbcleanup(); currentUrlDB.urldbcleanup();
currentUrlDB.close(); currentUrlDB.close();
} }
private static void RWIHashList(String homePath, String targetName, String resource, String format) { private static void RWIHashList(File homePath, String targetName, String resource, String format) {
plasmaWordIndex WordIndex = null; plasmaWordIndex WordIndex = null;
serverLog log = new serverLog("HASHLIST"); serverLog log = new serverLog("HASHLIST");
File indexPrimaryRoot = new File(new File(homePath), "DATA/INDEX"); File indexPrimaryRoot = new File(homePath, "DATA/INDEX");
File indexSecondaryRoot = new File(new File(homePath), "DATA/INDEX"); File indexSecondaryRoot = new File(homePath, "DATA/INDEX");
String wordChunkStartHash = "AAAAAAAAAAAA"; String wordChunkStartHash = "AAAAAAAAAAAA";
try {serverLog.configureLogging(homePath,new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {} try {serverLog.configureLogging(homePath, new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
log.logInfo("STARTING CREATION OF RWI-HASHLIST"); log.logInfo("STARTING CREATION OF RWI-HASHLIST");
File root = new File(homePath); File root = homePath;
try { try {
Iterator<indexContainer> indexContainerIterator = null; Iterator<indexContainer> indexContainerIterator = null;
if (resource.equals("all")) { if (resource.equals("all")) {
@ -850,10 +850,10 @@ public final class yacy {
* Searching for peers affected by Bug * Searching for peers affected by Bug
* @param homePath * @param homePath
*/ */
public static void testPeerDB(String homePath) { public static void testPeerDB(File homePath) {
try { try {
File yacyDBPath = new File(new File(homePath), "DATA/YACYDB"); File yacyDBPath = new File(homePath, "DATA/YACYDB");
String[] dbFileNames = {"seed.new.db","seed.old.db","seed.pot.db"}; String[] dbFileNames = {"seed.new.db","seed.old.db","seed.pot.db"};
for (int i=0; i < dbFileNames.length; i++) { for (int i=0; i < dbFileNames.length; i++) {
@ -905,16 +905,16 @@ public final class yacy {
// go into headless awt mode // go into headless awt mode
System.setProperty("java.awt.headless", "true"); System.setProperty("java.awt.headless", "true");
String applicationRoot = System.getProperty("user.dir").replace('\\', '/'); File applicationRoot = new File(System.getProperty("user.dir").replace('\\', '/'));
//System.out.println("args.length=" + args.length); //System.out.println("args.length=" + args.length);
//System.out.print("args=["); for (int i = 0; i < args.length; i++) System.out.print(args[i] + ", "); System.out.println("]"); //System.out.print("args=["); for (int i = 0; i < args.length; i++) System.out.print(args[i] + ", "); System.out.println("]");
if ((args.length >= 1) && ((args[0].toLowerCase().equals("-startup")) || (args[0].equals("-start")))) { if ((args.length >= 1) && ((args[0].toLowerCase().equals("-startup")) || (args[0].equals("-start")))) {
// normal start-up of yacy // normal start-up of yacy
if (args.length == 2) applicationRoot= args[1]; if (args.length == 2) applicationRoot= new File(args[1]);
startup(applicationRoot, startupMemFree, startupMemTotal); startup(applicationRoot, startupMemFree, startupMemTotal);
} else if ((args.length >= 1) && ((args[0].toLowerCase().equals("-shutdown")) || (args[0].equals("-stop")))) { } else if ((args.length >= 1) && ((args[0].toLowerCase().equals("-shutdown")) || (args[0].equals("-stop")))) {
// normal shutdown of yacy // normal shutdown of yacy
if (args.length == 2) applicationRoot= args[1]; if (args.length == 2) applicationRoot= new File(args[1]);
shutdown(applicationRoot); shutdown(applicationRoot);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-minimizeurldb"))) { } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-minimizeurldb"))) {
// migrate words from DATA/PLASMADB/WORDS path to assortment cache, if possible // migrate words from DATA/PLASMADB/WORDS path to assortment cache, if possible
@ -922,11 +922,11 @@ public final class yacy {
if (args.length >= 3 && args[1].toLowerCase().equals("-cache")) { if (args.length >= 3 && args[1].toLowerCase().equals("-cache")) {
args = shift(args, 1, 2); args = shift(args, 1, 2);
} }
if (args.length == 2) applicationRoot= args[1]; if (args.length == 2) applicationRoot= new File(args[1]);
minimizeUrlDB(applicationRoot); minimizeUrlDB(applicationRoot);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-testpeerdb"))) { } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-testpeerdb"))) {
if (args.length == 2) { if (args.length == 2) {
applicationRoot= args[1]; applicationRoot = new File(args[1]);
} else if (args.length > 2) { } else if (args.length > 2) {
System.err.println("Usage: -testPeerDB [homeDbRoot]"); System.err.println("Usage: -testPeerDB [homeDbRoot]");
} }
@ -935,7 +935,7 @@ public final class yacy {
// this can help to create a stop-word list // this can help to create a stop-word list
// to use this, you need a 'yacy.words' file in the root path // to use this, you need a 'yacy.words' file in the root path
// start this with "java -classpath classes yacy -genwordstat [<rootdir>]" // start this with "java -classpath classes yacy -genwordstat [<rootdir>]"
if (args.length == 2) applicationRoot= args[1]; if (args.length == 2) applicationRoot= new File(args[1]);
genWordstat(applicationRoot); genWordstat(applicationRoot);
} else if ((args.length == 4) && (args[0].toLowerCase().equals("-cleanwordlist"))) { } else if ((args.length == 4) && (args[0].toLowerCase().equals("-cleanwordlist"))) {
// this can be used to organize and clean a word-list // this can be used to organize and clean a word-list
@ -950,7 +950,7 @@ public final class yacy {
transferCR(targetaddress, crfile); transferCR(targetaddress, crfile);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urldbcleanup"))) { } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urldbcleanup"))) {
// generate a url list and save it in a file // generate a url list and save it in a file
if (args.length == 2) applicationRoot= args[1]; if (args.length == 2) applicationRoot= new File(args[1]);
urldbcleanup(applicationRoot); urldbcleanup(applicationRoot);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-rwihashlist"))) { } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-rwihashlist"))) {
// generate a url list and save it in a file // generate a url list and save it in a file
@ -958,11 +958,11 @@ public final class yacy {
String format = "txt"; String format = "txt";
if (args.length >= 2) domain= args[1]; if (args.length >= 2) domain= args[1];
if (args.length >= 3) format= args[2]; if (args.length >= 3) format= args[2];
if (args.length == 4) applicationRoot= args[3]; if (args.length == 4) applicationRoot= new File(args[3]);
String outfile = "rwihashlist_" + System.currentTimeMillis(); String outfile = "rwihashlist_" + System.currentTimeMillis();
RWIHashList(applicationRoot, outfile, domain, format); RWIHashList(applicationRoot, outfile, domain, format);
} else { } else {
if (args.length == 1) applicationRoot= args[0]; if (args.length == 1) applicationRoot= new File(args[0]);
startup(applicationRoot, startupMemFree, startupMemTotal); startup(applicationRoot, startupMemFree, startupMemTotal);
} }
} }

Loading…
Cancel
Save