From 33345463402b0ecc67d9ec62be2ed7a778d3a3e0 Mon Sep 17 00:00:00 2001 From: jerri Date: Tue, 2 Aug 2005 19:40:29 +0000 Subject: [PATCH] Started the quest for in-source documentation with javadoc. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@483 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/yacy.java | 301 +++++++++++++++++++++++++++++++---------------- 1 file changed, 202 insertions(+), 99 deletions(-) diff --git a/source/yacy.java b/source/yacy.java index 635b76256..c030fae81 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -38,31 +38,6 @@ //the intact and unchanged copyright notice. //Contributions and changes to the program code must be marked as such. -/* - This is the main class of the proxy. - From here, several threads are started: - - - one single instance of the plasmaSwitchboard is generated, - which itself starts a thread with a plasmaHTMLCache object. This object simply counts - files sizes in the cache and terminates then. - It also generates a plasmaCrawlerLoader object, which may itself start - some more httpc-calling threads to load web pages. They terminate automatically when a page has loaded - - one serverCore - thread is started, which implements a multi-threaded server. - The process may start itself many more processes that handle connections. - - finally, all idle-dependent processes are written in a queue in plasmaSwitchboard - which are worked off inside an idle-sensitive loop of the main process. (here) - - On termination, the following must be done: - - stop feeding of the crawling process because it othervise fills the indexing queue. - - say goodbye to connected peers and disable new connections. Don't wait for success. - - first terminate the serverCore thread. 
This prevents that new cache objects are queued - - wait that the plasmaHTMLCache terminates (it should be normal that this process already has terminated) - - then wait for termination of all loader process of the plasmaCrawlerLoader - - work off the indexing and cache storage queue. These values are inside a RAM cache and would be lost othervise - - write all settings - - terminate - */ - import java.io.BufferedReader; import java.io.BufferedWriter; @@ -108,16 +83,59 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.enumerateFiles; import de.anomic.yacy.yacyCore; +/** +* This is the main class of the proxy. Several threads are started from here: +* +* +* On termination, the following must be done: +* +*/ public final class yacy { - + // static objects private static String vString = "@REPL_VERSION@"; private static float version = (float) 0.1; - + private static final String vDATE = "@REPL_DATE@"; private static final String copyright = "[ YACY Proxy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]"; private static final String hline = "-------------------------------------------------------------------------------"; - + + /** + * Convert the combined versionstring into a pretty string. + * FIXME: Why is this so complicated? + * + * @param s Combined version string + * @return Pretty string where version and svn-Version are separated by a + * slash + */ public static String combinedVersionString2PrettyString(String s) { long svn; try {svn = (long) (100000000.0 * Double.parseDouble(s));} catch (NumberFormatException ee) {svn = 0;} @@ -130,18 +148,32 @@ public final class yacy { while (svnStr.length() < 5) svnStr = "0" + svnStr; return vStr + "/" + svnStr; } - + + /** + * Combines the version of the proxy with the versionnumber from svn to a + * combined Version + * + * @param version Current given version for this proxy. + * @param svn Current version given from svn.
+ * @return float with the combined version + */ public static float versvn2combinedVersion(float version, int svn) { return (float) (((double) version * 100000000.0 + ((double) svn)) / 100000000.0); } - + + /** + * Starts up the whole application. Sets up all datastructures and starts + * the main threads. + * + * @param homePath Root-path where all information is to be found. + */ private static void startup(String homePath) { long startup = yacyCore.universalTime(); try { // start up System.out.println(copyright); System.out.println(hline); - + // check java version try { String[] check = "a,b".split(","); // split needs java 1.4 @@ -150,7 +182,7 @@ public final class yacy { Thread.currentThread().sleep(3000); System.exit(-1); } - + // setting up logging try { serverLog.configureLogging(new File(homePath, "yacy.logging")); @@ -160,20 +192,20 @@ public final class yacy { } serverLog.logSystem("STARTUP", copyright); serverLog.logSystem("STARTUP", hline); - + serverLog.logSystem("STARTUP", "java version " + System.getProperty("java.version", "no-java-version")); serverLog.logSystem("STARTUP", "Application Root Path: " + homePath.toString()); - + // create data folder File dataFolder = new File(homePath, "DATA"); if (!(dataFolder.exists())) dataFolder.mkdir(); - + plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf"); - + // hardcoded, forced, temporary value-migration sb.setConfig("htTemplatePath", "htroot/env/templates"); sb.setConfig("parseableExt", "html,htm,txt,php,shtml,asp"); - + // if we are running an SVN version, we try to detect the used svn revision now ...
Properties buildProp = new Properties(); File buildPropFile = null; @@ -183,7 +215,7 @@ public final class yacy { } catch (Exception e) { System.err.println("ERROR: " + buildPropFile.toString() + " not found in settings path"); } - + try { if (buildProp.containsKey("releaseNr")) { // this normally looks like this: $Revision: 181 $ @@ -202,32 +234,32 @@ public final class yacy { } catch (Exception e) { System.err.println("Unable to determine the currently used SVN revision number."); } - + sb.setConfig("version", "" + version); sb.setConfig("vdate", vDATE); sb.setConfig("applicationRoot", homePath); sb.setConfig("startupTime", "" + startup); serverLog.logSystem("STARTUP", "YACY Version: " + version + ", Built " + vDATE); yacyCore.latestVersion = (float) version; - + // read environment //new int port = Integer.parseInt(sb.getConfig("port", "8080")); int timeout = Integer.parseInt(sb.getConfig("httpdTimeout", "60000")); if (timeout < 60000) timeout = 60000; int maxSessions = Integer.parseInt(sb.getConfig("httpdMaxSessions", "100")); - + // create some directories File htRootPath = new File(sb.getRootPath(), sb.getConfig("htRootPath", "htroot")); File htDocsPath = new File(sb.getRootPath(), sb.getConfig("htDocsPath", "DATA/HTDOCS")); File htTemplatePath = new File(sb.getRootPath(), sb.getConfig("htTemplatePath","htdocs")); - + // create default notifier picture if (!((new File(htRootPath, "env/pictures/notifier.gif")).exists())) try { serverFileUtils.copy(new File(htRootPath, "env/pictures/empty.gif"), new File(htRootPath, "env/pictures/notifier.gif")); } catch (IOException e) {} - + if (!(htDocsPath.exists())) htDocsPath.mkdir(); File htdocsDefaultReadme = new File(htDocsPath, "readme.txt"); if (!(htdocsDefaultReadme.exists())) try {serverFileUtils.write(( @@ -245,10 +277,10 @@ public final class yacy { "\r\n").getBytes(), htdocsDefaultReadme);} catch (IOException e) { System.out.println("Error creating htdocs readme: " + e.getMessage()); } - + File wwwDefaultPath = 
new File(htDocsPath, "www"); if (!(wwwDefaultPath.exists())) wwwDefaultPath.mkdir(); - + File wwwDefaultClass = new File(wwwDefaultPath, "welcome.class"); //if ((!(wwwDefaultClass.exists())) || (wwwDefaultClass.length() != (new File(htRootPath, "htdocsdefault/welcome.class")).length())) try { if((new File(htRootPath, "htdocsdefault/welcome.java")).exists()) @@ -256,10 +288,10 @@ public final class yacy { serverFileUtils.copy(new File(htRootPath, "htdocsdefault/welcome.class"), wwwDefaultClass); serverFileUtils.copy(new File(htRootPath, "htdocsdefault/welcome.html"), new File(wwwDefaultPath, "welcome.html")); //} catch (IOException e) {} - + File shareDefaultPath = new File(htDocsPath, "share"); if (!(shareDefaultPath.exists())) shareDefaultPath.mkdir(); - + File shareDefaultClass = new File(shareDefaultPath, "dir.class"); //if ((!(shareDefaultClass.exists())) || (shareDefaultClass.length() != (new File(htRootPath, "htdocsdefault/dir.class")).length())) try { if((new File(htRootPath, "htdocsdefault/dir.java")).exists()) @@ -267,8 +299,8 @@ public final class yacy { serverFileUtils.copy(new File(htRootPath, "htdocsdefault/dir.class"), shareDefaultClass); serverFileUtils.copy(new File(htRootPath, "htdocsdefault/dir.html"), new File(shareDefaultPath, "dir.html")); //} catch (IOException e) {} - - + + // set preset accounts/passwords String acc; if ((acc = sb.getConfig("proxyAccount", "")).length() > 0) { @@ -283,7 +315,7 @@ public final class yacy { sb.setConfig("adminAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(acc))); sb.setConfig("adminAccount", ""); } - + // fix unsafe old passwords if ((acc = sb.getConfig("proxyAccountBase64", "")).length() > 0) { sb.setConfig("proxyAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); @@ -305,7 +337,7 @@ public final class yacy { sb.setConfig("downloadAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); sb.setConfig("downloadAccountBase64", 
""); } - + // start main threads try { httpd protocolHandler = new httpd(sb, new httpdFileHandler(sb), new httpdProxyHandler(sb)); @@ -325,7 +357,7 @@ public final class yacy { // first start the server sb.deployThread("10_httpd", "HTTPD Server/Proxy", "the HTTPD, used as web server and proxy", null, server, 0, 0, 0, 0); //server.start(); - + // open the browser window boolean browserPopUpTrigger = sb.getConfig("browserPopUpTrigger", "true").equals("true"); if (browserPopUpTrigger) { @@ -333,11 +365,11 @@ public final class yacy { String browserPopUpApplication = sb.getConfig("browserPopUpApplication", "netscape"); serverSystem.openBrowser("http://localhost:" + port + "/" + browserPopUpPage, browserPopUpApplication); } - + //Copy the shipped locales into DATA File localesPath = new File(sb.getRootPath(), sb.getConfig("localesPath", "DATA/LOCALE")); File defaultLocalesPath = new File(sb.getRootPath(), "locales"); - + try{ File[] defaultLocales = defaultLocalesPath.listFiles(); localesPath.mkdirs(); @@ -349,7 +381,7 @@ public final class yacy { }catch(NullPointerException e){ serverLog.logError("STARTUP", "Nullpointer Exception while copying the default Locales"); } - + //regenerate Locales from Translationlist, if needed String lang = sb.getConfig("htLocaleSelection", ""); if(! lang.equals("") && ! lang.equals("default") ){ //locale is used @@ -361,7 +393,7 @@ public final class yacy { }catch(IOException e){ //Error } - + try{ //seperate try, because we want this, even when the File "version2 does not exist. if(! currentRev.equals(sb.getConfig("svnRevision", "")) ){ //is this another version?! 
File sourceDir = new File(sb.getConfig("htRootPath", "htroot")); @@ -377,19 +409,19 @@ public final class yacy { //Error } } - + // registering shutdown hook serverLog.logSystem("STARTUP", "Registering Shutdown Hook"); Runtime run = Runtime.getRuntime(); run.addShutdownHook(new shutdownHookThread(Thread.currentThread(), sb)); - + // wait for server shutdown try { sb.waitForShutdown(); } catch (Exception e) { serverLog.logError("MAIN CONTROL LOOP", "PANIK: " + e.getMessage(),e); } - + // shut down serverLog.logSystem("SHUTDOWN", "caught termination signal"); server.terminate(false); @@ -400,7 +432,7 @@ public final class yacy { } catch (IOException ee) { serverLog.logSystem("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)"); } - + // idle until the processes are down while (server.isAlive()) { Thread.currentThread().sleep(2000); // wait a while @@ -421,19 +453,27 @@ public final class yacy { System.exit(0); } catch (Exception e) {} // was once stopped by de.anomic.net.ftpc$sm.checkExit(ftpc.java:1790) } - - - + + /** + * Loads the configuration from the data-folder. + * FIXME: Why is this called over and over again from every method, instead + * of setting the configurationdata once for this class in main? + * + * @param mes Where are we called from, so that the errormessages can be + * more descriptive. + * @param homePath Root-path where all the information is to be found. + * @return Properties read from the configurationfile. 
+ */ private static Properties configuration(String mes, String homePath) { serverLog.logSystem(mes, "Application Root Path: " + homePath.toString()); - + // read data folder File dataFolder = new File(homePath, "DATA"); if (!(dataFolder.exists())) { serverLog.logError(mes, "Application was never started or root path wrong."); System.exit(-1); } - + Properties config = new Properties(); try { config.load(new FileInputStream(new File(homePath, "DATA/SETTINGS/httpProxy.conf"))); @@ -444,31 +484,37 @@ public final class yacy { serverLog.logError(mes, "could not read configuration file."); System.exit(-1); } - + return config; } - + + /** + * Call the shutdown-page from yacy to tell it to shut down. This method is + * called if you start yacy with the argument -shutdown. + * + * @param homePath Root-path where all the information is to be found. + */ static void shutdown(String homePath) { // start up System.out.println(copyright); System.out.println(hline); - + Properties config = configuration("REMOTE-SHUTDOWN", homePath); - + // read port int port = Integer.parseInt((String) config.get("port")); - + // read password String encodedPassword = (String) config.get("adminAccountBase64MD5"); if (encodedPassword == null) encodedPassword = ""; // not defined - + // send 'wget' to web interface httpHeader requestHeader = new httpHeader(); requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify try { httpc con = httpc.getInstance("localhost", port, 10000, false); httpc.response res = con.GET("Steering.html?shutdown=", requestHeader); - + // read response if (res.status.startsWith("2")) { serverLog.logSystem("REMOTE-SHUTDOWN", "YACY accepted shutdown command."); @@ -484,23 +530,33 @@ public final class yacy { serverLog.logError("REMOTE-SHUTDOWN", "could not establish connection to YACY socket: " + e.getMessage()); System.exit(-1); } - + // finished serverLog.logSystem("REMOTE-SHUTDOWN", "SUCCESSFULLY FINISHED remote-shutdown:"); 
serverLog.logSystem("REMOTE-SHUTDOWN", "YACY will terminate after working off all enqueued tasks."); } - + + /** + * This method gets all found words and outputs a statistic about the score + * of the words. The output of this method can be used to create stop-word + * lists. This method will be called if you start yacy with the argument + * -genwordstat. + * FIXME: How can stop-word list be created from this output? What type of + * score is output? + * + * @param homePath Root-Path where all the information is to be found. + */ private static void genWordstat(String homePath) { // start up System.out.println(copyright); System.out.println(hline); - + Properties config = configuration("GEN-WORDSTAT", homePath); - + // load words serverLog.logInfo("GEN-WORDSTAT", "loading words..."); HashMap words = loadWordMap(new File(homePath, "yacy.words")); - + // find all hashes serverLog.logInfo("GEN-WORDSTAT", "searching all word-hash databases..."); File dbRoot = new File(homePath, config.getProperty("dbPath")); @@ -513,7 +569,7 @@ public final class yacy { h = f.getName().substring(0, plasmaURL.urlHashLength); hs.addScore(h, (int) f.length()); } - + // list the hashes in reverse order serverLog.logInfo("GEN-WORDSTAT", "listing words in reverse size order..."); String w; @@ -524,11 +580,20 @@ public final class yacy { if (w == null) System.out.print("# " + h); else System.out.print(w); System.out.println(" - " + hs.getScore(h)); } - + // finished serverLog.logSystem("GEN-WORDSTAT", "FINISHED"); } - + + /** + * Migrates the PLASMA WORDS structure to the assortment cache if possible. + * This method will be called if you start yacy with the argument + * -migratewords. + * Caution: This might take a long time to finish. Don't interrupt it! + * FIXME: Shouldn't this method be private? + * + * @param homePath Root-path where all the information is to be found. 
+ */ public static void migrateWords(String homePath) { // run with "java -classpath classes yacy -migratewords" try {serverLog.configureLogging(new File(homePath, "yacy.logging"));} catch (Exception e) {} @@ -561,7 +626,14 @@ public final class yacy { e.printStackTrace(); } } - + + /** + * Reads all words from the given file and creates a hashmap, where key is + * the plasma word hash and value is the word itself. + * + * @param wordlist File where the words are stored. + * @return HashMap with the hash-word - relation. + */ private static HashMap loadWordMap(File wordlist) { // returns a hash-word - Relation HashMap wordmap = new HashMap(); @@ -573,7 +645,14 @@ public final class yacy { } catch (IOException e) {} return wordmap; } - + + /** + * Reads all words from the given file and creates a HashSet, which contains + * all found words. + * + * @param wordlist File where the words are stored. + * @return HashSet with the words + */ private static HashSet loadWordSet(File wordlist) { // returns a set of words HashSet wordset = new HashSet(); @@ -585,13 +664,22 @@ public final class yacy { } catch (IOException e) {} return wordset; } - + + /** + * Cleans a wordlist in a file according to the length of the words. The + * file with the given filename is read and then only the words in the given + * length-range are written back to the file. + * + * @param wordlist Name of the file the words are stored in. + * @param minlength Minimal needed length for each word to be stored. + * @param maxlength Maximal allowed length for each word to be stored.
+ */ private static void cleanwordlist(String wordlist, int minlength, int maxlength) { // start up System.out.println(copyright); System.out.println(hline); serverLog.logSystem("CLEAN-WORDLIST", "START"); - + String word; TreeSet wordset = new TreeSet(); int count = 0; @@ -607,7 +695,7 @@ public final class yacy { count++; } br.close(); - + if (wordset.size() != count) { count = count - wordset.size(); BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(wordlist))); @@ -625,24 +713,30 @@ public final class yacy { serverLog.logError("CLEAN-WORDLIST", "ERROR: " + e.getMessage()); System.exit(-1); } - + // finished serverLog.logSystem("CLEAN-WORDLIST", "FINISHED"); } - + + /** + * Gets all words from the stopword-list and removes them in the databases. + * FIXME: Really? Don't know if I read this correctly. + * + * @param homePath Root-Path where all information is to be found. + */ private static void deleteStopwords(String homePath) { // start up System.out.println(copyright); System.out.println(hline); serverLog.logSystem("DELETE-STOPWORDS", "START"); - + Properties config = configuration("DELETE-STOPWORDS", homePath); File dbRoot = new File(homePath, config.getProperty("dbPath")); - + // load stopwords HashSet stopwords = loadWordSet(new File(homePath, "yacy.stopwords")); serverLog.logInfo("DELETE-STOPWORDS", "loaded stopwords, " + stopwords.size() + " entries in list, starting scanning"); - + // find all hashes File f; String w; @@ -661,14 +755,19 @@ public final class yacy { } } } - + serverLog.logInfo("DELETE-STOPWORDS", "TOTALS: deleted " + count + " indexes; " + (totalamount / 1024) + " kbytes"); - + // finished serverLog.logSystem("DELETE-STOPWORDS", "FINISHED"); } - - // application wrapper + + /** + * Main-method which is started by java. Checks for special arguments or + * starts up the application. + * + * @param args Given arguments from the command line. 
+ */ public static void main(String args[]) { String applicationRoot = System.getProperty("user.dir"); //System.out.println("args.length=" + args.length); @@ -709,24 +808,28 @@ public final class yacy { } } +/** +* This class is a helper class whose instance is started, when the java virtual +* machine shuts down. Signals the plasmaSwitchboard to shut down. +*/ class shutdownHookThread extends Thread { private plasmaSwitchboard sb = null; private Thread mainThread = null; - + public shutdownHookThread(Thread mainThread, plasmaSwitchboard sb) { this.sb = sb; this.mainThread = mainThread; } - + public void run() { - + try { if (!this.sb.isTerminated()) { serverLog.logSystem("SHUTDOWN","Shutdown via shutdown hook."); - + // sending the yacy main thread a shutdown signal this.sb.terminate(); - + // waiting for the yacy thread to finish execution this.mainThread.join(); }