From 650ca3955a4fbc08a25363286bc5fe7fd79f6910 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 31 May 2005 17:39:14 +0000
Subject: [PATCH] added flush-thread for index cache and added language-name
 mapping in Language_p

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@203 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/Language_p.java                        |  29 +++-
 htroot/env/templates/header.template          |   1 +
 source/de/anomic/kelondro/kelondroMap.java    |   4 +-
 source/de/anomic/kelondro/kelondroTree.java   |   4 +-
 source/de/anomic/plasma/plasmaCrawlLURL.java  |   2 +-
 .../anomic/plasma/plasmaWordIndexCache.java   | 149 +++++++++++++-----
 .../plasma/plasmaWordIndexClassicDB.java      |   2 +-
 .../plasma/plasmaWordIndexInterface.java      |   2 +-
 8 files changed, 144 insertions(+), 49 deletions(-)
diff --git a/htroot/Language_p.java b/htroot/Language_p.java
index ae92b28cc..a4c31ffbd 100644
--- a/htroot/Language_p.java
+++ b/htroot/Language_p.java
@@ -53,6 +53,7 @@ import java.io.PrintWriter;
 import java.net.URL;
 import java.util.Iterator;
 import java.util.Vector;
+import java.util.HashMap;
 
 import de.anomic.data.listManager;
 import de.anomic.http.httpHeader;
@@ -63,8 +64,20 @@ import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 import de.anomic.data.translator;
 
+
 public class Language_p {
 
+    // maps language keys to display names, read from the "htLocaleLang" setting ("key/name,key/name,...")
+    public static HashMap langMap(serverSwitch env) {
+        HashMap names = new HashMap();
+        String[] items = env.getConfig("htLocaleLang", "").split(",");
+        for (int n = 0; n < items.length; n++) {
+            int slash = items[n].indexOf('/');
+            if (slash > 0) names.put(items[n].substring(0, slash), items[n].substring(slash + 1)); // skips items without a key
+        }
+        return names;
+    }
+        
 	private static boolean copyFile(File from, File to){
 			if(from == null || to == null){
 			return false;
@@ -153,20 +166,26 @@ public class Language_p {
 	//reread language files
 	langFiles = listManager.getDirListing(langPath);
 	int i;
-	//virtuell entry
+	HashMap langNames = langMap(env);
+        String langKey, langName;
+        
+        //virtual entry
 	prop.put("langlist_0_file", "default");
-	prop.put("langlist_0_name", "default");
-
+	prop.put("langlist_0_name", ((langNames.get("default") == null) ? "default" : (String) langNames.get("default")));
+        
 	for(i=0;i<= langFiles.length-1 ;i++){
 		if(langFiles[i].endsWith(".lng")){
 			//+1 because of the virtuall entry "default" at top
+                        langKey = langFiles[i].substring(0, langFiles[i].length() -4);
+                        langName = (String) langNames.get(langKey);
 			prop.put("langlist_"+(i+1)+"_file", langFiles[i]);
-			prop.put("langlist_"+(i+1)+"_name", langFiles[i].substring(0, langFiles[i].length() -4));
+			prop.put("langlist_"+(i+1)+"_name", ((langName == null) ? langKey : langName));
 		}
 	}
 	prop.put("langlist", (i+1));
 
-	prop.put("currentlang", env.getConfig("htLocaleSelection", "default"));
+        langName = (String) langNames.get(env.getConfig("htLocaleSelection", "default"));
+	prop.put("currentlang", ((langName == null) ? "default" : langName));
 	return prop;
     }
 
diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template
index a14ed560e..e3b58cc8a 100644
--- a/htroot/env/templates/header.template
+++ b/htroot/env/templates/header.template
@@ -63,6 +63,7 @@
   <tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/ViewLog_p.html" class="MenuItemLink">Log</a></td></tr>
   <tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Settings_p.html" class="MenuItemLink">Settings</a></td></tr>
   <tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Performance_p.html" class="MenuItemLink">Performance</a></td></tr>
+  <tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Language_p.html" class="MenuItemLink">Language</a></td></tr>
   <tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Skins_p.html" class="MenuItemLink">Skins</a></td></tr>
   <tr><td class="MenuSpacer"></td></tr>
   
diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java
index e7c39d6ff..437ff1b6f 100644
--- a/source/de/anomic/kelondro/kelondroMap.java
+++ b/source/de/anomic/kelondro/kelondroMap.java
@@ -173,9 +173,9 @@ public class kelondroMap {
                 valuel = Long.parseLong(value);
                 accumulator = (Long) accMap.get(accfields[i]);
                 if (add)
-                    accMap.put(accfields[i], new Long(accumulator.longValue() + valuel));
+                    accMap.put(accfields[i], new Long(accumulator.longValue() + ((long) valuel)));
                 else
-                    accMap.put(accfields[i], new Long(accumulator.longValue() - valuel));
+                    accMap.put(accfields[i], new Long(accumulator.longValue() - ((long) valuel)));
             } catch (NumberFormatException e) {}
         }
     }
diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java
index 2105efe5d..a7bff903a 100644
--- a/source/de/anomic/kelondro/kelondroTree.java
+++ b/source/de/anomic/kelondro/kelondroTree.java
@@ -546,7 +546,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
     }
 
     // Associates the specified value with the specified key in this map
-    public byte[] put(byte[] key, byte[] value) throws IOException {
+    public synchronized byte[] put(byte[] key, byte[] value) throws IOException {
 	byte[][] row = new byte[2][];
 	row[0] = key;
 	row[1] = value;
@@ -555,7 +555,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
     }
     
     // Removes the mapping for this key from this map if present (optional operation).
-    public byte[][] remove(byte[] key) throws IOException {
+    public synchronized byte[][] remove(byte[] key) throws IOException {
 	Search search = new Search(key);
 	if (search.found()) {
 	    Node result = search.getMatcher();
diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java
index fa70fc6ae..e62275d31 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlLURL.java
@@ -303,7 +303,7 @@ public class plasmaCrawlLURL extends plasmaURL {
                 prop.put("table_indexed_" + c + "_showInit", (showInit) ? 1 : 0);
                 prop.put("table_indexed_" + c + "_showInit_initiatorSeed", (initiatorSeed == null) ? dfltInit : initiatorSeed.getName());
                 prop.put("table_indexed_" + c + "_showExec", (showExec) ? 1 : 0);
-                prop.put("table_indexed_" + c + "_showExec_executorSeed", (initiatorSeed == null) ? dfltExec : executorSeed.getName());
+                prop.put("table_indexed_" + c + "_showExec_executorSeed", (executorSeed == null) ? dfltExec : executorSeed.getName());
                 prop.put("table_indexed_" + c + "_moddate", daydate(urle.moddate()));
                 prop.put("table_indexed_" + c + "_wordcount", urle.wordCount());
                 prop.put("table_indexed_" + c + "_urldescr", urle.descr());
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index 7a766c306..dc99ca7eb 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -65,11 +65,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
     private plasmaWordIndexInterface backend;
     private TreeMap cache;
     private kelondroMScoreCluster hashScore;
-    private HashMap hashDate;
+    private kelondroMScoreCluster hashDate;
+    private long startTime;
     private int maxWords;
     private serverLog log;
     private plasmaWordIndexAssortmentCluster assortmentCluster;
-    private int singletonBufferSize; //kb
+    private int assortmentBufferSize; //kb
+    private flush flushThread;
 
     // calculated constants
     private static String minKey, maxKey;
@@ -80,7 +82,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 	for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
     }
 
-    public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, int singletonbufferkb, serverLog log) {
+    public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, int assortmentbufferkb, serverLog log) {
         // migrate#1
         File oldSingletonFile = new File(databaseRoot, oldSingletonFileName);
         File newSingletonFile = new File(databaseRoot, newSingletonFileName);
@@ -94,17 +96,21 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
         File acSingletonFile = new File(assortmentClusterPath, newSingletonFileName);
         if ((newSingletonFile.exists()) && (!(acSingletonFile.exists()))) newSingletonFile.renameTo(acSingletonFile);
         
+        // create flushing thread
+        flushThread = new flush();
+        
         // creates a new index cache
         // the cache has a back-end where indexes that do not fit in the cache are flushed
         this.databaseRoot = databaseRoot;
-        this.singletonBufferSize = singletonbufferkb;
+        this.assortmentBufferSize = assortmentbufferkb;
         this.cache = new TreeMap();
 	this.hashScore = new kelondroMScoreCluster();
-        this.hashDate  = new HashMap();
+        this.hashDate  = new kelondroMScoreCluster();
+        this.startTime = System.currentTimeMillis();
 	this.maxWords = 10000;
         this.backend = backend;
         this.log = log;
-	this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentLimit, singletonBufferSize, log);
+	this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentLimit, assortmentBufferSize, log);
 
         // read in dump of last session
         try {
@@ -113,8 +119,12 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
             log.logError("unable to restore cache dump: " + e.getMessage());
             e.printStackTrace();
         }
+        
+        // start permanent flushing
+        flushThread.start();
     }
 
+    
     private void dump(int waitingSeconds) throws IOException {
         log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)");
         File indexDumpFile = new File(databaseRoot, indexDumpFileName);
@@ -125,20 +135,17 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
         long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
         synchronized (cache) {
             Iterator i = cache.entrySet().iterator();
-            //Iterator i = hashScore.scores(true);
             Map.Entry entry;
             String wordHash;
             plasmaWordIndexEntryContainer container;
-            long creationTime;
+            long updateTime;
             plasmaWordIndexEntry wordEntry;
             byte[][] row = new byte[5][];
             while (i.hasNext()) {
                 // get entries
                 entry = (Map.Entry) i.next();
-                //wordHash = (String) i.next();
                 wordHash = (String) entry.getKey();
-                creationTime = getCreationTime(wordHash);
-                //container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+                updateTime = getUpdateTime(wordHash);
                 container = (plasmaWordIndexEntryContainer) entry.getValue();
 
                 // put entries on stack
@@ -148,7 +155,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
                         wordEntry = (plasmaWordIndexEntry) ci.next();
                         row[0] = wordHash.getBytes();
                         row[1] = kelondroRecords.long2bytes(container.size(), 4);
-                        row[2] = kelondroRecords.long2bytes(creationTime, 8);
+                        row[2] = kelondroRecords.long2bytes(updateTime, 8);
                         row[3] = wordEntry.getUrlHash().getBytes();
                         row[4] = wordEntry.toEncodedForm(true).getBytes();
                         dumpStack.push(row);
@@ -235,7 +242,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
     public Iterator wordHashes(String startWordHash, boolean up) {
         // here we merge 3 databases into one view:
         // - the RAM Cache
-        // - the singleton File Cache
+        // - the assortmentCluster File Cache
         // - the backend
         if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up");
         return new kelondroMergeIterator(
@@ -247,6 +254,49 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
                         true);
     }
     
+    // Background thread that keeps writing the hashDate minimum (presumably the
+    // least recently updated word) from the RAM cache to disc until terminate().
+    private class flush extends Thread {
+        private volatile boolean terminate, pause; // volatile: set by caller threads, polled by run()
+
+        public flush() {
+            terminate = false;
+            pause = false;
+        }
+
+        public void run() {
+            String nextHash;
+            while (!terminate) {
+                if (pause) {
+                    nap(300); // suspended: poll the pause flag again shortly
+                } else {
+                    nextHash = (String) hashDate.getMinObject();
+                    if (nextHash != null) {
+                        try {
+                            flushFromMem(nextHash, true);
+                        } catch (Exception e) {
+                            log.logError("flushThread: " + e.getMessage());
+                            e.printStackTrace();
+                        }
+                        // 10..1010 ms; the fuller the cache, the shorter the break
+                        nap(10 + java.lang.Math.min(1000, 10 * maxWords/(cache.size() + 1)));
+                    } else {
+                        nap(2000); // nothing to flush, idle
+                    }
+                }
+            }
+        }
+
+        // sleeps in this thread; interrupts are ignored, only the two flags steer run()
+        private void nap(long millis) {
+            try {Thread.sleep(millis);} catch (InterruptedException ignored) {}
+        }
+
+        public void pause() { pause = true; }         // takes effect at the next loop pass
+        public void proceed() { pause = false; }      // run() resumes flushing
+        public void terminate() { terminate = true; } // run() exits at its next loop check
+    }
+    
     private int flushFromMem(String key, boolean reintegrate) {
         // this method flushes indexes out from the ram to the disc.
         // at first we check the singleton database and act accordingly
@@ -261,12 +311,12 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
             // get the container
             container = (plasmaWordIndexEntryContainer) cache.get(key);
             if (container == null) return 0; // flushing of nonexisting key
-            time = getCreationTime(key);
+            time = getUpdateTime(key);
 
             // remove it from the cache
             cache.remove(key);
 	    hashScore.deleteScore(key);
-            hashDate.remove(key);
+            hashDate.deleteScore(key);
 	}
         
         // now decide where to flush that container
@@ -296,7 +346,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 		synchronized (cache) {
 		    cache.put(key, container);
 		    hashScore.setScore(key, container.size());
-		    hashDate.put(key, new Long(time));
+		    hashDate.setScore(key, intTime(time));
 		}
 		return -flushedFromAssortment.size();
 	    } else {
@@ -306,6 +356,14 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
         }	
     }
     
+    private int intTime(long longTime) { // whole seconds from startTime to longTime, narrowed to int
+        return (int) ((longTime - startTime) / 1000);
+    }
+    
+    private long longTime(int intTime) { // reverse of intTime(): startTime plus intTime seconds, in milliseconds
+        return ((long) intTime) * ((long) 1000) + startTime;
+    }
+    
     private boolean flushFromAssortmentCluster(String key) {
 	// this should only be called if the singleton shall be deleted or returned in an index entity
         plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key);
@@ -319,19 +377,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
     }
 
     private synchronized int flushFromMemToLimit() {
-	if ((hashScore.size() == 0) && (cache.size() == 0)) {
-	    serverLog.logDebug("PLASMA INDEXING", "flushToLimit: called but cache is empty");
-	    return 0;
-	}
-	if ((hashScore.size() == 0) && (cache.size() != 0)) {
-	    serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=0 but cache.size=" + cache.size());
-	    return 0;
-	}
-	if ((hashScore.size() != 0) && (cache.size() == 0)) {
-	    serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=" + hashScore.size() + " but cache.size=0");
-	    return 0;
-	}
+        if ((hashScore.size() == 0) || (cache.size() == 0)) return 0;
 
+        flushThread.pause();
         int count = 0;
 	//serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size());
         synchronized (hashScore) {
@@ -345,7 +393,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
             while (i.hasNext()) {
 		// get the entry properties
                 key = (String) i.next();
-                createTime = (Long) hashDate.get(key);
+                createTime = new Long(longTime(hashDate.getScore(key)));
                 count = hashScore.getScore(key);
 		
 		// put it into a specific ohl
@@ -396,7 +444,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 
 	    // stop flushing if cache is shrinked enough
 	    // avoid as possible to flush high-scores
-	    if (cache.size() < this.maxWords - 100) return count;
+	    if (cache.size() < this.maxWords - 100) {
+                flushThread.proceed();
+                return count;
+            }
 
             // flush high-scores
             for (int cluster = java.lang.Math.min(clusterCandidate.length, ramcacheLimit); cluster > assortmentLimit; cluster--) {
@@ -411,43 +462,60 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
                         candidateCounter += cluster;
                         log.logDebug("flushed high-cluster below limit #" + cluster + ", key=" + key + ", count=" + count + ", cachesize=" + cache.size());
                     }
-                    if (cache.size() < this.maxWords - 100) return count;
+                    if (cache.size() < this.maxWords - 100) {
+                        flushThread.proceed();
+                        return count;
+                    }
                 }
             }
             
         }
+        flushThread.proceed();
         return count;
     }
     
     public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
+        flushThread.pause();
         flushFromMem(wordHash, false);
         flushFromAssortmentCluster(wordHash);
+        flushThread.proceed();
 	return backend.getIndex(wordHash, deleteIfEmpty);
     }
     
-    public long getCreationTime(String wordHash) {
-        Long time = (Long) hashDate.get(wordHash);
+    public long getUpdateTime(String wordHash) {
+        plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+        if (entries == null) return 0;
+        return entries.updated();
+        /*
+        Long time = new Long(longTime(hashDate.getScore(wordHash)));
         if (time == null) return 0;
         return time.longValue();
+        */
     }
     
     public void deleteIndex(String wordHash) {
+        flushThread.pause();
         synchronized (cache) {
             cache.remove(wordHash);
             hashScore.deleteScore(wordHash);
-            hashDate.remove(wordHash);
+            hashDate.deleteScore(wordHash);
         }
         assortmentCluster.removeFromAll(wordHash);
 	backend.deleteIndex(wordHash);
+        flushThread.proceed();
     }
 
     public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
+        flushThread.pause();
         flushFromMem(wordHash, false);
         flushFromAssortmentCluster(wordHash);
-        return backend.removeEntries(wordHash, urlHashes, deleteComplete);
+        int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete);
+        flushThread.proceed();
+        return removed;
     }
     
     public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) {
+        flushThread.pause();
 	//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
         if (cache.size() >= this.maxWords) flushFromMemToLimit();
 	//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
@@ -462,10 +530,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
             if (added > 0) {
                 cache.put(wordHash, entries);
                 hashScore.addScore(wordHash, added);
-                hashDate.put(wordHash, new Long(updateTime));
+                hashDate.setScore(wordHash, intTime(updateTime));
             }
 	}
         //System.out.println("DEBUG: cache = " + cache.toString());
+        flushThread.proceed();
         return added;
     }
 
@@ -475,11 +544,17 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
         if (entries.add(new plasmaWordIndexEntry[]{newEntry}, updateTime) > 0) {
             cache.put(wordHash, entries);
             hashScore.incScore(wordHash);
-            hashDate.put(wordHash, new Long(updateTime));
+            hashDate.setScore(wordHash, intTime(updateTime));
         }
+        flushThread.proceed();
     }
 
     public void close(int waitingSeconds) {
+        // stop permanent flushing
+        flushThread.terminate();
+        try {flushThread.join(5000);} catch (InterruptedException e) {}
+        
+        // close cluster
         assortmentCluster.close();
         try {
             dump(waitingSeconds);
diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
index 14782905f..482572360 100644
--- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
+++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
@@ -187,7 +187,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
         }
     }
     
-    public long getCreationTime(String wordHash) {
+    public long getUpdateTime(String wordHash) {
         File f = plasmaWordIndexEntity.wordHash2path(databaseRoot, wordHash);
         if (f.exists()) return f.lastModified(); else return -1;
     }
diff --git a/source/de/anomic/plasma/plasmaWordIndexInterface.java b/source/de/anomic/plasma/plasmaWordIndexInterface.java
index 218c7b58e..ef583b948 100644
--- a/source/de/anomic/plasma/plasmaWordIndexInterface.java
+++ b/source/de/anomic/plasma/plasmaWordIndexInterface.java
@@ -51,7 +51,7 @@ public interface plasmaWordIndexInterface {
     public Iterator wordHashes(String startWordHash, boolean up);
 
     public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty);
-    public long getCreationTime(String wordHash);
+    public long getUpdateTime(String wordHash);
     public void deleteIndex(String wordHash);
 
     public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);