added flush-thread for index cache and added language-name mapping in Language_p

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@203 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 20 years ago
parent 3dd3431c58
commit 650ca3955a

@@ -53,6 +53,7 @@ import java.io.PrintWriter;
import java.net.URL;
import java.util.Iterator;
import java.util.Vector;
import java.util.HashMap;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
@@ -63,8 +64,20 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.data.translator;
public class Language_p {
public static HashMap langMap(serverSwitch env) {
String[] ms = env.getConfig("htLocaleLang", "").split(",");
HashMap map = new HashMap();
int p;
for (int i = 0; i < ms.length; i++) {
p = ms[i].indexOf("/");
if (p > 0) map.put(ms[i].substring(0, p), ms[i].substring(p + 1));
}
return map;
}
private static boolean copyFile(File from, File to){
if(from == null || to == null){
return false;
@@ -153,20 +166,26 @@ public class Language_p {
//reread language files
langFiles = listManager.getDirListing(langPath);
int i;
HashMap langNames = langMap(env);
String langKey, langName;
//virtual entry
prop.put("langlist_0_file", "default");
prop.put("langlist_0_name", "default");
prop.put("langlist_0_name", ((langNames.get("default") == null) ? "default" : (String) langNames.get("default")));
for(i=0;i<= langFiles.length-1 ;i++){
if(langFiles[i].endsWith(".lng")){
//+1 because of the virtual entry "default" at top
langKey = langFiles[i].substring(0, langFiles[i].length() -4);
langName = (String) langNames.get(langKey);
prop.put("langlist_"+(i+1)+"_file", langFiles[i]);
prop.put("langlist_"+(i+1)+"_name", langFiles[i].substring(0, langFiles[i].length() -4));
prop.put("langlist_"+(i+1)+"_name", ((langName == null) ? langKey : langName));
}
}
prop.put("langlist", (i+1));
prop.put("currentlang", env.getConfig("htLocaleSelection", "default"));
langName = (String) langNames.get(env.getConfig("htLocaleSelection", "default"));
prop.put("currentlang", ((langName == null) ? "default" : langName));
return prop;
}

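For reference, the new langMap helper boils down to the following self-contained sketch: htLocaleLang is split on ',' into entries, and each entry is split on its first '/' into a language-file key and a display name. The sample value below is an assumed illustration of that format, not taken from the commit.

import java.util.HashMap;

public class LangMapSketch {
    // Splits "key/Name" pairs, e.g. "de/Deutsch,fr/Français" (assumed sample),
    // into a map from language-file key to human-readable name.
    static HashMap<String, String> langMap(String htLocaleLang) {
        HashMap<String, String> map = new HashMap<String, String>();
        String[] ms = htLocaleLang.split(",");
        for (int i = 0; i < ms.length; i++) {
            int p = ms[i].indexOf("/");
            if (p > 0) map.put(ms[i].substring(0, p), ms[i].substring(p + 1));
        }
        return map;
    }

    public static void main(String[] args) {
        HashMap<String, String> names = langMap("de/Deutsch,fr/Français");
        System.out.println(names.get("de")); // Deutsch
        System.out.println(names.get("it")); // null -> callers fall back to the file key
    }
}

Language_p then uses this map to show a display name for each .lng file and falls back to the bare file key when no mapping exists, as the hunk above shows.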
@@ -63,6 +63,7 @@
<tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/ViewLog_p.html" class="MenuItemLink">Log</a></td></tr>
<tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Settings_p.html" class="MenuItemLink">Settings</a></td></tr>
<tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Performance_p.html" class="MenuItemLink">Performance</a></td></tr>
<tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Language_p.html" class="MenuItemLink">Language</a></td></tr>
<tr><td class="MenuItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;<a href="/Skins_p.html" class="MenuItemLink">Skins</a></td></tr>
<tr><td class="MenuSpacer"></td></tr>

@@ -173,9 +173,9 @@ public class kelondroMap {
valuel = Long.parseLong(value);
accumulator = (Long) accMap.get(accfields[i]);
if (add)
accMap.put(accfields[i], new Long(accumulator.longValue() + valuel));
accMap.put(accfields[i], new Long(accumulator.longValue() + ((long) valuel)));
else
accMap.put(accfields[i], new Long(accumulator.longValue() - valuel));
accMap.put(accfields[i], new Long(accumulator.longValue() - ((long) valuel)));
} catch (NumberFormatException e) {}
}
}

@@ -546,7 +546,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
}
// Associates the specified value with the specified key in this map
public byte[] put(byte[] key, byte[] value) throws IOException {
public synchronized byte[] put(byte[] key, byte[] value) throws IOException {
byte[][] row = new byte[2][];
row[0] = key;
row[1] = value;
@@ -555,7 +555,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
}
// Removes the mapping for this key from this map if present (optional operation).
public byte[][] remove(byte[] key) throws IOException {
public synchronized byte[][] remove(byte[] key) throws IOException {
Search search = new Search(key);
if (search.found()) {
Node result = search.getMatcher();

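A brief aside on the kelondroTree hunks above: put and remove merely gain the synchronized modifier, presumably because the new flush thread can now write to the same tree as the request threads, so the mutating methods must not interleave. A minimal sketch of the idea, with illustrative names rather than the real kelondro API:

import java.util.TreeMap;

// Illustrative only: once a background flusher and the request threads share
// one store, the mutating methods need mutual exclusion.
class SynchronizedStoreSketch {
    private final TreeMap<String, byte[]> rows = new TreeMap<String, byte[]>();

    public synchronized byte[] put(String key, byte[] value) {
        return rows.put(key, value);
    }

    public synchronized byte[] remove(String key) {
        return rows.remove(key);
    }
}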
@@ -303,7 +303,7 @@ public class plasmaCrawlLURL extends plasmaURL {
prop.put("table_indexed_" + c + "_showInit", (showInit) ? 1 : 0);
prop.put("table_indexed_" + c + "_showInit_initiatorSeed", (initiatorSeed == null) ? dfltInit : initiatorSeed.getName());
prop.put("table_indexed_" + c + "_showExec", (showExec) ? 1 : 0);
prop.put("table_indexed_" + c + "_showExec_executorSeed", (initiatorSeed == null) ? dfltExec : executorSeed.getName());
prop.put("table_indexed_" + c + "_showExec_executorSeed", (executorSeed == null) ? dfltExec : executorSeed.getName());
prop.put("table_indexed_" + c + "_moddate", daydate(urle.moddate()));
prop.put("table_indexed_" + c + "_wordcount", urle.wordCount());
prop.put("table_indexed_" + c + "_urldescr", urle.descr());

@@ -65,11 +65,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
private plasmaWordIndexInterface backend;
private TreeMap cache;
private kelondroMScoreCluster hashScore;
private HashMap hashDate;
private kelondroMScoreCluster hashDate;
private long startTime;
private int maxWords;
private serverLog log;
private plasmaWordIndexAssortmentCluster assortmentCluster;
private int singletonBufferSize; //kb
private int assortmentBufferSize; //kb
private flush flushThread;
// calculated constants
private static String minKey, maxKey;
@@ -80,7 +82,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}
public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, int singletonbufferkb, serverLog log) {
public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, int assortmentbufferkb, serverLog log) {
// migrate#1
File oldSingletonFile = new File(databaseRoot, oldSingletonFileName);
File newSingletonFile = new File(databaseRoot, newSingletonFileName);
@@ -94,17 +96,21 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
File acSingletonFile = new File(assortmentClusterPath, newSingletonFileName);
if ((newSingletonFile.exists()) && (!(acSingletonFile.exists()))) newSingletonFile.renameTo(acSingletonFile);
// create flushing thread
flushThread = new flush();
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
this.databaseRoot = databaseRoot;
this.singletonBufferSize = singletonbufferkb;
this.assortmentBufferSize = assortmentbufferkb;
this.cache = new TreeMap();
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new HashMap();
this.hashDate = new kelondroMScoreCluster();
this.startTime = System.currentTimeMillis();
this.maxWords = 10000;
this.backend = backend;
this.log = log;
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentLimit, singletonBufferSize, log);
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentLimit, assortmentBufferSize, log);
// read in dump of last session
try {
@@ -113,8 +119,12 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
log.logError("unable to restore cache dump: " + e.getMessage());
e.printStackTrace();
}
// start permanent flushing
flushThread.start();
}
private void dump(int waitingSeconds) throws IOException {
log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)");
File indexDumpFile = new File(databaseRoot, indexDumpFileName);
@@ -125,20 +135,17 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
synchronized (cache) {
Iterator i = cache.entrySet().iterator();
//Iterator i = hashScore.scores(true);
Map.Entry entry;
String wordHash;
plasmaWordIndexEntryContainer container;
long creationTime;
long updateTime;
plasmaWordIndexEntry wordEntry;
byte[][] row = new byte[5][];
while (i.hasNext()) {
// get entries
entry = (Map.Entry) i.next();
//wordHash = (String) i.next();
wordHash = (String) entry.getKey();
creationTime = getCreationTime(wordHash);
//container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
updateTime = getUpdateTime(wordHash);
container = (plasmaWordIndexEntryContainer) entry.getValue();
// put entries on stack
@@ -148,7 +155,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
wordEntry = (plasmaWordIndexEntry) ci.next();
row[0] = wordHash.getBytes();
row[1] = kelondroRecords.long2bytes(container.size(), 4);
row[2] = kelondroRecords.long2bytes(creationTime, 8);
row[2] = kelondroRecords.long2bytes(updateTime, 8);
row[3] = wordEntry.getUrlHash().getBytes();
row[4] = wordEntry.toEncodedForm(true).getBytes();
dumpStack.push(row);
@@ -235,7 +242,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public Iterator wordHashes(String startWordHash, boolean up) {
// here we merge 3 databases into one view:
// - the RAM Cache
// - the singleton File Cache
// - the assortmentCluster File Cache
// - the backend
if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up");
return new kelondroMergeIterator(
@@ -247,6 +254,49 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
true);
}
private class flush extends Thread {
boolean terminate, pause;
public flush() {
terminate = false;
pause = false;
}
public void run() {
String nextHash;
while (!terminate) {
if (pause) {
try {this.sleep(300);} catch (InterruptedException e) {}
} else {
nextHash = (String) hashDate.getMinObject();
if (nextHash != null) {
try {
flushFromMem(nextHash, true);
} catch (Exception e) {
log.logError("flushThread: " + e.getMessage());
e.printStackTrace();
}
try {this.sleep(10 + java.lang.Math.min(1000, 10 * maxWords/(cache.size() + 1)));} catch (InterruptedException e) {}
} else {
try {this.sleep(2000);} catch (InterruptedException e) {}
}
}
}
}
public void pause() {
pause = true;
}
public void proceed() {
pause = false;
}
public void terminate() {
terminate = true;
}
}
private int flushFromMem(String key, boolean reintegrate) {
// this method flushes indexes out from the ram to the disc.
// at first we check the singleton database and act accordingly
@@ -261,12 +311,12 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// get the container
container = (plasmaWordIndexEntryContainer) cache.get(key);
if (container == null) return 0; // flushing of nonexisting key
time = getCreationTime(key);
time = getUpdateTime(key);
// remove it from the cache
cache.remove(key);
hashScore.deleteScore(key);
hashDate.remove(key);
hashDate.deleteScore(key);
}
// now decide where to flush that container
@@ -296,7 +346,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
synchronized (cache) {
cache.put(key, container);
hashScore.setScore(key, container.size());
hashDate.put(key, new Long(time));
hashDate.setScore(key, intTime(time));
}
return -flushedFromAssortment.size();
} else {
@@ -306,6 +356,14 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
}
private int intTime(long longTime) {
return (int) ((longTime - startTime) / 1000);
}
private long longTime(int intTime) {
return ((long) intTime) * ((long) 1000) + startTime;
}
private boolean flushFromAssortmentCluster(String key) {
// this should only be called if the singleton shall be deleted or returned in an index entity
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key);
@@ -319,19 +377,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
private synchronized int flushFromMemToLimit() {
if ((hashScore.size() == 0) && (cache.size() == 0)) {
serverLog.logDebug("PLASMA INDEXING", "flushToLimit: called but cache is empty");
return 0;
}
if ((hashScore.size() == 0) && (cache.size() != 0)) {
serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=0 but cache.size=" + cache.size());
return 0;
}
if ((hashScore.size() != 0) && (cache.size() == 0)) {
serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=" + hashScore.size() + " but cache.size=0");
return 0;
}
if ((hashScore.size() == 0) || (cache.size() == 0)) return 0;
flushThread.pause();
int count = 0;
//serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size());
synchronized (hashScore) {
@@ -345,7 +393,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
while (i.hasNext()) {
// get the entry properties
key = (String) i.next();
createTime = (Long) hashDate.get(key);
createTime = new Long(longTime(hashDate.getScore(key)));
count = hashScore.getScore(key);
// put it into a specific ohl
@@ -396,7 +444,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// stop flushing if the cache has shrunk enough
// avoid flushing high-scores if possible
if (cache.size() < this.maxWords - 100) return count;
if (cache.size() < this.maxWords - 100) {
flushThread.proceed();
return count;
}
// flush high-scores
for (int cluster = java.lang.Math.min(clusterCandidate.length, ramcacheLimit); cluster > assortmentLimit; cluster--) {
@@ -411,43 +462,60 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
candidateCounter += cluster;
log.logDebug("flushed high-cluster below limit #" + cluster + ", key=" + key + ", count=" + count + ", cachesize=" + cache.size());
}
if (cache.size() < this.maxWords - 100) return count;
if (cache.size() < this.maxWords - 100) {
flushThread.proceed();
return count;
}
}
}
}
flushThread.proceed();
return count;
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
flushThread.pause();
flushFromMem(wordHash, false);
flushFromAssortmentCluster(wordHash);
flushThread.proceed();
return backend.getIndex(wordHash, deleteIfEmpty);
}
public long getCreationTime(String wordHash) {
Long time = (Long) hashDate.get(wordHash);
public long getUpdateTime(String wordHash) {
plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash);
if (entries == null) return 0;
return entries.updated();
/*
Long time = new Long(longTime(hashDate.getScore(wordHash)));
if (time == null) return 0;
return time.longValue();
*/
}
public void deleteIndex(String wordHash) {
flushThread.pause();
synchronized (cache) {
cache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.remove(wordHash);
hashDate.deleteScore(wordHash);
}
assortmentCluster.removeFromAll(wordHash);
backend.deleteIndex(wordHash);
flushThread.proceed();
}
public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
flushThread.pause();
flushFromMem(wordHash, false);
flushFromAssortmentCluster(wordHash);
return backend.removeEntries(wordHash, urlHashes, deleteComplete);
int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete);
flushThread.proceed();
return removed;
}
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) {
flushThread.pause();
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
if (cache.size() >= this.maxWords) flushFromMemToLimit();
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
@@ -462,10 +530,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (added > 0) {
cache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.put(wordHash, new Long(updateTime));
hashDate.setScore(wordHash, intTime(updateTime));
}
}
//System.out.println("DEBUG: cache = " + cache.toString());
flushThread.proceed();
return added;
}
@@ -475,11 +544,17 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (entries.add(new plasmaWordIndexEntry[]{newEntry}, updateTime) > 0) {
cache.put(wordHash, entries);
hashScore.incScore(wordHash);
hashDate.put(wordHash, new Long(updateTime));
hashDate.setScore(wordHash, intTime(updateTime));
}
flushThread.proceed();
}
public void close(int waitingSeconds) {
// stop permanent flushing
flushThread.terminate();
try {flushThread.join(5000);} catch (InterruptedException e) {}
// close cluster
assortmentCluster.close();
try {
dump(waitingSeconds);

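Taken together, the plasmaWordIndexCache hunks follow one pattern: the new flush thread continuously writes the oldest cache entry to the backend, and the public methods that touch the cache bracket their work with flushThread.pause() and flushThread.proceed() so the flusher stays out of their critical sections. A condensed, simplified sketch of that pattern (illustrative names, not the actual plasmaWordIndexCache code):

import java.util.concurrent.ConcurrentLinkedQueue;

// Illustrative pausable write-behind flusher; the real class picks the word
// hash with the oldest update time and calls flushFromMem(hash, true).
class PausableFlusherSketch extends Thread {
    private volatile boolean terminate = false;
    private volatile boolean pause = false;
    private final ConcurrentLinkedQueue<String> work = new ConcurrentLinkedQueue<String>();

    public void run() {
        while (!terminate) {
            try {
                if (pause) {
                    Thread.sleep(300);        // a caller is working on the cache
                    continue;
                }
                String next = work.poll();    // oldest entry to write behind
                if (next == null) {
                    Thread.sleep(2000);       // nothing buffered, idle a little longer
                } else {
                    flush(next);              // push the entry to the backend store
                    Thread.sleep(10);         // throttle so request threads stay responsive
                }
            } catch (InterruptedException e) {}
        }
    }

    private void flush(String key) { /* write-behind to disk would happen here */ }

    public void submit(String key) { work.add(key); }
    public void pauseFlushing()    { pause = true; }
    public void proceed()          { pause = false; }
    public void shutdown()         { terminate = true; }
}

Callers would then wrap cache mutations as flusher.pauseFlushing(); try { ... } finally { flusher.proceed(); }, and shutdown would set the terminate flag and join the thread, mirroring the close(int) change above.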
@@ -187,7 +187,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
}
}
public long getCreationTime(String wordHash) {
public long getUpdateTime(String wordHash) {
File f = plasmaWordIndexEntity.wordHash2path(databaseRoot, wordHash);
if (f.exists()) return f.lastModified(); else return -1;
}

@@ -51,7 +51,7 @@ public interface plasmaWordIndexInterface {
public Iterator wordHashes(String startWordHash, boolean up);
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty);
public long getCreationTime(String wordHash);
public long getUpdateTime(String wordHash);
public void deleteIndex(String wordHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
