further enhanced caching (new cache flush methods)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@111 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 85c2f3be8a
commit 287d2e6f10

@ -1,6 +1,6 @@
<project name="YaCy" default="all" basedir=".">
<description>
YaCy - a Peer to Peer search Engine
YaCy - a Peer to Peer Web Search Engine
</description>
<!--
@ -70,11 +70,10 @@
<property name="javacTarget" value="1.4"/>
<!-- Release Configuration -->
<property name="releaseVersion" value="0.373"/>
<property name="releaseFile" value="yacy_v${releaseVersion}_${DSTAMP}.tgz"/>
<property name="releaseVersion" value="0.374"/>
<property name="releaseFile" value="yacy_dev_v${releaseVersion}_${DSTAMP}.tar.gz"/>
<target name="init">
<mkdir dir="${build}/de/anomic/data"/>
<mkdir dir="${build}/de/anomic/htmlFilter"/>
<mkdir dir="${build}/de/anomic/http"/>
@ -97,9 +96,11 @@
<copy file="${src}/yacy.java.orig" tofile="${src}/yacy.java" filtering="true" />
</target>
<target name="compile" depends="init" description="Compiling the yacy sources ...">
<javac srcdir="${src}/" destdir="${build}" excludes="de/anomic/plasma/parser/*/*" source="${javacSource}" target="${javacTarget}">
<javac srcdir="${src}/" destdir="${build}"
excludes="de/anomic/plasma/parser/*/*"
debug="true"
source="${javacSource}" target="${javacTarget}">
<classpath>
<pathelement location="${build}" />
@ -108,12 +109,17 @@
<pathelement location="${lib}/commons-pool-1.2.jar" />
</classpath>
</javac>
<javac srcdir="${htroot}/" destdir="${htroot}" classpath="${build}" source="1.4" target="1.4"/>
<javac srcdir="${htroot}/htdocsdefault" destdir="${htroot}/htdocsdefault" classpath="${build}" source="1.4" target="1.4"/>
<javac srcdir="${htroot}/yacy" destdir="${htroot}/yacy" classpath="${build}" source="1.4" target="1.4"/>
<javac srcdir="${htroot}/" destdir="${htroot}"
classpath="${build}"
source="1.4" target="1.4"/>
<javac srcdir="${htroot}/htdocsdefault" destdir="${htroot}/htdocsdefault"
classpath="${build}"
source="1.4" target="1.4"/>
<javac srcdir="${htroot}/yacy" destdir="${htroot}/yacy"
classpath="${build}"
source="1.4" target="1.4"/>
</target>
<target name="all" depends="compile">
<delete file="${src}/yacy.java" />
<move file="${src}/yacy.java.orig" tofile="${src}/yacy.java" />
@ -200,8 +206,6 @@
<echo message="${releaseVersion}" file="${doc}/release.txt"/>
</target>
<target name="clean" description="make clean">
<delete>
<fileset dir="${build}" includes="**/*.class" />

@ -167,7 +167,7 @@ public class Network {
boolean complete = post.containsKey("ip");
Enumeration e = null;
switch (page) {
case 1 : e = yacyCore.seedDB.seedsSortedConnected(post.get("order", "down").equals("up"), post.get("sort", "ICount")); break;
case 1 : e = yacyCore.seedDB.seedsSortedConnected(post.get("order", "down").equals("up"), post.get("sort", "LCount")); break;
case 2 : e = yacyCore.seedDB.seedsSortedDisconnected(post.get("order", "up").equals("up"), post.get("sort", "LastSeen")); break;
case 3 : e = yacyCore.seedDB.seedsSortedPotential(post.get("order", "up").equals("up"), post.get("sort", "LastSeen")); break;
}

@ -45,7 +45,7 @@
# Contributions and changes to the program code must be marked as such.
# define variables
version='0.373'
version='0.374'
datestr=`date +%Y%m%d`
#release='yacy_v'$version'_'$datestr
release='yacy_dev_v'$version'_'$datestr

@ -211,6 +211,16 @@ public class kelondroMScoreCluster {
}
}
/**
 * Returns the score stored in the last key of <code>keyrefDB</code>.
 * The score is read from the upper 32 bits of that key.
 *
 * @return the extracted score, or -1 if <code>refkeyDB</code> holds no entries
 */
public int getMaxScore() {
    if (refkeyDB.size() == 0) {
        return -1;
    }
    final long packedKey = ((Long) keyrefDB.lastKey()).longValue();
    // the arithmetic shift alone discards the lower 32 bits, leaving the score part
    return (int) (packedKey >> 32);
}
/**
 * Returns the score stored in the first key of <code>keyrefDB</code>.
 * The score is read from the upper 32 bits of that key.
 *
 * @return the extracted score, or -1 if <code>refkeyDB</code> holds no entries
 */
public int getMinScore() {
    if (refkeyDB.size() == 0) {
        return -1;
    }
    final long packedKey = ((Long) keyrefDB.firstKey()).longValue();
    // the arithmetic shift alone discards the lower 32 bits, leaving the score part
    return (int) (packedKey >> 32);
}
public Object getMaxObject() {
if (refkeyDB.size() == 0) return null;
//return getScores(1, false)[0];

@ -415,6 +415,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
private boolean flushFromSingleton(String key) {
// this should only be called if the singleton shall be deleted or returned in an index entity
Object[] singleton = readSingleton(key);
if (singleton == null) {
return false;
@ -443,52 +444,60 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
return 0;
}
int count = 0;
//serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size());
int total = 0;
synchronized (hashScore) {
String key;
int count;
Long createTime;
// flush high-scores
while ((total < 100) && (hashScore.size() >= maxWords)) {
key = (String) hashScore.getMaxObject();
// generate flush list
Iterator i = hashScore.scores(true);
TreeMap[] al = new TreeMap[hashScore.getMaxScore() + 1];
for (int k = 0; k < al.length; k++) al[k] = new TreeMap(); // by create time ordered hash-list
while (i.hasNext()) {
// get the entry properties
key = (String) i.next();
createTime = (Long) hashDate.get(key);
count = hashScore.getScore(key);
if (count < 5) {
log.logWarning("flushing of high-key " + key + " not appropriate (too less entries, count=" + count + "): increase cache size");
break;
// put it into a specific ohl
al[count].put(createTime, key);
//System.out.println("COUNT FOR KEY " + key + ": " + count);
}
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 9000)) {
//log.logDebug("high-key " + key + " is too fresh, interrupting flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
break;
// print statistics
for (int k = 1; k < al.length; k++) log.logDebug("FLUSH-LIST " + k + ": " + al[k].size() + " entries");
// flush singletons
i = al[1].entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
key = (String) entry.getValue();
createTime = (Long) entry.getKey();
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) > 90000)) {
//log.logDebug("flushing singleton-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
count += flushFromMem((String) key, true);
}
//log.logDebug("flushing high-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
total += flushFromMem(key, false);
}
// flush singletons
Iterator i = hashScore.scores(true);
ArrayList al = new ArrayList();
while ((i.hasNext()) && (total < 200)) {
key = (String) i.next();
createTime = (Long) hashDate.get(key);
count = hashScore.getScore(key);
if (count > 1) {
//log.logDebug("flush of singleton-key " + key + ": count too high (count=" + count + ")");
break;
// flush high-scores
for (int k = al.length - 1; k >= 2; k--) {
i = al[k].entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
key = (String) entry.getValue();
createTime = (Long) entry.getKey();
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) > (600000/k))) {
//log.logDebug("flushing high-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
count += flushFromMem(key, false);
}
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 90000)) {
//log.logDebug("singleton-key " + key + " is too fresh, interrupting flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
continue;
if (count > 2000) return count;
}
//log.logDebug("flushing singleton-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
al.add(key);
total++;
}
for (int k = 0; k < al.size(); k++) flushFromMem((String) al.get(k), true);
}
return total;
return count;
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
@ -521,7 +530,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
flushFromMemToLimit();
if (cache.size() >= this.maxWords) flushFromMemToLimit();
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
// put new words into cache

@ -103,8 +103,8 @@ import de.anomic.yacy.yacyCore;
public final class yacy {
// static objects
private static final String vString = "@REPL_VERSION@";
private static final String vDATE = "@REPL_DATE@";
private static final String vString = "0.373";
private static final String vDATE = "20050512";
private static final String copyright = "[ YACY Proxy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
private static final String hline = "-------------------------------------------------------------------------------";

@ -1,2 +1,2 @@
#plasmaParser configuration file
#Thu May 12 11:55:37 CEST 2005
#Thu May 12 18:11:49 CEST 2005

Loading…
Cancel
Save