further enhanced caching (new cache flush methods)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@111 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 20 years ago
parent 85c2f3be8a
commit 287d2e6f10

@ -1,7 +1,7 @@
<project name="YaCy" default="all" basedir=".">
YaCy - a Peer to Peer search Engine
YaCy - a Peer to Peer Web Search Engine
@ -50,7 +50,7 @@
<!-- Date stamp as year-month-day. Pattern letters follow java.text.SimpleDateFormat: "MM" is month-in-year, whereas lowercase "mm" is minute-of-hour. -->
<format property="REPL_DATE_FORMAT" pattern="yyyyMMdd" />
<format property="REPL_DATE_FORMAT" pattern="yyyyMMdd" />
@ -70,143 +70,147 @@
<property name="javacTarget" value="1.4"/>
<!-- Release Configuration -->
<property name="releaseVersion" value="0.373"/>
<property name="releaseFile" value="yacy_v${releaseVersion}_${DSTAMP}.tgz"/>
<property name="releaseVersion" value="0.374"/>
<property name="releaseFile" value="yacy_dev_v${releaseVersion}_${DSTAMP}.tar.gz"/>
<target name="init">
<mkdir dir="${build}/de/anomic/data"/>
<mkdir dir="${build}/de/anomic/htmlFilter"/>
<mkdir dir="${build}/de/anomic/http"/>
<mkdir dir="${build}/de/anomic/kelondro"/>
<mkdir dir="${build}/de/anomic/net"/>
<mkdir dir="${build}/de/anomic/plasma"/>
<mkdir dir="${build}/de/anomic/plasma/parser"/>
<mkdir dir="${build}/de/anomic/server"/>
<mkdir dir="${build}/de/anomic/tools"/>
<mkdir dir="${build}/de/anomic/yacy"/>
<mkdir dir="${doc}"/>
<mkdir dir="${data}"/>
<mkdir dir="${release}"/>
<move file="${src}/yacy.java" tofile="${src}/yacy.java.orig" />
<filter token="REPL_VERSION" value="${releaseVersion}" />
<filter token="REPL_DATE" value="${DSTAMP}"/>
<copy file="${src}/yacy.java.orig" tofile="${src}/yacy.java" filtering="true" />
<mkdir dir="${build}/de/anomic/data"/>
<mkdir dir="${build}/de/anomic/htmlFilter"/>
<mkdir dir="${build}/de/anomic/http"/>
<mkdir dir="${build}/de/anomic/kelondro"/>
<mkdir dir="${build}/de/anomic/net"/>
<mkdir dir="${build}/de/anomic/plasma"/>
<mkdir dir="${build}/de/anomic/plasma/parser"/>
<mkdir dir="${build}/de/anomic/server"/>
<mkdir dir="${build}/de/anomic/tools"/>
<mkdir dir="${build}/de/anomic/yacy"/>
<mkdir dir="${doc}"/>
<mkdir dir="${data}"/>
<mkdir dir="${release}"/>
<move file="${src}/yacy.java" tofile="${src}/yacy.java.orig" />
<filter token="REPL_VERSION" value="${releaseVersion}" />
<filter token="REPL_DATE" value="${DSTAMP}"/>
<copy file="${src}/yacy.java.orig" tofile="${src}/yacy.java" filtering="true" />
<target name="compile" depends="init" description="Compiling the yacy sources ...">
<javac srcdir="${src}/" destdir="${build}" excludes="de/anomic/plasma/parser/*/*" source="${javacSource}" target="${javacTarget}">
<pathelement location="${build}" />
<!-- libs needed for the yacy thread/object-pools -->
<pathelement location="${lib}/commons-collections.jar" />
<pathelement location="${lib}/commons-pool-1.2.jar" />
<javac srcdir="${htroot}/" destdir="${htroot}" classpath="${build}" source="1.4" target="1.4"/>
<javac srcdir="${htroot}/htdocsdefault" destdir="${htroot}/htdocsdefault" classpath="${build}" source="1.4" target="1.4"/>
<javac srcdir="${htroot}/yacy" destdir="${htroot}/yacy" classpath="${build}" source="1.4" target="1.4"/>
<javac srcdir="${src}/" destdir="${build}"
source="${javacSource}" target="${javacTarget}">
<pathelement location="${build}" />
<!-- libs needed for the yacy thread/object-pools -->
<pathelement location="${lib}/commons-collections.jar" />
<pathelement location="${lib}/commons-pool-1.2.jar" />
<javac srcdir="${htroot}/" destdir="${htroot}"
source="1.4" target="1.4"/>
<javac srcdir="${htroot}/htdocsdefault" destdir="${htroot}/htdocsdefault"
source="1.4" target="1.4"/>
<javac srcdir="${htroot}/yacy" destdir="${htroot}/yacy"
source="1.4" target="1.4"/>
<target name="all" depends="compile">
<delete file="${src}/yacy.java" />
<move file="${src}/yacy.java.orig" tofile="${src}/yacy.java" />
<delete file="${src}/yacy.java" />
<move file="${src}/yacy.java.orig" tofile="${src}/yacy.java" />
<target name="parsers" depends="compile" description="Compiling and zipping all additional parsers">
<subant target="">
<property name="src" location="${src}"/>
<property name="build" location="${build}"/>
<property name="libx" location="${libx}"/>
<property name="release" location="${release}"/>
<property name="javacSource" value="${javacSource}"/>
<property name="javacTarget" value="${javacTarget}"/>
<fileset dir="${src}/" includes="de/anomic/plasma/parser/*/build.xml"/>
<subant target="">
<property name="src" location="${src}"/>
<property name="build" location="${build}"/>
<property name="libx" location="${libx}"/>
<property name="release" location="${release}"/>
<property name="javacSource" value="${javacSource}"/>
<property name="javacTarget" value="${javacTarget}"/>
<fileset dir="${src}/" includes="de/anomic/plasma/parser/*/build.xml"/>
<target name="dist" depends="all,parsers" description="Compiling sources and make a release file ...">
<property name="accessRightsDir" value="755"/>
<property name="accessRightsFile" value="644"/>
<tar destfile="${release}/${releaseFile}" compression="gzip" defaultexcludes="yes">
<!-- copy classes -->
<tarfileset dir="${build}" prefix="classes" dirmode="${accessRightsDir}" mode="${accessRightsFile}" >
<include name="**/*.*"/>
<exclude name="de/anomic/plasma/parser/*/*"/>
<!-- copy libs -->
<tarfileset dir="${lib}" includes="**/*" prefix="lib" dirmode="${accessRightsDir}" mode="${accessRightsFile}"/>
<!-- copy configuration files -->
<tarfileset dir="." dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="yacy.init"/>
<include name="yacy.yellow"/>
<include name="yacy.black"/>
<include name="yacy.blue"/>
<include name="yacy.stopwords"/>
<include name="yacy.parser"/>
<include name="httpd.mime"/>
<include name="superseed.txt"/>
<!-- copy wrappers -->
<tarfileset dir="." dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="startYACY.command"/>
<include name="startYACY.bat"/>
<include name="startYACY_noconsole.bat"/>
<include name="startYACY.sh"/>
<include name="stopYACY.command"/>
<include name="stopYACY.bat"/>
<include name="stopYACY.sh"/>
<include name="killYACY.sh"/>
<!-- copy documentation -->
<tarfileset dir="." dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="readme.txt"/>
<include name="gpl.txt"/>
<tarfileset dir="${doc}" prefix="doc" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*"/>
<!-- copy source code -->
<tarfileset dir="${src}" prefix="source" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*.*"/>
<exclude name="de/anomic/plasma/parser/*/*"/>
<!-- copy server pages -->
<tarfileset dir="${htroot}" prefix="htroot" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*"/>
<!-- copy add-ons -->
<tarfileset dir="${addon}" prefix="addon" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*"/>
<echo message="${releaseVersion}" file="${doc}/release.txt"/>
<property name="accessRightsDir" value="755"/>
<property name="accessRightsFile" value="644"/>
<tar destfile="${release}/${releaseFile}" compression="gzip" defaultexcludes="yes">
<!-- copy classes -->
<tarfileset dir="${build}" prefix="classes" dirmode="${accessRightsDir}" mode="${accessRightsFile}" >
<include name="**/*.*"/>
<exclude name="de/anomic/plasma/parser/*/*"/>
<!-- copy libs -->
<tarfileset dir="${lib}" includes="**/*" prefix="lib" dirmode="${accessRightsDir}" mode="${accessRightsFile}"/>
<!-- copy configuration files -->
<tarfileset dir="." dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="yacy.init"/>
<include name="yacy.yellow"/>
<include name="yacy.black"/>
<include name="yacy.blue"/>
<include name="yacy.stopwords"/>
<include name="yacy.parser"/>
<include name="httpd.mime"/>
<include name="superseed.txt"/>
<!-- copy wrappers -->
<tarfileset dir="." dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="startYACY.command"/>
<include name="startYACY.bat"/>
<include name="startYACY_noconsole.bat"/>
<include name="startYACY.sh"/>
<include name="stopYACY.command"/>
<include name="stopYACY.bat"/>
<include name="stopYACY.sh"/>
<include name="killYACY.sh"/>
<!-- copy documentation -->
<tarfileset dir="." dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="readme.txt"/>
<include name="gpl.txt"/>
<tarfileset dir="${doc}" prefix="doc" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*"/>
<!-- copy source code -->
<tarfileset dir="${src}" prefix="source" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*.*"/>
<exclude name="de/anomic/plasma/parser/*/*"/>
<!-- copy server pages -->
<tarfileset dir="${htroot}" prefix="htroot" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*"/>
<!-- copy add-ons -->
<tarfileset dir="${addon}" prefix="addon" dirmode="${accessRightsDir}" mode="${accessRightsFile}">
<include name="**/*"/>
<echo message="${releaseVersion}" file="${doc}/release.txt"/>
<target name="clean" description="make clean">
<fileset dir="${build}" includes="**/*.class" />
<fileset dir="${htroot}" includes="**/*.class" />
<fileset dir="${build}" includes="**/*.class" />
<fileset dir="${htroot}" includes="**/*.class" />

@ -167,7 +167,7 @@ public class Network {
boolean complete = post.containsKey("ip");
Enumeration e = null;
switch (page) {
case 1 : e = yacyCore.seedDB.seedsSortedConnected(post.get("order", "down").equals("up"), post.get("sort", "ICount")); break;
case 1 : e = yacyCore.seedDB.seedsSortedConnected(post.get("order", "down").equals("up"), post.get("sort", "LCount")); break;
case 2 : e = yacyCore.seedDB.seedsSortedDisconnected(post.get("order", "up").equals("up"), post.get("sort", "LastSeen")); break;
case 3 : e = yacyCore.seedDB.seedsSortedPotential(post.get("order", "up").equals("up"), post.get("sort", "LastSeen")); break;

@ -45,7 +45,7 @@
# Contributions and changes to the program code must be marked as such.
# define variables
datestr=`date +%Y%m%d`

@ -211,6 +211,16 @@ public class kelondroMScoreCluster {
/**
 * Returns the score taken from the last key of keyrefDB.
 *
 * @return the upper 32 bits of the last keyrefDB key as an int,
 *         or -1 when refkeyDB holds no entries
 */
public int getMaxScore() {
// nothing stored: signal "no score" with -1
if (refkeyDB.size() == 0) return -1;
// keep only the high 32 bits of the Long key and shift them down into int range
// NOTE(review): presumably the score is packed into the high word of each key - confirm where the keys are built
return (int) ((((Long) keyrefDB.lastKey()).longValue() & 0xFFFFFFFF00000000L) >> 32);
/**
 * Returns the score taken from the first key of keyrefDB.
 *
 * @return the upper 32 bits of the first keyrefDB key as an int,
 *         or -1 when refkeyDB holds no entries
 */
public int getMinScore() {
// nothing stored: signal "no score" with -1
if (refkeyDB.size() == 0) return -1;
// keep only the high 32 bits of the Long key and shift them down into int range
// NOTE(review): presumably the score is packed into the high word of each key - confirm where the keys are built
return (int) ((((Long) keyrefDB.firstKey()).longValue() & 0xFFFFFFFF00000000L) >> 32);
public Object getMaxObject() {
if (refkeyDB.size() == 0) return null;
//return getScores(1, false)[0];

@ -415,6 +415,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
private boolean flushFromSingleton(String key) {
// this should only be called if the singleton shall be deleted or returned in an index entity
Object[] singleton = readSingleton(key);
if (singleton == null) {
return false;
@ -443,52 +444,60 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
return 0;
int count = 0;
//serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size());
int total = 0;
synchronized (hashScore) {
String key;
int count;
Long createTime;
// flush high-scores
while ((total < 100) && (hashScore.size() >= maxWords)) {
key = (String) hashScore.getMaxObject();
// generate flush list
Iterator i = hashScore.scores(true);
TreeMap[] al = new TreeMap[hashScore.getMaxScore() + 1];
for (int k = 0; k < al.length; k++) al[k] = new TreeMap(); // by create time ordered hash-list
while (i.hasNext()) {
// get the entry properties
key = (String) i.next();
createTime = (Long) hashDate.get(key);
count = hashScore.getScore(key);
if (count < 5) {
log.logWarning("flushing of high-key " + key + " not appropriate (too less entries, count=" + count + "): increase cache size");
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 9000)) {
//log.logDebug("high-key " + key + " is too fresh, interrupting flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
//log.logDebug("flushing high-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
total += flushFromMem(key, false);
// put it into the creation-time-ordered hash list for its score (al[count])
al[count].put(createTime, key);
//System.out.println("COUNT FOR KEY " + key + ": " + count);
// print statistics
for (int k = 1; k < al.length; k++) log.logDebug("FLUSH-LIST " + k + ": " + al[k].size() + " entries");
// flush singletons
Iterator i = hashScore.scores(true);
ArrayList al = new ArrayList();
while ((i.hasNext()) && (total < 200)) {
key = (String) i.next();
createTime = (Long) hashDate.get(key);
count = hashScore.getScore(key);
if (count > 1) {
//log.logDebug("flush of singleton-key " + key + ": count too high (count=" + count + ")");
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 90000)) {
//log.logDebug("singleton-key " + key + " is too fresh, interrupting flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
i = al[1].entrySet().iterator();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
key = (String) entry.getValue();
createTime = (Long) entry.getKey();
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) > 90000)) {
//log.logDebug("flushing singleton-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
count += flushFromMem((String) key, true);
// flush high-scores
for (int k = al.length - 1; k >= 2; k--) {
i = al[k].entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
key = (String) entry.getValue();
createTime = (Long) entry.getKey();
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) > (600000/k))) {
//log.logDebug("flushing high-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
count += flushFromMem(key, false);
if (count > 2000) return count;
//log.logDebug("flushing singleton-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
for (int k = 0; k < al.size(); k++) flushFromMem((String) al.get(k), true);
return total;
return count;
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
@ -521,7 +530,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
if (cache.size() >= this.maxWords) flushFromMemToLimit();
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
// put new words into cache

@ -103,8 +103,8 @@ import de.anomic.yacy.yacyCore;
public final class yacy {
// static objects
private static final String vString = "@REPL_VERSION@";
private static final String vDATE = "@REPL_DATE@";
private static final String vString = "0.373";
private static final String vDATE = "20050512";
private static final String copyright = "[ YACY Proxy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
private static final String hline = "-------------------------------------------------------------------------------";

@ -1,2 +1,2 @@
#plasmaParser configuration file
#Thu May 12 11:55:37 CEST 2005
#Thu May 12 18:11:49 CEST 2005
