@ -1,13 +1,14 @@
// plasmaWordIndexCache.java
// indexRAMCacheRI.java
// -------------------------
// (C) 2005, 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// part of YACY
// first published 2005 on http://www.anomic.de
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
//
//
// $LastChangedDate$
// This is a part of YaCy, a peer-to-peer based web search engine
// $LastChangedRevision$
//
// $LastChangedBy$
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// it under the terms of the GNU General Public License as published by
@ -22,27 +23,8 @@
// You should have received a copy of the GNU General Public License
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic. plasma ;
package de.anomic.index ;
import java.io.File ;
import java.io.File ;
import java.io.IOException ;
import java.io.IOException ;
@ -50,18 +32,15 @@ import java.util.Iterator;
import java.util.Map ;
import java.util.Map ;
import java.util.TreeMap ;
import java.util.TreeMap ;
import de.anomic.index.indexContainer ;
import de.anomic.index.indexEntry ;
import de.anomic.index.indexRI ;
import de.anomic.index.indexAbstractRI ;
import de.anomic.kelondro.kelondroArray ;
import de.anomic.kelondro.kelondroArray ;
import de.anomic.kelondro.kelondroException ;
import de.anomic.kelondro.kelondroException ;
import de.anomic.kelondro.kelondroMScoreCluster ;
import de.anomic.kelondro.kelondroMScoreCluster ;
import de.anomic.kelondro.kelondroRecords ;
import de.anomic.kelondro.kelondroRecords ;
import de.anomic.plasma.plasmaWordIndexAssortment ;
import de.anomic.server.logging.serverLog ;
import de.anomic.server.logging.serverLog ;
import de.anomic.yacy.yacySeedDB ;
import de.anomic.yacy.yacySeedDB ;
public final class plasmaWordIndexCache extends indexAbstractRI implements indexRI {
public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
// environment constants
// environment constants
private static final String indexArrayFileName = "indexDump1.array" ;
private static final String indexArrayFileName = "indexDump1.array" ;
@ -87,7 +66,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}
}
public plasmaWordIndexCache ( File databaseRoot , serverLog log ) {
public indexRAMCacheRI ( File databaseRoot , serverLog log ) {
// creates a new index cache
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
// the cache has a back-end where indexes that do not fit in the cache are flushed
@ -120,22 +99,22 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
long wordsPerSecond = 0 , wordcount = 0 , urlcount = 0 ;
long wordsPerSecond = 0 , wordcount = 0 , urlcount = 0 ;
Map . Entry entry ;
Map . Entry entry ;
String wordHash ;
String wordHash ;
plasmaWordIndexEntry Container container ;
indexTreeMap Container container ;
long updateTime ;
long updateTime ;
plasmaWordIndexEntryInstance wordEntry ;
indexURLEntry wordEntry ;
byte [ ] [ ] row = new byte [ 5 ] [ ] ;
byte [ ] [ ] row = new byte [ 5 ] [ ] ;
// write kCache, this will be melted with the wCache upon load
// write kCache, this will be melted with the wCache upon load
synchronized ( kCache ) {
synchronized ( kCache ) {
Iterator i = kCache . values ( ) . iterator ( ) ;
Iterator i = kCache . values ( ) . iterator ( ) ;
while ( i . hasNext ( ) ) {
while ( i . hasNext ( ) ) {
container = ( plasmaWordIndexEntry Container) i . next ( ) ;
container = ( indexTreeMap Container) i . next ( ) ;
// put entries on stack
// put entries on stack
if ( container ! = null ) {
if ( container ! = null ) {
Iterator ci = container . entries ( ) ;
Iterator ci = container . entries ( ) ;
while ( ci . hasNext ( ) ) {
while ( ci . hasNext ( ) ) {
wordEntry = ( plasmaWordIndexEntryInstance ) ci . next ( ) ;
wordEntry = ( indexURLEntry ) ci . next ( ) ;
row [ 0 ] = container . wordHash ( ) . getBytes ( ) ;
row [ 0 ] = container . wordHash ( ) . getBytes ( ) ;
row [ 1 ] = kelondroRecords . long2bytes ( container . size ( ) , 4 ) ;
row [ 1 ] = kelondroRecords . long2bytes ( container . size ( ) , 4 ) ;
row [ 2 ] = kelondroRecords . long2bytes ( container . updated ( ) , 8 ) ;
row [ 2 ] = kelondroRecords . long2bytes ( container . updated ( ) , 8 ) ;
@ -158,13 +137,13 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
entry = ( Map . Entry ) i . next ( ) ;
entry = ( Map . Entry ) i . next ( ) ;
wordHash = ( String ) entry . getKey ( ) ;
wordHash = ( String ) entry . getKey ( ) ;
updateTime = getUpdateTime ( wordHash ) ;
updateTime = getUpdateTime ( wordHash ) ;
container = ( plasmaWordIndexEntry Container) entry . getValue ( ) ;
container = ( indexTreeMap Container) entry . getValue ( ) ;
// put entries on stack
// put entries on stack
if ( container ! = null ) {
if ( container ! = null ) {
Iterator ci = container . entries ( ) ;
Iterator ci = container . entries ( ) ;
while ( ci . hasNext ( ) ) {
while ( ci . hasNext ( ) ) {
wordEntry = ( plasmaWordIndexEntryInstance ) ci . next ( ) ;
wordEntry = ( indexURLEntry ) ci . next ( ) ;
row [ 0 ] = wordHash . getBytes ( ) ;
row [ 0 ] = wordHash . getBytes ( ) ;
row [ 1 ] = kelondroRecords . long2bytes ( container . size ( ) , 4 ) ;
row [ 1 ] = kelondroRecords . long2bytes ( container . size ( ) , 4 ) ;
row [ 2 ] = kelondroRecords . long2bytes ( updateTime , 8 ) ;
row [ 2 ] = kelondroRecords . long2bytes ( updateTime , 8 ) ;
@ -203,7 +182,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
int i = dumpArray . size ( ) ;
int i = dumpArray . size ( ) ;
String wordHash ;
String wordHash ;
//long creationTime;
//long creationTime;
plasmaWordIndexEntryInstance wordEntry ;
indexURLEntry wordEntry ;
byte [ ] [ ] row ;
byte [ ] [ ] row ;
//Runtime rt = Runtime.getRuntime();
//Runtime rt = Runtime.getRuntime();
while ( i - - > 0 ) {
while ( i - - > 0 ) {
@ -212,7 +191,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
if ( ( row [ 0 ] = = null ) | | ( row [ 1 ] = = null ) | | ( row [ 2 ] = = null ) | | ( row [ 3 ] = = null ) | | ( row [ 4 ] = = null ) ) continue ;
if ( ( row [ 0 ] = = null ) | | ( row [ 1 ] = = null ) | | ( row [ 2 ] = = null ) | | ( row [ 3 ] = = null ) | | ( row [ 4 ] = = null ) ) continue ;
wordHash = new String ( row [ 0 ] , "UTF-8" ) ;
wordHash = new String ( row [ 0 ] , "UTF-8" ) ;
//creationTime = kelondroRecords.bytes2long(row[2]);
//creationTime = kelondroRecords.bytes2long(row[2]);
wordEntry = new plasmaWordIndexEntryInstance ( new String ( row [ 3 ] , "UTF-8" ) , new String ( row [ 4 ] , "UTF-8" ) ) ;
wordEntry = new indexURLEntry ( new String ( row [ 3 ] , "UTF-8" ) , new String ( row [ 4 ] , "UTF-8" ) ) ;
// store to cache
// store to cache
addEntry ( wordHash , wordEntry , startTime , false ) ;
addEntry ( wordHash , wordEntry , startTime , false ) ;
urlCount + + ;
urlCount + + ;
@ -288,7 +267,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
public int indexSize ( String wordHash ) {
public int indexSize ( String wordHash ) {
int size = 0 ;
int size = 0 ;
plasmaWordIndexEntryContainer cacheIndex = ( plasmaWordIndexEntry Container) wCache . get ( wordHash ) ;
indexTreeMapContainer cacheIndex = ( indexTreeMap Container) wCache . get ( wordHash ) ;
if ( cacheIndex ! = null ) size + = cacheIndex . size ( ) ;
if ( cacheIndex ! = null ) size + = cacheIndex . size ( ) ;
return size ;
return size ;
}
}
@ -302,13 +281,13 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
// find entries in kCache that are too old for that place and shift them to the wCache
// find entries in kCache that are too old for that place and shift them to the wCache
long time ;
long time ;
Long l ;
Long l ;
plasmaWordIndexEntry Container container ;
indexTreeMap Container container ;
synchronized ( kCache ) {
synchronized ( kCache ) {
while ( kCache . size ( ) > 0 ) {
while ( kCache . size ( ) > 0 ) {
l = ( Long ) kCache . firstKey ( ) ;
l = ( Long ) kCache . firstKey ( ) ;
time = l . longValue ( ) ;
time = l . longValue ( ) ;
if ( System . currentTimeMillis ( ) - time < kCacheMaxAge ) return ;
if ( System . currentTimeMillis ( ) - time < kCacheMaxAge ) return ;
container = ( plasmaWordIndexEntry Container) kCache . remove ( l ) ;
container = ( indexTreeMap Container) kCache . remove ( l ) ;
addEntries ( container , container . updated ( ) , false ) ;
addEntries ( container , container . updated ( ) , false ) ;
}
}
}
}
@ -362,13 +341,13 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
}
}
public indexContainer getContainer ( String wordHash , boolean deleteIfEmpty , long maxtime_dummy ) {
public indexContainer getContainer ( String wordHash , boolean deleteIfEmpty , long maxtime_dummy ) {
return ( plasmaWordIndexEntry Container) wCache . get ( wordHash ) ;
return ( indexTreeMap Container) wCache . get ( wordHash ) ;
}
}
public indexContainer deleteContainer ( String wordHash ) {
public indexContainer deleteContainer ( String wordHash ) {
// returns the index that had been deleted
// returns the index that had been deleted
synchronized ( wCache ) {
synchronized ( wCache ) {
plasmaWordIndexEntryContainer container = ( plasmaWordIndexEntry Container) wCache . remove ( wordHash ) ;
indexTreeMapContainer container = ( indexTreeMap Container) wCache . remove ( wordHash ) ;
hashScore . deleteScore ( wordHash ) ;
hashScore . deleteScore ( wordHash ) ;
hashDate . deleteScore ( wordHash ) ;
hashDate . deleteScore ( wordHash ) ;
return container ;
return container ;
@ -379,7 +358,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
if ( urlHashes . length = = 0 ) return 0 ;
if ( urlHashes . length = = 0 ) return 0 ;
int count = 0 ;
int count = 0 ;
synchronized ( wCache ) {
synchronized ( wCache ) {
plasmaWordIndexEntryContainer c = ( plasmaWordIndexEntry Container) deleteContainer ( wordHash ) ;
indexTreeMapContainer c = ( indexTreeMap Container) deleteContainer ( wordHash ) ;
if ( c ! = null ) {
if ( c ! = null ) {
count = c . removeEntries ( wordHash , urlHashes , deleteComplete ) ;
count = c . removeEntries ( wordHash , urlHashes , deleteComplete ) ;
if ( c . size ( ) ! = 0 ) this . addEntries ( c , System . currentTimeMillis ( ) , false ) ;
if ( c . size ( ) ! = 0 ) this . addEntries ( c , System . currentTimeMillis ( ) , false ) ;
@ -397,13 +376,13 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
Iterator i = kCache . entrySet ( ) . iterator ( ) ;
Iterator i = kCache . entrySet ( ) . iterator ( ) ;
Map . Entry entry ;
Map . Entry entry ;
Long l ;
Long l ;
plasmaWordIndexEntry Container c ;
indexTreeMap Container c ;
while ( i . hasNext ( ) ) {
while ( i . hasNext ( ) ) {
entry = ( Map . Entry ) i . next ( ) ;
entry = ( Map . Entry ) i . next ( ) ;
l = ( Long ) entry . getKey ( ) ;
l = ( Long ) entry . getKey ( ) ;
// get container
// get container
c = ( plasmaWordIndexEntry Container) entry . getValue ( ) ;
c = ( indexTreeMap Container) entry . getValue ( ) ;
if ( c . remove ( urlHash ) ! = null ) {
if ( c . remove ( urlHash ) ! = null ) {
if ( c . size ( ) = = 0 ) {
if ( c . size ( ) = = 0 ) {
i . remove ( ) ;
i . remove ( ) ;
@ -431,8 +410,8 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
} else synchronized ( wCache ) {
} else synchronized ( wCache ) {
// put container into wCache
// put container into wCache
String wordHash = container . wordHash ( ) ;
String wordHash = container . wordHash ( ) ;
plasmaWordIndexEntryContainer entries = ( plasmaWordIndexEntry Container) wCache . get ( wordHash ) ; // null pointer exception? wordhash != null! must be cache==null
indexTreeMapContainer entries = ( indexTreeMap Container) wCache . get ( wordHash ) ; // null pointer exception? wordhash != null! must be cache==null
if ( entries = = null ) entries = new plasmaWordIndexEntry Container( wordHash ) ;
if ( entries = = null ) entries = new indexTreeMap Container( wordHash ) ;
added = entries . add ( container , - 1 ) ;
added = entries . add ( container , - 1 ) ;
if ( added > 0 ) {
if ( added > 0 ) {
wCache . put ( wordHash , entries ) ;
wCache . put ( wordHash , entries ) ;
@ -447,15 +426,15 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
public indexContainer addEntry ( String wordHash , indexEntry newEntry , long updateTime , boolean dhtCase ) {
public indexContainer addEntry ( String wordHash , indexEntry newEntry , long updateTime , boolean dhtCase ) {
if ( dhtCase ) synchronized ( kCache ) {
if ( dhtCase ) synchronized ( kCache ) {
// put container into kCache
// put container into kCache
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntry Container( wordHash ) ;
indexTreeMapContainer container = new indexTreeMap Container( wordHash ) ;
container . add ( newEntry ) ;
container . add ( newEntry ) ;
kCache . put ( new Long ( updateTime + kCacheInc ) , container ) ;
kCache . put ( new Long ( updateTime + kCacheInc ) , container ) ;
kCacheInc + + ;
kCacheInc + + ;
if ( kCacheInc > 10000 ) kCacheInc = 0 ;
if ( kCacheInc > 10000 ) kCacheInc = 0 ;
return null ;
return null ;
} else synchronized ( wCache ) {
} else synchronized ( wCache ) {
plasmaWordIndexEntryContainer container = ( plasmaWordIndexEntry Container) wCache . get ( wordHash ) ;
indexTreeMapContainer container = ( indexTreeMap Container) wCache . get ( wordHash ) ;
if ( container = = null ) container = new plasmaWordIndexEntry Container( wordHash ) ;
if ( container = = null ) container = new indexTreeMap Container( wordHash ) ;
indexEntry [ ] entries = new indexEntry [ ] { newEntry } ;
indexEntry [ ] entries = new indexEntry [ ] { newEntry } ;
if ( container . add ( entries , updateTime ) > 0 ) {
if ( container . add ( entries , updateTime ) > 0 ) {
wCache . put ( wordHash , container ) ;
wCache . put ( wordHash , container ) ;