// ResourceObserver.java
// -----------------------
// (c) David Wieditz; lotus at mail.berlios.de
// first published 6.2.2010
//
// based on the former code (c) by Detlef Reichl; detlef!reichl()gmx!org
// Pforzheim, Germany, 2008
//
// part of YaCy
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search ;
import java.io.File ;
import java.io.IOException ;
import org.apache.commons.io.FileUtils ;
import net.yacy.cora.document.WordCache ;
import net.yacy.cora.protocol.Domains ;
import net.yacy.cora.util.ConcurrentLog ;
import net.yacy.crawler.data.Cache ;
import net.yacy.crawler.data.ResultURLs ;
import net.yacy.kelondro.data.word.WordReference ;
import net.yacy.kelondro.rwi.IndexCell ;
import net.yacy.kelondro.util.MemoryControl ;
import net.yacy.peers.NewsPool ;
import net.yacy.peers.operation.yacyRelease ;
import net.yacy.search.query.SearchEventCache ;
public class ResourceObserver {
public static final ConcurrentLog log = new ConcurrentLog ( "RESOURCE OBSERVER" ) ;
/** status type for which shows where in the control-circuit model a memory state can be categorized */
public enum Space implements Comparable < Space > {
/** smallest space state, outside of over/undershot */
EXHAUSTED ,
/** wanted-space state between steady-state and under/overshot */
NOMINAL ,
/** largest space state, below steady-state */
AMPLE ;
}
private final Switchboard sb ;
private final File path ; // path to check
private Space normalizedDiskFree = Space . AMPLE ;
private Space normalizedDiskUsed = Space . AMPLE ;
private Space normalizedMemoryFree = Space . AMPLE ;
public ResourceObserver ( final Switchboard sb ) {
this . sb = sb ;
this . path = sb . getDataPath ( SwitchboardConstants . INDEX_PRIMARY_PATH , "" ) . getParentFile ( ) ;
log . info ( "path for disc space measurement: " + this . path ) ;
}
/ * *
* checks the resources and pauses crawls if necessary
* /
public void resourceObserverJob ( ) {
MemoryControl . setProperMbyte ( getMinFreeMemory ( ) ) ; // may change by user config
this . normalizedDiskFree = getNormalizedDiskFree ( ) ;
this . normalizedDiskUsed = getNormalizedDiskUsed ( true ) ;
this . normalizedMemoryFree = getNormalizedMemoryFree ( ) ;
// take actions if disk space is below AMPLE
if ( this . normalizedDiskFree ! = Space . AMPLE | |
this . normalizedDiskUsed ! = Space . AMPLE | |
this . normalizedMemoryFree ! = Space . AMPLE ) {
String reason = "" ;
if ( this . normalizedDiskFree ! = Space . AMPLE ) reason + = " not enough disk space, " + getUsableSpace ( ) ;
if ( this . normalizedDiskUsed ! = Space . AMPLE ) reason + = " too high disk usage, " + getNormalizedDiskUsed ( true ) ;
if ( this . normalizedMemoryFree ! = Space . AMPLE ) reason + = " not enough memory space" ;
if ( ! this . sb . crawlJobIsPaused ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL ) ) {
log . info ( "pausing local crawls" ) ;
this . sb . pauseCrawlJob ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL , "resource observer:" + reason ) ;
this . sb . setConfig ( SwitchboardConstants . CRAWLJOB_LOCAL_AUTODISABLED , true ) ;
}
if ( ! this . sb . crawlJobIsPaused ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL ) ) {
log . info ( "pausing remote triggered crawls" ) ;
this . sb . pauseCrawlJob ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL , "resource observer:" + reason ) ;
this . sb . setConfig ( SwitchboardConstants . CRAWLJOB_REMOTE_AUTODISABLED , true ) ;
}
if ( ( this . normalizedDiskFree = = Space . EXHAUSTED | | this . normalizedMemoryFree ! = Space . AMPLE ) & & this . sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , false ) ) {
log . info ( "disabling index receive" ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , false ) ;
this . sb . peers . mySeed ( ) . setFlagAcceptRemoteIndex ( false ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , true ) ;
}
}
// shrink resources if space is EXHAUSTED
if ( ( this . normalizedDiskFree = = Space . EXHAUSTED & & this . sb . getConfigBool ( SwitchboardConstants . RESOURCE_DISK_FREE_AUTOREGULATE , SwitchboardConstants . RESOURCE_DISK_FREE_AUTOREGULATE_DEFAULT ) ) | |
( this . normalizedDiskUsed = = Space . EXHAUSTED & & this . sb . getConfigBool ( SwitchboardConstants . RESOURCE_DISK_USED_AUTOREGULATE , SwitchboardConstants . RESOURCE_DISK_USED_AUTOREGULATE_DEFAULT ) ) ) {
shrinkmethods : while ( true /*this is not a loop, just a construct that we can leave with a break*/ ) {
// delete old releases
if ( yacyRelease . deleteOldDownloads ( sb . releasePath , 1 ) ) log . warn ( "DISK SPACE EXHAUSTED - deleting downloaded releases files" ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// clear HTCACHE
log . info ( "DISK SPACE EXHAUSTED - deleting HTCACHE" ) ;
Cache . clear ( ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// delete logs
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// delete robots.txt
log . info ( "DISK SPACE EXHAUSTED - deleting robots.txt database" ) ;
try { sb . robots . clear ( ) ; } catch ( final IOException e ) { }
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// delete news
log . info ( "DISK SPACE EXHAUSTED - deleting News database" ) ;
sb . peers . newsPool . clear ( NewsPool . INCOMING_DB ) ; sb . peers . newsPool . clear ( NewsPool . PROCESSED_DB ) ;
sb . peers . newsPool . clear ( NewsPool . OUTGOING_DB ) ; sb . peers . newsPool . clear ( NewsPool . PUBLISHED_DB ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// clear citations
if ( sb . index . connectedCitation ( ) ) {
log . info ( "DISK SPACE EXHAUSTED - deleting citations" ) ;
try { sb . index . urlCitation ( ) . clear ( ) ; } catch ( final IOException e ) { }
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
}
// throw away crawl queues, if they are large
if ( sb . crawlQueues . coreCrawlJobSize ( ) > 1000 ) {
log . info ( "DISK SPACE EXHAUSTED - deleting crawl queues" ) ;
sb . crawlQueues . clear ( ) ;
sb . crawlStacker . clear ( ) ;
ResultURLs . clearStacks ( ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
}
// cut away too large RWIs
IndexCell < WordReference > termIndex = sb . index . termIndex ( ) ;
if ( termIndex ! = null ) try {
int shrinkedReferences = termIndex . deleteOld ( 100 , 10000 ) ;
if ( shrinkedReferences > 0 ) {
log . info ( "DISK SPACE EXHAUSTED - shrinked " + shrinkedReferences + " RWI references to a maximum of 100" ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
}
} catch ( IOException e ) {
}
// delete too old RWIs
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// delete fulltext from large Solr documents
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// run a solr optimize
this . sb . index . fulltext ( ) . commit ( false ) ;
//this.sb.index.fulltext().optimize(1);
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break shrinkmethods ;
/ *
// delete old Solr documents
long day = 1000 * 60 * 60 * 24 ;
for ( int t = 12 ; t > = 1 ; t - - ) {
log . info ( "DISK SPACE EXHAUSTED - deleting documents with loaddate > " + t + " months" ) ;
this . sb . index . fulltext ( ) . deleteOldDocuments ( t * 30 * day , true ) ;
this . sb . index . fulltext ( ) . commit ( false ) ;
this . sb . index . fulltext ( ) . optimize ( 1 ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break shrinkmethods ;
}
for ( int t = 30 ; t > 3 ; t - - ) {
log . info ( "DISK SPACE EXHAUSTED - deleting documents with loaddate > " + t + " days" ) ;
this . sb . index . fulltext ( ) . deleteOldDocuments ( t * day , true ) ;
this . sb . index . fulltext ( ) . commit ( false ) ;
this . sb . index . fulltext ( ) . optimize ( 1 ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break shrinkmethods ;
}
* /
// WE SHOULD NEVER GET UP TO HERE...
/ *
// delete ALL RWIs
if ( sb . index . termIndex ( ) ! = null ) {
try { sb . index . termIndex ( ) . clear ( ) ; } catch ( final IOException e ) { }
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
}
// delete full Solr
try { sb . index . fulltext ( ) . clearLocalSolr ( ) ; } catch ( final IOException e ) { }
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
* /
break ; // DO NOT REMOVE THIS, the loop may run forever. It shall run only once.
}
this . normalizedDiskFree = getNormalizedDiskFree ( ) ;
this . normalizedDiskUsed = getNormalizedDiskUsed ( false ) ;
this . normalizedMemoryFree = getNormalizedMemoryFree ( ) ;
}
// normalize state if the resources are AMPLE
if ( this . normalizedDiskFree = = Space . AMPLE & & this . normalizedDiskUsed = = Space . AMPLE & & this . normalizedMemoryFree = = Space . AMPLE ) {
if ( this . sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , false ) ) { // we were wrong!
log . info ( "enabling index receive" ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , true ) ;
this . sb . peers . mySeed ( ) . setFlagAcceptRemoteIndex ( true ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , false ) ;
} else if ( this . sb . getConfigBool ( SwitchboardConstants . CRAWLJOB_LOCAL_AUTODISABLED , false ) ) {
log . info ( "continue paused local crawls" ) ;
this . sb . setConfig ( SwitchboardConstants . CRAWLJOB_LOCAL_AUTODISABLED , false ) ;
this . sb . continueCrawlJob ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL ) ;
} else if ( this . sb . getConfigBool ( SwitchboardConstants . CRAWLJOB_REMOTE_AUTODISABLED , false ) ) {
log . info ( "continue paused remote triggered crawls" ) ;
this . sb . setConfig ( SwitchboardConstants . CRAWLJOB_REMOTE_AUTODISABLED , false ) ;
this . sb . continueCrawlJob ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL ) ;
}
log . info ( "resources ok" ) ;
}
}
private long sizeOfDirectory_lastCountTime = 0 ;
private long sizeOfDirectory_lastCountValue = 0 ;
public long getSizeOfDataPath ( final boolean cached ) {
if ( cached & & System . currentTimeMillis ( ) - this . sizeOfDirectory_lastCountTime < 600000 ) return this . sizeOfDirectory_lastCountValue ;
this . sizeOfDirectory_lastCountTime = System . currentTimeMillis ( ) ;
try {
this . sizeOfDirectory_lastCountValue = FileUtils . sizeOfDirectory ( this . path ) ;
} catch ( Throwable e ) { } // org.apache.commons.io.FileUtils.sizeOf calls sizes of files which are there temporary and may cause an exception. Thats a bug inside FileUtils
return this . sizeOfDirectory_lastCountValue ;
}
public long getUsableSpace ( ) {
return this . path . getUsableSpace ( ) ;
}
private Space getNormalizedDiskUsed ( final boolean cached ) {
final long currentUsed = getSizeOfDataPath ( cached ) ;
//final long currentSpace = getUsableSpace(this.path);
if ( currentUsed < 1L ) return Space . AMPLE ;
Space ret = Space . AMPLE ;
if ( currentUsed > getMaxUsedDiskOvershot ( ) ) {
log . warn ( "Volume " + this . path . toString ( ) + ": used space (" + ( currentUsed / 1024 / 1024 ) + " MB) is too high (> " + ( getMaxUsedDiskOvershot ( ) / 1024 / 1024 ) + " MB)" ) ;
ret = Space . EXHAUSTED ;
return ret ;
}
if ( currentUsed > getMaxUsedDiskSteadystate ( ) ) {
log . info ( "Volume " + this . path . toString ( ) + ": used space (" + ( currentUsed / 1024 / 1024 ) + " MB) is high, but nominal (> " + ( getMaxUsedDiskSteadystate ( ) / 1024 / 1024 ) + " MB)" ) ;
ret = Space . NOMINAL ;
return ret ;
}
return ret ;
}
/ * *
* returns the amount of disk space available
* @return < ul >
* < li > < code > HIGH < / code > if disk space is available < / li >
* < li > < code > MEDIUM < / code > if low disk space is available < / li >
* < li > < code > LOW < / code > if lower than hardlimit disk space is available < / li >
* < / ul >
* /
private Space getNormalizedDiskFree ( ) {
final long currentSpace = getUsableSpace ( ) ;
//final long currentSpace = getUsableSpace(this.path);
if ( currentSpace < 1L ) return Space . AMPLE ; // this happens if the function does not work, like on Windows
Space ret = Space . AMPLE ;
if ( currentSpace < getMinFreeDiskUndershot ( ) ) {
log . warn ( "Volume " + this . path . toString ( ) + ": free space (" + ( currentSpace / 1024 / 1024 ) + " MB) is too low (< " + ( getMinFreeDiskSteadystate ( ) / 1024 / 1024 ) + " MB)" ) ;
ret = Space . EXHAUSTED ;
return ret ;
}
if ( currentSpace < getMinFreeDiskSteadystate ( ) ) {
log . info ( "Volume " + this . path . toString ( ) + ": free space (" + ( currentSpace / 1024 / 1024 ) + " MB) is low, but nominal (< " + ( getMinFreeDiskSteadystate ( ) / 1024 / 1024 ) + " MB)" ) ;
ret = Space . NOMINAL ;
return ret ;
}
return ret ;
}
private Space getNormalizedMemoryFree ( ) {
if ( MemoryControl . properState ( ) ) return Space . AMPLE ;
// clear some caches - @all: are there more of these, we could clear here?
this . sb . index . clearCaches ( ) ;
SearchEventCache . cleanupEvents ( true ) ;
this . sb . trail . clear ( ) ;
Switchboard . urlBlacklist . clearblacklistCache ( ) ;
WordCache . clearCommonWords ( ) ;
Domains . clear ( ) ;
return MemoryControl . properState ( ) ? Space . AMPLE : Space . EXHAUSTED ;
}
/ * *
* @return < code > true < / code > if disk space is available
* /
public boolean getDiskAvailable ( ) {
return this . normalizedDiskFree = = Space . AMPLE ;
}
/ * *
* @return < code > true < / code > if memory is available
* /
public boolean getMemoryAvailable ( ) {
return this . normalizedMemoryFree = = Space . AMPLE ;
}
/ * *
* @return the maximum amount of space ( bytes ) that should be used as steady state
* /
public long getMaxUsedDiskSteadystate ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_USED_MAX_STEADYSTATE ,
SwitchboardConstants . RESOURCE_DISK_USED_MAX_STEADYSTATE_DEFAULT ) /* MB */ * 1024L * 1024L ;
}
/ * *
* @return the maximum amount of space ( bytes ) that should be used as hard limit ; the limit when autoregulation to steady state should start
* /
public long getMaxUsedDiskOvershot ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_USED_MAX_OVERSHOT ,
SwitchboardConstants . RESOURCE_DISK_USED_MAX_OVERSHOT_DEFAULT ) /* MB */ * 1024L * 1024L ;
}
/ * *
* @return amount of space ( bytes ) that should be kept free as steady state
* /
public long getMinFreeDiskSteadystate ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_FREE_MIN_STEADYSTATE ,
SwitchboardConstants . RESOURCE_DISK_FREE_MIN_STEADYSTATE_DEFAULT ) /* MB */ * 1024L * 1024L ;
}
/ * *
* @return amount of space ( bytes ) that should at least be kept free as hard limit ; the limit when autoregulation to steady state should start
* /
public long getMinFreeDiskUndershot ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_FREE_MIN_UNDERSHOT ,
SwitchboardConstants . RESOURCE_DISK_FREE_MIN_UNDERSHOT_DEFAULT ) /* MB */ * 1024L * 1024L ;
}
/ * *
* @return amount of space ( MiB ) that should at least be free
* /
public long getMinFreeMemory ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . MEMORY_ACCEPTDHT , 0 ) ;
}
}