@@ -110,7 +110,6 @@ import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.order.Base64Order;
 import net.yacy.kelondro.order.Digest;
 import net.yacy.kelondro.order.NaturalOrder;
-import net.yacy.kelondro.rwi.ReferenceContainerCache;
 import net.yacy.kelondro.util.EventTracker;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.kelondro.util.MemoryControl;
@@ -170,7 +169,6 @@ import de.anomic.yacy.yacyRelease;
 import de.anomic.yacy.dht.Dispatcher;
 import de.anomic.yacy.dht.PeerSelection;
 import de.anomic.yacy.graphics.WebStructureGraph;
-import de.anomic.yacy.graphics.WebStructureGraph.HostReference;
 
 public final class Switchboard extends serverSwitch {
@@ -228,7 +226,7 @@ public final class Switchboard extends serverSwitch {
     public URLLicense licensedURLs;
     public List<Pattern> networkWhitelist, networkBlacklist;
     public FilterEngine domainList;
-    public Dispatcher dhtDispatcher;
+    private Dispatcher dhtDispatcher;
     public LinkedBlockingQueue<String> trail;
     public yacySeedDB peers;
     public WorkTables tables;
@@ -386,7 +384,7 @@ public final class Switchboard extends serverSwitch {
         //final long startedSeedListAquisition = System.currentTimeMillis();
 
         // init a DHT transmission dispatcher
-        this.dhtDispatcher = new Dispatcher(
+        this.dhtDispatcher = (peers.sizeConnected() == 0) ? null : new Dispatcher(
                 indexSegments.segment(Segments.Process.LOCALCRAWLING),
                 peers,
                 true,
@@ -451,8 +449,9 @@ public final class Switchboard extends serverSwitch {
         BlockRank.loadBlockRankTable(rankingPath, 8);
 
         // load distributed ranking
-        final File hostIndexFile = new File(queuesRoot, "hostIndex.blob");
         // very large memory configurations allow to re-compute a ranking table
+        /*
+        final File hostIndexFile = new File(queuesRoot, "hostIndex.blob");
         if (MemoryControl.available() > 1024 * 1024 * 1024) new Thread() {
             public void run() {
                 ReferenceContainerCache<HostReference> hostIndex; // this will get large, more than 0.5 million entries by now
@@ -470,6 +469,7 @@ public final class Switchboard extends serverSwitch {
                 //BlockRank.storeBlockRankTable(rankingPath);
             }
         }.start();
+        */
 
         // load the robots.txt db
         this.log.logConfig("Initializing robots.txt DB");
@@ -639,27 +639,26 @@ public final class Switchboard extends serverSwitch {
         this.clusterhashes = this.peers.clusterHashes(getConfig("cluster.peers.yacydomain", ""));
 
         // deploy blocking threads
-        int indexerThreads = Math.max(1, WorkflowProcessor.useCPU / 2);
         this.indexingStorageProcessor = new WorkflowProcessor<indexingQueueEntry>(
                 "storeDocumentIndex",
                 "This is the sequencing step of the indexing queue. Files are written as streams, too much councurrency would destroy IO performance. In this process the words are written to the RWI cache, which flushes if it is full.",
                 new String[]{"RWI/Cache/Collections"},
-                this, "storeDocumentIndex", 2 * WorkflowProcessor.useCPU, null, indexerThreads);
+                this, "storeDocumentIndex", 2 * WorkflowProcessor.availableCPU, null, 1 /*Math.max(1, WorkflowProcessor.availableCPU / 2)*/);
         this.indexingAnalysisProcessor = new WorkflowProcessor<indexingQueueEntry>(
                 "webStructureAnalysis",
                 "This just stores the link structure of the document into a web structure database.",
                 new String[]{"storeDocumentIndex"},
-                this, "webStructureAnalysis", 2 * WorkflowProcessor.useCPU, indexingStorageProcessor, WorkflowProcessor.useCPU + 1);
+                this, "webStructureAnalysis", 2 * WorkflowProcessor.availableCPU, indexingStorageProcessor, WorkflowProcessor.availableCPU);
         this.indexingCondensementProcessor = new WorkflowProcessor<indexingQueueEntry>(
                 "condenseDocument",
                 "This does a structural analysis of plain texts: markup of headlines, slicing into phrases (i.e. sentences), markup with position, counting of words, calculation of term frequency.",
                 new String[]{"webStructureAnalysis"},
-                this, "condenseDocument", 4 * WorkflowProcessor.useCPU, indexingAnalysisProcessor, WorkflowProcessor.useCPU + 1);
+                this, "condenseDocument", 4 * WorkflowProcessor.availableCPU, indexingAnalysisProcessor, WorkflowProcessor.availableCPU);
         this.indexingDocumentProcessor = new WorkflowProcessor<indexingQueueEntry>(
                 "parseDocument",
                 "This does the parsing of the newly loaded documents from the web. The result is not only a plain text document, but also a list of URLs that are embedded into the document. The urls are handed over to the CrawlStacker. This process has two child process queues!",
                 new String[]{"condenseDocument", "CrawlStacker"},
-                this, "parseDocument", 4 * WorkflowProcessor.useCPU, indexingCondensementProcessor, WorkflowProcessor.useCPU + 1);
+                this, "parseDocument", 4 * WorkflowProcessor.availableCPU, indexingCondensementProcessor, WorkflowProcessor.availableCPU);
 
         // deploy busy threads
         log.logConfig("Starting Threads");
@@ -894,7 +893,7 @@ public final class Switchboard extends serverSwitch {
         // shut down
         this.crawler.close();
-        this.dhtDispatcher.close();
+        if (this.dhtDispatcher != null) this.dhtDispatcher.close();
         synchronized (this.indexSegments) {
             this.indexSegments.close();
         }
@@ -952,7 +951,7 @@ public final class Switchboard extends serverSwitch {
                 this.queuesRoot);
 
         // init a DHT transmission dispatcher
-        dhtDispatcher = new Dispatcher(
+        dhtDispatcher = (peers.sizeConnected() == 0) ? null : new Dispatcher(
                 indexSegments.segment(Segments.Process.LOCALCRAWLING),
                 peers,
                 true,
@@ -1245,7 +1244,7 @@ public final class Switchboard extends serverSwitch {
         indexingCondensementProcessor.announceShutdown();
         indexingAnalysisProcessor.announceShutdown();
         indexingStorageProcessor.announceShutdown();
-        dhtDispatcher.close();
+        if (dhtDispatcher != null) dhtDispatcher.close();
         indexingCondensementProcessor.awaitShutdown(12000);
         indexingAnalysisProcessor.awaitShutdown(12000);
         indexingStorageProcessor.awaitShutdown(12000);
@@ -2379,6 +2378,7 @@ public final class Switchboard extends serverSwitch {
     }
 
     public boolean dhtTransferJob(final String segment) {
+        if (dhtDispatcher == null) return false;
         final String rejectReason = dhtShallTransfer(segment);
         if (rejectReason != null) {
             if (this.log.isFine()) {