@ -323,10 +323,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
new serverInstantThread ( this , "deQueue" , "queueSize" ) , 10000 ) ;
new serverInstantThread ( this , "deQueue" , "queueSize" ) , 10000 ) ;
deployThread ( "70_cachemanager" , "Proxy Cache Enqueue" , "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack" ,
deployThread ( "70_cachemanager" , "Proxy Cache Enqueue" , "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack" ,
new serverInstantThread ( cacheManager , "job" , "size" ) , 10000 ) ;
new serverInstantThread ( cacheManager , "job" , "size" ) , 10000 ) ;
deployThread ( "60_globalcrawl" , "Global Crawl" , "thread that performes a single crawl/indexing step of a web page for global crawling" ,
deployThread ( "62_remotetriggeredcrawl" , "Remote Crawl Job" , "thread that performes a single crawl/indexing step triggered by a remote peer" ,
new serverInstantThread ( this , "globalCrawlJob" , "globalCrawlJobSize" ) , 30000 ) ;
new serverInstantThread ( this , "remoteTriggeredCrawlJob" , "remoteTriggeredCrawlJobSize" ) , 30000 ) ;
deployThread ( "61_globalcrawltrigger" , "Global Crawl Trigger" , "thread that triggeres remote peers for crawling" ,
new serverInstantThread ( this , "limitCrawlTriggerJob" , "limitCrawlTriggerJobSize" ) , 30000 ) ;
deployThread ( "50_localcrawl" , "Local Crawl" , "thread that performes a single crawl step from the local crawl queue" ,
deployThread ( "50_localcrawl" , "Local Crawl" , "thread that performes a single crawl step from the local crawl queue" ,
new serverInstantThread ( this , "localCrawlJob" , "localCrawlJobSize" ) , 10000 ) ;
new serverInstantThread ( this , " coreCrawlJob", "core CrawlJobSize") , 10000 ) ;
deployThread ( "40_peerseedcycle" , "Seed-List Upload" , "task that a principal peer performes to generate and upload a seed-list to a ftp account" ,
deployThread ( "40_peerseedcycle" , "Seed-List Upload" , "task that a principal peer performes to generate and upload a seed-list to a ftp account" ,
new serverInstantThread ( yc , "publishSeedList" , null ) , 180000 ) ;
new serverInstantThread ( yc , "publishSeedList" , null ) , 180000 ) ;
deployThread ( "30_peerping" , "YaCy Core" , "this is the p2p-control and peer-ping task" ,
deployThread ( "30_peerping" , "YaCy Core" , "this is the p2p-control and peer-ping task" ,
@ -374,7 +376,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} catch ( IOException e ) { }
} catch ( IOException e ) { }
}
}
private void cleanProfiles ( ) {
private void cleanProfiles ( ) {
if ( total Size( ) > 0 ) return ;
if ( queue Size( ) > 0 ) return ;
Iterator i = profiles . profiles ( true ) ;
Iterator i = profiles . profiles ( true ) ;
plasmaCrawlProfile . entry entry ;
plasmaCrawlProfile . entry entry ;
try {
try {
@ -428,12 +430,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log . logSystem ( "SWITCHBOARD SHUTDOWN TERMINATED" ) ;
log . logSystem ( "SWITCHBOARD SHUTDOWN TERMINATED" ) ;
}
}
/ *
public int totalSize ( ) {
public int totalSize ( ) {
return processStack . size ( ) + cacheLoader . size ( ) + noticeURL . stackSize ( ) ;
return processStack . size ( ) + cacheLoader . size ( ) + noticeURL . stackSize ( ) ;
}
}
* /
public int queueSize ( ) {
public int queueSize ( ) {
return processStack . s ize( ) ;
return processStack . size ( ) + cacheLoader . size ( ) + noticeURL . stackS ize( ) ;
}
}
public int lUrlSize ( ) {
public int lUrlSize ( ) {
@ -463,7 +467,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// do one processing step
// do one processing step
log . logDebug ( "DEQUEUE: cacheManager=" + ( ( cacheManager . idle ( ) ) ? "idle" : "busy" ) +
log . logDebug ( "DEQUEUE: cacheManager=" + ( ( cacheManager . idle ( ) ) ? "idle" : "busy" ) +
", processStack=" + processStack . size ( ) +
", processStack=" + processStack . size ( ) +
", localStackSize=" + noticeURL . localStackSize ( ) +
", coreStackSize=" + noticeURL . coreStackSize ( ) +
", limitStackSize=" + noticeURL . limitStackSize ( ) +
", overhangStackSize=" + noticeURL . overhangStackSize ( ) +
", remoteStackSize=" + noticeURL . remoteStackSize ( ) ) ;
", remoteStackSize=" + noticeURL . remoteStackSize ( ) ) ;
processResourceStack ( ( plasmaHTCache . Entry ) processStack . removeFirst ( ) ) ;
processResourceStack ( ( plasmaHTCache . Entry ) processStack . removeFirst ( ) ) ;
return true ;
return true ;
@ -529,22 +535,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
}
}
public int local CrawlJobSize( ) {
public int core CrawlJobSize( ) {
return noticeURL . local StackSize( ) ;
return noticeURL . core StackSize( ) ;
}
}
public boolean local CrawlJob( ) {
public boolean core CrawlJob( ) {
if ( noticeURL . local StackSize( ) = = 0 ) {
if ( noticeURL . core StackSize( ) = = 0 ) {
//log.logDebug(" Local Crawl: queue is empty");
//log.logDebug(" Core Crawl: queue is empty");
return false ;
return false ;
}
}
if ( processStack . size ( ) > = crawlSlots ) {
if ( processStack . size ( ) > = crawlSlots ) {
log . logDebug ( " Local Crawl: too many processes in queue, dismissed (" +
log . logDebug ( " Core Crawl: too many processes in queue, dismissed (" +
"processStack=" + processStack . size ( ) + ")" ) ;
"processStack=" + processStack . size ( ) + ")" ) ;
return false ;
return false ;
}
}
if ( cacheLoader . size ( ) > = crawlSlots ) {
if ( cacheLoader . size ( ) > = crawlSlots ) {
log . logDebug ( " Local Crawl: too many loader in queue, dismissed (" +
log . logDebug ( " Core Crawl: too many loader in queue, dismissed (" +
"cacheLoader=" + cacheLoader . size ( ) + ")" ) ;
"cacheLoader=" + cacheLoader . size ( ) + ")" ) ;
return false ;
return false ;
}
}
@ -562,17 +568,91 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
}
}
// do a local crawl (may start a global crawl)
// do a local crawl
plasmaCrawlNURL . entry nex = noticeURL . localPop ( ) ;
plasmaCrawlNURL . entry urlEntry = noticeURL . corePop ( ) ;
processCrawling ( nex , nex . initiator ( ) ) ;
if ( urlEntry . url ( ) = = null ) return false ;
return true ;
String profileHandle = urlEntry . profileHandle ( ) ;
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile . entry profile = profiles . getEntry ( profileHandle ) ;
if ( profile = = null ) {
log . logError ( "LOCALCRAWL[" + noticeURL . coreStackSize ( ) + ", " + noticeURL . remoteStackSize ( ) + "]: LOST PROFILE HANDLE '" + urlEntry . profileHandle ( ) + "' (must be internal error) for URL " + urlEntry . url ( ) ) ;
return false ;
}
log . logDebug ( "LOCALCRAWL: url=" + urlEntry . url ( ) + ", initiator=" + urlEntry . initiator ( ) +
", crawlOrder=" + ( ( profile . remoteIndexing ( ) ) ? "true" : "false" ) + ", depth=" + urlEntry . depth ( ) + ", crawlDepth=" + profile . generalDepth ( ) + ", filter=" + profile . generalFilter ( ) +
", permission=" + ( ( yacyCore . seedDB = = null ) ? "undefined" : ( ( ( yacyCore . seedDB . mySeed . isSenior ( ) ) | | ( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) ? "true" : "false" ) ) ) ;
return processLocalCrawling ( urlEntry , profile ) ;
}
/**
 * Reports the number of entries currently waiting on the limit stack of the
 * notice-URL queue. This method is registered by name as the size method of
 * the "61_globalcrawltrigger" thread.
 *
 * @return the current size of the limit stack
 */
public int limitCrawlTriggerJobSize() {
    final int pendingLimitEntries = noticeURL.limitStackSize();
    return pendingLimitEntries;
}
public boolean limitCrawlTriggerJob ( ) {
if ( noticeURL . limitStackSize ( ) = = 0 ) {
//log.logDebug("LimitCrawl: queue is empty");
return false ;
}
// if the server is busy, we do crawling more slowly
if ( ! ( cacheManager . idle ( ) ) ) try { Thread . currentThread ( ) . sleep ( 2000 ) ; } catch ( InterruptedException e ) { }
// if crawling was paused we have to wait until we wer notified to continue
synchronized ( this . crawlingPausedSync ) {
if ( this . crawlingIsPaused ) {
try {
this . crawlingPausedSync . wait ( ) ;
}
catch ( InterruptedException e ) { return false ; }
}
}
// start a global crawl, if possible
plasmaCrawlNURL . entry urlEntry = noticeURL . limitPop ( ) ;
if ( urlEntry . url ( ) = = null ) return false ;
String profileHandle = urlEntry . profileHandle ( ) ;
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile . entry profile = profiles . getEntry ( profileHandle ) ;
if ( profile = = null ) {
log . logError ( "REMOTECRAWLTRIGGER[" + noticeURL . coreStackSize ( ) + ", " + noticeURL . remoteStackSize ( ) + "]: LOST PROFILE HANDLE '" + urlEntry . profileHandle ( ) + "' (must be internal error) for URL " + urlEntry . url ( ) ) ;
return false ;
}
log . logDebug ( "plasmaSwitchboard.limitCrawlTriggerJob: url=" + urlEntry . url ( ) + ", initiator=" + urlEntry . initiator ( ) +
", crawlOrder=" + ( ( profile . remoteIndexing ( ) ) ? "true" : "false" ) + ", depth=" + urlEntry . depth ( ) + ", crawlDepth=" + profile . generalDepth ( ) + ", filter=" + profile . generalFilter ( ) +
", permission=" + ( ( yacyCore . seedDB = = null ) ? "undefined" : ( ( ( yacyCore . seedDB . mySeed . isSenior ( ) ) | | ( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) ? "true" : "false" ) ) ) ;
boolean tryRemote =
( profile . remoteIndexing ( ) ) /* granted */ & &
( urlEntry . initiator ( ) ! = null ) & & ( ! ( urlEntry . initiator ( ) . equals ( plasmaURL . dummyHash ) ) ) /* not proxy */ & &
( ( yacyCore . seedDB . mySeed . isSenior ( ) ) | |
( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) /* qualified */ ;
if ( tryRemote ) {
boolean success = processRemoteCrawlTrigger ( urlEntry ) ;
if ( success ) return true ;
}
// alternatively do a local crawl
if ( processStack . size ( ) > = crawlSlots ) {
log . logDebug ( "LimitCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack . size ( ) + ")" ) ;
return false ;
}
if ( cacheLoader . size ( ) > = crawlSlots ) {
log . logDebug ( "LimitCrawl: too many loader in queue, dismissed (" +
"cacheLoader=" + cacheLoader . size ( ) + ")" ) ;
return false ;
}
processLocalCrawling ( urlEntry , profile ) ;
return false ;
}
}
public int globalCrawlJobSize ( ) {
public int remoteTriggered CrawlJobSize( ) {
return noticeURL . remoteStackSize ( ) ;
return noticeURL . remoteStackSize ( ) ;
}
}
public boolean globalCrawlJob ( ) {
public boolean remoteTriggered CrawlJob( ) {
// work off crawl requests that had been placed by other peers to our crawl stack
// work off crawl requests that had been placed by other peers to our crawl stack
// do nothing if either there are private processes to be done
// do nothing if either there are private processes to be done
@ -586,9 +666,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
"processStack=" + processStack . size ( ) + ")" ) ;
"processStack=" + processStack . size ( ) + ")" ) ;
return false ;
return false ;
}
}
if ( noticeURL . local StackSize( ) > 0 ) {
if ( noticeURL . core StackSize( ) > 0 ) {
log . logDebug ( "GlobalCrawl: any local crawl is in queue, dismissed (" +
log . logDebug ( "GlobalCrawl: any local crawl is in queue, dismissed (" +
" localStackSize=" + noticeURL . local StackSize( ) + ")" ) ;
" coreStackSize=" + noticeURL . core StackSize( ) + ")" ) ;
return false ;
return false ;
}
}
@ -606,9 +686,20 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
// we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
// we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
plasmaCrawlNURL . entry nex = noticeURL . remotePop ( ) ;
plasmaCrawlNURL . entry urlEntry = noticeURL . remotePop ( ) ;
processCrawling ( nex , nex . initiator ( ) ) ;
if ( urlEntry . url ( ) = = null ) return false ;
return true ;
String profileHandle = urlEntry . profileHandle ( ) ;
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile . entry profile = profiles . getEntry ( profileHandle ) ;
if ( profile = = null ) {
log . logError ( "REMOTETRIGGEREDCRAWL[" + noticeURL . coreStackSize ( ) + ", " + noticeURL . remoteStackSize ( ) + "]: LOST PROFILE HANDLE '" + urlEntry . profileHandle ( ) + "' (must be internal error) for URL " + urlEntry . url ( ) ) ;
return false ;
}
log . logDebug ( "plasmaSwitchboard.remoteTriggeredCrawlJob: url=" + urlEntry . url ( ) + ", initiator=" + urlEntry . initiator ( ) +
", crawlOrder=" + ( ( profile . remoteIndexing ( ) ) ? "true" : "false" ) + ", depth=" + urlEntry . depth ( ) + ", crawlDepth=" + profile . generalDepth ( ) + ", filter=" + profile . generalFilter ( ) +
", permission=" + ( ( yacyCore . seedDB = = null ) ? "undefined" : ( ( ( yacyCore . seedDB . mySeed . isSenior ( ) ) | | ( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) ? "true" : "false" ) ) ) ;
return processLocalCrawling ( urlEntry , profile ) ;
}
}
private void processResourceStack ( plasmaHTCache . Entry entry ) {
private void processResourceStack ( plasmaHTCache . Entry entry ) {
@ -687,7 +778,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
}
}
log . logInfo ( "CRAWL: ADDED " + c + " LINKS FROM " + entry . url . toString ( ) +
log . logInfo ( "CRAWL: ADDED " + c + " LINKS FROM " + entry . url . toString ( ) +
", NEW CRAWL STACK SIZE IS " + noticeURL . local StackSize( ) ) ;
", NEW CRAWL STACK SIZE IS " + noticeURL . core StackSize( ) ) ;
}
}
// create index
// create index
@ -839,6 +930,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// store information
// store information
boolean local = ( ( initiatorHash . equals ( plasmaURL . dummyHash ) ) | | ( initiatorHash . equals ( yacyCore . seedDB . mySeed . hash ) ) ) ;
boolean local = ( ( initiatorHash . equals ( plasmaURL . dummyHash ) ) | | ( initiatorHash . equals ( yacyCore . seedDB . mySeed . hash ) ) ) ;
boolean global =
( profile . remoteIndexing ( ) ) /* granted */ & &
( currentdepth = = profile . generalDepth ( ) ) /* leaf node */ & &
( initiatorHash . equals ( yacyCore . seedDB . mySeed . hash ) ) /* not proxy */ & &
( ( yacyCore . seedDB . mySeed . isSenior ( ) ) | |
( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) /* qualified */ ;
noticeURL . newEntry ( initiatorHash , /* initiator, needed for p2p-feedback */
noticeURL . newEntry ( initiatorHash , /* initiator, needed for p2p-feedback */
nexturl , /* url clear text string */
nexturl , /* url clear text string */
loadDate , /* load date */
loadDate , /* load date */
@ -848,7 +946,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
currentdepth , /*depth so far*/
currentdepth , /*depth so far*/
0 , /*anchors, default value */
0 , /*anchors, default value */
0 , /*forkfactor, default value */
0 , /*forkfactor, default value */
( ( local ) ? 1 : 4 ) /*local/remote stack*/
( ( global ) ? plasmaCrawlNURL . STACK_TYPE_LIMIT :
( ( local ) ? plasmaCrawlNURL . STACK_TYPE_CORE : plasmaCrawlNURL . STACK_TYPE_REMOTE ) ) /*local/remote stack*/
) ;
) ;
return null ;
return null ;
@ -870,13 +969,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ( u = = null ) return plasmaURL . dummyHash ; else return u . toString ( ) ;
if ( u = = null ) return plasmaURL . dummyHash ; else return u . toString ( ) ;
}
}
private void processCrawling ( plasmaCrawlNURL . entry urlEntry , String initiator ) {
private void processCrawlingX ( plasmaCrawlNURL . entry urlEntry , String initiator ) {
if ( urlEntry . url ( ) = = null ) return ;
if ( urlEntry . url ( ) = = null ) return ;
String profileHandle = urlEntry . profileHandle ( ) ;
String profileHandle = urlEntry . profileHandle ( ) ;
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile . entry profile = profiles . getEntry ( profileHandle ) ;
plasmaCrawlProfile . entry profile = profiles . getEntry ( profileHandle ) ;
if ( profile = = null ) {
if ( profile = = null ) {
log . logError ( "CRAWL[" + noticeURL . local StackSize( ) + ", " + noticeURL . remoteStackSize ( ) + "]: LOST PROFILE HANDLE '" + urlEntry . profileHandle ( ) + "' (must be internal error) for URL " + urlEntry . url ( ) ) ;
log . logError ( "CRAWL[" + noticeURL . core StackSize( ) + ", " + noticeURL . remoteStackSize ( ) + "]: LOST PROFILE HANDLE '" + urlEntry . profileHandle ( ) + "' (must be internal error) for URL " + urlEntry . url ( ) ) ;
return ;
return ;
}
}
log . logDebug ( "plasmaSwitchboard.processCrawling: url=" + urlEntry . url ( ) + ", initiator=" + urlEntry . initiator ( ) +
log . logDebug ( "plasmaSwitchboard.processCrawling: url=" + urlEntry . url ( ) + ", initiator=" + urlEntry . initiator ( ) +
@ -891,39 +991,41 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) /* qualified */ ;
( yacyCore . seedDB . mySeed . isPrincipal ( ) ) ) /* qualified */ ;
if ( tryRemote ) {
if ( tryRemote ) {
boolean success = process GlobalCrawling ( urlEntry ) ;
boolean success = process RemoteCrawlTrigger ( urlEntry ) ;
if ( ! ( success ) ) processLocalCrawling ( urlEntry , profile , initiator );
if ( ! ( success ) ) processLocalCrawling ( urlEntry , profile );
} else {
} else {
processLocalCrawling ( urlEntry , profile , initiator );
processLocalCrawling ( urlEntry , profile );
}
}
}
}
private void processLocalCrawling ( plasmaCrawlNURL . entry urlEntry , plasmaCrawlProfile . entry profile , String initiator ) {
private boolean processLocalCrawling ( plasmaCrawlNURL . entry urlEntry , plasmaCrawlProfile . entry profile ) {
// work off one Crawl stack entry
// work off one Crawl stack entry
if ( ( urlEntry = = null ) & & ( urlEntry . url ( ) = = null ) ) {
if ( ( urlEntry = = null ) & & ( urlEntry . url ( ) = = null ) ) {
log . logInfo ( "LOCALCRAWL[" + noticeURL . local StackSize( ) + ", " + noticeURL . remoteStackSize ( ) + "]: urlEntry=null" ) ;
log . logInfo ( "LOCALCRAWL[" + noticeURL . core StackSize( ) + ", " + noticeURL . remoteStackSize ( ) + "]: urlEntry=null" ) ;
return ;
return false ;
}
}
cacheLoader . loadParallel ( urlEntry . url ( ) , urlEntry . referrerHash ( ) , initiator , urlEntry . depth ( ) , profile ) ;
cacheLoader . loadParallel ( urlEntry . url ( ) , urlEntry . referrerHash ( ) , urlEntry . initiator ( ) , urlEntry . depth ( ) , profile ) ;
log . logInfo ( "LOCALCRAWL[" + noticeURL . localStackSize ( ) + ", " + noticeURL . remoteStackSize ( ) + "]: enqueued for load " + urlEntry . url ( ) ) ;
log . logInfo ( "LOCALCRAWL[" + noticeURL . coreStackSize ( ) + ", " + noticeURL . remoteStackSize ( ) + "]: enqueued for load " + urlEntry . url ( ) ) ;
return true ;
}
}
private boolean process GlobalCrawling ( plasmaCrawlNURL . entry urlEntry ) {
private boolean process RemoteCrawlTrigger ( plasmaCrawlNURL . entry urlEntry ) {
if ( urlEntry = = null ) {
if ( urlEntry = = null ) {
log . logInfo ( " GLOBALCRAWL[" + noticeURL . local StackSize( ) + ", " + noticeURL . remoteStackSize ( ) + "]: urlEntry=null" ) ;
log . logInfo ( " REMOTECRAWLTRIGGER[" + noticeURL . core StackSize( ) + ", " + noticeURL . remoteStackSize ( ) + "]: urlEntry=null" ) ;
return false ;
return false ;
}
}
// are we qualified?
// are we qualified?
if ( ( yacyCore . seedDB . mySeed = = null ) | |
if ( ( yacyCore . seedDB . mySeed = = null ) | |
( yacyCore . seedDB . mySeed . isJunior ( ) ) ) {
( yacyCore . seedDB . mySeed . isJunior ( ) ) ) {
log . logDebug ( "plasmaSwitchboard.process GlobalCrawling : no permission") ;
log . logDebug ( "plasmaSwitchboard.process RemoteCrawlTrigger : no permission") ;
return false ;
return false ;
}
}
// check url
// check url
if ( urlEntry . url ( ) = = null ) {
if ( urlEntry . url ( ) = = null ) {
log . logDebug ( "ERROR: plasmaSwitchboard.process GlobalCrawling - url is null. name=" + urlEntry . name ( ) ) ;
log . logDebug ( "ERROR: plasmaSwitchboard.process RemoteCrawlTrigger - url is null. name=" + urlEntry . name ( ) ) ;
return false ;
return false ;
}
}
String nexturlString = urlEntry . url ( ) . toString ( ) ;
String nexturlString = urlEntry . url ( ) . toString ( ) ;
@ -932,7 +1034,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// check remote crawl
// check remote crawl
yacySeed remoteSeed = yacyCore . dhtAgent . getCrawlSeed ( urlhash ) ;
yacySeed remoteSeed = yacyCore . dhtAgent . getCrawlSeed ( urlhash ) ;
if ( remoteSeed = = null ) {
if ( remoteSeed = = null ) {
log . logDebug ( "plasmaSwitchboard.process GlobalCrawling : no remote crawl seed available") ;
log . logDebug ( "plasmaSwitchboard.process RemoteCrawlTrigger : no remote crawl seed available") ;
return false ;
return false ;
}
}
@ -960,13 +1062,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
yacyCore . peerActions . peerDeparture ( remoteSeed ) ;
yacyCore . peerActions . peerDeparture ( remoteSeed ) ;
return false ;
return false ;
} else try {
} else try {
log . logDebug ( "plasmaSwitchboard.process GlobalCrawling : remoteSeed=" + remoteSeed . getName ( ) + ", url=" + nexturlString + ", response=" + page . toString ( ) ) ; // DEBUG
log . logDebug ( "plasmaSwitchboard.process RemoteCrawlTrigger : remoteSeed=" + remoteSeed . getName ( ) + ", url=" + nexturlString + ", response=" + page . toString ( ) ) ; // DEBUG
int newdelay = Integer . parseInt ( ( String ) page . get ( "delay" ) ) ;
int newdelay = Integer . parseInt ( ( String ) page . get ( "delay" ) ) ;
yacyCore . dhtAgent . setCrawlDelay ( remoteSeed . hash , newdelay ) ;
yacyCore . dhtAgent . setCrawlDelay ( remoteSeed . hash , newdelay ) ;
String response = ( String ) page . get ( "response" ) ;
String response = ( String ) page . get ( "response" ) ;
if ( response . equals ( "stacked" ) ) {
if ( response . equals ( "stacked" ) ) {
log . logInfo ( " GLOBALCRAWL : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " PLACED URL=" + nexturlString + "; NEW DELAY=" + newdelay ) ;
log . logInfo ( " REMOTECRAWLTRIGGER : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " PLACED URL=" + nexturlString + "; NEW DELAY=" + newdelay ) ;
return true ;
return true ;
} else if ( response . equals ( "double" ) ) {
} else if ( response . equals ( "double" ) ) {
String lurl = ( String ) page . get ( "lurl" ) ;
String lurl = ( String ) page . get ( "lurl" ) ;
@ -974,19 +1076,19 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String propStr = crypt . simpleDecode ( lurl , ( String ) page . get ( "key" ) ) ;
String propStr = crypt . simpleDecode ( lurl , ( String ) page . get ( "key" ) ) ;
plasmaCrawlLURL . entry entry = loadedURL . newEntry ( propStr , true , yacyCore . seedDB . mySeed . hash , remoteSeed . hash , 1 ) ;
plasmaCrawlLURL . entry entry = loadedURL . newEntry ( propStr , true , yacyCore . seedDB . mySeed . hash , remoteSeed . hash , 1 ) ;
noticeURL . remove ( entry . hash ( ) ) ;
noticeURL . remove ( entry . hash ( ) ) ;
log . logInfo ( " GLOBALCRAWL : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " SUPERFLUOUS. CAUSE: " + page . get ( "reason" ) + " (URL=" + nexturlString + "). URL IS CONSIDERED AS 'LOADED!'" ) ;
log . logInfo ( " REMOTECRAWLTRIGGER : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " SUPERFLUOUS. CAUSE: " + page . get ( "reason" ) + " (URL=" + nexturlString + "). URL IS CONSIDERED AS 'LOADED!'" ) ;
return true ;
return true ;
} else {
} else {
log . logInfo ( " GLOBALCRAWL : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " REJECTED. CAUSE: " + page . get ( "reason" ) + " (URL=" + nexturlString + ")" ) ;
log . logInfo ( " REMOTECRAWLTRIGGER : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " REJECTED. CAUSE: " + page . get ( "reason" ) + " (URL=" + nexturlString + ")" ) ;
return false ;
return false ;
}
}
} else {
} else {
log . logInfo ( " GLOBALCRAWL : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " DENIED. RESPONSE=" + response + ", CAUSE=" + page . get ( "reason" ) + ", URL=" + nexturlString ) ;
log . logInfo ( " REMOTECRAWLTRIGGER : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " DENIED. RESPONSE=" + response + ", CAUSE=" + page . get ( "reason" ) + ", URL=" + nexturlString ) ;
return false ;
return false ;
}
}
} catch ( Exception e ) {
} catch ( Exception e ) {
// wrong values
// wrong values
log . logError ( " GLOBALCRAWL : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " FAILED. CLIENT RETURNED: " + page . toString ( ) ) ;
log . logError ( " REMOTECRAWLTRIGGER : REMOTE CRAWL TO PEER " + remoteSeed . getName ( ) + " FAILED. CLIENT RETURNED: " + page . toString ( ) ) ;
e . printStackTrace ( ) ;
e . printStackTrace ( ) ;
return false ;
return false ;
}
}
@ -1337,7 +1439,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int transferred ;
int transferred ;
long starttime = System . currentTimeMillis ( ) ;
long starttime = System . currentTimeMillis ( ) ;
try {
try {
if ( ( total Size( ) = = 0 ) & &
if ( ( queue Size( ) = = 0 ) & &
( getConfig ( "allowDistributeIndex" , "false" ) . equals ( "true" ) ) & &
( getConfig ( "allowDistributeIndex" , "false" ) . equals ( "true" ) ) & &
( ( transferred = performTransferIndex ( indexCount , peerCount , true ) ) > 0 ) ) {
( ( transferred = performTransferIndex ( indexCount , peerCount , true ) ) > 0 ) ) {
indexCount = transferred ;
indexCount = transferred ;