@ -39,8 +39,13 @@
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
import java.io.BufferedReader ;
import java.io.File ;
import java.io.IOException ;
import java.io.InputStream ;
import java.io.InputStreamReader ;
import java.io.OutputStream ;
import java.io.PrintWriter ;
import java.io.Writer ;
import java.net.MalformedURLException ;
import java.util.HashMap ;
@ -73,10 +78,68 @@ public class CrawlURLFetchStack_p {
return stack ;
}
// ---- line-based protocol spoken on '.stream' requests (see respond()) ----
/** Command prefix; the remainder of the line is parsed as the integer number of URL lines that follow. */
public static final String STREAM_CMD_ADDURLS_ = "ADD URLS: " ;
/** Command line that makes respond() stop reading from the stream. */
public static final String STREAM_CMD_END = "END" ;
/** Prefix of the status line written once all announced URLs were read; it is followed by the indices of the URLs that could not be added. */
public static final String STREAM_RESP_OK_ADDURLS_ = "FAILED URLS: " ;
/** Response written when the URL count of an 'ADD URLS: ' command was parsed successfully. */
public static final String STREAM_RESP_OK = "OK" ;
/** Response written when the URL count of an 'ADD URLS: ' command is not a valid integer. */
public static final String STREAM_RESP_FAILED = "FAILED" ;
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch env ) {
final serverObjects prop = new serverObjects ( ) ;
plasmaSwitchboard sb = ( plasmaSwitchboard ) env ;
if ( ( ( String ) header . get ( httpHeader . CONNECTION_PROP_PATH ) ) . endsWith ( ".stream" ) ) {
/ * = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
* . stream request
* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = * /
InputStream in = ( InputStream ) header . get ( httpHeader . CONNECTION_PROP_INPUTSTREAM ) ;
OutputStream out = ( OutputStream ) header . get ( httpHeader . CONNECTION_PROP_OUTPUTSTREAM ) ;
BufferedReader inrb = new BufferedReader ( new InputStreamReader ( in ) ) ;
PrintWriter outw = new PrintWriter ( out ) ;
String line ;
int addurls = 0 , cururl = 0 ;
boolean [ ] status = new boolean [ 0 ] ;
URLFetcherStack stack = getURLFetcherStack ( env ) ;
try {
while ( ( line = inrb . readLine ( ) ) ! = null ) {
// commands
if ( line . startsWith ( STREAM_CMD_ADDURLS_ ) ) {
try {
addurls = Integer . parseInt ( line . substring ( STREAM_CMD_ADDURLS_ . length ( ) ) ) ;
status = new boolean [ addurls ] ;
cururl = 0 ;
outw . println ( STREAM_RESP_OK ) ;
} catch ( NumberFormatException e ) {
outw . println ( STREAM_RESP_FAILED ) ;
}
} else if ( line . equals ( STREAM_CMD_END ) ) {
break ;
} else {
if ( cururl < addurls ) // add url
status [ cururl + + ] = addURL ( line , stack ) ;
if ( cururl > 0 & & cururl = = addurls ) {
// all announced URLs have been read; report the indices of those that failed, e.g. 'FAILED URLS: 0, 3 of 8'
outw . print ( STREAM_RESP_OK_ADDURLS_ ) ;
StringBuffer stat = new StringBuffer ( ) ;
for ( int i = 0 ; i < status . length ; i + + )
if ( ! status [ i ] ) stat . append ( i ) . append ( ", " ) ;
outw . print ( stat . substring ( 0 , stat . length ( ) - 2 ) ) ;
outw . print ( " of " ) ;
outw . println ( status . length ) ;
cururl = 0 ;
addurls = 0 ;
}
}
}
} catch ( IOException e ) { e . printStackTrace ( ) ; }
outw . flush ( ) ;
return null ;
} else {
/ * = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
* ' normal ' request
* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = * /
if ( post ! = null ) {
if ( post . containsKey ( "addurls" ) ) {
prop . put ( "addedUrls" , 1 ) ;
@ -146,6 +209,7 @@ public class CrawlURLFetchStack_p {
}
}
}
}
putFetched ( prop ) ;
prop . put ( "urlCount" , getURLFetcherStack ( env ) . size ( ) ) ;
@ -156,7 +220,6 @@ public class CrawlURLFetchStack_p {
prop . put ( "remurls" , sb . noticeURL . stackSize ( plasmaCrawlNURL . STACK_TYPE_LIMIT ) ) ;
prop . put ( "locurlsVal" , Math . min ( sb . noticeURL . stackSize ( plasmaCrawlNURL . STACK_TYPE_CORE ) , 500 ) ) ;
prop . put ( "remurlsVal" , Math . min ( sb . noticeURL . stackSize ( plasmaCrawlNURL . STACK_TYPE_LIMIT ) , 500 ) ) ;
return prop ;
}
@ -174,14 +237,19 @@ public class CrawlURLFetchStack_p {
private static int addURLs ( String [ ] urls , URLFetcherStack stack ) {
int count = - 1 ;
for ( int i = 0 ; i < urls . length ; i + + ) try {
if ( urls [ i ] . length ( ) = = 0 ) continue ;
stack . push ( new URL ( urls [ i ] ) ) ;
count + + ;
} catch ( MalformedURLException e ) { /* ignore this */ }
for ( int i = 0 ; i < urls . length ; i + + )
if ( addURL ( urls [ i ] , stack ) ) count + + ;
return count ;
}
private static boolean addURL ( String url , URLFetcherStack stack ) {
try {
if ( url = = null | | url . length ( ) = = 0 ) return false ;
stack . push ( new URL ( url ) ) ;
return true ;
} catch ( MalformedURLException e ) { return false ; }
}
private static int shiftFromNotice ( plasmaCrawlNURL nurl , int fromStackType , URLFetcherStack stack , int count ) {
plasmaCrawlNURL . Entry entry ;
int failed = 0 ;
@ -196,7 +264,7 @@ public class CrawlURLFetchStack_p {
int count = 0 ;
String url ;
for ( int i = 0 ; i < amount ; i + + ) {
url = post . get ( "url" + count+ + , null ) ;
url = post . get ( "url" + i , null ) ;
if ( url = = null | | url . length ( ) = = 0 ) continue ;
try {
stack . push ( new URL ( url ) ) ;