@ -1,10 +1,13 @@
// IndexControl_p.java
// IndexControl_p.java
// -----------------------
// -----------------------
// part of the AnomicHTTPD caching proxy
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@anomic.de
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// Frankfurt, Germany, 2004
// last change: 02.05.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
//
// This program is free software; you can redistribute it and/or modify
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// it under the terms of the GNU General Public License as published by
@ -40,7 +43,7 @@
// Contributions and changes to the program code must be marked as such.
// Contributions and changes to the program code must be marked as such.
// You must compile this file with
// You must compile this file with
// javac -classpath .:../Classes IndexControl_p.java
// javac -classpath .:../classes IndexControl_p.java
// if the shell's current path is HTROOT
// if the shell's current path is HTROOT
import java.io.IOException ;
import java.io.IOException ;
@ -50,7 +53,7 @@ import java.util.Enumeration;
import java.util.HashSet ;
import java.util.HashSet ;
import java.util.HashMap ;
import java.util.HashMap ;
import java.util.Iterator ;
import java.util.Iterator ;
import java.util.TreeMap ;
import de.anomic.htmlFilter.htmlFilterContentScraper ;
import de.anomic.htmlFilter.htmlFilterContentScraper ;
import de.anomic.http.httpHeader ;
import de.anomic.http.httpHeader ;
import de.anomic.plasma.plasmaCrawlLURL ;
import de.anomic.plasma.plasmaCrawlLURL ;
@ -67,11 +70,11 @@ import de.anomic.yacy.yacySeed;
public class IndexControl_p {
public class IndexControl_p {
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch env ) {
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch env ) {
// return variable that accumulates replacements
// return variable that accumulates replacements
plasmaSwitchboard switchboard = ( plasmaSwitchboard ) env ;
plasmaSwitchboard switchboard = ( plasmaSwitchboard ) env ;
serverObjects prop = new serverObjects ( ) ;
serverObjects prop = new serverObjects ( ) ;
if ( ( post = = null ) | | ( env = = null ) ) {
if ( post = = null | | env = = null ) {
prop . put ( "keystring" , "" ) ;
prop . put ( "keystring" , "" ) ;
prop . put ( "keyhash" , "" ) ;
prop . put ( "keyhash" , "" ) ;
prop . put ( "urlstring" , "" ) ;
prop . put ( "urlstring" , "" ) ;
@ -81,51 +84,69 @@ public class IndexControl_p {
prop . put ( "ucount" , Integer . toString ( switchboard . urlPool . loadedURL . size ( ) ) ) ;
prop . put ( "ucount" , Integer . toString ( switchboard . urlPool . loadedURL . size ( ) ) ) ;
prop . put ( "otherHosts" , "" ) ;
prop . put ( "otherHosts" , "" ) ;
prop . put ( "indexDistributeChecked" , ( switchboard . getConfig ( "allowDistributeIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexDistributeChecked" , ( switchboard . getConfig ( "allowDistributeIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexDistributeWhileCrawling" , ( switchboard . getConfig ( "allowDistributeIndexWhileCrawling" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexDistributeWhileCrawling" , ( switchboard . getConfig ( "allowDistributeIndexWhileCrawling" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveChecked" , ( switchboard . getConfig ( "allowReceiveIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveChecked" , ( switchboard . getConfig ( "allowReceiveIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveBlockBlacklistChecked" , ( switchboard . getConfig ( "indexReceiveBlockBlacklist" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveBlockBlacklistChecked" , ( switchboard . getConfig ( "indexReceiveBlockBlacklist" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
return prop ; // be save
return prop ; // be save
}
}
// default values
// default values
String keystring = ( ( String ) post . get ( "keystring" ) ) . trim ( ) ;
String keystring = ( ( String ) post . get ( "keystring" ) ) . trim ( ) ;
String keyhash = ( ( String ) post . get ( "keyhash" ) ) . trim ( ) ;
String keyhash = ( ( String ) post . get ( "keyhash" ) ) . trim ( ) ;
String urlstring = ( ( String ) post . get ( "urlstring" ) ) . trim ( ) ;
String urlstring = ( ( String ) post . get ( "urlstring" ) ) . trim ( ) ;
String urlhash = ( ( String ) post . get ( "urlhash" ) ) . trim ( ) ;
String urlhash = ( ( String ) post . get ( "urlhash" ) ) . trim ( ) ;
if ( ( ! ( urlstring . startsWith ( "http://" ) ) ) & & ( ! ( urlstring . startsWith ( "https://" ) ) ) ) urlstring = "http://" + urlstring ;
if ( ! urlstring . startsWith ( "http://" ) & &
! urlstring . startsWith ( "https://" ) ) { urlstring = "http://" + urlstring ; }
prop . put ( "keystring" , keystring ) ;
prop . put ( "keystring" , keystring ) ;
prop . put ( "keyhash" , keyhash ) ;
prop . put ( "keyhash" , keyhash ) ;
prop . put ( "urlstring" , urlstring ) ;
prop . put ( "urlstring" , urlstring ) ;
prop . put ( "urlhash" , urlhash ) ;
prop . put ( "urlhash" , urlhash ) ;
prop . put ( "result" , "" ) ;
prop . put ( "result" , "" ) ;
// read values from checkboxes
// read values from checkboxes
String [ ] urlx = post . getAll ( "urlhx.*" ) ;
String [ ] urlx = post . getAll ( "urlhx.*" ) ;
boolean delurl = post . containsKey ( "delurl" ) ;
boolean delurl = post . containsKey ( "delurl" ) ;
boolean delurlref = post . containsKey ( "delurlref" ) ;
boolean delurlref = post . containsKey ( "delurlref" ) ;
//System.out.println("DEBUG CHECK: " + ((delurl) ? "delurl" : "") + " " + ((delurlref) ? "delurlref" : ""));
// System.out.println("DEBUG CHECK: " + ((delurl) ? "delurl" : "") + " " + ((delurlref) ? "delurlref" : ""));
// DHT control
if ( post . containsKey ( "setIndexTransmission" ) ) {
if ( post . containsKey ( "setIndexTransmission" ) ) {
boolean allowDistributeIndex = ( ( String ) post . get ( "indexDistribute" , "" ) ) . equals ( "on" ) ;
if ( post . get ( "indexDistribute" , "" ) . equals ( "on" ) ) {
switchboard . setConfig ( "allowDistributeIndex" , ( allowDistributeIndex ) ? "true" : "false" ) ;
switchboard . setConfig ( "allowDistributeIndex" , "true" ) ;
if ( allowDistributeIndex ) switchboard . indexDistribution . enable ( ) ; else switchboard . indexDistribution . disable ( ) ;
switchboard . indexDistribution . enable ( ) ;
} else {
boolean allowDistributeIndexWhileCrawling = post . containsKey ( "indexDistributeWhileCrawling" ) ;
switchboard . setConfig ( "allowDistributeIndex" , "false" ) ;
switchboard . setConfig ( "allowDistributeIndexWhileCrawling" , ( allowDistributeIndexWhileCrawling ) ? "true" : "false" ) ;
switchboard . indexDistribution . disable ( ) ;
if ( allowDistributeIndexWhileCrawling ) switchboard . indexDistribution . enableWhileCrawling ( ) ; else switchboard . indexDistribution . disableWhileCrawling ( ) ;
}
boolean allowReceiveIndex = ( ( String ) post . get ( "indexReceive" , "" ) ) . equals ( "on" ) ;
if ( post . containsKey ( "indexDistributeWhileCrawling" ) ) {
switchboard . setConfig ( "allowReceiveIndex" , ( allowReceiveIndex ) ? "true" : "false" ) ;
switchboard . setConfig ( "allowDistributeIndexWhileCrawling" , "true" ) ;
yacyCore . seedDB . mySeed . setFlagAcceptRemoteIndex ( allowReceiveIndex ) ;
switchboard . indexDistribution . enableWhileCrawling ( ) ;
} else {
boolean indexReceiveBlockBlacklist = ( ( String ) post . get ( "indexReceiveBlockBlacklist" , "" ) ) . equals ( "on" ) ;
switchboard . setConfig ( "allowDistributeIndexWhileCrawling" , "false" ) ;
switchboard . setConfig ( "indexReceiveBlockBlacklist" , ( indexReceiveBlockBlacklist ) ? "true" : "false" ) ;
switchboard . indexDistribution . disableWhileCrawling ( ) ;
}
if ( post . get ( "indexReceive" , "" ) . equals ( "on" ) ) {
switchboard . setConfig ( "allowReceiveIndex" , "true" ) ;
yacyCore . seedDB . mySeed . setFlagAcceptRemoteIndex ( true ) ;
} else {
switchboard . setConfig ( "allowReceiveIndex" , "false" ) ;
yacyCore . seedDB . mySeed . setFlagAcceptRemoteIndex ( false ) ;
}
if ( post . get ( "indexReceiveBlockBlacklist" , "" ) . equals ( "on" ) ) {
switchboard . setConfig ( "indexReceiveBlockBlacklist" , "true" ) ;
} else {
switchboard . setConfig ( "indexReceiveBlockBlacklist" , "false" ) ;
}
}
}
// delete word
if ( post . containsKey ( "keyhashdeleteall" ) ) {
if ( post . containsKey ( "keyhashdeleteall" ) ) {
if ( ( delurl ) | | ( delurlref ) ) {
if ( delurl | | delurlref ) {
// generate an urlx array
// generate an urlx array
plasmaWordIndexEntity index = null ;
plasmaWordIndexEntity index = null ;
try {
try {
@ -135,43 +156,67 @@ public class IndexControl_p {
Enumeration en = index . elements ( true ) ;
Enumeration en = index . elements ( true ) ;
int i = 0 ;
int i = 0 ;
urlx = new String [ index . size ( ) ] ;
urlx = new String [ index . size ( ) ] ;
while ( en . hasMoreElements ( ) ) urlx [ i + + ] = ( ( plasmaWordIndexEntry ) en . nextElement ( ) ) . getUrlHash ( ) ;
while ( en . hasMoreElements ( ) ) {
index . close ( ) ; index = null ;
urlx [ i + + ] = ( ( plasmaWordIndexEntry ) en . nextElement ( ) ) . getUrlHash ( ) ;
}
index . close ( ) ;
index = null ;
} catch ( IOException e ) {
} catch ( IOException e ) {
urlx = new String [ 0 ] ;
urlx = new String [ 0 ] ;
} finally {
} finally {
if ( index ! = null ) try { index . close ( ) ; } catch ( Exception e ) { }
if ( index ! = null ) try { index . close ( ) ; } catch ( Exception e ) { }
}
}
}
}
if ( delurlref ) for ( int i = 0 ; i < urlx . length ; i + + ) switchboard . removeAllUrlReferences ( urlx [ i ] , true ) ;
if ( delurlref ) {
if ( ( delurl ) | | ( delurlref ) ) for ( int i = 0 ; i < urlx . length ; i + + ) switchboard . urlPool . loadedURL . remove ( urlx [ i ] ) ;
for ( int i = 0 ; i < urlx . length ; i + + ) {
switchboard . removeAllUrlReferences ( urlx [ i ] , true ) ;
}
}
if ( delurl | | delurlref ) {
for ( int i = 0 ; i < urlx . length ; i + + ) {
switchboard . urlPool . loadedURL . remove ( urlx [ i ] ) ;
}
}
switchboard . wordIndex . deleteIndex ( keyhash ) ;
switchboard . wordIndex . deleteIndex ( keyhash ) ;
post . remove ( "keyhashdeleteall" ) ;
post . remove ( "keyhashdeleteall" ) ;
if ( ( keystring . length ( ) > 0 ) & & ( plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) )
if ( keystring . length ( ) > 0 & &
plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) {
post . put ( "keystringsearch" , "generated" ) ;
post . put ( "keystringsearch" , "generated" ) ;
else
} else {
post . put ( "keyhashsearch" , "generated" ) ;
post . put ( "keyhashsearch" , "generated" ) ;
}
}
}
// delete selected URLs
if ( post . containsKey ( "keyhashdelete" ) ) {
if ( post . containsKey ( "keyhashdelete" ) ) {
if ( delurlref ) for ( int i = 0 ; i < urlx . length ; i + + ) switchboard . removeAllUrlReferences ( urlx [ i ] , true ) ;
if ( delurlref ) {
if ( ( delurl ) | | ( delurlref ) ) for ( int i = 0 ; i < urlx . length ; i + + ) switchboard . urlPool . loadedURL . remove ( urlx [ i ] ) ;
for ( int i = 0 ; i < urlx . length ; i + + ) {
switchboard . removeAllUrlReferences ( urlx [ i ] , true ) ;
}
}
if ( delurl | | delurlref ) {
for ( int i = 0 ; i < urlx . length ; i + + ) {
switchboard . urlPool . loadedURL . remove ( urlx [ i ] ) ;
}
}
switchboard . wordIndex . removeEntries ( keyhash , urlx , true ) ;
switchboard . wordIndex . removeEntries ( keyhash , urlx , true ) ;
// this shall lead to a presentation of the list; so handle that the remaining program
// this shall lead to a presentation of the list; so handle that the remaining program
// thinks that it was called for a list presentation
// thinks that it was called for a list presentation
post . remove ( "keyhashdelete" ) ;
post . remove ( "keyhashdelete" ) ;
if ( ( keystring . length ( ) > 0 ) & & ( plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) )
if ( keystring . length ( ) > 0 & &
plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) {
post . put ( "keystringsearch" , "generated" ) ;
post . put ( "keystringsearch" , "generated" ) ;
else
} else {
post . put ( "keyhashsearch" , "generated" ) ;
post . put ( "keyhashsearch" , "generated" ) ;
//prop.put("result", "Delete of relation of url hashes " + result + " to key hash " + keyhash);
// prop.put("result", "Delete of relation of url hashes " + result + " to key hash " + keyhash);
}
}
}
if ( post . containsKey ( "urlhashdeleteall" ) ) {
if ( post . containsKey ( "urlhashdeleteall" ) ) {
int i = switchboard . removeAllUrlReferences ( urlhash , true ) ;
int i = switchboard . removeAllUrlReferences ( urlhash , true ) ;
prop . put ( "result" , "Deleted URL and " + i + " references from " + i + " word indexes." ) ;
prop . put ( "result" , "Deleted URL and " + i + " references from " + i + " word indexes." ) ;
}
}
if ( post . containsKey ( "urlhashdelete" ) ) {
if ( post . containsKey ( "urlhashdelete" ) ) {
plasmaCrawlLURL . Entry entry = switchboard . urlPool . loadedURL . getEntry ( urlhash ) ;
plasmaCrawlLURL . Entry entry = switchboard . urlPool . loadedURL . getEntry ( urlhash ) ;
URL url = entry . url ( ) ;
URL url = entry . url ( ) ;
@ -192,18 +237,23 @@ public class IndexControl_p {
prop . put ( "urlhash" , "" ) ;
prop . put ( "urlhash" , "" ) ;
prop . put ( "result" , genUrlList ( switchboard , keyhash , keystring ) ) ;
prop . put ( "result" , genUrlList ( switchboard , keyhash , keystring ) ) ;
}
}
if ( post . containsKey ( "keyhashsearch" ) ) {
if ( post . containsKey ( "keyhashsearch" ) ) {
if ( ( keystring . length ( ) = = 0 ) | | ( ! ( plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) ) )
if ( keystring . length ( ) = = 0 | |
! plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) {
prop . put ( "keystring" , "<not possible to compute word from hash>" ) ;
prop . put ( "keystring" , "<not possible to compute word from hash>" ) ;
}
prop . put ( "urlstring" , "" ) ;
prop . put ( "urlstring" , "" ) ;
prop . put ( "urlhash" , "" ) ;
prop . put ( "urlhash" , "" ) ;
prop . put ( "result" , genUrlList ( switchboard , keyhash , "" ) ) ;
prop . put ( "result" , genUrlList ( switchboard , keyhash , "" ) ) ;
}
}
// transfer to other peer
if ( post . containsKey ( "keyhashtransfer" ) ) {
if ( post . containsKey ( "keyhashtransfer" ) ) {
if ( ( keystring . length ( ) = = 0 ) | | ( ! ( plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) ) )
if ( keystring . length ( ) = = 0 | |
! plasmaWordIndexEntry . word2hash ( keystring ) . equals ( keyhash ) ) {
prop . put ( "keystring" , "<not possible to compute word from hash>" ) ;
prop . put ( "keystring" , "<not possible to compute word from hash>" ) ;
}
prop . put ( "urlstring" , "" ) ;
prop . put ( "urlstring" , "" ) ;
prop . put ( "urlhash" , "" ) ;
prop . put ( "urlhash" , "" ) ;
plasmaWordIndexEntity [ ] indexes = new plasmaWordIndexEntity [ 1 ] ;
plasmaWordIndexEntity [ ] indexes = new plasmaWordIndexEntity [ 1 ] ;
@ -232,43 +282,46 @@ public class IndexControl_p {
}
}
// now delete all entries that have no url entry
// now delete all entries that have no url entry
Iterator hashIter = unknownURLEntries . iterator ( ) ;
Iterator hashIter = unknownURLEntries . iterator ( ) ;
while ( hashIter . hasNext ( ) ) try {
while ( hashIter . hasNext ( ) ) {
indexes [ 0 ] . removeEntry ( ( String ) hashIter . next ( ) , false ) ;
try {
} catch ( IOException e ) { }
indexes [ 0 ] . removeEntry ( ( String ) hashIter . next ( ) , false ) ;
// use whats remaining
} catch ( IOException e ) { }
}
// use whats remaining
String gzipBody = switchboard . getConfig ( "indexControl.gzipBody" , "false" ) ;
String gzipBody = switchboard . getConfig ( "indexControl.gzipBody" , "false" ) ;
int timeout = ( int ) switchboard . getConfigLong ( "indexControl.timeout" , 60000 ) ;
int timeout = ( int ) switchboard . getConfigLong ( "indexControl.timeout" , 60000 ) ;
result = yacyClient . transferIndex (
result = yacyClient . transferIndex (
yacyCore . seedDB . getConnected ( post . get ( "hostHash" , "" ) ) ,
yacyCore . seedDB . getConnected ( post . get ( "hostHash" , "" ) ) ,
indexes ,
indexes ,
knownURLs ,
knownURLs ,
"true" . equalsIgnoreCase ( gzipBody ) ,
"true" . equalsIgnoreCase ( gzipBody ) ,
timeout ) ;
timeout ) ;
prop . put ( "result" , ( result = = null ) ? ( "Successfully transferred " + indexes [ 0 ] . size ( ) + " words in " + ( ( System . currentTimeMillis ( ) - starttime ) / 1000 ) + " seconds" ) : result ) ;
prop . put ( "result" , ( result = = null ) ? ( "Successfully transferred " + indexes [ 0 ] . size ( ) + " words in " + ( ( System . currentTimeMillis ( ) - starttime ) / 1000 ) + " seconds" ) : result ) ;
try { indexes [ 0 ] . close ( ) ; } catch ( IOException e ) { }
try { indexes [ 0 ] . close ( ) ; } catch ( IOException e ) { }
}
}
// generate list
if ( post . containsKey ( "keyhashsimilar" ) ) {
if ( post . containsKey ( "keyhashsimilar" ) ) {
Iterator hashIt = switchboard . wordIndex . wordHashes ( keyhash , true , true ) ;
final Iterator hashIt = switchboard . wordIndex . wordHashes ( keyhash , true , true ) ;
String result = "Sequential List of Word-Hashes:<br>" ;
StringBuffer result = new StringBuffer ( "Sequential List of Word-Hashes:<br>" ) ;
String hash ;
String hash ;
int i = 0 ;
int i = 0 ;
while ( ( hashIt . hasNext ( ) ) & & ( i < 256 ) ) {
while ( hashIt . hasNext ( ) & & i < 256 ) {
hash = ( String ) hashIt . next ( ) ;
hash = ( String ) hashIt . next ( ) ;
result + = "<a href=\"/IndexControl_p.html?" +
result . append ( "<a href=\"/IndexControl_p.html?" )
"keystring=" +
. append ( "keystring=" )
"&keyhash=" + hash +
. append ( "&keyhash=" ) . append ( hash )
"&urlhash=" +
. append ( "&urlhash=" )
"&urlstring=" +
. append ( "&urlstring=" )
"&keyhashsearch=" +
. append ( "&urlhashsearch=" )
"\" class=\"tt\">" + hash + "</a> " + ( ( ( i + 1 ) % 8 = = 0 ) ? "<br>" : "" ) ;
. append ( "\" class=\"tt\">" ) . append ( hash ) . append ( "</a> " )
i + + ;
. append ( ( ( i + 1 ) % 8 = = 0 ) ? "<br>" : "" ) ;
}
i + + ;
prop . put ( "result" , result ) ;
}
prop . put ( "result" , result ) ;
}
}
if ( post . containsKey ( "urlstringsearch" ) ) {
if ( post . containsKey ( "urlstringsearch" ) ) {
try {
try {
URL url = new URL ( urlstring ) ;
URL url = new URL ( urlstring ) ;
urlhash = plasmaURL . urlHash ( url ) ;
urlhash = plasmaURL . urlHash ( url ) ;
@ -280,7 +333,7 @@ public class IndexControl_p {
prop . put ( "urlhash" , "" ) ;
prop . put ( "urlhash" , "" ) ;
}
}
}
}
if ( post . containsKey ( "urlhashsearch" ) ) {
if ( post . containsKey ( "urlhashsearch" ) ) {
plasmaCrawlLURL . Entry entry = switchboard . urlPool . loadedURL . getEntry ( urlhash ) ;
plasmaCrawlLURL . Entry entry = switchboard . urlPool . loadedURL . getEntry ( urlhash ) ;
URL url = entry . url ( ) ;
URL url = entry . url ( ) ;
@ -293,62 +346,64 @@ public class IndexControl_p {
}
}
}
}
if ( post . containsKey ( "urlhashsimilar" ) ) {
// generate list
if ( post . containsKey ( "urlhashsimilar" ) ) {
try {
try {
Iterator hashIt = switchboard . urlPool . loadedURL . urlHashes ( urlhash , true ) ;
final Iterator hashIt = switchboard . urlPool . loadedURL . urlHashes ( urlhash , true ) ;
String result = "Sequential List of URL-Hashes:<br>" ;
StringBuffer result = new StringBuffer ( "Sequential List of URL-Hashes:<br>" ) ;
String hash ;
String hash ;
int i = 0 ;
int i = 0 ;
while ( ( hashIt . hasNext ( ) ) & & ( i < 256 ) ) {
while ( hashIt . hasNext ( ) & & i < 256 ) {
hash = ( String ) hashIt . next ( ) ;
hash = ( String ) hashIt . next ( ) ;
result + = "<a href=\"/IndexControl_p.html?" +
result . append ( "<a href=\"/IndexControl_p.html?" )
"keystring=" +
. append ( "keystring=" )
"&keyhash=" +
. append ( "&keyhash=" )
"&urlhash=" + hash +
. append ( "&urlhash=" ) . append ( hash )
"&urlstring=" +
. append ( "&urlstring=" )
"&urlhashsearch=" +
. append ( "&urlhashsearch=" )
"\" class=\"tt\">" + hash + "</a> " + ( ( ( i + 1 ) % 8 = = 0 ) ? "<br>" : "" ) ;
. append ( "\" class=\"tt\">" ) . append ( hash ) . append ( "</a> " )
. append ( ( ( i + 1 ) % 8 = = 0 ) ? "<br>" : "" ) ;
i + + ;
i + + ;
}
}
prop . put ( "result" , result );
prop . put ( "result" , result .toString ( ) );
} catch ( IOException e ) {
} catch ( IOException e ) {
prop . put ( "result" , "Error: " + e . getMessage ( ) ) ;
prop . put ( "result" , "Error: " + e . getMessage ( ) ) ;
}
}
}
}
// List known hosts
// list known hosts
yacySeed seed ;
yacySeed seed ;
int hc = 0 ;
int hc = 0 ;
if ( ( yacyCore . seedDB ! = null ) & & ( yacyCore . seedDB . sizeConnected ( ) > 0 ) ) {
if ( yacyCore . seedDB ! = null & & yacyCore . seedDB . sizeConnected ( ) > 0 ) {
Enumeration e = yacyCore . dhtAgent . getAcceptRemoteIndexSeeds ( keyhash ) ;
Enumeration e = yacyCore . dhtAgent . getAcceptRemoteIndexSeeds ( keyhash ) ;
while ( e . hasMoreElements ( ) ) {
while ( e . hasMoreElements ( ) ) {
seed = ( yacySeed ) e . nextElement ( ) ;
seed = ( yacySeed ) e . nextElement ( ) ;
if ( seed ! = null ) {
if ( seed ! = null ) {
prop . put ( "hosts_" + hc + "_hosthash" , seed . hash ) ;
prop . put ( "hosts_" + hc + "_hosthash" , seed . hash ) ;
prop . put ( "hosts_" + hc + "_hostname" , /*seed.hash + " " +*/ seed . get ( "Name" , "nameless" ) ) ;
prop . put ( "hosts_" + hc + "_hostname" , /*seed.hash + " " +*/ seed . get ( "Name" , "nameless" ) ) ;
hc + + ;
hc + + ;
}
}
}
}
prop . put ( "hosts" , Integer . toString ( hc ) ) ;
prop . put ( "hosts" , Integer . toString ( hc ) ) ;
} else {
} else {
prop . put ( "hosts" , "0" ) ;
prop . put ( "hosts" , "0" ) ;
}
}
// insert constants
// insert constants
prop . put ( "wcount" , Integer . toString ( switchboard . wordIndex . size ( ) ) ) ;
prop . put ( "wcount" , Integer . toString ( switchboard . wordIndex . size ( ) ) ) ;
prop . put ( "ucount" , Integer . toString ( switchboard . urlPool . loadedURL . size ( ) ) ) ;
prop . put ( "ucount" , Integer . toString ( switchboard . urlPool . loadedURL . size ( ) ) ) ;
prop . put ( "indexDistributeChecked" , ( switchboard . getConfig ( "allowDistributeIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexDistributeChecked" , ( switchboard . getConfig ( "allowDistributeIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexDistributeWhileCrawling" , ( switchboard . getConfig ( "allowDistributeIndexWhileCrawling" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexDistributeWhileCrawling" , ( switchboard . getConfig ( "allowDistributeIndexWhileCrawling" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveChecked" , ( switchboard . getConfig ( "allowReceiveIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveChecked" , ( switchboard . getConfig ( "allowReceiveIndex" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveBlockBlacklistChecked" , ( switchboard . getConfig ( "indexReceiveBlockBlacklist" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
prop . put ( "indexReceiveBlockBlacklistChecked" , ( switchboard . getConfig ( "indexReceiveBlockBlacklist" , "true" ) . equals ( "true" ) ) ? "checked" : "" ) ;
// return rewrite properties
// return rewrite properties
return prop ;
return prop ;
}
}
public static String genUrlProfile ( plasmaSwitchboard switchboard , plasmaCrawlLURL . Entry entry , String urlhash ) {
public static String genUrlProfile ( plasmaSwitchboard switchboard , plasmaCrawlLURL . Entry entry , String urlhash ) {
if ( entry = = null ) return "No entry found for URL-hash " + urlhash ;
if ( entry = = null ) { return "No entry found for URL-hash " + urlhash ; }
URL url = entry . url ( ) ;
URL url = entry . url ( ) ;
if ( url = = null ) return "No entry found for URL-hash " + urlhash ;
if ( url = = null ) { return "No entry found for URL-hash " + urlhash ; }
String result = "<table>" +
String result = "<table>" +
"<tr><td class=\"small\">URL String</td><td class=\"tt\">" + htmlFilterContentScraper . urlNormalform ( url ) + "</td></tr>" +
"<tr><td class=\"small\">URL String</td><td class=\"tt\">" + htmlFilterContentScraper . urlNormalform ( url ) + "</td></tr>" +
"<tr><td class=\"small\">Hash</td><td class=\"tt\">" + urlhash + "</td></tr>" +
"<tr><td class=\"small\">Hash</td><td class=\"tt\">" + urlhash + "</td></tr>" +
@ -374,66 +429,77 @@ public class IndexControl_p {
"<span class=\"small\"> this may produce unresolved references at other word indexes but they do not harm</span><br><br>" +
"<span class=\"small\"> this may produce unresolved references at other word indexes but they do not harm</span><br><br>" +
"<input type=\"submit\" value=\"Delete URL and remove all references from words\" name=\"urlhashdeleteall\"><br>" +
"<input type=\"submit\" value=\"Delete URL and remove all references from words\" name=\"urlhashdeleteall\"><br>" +
"<span class=\"small\"> delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)</span><br>" +
"<span class=\"small\"> delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)</span><br>" +
"</form>" ;
"</form>" ;
return result ;
return result ;
}
}
/**
 * Builds the HTML fragment that lists the URL references stored for one word hash.
 *
 * NOTE(review): this span is shown as a diff hunk: unchanged lines appear twice, and a
 * changed line appears as the removed form (String concatenation, "result + =") next to
 * its added replacement (StringBuffer "append" chain). Comments below describe the added
 * side unless stated otherwise.
 *
 * @param switchboard supplies searchManager (word lookup) and urlPool.loadedURL (hash-to-URL lookup)
 * @param keyhash     word hash to look up; also written into the generated markup
 * @param keystring   word text; only written into the generated links and the hidden form field
 * @return the HTML fragment, or "" when an IOException is thrown while reading the index
 */
public static String genUrlList ( plasmaSwitchboard switchboard , String keyhash , String keystring ) {
public static String genUrlList ( plasmaSwitchboard switchboard , String keyhash , String keystring ) {
// search for a word hash and generate a list of url links
// search for a word hash and generate a list of url links
plasmaWordIndexEntity index = null ;
plasmaWordIndexEntity index = null ;
try {
try {
// look up the single word hash; 10000 is the second searchHashes argument
// (presumably a time or count limit -- TODO confirm against searchManager)
HashSet keyhashes = new HashSet ( ) ;
final HashSet keyhashes = new HashSet ( ) ;
keyhashes . add ( keyhash ) ;
keyhashes . add ( keyhash ) ;
index = switchboard . searchManager . searchHashes ( keyhashes , 10000 ) ;
index = switchboard . searchManager . searchHashes ( keyhashes , 10000 ) ;
String result = "" ;
final StringBuffer result = new StringBuffer ( 1024 ) ;
// empty index: the fragment consists of the "no entries" message only
if ( index . size ( ) = = 0 ) {
if ( index . size ( ) = = 0 ) {
result = "No URL entries related to this word hash <span class=\"tt\">" + keyhash + "</span>." ;
result . append ( "No URL entries related to this word hash <span class=\"tt\">" ) . append ( keyhash ) . append ( "</span>." ) ;
} else {
} else {
Enumeration en = index . elements ( true ) ;
final Enumeration en = index . elements ( true ) ;
plasmaWordIndexEntry ie ;
// NOTE(review): the added heading ends in "</span><br>" while the removed one ended in
// "</span>:<br>" -- the colon is dropped; confirm this is intended
result . append ( "URL entries related to this word hash <span class=\"tt\">" ) . append ( keyhash ) . append ( "</span><br>" ) ;
result = "URL entries related to this word hash <span class=\"tt\">" + keyhash + "</span>:<br>" ;
result . append ( "<form action=\"IndexControl_p.html\" method=\"post\" enctype=\"multipart/form-data\">" ) ;
result + = "<form action=\"IndexControl_p.html\" method=\"post\" enctype=\"multipart/form-data\">" ;
String us , uh ;
String us , uh ;
int i = 0 ;
int i = 0 ;
// added side, first pass: collect url string -> url hash in a TreeMap so that the second
// pass below renders the rows ordered by URL string (TreeMap iterates its keys sorted)
// NOTE(review): every hash that loadedURL does not know is stored under the same key "",
// and TreeMap.put replaces the value of an existing key, so at most one unresolved hash
// reaches the rendering loop; two hashes resolving to the same URL string collide the same way
final TreeMap tm = new TreeMap ( ) ;
while ( en . hasMoreElements ( ) ) {
while ( en . hasMoreElements ( ) ) {
ie = ( plasmaWordIndexEntry ) en . nextElement ( ) ;
uh = ( ( plasmaWordIndexEntry ) en . nextElement ( ) ) . getUrlHash ( ) ;
uh = ie . getUrlHash ( ) ;
result + =
"<input type=\"checkbox\" name=\"urlhx" + i + + + "\" value=\"" + uh + "\" align=\"top\">" ;
if ( switchboard . urlPool . loadedURL . exists ( uh ) ) {
if ( switchboard . urlPool . loadedURL . exists ( uh ) ) {
us = switchboard . urlPool . loadedURL . getEntry ( uh ) . url ( ) . toString ( ) ;
us = switchboard . urlPool . loadedURL . getEntry ( uh ) . url ( ) . toString ( ) ;
result + =
tm . put ( us , uh ) ;
"<a href=\"/IndexControl_p.html?" + "keystring=" + keystring +
} else {
"&keyhash=" + keyhash + "&urlhash=" + uh + "&urlstringsearch=" + "&urlstring=" + us +
tm . put ( "" , uh ) ;
"\" class=\"tt\">" + uh + "</a><span class=\"tt\"> " + us + "</span><br>" ;
}
}
// added side, second pass: one checkbox row per map entry, numbered urlhx0, urlhx1, ...
final Iterator iter = tm . keySet ( ) . iterator ( ) ;
// pre-size the buffer, budgeting 384 characters per row plus two spare rows
result . ensureCapacity ( ( tm . size ( ) + 2 ) * 384 ) ;
while ( iter . hasNext ( ) ) {
us = iter . next ( ) . toString ( ) ;
uh = ( String ) tm . get ( us ) ;
result . append ( "<input type=\"checkbox\" name=\"urlhx" ) . append ( i + + ) . append ( "\" value=\"" ) . append ( uh ) . append ( "\" align=\"top\">" ) ;
// an empty key marks the unresolved entry stored by the first pass
// NOTE(review): keystring, keyhash, uh and us are written into the href and the markup
// without URL- or HTML-escaping
if ( us . length ( ) > 0 ) {
result . append ( "<a href=\"/IndexControl_p.html?" ) . append ( "keystring=" ) . append ( keystring )
. append ( "&keyhash=" ) . append ( keyhash ) . append ( "&urlhash=" ) . append ( uh )
. append ( "&urlstringsearch=" ) . append ( "&urlstring=" ) . append ( us ) . append ( "\" class=\"tt\">" )
. append ( uh ) . append ( "</a><span class=\"tt\"> " ) . append ( us ) . append ( "</span><br>" ) ;
} else {
} else {
result + =
result . append ( "<span class=\"tt\">" ) . append ( uh ) . append ( " <unresolved URL Hash></span><br>" ) ;
"<span class=\"tt\">" + uh + " <unresolved URL Hash></span><br>" ;
}
}
}
}
// hidden fields carry the current key back on submit; the fieldset holds the two delete
// buttons and the delurl / delurlref checkboxes
result + =
result . append ( "<input type=\"hidden\" name=\"keystring\" value=\"" ) . append ( keystring ) . append ( "\">" )
"<input type=\"hidden\" name=\"keystring\" value=\"" + keystring + "\">" +
. append ( "<input type=\"hidden\" name=\"keyhash\" value=\"" ) . append ( keyhash ) . append ( "\">" )
"<input type=\"hidden\" name=\"keyhash\" value=\"" + keyhash + "\">" +
. append ( "<input type=\"hidden\" name=\"urlstring\" value=\"\">" )
"<input type=\"hidden\" name=\"urlstring\" value=\"\">" +
. append ( "<input type=\"hidden\" name=\"urlhash\" value=\"\">" )
"<input type=\"hidden\" name=\"urlhash\" value=\"\">" +
. append ( "<br><fieldset><legend>Reference Deletion</legend><table border=\"0\" cellspacing=\"5\" cellpadding=\"5\"><tr valign=\"top\"><td><br><br>" )
"<br><fieldset><legend>Reference Deletion</legend><table border=\"0\" cellspacing=\"5\" cellpadding=\"5\"><tr valign=\"top\"><td><br><br>" +
. append ( "<input type=\"submit\" value=\"Delete reference to selected URLs\" name=\"keyhashdelete\"><br><br>" )
"<input type=\"submit\" value=\"Delete reference to selected URLs\" name=\"keyhashdelete\"><br><br>" +
. append ( "<input type=\"submit\" value=\"Delete reference to ALL URLs\" name=\"keyhashdeleteall\"><span class=\"small\"><br> (= delete Word)</span>" )
"<input type=\"submit\" value=\"Delete reference to ALL URLs\" name=\"keyhashdeleteall\"><span class=\"small\"><br> (= delete Word)</span>" +
. append ( "</td><td width=\"150\">" )
"</td><td width=\"150\">" +
. append ( "<center><input type=\"checkbox\" name=\"delurl\" value=\"\" align=\"top\" checked></center><br>" )
"<center><input type=\"checkbox\" name=\"delurl\" value=\"\" align=\"top\" checked></center><br>" +
. append ( "<span class=\"small\">delete also the referenced URL itself (reasonable and recommended, may produce unresolved references at other word indexes but they do not harm)</span>" )
"<span class=\"small\">delete also the referenced URL itself (reasonable and recommended, may produce unresolved references at other word indexes but they do not harm)</span>" +
. append ( "</td><td width=\"150\">" )
"</td><td width=\"150\">" +
. append ( "<center><input type=\"checkbox\" name=\"delurlref\" value=\"\" align=\"top\"></center><br>" )
"<center><input type=\"checkbox\" name=\"delurlref\" value=\"\" align=\"top\"></center><br>" +
. append ( "<span class=\"small\">for every resolveable and deleted URL reference, delete the same reference at every other word where the reference exists (very extensive, but prevents further unresolved references)</span>" )
"<span class=\"small\">for every resolveable and deleted URL reference, delete the same reference at every other word where the reference exists (very extensive, but prevents further unresolved references)</span>" +
. append ( "</td></tr></table></fieldset></form><br>" ) ;
"</td></tr></table></fieldset></form>" ;
}
}
// close on the success path and clear the reference so the finally block does not close it again
index . close ( ) ; index = null ;
index . close ( ) ;
return result ;
index = null ;
return result . toString ( ) ;
// a failed index read is reported to the caller as an empty fragment
} catch ( IOException e ) {
} catch ( IOException e ) {
return "" ;
return "" ;
// index is still non-null here only when the try body exited before the explicit close above;
// any exception from this close is deliberately ignored
} finally {
} finally {
if ( index ! = null ) try { index . close ( ) ; } catch ( Exception e ) { } ;
if ( index ! = null ) try { index . close ( ) ; index = null ; } catch ( Exception e ) { } ;
}
}
}
}
}
}