- less automatic indexing after a search (needs to reset the default crawl profiles)
- fix for concurrency problem in storage of serverSwitch Properties
- markup update
pull/1/head
Michael Christen 13 years ago
parent f62e6fb438
commit e7e429705a

@ -45,7 +45,8 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
import net.yacy.repository.RegexHelper;
public final class CrawlSwitchboard {
public final class CrawlSwitchboard
{
public static final String CRAWL_PROFILE_PROXY = "proxy";
public static final String CRAWL_PROFILE_REMOTE = "remote";
@ -76,10 +77,7 @@ public final class CrawlSwitchboard {
public CrawlProfile defaultSurrogateProfile;
private final File queuesRoot;
public CrawlSwitchboard(
final String networkName,
final Log log,
final File queuesRoot) {
public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) {
log.logInfo("Initializing Word Index for the network '" + networkName + "'.");
@ -88,7 +86,8 @@ public final class CrawlSwitchboard {
System.exit(0);
}
this.log = log;
this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
this.profilesActiveCrawlsCache =
Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
// make crawl profiles database and default profiles
this.queuesRoot = queuesRoot;
@ -106,24 +105,40 @@ public final class CrawlSwitchboard {
} catch ( final RowSpaceExceededException e ) {
p = null;
}
if (p == null) continue;
if ( p == null ) {
continue;
}
if ( !RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH)) ) {
removeActive(handle);
Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
+ " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH
+ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
Log.logWarning("CrawlProfiles", "removed Profile "
+ p.handle()
+ ": "
+ p.name()
+ " from active crawls since "
+ CrawlProfile.FILTER_URL_MUSTMATCH
+ " is no valid regular expression: "
+ p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
} else if ( !RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH)) ) {
removeActive(handle);
Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
+ " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH
+ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
Log.logWarning("CrawlProfiles", "removed Profile "
+ p.handle()
+ ": "
+ p.name()
+ " from active crawls since "
+ CrawlProfile.FILTER_URL_MUSTNOTMATCH
+ " is no valid regular expression: "
+ p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
} else {
Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
}
}
initActiveCrawlProfiles();
log.logInfo("Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries");
log.logInfo("Loaded active crawl profiles from file "
+ profilesActiveFile.getName()
+ ", "
+ this.profilesActiveCrawls.size()
+ " entries");
final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile);
@ -138,16 +153,25 @@ public final class CrawlSwitchboard {
continue;
}
}
log.logInfo("Loaded passive crawl profiles from file " + profilesPassiveFile.getName() +
", " + this.profilesPassiveCrawls.size() + " entries" +
", " + profilesPassiveFile.length()/1024);
log.logInfo("Loaded passive crawl profiles from file "
+ profilesPassiveFile.getName()
+ ", "
+ this.profilesPassiveCrawls.size()
+ " entries"
+ ", "
+ profilesPassiveFile.length()
/ 1024);
}
public CrawlProfile getActive(final byte[] profileKey) {
if (profileKey == null) return null;
if ( profileKey == null ) {
return null;
}
// get from cache
CrawlProfile p = this.profilesActiveCrawlsCache.get(profileKey);
if (p != null) return p;
if ( p != null ) {
return p;
}
// get from db
Map<String, String> m;
@ -158,14 +182,18 @@ public final class CrawlSwitchboard {
} catch ( final RowSpaceExceededException e ) {
m = null;
}
if (m == null) return null;
if ( m == null ) {
return null;
}
p = new CrawlProfile(m);
this.profilesActiveCrawlsCache.put(profileKey, p);
return p;
}
public CrawlProfile getPassive(final byte[] profileKey) {
if (profileKey == null) return null;
if ( profileKey == null ) {
return null;
}
Map<String, String> m;
try {
m = this.profilesPassiveCrawls.get(profileKey);
@ -174,7 +202,9 @@ public final class CrawlSwitchboard {
} catch ( final RowSpaceExceededException e ) {
m = null;
}
if (m == null) return null;
if ( m == null ) {
return null;
}
return new CrawlProfile(m);
}
@ -187,13 +217,17 @@ public final class CrawlSwitchboard {
}
/**
 * Removes a crawl profile from the active set.
 * The key is dropped from the in-memory profile cache first and then from the
 * active-profile store. A {@code null} key is ignored.
 *
 * @param profileKey handle of the profile to remove; may be {@code null}
 */
public void removeActive(final byte[] profileKey) {
    if ( profileKey == null ) {
        return;
    }
    this.profilesActiveCrawlsCache.remove(profileKey);
    this.profilesActiveCrawls.remove(profileKey);
}
/**
 * Removes a crawl profile from the passive-profile store.
 * A {@code null} key is ignored.
 *
 * @param profileKey handle of the profile to remove; may be {@code null}
 */
public void removePassive(final byte[] profileKey) {
    if ( profileKey == null ) {
        return;
    }
    this.profilesPassiveCrawls.remove(profileKey);
}
@ -220,13 +254,27 @@ public final class CrawlSwitchboard {
for ( final byte[] handle : this.profilesActiveCrawls.keySet() ) {
profile = new CrawlProfile(this.profilesActiveCrawls.get(handle));
name = profile.name();
if (name.equals(CRAWL_PROFILE_PROXY)) this.defaultProxyProfile = profile;
if (name.equals(CRAWL_PROFILE_REMOTE)) this.defaultRemoteProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) this.defaultTextSnippetLocalProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) this.defaultTextSnippetGlobalProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) this.defaultMediaSnippetLocalProfile = profile;
if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) this.defaultMediaSnippetGlobalProfile = profile;
if (name.equals(CRAWL_PROFILE_SURROGATE)) this.defaultSurrogateProfile = profile;
if ( name.equals(CRAWL_PROFILE_PROXY) ) {
this.defaultProxyProfile = profile;
}
if ( name.equals(CRAWL_PROFILE_REMOTE) ) {
this.defaultRemoteProfile = profile;
}
if ( name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ) {
this.defaultTextSnippetLocalProfile = profile;
}
if ( name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ) {
this.defaultTextSnippetGlobalProfile = profile;
}
if ( name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ) {
this.defaultMediaSnippetLocalProfile = profile;
}
if ( name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ) {
this.defaultMediaSnippetGlobalProfile = profile;
}
if ( name.equals(CRAWL_PROFILE_SURROGATE) ) {
this.defaultSurrogateProfile = profile;
}
}
} catch ( final Exception e ) {
this.profilesActiveCrawls.clear();
@ -241,66 +289,212 @@ public final class CrawlSwitchboard {
if ( this.defaultProxyProfile == null ) {
// generate new default entry for proxy crawling
this.defaultProxyProfile = new CrawlProfile(
"proxy", null,
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING,
this.defaultProxyProfile =
new CrawlProfile(
"proxy",
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/,
true,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE),
-1,
false,
true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/,
true /*getConfigBool(PROXY_INDEXING_LOCAL_MEDIA, true)*/,
true,
false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, true, true, true,
false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/,
true,
true,
true,
CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultProxyProfile.handle()), this.defaultProxyProfile);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultProxyProfile.handle()),
this.defaultProxyProfile);
}
if ( this.defaultRemoteProfile == null ) {
// generate new default entry for remote crawling
this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", CrawlProfile.MATCH_NEVER_STRING, 0, true,
-1, -1, true, true, true, false, false, true, true, false, CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultRemoteProfile.handle()), this.defaultRemoteProfile);
this.defaultRemoteProfile =
new CrawlProfile(
CRAWL_PROFILE_REMOTE,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
CrawlProfile.MATCH_NEVER_STRING,
0,
false,
-1,
-1,
true,
true,
true,
false,
false,
true,
true,
false,
CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultRemoteProfile.handle()),
this.defaultRemoteProfile);
}
if ( this.defaultTextSnippetLocalProfile == null ) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()), this.defaultTextSnippetLocalProfile);
this.defaultTextSnippetLocalProfile =
new CrawlProfile(
CRAWL_PROFILE_SNIPPET_LOCAL_TEXT,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE),
-1,
true,
false,
false,
true,
false,
true,
true,
false,
CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()),
this.defaultTextSnippetLocalProfile);
}
if ( this.defaultTextSnippetGlobalProfile == null ) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()), this.defaultTextSnippetGlobalProfile);
this.defaultTextSnippetGlobalProfile =
new CrawlProfile(
CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE),
-1,
true,
true,
true,
true,
false,
true,
true,
false,
CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()),
this.defaultTextSnippetGlobalProfile);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);
if ( this.defaultMediaSnippetLocalProfile == null ) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), this.defaultMediaSnippetLocalProfile);
this.defaultMediaSnippetLocalProfile =
new CrawlProfile(
CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE),
-1,
true,
false,
false,
true,
false,
true,
true,
false,
CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()),
this.defaultMediaSnippetLocalProfile);
}
if ( this.defaultMediaSnippetGlobalProfile == null ) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), this.defaultMediaSnippetGlobalProfile);
this.defaultMediaSnippetGlobalProfile =
new CrawlProfile(
CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE),
-1,
true,
false,
true,
true,
false,
true,
true,
false,
CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()),
this.defaultMediaSnippetGlobalProfile);
}
if ( this.defaultSurrogateProfile == null ) {
// generate new default entry for surrogate parsing
this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, true, true, false, CacheStrategy.NOCACHE);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultSurrogateProfile.handle()), this.defaultSurrogateProfile);
this.defaultSurrogateProfile =
new CrawlProfile(
CRAWL_PROFILE_SURROGATE,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE),
-1,
true,
true,
false,
false,
false,
true,
true,
false,
CacheStrategy.NOCACHE);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultSurrogateProfile.handle()),
this.defaultSurrogateProfile);
}
}
private void resetProfiles() {
this.profilesActiveCrawlsCache.clear();
final File pdb = new File(this.queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
if (pdb.exists()) FileUtils.deletedelete(pdb);
if ( pdb.exists() ) {
FileUtils.deletedelete(pdb);
}
try {
this.profilesActiveCrawls = new MapHeap(pdb, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
this.profilesActiveCrawls =
new MapHeap(pdb, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch ( final IOException e1 ) {
Log.logException(e1);
this.profilesActiveCrawls = null;
@ -315,7 +509,9 @@ public final class CrawlSwitchboard {
try {
for ( final byte[] handle : this.profilesActiveCrawls.keySet() ) {
// check for interruption
if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress");
if ( Thread.currentThread().isInterrupted() ) {
throw new InterruptedException("Shutdown in progress");
}
// getting next profile
try {
@ -325,13 +521,13 @@ public final class CrawlSwitchboard {
} catch ( final RowSpaceExceededException e ) {
continue;
}
if (!((entry.name().equals(CRAWL_PROFILE_PROXY)) ||
(entry.name().equals(CRAWL_PROFILE_REMOTE)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) ||
(entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) ||
(entry.name().equals(CRAWL_PROFILE_SURROGATE)))) {
if ( !((entry.name().equals(CRAWL_PROFILE_PROXY))
|| (entry.name().equals(CRAWL_PROFILE_REMOTE))
|| (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT))
|| (entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT))
|| (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA))
|| (entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) || (entry.name()
.equals(CRAWL_PROFILE_SURROGATE))) ) {
final CrawlProfile p = new CrawlProfile(entry);
this.profilesPassiveCrawls.put(UTF8.getBytes(p.handle()), p);
this.profilesActiveCrawls.remove(handle);
@ -345,16 +541,15 @@ public final class CrawlSwitchboard {
return hasDoneSomething;
}
/**
 * Releases the resources held by this switchboard: empties the in-memory cache of
 * active profiles, then closes the active and the passive profile stores.
 */
public void close() {
this.profilesActiveCrawlsCache.clear();
this.profilesActiveCrawls.close();
this.profilesPassiveCrawls.close();
}
/**
* Loads crawl profiles from a DB file.
*
* @param file DB file
* @return crawl profile data
*/
@ -363,10 +558,12 @@ public final class CrawlSwitchboard {
try {
ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch ( final IOException e ) {
Log.logException(e);Log.logException(e);
Log.logException(e);
Log.logException(e);
FileUtils.deletedelete(file);
try {
ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
ret =
new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' ');
} catch ( final IOException e1 ) {
Log.logException(e1);
ret = null;

@ -39,8 +39,8 @@ import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
@ -52,11 +52,11 @@ import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.kelondro.workflow.WorkflowThread;
import de.anomic.server.serverAccessTracker.Track;
import de.anomic.server.serverCore.Session;
public class serverSwitch {
public class serverSwitch
{
// configuration management
private final File configFile;
@ -72,7 +72,11 @@ public class serverSwitch {
private final NavigableMap<String, BusyThread> workerThreads;
private final serverAccessTracker accessTracker;
public serverSwitch(final File dataPath, final File appPath, final String initPath, final String configPath) {
public serverSwitch(
final File dataPath,
final File appPath,
final String initPath,
final String configPath) {
// we initialize the switchboard with a property file,
// but maintain these properties then later in a new 'config' file
// to reset all changed configs, the config file must
@ -81,18 +85,21 @@ public class serverSwitch {
// file name of the config file
this.dataPath = dataPath;
this.appPath = appPath;
this.configComment = "This is an automatically generated file, updated by serverAbstractSwitch and initialized by " + initPath;
this.configComment =
"This is an automatically generated file, updated by serverAbstractSwitch and initialized by "
+ initPath;
final File initFile = new File(appPath, initPath);
this.configFile = new File(dataPath, configPath); // propertiesFile(config);
firstInit = !configFile.exists(); // this is true if the application was started for the first time
new File(configFile.getParent()).mkdir();
this.firstInit = !this.configFile.exists(); // this is true if the application was started for the first time
new File(this.configFile.getParent()).mkdir();
// predefine init's
final ConcurrentMap<String, String> initProps;
if (initFile.exists())
if ( initFile.exists() ) {
initProps = FileUtils.loadMap(initFile);
else
} else {
initProps = new ConcurrentHashMap<String, String>();
}
// if 'pro'-version is selected, overload standard settings with 'pro'-settings
Iterator<String> i;
@ -108,20 +115,21 @@ public class serverSwitch {
}
// load config's from last save
if (configFile.exists())
configProps = FileUtils.loadMap(configFile);
else
configProps = new ConcurrentHashMap<String, String>();
if ( this.configFile.exists() ) {
this.configProps = FileUtils.loadMap(this.configFile);
} else {
this.configProps = new ConcurrentHashMap<String, String>();
}
// remove all values from config that do not appear in init
configRemoved = new ConcurrentHashMap<String, String>();
synchronized (configProps) {
i = configProps.keySet().iterator();
this.configRemoved = new ConcurrentHashMap<String, String>();
synchronized ( this.configProps ) {
i = this.configProps.keySet().iterator();
String key;
while ( i.hasNext() ) {
key = i.next();
if ( !(initProps.containsKey(key)) ) {
configRemoved.put(key, this.configProps.get(key));
this.configRemoved.put(key, this.configProps.get(key));
i.remove();
}
}
@ -132,8 +140,8 @@ public class serverSwitch {
// merge new props from init to config
// this is necessary for migration, when new properties are attached
initProps.putAll(configProps);
configProps = initProps;
initProps.putAll(this.configProps);
this.configProps = initProps;
// save result; this may initially create a config file after
// initialization
@ -141,20 +149,20 @@ public class serverSwitch {
}
// other settings
authorization = new ConcurrentHashMap<InetAddress, String>();
this.authorization = new ConcurrentHashMap<InetAddress, String>();
// init thread control
workerThreads = new TreeMap<String, BusyThread>();
this.workerThreads = new TreeMap<String, BusyThread>();
// init busy state control
serverJobs = 0;
this.serverJobs = 0;
// init server tracking
this.accessTracker = new serverAccessTracker(
this.accessTracker =
new serverAccessTracker(
getConfigLong("server.maxTrackingTime", 60 * 60 * 1000),
(int) getConfigLong("server.maxTrackingCount", 1000),
(int) getConfigLong("server.maxTrackingHostCount", 100)
);
(int) getConfigLong("server.maxTrackingHostCount", 100));
}
public String myPublicIP() {
@ -166,7 +174,9 @@ public class serverSwitch {
// otherwise we return the real IP address of this host
final InetAddress pLIP = Domains.myPublicLocalIP();
if (pLIP != null) return pLIP.getHostAddress();
if ( pLIP != null ) {
return pLIP.getHostAddress();
}
return null;
}
@ -176,7 +186,7 @@ public class serverSwitch {
}
/**
 * Returns the logger held by this switch.
 *
 * @return the {@code log} field of this instance
 */
public Log getLog() {
    return this.log;
}
public void setConfig(final Map<String, String> otherConfigs) {
@ -202,35 +212,39 @@ public class serverSwitch {
/**
 * Sets a single configuration parameter and saves the configuration if the
 * stored value changed.
 *
 * @param key name of the configuration parameter
 * @param value new value for the parameter
 */
public void setConfig(final String key, final String value) {
    // put() hands back the previous mapping, or null if the key was not set before
    final String oldValue = this.configProps.put(key, value);
    // skip the save when the value is unchanged
    if ( oldValue == null || !value.equals(oldValue) ) {
        saveConfig();
    }
}
/**
 * Removes a configuration parameter from the in-memory property map.
 * This method does not call {@code saveConfig()} itself.
 *
 * @param key name of the configuration parameter to remove
 */
public void removeConfig(final String key) {
    this.configProps.remove(key);
}
/**
 * Gets a configuration parameter from the properties.
 *
 * @param key name of the configuration parameter
 * @param dflt default value which will be used in case the parameter can not be found
 * @return value of the parameter, or {@code dflt} if no value is stored for {@code key}
 */
public String getConfig(final String key, final String dflt) {
    final String s = this.configProps.get(key);
    return ( s == null ) ? dflt : s;
}
/**
* Gets a configuration parameter from the properties.
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be
* found or if it is invalid
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value if the parameter or default value
*/
public long getConfigLong(final String key, final long dflt) {
@ -243,9 +257,9 @@ public class serverSwitch {
/**
* Gets a configuration parameter from the properties.
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be
* found or if it is invalid
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value if the parameter or default value
*/
public double getConfigFloat(final String key, final float dflt) {
@ -258,9 +272,9 @@ public class serverSwitch {
/**
* Gets a configuration parameter from the properties.
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be
* found or if it is invalid
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value if the parameter or default value
*/
public int getConfigInt(final String key, final int dflt) {
@ -273,9 +287,9 @@ public class serverSwitch {
/**
* Gets a configuration parameter from the properties.
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be
* found or if it is invalid
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value if the parameter or default value
*/
public boolean getConfigBool(final String key, final boolean dflt) {
@ -284,12 +298,13 @@ public class serverSwitch {
/**
* Create a File instance for a configuration setting specifying a path.
*
* @param key config key
* @param dflt default path value, that is used when there is no value
* <code>key</code> in the configuration.
* @return if the value of the setting is an absolute path String, then the
* returned File is derived from this setting only. Otherwise the path's file
* is constructed from the applications root path + the relative path setting.
* @param dflt default path value, that is used when there is no value <code>key</code> in the
* configuration.
* @return if the value of the setting is an absolute path String, then the returned File is derived from
* this setting only. Otherwise the path's file is constructed from the applications root path +
* the relative path setting.
*/
public File getDataPath(final String key, final String dflt) {
File ret;
@ -308,26 +323,22 @@ public class serverSwitch {
}
/**
 * Returns an iterator over the names of all configuration parameters.
 *
 * @return iterator over the key set of the configuration property map
 */
public Iterator<String> configKeys() {
    return this.configProps.keySet().iterator();
}
/**
 * Writes the configuration properties to the config file via FileUtils.saveMap,
 * together with the generated file comment. A copy of the property map is passed
 * instead of the live map.
 */
// NOTE(review): this span interleaves the earlier try/catch variant with the later
// this.-qualified statements. Which variant is valid depends on whether
// FileUtils.saveMap declares IOException - confirm before cleaning this up.
private void saveConfig() {
try {
ConcurrentMap<String, String> configPropsCopy = new ConcurrentHashMap<String, String>();
configPropsCopy.putAll(configProps); // avoid concurrency problems
FileUtils.saveMap(configFile, configPropsCopy, configComment);
} catch (final IOException e) {
log.logSevere("CONFIG: Cannot write config file " + configFile.toString() + ": " + e.getMessage(), e);
//System.out.println("ERROR: cannot write config file " + configFile.toString() + ": " + e.getMessage());
}
configPropsCopy.putAll(this.configProps); // avoid concurrency problems
FileUtils.saveMap(this.configFile, configPropsCopy, this.configComment);
}
/**
 * Gets configuration parameters which have been removed during initialization.
 *
 * @return contains parameter name as key and parameter value as value
 */
public ConcurrentMap<String, String> getRemoved() {
    return this.configRemoved;
}
public void deployThread(
@ -337,8 +348,13 @@ public class serverSwitch {
final String threadMonitorURL,
final BusyThread newThread,
final long startupDelay) {
deployThread(threadName, threadShortDescription, threadLongDescription, threadMonitorURL,
newThread, startupDelay,
deployThread(
threadName,
threadShortDescription,
threadLongDescription,
threadMonitorURL,
newThread,
startupDelay,
Long.parseLong(getConfig(threadName + "_idlesleep", "100")),
Long.parseLong(getConfig(threadName + "_busysleep", "1000")),
Long.parseLong(getConfig(threadName + "_memprereq", "1000000")));
@ -354,7 +370,10 @@ public class serverSwitch {
final long initialIdleSleep,
final long initialBusySleep,
final long initialMemoryPreRequisite) {
if (newThread.isAlive()) throw new RuntimeException("undeployed threads must not live; they are started as part of the deployment");
if ( newThread.isAlive() ) {
throw new RuntimeException(
"undeployed threads must not live; they are started as part of the deployment");
}
newThread.setStartupSleep(startupDelay);
long x;
try {
@ -379,17 +398,23 @@ public class serverSwitch {
setConfig(threadName + "_memprereq", initialMemoryPreRequisite);
}
newThread.setDescription(threadShortDescription, threadLongDescription, threadMonitorURL);
workerThreads.put(threadName, newThread);
this.workerThreads.put(threadName, newThread);
// start the thread
if (workerThreads.containsKey(threadName)) newThread.start();
if ( this.workerThreads.containsKey(threadName) ) {
newThread.start();
}
}
/**
 * Looks up a deployed worker thread by name.
 *
 * @param threadName name under which the thread was registered
 * @return the registered thread, or {@code null} if the map holds no entry for that name
 */
public BusyThread getThread(final String threadName) {
    return this.workerThreads.get(threadName);
}
public void setThreadPerformance(final String threadName, final long idleMillis, final long busyMillis, final long memprereqBytes) {
final BusyThread thread = workerThreads.get(threadName);
public void setThreadPerformance(
final String threadName,
final long idleMillis,
final long busyMillis,
final long memprereqBytes) {
final BusyThread thread = this.workerThreads.get(threadName);
if ( thread != null ) {
setConfig(threadName + "_idlesleep", thread.setIdleSleep(idleMillis));
setConfig(threadName + "_busysleep", thread.setBusySleep(busyMillis));
@ -399,28 +424,28 @@ public class serverSwitch {
}
public synchronized void terminateThread(final String threadName, final boolean waitFor) {
if (workerThreads.containsKey(threadName)) {
((WorkflowThread) workerThreads.get(threadName)).terminate(waitFor);
workerThreads.remove(threadName);
if ( this.workerThreads.containsKey(threadName) ) {
((WorkflowThread) this.workerThreads.get(threadName)).terminate(waitFor);
this.workerThreads.remove(threadName);
}
}
/**
 * Calls {@code intermission(pause)} on every registered worker thread.
 *
 * @param pause value handed to each thread's {@code intermission} call
 */
public void intermissionAllThreads(final long pause) {
    // iterate the values directly instead of looking each key up again
    for ( final BusyThread thread : this.workerThreads.values() ) {
        thread.intermission(pause);
    }
}
public synchronized void terminateAllThreads(final boolean waitFor) {
Iterator<String> e = workerThreads.keySet().iterator();
Iterator<String> e = this.workerThreads.keySet().iterator();
while ( e.hasNext() ) {
((WorkflowThread) workerThreads.get(e.next())).terminate(false);
((WorkflowThread) this.workerThreads.get(e.next())).terminate(false);
}
if ( waitFor ) {
e = workerThreads.keySet().iterator();
e = this.workerThreads.keySet().iterator();
while ( e.hasNext() ) {
((WorkflowThread) workerThreads.get(e.next())).terminate(true);
((WorkflowThread) this.workerThreads.get(e.next())).terminate(true);
e.remove();
}
}
@ -431,7 +456,9 @@ public class serverSwitch {
final WorkflowThread st = getThread(threadName);
for ( final Session s : ((serverCore) st).getJobList() ) {
if (!s.isAlive()) continue;
if ( !s.isAlive() ) {
continue;
}
if ( s.getTime() > timeout ) {
list.add(s.getName());
}
@ -440,21 +467,26 @@ public class serverSwitch {
}
public void closeSessions(String threadName, String sessionName) {
if (sessionName == null) return;
if ( sessionName == null ) {
return;
}
final WorkflowThread st = getThread(threadName);
for ( final Session s : ((serverCore) st).getJobList() ) {
if (
(s.isAlive()) &&
(s.getName().equals(sessionName))
) {
if ( (s.isAlive()) && (s.getName().equals(sessionName)) ) {
// try to stop session
s.setStopped(true);
try { Thread.sleep(100); } catch (final InterruptedException ex) {}
try {
Thread.sleep(100);
} catch ( final InterruptedException ex ) {
}
// try to interrupt session
s.interrupt();
try { Thread.sleep(100); } catch (final InterruptedException ex) {}
try {
Thread.sleep(100);
} catch ( final InterruptedException ex ) {
}
// try to close socket
if ( s.isAlive() ) {
@ -463,43 +495,54 @@ public class serverSwitch {
// wait for session to finish
if ( s.isAlive() ) {
try { s.join(500); } catch (final InterruptedException ex) {}
try {
s.join(500);
} catch ( final InterruptedException ex ) {
}
}
}
}
}
/**
 * Returns an iterator over the names (keys) under which threads are registered in {@code workerThreads}.
 *
 * @return iterator obtained directly from the key set of {@code workerThreads}
 */
public Iterator<String> /*of serverThread-Names (String)*/threadNames() {
return workerThreads.keySet().iterator();
return this.workerThreads.keySet().iterator();
}
// authentication routines:
/**
 * Stores the access attributes for a host as the single string {@code user + "@" + rights}
 * in the {@code authorization} map, replacing any previous entry for that host.
 *
 * @param host address used as the map key
 * @param user user name, stored before the '@' separator
 * @param rights rights string, stored after the '@' separator
 */
public void setAuthentify(final InetAddress host, final String user, final String rights) {
// sets access attributes according to host addresses
authorization.put(host, user + "@" + rights);
this.authorization.put(host, user + "@" + rights);
}
/**
 * Removes the entry for the given host from the {@code authorization} map.
 *
 * @param host address whose access attributes are dropped
 */
public void removeAuthentify(final InetAddress host) {
// remove access attributes according to host addresses
authorization.remove(host);
this.authorization.remove(host);
}
/**
 * Looks up the {@code authorization} entry for a host and returns the part before the first '@'.
 *
 * @param host address to look up
 * @return the user name, or {@code null} if the host has no entry or the entry contains no '@'
 */
public String getAuthentifyUser(final InetAddress host) {
// read user name according to host addresses
final String a = authorization.get(host);
if (a == null) return null;
final String a = this.authorization.get(host);
if ( a == null ) {
return null;
}
// entries are stored as "user@rights"; split at the first '@'
final int p = a.indexOf('@');
if (p < 0) return null;
if ( p < 0 ) {
return null;
}
return a.substring(0, p);
}
/**
 * Looks up the {@code authorization} entry for a host and returns the part after the first '@'.
 *
 * @param host address to look up
 * @return the rights string, or {@code null} if the host has no entry or the entry contains no '@'
 */
public String getAuthentifyRights(final InetAddress host) {
// read access rights according to host addresses
final String a = authorization.get(host);
if (a == null) return null;
final String a = this.authorization.get(host);
if ( a == null ) {
return null;
}
// entries are stored as "user@rights"; split at the first '@'
final int p = a.indexOf('@');
if (p < 0) return null;
if ( p < 0 ) {
return null;
}
return a.substring(p + 1);
}
@ -517,7 +560,9 @@ public class serverSwitch {
public boolean hasAuthentifyRight(final InetAddress host, final String right) {
// A host without a rights string has no rights at all.
final String rights = getAuthentifyRights(host);
if (rights == null) return false;
if ( rights == null ) {
return false;
}
// plain substring test: true when 'right' occurs anywhere inside the rights string
return rights.indexOf(right) >= 0;
}
@ -531,11 +576,11 @@ public class serverSwitch {
@Override
public String toString() {
// the string form of this object is the string form of its configProps
return configProps.toString();
return this.configProps.toString();
}
/**
 * Records the given job count in the {@code serverJobs} field.
 *
 * @param jobs value to store
 */
public void handleBusyState(final int jobs) {
serverJobs = jobs;
this.serverJobs = jobs;
}
public void track(final String host, final String accessPath) {
@ -555,15 +600,16 @@ public class serverSwitch {
}
/**
* Retrieve text data (e. g. config file) from file
* Retrieve text data (e. g. config file) from file. The file may be a URL or a filename with a path relative
* to the rootPath parameter.
*
* file may be an url or a filename with path relative to rootPath parameter
* @param uri url or filename
* @param rootPath searchpath for file
* @param file file to use when remote fetching fails (null if unused)
*/
public Reader getConfigFileFromWebOrLocally(final String uri,
final String rootPath, final File file) throws IOException, FileNotFoundException {
public Reader getConfigFileFromWebOrLocally(final String uri, final String rootPath, final File file)
throws IOException,
FileNotFoundException {
if ( uri.startsWith("http://") || uri.startsWith("https://") ) {
final String[] uris = uri.split(",");
for ( String netdef : uris ) {
@ -574,7 +620,9 @@ public class serverSwitch {
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());
byte[] data = client.GETbytes(uri);
if (data == null || data.length == 0) continue;
if ( data == null || data.length == 0 ) {
continue;
}
// save locally in case next fetch fails
if ( file != null ) {
FileOutputStream f = new FileOutputStream(file);
@ -592,7 +640,8 @@ public class serverSwitch {
throw new FileNotFoundException();
}
} else {
final File f = (uri.length() > 0 && uri.startsWith("/")) ? new File(uri) : new File(rootPath, uri);
final File f =
(uri.length() > 0 && uri.startsWith("/")) ? new File(uri) : new File(rootPath, uri);
if ( f.exists() ) {
return new FileReader(f);
} else {
@ -605,6 +654,7 @@ public class serverSwitch {
/**
* Generates a random password.
*
* @return random password which is 20 characters long.
*/
public String genRandomPassword() {
@ -613,6 +663,7 @@ public class serverSwitch {
/**
* Generates a random password of a given length.
*
* @param length length of password
* @return password of given length
*/

@ -32,6 +32,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
@ -66,8 +67,8 @@ import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSet;
import net.yacy.kelondro.logging.Log;
public final class FileUtils {
public final class FileUtils
{
private static final int DEFAULT_BUFFER_SIZE = 1024; // this is also the maximum chunk size
@ -83,13 +84,13 @@ public final class FileUtils {
* @param count the total amount of bytes to copy (-1 for all, else must be greater than zero)
* @return Total number of bytes copied.
* @throws IOException
*
* @see #copy(InputStream source, File dest)
* @see #copyRange(File source, OutputStream dest, int start)
* @see #copy(File source, OutputStream dest)
* @see #copy(File source, File dest)
*/
public static long copy(final InputStream source, final OutputStream dest, final long count) throws IOException {
public static long copy(final InputStream source, final OutputStream dest, final long count)
throws IOException {
assert count < 0 || count > 0 : "precondition violated: count == " + count + " (nothing to copy)";
if ( count == 0 ) {
// no bytes to copy
@ -99,7 +100,8 @@ public final class FileUtils {
final byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
int chunkSize = (int) ((count > 0) ? Math.min(count, DEFAULT_BUFFER_SIZE) : DEFAULT_BUFFER_SIZE);
int c; long total = 0;
int c;
long total = 0;
while ( (c = source.read(buffer, 0, chunkSize)) > 0 ) {
dest.write(buffer, 0, c);
dest.flush();
@ -107,7 +109,9 @@ public final class FileUtils {
if ( count > 0 ) {
chunkSize = (int) Math.min(count - total, DEFAULT_BUFFER_SIZE);
if (chunkSize == 0) break;
if ( chunkSize == 0 ) {
break;
}
}
}
@ -116,13 +120,19 @@ public final class FileUtils {
return total;
}
/**
 * Copies the content of a file to a Writer, decoding the bytes with the given charset.
 * The file is opened as a FileInputStream, the work is delegated to
 * {@code copy(InputStream, Writer, Charset)}, and the stream is closed afterwards.
 *
 * @param source file to read
 * @param inputCharset charset used to decode the file content
 * @param dest Writer receiving the decoded characters
 * @return the value returned by the delegated copy call
 * @throws IOException if the file cannot be opened or the delegated copy fails
 */
public static int copy(final File source, final Charset inputCharset, final Writer dest) throws IOException {
public static int copy(final File source, final Charset inputCharset, final Writer dest)
throws IOException {
InputStream fis = null;
try {
fis = new FileInputStream(source);
return copy(fis, dest, inputCharset);
} finally {
// best-effort close: a failure while closing is deliberately ignored
if (fis != null) try { fis.close(); } catch (final Exception e) {}
if ( fis != null ) {
try {
fis.close();
} catch ( final Exception e ) {
}
}
}
}
@ -131,7 +141,8 @@ public final class FileUtils {
return copy(reader, dest);
}
/**
 * Copies an InputStream to a Writer by wrapping the stream in an InputStreamReader that uses the
 * given charset and delegating to {@code copy(Reader, Writer)}. Neither the stream nor the
 * wrapping reader is closed by this method.
 *
 * @param source stream to read
 * @param dest Writer receiving the decoded characters
 * @param inputCharset charset used to decode the stream
 * @return the value returned by {@code copy(Reader, Writer)}
 * @throws IOException if the delegated copy fails
 */
public static int copy(final InputStream source, final Writer dest, final Charset inputCharset) throws IOException {
public static int copy(final InputStream source, final Writer dest, final Charset inputCharset)
throws IOException {
final InputStreamReader reader = new InputStreamReader(source, inputCharset);
return copy(reader, dest);
}
@ -145,8 +156,12 @@ public final class FileUtils {
public static int copy(final Reader source, final Writer dest) throws IOException {
assert source != null;
assert dest != null;
if (source == null) throw new IOException("source is null");
if (dest == null) throw new IOException("dest is null");
if ( source == null ) {
throw new IOException("source is null");
}
if ( dest == null ) {
throw new IOException("dest is null");
}
final char[] buffer = new char[DEFAULT_BUFFER_SIZE];
int count = 0;
int n = 0;
@ -160,7 +175,9 @@ public final class FileUtils {
assert e != null;
// an "sun.io.MalformedInputException: Missing byte-order mark" - exception may occur here
//Log.logException(e);
throw new IOException(e == null ? "null" : e.getMessage() == null ? e.toString() : e.getMessage(), e);
throw new IOException(
e == null ? "null" : e.getMessage() == null ? e.toString() : e.getMessage(),
e);
}
return count;
}
@ -183,18 +200,29 @@ public final class FileUtils {
*/
public static void copy(final InputStream source, final File dest, final long count) throws IOException {
// make sure the directory that will contain dest exists (result of mkdirs() is not checked)
final String path = dest.getParent();
if (path != null && path.length() > 0) new File(path).mkdirs();
if ( path != null && path.length() > 0 ) {
new File(path).mkdirs();
}
FileOutputStream fos = null;
try {
fos = new FileOutputStream(dest);
// the actual transfer is done by the stream-to-stream copy
copy(source, fos, count);
} finally {
// a failing close is only logged as a warning, it does not raise an exception
if (fos != null) try {fos.close();} catch (final Exception e) { Log.logWarning("FileUtils", "cannot close FileOutputStream for "+ dest +"! "+ e.getMessage()); }
if ( fos != null ) {
try {
fos.close();
} catch ( final Exception e ) {
Log.logWarning(
"FileUtils",
"cannot close FileOutputStream for " + dest + "! " + e.getMessage());
}
}
}
}
/**
* Copies a part of a File to an OutputStream.
*
* @param source File
* @param dest OutputStream
* @param start Number of bytes to skip from the beginning of the File
@ -204,20 +232,33 @@ public final class FileUtils {
* @see #copy(File source, OutputStream dest)
* @see #copy(File source, File dest)
*/
public static void copyRange(final File source, final OutputStream dest, final int start) throws IOException {
public static void copyRange(final File source, final OutputStream dest, final int start)
throws IOException {
InputStream fis = null;
try {
fis = new FileInputStream(source);
// A single skip() call is made; any shortfall is reported as an IllegalStateException.
// NOTE(review): InputStream.skip is allowed to skip fewer bytes than requested for reasons
// other than end of file, so this check may be stricter than intended - confirm.
final long skipped = fis.skip(start);
if (skipped != start) throw new IllegalStateException("Unable to skip '" + start + "' bytes. Only '" + skipped + "' bytes skipped.");
if ( skipped != start ) {
throw new IllegalStateException("Unable to skip '"
+ start
+ "' bytes. Only '"
+ skipped
+ "' bytes skipped.");
}
// count == -1: copy everything that remains after the skipped prefix
copy(fis, dest, -1);
} finally {
// best-effort close: a failure while closing is deliberately ignored
if (fis != null) try { fis.close(); } catch (final Exception e) {}
if ( fis != null ) {
try {
fis.close();
} catch ( final Exception e ) {
}
}
}
}
/**
* Copies a File to an OutputStream.
*
* @param source File
* @param dest OutputStream
* @throws IOException
@ -232,12 +273,18 @@ public final class FileUtils {
fis = new FileInputStream(source);
copy(fis, dest, -1);
} finally {
if (fis != null) try { fis.close(); } catch (final Exception e) {}
if ( fis != null ) {
try {
fis.close();
} catch ( final Exception e ) {
}
}
}
}
/**
* Copies a File to a File.
*
* @param source File
* @param dest File
* @param count the amount of bytes to copy
@ -255,8 +302,18 @@ public final class FileUtils {
fos = new FileOutputStream(dest);
copy(fis, fos, -1);
} finally {
if (fis != null) try {fis.close();} catch (final Exception e) {}
if (fos != null) try {fos.close();} catch (final Exception e) {}
if ( fis != null ) {
try {
fis.close();
} catch ( final Exception e ) {
}
}
if ( fos != null ) {
try {
fos.close();
} catch ( final Exception e ) {
}
}
}
}
@ -297,9 +354,16 @@ public final class FileUtils {
try {
fis = new FileInputStream(source);
int p = 0, c;
while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c;
while ( (c = fis.read(buffer, p, buffer.length - p)) > 0 ) {
p += c;
}
} finally {
if (fis != null) try { fis.close(); } catch (final Exception e) {}
if ( fis != null ) {
try {
fis.close();
} catch ( final Exception e ) {
}
}
fis = null;
}
return buffer;
@ -315,8 +379,18 @@ public final class FileUtils {
zipOut.close();
return byteOut.toByteArray();
} finally {
if (zipOut != null) try { zipOut.close(); } catch (final Exception e) {}
if (byteOut != null) try { byteOut.close(); } catch (final Exception e) {}
if ( zipOut != null ) {
try {
zipOut.close();
} catch ( final Exception e ) {
}
}
if ( byteOut != null ) {
try {
byteOut.close();
} catch ( final Exception e ) {
}
}
}
}
@ -326,7 +400,12 @@ public final class FileUtils {
fos = new FileOutputStream(dest);
writeAndGZip(source, fos);
} finally {
if (fos != null) try {fos.close();} catch (final Exception e) {}
if ( fos != null ) {
try {
fos.close();
} catch ( final Exception e ) {
}
}
}
}
@ -337,18 +416,26 @@ public final class FileUtils {
copy(source, zipOut);
zipOut.close();
} finally {
if (zipOut != null) try { zipOut.close(); } catch (final Exception e) {}
if ( zipOut != null ) {
try {
zipOut.close();
} catch ( final Exception e ) {
}
}
}
}
/**
* This function determines if a byte array is gzip compressed and uncompresses it
*
* @param source properly gzip compressed byte array
* @return uncompressed byte array
* @throws IOException
*/
public static byte[] uncompressGZipArray(byte[] source) throws IOException {
if (source == null) return null;
if ( source == null ) {
return null;
}
// support of gzipped data (requested by roland)
/* "Bitwise OR of signed byte value
@ -394,12 +481,19 @@ public final class FileUtils {
String line;
while ( (line = br.readLine()) != null ) {
line = line.trim();
if (line.length() > 0 && line.charAt(0) != '#') set.add(line.trim().toLowerCase());
if ( line.length() > 0 && line.charAt(0) != '#' ) {
set.add(line.trim().toLowerCase());
}
}
br.close();
} catch ( final IOException e ) {
} finally {
if (br != null) try { br.close(); } catch (final Exception e) {}
if ( br != null ) {
try {
br.close();
} catch ( final Exception e ) {
}
}
}
return set;
}
@ -415,16 +509,18 @@ public final class FileUtils {
}
}
public static void saveMap(final File file, final Map<String, String> props, final String comment) throws IOException {
public static void saveMap(final File file, final Map<String, String> props, final String comment) {
PrintWriter pw = null;
final File tf = new File(file.toString() + "." + (System.currentTimeMillis() % 1000));
try {
pw = new PrintWriter(tf, "UTF-8");
pw.println("# " + comment);
String key, value;
for ( final Map.Entry<String, String> entry : props.entrySet() ) {
key = entry.getKey();
if (key != null)
if ( key != null ) {
key = key.replace("\\", "\\\\").replace("\n", "\\n").replace("=", "\\=");
}
if ( entry.getValue() == null ) {
value = "";
} else {
@ -433,12 +529,27 @@ public final class FileUtils {
pw.println(key + "=" + value);
}
pw.println("# EOF");
} catch ( FileNotFoundException e ) {
Log.logWarning("FileUtils", e.getMessage(), e);
} catch ( UnsupportedEncodingException e ) {
Log.logWarning("FileUtils", e.getMessage(), e);
} finally {
if ( pw != null ) {
pw.close();
}
pw = null;
}
try {
forceMove(tf, file);
} catch ( IOException e ) {
// ignore
}
}
public static Set<String> loadSet(final File file, final int chunksize, final boolean tree) throws IOException {
final Set<String> set = (tree) ? (Set<String>) new TreeSet<String>() : (Set<String>) new HashSet<String>();
public static Set<String> loadSet(final File file, final int chunksize, final boolean tree)
throws IOException {
final Set<String> set =
(tree) ? (Set<String>) new TreeSet<String>() : (Set<String>) new HashSet<String>();
final byte[] b = read(file);
for ( int i = 0; (i + chunksize) <= b.length; i++ ) {
set.add(UTF8.String(b, i, chunksize));
@ -446,8 +557,10 @@ public final class FileUtils {
return set;
}
public static Set<String> loadSet(final File file, final String sep, final boolean tree) throws IOException {
final Set<String> set = (tree) ? (Set<String>) new TreeSet<String>() : (Set<String>) new HashSet<String>();
public static Set<String> loadSet(final File file, final String sep, final boolean tree)
throws IOException {
final Set<String> set =
(tree) ? (Set<String>) new TreeSet<String>() : (Set<String>) new HashSet<String>();
final byte[] b = read(file);
final StringTokenizer st = new StringTokenizer(UTF8.String(b), sep);
while ( st.hasMoreTokens() ) {
@ -456,7 +569,8 @@ public final class FileUtils {
return set;
}
public static void saveSet(final File file, final String format, final Set<byte[]> set, final String sep) throws IOException {
public static void saveSet(final File file, final String format, final Set<byte[]> set, final String sep)
throws IOException {
final File tf = new File(file.toString() + ".prt" + (System.currentTimeMillis() % 1000));
OutputStream os = null;
if ( (format == null) || (format.equals("plain")) ) {
@ -466,21 +580,26 @@ public final class FileUtils {
} else if ( format.equals("zip") ) {
final ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
String name = file.getName();
if (name.endsWith(".zip")) name = name.substring(0, name.length() - 4);
if ( name.endsWith(".zip") ) {
name = name.substring(0, name.length() - 4);
}
zos.putNextEntry(new ZipEntry(name + ".txt"));
os = zos;
}
if ( os != null ) {
for ( final byte[] b : set ) {
os.write(b);
if (sep != null) os.write(UTF8.getBytes(sep));
if ( sep != null ) {
os.write(UTF8.getBytes(sep));
}
}
os.close();
}
forceMove(tf, file);
}
public static void saveSet(final File file, final String format, final RowSet set, final String sep) throws IOException {
public static void saveSet(final File file, final String format, final RowSet set, final String sep)
throws IOException {
final File tf = new File(file.toString() + ".prt" + (System.currentTimeMillis() % 1000));
OutputStream os = null;
if ( (format == null) || (format.equals("plain")) ) {
@ -490,7 +609,9 @@ public final class FileUtils {
} else if ( format.equals("zip") ) {
final ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
String name = file.getName();
if (name.endsWith(".zip")) name = name.substring(0, name.length() - 4);
if ( name.endsWith(".zip") ) {
name = name.substring(0, name.length() - 4);
}
zos.putNextEntry(new ZipEntry(name + ".txt"));
os = zos;
}
@ -500,7 +621,9 @@ public final class FileUtils {
os.write(i.next().getPrimaryKeyBytes());
}
while ( i.hasNext() ) {
if (sep != null) os.write(UTF8.getBytes(sep));
if ( sep != null ) {
os.write(UTF8.getBytes(sep));
}
os.write(i.next().getPrimaryKeyBytes());
}
os.close();
@ -516,6 +639,7 @@ public final class FileUtils {
private final static Pattern escaped_equal = Pattern.compile("\\=", Pattern.LITERAL);
private final static Pattern escaped_newline = Pattern.compile("\\n", Pattern.LITERAL);
private final static Pattern escaped_backslash = Pattern.compile("\\", Pattern.LITERAL);
//private final static Pattern escaped_backslashbackslash = Pattern.compile("\\\\", Pattern.LITERAL);
public static ConcurrentHashMap<String, String> table(final Iterator<String> li) {
@ -524,7 +648,9 @@ public final class FileUtils {
while ( li.hasNext() ) {
int pos = 0;
line = li.next().trim();
if (line.length() > 0 && line.charAt(0) == '#') continue; // exclude comments
if ( line.length() > 0 && line.charAt(0) == '#' ) {
continue; // exclude comments
}
do {
// search for unescaped =
pos = line.indexOf('=', pos + 1);
@ -547,15 +673,18 @@ public final class FileUtils {
}
/**
 * Returns an iterator over the lines of a byte array, decoding the bytes as UTF-8.
 *
 * @param a bytes to read; may be {@code null}
 * @return an iterator over an empty list if {@code a} is {@code null}, otherwise a
 *         StringsIterator reading from the array; {@code null} if the "UTF-8" encoding
 *         is reported as unsupported
 */
public static Iterator<String> strings(final byte[] a) {
if (a == null) return new ArrayList<String>().iterator();
if ( a == null ) {
return new ArrayList<String>().iterator();
}
try {
return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), "UTF-8")));
return new StringsIterator(new BufferedReader(new InputStreamReader(
new ByteArrayInputStream(a),
"UTF-8")));
} catch ( final UnsupportedEncodingException e ) {
return null;
}
}
/**
* Read lines of a file into an ArrayList.
*
@ -576,13 +705,16 @@ public final class FileUtils {
} catch ( final IOException e ) {
// list is empty
} finally {
if (br!=null) try { br.close(); } catch (final Exception e) {}
if ( br != null ) {
try {
br.close();
} catch ( final Exception e ) {
}
}
}
return list;
}
/**
* Write a String to a file (used for string representation of lists).
*
@ -600,7 +732,12 @@ public final class FileUtils {
} catch ( final IOException e ) {
return false;
} finally {
if (bw!=null) try { bw.close(); } catch (final Exception e) {}
if ( bw != null ) {
try {
bw.close();
} catch ( final Exception e ) {
}
}
}
}
@ -625,7 +762,9 @@ public final class FileUtils {
// Read the List
String line = "";
while ( (line = br.readLine()) != null ) {
if (line.length() == 0) continue;
if ( line.length() == 0 ) {
continue;
}
if ( line.charAt(0) != '#' || withcomments ) {
//temp += line + serverCore.CRLF_STRING;
temp.append(line).append(CR).append(LF);
@ -634,7 +773,12 @@ public final class FileUtils {
br.close();
} catch ( final IOException e ) {
} finally {
if (br!=null) try { br.close(); } catch (final Exception e) {}
if ( br != null ) {
try {
br.close();
} catch ( final Exception e ) {
}
}
}
return new String(temp);
@ -642,8 +786,8 @@ public final class FileUtils {
/**
* Read content of a directory into a String array of file names.
* @param dirname The directory to get the file listing from. If it doesn't exist yet,
* it will be created.
*
* @param dirname The directory to get the file listing from. If it doesn't exist yet, it will be created.
* @return array of file names
*/
public static List<String> getDirListing(final String dirname) {
@ -652,11 +796,10 @@ public final class FileUtils {
/**
* Read content of a directory into a String array of file names.
* @param dirname The directory to get the file listing from. If it doesn't exist yet,
* it will be created.
* @param filter String which contains a regular expression which has to be matched by
* file names in order to appear in returned array. All file names will be returned if
* filter is null.
*
* @param dirname The directory to get the file listing from. If it doesn't exist yet, it will be created.
* @param filter String which contains a regular expression which has to be matched by file names in order
* to appear in returned array. All file names will be returned if filter is null.
* @return array of file names
*/
public static List<String> getDirListing(final String dirname, final String filter) {
@ -666,8 +809,7 @@ public final class FileUtils {
/**
* Read content of a directory into a String array of file names.
*
* @param dir The directory to get the file listing from. If it doesn't exist yet,
* it will be created.
* @param dir The directory to get the file listing from. If it doesn't exist yet, it will be created.
* @return array of file names
*/
public static List<String> getDirListing(final File dir) {
@ -676,11 +818,10 @@ public final class FileUtils {
/**
* Read content of a directory into a String array of file names.
* @param dir The directory to get the file listing from. If it doesn't exist yet,
* it will be created.
* @param filter String which contains a regular expression which has to be matched by
* file names in order to appear in returned array. All file names will be returned if
* filter is null.
*
* @param dir The directory to get the file listing from. If it doesn't exist yet, it will be created.
* @param filter String which contains a regular expression which has to be matched by file names in order
* to appear in returned array. All file names will be returned if filter is null.
* @return array of file names
*/
public static List<String> getDirListing(final File dir, final String filter) {
@ -707,17 +848,20 @@ public final class FileUtils {
}
/**
* Returns a List of all dirs and subdirs as File Objects
*
* Warning: untested
* Returns a List of all dirs and subdirs as File objects. Warning: untested.
*/
public static ArrayList<File> getDirsRecursive(final File dir, final String notdir, final boolean excludeDotfiles){
public static ArrayList<File> getDirsRecursive(
final File dir,
final String notdir,
final boolean excludeDotfiles) {
final File[] dirList = dir.listFiles();
final ArrayList<File> resultList = new ArrayList<File>();
ArrayList<File> recursive;
Iterator<File> iter;
for ( int i = 0; i < dirList.length; i++ ) {
if (dirList[i].isDirectory() && (!excludeDotfiles || !dirList[i].getName().startsWith(".")) && !dirList[i].getName().equals(notdir)) {
if ( dirList[i].isDirectory()
&& (!excludeDotfiles || !dirList[i].getName().startsWith("."))
&& !dirList[i].getName().equals(notdir) ) {
resultList.add(dirList[i]);
recursive = getDirsRecursive(dirList[i], notdir, excludeDotfiles);
iter = recursive.iterator();
@ -729,8 +873,6 @@ public final class FileUtils {
return resultList;
}
/**
* Write elements of an Array of Strings to a file (one element per line).
*
@ -746,24 +888,31 @@ public final class FileUtils {
return FileUtils.writeList(listFile, new String(out)); //(File, String)
}
public static class StringsIterator implements Iterator<String> {
public static class StringsIterator implements Iterator<String>
{
private final BufferedReader reader;
private String nextLine;
/**
 * Creates an iterator on top of the given reader and immediately pre-fetches the first line.
 *
 * @param reader source the lines are read from
 */
public StringsIterator(final BufferedReader reader) {
this.reader = reader;
this.nextLine = null;
// prime the look-ahead: this first call fills nextLine; its return value is not used
next();
}
/**
 * @return {@code true} as long as a pre-fetched line is waiting in {@code nextLine}
 */
@Override
public boolean hasNext() {
return this.nextLine != null;
}
@Override
public String next() {
final String line = this.nextLine;
try {
while ( (this.nextLine = this.reader.readLine()) != null ) {
this.nextLine = this.nextLine.trim();
if (this.nextLine.length() > 0) break;
if ( this.nextLine.length() > 0 ) {
break;
}
}
} catch ( final IOException e ) {
this.nextLine = null;
@ -774,6 +923,7 @@ public final class FileUtils {
return line;
}
/**
 * Removal is not supported by this iterator.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@ -795,27 +945,39 @@ public final class FileUtils {
/**
 * Moves all entries of a directory into another directory by renaming them one by one.
 * Returns without doing anything if either argument is not a directory. An entry that
 * cannot be renamed is reported with a log warning and the loop continues with the next one.
 *
 * @param from_dir Directory whose contents will be moved.
 * @param to_dir Directory to move into. It must exist already.
 */
public static void moveAll(final File from_dir, final File to_dir) {
if (!(from_dir.isDirectory())) return;
if (!(to_dir.isDirectory())) return;
if ( !(from_dir.isDirectory()) ) {
return;
}
if ( !(to_dir.isDirectory()) ) {
return;
}
// NOTE(review): File.list() is documented to return null on an I/O error; that case is
// not handled here and would fail at list.length - confirm whether it can occur.
final String[] list = from_dir.list();
for ( int i = 0; i < list.length; i++ ) {
// the warning text joins directory and entry name without a path separator
if(!new File(from_dir, list[i]).renameTo(new File(to_dir, list[i])))
Log.logWarning("serverFileUtils", "moveAll(): could not move from "+ from_dir + list[i] +" to "+ to_dir + list[i]);
if ( !new File(from_dir, list[i]).renameTo(new File(to_dir, list[i])) ) {
Log.logWarning("serverFileUtils", "moveAll(): could not move from "
+ from_dir
+ list[i]
+ " to "
+ to_dir
+ list[i]);
}
}
}
public static class dirlistComparator implements Comparator<File>, Serializable {
public static class dirlistComparator implements Comparator<File>, Serializable
{
/**
* generated serial
*/
private static final long serialVersionUID = -5196490300039230135L;
@Override
public int compare(final File file1, final File file2) {
if ( file1.isDirectory() && !file2.isDirectory() ) {
return -1;
@ -842,18 +1004,23 @@ public final class FileUtils {
final String fileExt = (idx > -1) ? fileName.substring(idx + 1) : "";
// create the temp file
final File tempFile = File.createTempFile(parserClassName + "_" + ((idx>-1)?fileName.substring(0,idx):fileName), (fileExt.length()>0)?"."+fileExt:fileExt);
final File tempFile =
File.createTempFile(
parserClassName + "_" + ((idx > -1) ? fileName.substring(0, idx) : fileName),
(fileExt.length() > 0) ? "." + fileExt : fileExt);
return tempFile;
}
/**
* copies the input stream to one output stream (byte per byte)
*
* @param in
* @param out
* @return number of copied bytes
* @throws IOException
*/
public static int copyToStream(final BufferedInputStream in, final BufferedOutputStream out) throws IOException {
public static int copyToStream(final BufferedInputStream in, final BufferedOutputStream out)
throws IOException {
int count = 0;
// copy bytes
int b;
@ -867,13 +1034,17 @@ public final class FileUtils {
/**
* copies the input stream to both output streams (byte per byte)
*
* @param in
* @param out0
* @param out1
* @return number of copied bytes
* @throws IOException
*/
public static int copyToStreams(final BufferedInputStream in, final BufferedOutputStream out0, final BufferedOutputStream out1) throws IOException {
public static int copyToStreams(
final BufferedInputStream in,
final BufferedOutputStream out0,
final BufferedOutputStream out1) throws IOException {
assert out0 != null;
assert out1 != null;
@ -892,13 +1063,17 @@ public final class FileUtils {
/**
* copies the input stream to all writers (byte per byte)
*
* @param data
* @param writer
* @param charSet
* @return
* @throws IOException
*/
public static int copyToWriter(final BufferedInputStream data, final BufferedWriter writer, final Charset charSet) throws IOException {
public static int copyToWriter(
final BufferedInputStream data,
final BufferedWriter writer,
final Charset charSet) throws IOException {
// the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader."
final Reader sourceReader = new InputStreamReader(data, charSet);
@ -913,7 +1088,11 @@ public final class FileUtils {
return count;
}
public static int copyToWriters(final BufferedInputStream data, final BufferedWriter writer0, final BufferedWriter writer1, final Charset charSet) throws IOException {
public static int copyToWriters(
final BufferedInputStream data,
final BufferedWriter writer0,
final BufferedWriter writer1,
final Charset charSet) throws IOException {
// the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader."
assert writer0 != null;
assert writer1 != null;
@ -933,30 +1112,42 @@ public final class FileUtils {
}
/**
* delete files and directories
* if a directory is not empty, delete also everything inside
* because deletion sometimes fails on windows, there is also a windows exec included
* Delete files and directories. If a directory is not empty, everything inside is deleted as well. Because
* deletion sometimes fails on Windows, a Windows exec is also included.
*
* @param path
*/
public static void deletedelete(final File path) {
if (path == null || !path.exists()) return;
if ( path == null || !path.exists() ) {
return;
}
// empty the directory first
if ( path.isDirectory() ) {
final String[] list = path.list();
if ( list != null ) {
for (final String s: list) deletedelete(new File(path, s));
for ( final String s : list ) {
deletedelete(new File(path, s));
}
}
}
int c = 0;
while ( c++ < 20 ) {
if (!path.exists()) break;
if (path.delete()) break;
if ( !path.exists() ) {
break;
}
if ( path.delete() ) {
break;
}
// some OS may be slow when giving up file pointer
//System.runFinalization();
//System.gc();
try { Thread.sleep(200); } catch (final InterruptedException e) { break; }
try {
Thread.sleep(200);
} catch ( final InterruptedException e ) {
break;
}
}
if ( path.exists() ) {
path.deleteOnExit();
@ -981,7 +1172,9 @@ public final class FileUtils {
Log.logException(e);
}
}
if (path.exists()) Log.logSevere("FileUtils", "cannot delete file " + p);
if ( path.exists() ) {
Log.logSevere("FileUtils", "cannot delete file " + p);
}
}
}

@ -28,7 +28,6 @@
package net.yacy.peers.graphics;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
@ -64,8 +63,8 @@ import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.LookAheadIterator;
public class WebStructureGraph {
public class WebStructureGraph
{
public static int maxref = 300; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia)
public static int maxhosts = 50000; // maximum number of hosts in web structure map
@ -80,9 +79,11 @@ public class WebStructureGraph {
private final static leanrefObject leanrefObjectPOISON = new leanrefObject(null, null);
private static class leanrefObject {
private static class leanrefObject
{
private final DigestURI url;
private final Set<MultiProtocolURI> globalRefURLs;
private leanrefObject(final DigestURI url, final Set<MultiProtocolURI> globalRefURLs) {
this.url = url;
this.globalRefURLs = globalRefURLs;
@ -98,11 +99,16 @@ public class WebStructureGraph {
// load web structure
Map<String, String> loadedStructure;
try {
loadedStructure = (this.structureFile.exists()) ? FileUtils.loadMap(this.structureFile) : new TreeMap<String, String>();
loadedStructure =
(this.structureFile.exists())
? FileUtils.loadMap(this.structureFile)
: new TreeMap<String, String>();
} catch ( final OutOfMemoryError e ) {
loadedStructure = new TreeMap<String, String>();
}
if (loadedStructure != null) this.structure_old.putAll(loadedStructure);
if ( loadedStructure != null ) {
this.structure_old.putAll(loadedStructure);
}
// delete out-dated entries in case the structure is too big
if ( this.structure_old.size() > maxhosts ) {
@ -112,7 +118,9 @@ public class WebStructureGraph {
for ( final Map.Entry<String, String> entry : this.structure_old.entrySet() ) {
key = entry.getKey();
value = entry.getValue();
if (value.length() >= 8) delset.add(value.substring(0, 8) + key);
if ( value.length() >= 8 ) {
delset.add(value.substring(0, 8) + key);
}
}
int delcount = this.structure_old.size() - (maxhosts * 9 / 10);
final Iterator<String> j = delset.iterator();
@ -125,9 +133,12 @@ public class WebStructureGraph {
this.publicRefDNSResolvingWorker.start();
}
private class PublicRefDNSResolvingProcess extends Thread {
private class PublicRefDNSResolvingProcess extends Thread
{
private PublicRefDNSResolvingProcess() {
}
@Override
public void run() {
leanrefObject lro;
try {
@ -139,9 +150,14 @@ public class WebStructureGraph {
}
}
public void generateCitationReference(final DigestURI url, final Document document, final Condenser condenser) {
public void generateCitationReference(
final DigestURI url,
final Document document,
final Condenser condenser) {
// generate citation reference
if (url.isLocal()) return; // we do this only for global urls
if ( url.isLocal() ) {
return; // we do this only for global urls
}
final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
final Iterator<MultiProtocolURI> it = hl.keySet().iterator();
final HashSet<MultiProtocolURI> globalRefURLs = new HashSet<MultiProtocolURI>();
@ -150,14 +166,17 @@ public class WebStructureGraph {
int maxref = 1000;
while ( it.hasNext() && maxref-- > 0 ) {
u = it.next();
if (u == null) continue;
if ( u == null ) {
continue;
}
if ( refhost != null && u.getHost() != null && !u.getHost().equals(refhost) ) {
// this is a global link
globalRefURLs.add(u);
}
}
final leanrefObject lro = new leanrefObject(url, globalRefURLs);
if (globalRefURLs.size() > 0) try {
if ( globalRefURLs.size() > 0 ) {
try {
if ( this.publicRefDNSResolvingWorker.isAlive() ) {
this.publicRefDNSResolvingQueue.put(lro);
} else {
@ -167,6 +186,7 @@ public class WebStructureGraph {
learnrefs(lro);
}
}
}
private void learnrefs(final leanrefObject lro) {
final StringBuilder cpg = new StringBuilder(240);
@ -178,11 +198,17 @@ public class WebStructureGraph {
assert nexturlhashb != null;
if ( nexturlhashb != null ) {
nexturlhash = ASCII.String(nexturlhashb);
assert nexturlhash.length() == 12 : "nexturlhash.length() = " + nexturlhash.length() + ", nexturlhash = " + nexturlhash;
assert nexturlhash.length() == 12 : "nexturlhash.length() = "
+ nexturlhash.length()
+ ", nexturlhash = "
+ nexturlhash;
//assert !nexturlhash.substring(6).equals(refhashp);
// this is a global link
cpg.append(nexturlhash); // store complete hash
assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString();
assert cpg.length() % 12 == 0 : "cpg.length() = "
+ cpg.length()
+ ", cpg = "
+ cpg.toString();
}
}
assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString();
@ -190,13 +216,17 @@ public class WebStructureGraph {
}
/**
 * Counts the reference records contained in a stored reference string.
 * <p>
 * The string is expected to start with an 8-character date prefix followed
 * by zero or more 10-character records (see the "Web Structure Syntax"
 * description written by the save routine of this class).
 *
 * @param refs the stored reference string, may be {@code null}
 * @return the number of 10-character records after the 8-character prefix;
 *         0 if {@code refs} is {@code null} or not longer than the prefix
 */
private static int refstr2count(final String refs) {
    // a single guard is sufficient; nothing to count without a payload
    if ( (refs == null) || (refs.length() <= 8) ) {
        return 0;
    }
    // the payload must be an exact multiple of the record length
    assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length();
    return (refs.length() - 8) / 10;
}
static Map<String, Integer> refstr2map(final String refs) {
if ((refs == null) || (refs.length() <= 8)) return new HashMap<String, Integer>();
if ( (refs == null) || (refs.length() <= 8) ) {
return new HashMap<String, Integer>();
}
final Map<String, Integer> map = new HashMap<String, Integer>();
String c;
final int refsc = refstr2count(refs);
@ -264,31 +294,43 @@ public class WebStructureGraph {
final String key = tailMap.firstKey();
if ( key.startsWith(hosthash) ) {
ref = tailMap.get(key);
if (hostname.length() == 0) hostname = key.substring(7);
if (date.length() == 0) date = ref.substring(0, 8);
if ( hostname.length() == 0 ) {
hostname = key.substring(7);
}
if ( date.length() == 0 ) {
date = ref.substring(0, 8);
}
h.putAll(refstr2map(ref));
}
}
}
if (h.isEmpty()) return null;
if ( h.isEmpty() ) {
return null;
}
return new StructureEntry(hosthash, hostname, date, h);
}
public StructureEntry incomingReferences(final String hosthash) {
final String hostname = hostHash2hostName(hosthash);
if (hostname == null) return null;
if ( hostname == null ) {
return null;
}
// collect the references
WebStructureGraph.StructureEntry sentry;
final HashMap<String, Integer> hosthashes = new HashMap<String, Integer>();
Iterator<WebStructureGraph.StructureEntry> i = new StructureIterator(false);
while ( i.hasNext() ) {
sentry = i.next();
if (sentry.references.containsKey(hosthash)) hosthashes.put(sentry.hosthash, sentry.references.get(hosthash));
if ( sentry.references.containsKey(hosthash) ) {
hosthashes.put(sentry.hosthash, sentry.references.get(hosthash));
}
}
i = new StructureIterator(true);
while ( i.hasNext() ) {
sentry = i.next();
if (sentry.references.containsKey(hosthash)) hosthashes.put(sentry.hosthash, sentry.references.get(hosthash));
if ( sentry.references.containsKey(hosthash) ) {
hosthashes.put(sentry.hosthash, sentry.references.get(hosthash));
}
}
// construct a new structureEntry Object
return new StructureEntry(
@ -298,28 +340,35 @@ public class WebStructureGraph {
hosthashes);
}
public static class HostReferenceFactory implements ReferenceFactory<HostReference> {
public static class HostReferenceFactory implements ReferenceFactory<HostReference>
{
private static final Row hostReferenceRow = new Row("String h-6, Cardinal m-4 {b256}, Cardinal c-4 {b256}", Base64Order.enhancedCoder);
private static final Row hostReferenceRow = new Row(
"String h-6, Cardinal m-4 {b256}, Cardinal c-4 {b256}",
Base64Order.enhancedCoder);
public HostReferenceFactory() {
}
@Override
public Row getRow() {
return hostReferenceRow;
}
@Override
public HostReference produceSlow(final Entry e) {
return new HostReference(e);
}
@Override
public HostReference produceFast(final HostReference e) {
return e;
}
}
public static class HostReference extends AbstractReference implements Reference {
public static class HostReference extends AbstractReference implements Reference
{
private final Row.Entry entry;
@ -339,14 +388,17 @@ public class WebStructureGraph {
this.entry = entry;
}
/**
 * Renders the underlying row entry in its property form.
 * The separator {@code ':'} and the four flags are passed through to
 * {@code Row.Entry.toPropertyForm} unchanged.
 *
 * @return the property-form string produced by the row entry
 */
@Override
public String toPropertyForm() {
    final String propertyForm = this.entry.toPropertyForm(':', true, true, false, true);
    return propertyForm;
}
/**
 * Exposes the row entry that backs this reference.
 *
 * @return the backing row entry itself (not a copy)
 */
@Override
public Entry toKelondroEntry() {
    final Entry backingEntry = this.entry;
    return backingEntry;
}
/**
 * Returns the primary key of the backing row entry.
 * NOTE(review): for host references this is presumably the 6-character
 * host hash stored in column {@code h} - confirm against the constructor.
 *
 * @return the primary key bytes of the row entry
 */
@Override
public byte[] urlhash() {
    final byte[] primaryKey = this.entry.getPrimaryKeyBytes();
    return primaryKey;
}
@ -355,23 +407,30 @@ public class WebStructureGraph {
return (int) this.entry.getColLong(2);
}
/**
 * Reads the date stored in column 1 of the row entry and converts it back
 * with {@code MicroDate.reverseMicroDateDays}.
 * NOTE(review): the result is presumably a time in milliseconds - confirm
 * against MicroDate.
 *
 * @return the converted modification date of this reference
 */
@Override
public long lastModified() {
    final int microDateDays = (int) this.entry.getColLong(1);
    return MicroDate.reverseMicroDateDays(microDateDays);
}
/**
 * Merges another host reference into this one.
 * <p>
 * Column 1 (date) is overwritten if the other reference is newer, and
 * column 2 (count) is overwritten if the other reference has a larger
 * count. Each column is compared and updated exactly once.
 *
 * @param r the reference to merge in; must be a {@link HostReference}
 * @throws ClassCastException if {@code r} is not a {@link HostReference}
 */
@Override
public void join(final Reference r) {
    final HostReference other = (HostReference) r;

    // keep the later of the two dates
    final long otherModified = other.lastModified();
    if ( lastModified() < otherModified ) {
        this.entry.setCol(1, MicroDate.microDateDays(otherModified));
    }

    // keep the larger of the two counts
    final int otherCount = other.count();
    if ( count() < otherCount ) {
        this.entry.setCol(2, otherCount);
    }
}
/**
 * Host references carry no position information.
 *
 * @return a new, empty list on every call
 */
@Override
public Collection<Integer> positions() {
    final Collection<Integer> noPositions = new ArrayList<Integer>(0);
    return noPositions;
}
@ -384,11 +443,14 @@ public class WebStructureGraph {
public synchronized ReferenceContainerCache<HostReference> incomingReferences() {
// we return a cache if the cache is filled and not stale
if (hostReferenceIndexCache != null &&
hostReferenceIndexCacheTime + hostReferenceIndexCacheTTL > System.currentTimeMillis()) return hostReferenceIndexCache;
if ( hostReferenceIndexCache != null
&& hostReferenceIndexCacheTime + hostReferenceIndexCacheTTL > System.currentTimeMillis() ) {
return hostReferenceIndexCache;
}
// collect the references
final ReferenceContainerCache<HostReference> idx = new ReferenceContainerCache<HostReference>(hostReferenceFactory, Base64Order.enhancedCoder, 6);
final ReferenceContainerCache<HostReference> idx =
new ReferenceContainerCache<HostReference>(hostReferenceFactory, Base64Order.enhancedCoder, 6);
// we iterate over all structure entries.
// one structure entry has information that a specific host links to a list of other hosts
@ -415,10 +477,15 @@ public class WebStructureGraph {
structureLoop: while ( structureIterator.hasNext() ) {
sentry = structureIterator.next();
// then we loop over all the hosts that are linked from sentry.hosthash
refloop: for (final Map.Entry<String, Integer> refhosthashandcounter: sentry.references.entrySet()) {
refloop: for ( final Map.Entry<String, Integer> refhosthashandcounter : sentry.references
.entrySet() ) {
term = UTF8.getBytes(refhosthashandcounter.getKey());
try {
hr = new HostReference(ASCII.getBytes(sentry.hosthash), GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date).getTime(), refhosthashandcounter.getValue().intValue());
hr =
new HostReference(
ASCII.getBytes(sentry.hosthash),
GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date).getTime(),
refhosthashandcounter.getValue().intValue());
} catch ( final ParseException e ) {
continue refloop;
}
@ -436,7 +503,9 @@ public class WebStructureGraph {
continue refloop;
}
}
if (System.currentTimeMillis() > timeout) break structureLoop;
if ( System.currentTimeMillis() > timeout ) {
break structureLoop;
}
}
}
@ -459,7 +528,9 @@ public class WebStructureGraph {
public int referencesCount(final String hosthash) {
// returns the number of hosts that are referenced by this hosthash
assert hosthash.length() == 6 : "hosthash = " + hosthash;
if (hosthash == null || hosthash.length() != 6) return 0;
if ( hosthash == null || hosthash.length() != 6 ) {
return 0;
}
SortedMap<String, String> tailMap;
int c = 0;
synchronized ( this.structure_old ) {
@ -513,8 +584,12 @@ public class WebStructureGraph {
// parse the new reference string and join it with the stored references
final StructureEntry structure = outgoingReferences(hosthash);
final Map<String, Integer> refs = (structure == null) ? new HashMap<String, Integer>() : structure.references;
assert reference.length() % 12 == 0 : "reference.length() = " + reference.length() + ", reference = " + reference.toString();
final Map<String, Integer> refs =
(structure == null) ? new HashMap<String, Integer>() : structure.references;
assert reference.length() % 12 == 0 : "reference.length() = "
+ reference.length()
+ ", reference = "
+ reference.toString();
String dom;
int c;
for ( int i = 0; i < reference.length() / 12; i++ ) {
@ -538,10 +613,14 @@ public class WebStructureGraph {
minrefcount = entry.getValue().intValue();
minrefkey = entry.getKey();
}
if (minrefcount == 1) break findloop;
if ( minrefcount == 1 ) {
break findloop;
}
}
// remove the smallest
if (minrefkey == null) break delloop;
if ( minrefkey == null ) {
break delloop;
}
refs.remove(minrefkey);
shrink--;
}
@ -600,36 +679,54 @@ public class WebStructureGraph {
return new StructureIterator(latest);
}
private class StructureIterator extends LookAheadIterator<StructureEntry> implements Iterator<StructureEntry> {
private class StructureIterator extends LookAheadIterator<StructureEntry> implements
Iterator<StructureEntry>
{
private final Iterator<Map.Entry<String, String>> i;
private StructureIterator(final boolean latest) {
this.i = ((latest) ? WebStructureGraph.this.structure_new : WebStructureGraph.this.structure_old).entrySet().iterator();
this.i =
((latest) ? WebStructureGraph.this.structure_new : WebStructureGraph.this.structure_old)
.entrySet()
.iterator();
}
@Override
public StructureEntry next0() {
Map.Entry<String, String> entry = null;
String dom = null, ref = "";
while ( this.i.hasNext() ) {
entry = this.i.next();
ref = entry.getValue();
if ((ref.length() - 8) % 10 != 0) continue;
if ( (ref.length() - 8) % 10 != 0 ) {
continue;
}
dom = entry.getKey();
if (dom.length() >= 8) break;
if ( dom.length() >= 8 ) {
break;
}
dom = null;
}
if (entry == null || dom == null) return null;
if ( entry == null || dom == null ) {
return null;
}
assert (ref.length() - 8) % 10 == 0 : "refs = " + ref + ", length = " + ref.length();
return new StructureEntry(dom.substring(0, 6), dom.substring(7), ref.substring(0, 8), refstr2map(ref));
return new StructureEntry(
dom.substring(0, 6),
dom.substring(7),
ref.substring(0, 8),
refstr2map(ref));
}
}
public static class StructureEntry {
public static class StructureEntry
{
public String hosthash; // the tail of the host hash
public String hostname; // the host name
public String date; // date of latest change
public Map<String, Integer> references; // a map from the referenced host hash to the number of referenced to that host
private StructureEntry(
final String hosthash,
final String hostname,
@ -654,20 +751,32 @@ public class WebStructureGraph {
}
// save to web structure file
log.logInfo("Saving Web Structure File: new = " + this.structure_new.size() + " entries, old = " + this.structure_old.size() + " entries");
log.logInfo("Saving Web Structure File: new = "
+ this.structure_new.size()
+ " entries, old = "
+ this.structure_old.size()
+ " entries");
final long time = System.currentTimeMillis();
joinOldNew();
if (this.structure_old.size() > 0) try {
if ( this.structure_old.size() > 0 ) {
synchronized ( this.structure_old ) {
if ( this.structure_old.size() > 0 ) {
FileUtils.saveMap(this.structureFile, this.structure_old, "Web Structure Syntax: <b64hash(6)>','<host> to <date-yyyymmdd(8)>{<target-b64hash(6)><target-count-hex(4)>}*");
FileUtils
.saveMap(
this.structureFile,
this.structure_old,
"Web Structure Syntax: <b64hash(6)>','<host> to <date-yyyymmdd(8)>{<target-b64hash(6)><target-count-hex(4)>}*");
final long t = Math.max(1, System.currentTimeMillis() - time);
log.logInfo("Saved Web Structure File: " + this.structure_old.size() + " entries in " + t + " milliseconds, " + (this.structure_old.size() * 1000 / t) + " entries/second");
log.logInfo("Saved Web Structure File: "
+ this.structure_old.size()
+ " entries in "
+ t
+ " milliseconds, "
+ (this.structure_old.size() * 1000 / t)
+ " entries/second");
}
this.structure_old.clear();
}
} catch (final IOException e) {
Log.logException(e);
}
}
}

Loading…
Cancel
Save