From e7e429705aa337e91a25ef369c9d7029f9947b2b Mon Sep 17 00:00:00 2001 From: Michael Christen Date: Mon, 5 Dec 2011 16:22:11 +0100 Subject: [PATCH] - less automatic indexing after a search (needs to reset the default crawl profiles) - fix for concurrency problem in storage of serverSwitch Properties - markup update --- .../de/anomic/crawler/CrawlSwitchboard.java | 431 ++++++++---- source/de/anomic/server/serverSwitch.java | 515 +++++++------- source/net/yacy/kelondro/util/FileUtils.java | 653 ++++++++++++------ .../peers/graphics/WebStructureGraph.java | 397 +++++++---- 4 files changed, 1273 insertions(+), 723 deletions(-) diff --git a/source/de/anomic/crawler/CrawlSwitchboard.java b/source/de/anomic/crawler/CrawlSwitchboard.java index d056c8e88..7f78fd13f 100644 --- a/source/de/anomic/crawler/CrawlSwitchboard.java +++ b/source/de/anomic/crawler/CrawlSwitchboard.java @@ -45,18 +45,19 @@ import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.kelondroException; import net.yacy.repository.RegexHelper; -public final class CrawlSwitchboard { +public final class CrawlSwitchboard +{ - public static final String CRAWL_PROFILE_PROXY = "proxy"; - public static final String CRAWL_PROFILE_REMOTE = "remote"; - public static final String CRAWL_PROFILE_SNIPPET_LOCAL_TEXT = "snippetLocalText"; - public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT = "snippetGlobalText"; - public static final String CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA = "snippetLocalMedia"; - public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA = "snippetGlobalMedia"; - public static final String CRAWL_PROFILE_SURROGATE = "surrogates"; + public static final String CRAWL_PROFILE_PROXY = "proxy"; + public static final String CRAWL_PROFILE_REMOTE = "remote"; + public static final String CRAWL_PROFILE_SNIPPET_LOCAL_TEXT = "snippetLocalText"; + public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT = "snippetGlobalText"; + public static final String CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA = "snippetLocalMedia"; + public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA = "snippetGlobalMedia"; + public static final String CRAWL_PROFILE_SURROGATE = "surrogates"; - public static final String DBFILE_ACTIVE_CRAWL_PROFILES = "crawlProfilesActive.heap"; - public static final String DBFILE_PASSIVE_CRAWL_PROFILES = "crawlProfilesPassive.heap"; + public static final String DBFILE_ACTIVE_CRAWL_PROFILES = "crawlProfilesActive.heap"; + public static final String DBFILE_PASSIVE_CRAWL_PROFILES = "crawlProfilesPassive.heap"; public static final long CRAWL_PROFILE_PROXY_RECRAWL_CYCLE = 60L * 24L; public static final long CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE = 60L * 24L * 30L; @@ -65,30 +66,28 @@ public final class CrawlSwitchboard { public static final long CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE = 60L * 24L * 30L; public static final long CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE = 60L * 24L * 30L; - private final Log log; - private MapHeap profilesActiveCrawls; + private final Log log; + private MapHeap profilesActiveCrawls; private final MapHeap profilesPassiveCrawls; private final Map profilesActiveCrawlsCache; //TreeMap(Base64Order.enhancedCoder); - public CrawlProfile defaultProxyProfile; - public CrawlProfile defaultRemoteProfile; - public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile; - public CrawlProfile defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile; - public CrawlProfile defaultSurrogateProfile; - private final File queuesRoot; - - public CrawlSwitchboard( - final String networkName, - final Log log, - final File queuesRoot) { + public CrawlProfile defaultProxyProfile; + public CrawlProfile defaultRemoteProfile; + public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile; + public CrawlProfile defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile; + public CrawlProfile defaultSurrogateProfile; + private final File queuesRoot; + + public CrawlSwitchboard(final String networkName, final Log log, final File queuesRoot) { log.logInfo("Initializing Word Index for the network '" + networkName + "'."); - if (networkName == null || networkName.length() == 0) { + if ( networkName == null || networkName.length() == 0 ) { log.logSevere("no network name given - shutting down"); System.exit(0); } this.log = log; - this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap(Base64Order.enhancedCoder)); + this.profilesActiveCrawlsCache = + Collections.synchronizedMap(new TreeMap(Base64Order.enhancedCoder)); // make crawl profiles database and default profiles this.queuesRoot = queuesRoot; @@ -97,84 +96,115 @@ public final class CrawlSwitchboard { final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES); this.profilesActiveCrawls = loadFromDB(profilesActiveFile); - for (final byte[] handle : this.profilesActiveCrawls.keySet()) { + for ( final byte[] handle : this.profilesActiveCrawls.keySet() ) { CrawlProfile p; try { p = new CrawlProfile(this.profilesActiveCrawls.get(handle)); - } catch (final IOException e) { + } catch ( final IOException e ) { p = null; - } catch (final RowSpaceExceededException e) { + } catch ( final RowSpaceExceededException e ) { p = null; } - if (p == null) continue; - if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) { + if ( p == null ) { + continue; + } + if ( !RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH)) ) { removeActive(handle); - Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name() - + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH - + " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTMATCH)); - } else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) { + Log.logWarning("CrawlProfiles", "removed Profile " + + p.handle() + + ": " + + p.name() + + " from active crawls since " + + CrawlProfile.FILTER_URL_MUSTMATCH + + " is no valid regular expression: " + + p.get(CrawlProfile.FILTER_URL_MUSTMATCH)); + } else if ( !RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH)) ) { removeActive(handle); - Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name() - + " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH - + " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH)); + Log.logWarning("CrawlProfiles", "removed Profile " + + p.handle() + + ": " + + p.name() + + " from active crawls since " + + CrawlProfile.FILTER_URL_MUSTNOTMATCH + + " is no valid regular expression: " + + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH)); } else { Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); } } initActiveCrawlProfiles(); - log.logInfo("Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries"); + log.logInfo("Loaded active crawl profiles from file " + + profilesActiveFile.getName() + + ", " + + this.profilesActiveCrawls.size() + + " entries"); final File profilesPassiveFile = new File(queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES); this.profilesPassiveCrawls = loadFromDB(profilesPassiveFile); - for (final byte[] handle : this.profilesPassiveCrawls.keySet()) { + for ( final byte[] handle : this.profilesPassiveCrawls.keySet() ) { CrawlProfile p; try { p = new CrawlProfile(this.profilesPassiveCrawls.get(handle)); Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); - } catch (final IOException e) { + } catch ( final IOException e ) { continue; - } catch (final RowSpaceExceededException e) { + } catch ( final RowSpaceExceededException e ) { continue; } } - log.logInfo("Loaded passive crawl profiles from file " + profilesPassiveFile.getName() + - ", " + this.profilesPassiveCrawls.size() + " entries" + - ", " + profilesPassiveFile.length()/1024); + log.logInfo("Loaded passive crawl profiles from file " + + profilesPassiveFile.getName() + + ", " + + this.profilesPassiveCrawls.size() + + " entries" + + ", " + + profilesPassiveFile.length() + / 1024); } public CrawlProfile getActive(final byte[] profileKey) { - if (profileKey == null) return null; + if ( profileKey == null ) { + return null; + } // get from cache CrawlProfile p = this.profilesActiveCrawlsCache.get(profileKey); - if (p != null) return p; + if ( p != null ) { + return p; + } // get from db Map m; try { m = this.profilesActiveCrawls.get(profileKey); - } catch (final IOException e) { + } catch ( final IOException e ) { m = null; - } catch (final RowSpaceExceededException e) { + } catch ( final RowSpaceExceededException e ) { m = null; } - if (m == null) return null; + if ( m == null ) { + return null; + } p = new CrawlProfile(m); this.profilesActiveCrawlsCache.put(profileKey, p); return p; } public CrawlProfile getPassive(final byte[] profileKey) { - if (profileKey == null) return null; + if ( profileKey == null ) { + return null; + } Map m; try { m = this.profilesPassiveCrawls.get(profileKey); - } catch (final IOException e) { + } catch ( final IOException e ) { m = null; - } catch (final RowSpaceExceededException e) { + } catch ( final RowSpaceExceededException e ) { m = null; } - if (m == null) return null; + if ( m == null ) { + return null; + } return new CrawlProfile(m); } @@ -187,13 +217,17 @@ public final class CrawlSwitchboard { } public void removeActive(final byte[] profileKey) { - if (profileKey == null) return; + if ( profileKey == null ) { + return; + } this.profilesActiveCrawlsCache.remove(profileKey); this.profilesActiveCrawls.remove(profileKey); } public void removePassive(final byte[] profileKey) { - if (profileKey == null) return; + if ( profileKey == null ) { + return; + } this.profilesPassiveCrawls.remove(profileKey); } @@ -217,18 +251,32 @@ public final class CrawlSwitchboard { CrawlProfile profile; String name; try { - for (final byte[] handle: this.profilesActiveCrawls.keySet()) { + for ( final byte[] handle : this.profilesActiveCrawls.keySet() ) { profile = new CrawlProfile(this.profilesActiveCrawls.get(handle)); name = profile.name(); - if (name.equals(CRAWL_PROFILE_PROXY)) this.defaultProxyProfile = profile; - if (name.equals(CRAWL_PROFILE_REMOTE)) this.defaultRemoteProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) this.defaultTextSnippetLocalProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) this.defaultTextSnippetGlobalProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) this.defaultMediaSnippetLocalProfile = profile; - if (name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) this.defaultMediaSnippetGlobalProfile = profile; - if (name.equals(CRAWL_PROFILE_SURROGATE)) this.defaultSurrogateProfile = profile; + if ( name.equals(CRAWL_PROFILE_PROXY) ) { + this.defaultProxyProfile = profile; + } + if ( name.equals(CRAWL_PROFILE_REMOTE) ) { + this.defaultRemoteProfile = profile; + } + if ( name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ) { + this.defaultTextSnippetLocalProfile = profile; + } + if ( name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ) { + this.defaultTextSnippetGlobalProfile = profile; + } + if ( name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ) { + this.defaultMediaSnippetLocalProfile = profile; + } + if ( name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ) { + this.defaultMediaSnippetGlobalProfile = profile; + } + if ( name.equals(CRAWL_PROFILE_SURROGATE) ) { + this.defaultSurrogateProfile = profile; + } } - } catch (final Exception e) { + } catch ( final Exception e ) { this.profilesActiveCrawls.clear(); this.defaultProxyProfile = null; this.defaultRemoteProfile = null; @@ -239,69 +287,215 @@ public final class CrawlSwitchboard { this.defaultSurrogateProfile = null; } - if (this.defaultProxyProfile == null) { + if ( this.defaultProxyProfile == null ) { // generate new default entry for proxy crawling - this.defaultProxyProfile = new CrawlProfile( - "proxy", null, - CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, - CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, + this.defaultProxyProfile = + new CrawlProfile( + "proxy", + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, "", 0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/, true, - CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, false, + CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), + -1, + false, true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/, true /*getConfigBool(PROXY_INDEXING_LOCAL_MEDIA, true)*/, true, - false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, true, true, true, + false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, + true, + true, + true, CacheStrategy.IFFRESH); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultProxyProfile.handle()), this.defaultProxyProfile); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultProxyProfile.handle()), + this.defaultProxyProfile); } - if (this.defaultRemoteProfile == null) { + if ( this.defaultRemoteProfile == null ) { // generate new default entry for remote crawling - this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", CrawlProfile.MATCH_NEVER_STRING, 0, true, - -1, -1, true, true, true, false, false, true, true, false, CacheStrategy.IFFRESH); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultRemoteProfile.handle()), this.defaultRemoteProfile); + this.defaultRemoteProfile = + new CrawlProfile( + CRAWL_PROFILE_REMOTE, + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + "", + CrawlProfile.MATCH_NEVER_STRING, + 0, + false, + -1, + -1, + true, + true, + true, + false, + false, + true, + true, + false, + CacheStrategy.IFFRESH); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultRemoteProfile.handle()), + this.defaultRemoteProfile); } - if (this.defaultTextSnippetLocalProfile == null) { + if ( this.defaultTextSnippetLocalProfile == null ) { // generate new default entry for snippet fetch and optional crawling - this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true, - CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()), this.defaultTextSnippetLocalProfile); + this.defaultTextSnippetLocalProfile = + new CrawlProfile( + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + "", + 0, + false, + CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), + -1, + true, + false, + false, + true, + false, + true, + true, + false, + CacheStrategy.IFEXIST); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()), + this.defaultTextSnippetLocalProfile); } - if (this.defaultTextSnippetGlobalProfile == null) { + if ( this.defaultTextSnippetGlobalProfile == null ) { // generate new default entry for snippet fetch and optional crawling - this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true, - CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, false, true, true, false, CacheStrategy.IFEXIST); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()), this.defaultTextSnippetGlobalProfile); + this.defaultTextSnippetGlobalProfile = + new CrawlProfile( + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + "", + 0, + false, + CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), + -1, + true, + true, + true, + true, + false, + true, + true, + false, + CacheStrategy.IFEXIST); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()), + this.defaultTextSnippetGlobalProfile); } this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST); - if (this.defaultMediaSnippetLocalProfile == null) { + if ( this.defaultMediaSnippetLocalProfile == null ) { // generate new default entry for snippet fetch and optional crawling - this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true, - CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), this.defaultMediaSnippetLocalProfile); + this.defaultMediaSnippetLocalProfile = + new CrawlProfile( + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + "", + 0, + false, + CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), + -1, + true, + false, + false, + true, + false, + true, + true, + false, + CacheStrategy.IFEXIST); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), + this.defaultMediaSnippetLocalProfile); } - if (this.defaultMediaSnippetGlobalProfile == null) { + if ( this.defaultMediaSnippetGlobalProfile == null ) { // generate new default entry for snippet fetch and optional crawling - this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, true, - CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, false, true, true, false, CacheStrategy.IFEXIST); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), this.defaultMediaSnippetGlobalProfile); + this.defaultMediaSnippetGlobalProfile = + new CrawlProfile( + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + "", + 0, + false, + CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), + -1, + true, + false, + true, + true, + false, + true, + true, + false, + CacheStrategy.IFEXIST); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), + this.defaultMediaSnippetGlobalProfile); } - if (this.defaultSurrogateProfile == null) { + if ( this.defaultSurrogateProfile == null ) { // generate new default entry for surrogate parsing - this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0, false, - CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, true, true, false, CacheStrategy.NOCACHE); - this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultSurrogateProfile.handle()), this.defaultSurrogateProfile); + this.defaultSurrogateProfile = + new CrawlProfile( + CRAWL_PROFILE_SURROGATE, + null, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + CrawlProfile.MATCH_ALL_STRING, + CrawlProfile.MATCH_NEVER_STRING, + "", + 0, + false, + CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), + -1, + true, + true, + false, + false, + false, + true, + true, + false, + CacheStrategy.NOCACHE); + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultSurrogateProfile.handle()), + this.defaultSurrogateProfile); } } private void resetProfiles() { this.profilesActiveCrawlsCache.clear(); final File pdb = new File(this.queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES); - if (pdb.exists()) FileUtils.deletedelete(pdb); + if ( pdb.exists() ) { + FileUtils.deletedelete(pdb); + } try { - this.profilesActiveCrawls = new MapHeap(pdb, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e1) { + this.profilesActiveCrawls = + new MapHeap(pdb, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); + } catch ( final IOException e1 ) { Log.logException(e1); this.profilesActiveCrawls = null; } @@ -313,48 +507,49 @@ public final class CrawlSwitchboard { CrawlProfile entry; boolean hasDoneSomething = false; try { - for (final byte[] handle: this.profilesActiveCrawls.keySet()) { + for ( final byte[] handle : this.profilesActiveCrawls.keySet() ) { // check for interruption - if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress"); + if ( Thread.currentThread().isInterrupted() ) { + throw new InterruptedException("Shutdown in progress"); + } // getting next profile try { entry = new CrawlProfile(this.profilesActiveCrawls.get(handle)); - } catch (final IOException e) { + } catch ( final IOException e ) { continue; - } catch (final RowSpaceExceededException e) { + } catch ( final RowSpaceExceededException e ) { continue; } - if (!((entry.name().equals(CRAWL_PROFILE_PROXY)) || - (entry.name().equals(CRAWL_PROFILE_REMOTE)) || - (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) || - (entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) || - (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) || - (entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) || - (entry.name().equals(CRAWL_PROFILE_SURROGATE)))) { + if ( !((entry.name().equals(CRAWL_PROFILE_PROXY)) + || (entry.name().equals(CRAWL_PROFILE_REMOTE)) + || (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)) + || (entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)) + || (entry.name().equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)) + || (entry.name().equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)) || (entry.name() + .equals(CRAWL_PROFILE_SURROGATE))) ) { final CrawlProfile p = new CrawlProfile(entry); this.profilesPassiveCrawls.put(UTF8.getBytes(p.handle()), p); this.profilesActiveCrawls.remove(handle); hasDoneSomething = true; } } - } catch (final kelondroException e) { + } catch ( final kelondroException e ) { resetProfiles(); hasDoneSomething = true; } return hasDoneSomething; } - public void close() { this.profilesActiveCrawlsCache.clear(); this.profilesActiveCrawls.close(); this.profilesPassiveCrawls.close(); } - /** * Loads crawl profiles from a DB file. + * * @param file DB file * @return crawl profile data */ @@ -362,12 +557,14 @@ public final class CrawlSwitchboard { MapHeap ret; try { ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e) { - Log.logException(e);Log.logException(e); + } catch ( final IOException e ) { + Log.logException(e); + Log.logException(e); FileUtils.deletedelete(file); try { - ret = new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); - } catch (final IOException e1) { + ret = + new MapHeap(file, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, ' '); + } catch ( final IOException e1 ) { Log.logException(e1); ret = null; } diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index 7d7defb34..44203d0cc 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -39,8 +39,8 @@ import java.util.Map; import java.util.NavigableMap; import java.util.Random; import java.util.TreeMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.Domains; @@ -52,27 +52,31 @@ import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.workflow.BusyThread; import net.yacy.kelondro.workflow.WorkflowThread; - import de.anomic.server.serverAccessTracker.Track; import de.anomic.server.serverCore.Session; -public class serverSwitch { - +public class serverSwitch +{ + // configuration management - private final File configFile; - private final String configComment; - private final File dataPath; - protected final File appPath; - protected boolean firstInit; - protected Log log; - protected int serverJobs; - private ConcurrentMap configProps; - private final ConcurrentMap configRemoved; - private final ConcurrentMap authorization; - private final NavigableMap workerThreads; - private final serverAccessTracker accessTracker; - - public serverSwitch(final File dataPath, final File appPath, final String initPath, final String configPath) { + private final File configFile; + private final String configComment; + private final File dataPath; + protected final File appPath; + protected boolean firstInit; + protected Log log; + protected int serverJobs; + private ConcurrentMap configProps; + private final ConcurrentMap configRemoved; + private final ConcurrentMap authorization; + private final NavigableMap workerThreads; + private final serverAccessTracker accessTracker; + + public serverSwitch( + final File dataPath, + final File appPath, + final String initPath, + final String configPath) { // we initialize the switchboard with a property file, // but maintain these properties then later in a new 'config' file // to reset all changed configs, the config file must @@ -81,47 +85,51 @@ public class serverSwitch { // file name of the config file this.dataPath = dataPath; this.appPath = appPath; - this.configComment = "This is an automatically generated file, updated by serverAbstractSwitch and initialized by " + initPath; + this.configComment = + "This is an automatically generated file, updated by serverAbstractSwitch and initialized by " + + initPath; final File initFile = new File(appPath, initPath); this.configFile = new File(dataPath, configPath); // propertiesFile(config); - firstInit = !configFile.exists(); // this is true if the application was started for the first time - new File(configFile.getParent()).mkdir(); + this.firstInit = !this.configFile.exists(); // this is true if the application was started for the first time + new File(this.configFile.getParent()).mkdir(); // predefine init's final ConcurrentMap initProps; - if (initFile.exists()) + if ( initFile.exists() ) { initProps = FileUtils.loadMap(initFile); - else + } else { initProps = new ConcurrentHashMap(); - + } + // if 'pro'-version is selected, overload standard settings with 'pro'-settings Iterator i; String prop; - + // delete the 'pro' init settings i = initProps.keySet().iterator(); - while (i.hasNext()) { + while ( i.hasNext() ) { prop = i.next(); - if (prop.endsWith("__pro")) { + if ( prop.endsWith("__pro") ) { i.remove(); } } - + // load config's from last save - if (configFile.exists()) - configProps = FileUtils.loadMap(configFile); - else - configProps = new ConcurrentHashMap(); + if ( this.configFile.exists() ) { + this.configProps = FileUtils.loadMap(this.configFile); + } else { + this.configProps = new ConcurrentHashMap(); + } // remove all values from config that do not appear in init - configRemoved = new ConcurrentHashMap(); - synchronized (configProps) { - i = configProps.keySet().iterator(); + this.configRemoved = new ConcurrentHashMap(); + synchronized ( this.configProps ) { + i = this.configProps.keySet().iterator(); String key; - while (i.hasNext()) { + while ( i.hasNext() ) { key = i.next(); - if (!(initProps.containsKey(key))) { - configRemoved.put(key, this.configProps.get(key)); + if ( !(initProps.containsKey(key)) ) { + this.configRemoved.put(key, this.configProps.get(key)); i.remove(); } } @@ -132,8 +140,8 @@ public class serverSwitch { // merge new props from init to config // this is necessary for migration, when new properties are attached - initProps.putAll(configProps); - configProps = initProps; + initProps.putAll(this.configProps); + this.configProps = initProps; // save result; this may initially create a config file after // initialization @@ -141,48 +149,50 @@ public class serverSwitch { } // other settings - authorization = new ConcurrentHashMap(); + this.authorization = new ConcurrentHashMap(); // init thread control - workerThreads = new TreeMap(); + this.workerThreads = new TreeMap(); // init busy state control - serverJobs = 0; - + this.serverJobs = 0; + // init server tracking - this.accessTracker = new serverAccessTracker( - getConfigLong("server.maxTrackingTime", 60 * 60 * 1000), - (int) getConfigLong("server.maxTrackingCount", 1000), - (int) getConfigLong("server.maxTrackingHostCount", 100) - ); + this.accessTracker = + new serverAccessTracker( + getConfigLong("server.maxTrackingTime", 60 * 60 * 1000), + (int) getConfigLong("server.maxTrackingCount", 1000), + (int) getConfigLong("server.maxTrackingHostCount", 100)); } - + public String myPublicIP() { // if a static IP was configured, we have to return it here ... final String staticIP = getConfig("staticIP", ""); - if (staticIP.length() > 0) { + if ( staticIP.length() > 0 ) { return staticIP; } // otherwise we return the real IP address of this host final InetAddress pLIP = Domains.myPublicLocalIP(); - if (pLIP != null) return pLIP.getHostAddress(); + if ( pLIP != null ) { + return pLIP.getHostAddress(); + } return null; } - + // a logger for this switchboard public void setLog(final Log log) { - this.log = log; + this.log = log; } public Log getLog() { - return log; + return this.log; } public void setConfig(final Map otherConfigs) { final Iterator> i = otherConfigs.entrySet().iterator(); Map.Entry entry; - while (i.hasNext()) { + while ( i.hasNext() ) { entry = i.next(); setConfig(entry.getKey(), entry.getValue()); } @@ -202,94 +212,99 @@ public class serverSwitch { public void setConfig(final String key, final String value) { // set the value - final String oldValue = configProps.put(key, value); - if (oldValue == null || !value.equals(oldValue)) saveConfig(); + final String oldValue = this.configProps.put(key, value); + if ( oldValue == null || !value.equals(oldValue) ) { + saveConfig(); + } } public void removeConfig(final String key) { - configProps.remove(key); + this.configProps.remove(key); } - + /** * Gets a configuration parameter from the properties. + * * @param key name of the configuration parameter - * @param dflt default value which will be used in case parameter can not be - * found or if it is invalid + * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value */ public String getConfig(final String key, final String dflt) { // get the value - final String s = configProps.get(key); + final String s = this.configProps.get(key); // return value - if (s == null) return dflt; + if ( s == null ) { + return dflt; + } return s; } /** * Gets a configuration parameter from the properties. + * * @param key name of the configuration parameter - * @param dflt default value which will be used in case parameter can not be - * found or if it is invalid + * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value */ public long getConfigLong(final String key, final long dflt) { try { return Long.parseLong(getConfig(key, Long.toString(dflt))); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { return dflt; } } /** * Gets a configuration parameter from the properties. + * * @param key name of the configuration parameter - * @param dflt default value which will be used in case parameter can not be - * found or if it is invalid + * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value */ public double getConfigFloat(final String key, final float dflt) { try { return Float.parseFloat(getConfig(key, Float.toString(dflt))); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { return dflt; } } /** * Gets a configuration parameter from the properties. + * * @param key name of the configuration parameter - * @param dflt default value which will be used in case parameter can not be - * found or if it is invalid + * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value */ public int getConfigInt(final String key, final int dflt) { try { return Integer.parseInt(getConfig(key, Integer.toString(dflt))); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { return dflt; } } /** * Gets a configuration parameter from the properties. + * * @param key name of the configuration parameter - * @param dflt default value which will be used in case parameter can not be - * found or if it is invalid + * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value */ public boolean getConfigBool(final String key, final boolean dflt) { return Boolean.parseBoolean(getConfig(key, Boolean.toString(dflt))); } - + /** * Create a File instance for a configuration setting specifying a path. - * @param key config key - * @param dflt default path value, that is used when there is no value - * key in the configuration. - * @return if the value of the setting is an absolute path String, then the - * returned File is derived from this setting only. Otherwise the path's file - * is constructed from the applications root path + the relative path setting. + * + * @param key config key + * @param dflt default path value, that is used when there is no value key in the + * configuration. + * @return if the value of the setting is an absolute path String, then the returned File is derived from + * this setting only. Otherwise the path's file is constructed from the applications root path + + * the relative path setting. */ public File getDataPath(final String key, final String dflt) { File ret; @@ -298,7 +313,7 @@ public class serverSwitch { ret = (f.isAbsolute() ? new File(f.getAbsolutePath()) : new File(this.dataPath, path)); return ret; } - + public File getAppPath(final String key, final String dflt) { File ret; final String path = getConfig(key, dflt).replace('\\', '/'); @@ -308,265 +323,296 @@ public class serverSwitch { } public Iterator configKeys() { - return configProps.keySet().iterator(); + return this.configProps.keySet().iterator(); } private void saveConfig() { - try { - ConcurrentMap configPropsCopy = new ConcurrentHashMap(); - configPropsCopy.putAll(configProps); // avoid concurrency problems - FileUtils.saveMap(configFile, configPropsCopy, configComment); - } catch (final IOException e) { - log.logSevere("CONFIG: Cannot write config file " + configFile.toString() + ": " + e.getMessage(), e); - //System.out.println("ERROR: cannot write config file " + configFile.toString() + ": " + e.getMessage()); - } + ConcurrentMap configPropsCopy = new ConcurrentHashMap(); + configPropsCopy.putAll(this.configProps); // avoid concurrency problems + FileUtils.saveMap(this.configFile, configPropsCopy, this.configComment); } /** * Gets configuration parameters which have been removed during initialization. + * * @return contains parameter name as key and parameter value as value */ public ConcurrentMap getRemoved() { - return configRemoved; + return this.configRemoved; } public void deployThread( - final String threadName, - final String threadShortDescription, - final String threadLongDescription, - final String threadMonitorURL, - final BusyThread newThread, - final long startupDelay) { - deployThread(threadName, threadShortDescription, threadLongDescription, threadMonitorURL, - newThread, startupDelay, - Long.parseLong(getConfig(threadName + "_idlesleep" , "100")), - Long.parseLong(getConfig(threadName + "_busysleep" , "1000")), - Long.parseLong(getConfig(threadName + "_memprereq" , "1000000"))); + final String threadName, + final String threadShortDescription, + final String threadLongDescription, + final String threadMonitorURL, + final BusyThread newThread, + final long startupDelay) { + deployThread( + threadName, + threadShortDescription, + threadLongDescription, + threadMonitorURL, + newThread, + startupDelay, + Long.parseLong(getConfig(threadName + "_idlesleep", "100")), + Long.parseLong(getConfig(threadName + "_busysleep", "1000")), + Long.parseLong(getConfig(threadName + "_memprereq", "1000000"))); } public void deployThread( - final String threadName, - final String threadShortDescription, - final String threadLongDescription, - final String threadMonitorURL, - final BusyThread newThread, - final long startupDelay, - final long initialIdleSleep, - final long initialBusySleep, - final long initialMemoryPreRequisite) { - if (newThread.isAlive()) throw new RuntimeException("undeployed threads must not live; they are started as part of the deployment"); + final String threadName, + final String threadShortDescription, + final String threadLongDescription, + final String threadMonitorURL, + final BusyThread newThread, + final long startupDelay, + final long initialIdleSleep, + final long initialBusySleep, + final long initialMemoryPreRequisite) { + if ( newThread.isAlive() ) { + throw new RuntimeException( + "undeployed threads must not live; they are started as part of the deployment"); + } newThread.setStartupSleep(startupDelay); long x; try { - x = Long.parseLong(getConfig(threadName + "_idlesleep" , "novalue")); + x = Long.parseLong(getConfig(threadName + "_idlesleep", "novalue")); newThread.setIdleSleep(x); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { newThread.setIdleSleep(initialIdleSleep); setConfig(threadName + "_idlesleep", initialIdleSleep); } try { - x = Long.parseLong(getConfig(threadName + "_busysleep" , "novalue")); + x = Long.parseLong(getConfig(threadName + "_busysleep", "novalue")); newThread.setBusySleep(x); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { newThread.setBusySleep(initialBusySleep); setConfig(threadName + "_busysleep", initialBusySleep); } try { - x = Long.parseLong(getConfig(threadName + "_memprereq" , "novalue")); + x = Long.parseLong(getConfig(threadName + "_memprereq", "novalue")); newThread.setMemPreReqisite(x); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { newThread.setMemPreReqisite(initialMemoryPreRequisite); setConfig(threadName + "_memprereq", initialMemoryPreRequisite); } newThread.setDescription(threadShortDescription, threadLongDescription, threadMonitorURL); - workerThreads.put(threadName, newThread); + this.workerThreads.put(threadName, newThread); // start the thread - if (workerThreads.containsKey(threadName)) newThread.start(); + if ( this.workerThreads.containsKey(threadName) ) { + newThread.start(); + } } public BusyThread getThread(final String threadName) { - return workerThreads.get(threadName); + return this.workerThreads.get(threadName); } - - public void setThreadPerformance(final String threadName, final long idleMillis, final long busyMillis, final long memprereqBytes) { - final BusyThread thread = workerThreads.get(threadName); - if (thread != null) { + + public void setThreadPerformance( + final String threadName, + final long idleMillis, + final long busyMillis, + final long memprereqBytes) { + final BusyThread thread = this.workerThreads.get(threadName); + if ( thread != null ) { setConfig(threadName + "_idlesleep", thread.setIdleSleep(idleMillis)); setConfig(threadName + "_busysleep", thread.setBusySleep(busyMillis)); setConfig(threadName + "_memprereq", memprereqBytes); thread.setMemPreReqisite(memprereqBytes); } } - + public synchronized void terminateThread(final String threadName, final boolean waitFor) { - if (workerThreads.containsKey(threadName)) { - ((WorkflowThread) workerThreads.get(threadName)).terminate(waitFor); - workerThreads.remove(threadName); + if ( this.workerThreads.containsKey(threadName) ) { + ((WorkflowThread) this.workerThreads.get(threadName)).terminate(waitFor); + this.workerThreads.remove(threadName); } } public void intermissionAllThreads(final long pause) { - final Iterator e = workerThreads.keySet().iterator(); - while (e.hasNext()) { - workerThreads.get(e.next()).intermission(pause); + final Iterator e = this.workerThreads.keySet().iterator(); + while ( e.hasNext() ) { + this.workerThreads.get(e.next()).intermission(pause); } } - + public synchronized void terminateAllThreads(final boolean waitFor) { - Iterator e = workerThreads.keySet().iterator(); - while (e.hasNext()) { - ((WorkflowThread) workerThreads.get(e.next())).terminate(false); - } - if (waitFor) { - e = workerThreads.keySet().iterator(); - while (e.hasNext()) { - ((WorkflowThread) workerThreads.get(e.next())).terminate(true); + Iterator e = this.workerThreads.keySet().iterator(); + while ( e.hasNext() ) { + ((WorkflowThread) this.workerThreads.get(e.next())).terminate(false); + } + if ( waitFor ) { + e = this.workerThreads.keySet().iterator(); + while ( e.hasNext() ) { + ((WorkflowThread) this.workerThreads.get(e.next())).terminate(true); e.remove(); } } } - + public String[] sessionsOlderThan(String threadName, long timeout) { final List list = new ArrayList(); final WorkflowThread st = getThread(threadName); - - for (final Session s: ((serverCore) st).getJobList()) { - if (!s.isAlive()) continue; - if (s.getTime() > timeout) { + + for ( final Session s : ((serverCore) st).getJobList() ) { + if ( !s.isAlive() ) { + continue; + } + if ( s.getTime() > timeout ) { list.add(s.getName()); } } return (String[]) list.toArray(); } - + public void closeSessions(String threadName, String sessionName) { - if (sessionName == null) return; + if ( sessionName == null ) { + return; + } final WorkflowThread st = getThread(threadName); - - for (final Session s: ((serverCore) st).getJobList()) { - if ( - (s.isAlive()) && - (s.getName().equals(sessionName)) - ) { + + for ( final Session s : ((serverCore) st).getJobList() ) { + if ( (s.isAlive()) && (s.getName().equals(sessionName)) ) { // try to stop session s.setStopped(true); - try { Thread.sleep(100); } catch (final InterruptedException ex) {} - + try { + Thread.sleep(100); + } catch ( final InterruptedException ex ) { + } + // try to interrupt session s.interrupt(); - try { Thread.sleep(100); } catch (final InterruptedException ex) {} - + try { + Thread.sleep(100); + } catch ( final InterruptedException ex ) { + } + // try to close socket - if (s.isAlive()) { + if ( s.isAlive() ) { s.close(); } - + // wait for session to finish - if (s.isAlive()) { - try { s.join(500); } catch (final InterruptedException ex) {} + if ( s.isAlive() ) { + try { + s.join(500); + } catch ( final InterruptedException ex ) { + } } } } } - - public Iterator /*of serverThread-Names (String)*/ threadNames() { - return workerThreads.keySet().iterator(); + + public Iterator /*of serverThread-Names (String)*/threadNames() { + return this.workerThreads.keySet().iterator(); } // authentication routines: - + public void setAuthentify(final InetAddress host, final String user, final String rights) { // sets access attributes according to host addresses - authorization.put(host, user + "@" + rights); + this.authorization.put(host, user + "@" + rights); } public void removeAuthentify(final InetAddress host) { // remove access attributes according to host addresses - authorization.remove(host); + this.authorization.remove(host); } public String getAuthentifyUser(final InetAddress host) { - // read user name according to host addresses - final String a = authorization.get(host); - if (a == null) return null; - final int p = a.indexOf('@'); - if (p < 0) return null; - return a.substring(0, p); + // read user name according to host addresses + final String a = this.authorization.get(host); + if ( a == null ) { + return null; + } + final int p = a.indexOf('@'); + if ( p < 0 ) { + return null; + } + return a.substring(0, p); } public String getAuthentifyRights(final InetAddress host) { - // read access rigths according to host addresses - final String a = authorization.get(host); - if (a == null) return null; - final int p = a.indexOf('@'); - if (p < 0) return null; - return a.substring(p + 1); + // read access rigths according to host addresses + final String a = this.authorization.get(host); + if ( a == null ) { + return null; + } + final int p = a.indexOf('@'); + if ( p < 0 ) { + return null; + } + return a.substring(p + 1); } public void addAuthentifyRight(final InetAddress host, final String right) { - final String rights = getAuthentifyRights(host); - if (rights == null) { - // create new authentication - setAuthentify(host, "unknown", right); - } else { - // add more authentication - final String user = getAuthentifyUser(host); - setAuthentify(host, user, rights + right); - } + final String rights = getAuthentifyRights(host); + if ( rights == null ) { + // create new authentication + setAuthentify(host, "unknown", right); + } else { + // add more authentication + final String user = getAuthentifyUser(host); + setAuthentify(host, user, rights + right); + } } public boolean hasAuthentifyRight(final InetAddress host, final String right) { - final String rights = getAuthentifyRights(host); - if (rights == null) return false; - return rights.indexOf(right) >= 0; + final String rights = getAuthentifyRights(host); + if ( rights == null ) { + return false; + } + return rights.indexOf(right) >= 0; } public File getDataPath() { - return this.dataPath; + return this.dataPath; } public File getAppPath() { - return this.appPath; + return this.appPath; } - + @Override public String toString() { - return configProps.toString(); + return this.configProps.toString(); } public void handleBusyState(final int jobs) { - serverJobs = jobs; + this.serverJobs = jobs; } - + public void track(final String host, final String accessPath) { this.accessTracker.track(host, accessPath); } - + public Collection accessTrack(final String host) { return this.accessTracker.accessTrack(host); - } + } public int latestAccessCount(final String host, final long timedelta) { return this.accessTracker.latestAccessCount(host, timedelta); - } - + } + public Iterator accessHosts() { return this.accessTracker.accessHosts(); } - + /** - * Retrieve text data (e. g. config file) from file + * Retrieve text data (e. g. config file) from file file may be an url or a filename with path relative to + * rootPath parameter * - * file may be an url or a filename with path relative to rootPath parameter * @param file url or filename * @param rootPath searchpath for file * @param file file to use when remote fetching fails (null if unused) */ - public Reader getConfigFileFromWebOrLocally(final String uri, - final String rootPath, final File file) throws IOException, FileNotFoundException { - if (uri.startsWith("http://") || uri.startsWith("https://")) { + public Reader getConfigFileFromWebOrLocally(final String uri, final String rootPath, final File file) + throws IOException, + FileNotFoundException { + if ( uri.startsWith("http://") || uri.startsWith("https://") ) { final String[] uris = uri.split(","); - for (String netdef: uris) { + for ( String netdef : uris ) { netdef = netdef.trim(); try { final RequestHeader reqHeader = new RequestHeader(); @@ -574,52 +620,57 @@ public class serverSwitch { final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); byte[] data = client.GETbytes(uri); - if (data == null || data.length == 0) continue; + if ( data == null || data.length == 0 ) { + continue; + } // save locally in case next fetch fails - if (file != null) { - FileOutputStream f = new FileOutputStream(file); - f.write(data); - f.close(); + if ( file != null ) { + FileOutputStream f = new FileOutputStream(file); + f.write(data); + f.close(); } return new InputStreamReader(new BufferedInputStream(new ByteArrayInputStream(data))); - } catch (final Exception e) { + } catch ( final Exception e ) { continue; } } - if (file != null && file.exists()) { - return new FileReader(file); + if ( file != null && file.exists() ) { + return new FileReader(file); } else { - throw new FileNotFoundException(); + throw new FileNotFoundException(); } - } else { - final File f = (uri.length() > 0 && uri.startsWith("/")) ? new File(uri) : new File(rootPath, uri); - if (f.exists()) { - return new FileReader(f); + } else { + final File f = + (uri.length() > 0 && uri.startsWith("/")) ? new File(uri) : new File(rootPath, uri); + if ( f.exists() ) { + return new FileReader(f); } else { - throw new FileNotFoundException(f.toString()); + throw new FileNotFoundException(f.toString()); } - } + } } - + private static Random pwGenerator = new Random(); /** * Generates a random password. + * * @return random password which is 20 characters long. */ public String genRandomPassword() { - return genRandomPassword(20); + return genRandomPassword(20); } /** * Generates a random password of a given length. + * * @param length length o password * @return password of given length */ public String genRandomPassword(final int length) { - byte[] bytes = new byte[length]; - pwGenerator.nextBytes(bytes); - return Digest.encodeMD5Hex(bytes); + byte[] bytes = new byte[length]; + pwGenerator.nextBytes(bytes); + return Digest.encodeMD5Hex(bytes); } - + } diff --git a/source/net/yacy/kelondro/util/FileUtils.java b/source/net/yacy/kelondro/util/FileUtils.java index 476674f9d..7c7df2a39 100644 --- a/source/net/yacy/kelondro/util/FileUtils.java +++ b/source/net/yacy/kelondro/util/FileUtils.java @@ -32,6 +32,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; @@ -66,8 +67,8 @@ import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.RowSet; import net.yacy.kelondro.logging.Log; - -public final class FileUtils { +public final class FileUtils +{ private static final int DEFAULT_BUFFER_SIZE = 1024; // this is also the maximum chunk size @@ -77,21 +78,21 @@ public final class FileUtils { /** * Copies an InputStream to an OutputStream. - * + * * @param source InputStream * @param dest OutputStream * @param count the total amount of bytes to copy (-1 for all, else must be greater than zero) * @return Total number of bytes copied. * @throws IOException - * * @see #copy(InputStream source, File dest) * @see #copyRange(File source, OutputStream dest, int start) * @see #copy(File source, OutputStream dest) * @see #copy(File source, File dest) */ - public static long copy(final InputStream source, final OutputStream dest, final long count) throws IOException { + public static long copy(final InputStream source, final OutputStream dest, final long count) + throws IOException { assert count < 0 || count > 0 : "precondition violated: count == " + count + " (nothing to copy)"; - if (count == 0) { + if ( count == 0 ) { // no bytes to copy return 0; } @@ -99,15 +100,18 @@ public final class FileUtils { final byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; int chunkSize = (int) ((count > 0) ? Math.min(count, DEFAULT_BUFFER_SIZE) : DEFAULT_BUFFER_SIZE); - int c; long total = 0; - while ((c = source.read(buffer, 0, chunkSize)) > 0) { + int c; + long total = 0; + while ( (c = source.read(buffer, 0, chunkSize)) > 0 ) { dest.write(buffer, 0, c); dest.flush(); total += c; - if (count > 0) { + if ( count > 0 ) { chunkSize = (int) Math.min(count - total, DEFAULT_BUFFER_SIZE); - if (chunkSize == 0) break; + if ( chunkSize == 0 ) { + break; + } } } @@ -116,24 +120,31 @@ public final class FileUtils { return total; } - public static int copy(final File source, final Charset inputCharset, final Writer dest) throws IOException { + public static int copy(final File source, final Charset inputCharset, final Writer dest) + throws IOException { InputStream fis = null; try { fis = new FileInputStream(source); return copy(fis, dest, inputCharset); } finally { - if (fis != null) try { fis.close(); } catch (final Exception e) {} + if ( fis != null ) { + try { + fis.close(); + } catch ( final Exception e ) { + } + } } } public static int copy(final InputStream source, final Writer dest) throws IOException { final InputStreamReader reader = new InputStreamReader(source); - return copy(reader,dest); + return copy(reader, dest); } - public static int copy(final InputStream source, final Writer dest, final Charset inputCharset) throws IOException { - final InputStreamReader reader = new InputStreamReader(source,inputCharset); - return copy(reader,dest); + public static int copy(final InputStream source, final Writer dest, final Charset inputCharset) + throws IOException { + final InputStreamReader reader = new InputStreamReader(source, inputCharset); + return copy(reader, dest); } public static int copy(final String source, final Writer dest) throws IOException { @@ -145,35 +156,41 @@ public final class FileUtils { public static int copy(final Reader source, final Writer dest) throws IOException { assert source != null; assert dest != null; - if (source == null) throw new IOException("source is null"); - if (dest == null) throw new IOException("dest is null"); + if ( source == null ) { + throw new IOException("source is null"); + } + if ( dest == null ) { + throw new IOException("dest is null"); + } final char[] buffer = new char[DEFAULT_BUFFER_SIZE]; int count = 0; int n = 0; try { - while (-1 != (n = source.read(buffer))) { + while ( -1 != (n = source.read(buffer)) ) { dest.write(buffer, 0, n); count += n; } dest.flush(); - } catch (final Exception e) { + } catch ( final Exception e ) { assert e != null; // an "sun.io.MalformedInputException: Missing byte-order mark" - exception may occur here //Log.logException(e); - throw new IOException(e == null ? "null" : e.getMessage() == null ? e.toString() : e.getMessage(), e); + throw new IOException( + e == null ? "null" : e.getMessage() == null ? e.toString() : e.getMessage(), + e); } return count; } public static void copy(final InputStream source, final File dest) throws IOException { - copy(source,dest,-1); + copy(source, dest, -1); } /** * Copies an InputStream to a File. - * - * @param source InputStream - * @param dest File + * + * @param source InputStream + * @param dest File * @param count the amount of bytes to copy * @throws IOException * @see #copy(InputStream source, OutputStream dest) @@ -183,20 +200,31 @@ public final class FileUtils { */ public static void copy(final InputStream source, final File dest, final long count) throws IOException { final String path = dest.getParent(); - if (path != null && path.length() > 0) new File(path).mkdirs(); + if ( path != null && path.length() > 0 ) { + new File(path).mkdirs(); + } FileOutputStream fos = null; try { fos = new FileOutputStream(dest); copy(source, fos, count); } finally { - if (fos != null) try {fos.close();} catch (final Exception e) { Log.logWarning("FileUtils", "cannot close FileOutputStream for "+ dest +"! "+ e.getMessage()); } + if ( fos != null ) { + try { + fos.close(); + } catch ( final Exception e ) { + Log.logWarning( + "FileUtils", + "cannot close FileOutputStream for " + dest + "! " + e.getMessage()); + } + } } } /** * Copies a part of a File to an OutputStream. - * @param source File - * @param dest OutputStream + * + * @param source File + * @param dest OutputStream * @param start Number of bytes to skip from the beginning of the File * @throws IOException * @see #copy(InputStream source, OutputStream dest) @@ -204,22 +232,35 @@ public final class FileUtils { * @see #copy(File source, OutputStream dest) * @see #copy(File source, File dest) */ - public static void copyRange(final File source, final OutputStream dest, final int start) throws IOException { + public static void copyRange(final File source, final OutputStream dest, final int start) + throws IOException { InputStream fis = null; try { fis = new FileInputStream(source); final long skipped = fis.skip(start); - if (skipped != start) throw new IllegalStateException("Unable to skip '" + start + "' bytes. Only '" + skipped + "' bytes skipped."); + if ( skipped != start ) { + throw new IllegalStateException("Unable to skip '" + + start + + "' bytes. Only '" + + skipped + + "' bytes skipped."); + } copy(fis, dest, -1); } finally { - if (fis != null) try { fis.close(); } catch (final Exception e) {} + if ( fis != null ) { + try { + fis.close(); + } catch ( final Exception e ) { + } + } } } /** * Copies a File to an OutputStream. - * @param source File - * @param dest OutputStream + * + * @param source File + * @param dest OutputStream * @throws IOException * @see #copy(InputStream source, OutputStream dest) * @see #copy(InputStream source, File dest) @@ -232,14 +273,20 @@ public final class FileUtils { fis = new FileInputStream(source); copy(fis, dest, -1); } finally { - if (fis != null) try { fis.close(); } catch (final Exception e) {} + if ( fis != null ) { + try { + fis.close(); + } catch ( final Exception e ) { + } + } } } /** * Copies a File to a File. - * @param source File - * @param dest File + * + * @param source File + * @param dest File * @param count the amount of bytes to copy * @throws IOException * @see #copy(InputStream source, OutputStream dest) @@ -255,8 +302,18 @@ public final class FileUtils { fos = new FileOutputStream(dest); copy(fis, fos, -1); } finally { - if (fis != null) try {fis.close();} catch (final Exception e) {} - if (fos != null) try {fos.close();} catch (final Exception e) {} + if ( fis != null ) { + try { + fis.close(); + } catch ( final Exception e ) { + } + } + if ( fos != null ) { + try { + fos.close(); + } catch ( final Exception e ) { + } + } } } @@ -270,18 +327,18 @@ public final class FileUtils { } public static byte[] read(final InputStream source) throws IOException { - return read(source,-1); + return read(source, -1); } public static byte[] read(final InputStream source, final int count) throws IOException { - if (count > 0) { + if ( count > 0 ) { final byte[] b = new byte[count]; final int c = source.read(b, 0, count); - assert c == count: "count = " + count + ", c = " + c; - if (c != count) { - final byte[] bb = new byte[c]; - System.arraycopy(b, 0, bb, 0, c); - return bb; + assert c == count : "count = " + count + ", c = " + c; + if ( c != count ) { + final byte[] bb = new byte[c]; + System.arraycopy(b, 0, bb, 0, c); + return bb; } return b; } @@ -297,9 +354,16 @@ public final class FileUtils { try { fis = new FileInputStream(source); int p = 0, c; - while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c; + while ( (c = fis.read(buffer, p, buffer.length - p)) > 0 ) { + p += c; + } } finally { - if (fis != null) try { fis.close(); } catch (final Exception e) {} + if ( fis != null ) { + try { + fis.close(); + } catch ( final Exception e ) { + } + } fis = null; } return buffer; @@ -309,14 +373,24 @@ public final class FileUtils { ByteArrayOutputStream byteOut = null; GZIPOutputStream zipOut = null; try { - byteOut = new ByteArrayOutputStream((int)(source.length()/2)); + byteOut = new ByteArrayOutputStream((int) (source.length() / 2)); zipOut = new GZIPOutputStream(byteOut); copy(source, zipOut); zipOut.close(); return byteOut.toByteArray(); } finally { - if (zipOut != null) try { zipOut.close(); } catch (final Exception e) {} - if (byteOut != null) try { byteOut.close(); } catch (final Exception e) {} + if ( zipOut != null ) { + try { + zipOut.close(); + } catch ( final Exception e ) { + } + } + if ( byteOut != null ) { + try { + byteOut.close(); + } catch ( final Exception e ) { + } + } } } @@ -326,7 +400,12 @@ public final class FileUtils { fos = new FileOutputStream(dest); writeAndGZip(source, fos); } finally { - if (fos != null) try {fos.close();} catch (final Exception e) {} + if ( fos != null ) { + try { + fos.close(); + } catch ( final Exception e ) { + } + } } } @@ -337,29 +416,37 @@ public final class FileUtils { copy(source, zipOut); zipOut.close(); } finally { - if (zipOut != null) try { zipOut.close(); } catch (final Exception e) {} + if ( zipOut != null ) { + try { + zipOut.close(); + } catch ( final Exception e ) { + } + } } } /** * This function determines if a byte array is gzip compressed and uncompress it + * * @param source properly gzip compressed byte array * @return uncompressed byte array * @throws IOException */ public static byte[] uncompressGZipArray(byte[] source) throws IOException { - if (source == null) return null; + if ( source == null ) { + return null; + } // support of gzipped data (requested by roland) - /* "Bitwise OR of signed byte value - * - * [...] Values loaded from a byte array are sign extended to 32 bits before - * any any bitwise operations are performed on the value. Thus, if b[0] - * contains the value 0xff, and x is initially 0, then the code ((x << - * 8) | b[0]) will sign extend 0xff to get 0xffffffff, and thus give the - * value 0xffffffff as the result. [...]" findbugs description of BIT_IOR_OF_SIGNED_BYTE - */ - if ((source.length > 1) && (((source[1] << 8) | (source[0] & 0xff)) == GZIPInputStream.GZIP_MAGIC)) { + /* "Bitwise OR of signed byte value + * + * [...] Values loaded from a byte array are sign extended to 32 bits before + * any any bitwise operations are performed on the value. Thus, if b[0] + * contains the value 0xff, and x is initially 0, then the code ((x << + * 8) | b[0]) will sign extend 0xff to get 0xffffffff, and thus give the + * value 0xffffffff as the result. [...]" findbugs description of BIT_IOR_OF_SIGNED_BYTE + */ + if ( (source.length > 1) && (((source[1] << 8) | (source[0] & 0xff)) == GZIPInputStream.GZIP_MAGIC) ) { System.out.println("DEBUG: uncompressGZipArray - uncompressing source"); try { final ByteArrayInputStream byteInput = new ByteArrayInputStream(source); @@ -369,15 +456,15 @@ public final class FileUtils { int read = 0; // reading gzip file and store it uncompressed - while((read = zippedContent.read(data, 0, 1024)) != -1) { + while ( (read = zippedContent.read(data, 0, 1024)) != -1 ) { byteOutput.write(data, 0, read); } zippedContent.close(); byteOutput.close(); source = byteOutput.toByteArray(); - } catch (final Exception e) { - if (!e.getMessage().equals("Not in GZIP format")) { + } catch ( final Exception e ) { + if ( !e.getMessage().equals("Not in GZIP format") ) { throw new IOException(e.getMessage()); } } @@ -392,14 +479,21 @@ public final class FileUtils { try { br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); String line; - while ((line = br.readLine()) != null) { + while ( (line = br.readLine()) != null ) { line = line.trim(); - if (line.length() > 0 && line.charAt(0) != '#') set.add(line.trim().toLowerCase()); + if ( line.length() > 0 && line.charAt(0) != '#' ) { + set.add(line.trim().toLowerCase()); + } } br.close(); - } catch (final IOException e) { + } catch ( final IOException e ) { } finally { - if (br != null) try { br.close(); } catch (final Exception e) {} + if ( br != null ) { + try { + br.close(); + } catch ( final Exception e ) { + } + } } return set; } @@ -409,98 +503,127 @@ public final class FileUtils { try { final byte[] b = read(f); return table(strings(b)); - } catch (final IOException e2) { + } catch ( final IOException e2 ) { Log.logSevere("FileUtils", f.toString() + " not found", e2); return null; } } - public static void saveMap(final File file, final Map props, final String comment) throws IOException { + public static void saveMap(final File file, final Map props, final String comment) { PrintWriter pw = null; final File tf = new File(file.toString() + "." + (System.currentTimeMillis() % 1000)); - pw = new PrintWriter(tf, "UTF-8"); - pw.println("# " + comment); - String key, value; - for (final Map.Entry entry: props.entrySet()) { - key = entry.getKey(); - if (key != null) - key = key.replace("\\", "\\\\").replace("\n", "\\n").replace("=", "\\="); - if (entry.getValue() == null) { - value = ""; - } else { - value = entry.getValue().replace("\\", "\\\\").replace("\n", "\\n"); + try { + pw = new PrintWriter(tf, "UTF-8"); + pw.println("# " + comment); + String key, value; + for ( final Map.Entry entry : props.entrySet() ) { + key = entry.getKey(); + if ( key != null ) { + key = key.replace("\\", "\\\\").replace("\n", "\\n").replace("=", "\\="); + } + if ( entry.getValue() == null ) { + value = ""; + } else { + value = entry.getValue().replace("\\", "\\\\").replace("\n", "\\n"); + } + pw.println(key + "=" + value); + } + pw.println("# EOF"); + } catch ( FileNotFoundException e ) { + Log.logWarning("FileUtils", e.getMessage(), e); + } catch ( UnsupportedEncodingException e ) { + Log.logWarning("FileUtils", e.getMessage(), e); + } finally { + if ( pw != null ) { + pw.close(); } - pw.println(key + "=" + value); + pw = null; + } + try { + forceMove(tf, file); + } catch ( IOException e ) { + // ignore } - pw.println("# EOF"); - pw.close(); - forceMove(tf, file); } - public static Set loadSet(final File file, final int chunksize, final boolean tree) throws IOException { - final Set set = (tree) ? (Set) new TreeSet() : (Set) new HashSet(); + public static Set loadSet(final File file, final int chunksize, final boolean tree) + throws IOException { + final Set set = + (tree) ? (Set) new TreeSet() : (Set) new HashSet(); final byte[] b = read(file); - for (int i = 0; (i + chunksize) <= b.length; i++) { + for ( int i = 0; (i + chunksize) <= b.length; i++ ) { set.add(UTF8.String(b, i, chunksize)); } return set; } - public static Set loadSet(final File file, final String sep, final boolean tree) throws IOException { - final Set set = (tree) ? (Set) new TreeSet() : (Set) new HashSet(); + public static Set loadSet(final File file, final String sep, final boolean tree) + throws IOException { + final Set set = + (tree) ? (Set) new TreeSet() : (Set) new HashSet(); final byte[] b = read(file); final StringTokenizer st = new StringTokenizer(UTF8.String(b), sep); - while (st.hasMoreTokens()) { + while ( st.hasMoreTokens() ) { set.add(st.nextToken()); } return set; } - public static void saveSet(final File file, final String format, final Set set, final String sep) throws IOException { + public static void saveSet(final File file, final String format, final Set set, final String sep) + throws IOException { final File tf = new File(file.toString() + ".prt" + (System.currentTimeMillis() % 1000)); OutputStream os = null; - if ((format == null) || (format.equals("plain"))) { + if ( (format == null) || (format.equals("plain")) ) { os = new BufferedOutputStream(new FileOutputStream(tf)); - } else if (format.equals("gzip")) { + } else if ( format.equals("gzip") ) { os = new GZIPOutputStream(new FileOutputStream(tf)); - } else if (format.equals("zip")) { + } else if ( format.equals("zip") ) { final ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file)); String name = file.getName(); - if (name.endsWith(".zip")) name = name.substring(0, name.length() - 4); + if ( name.endsWith(".zip") ) { + name = name.substring(0, name.length() - 4); + } zos.putNextEntry(new ZipEntry(name + ".txt")); os = zos; } - if(os != null) { - for (final byte[] b : set) { + if ( os != null ) { + for ( final byte[] b : set ) { os.write(b); - if (sep != null) os.write(UTF8.getBytes(sep)); + if ( sep != null ) { + os.write(UTF8.getBytes(sep)); + } } os.close(); } forceMove(tf, file); } - public static void saveSet(final File file, final String format, final RowSet set, final String sep) throws IOException { + public static void saveSet(final File file, final String format, final RowSet set, final String sep) + throws IOException { final File tf = new File(file.toString() + ".prt" + (System.currentTimeMillis() % 1000)); OutputStream os = null; - if ((format == null) || (format.equals("plain"))) { + if ( (format == null) || (format.equals("plain")) ) { os = new BufferedOutputStream(new FileOutputStream(tf)); - } else if (format.equals("gzip")) { + } else if ( format.equals("gzip") ) { os = new GZIPOutputStream(new FileOutputStream(tf)); - } else if (format.equals("zip")) { + } else if ( format.equals("zip") ) { final ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file)); String name = file.getName(); - if (name.endsWith(".zip")) name = name.substring(0, name.length() - 4); + if ( name.endsWith(".zip") ) { + name = name.substring(0, name.length() - 4); + } zos.putNextEntry(new ZipEntry(name + ".txt")); os = zos; } - if (os != null) { + if ( os != null ) { final Iterator i = set.iterator(); - if (i.hasNext()) { + if ( i.hasNext() ) { os.write(i.next().getPrimaryKeyBytes()); } - while (i.hasNext()) { - if (sep != null) os.write(UTF8.getBytes(sep)); + while ( i.hasNext() ) { + if ( sep != null ) { + os.write(UTF8.getBytes(sep)); + } os.write(i.next().getPrimaryKeyBytes()); } os.close(); @@ -509,27 +632,30 @@ public final class FileUtils { } public static ConcurrentHashMap table(final Reader r) { - final BufferedReader br = new BufferedReader(r); - return table(new StringsIterator(br)); - } + final BufferedReader br = new BufferedReader(r); + return table(new StringsIterator(br)); + } private final static Pattern escaped_equal = Pattern.compile("\\=", Pattern.LITERAL); private final static Pattern escaped_newline = Pattern.compile("\\n", Pattern.LITERAL); private final static Pattern escaped_backslash = Pattern.compile("\\", Pattern.LITERAL); + //private final static Pattern escaped_backslashbackslash = Pattern.compile("\\\\", Pattern.LITERAL); public static ConcurrentHashMap table(final Iterator li) { String line; final ConcurrentHashMap props = new ConcurrentHashMap(); - while (li.hasNext()) { + while ( li.hasNext() ) { int pos = 0; line = li.next().trim(); - if (line.length() > 0 && line.charAt(0) == '#') continue; // exclude comments + if ( line.length() > 0 && line.charAt(0) == '#' ) { + continue; // exclude comments + } do { // search for unescaped = pos = line.indexOf('=', pos + 1); - } while ( pos > 0 && line.charAt(pos - 1) == '\\'); - if (pos > 0) { + } while ( pos > 0 && line.charAt(pos - 1) == '\\' ); + if ( pos > 0 ) { String key = escaped_equal.matcher(line.substring(0, pos).trim()).replaceAll("="); key = escaped_newline.matcher(key).replaceAll("\n"); key = escaped_backslash.matcher(key).replaceAll("\\"); @@ -547,45 +673,51 @@ public final class FileUtils { } public static Iterator strings(final byte[] a) { - if (a == null) return new ArrayList().iterator(); + if ( a == null ) { + return new ArrayList().iterator(); + } try { - return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), "UTF-8"))); - } catch (final UnsupportedEncodingException e) { + return new StringsIterator(new BufferedReader(new InputStreamReader( + new ByteArrayInputStream(a), + "UTF-8"))); + } catch ( final UnsupportedEncodingException e ) { return null; } } - /** * Read lines of a file into an ArrayList. - * + * * @param listFile the file * @return the resulting array as an ArrayList */ - public static ArrayList getListArray(final File listFile){ + public static ArrayList getListArray(final File listFile) { String line; final ArrayList list = new ArrayList(); BufferedReader br = null; try { - br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile),"UTF-8")); + br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), "UTF-8")); - while((line = br.readLine()) != null){ + while ( (line = br.readLine()) != null ) { list.add(line); } br.close(); - } catch(final IOException e) { + } catch ( final IOException e ) { // list is empty } finally { - if (br!=null) try { br.close(); } catch (final Exception e) {} + if ( br != null ) { + try { + br.close(); + } catch ( final Exception e ) { + } + } } return list; } - - /** * Write a String to a file (used for string representation of lists). - * + * * @param listFile the file to write to * @param out the String to write * @return returns true if successful, false otherwise @@ -597,10 +729,15 @@ public final class FileUtils { bw.write(out); bw.close(); return true; - } catch(final IOException e) { + } catch ( final IOException e ) { return false; } finally { - if (bw!=null) try { bw.close(); } catch (final Exception e) {} + if ( bw != null ) { + try { + bw.close(); + } catch ( final Exception e ) { + } + } } } @@ -609,32 +746,39 @@ public final class FileUtils { /** * Read lines of a text file into a String, optionally ignoring comments. - * + * * @param listFile the File to read from. * @param withcomments If false ignore lines starting with '#'. * @return String representation of the file content. */ - public static String getListString(final File listFile, final boolean withcomments){ + public static String getListString(final File listFile, final boolean withcomments) { final StringBuilder temp = new StringBuilder(300); BufferedReader br = null; - try{ + try { br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile))); temp.ensureCapacity((int) listFile.length()); // Read the List String line = ""; - while ((line = br.readLine()) != null) { - if (line.length() == 0) continue; - if (line.charAt(0) != '#' || withcomments) { + while ( (line = br.readLine()) != null ) { + if ( line.length() == 0 ) { + continue; + } + if ( line.charAt(0) != '#' || withcomments ) { //temp += line + serverCore.CRLF_STRING; temp.append(line).append(CR).append(LF); } } br.close(); - } catch (final IOException e) { + } catch ( final IOException e ) { } finally { - if (br!=null) try { br.close(); } catch (final Exception e) {} + if ( br != null ) { + try { + br.close(); + } catch ( final Exception e ) { + } + } } return new String(temp); @@ -642,21 +786,20 @@ public final class FileUtils { /** * Read content of a directory into a String array of file names. - * @param dirname The directory to get the file listing from. If it doesn't exist yet, - * it will be created. + * + * @param dirname The directory to get the file listing from. If it doesn't exist yet, it will be created. * @return array of file names */ - public static List getDirListing(final String dirname){ + public static List getDirListing(final String dirname) { return getDirListing(dirname, null); } /** * Read content of a directory into a String array of file names. - * @param dirname The directory to get the file listing from. If it doesn't exist yet, - * it will be created. - * @param filter String which contains a regular expression which has to be matched by - * file names in order to appear in returned array. All file names will be returned if - * filter is null. + * + * @param dirname The directory to get the file listing from. If it doesn't exist yet, it will be created. + * @param filter String which contains a regular expression which has to be matched by file names in order + * to appear in returned array. All file names will be returned if filter is null. * @return array of file names */ public static List getDirListing(final String dirname, final String filter) { @@ -665,34 +808,32 @@ public final class FileUtils { /** * Read content of a directory into a String array of file names. - * - * @param dir The directory to get the file listing from. If it doesn't exist yet, - * it will be created. + * + * @param dir The directory to get the file listing from. If it doesn't exist yet, it will be created. * @return array of file names */ - public static List getDirListing(final File dir){ + public static List getDirListing(final File dir) { return getDirListing(dir, null); } /** * Read content of a directory into a String array of file names. - * @param dir The directory to get the file listing from. If it doesn't exist yet, - * it will be created. - * @param filter String which contains a regular expression which has to be matched by - * file names in order to appear in returned array. All file names will be returned if - * filter is null. + * + * @param dir The directory to get the file listing from. If it doesn't exist yet, it will be created. + * @param filter String which contains a regular expression which has to be matched by file names in order + * to appear in returned array. All file names will be returned if filter is null. * @return array of file names */ - public static List getDirListing(final File dir, final String filter){ + public static List getDirListing(final File dir, final String filter) { final List ret = new LinkedList(); File[] fileList; - if (dir != null ) { - if (!dir.exists()) { + if ( dir != null ) { + if ( !dir.exists() ) { dir.mkdir(); } fileList = dir.listFiles(); - for (int i=0; i<= fileList.length-1; i++) { - if (filter == null || fileList[i].getName().matches(filter)) { + for ( int i = 0; i <= fileList.length - 1; i++ ) { + if ( filter == null || fileList[i].getName().matches(filter) ) { ret.add(fileList[i].getName()); } } @@ -702,26 +843,29 @@ public final class FileUtils { } // same as below - public static ArrayList getDirsRecursive(final File dir, final String notdir){ + public static ArrayList getDirsRecursive(final File dir, final String notdir) { return getDirsRecursive(dir, notdir, true); } /** - * Returns a List of all dirs and subdirs as File Objects - * - * Warning: untested + * Returns a List of all dirs and subdirs as File Objects Warning: untested */ - public static ArrayList getDirsRecursive(final File dir, final String notdir, final boolean excludeDotfiles){ + public static ArrayList getDirsRecursive( + final File dir, + final String notdir, + final boolean excludeDotfiles) { final File[] dirList = dir.listFiles(); final ArrayList resultList = new ArrayList(); ArrayList recursive; Iterator iter; - for (int i=0;itrue if successful, false otherwise */ - public static boolean writeList(final File listFile, final String[] list){ + public static boolean writeList(final File listFile, final String[] list) { final StringBuilder out = new StringBuilder(list.length * 40 + 1); - for (final String element : list) { + for ( final String element : list ) { out.append(element).append(CR).append(LF); } return FileUtils.writeList(listFile, new String(out)); //(File, String) } - public static class StringsIterator implements Iterator { + public static class StringsIterator implements Iterator + { private final BufferedReader reader; private String nextLine; + public StringsIterator(final BufferedReader reader) { this.reader = reader; this.nextLine = null; next(); } + + @Override public boolean hasNext() { return this.nextLine != null; } + @Override public String next() { final String line = this.nextLine; try { - while ((this.nextLine = this.reader.readLine()) != null) { + while ( (this.nextLine = this.reader.readLine()) != null ) { this.nextLine = this.nextLine.trim(); - if (this.nextLine.length() > 0) break; + if ( this.nextLine.length() > 0 ) { + break; + } } - } catch (final IOException e) { + } catch ( final IOException e ) { this.nextLine = null; - } catch (final OutOfMemoryError e) { + } catch ( final OutOfMemoryError e ) { Log.logException(e); this.nextLine = null; } return line; } + @Override public void remove() { throw new UnsupportedOperationException(); } @@ -786,7 +936,7 @@ public final class FileUtils { * @throws IOException */ private static void forceMove(final File from, final File to) throws IOException { - if(!(to.delete() && from.renameTo(to))) { + if ( !(to.delete() && from.renameTo(to)) ) { // do it manually copy(from, to); FileUtils.deletedelete(from); @@ -794,32 +944,44 @@ public final class FileUtils { } /** - * Moves all files from a directory to another. - * @param from_dir Directory which contents will be moved. - * @param to_dir Directory to move into. It must exist already. - */ + * Moves all files from a directory to another. + * + * @param from_dir Directory which contents will be moved. + * @param to_dir Directory to move into. It must exist already. + */ public static void moveAll(final File from_dir, final File to_dir) { - if (!(from_dir.isDirectory())) return; - if (!(to_dir.isDirectory())) return; + if ( !(from_dir.isDirectory()) ) { + return; + } + if ( !(to_dir.isDirectory()) ) { + return; + } final String[] list = from_dir.list(); - for (int i = 0; i < list.length; i++) { - if(!new File(from_dir, list[i]).renameTo(new File(to_dir, list[i]))) - Log.logWarning("serverFileUtils", "moveAll(): could not move from "+ from_dir + list[i] +" to "+ to_dir + list[i]); + for ( int i = 0; i < list.length; i++ ) { + if ( !new File(from_dir, list[i]).renameTo(new File(to_dir, list[i])) ) { + Log.logWarning("serverFileUtils", "moveAll(): could not move from " + + from_dir + + list[i] + + " to " + + to_dir + + list[i]); + } } } - - public static class dirlistComparator implements Comparator, Serializable { + public static class dirlistComparator implements Comparator, Serializable + { /** - * generated serial - */ - private static final long serialVersionUID = -5196490300039230135L; + * generated serial + */ + private static final long serialVersionUID = -5196490300039230135L; - public int compare(final File file1, final File file2) { - if (file1.isDirectory() && !file2.isDirectory()) { + @Override + public int compare(final File file1, final File file2) { + if ( file1.isDirectory() && !file2.isDirectory() ) { return -1; - } else if (!file1.isDirectory() && file2.isDirectory()) { + } else if ( !file1.isDirectory() && file2.isDirectory() ) { return 1; } else { return file1.getName().compareToIgnoreCase(file2.getName()); @@ -830,34 +992,39 @@ public final class FileUtils { public static final File createTempFile(final Class classObj, final String name) throws IOException { String parserClassName = classObj.getName(); int idx = parserClassName.lastIndexOf('.'); - if (idx != -1) { - parserClassName = parserClassName.substring(idx+1); + if ( idx != -1 ) { + parserClassName = parserClassName.substring(idx + 1); } // get the file extension idx = name.lastIndexOf('/'); - final String fileName = (idx != -1) ? name.substring(idx+1) : name; + final String fileName = (idx != -1) ? name.substring(idx + 1) : name; idx = fileName.lastIndexOf('.'); - final String fileExt = (idx > -1) ? fileName.substring(idx+1) : ""; + final String fileExt = (idx > -1) ? fileName.substring(idx + 1) : ""; // create the temp file - final File tempFile = File.createTempFile(parserClassName + "_" + ((idx>-1)?fileName.substring(0,idx):fileName), (fileExt.length()>0)?"."+fileExt:fileExt); + final File tempFile = + File.createTempFile( + parserClassName + "_" + ((idx > -1) ? fileName.substring(0, idx) : fileName), + (fileExt.length() > 0) ? "." + fileExt : fileExt); return tempFile; } /** * copies the input stream to one output stream (byte per byte) + * * @param in * @param out * @return number of copies bytes * @throws IOException */ - public static int copyToStream(final BufferedInputStream in, final BufferedOutputStream out) throws IOException { + public static int copyToStream(final BufferedInputStream in, final BufferedOutputStream out) + throws IOException { int count = 0; // copy bytes int b; - while ((b = in.read()) != -1) { + while ( (b = in.read()) != -1 ) { count++; out.write(b); } @@ -867,20 +1034,24 @@ public final class FileUtils { /** * copies the input stream to both output streams (byte per byte) + * * @param in * @param out0 * @param out1 * @return number of copies bytes * @throws IOException */ - public static int copyToStreams(final BufferedInputStream in, final BufferedOutputStream out0, final BufferedOutputStream out1) throws IOException { + public static int copyToStreams( + final BufferedInputStream in, + final BufferedOutputStream out0, + final BufferedOutputStream out1) throws IOException { assert out0 != null; assert out1 != null; int count = 0; // copy bytes int b; - while((b = in.read()) != -1) { + while ( (b = in.read()) != -1 ) { count++; out0.write(b); out1.write(b); @@ -892,20 +1063,24 @@ public final class FileUtils { /** * copies the input stream to all writers (byte per byte) + * * @param data * @param writer * @param charSet * @return * @throws IOException */ - public static int copyToWriter(final BufferedInputStream data, final BufferedWriter writer, final Charset charSet) throws IOException { + public static int copyToWriter( + final BufferedInputStream data, + final BufferedWriter writer, + final Charset charSet) throws IOException { // the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader." final Reader sourceReader = new InputStreamReader(data, charSet); int count = 0; // copy bytes int b; - while((b = sourceReader.read()) != -1) { + while ( (b = sourceReader.read()) != -1 ) { count++; writer.write(b); } @@ -913,7 +1088,11 @@ public final class FileUtils { return count; } - public static int copyToWriters(final BufferedInputStream data, final BufferedWriter writer0, final BufferedWriter writer1, final Charset charSet) throws IOException { + public static int copyToWriters( + final BufferedInputStream data, + final BufferedWriter writer0, + final BufferedWriter writer1, + final Charset charSet) throws IOException { // the docs say: "For top efficiency, consider wrapping an InputStreamReader within a BufferedReader." assert writer0 != null; assert writer1 != null; @@ -922,7 +1101,7 @@ public final class FileUtils { int count = 0; // copy bytes int b; - while((b = sourceReader.read()) != -1) { + while ( (b = sourceReader.read()) != -1 ) { count++; writer0.write(b); writer1.write(b); @@ -933,62 +1112,76 @@ public final class FileUtils { } /** - * delete files and directories - * if a directory is not empty, delete also everything inside - * because deletion sometimes fails on windows, there is also a windows exec included + * delete files and directories if a directory is not empty, delete also everything inside because + * deletion sometimes fails on windows, there is also a windows exec included + * * @param path */ public static void deletedelete(final File path) { - if (path == null || !path.exists()) return; + if ( path == null || !path.exists() ) { + return; + } // empty the directory first - if (path.isDirectory()) { + if ( path.isDirectory() ) { final String[] list = path.list(); - if (list != null) { - for (final String s: list) deletedelete(new File(path, s)); + if ( list != null ) { + for ( final String s : list ) { + deletedelete(new File(path, s)); + } } } int c = 0; - while (c++ < 20) { - if (!path.exists()) break; - if (path.delete()) break; + while ( c++ < 20 ) { + if ( !path.exists() ) { + break; + } + if ( path.delete() ) { + break; + } // some OS may be slow when giving up file pointer //System.runFinalization(); //System.gc(); - try { Thread.sleep(200); } catch (final InterruptedException e) { break; } + try { + Thread.sleep(200); + } catch ( final InterruptedException e ) { + break; + } } - if (path.exists()) { + if ( path.exists() ) { path.deleteOnExit(); String p = ""; try { p = path.getCanonicalPath(); - } catch (final IOException e1) { + } catch ( final IOException e1 ) { Log.logException(e1); } - if (System.getProperties().getProperty("os.name","").toLowerCase().startsWith("windows")) { + if ( System.getProperties().getProperty("os.name", "").toLowerCase().startsWith("windows") ) { // deleting files on windows sometimes does not work with java try { final String command = "cmd /C del /F /Q \"" + p + "\""; final Process r = Runtime.getRuntime().exec(command); - if (r == null) { + if ( r == null ) { Log.logSevere("FileUtils", "cannot execute command: " + command); } else { final byte[] response = read(r.getInputStream()); Log.logInfo("FileUtils", "deletedelete: " + UTF8.String(response)); } - } catch (final IOException e) { + } catch ( final IOException e ) { Log.logException(e); } } - if (path.exists()) Log.logSevere("FileUtils", "cannot delete file " + p); + if ( path.exists() ) { + Log.logSevere("FileUtils", "cannot delete file " + p); + } } } public static void main(final String[] args) { try { writeAndGZip("ein zwei drei, Zauberei".getBytes(), new File("zauberei.txt.gz")); - } catch (final IOException e) { + } catch ( final IOException e ) { Log.logException(e); } } diff --git a/source/net/yacy/peers/graphics/WebStructureGraph.java b/source/net/yacy/peers/graphics/WebStructureGraph.java index 8bdf2c2ef..f2f0b8c67 100644 --- a/source/net/yacy/peers/graphics/WebStructureGraph.java +++ b/source/net/yacy/peers/graphics/WebStructureGraph.java @@ -28,7 +28,6 @@ package net.yacy.peers.graphics; import java.io.File; -import java.io.IOException; import java.text.ParseException; import java.util.ArrayList; import java.util.Collection; @@ -64,15 +63,15 @@ import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.LookAheadIterator; - -public class WebStructureGraph { +public class WebStructureGraph +{ public static int maxref = 300; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) public static int maxhosts = 50000; // maximum number of hosts in web structure map private final static Log log = new Log("WebStructureGraph"); - private final File structureFile; + private final File structureFile; private final TreeMap structure_old; // ',' to {}* private final TreeMap structure_new; private final BlockingQueue publicRefDNSResolvingQueue; @@ -80,9 +79,11 @@ public class WebStructureGraph { private final static leanrefObject leanrefObjectPOISON = new leanrefObject(null, null); - private static class leanrefObject { + private static class leanrefObject + { private final DigestURI url; private final Set globalRefURLs; + private leanrefObject(final DigestURI url, final Set globalRefURLs) { this.url = url; this.globalRefURLs = globalRefURLs; @@ -98,73 +99,92 @@ public class WebStructureGraph { // load web structure Map loadedStructure; try { - loadedStructure = (this.structureFile.exists()) ? FileUtils.loadMap(this.structureFile) : new TreeMap(); - } catch (final OutOfMemoryError e) { + loadedStructure = + (this.structureFile.exists()) + ? FileUtils.loadMap(this.structureFile) + : new TreeMap(); + } catch ( final OutOfMemoryError e ) { loadedStructure = new TreeMap(); } - if (loadedStructure != null) this.structure_old.putAll(loadedStructure); + if ( loadedStructure != null ) { + this.structure_old.putAll(loadedStructure); + } // delete out-dated entries in case the structure is too big - if (this.structure_old.size() > maxhosts) { - // fill a set with last-modified - dates of the structure - final TreeSet delset = new TreeSet(); - String key, value; - for (final Map.Entry entry : this.structure_old.entrySet()) { - key = entry.getKey(); - value = entry.getValue(); - if (value.length() >= 8) delset.add(value.substring(0, 8) + key); - } - int delcount = this.structure_old.size() - (maxhosts * 9 / 10); - final Iterator j = delset.iterator(); - while ((delcount > 0) && (j.hasNext())) { - this.structure_old.remove(j.next().substring(8)); - delcount--; - } + if ( this.structure_old.size() > maxhosts ) { + // fill a set with last-modified - dates of the structure + final TreeSet delset = new TreeSet(); + String key, value; + for ( final Map.Entry entry : this.structure_old.entrySet() ) { + key = entry.getKey(); + value = entry.getValue(); + if ( value.length() >= 8 ) { + delset.add(value.substring(0, 8) + key); + } + } + int delcount = this.structure_old.size() - (maxhosts * 9 / 10); + final Iterator j = delset.iterator(); + while ( (delcount > 0) && (j.hasNext()) ) { + this.structure_old.remove(j.next().substring(8)); + delcount--; + } } this.publicRefDNSResolvingWorker = new PublicRefDNSResolvingProcess(); this.publicRefDNSResolvingWorker.start(); } - private class PublicRefDNSResolvingProcess extends Thread { + private class PublicRefDNSResolvingProcess extends Thread + { private PublicRefDNSResolvingProcess() { } + + @Override public void run() { leanrefObject lro; try { - while ((lro = WebStructureGraph.this.publicRefDNSResolvingQueue.take()) != leanrefObjectPOISON) { + while ( (lro = WebStructureGraph.this.publicRefDNSResolvingQueue.take()) != leanrefObjectPOISON ) { learnrefs(lro); } - } catch (final InterruptedException e) { + } catch ( final InterruptedException e ) { } } } - public void generateCitationReference(final DigestURI url, final Document document, final Condenser condenser) { + public void generateCitationReference( + final DigestURI url, + final Document document, + final Condenser condenser) { // generate citation reference - if (url.isLocal()) return; // we do this only for global urls + if ( url.isLocal() ) { + return; // we do this only for global urls + } final Map hl = document.getHyperlinks(); final Iterator it = hl.keySet().iterator(); final HashSet globalRefURLs = new HashSet(); final String refhost = url.getHost(); MultiProtocolURI u; int maxref = 1000; - while (it.hasNext() && maxref-- > 0) { + while ( it.hasNext() && maxref-- > 0 ) { u = it.next(); - if (u == null) continue; - if (refhost != null && u.getHost() != null && !u.getHost().equals(refhost)) { + if ( u == null ) { + continue; + } + if ( refhost != null && u.getHost() != null && !u.getHost().equals(refhost) ) { // this is a global link globalRefURLs.add(u); } } final leanrefObject lro = new leanrefObject(url, globalRefURLs); - if (globalRefURLs.size() > 0) try { - if (this.publicRefDNSResolvingWorker.isAlive()) { - this.publicRefDNSResolvingQueue.put(lro); - } else { + if ( globalRefURLs.size() > 0 ) { + try { + if ( this.publicRefDNSResolvingWorker.isAlive() ) { + this.publicRefDNSResolvingQueue.put(lro); + } else { + learnrefs(lro); + } + } catch ( final InterruptedException e ) { learnrefs(lro); } - } catch (final InterruptedException e) { - learnrefs(lro); } } @@ -173,16 +193,22 @@ public class WebStructureGraph { assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString(); //final String refhashp = ASCII.String(lro.url.hash(), 6, 6); // ref hash part String nexturlhash; - for (final MultiProtocolURI u: lro.globalRefURLs) { + for ( final MultiProtocolURI u : lro.globalRefURLs ) { final byte[] nexturlhashb = new DigestURI(u).hash(); assert nexturlhashb != null; - if (nexturlhashb != null) { + if ( nexturlhashb != null ) { nexturlhash = ASCII.String(nexturlhashb); - assert nexturlhash.length() == 12 : "nexturlhash.length() = " + nexturlhash.length() + ", nexturlhash = " + nexturlhash; + assert nexturlhash.length() == 12 : "nexturlhash.length() = " + + nexturlhash.length() + + ", nexturlhash = " + + nexturlhash; //assert !nexturlhash.substring(6).equals(refhashp); // this is a global link cpg.append(nexturlhash); // store complete hash - assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString(); + assert cpg.length() % 12 == 0 : "cpg.length() = " + + cpg.length() + + ", cpg = " + + cpg.toString(); } } assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString(); @@ -190,22 +216,26 @@ public class WebStructureGraph { } private static int refstr2count(final String refs) { - if ((refs == null) || (refs.length() <= 8)) return 0; + if ( (refs == null) || (refs.length() <= 8) ) { + return 0; + } assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length(); return (refs.length() - 8) / 10; } static Map refstr2map(final String refs) { - if ((refs == null) || (refs.length() <= 8)) return new HashMap(); + if ( (refs == null) || (refs.length() <= 8) ) { + return new HashMap(); + } final Map map = new HashMap(); String c; final int refsc = refstr2count(refs); int d; - for (int i = 0; i < refsc; i++) { + for ( int i = 0; i < refsc; i++ ) { c = refs.substring(8 + i * 10, 8 + (i + 1) * 10); try { d = Integer.valueOf(c.substring(6), 16); - } catch (final NumberFormatException e) { + } catch ( final NumberFormatException e ) { d = 1; } map.put(c.substring(0, 6), d); @@ -217,19 +247,19 @@ public class WebStructureGraph { final StringBuilder s = new StringBuilder(map.size() * 10); s.append(GenericFormatter.SHORT_DAY_FORMATTER.format()); String h; - for (final Map.Entry entry : map.entrySet()) { + for ( final Map.Entry entry : map.entrySet() ) { s.append(entry.getKey()); h = Integer.toHexString(entry.getValue().intValue()); final int hl = h.length(); - if (hl == 0) { + if ( hl == 0 ) { s.append("0000"); - } else if (hl == 1) { + } else if ( hl == 1 ) { s.append("000").append(h); - } else if (hl == 2) { + } else if ( hl == 2 ) { s.append("00").append(h); - } else if (hl == 3) { + } else if ( hl == 3 ) { s.append('0').append(h); - } else if (hl == 4) { + } else if ( hl == 4 ) { s.append(h); } else { s.append("FFFF"); @@ -246,11 +276,11 @@ public class WebStructureGraph { String hostname = ""; String date = ""; String ref; - synchronized (this.structure_old) { + synchronized ( this.structure_old ) { tailMap = this.structure_old.tailMap(hosthash); - if (!tailMap.isEmpty()) { + if ( !tailMap.isEmpty() ) { final String key = tailMap.firstKey(); - if (key.startsWith(hosthash)) { + if ( key.startsWith(hosthash) ) { hostname = key.substring(7); ref = tailMap.get(key); date = ref.substring(0, 8); @@ -258,68 +288,87 @@ public class WebStructureGraph { } } } - synchronized (this.structure_new) { + synchronized ( this.structure_new ) { tailMap = this.structure_new.tailMap(hosthash); - if (!tailMap.isEmpty()) { + if ( !tailMap.isEmpty() ) { final String key = tailMap.firstKey(); - if (key.startsWith(hosthash)) { + if ( key.startsWith(hosthash) ) { ref = tailMap.get(key); - if (hostname.length() == 0) hostname = key.substring(7); - if (date.length() == 0) date = ref.substring(0, 8); + if ( hostname.length() == 0 ) { + hostname = key.substring(7); + } + if ( date.length() == 0 ) { + date = ref.substring(0, 8); + } h.putAll(refstr2map(ref)); } } } - if (h.isEmpty()) return null; + if ( h.isEmpty() ) { + return null; + } return new StructureEntry(hosthash, hostname, date, h); } public StructureEntry incomingReferences(final String hosthash) { final String hostname = hostHash2hostName(hosthash); - if (hostname == null) return null; + if ( hostname == null ) { + return null; + } // collect the references WebStructureGraph.StructureEntry sentry; final HashMap hosthashes = new HashMap(); Iterator i = new StructureIterator(false); - while (i.hasNext()) { + while ( i.hasNext() ) { sentry = i.next(); - if (sentry.references.containsKey(hosthash)) hosthashes.put(sentry.hosthash, sentry.references.get(hosthash)); + if ( sentry.references.containsKey(hosthash) ) { + hosthashes.put(sentry.hosthash, sentry.references.get(hosthash)); + } } i = new StructureIterator(true); - while (i.hasNext()) { + while ( i.hasNext() ) { sentry = i.next(); - if (sentry.references.containsKey(hosthash)) hosthashes.put(sentry.hosthash, sentry.references.get(hosthash)); + if ( sentry.references.containsKey(hosthash) ) { + hosthashes.put(sentry.hosthash, sentry.references.get(hosthash)); + } } // construct a new structureEntry Object return new StructureEntry( - hosthash, - hostname, - GenericFormatter.SHORT_DAY_FORMATTER.format(), - hosthashes); + hosthash, + hostname, + GenericFormatter.SHORT_DAY_FORMATTER.format(), + hosthashes); } - public static class HostReferenceFactory implements ReferenceFactory { + public static class HostReferenceFactory implements ReferenceFactory + { - private static final Row hostReferenceRow = new Row("String h-6, Cardinal m-4 {b256}, Cardinal c-4 {b256}", Base64Order.enhancedCoder); + private static final Row hostReferenceRow = new Row( + "String h-6, Cardinal m-4 {b256}, Cardinal c-4 {b256}", + Base64Order.enhancedCoder); public HostReferenceFactory() { } + @Override public Row getRow() { return hostReferenceRow; } + @Override public HostReference produceSlow(final Entry e) { return new HostReference(e); } + @Override public HostReference produceFast(final HostReference e) { return e; } } - public static class HostReference extends AbstractReference implements Reference { + public static class HostReference extends AbstractReference implements Reference + { private final Row.Entry entry; @@ -339,14 +388,17 @@ public class WebStructureGraph { this.entry = entry; } + @Override public String toPropertyForm() { return this.entry.toPropertyForm(':', true, true, false, true); } + @Override public Entry toKelondroEntry() { return this.entry; } + @Override public byte[] urlhash() { return this.entry.getPrimaryKeyBytes(); } @@ -355,40 +407,50 @@ public class WebStructureGraph { return (int) this.entry.getColLong(2); } + @Override public long lastModified() { return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(1)); } + @Override public void join(final Reference r) { // joins two entries into one entry final HostReference oe = (HostReference) r; // combine date final long o = oe.lastModified(); - if (lastModified() < o) this.entry.setCol(1, MicroDate.microDateDays(o)); + if ( lastModified() < o ) { + this.entry.setCol(1, MicroDate.microDateDays(o)); + } // combine count final int c = oe.count(); - if (count() < c) this.entry.setCol(2, c); + if ( count() < c ) { + this.entry.setCol(2, c); + } } + @Override public Collection positions() { return new ArrayList(0); } } public static final HostReferenceFactory hostReferenceFactory = new HostReferenceFactory(); - public static ReferenceContainerCache hostReferenceIndexCache = null; - public static long hostReferenceIndexCacheTime = 0; + public static ReferenceContainerCache hostReferenceIndexCache = null; + public static long hostReferenceIndexCacheTime = 0; public static final long hostReferenceIndexCacheTTL = 1000 * 60 * 60 * 12; // 12 hours time to live for cache public synchronized ReferenceContainerCache incomingReferences() { // we return a cache if the cache is filled and not stale - if (hostReferenceIndexCache != null && - hostReferenceIndexCacheTime + hostReferenceIndexCacheTTL > System.currentTimeMillis()) return hostReferenceIndexCache; + if ( hostReferenceIndexCache != null + && hostReferenceIndexCacheTime + hostReferenceIndexCacheTTL > System.currentTimeMillis() ) { + return hostReferenceIndexCache; + } // collect the references - final ReferenceContainerCache idx = new ReferenceContainerCache(hostReferenceFactory, Base64Order.enhancedCoder, 6); + final ReferenceContainerCache idx = + new ReferenceContainerCache(hostReferenceFactory, Base64Order.enhancedCoder, 6); // we iterate over all structure entries. // one structure entry has information that a specific host links to a list of other hosts @@ -403,40 +465,47 @@ public class WebStructureGraph { } private void incomingReferencesEnrich( - final ReferenceContainerCache idx, - final Iterator structureIterator, - final long time) { + final ReferenceContainerCache idx, + final Iterator structureIterator, + final long time) { // we iterate over all structure entries. // one structure entry has information that a specific host links to a list of other hosts final long timeout = System.currentTimeMillis() + time; byte[] term; HostReference hr; WebStructureGraph.StructureEntry sentry; - structureLoop: while (structureIterator.hasNext()) { + structureLoop: while ( structureIterator.hasNext() ) { sentry = structureIterator.next(); // then we loop over all the hosts that are linked from sentry.hosthash - refloop: for (final Map.Entry refhosthashandcounter: sentry.references.entrySet()) { + refloop: for ( final Map.Entry refhosthashandcounter : sentry.references + .entrySet() ) { term = UTF8.getBytes(refhosthashandcounter.getKey()); try { - hr = new HostReference(ASCII.getBytes(sentry.hosthash), GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date).getTime(), refhosthashandcounter.getValue().intValue()); - } catch (final ParseException e) { + hr = + new HostReference( + ASCII.getBytes(sentry.hosthash), + GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date).getTime(), + refhosthashandcounter.getValue().intValue()); + } catch ( final ParseException e ) { continue refloop; } // each term refers to an index entry. look if we already have such an entry ReferenceContainer r = idx.get(term, null); try { - if (r == null) { + if ( r == null ) { r = new ReferenceContainer(hostReferenceFactory, term); r.add(hr); idx.add(r); } else { r.put(hr); } - } catch (final RowSpaceExceededException e) { + } catch ( final RowSpaceExceededException e ) { continue refloop; } } - if (System.currentTimeMillis() > timeout) break structureLoop; + if ( System.currentTimeMillis() > timeout ) { + break structureLoop; + } } } @@ -459,23 +528,25 @@ public class WebStructureGraph { public int referencesCount(final String hosthash) { // returns the number of hosts that are referenced by this hosthash assert hosthash.length() == 6 : "hosthash = " + hosthash; - if (hosthash == null || hosthash.length() != 6) return 0; + if ( hosthash == null || hosthash.length() != 6 ) { + return 0; + } SortedMap tailMap; int c = 0; - synchronized (this.structure_old) { + synchronized ( this.structure_old ) { tailMap = this.structure_old.tailMap(hosthash); - if (!tailMap.isEmpty()) { + if ( !tailMap.isEmpty() ) { final String key = tailMap.firstKey(); - if (key.startsWith(hosthash)) { + if ( key.startsWith(hosthash) ) { c = refstr2count(tailMap.get(key)); } } } - synchronized (this.structure_new) { + synchronized ( this.structure_new ) { tailMap = this.structure_new.tailMap(hosthash); - if (!tailMap.isEmpty()) { + if ( !tailMap.isEmpty() ) { final String key = tailMap.firstKey(); - if (key.startsWith(hosthash)) { + if ( key.startsWith(hosthash) ) { c += refstr2count(tailMap.get(key)); } } @@ -487,20 +558,20 @@ public class WebStructureGraph { // returns the host as string, null if unknown assert hosthash.length() == 6; SortedMap tailMap; - synchronized(this.structure_old) { + synchronized ( this.structure_old ) { tailMap = this.structure_old.tailMap(hosthash); - if (!tailMap.isEmpty()) { + if ( !tailMap.isEmpty() ) { final String key = tailMap.firstKey(); - if (key.startsWith(hosthash)) { + if ( key.startsWith(hosthash) ) { return key.substring(7); } } } - synchronized(this.structure_new) { + synchronized ( this.structure_new ) { tailMap = this.structure_new.tailMap(hosthash); - if (!tailMap.isEmpty()) { + if ( !tailMap.isEmpty() ) { final String key = tailMap.firstKey(); - if (key.startsWith(hosthash)) { + if ( key.startsWith(hosthash) ) { return key.substring(7); } } @@ -513,53 +584,61 @@ public class WebStructureGraph { // parse the new reference string and join it with the stored references final StructureEntry structure = outgoingReferences(hosthash); - final Map refs = (structure == null) ? new HashMap() : structure.references; - assert reference.length() % 12 == 0 : "reference.length() = " + reference.length() + ", reference = " + reference.toString(); + final Map refs = + (structure == null) ? new HashMap() : structure.references; + assert reference.length() % 12 == 0 : "reference.length() = " + + reference.length() + + ", reference = " + + reference.toString(); String dom; int c; - for (int i = 0; i < reference.length() / 12; i++) { + for ( int i = 0; i < reference.length() / 12; i++ ) { dom = reference.substring(i * 12 + 6, (i + 1) * 12); c = 0; - if (refs.containsKey(dom)) { + if ( refs.containsKey(dom) ) { c = (refs.get(dom)).intValue(); } refs.put(dom, Integer.valueOf(++c)); } // check if the maxref is exceeded - if (refs.size() > maxref) { + if ( refs.size() > maxref ) { int shrink = refs.size() - (maxref * 9 / 10); - delloop: while (shrink > 0) { + delloop: while ( shrink > 0 ) { // shrink the references: the entry with the smallest number of references is removed int minrefcount = Integer.MAX_VALUE; String minrefkey = null; - findloop: for (final Map.Entry entry : refs.entrySet()) { - if (entry.getValue().intValue() < minrefcount) { + findloop: for ( final Map.Entry entry : refs.entrySet() ) { + if ( entry.getValue().intValue() < minrefcount ) { minrefcount = entry.getValue().intValue(); minrefkey = entry.getKey(); } - if (minrefcount == 1) break findloop; + if ( minrefcount == 1 ) { + break findloop; + } } // remove the smallest - if (minrefkey == null) break delloop; + if ( minrefkey == null ) { + break delloop; + } refs.remove(minrefkey); shrink--; } } // store the map back to the structure - synchronized(this.structure_new) { + synchronized ( this.structure_new ) { this.structure_new.put(hosthash + "," + url.getHost(), map2refstr(refs)); } } private static void joinStructure(final TreeMap into, final TreeMap from) { - for (final Map.Entry e: from.entrySet()) { - if (into.containsKey(e.getKey())) { + for ( final Map.Entry e : from.entrySet() ) { + if ( into.containsKey(e.getKey()) ) { final Map s0 = refstr2map(into.get(e.getKey())); final Map s1 = refstr2map(e.getValue()); - for (final Map.Entry r: s1.entrySet()) { - if (s0.containsKey(r.getKey())) { + for ( final Map.Entry r : s1.entrySet() ) { + if ( s0.containsKey(r.getKey()) ) { s0.put(r.getKey(), s0.get(r.getKey()).intValue() + r.getValue().intValue()); } else { s0.put(r.getKey(), r.getValue().intValue()); @@ -573,7 +652,7 @@ public class WebStructureGraph { } public void joinOldNew() { - synchronized(this.structure_new) { + synchronized ( this.structure_new ) { joinStructure(this.structure_old, this.structure_new); this.structure_new.clear(); } @@ -584,10 +663,10 @@ public class WebStructureGraph { String maxhost = null; int refsize, maxref = 0; joinOldNew(); - synchronized(this.structure_new) { - for (final Map.Entry entry : this.structure_old.entrySet()) { + synchronized ( this.structure_new ) { + for ( final Map.Entry entry : this.structure_old.entrySet() ) { refsize = entry.getValue().length(); - if (refsize > maxref) { + if ( refsize > maxref ) { maxref = refsize; maxhost = entry.getKey().substring(7); } @@ -600,41 +679,59 @@ public class WebStructureGraph { return new StructureIterator(latest); } - private class StructureIterator extends LookAheadIterator implements Iterator { + private class StructureIterator extends LookAheadIterator implements + Iterator + { private final Iterator> i; private StructureIterator(final boolean latest) { - this.i = ((latest) ? WebStructureGraph.this.structure_new : WebStructureGraph.this.structure_old).entrySet().iterator(); + this.i = + ((latest) ? WebStructureGraph.this.structure_new : WebStructureGraph.this.structure_old) + .entrySet() + .iterator(); } + @Override public StructureEntry next0() { Map.Entry entry = null; String dom = null, ref = ""; - while (this.i.hasNext()) { + while ( this.i.hasNext() ) { entry = this.i.next(); ref = entry.getValue(); - if ((ref.length() - 8) % 10 != 0) continue; + if ( (ref.length() - 8) % 10 != 0 ) { + continue; + } dom = entry.getKey(); - if (dom.length() >= 8) break; + if ( dom.length() >= 8 ) { + break; + } dom = null; } - if (entry == null || dom == null) return null; + if ( entry == null || dom == null ) { + return null; + } assert (ref.length() - 8) % 10 == 0 : "refs = " + ref + ", length = " + ref.length(); - return new StructureEntry(dom.substring(0, 6), dom.substring(7), ref.substring(0, 8), refstr2map(ref)); + return new StructureEntry( + dom.substring(0, 6), + dom.substring(7), + ref.substring(0, 8), + refstr2map(ref)); } } - public static class StructureEntry { + public static class StructureEntry + { public String hosthash; // the tail of the host hash public String hostname; // the host name - public String date; // date of latest change + public String date; // date of latest change public Map references; // a map from the referenced host hash to the number of referenced to that host + private StructureEntry( - final String hosthash, - final String hostname, - final String date, - final Map references) { + final String hosthash, + final String hostname, + final String date, + final Map references) { this.hosthash = hosthash; this.hostname = hostname; this.date = date; @@ -644,30 +741,42 @@ public class WebStructureGraph { public void close() { // finish dns resolving queue - if (this.publicRefDNSResolvingWorker.isAlive()) { + if ( this.publicRefDNSResolvingWorker.isAlive() ) { log.logInfo("Waiting for the DNS Resolving Queue to terminate"); try { this.publicRefDNSResolvingQueue.put(leanrefObjectPOISON); this.publicRefDNSResolvingWorker.join(5000); - } catch (final InterruptedException e) { + } catch ( final InterruptedException e ) { } } // save to web structure file - log.logInfo("Saving Web Structure File: new = " + this.structure_new.size() + " entries, old = " + this.structure_old.size() + " entries"); + log.logInfo("Saving Web Structure File: new = " + + this.structure_new.size() + + " entries, old = " + + this.structure_old.size() + + " entries"); final long time = System.currentTimeMillis(); joinOldNew(); - if (this.structure_old.size() > 0) try { - synchronized(this.structure_old) { - if (this.structure_old.size() > 0) { - FileUtils.saveMap(this.structureFile, this.structure_old, "Web Structure Syntax: ',' to {}*"); + if ( this.structure_old.size() > 0 ) { + synchronized ( this.structure_old ) { + if ( this.structure_old.size() > 0 ) { + FileUtils + .saveMap( + this.structureFile, + this.structure_old, + "Web Structure Syntax: ',' to {}*"); final long t = Math.max(1, System.currentTimeMillis() - time); - log.logInfo("Saved Web Structure File: " + this.structure_old.size() + " entries in " + t + " milliseconds, " + (this.structure_old.size() * 1000 / t) + " entries/second"); + log.logInfo("Saved Web Structure File: " + + this.structure_old.size() + + " entries in " + + t + + " milliseconds, " + + (this.structure_old.size() * 1000 / t) + + " entries/second"); } this.structure_old.clear(); } - } catch (final IOException e) { - Log.logException(e); } } }