diff --git a/defaults/yacy.init b/defaults/yacy.init index cb938392b..62245f709 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -431,13 +431,6 @@ peerName=anomic # of the period here (minutes) peerCycle=2 -# The p2p maintenance can run in either of two online modes: -# - don't process jobs and only access available in cache -> mode 0 -# - process any job only if we are online, which is technically only the case -# if the proxy is used -> mode 1 -# - process jobs periodically, with periodes according to peerCycle -> mode 2 -onlineMode=2 - # Debug mode for YACY network: this will trigger that also local ip's are # accepted as peer addresses yacyDebugMode=false diff --git a/htroot/Network.html b/htroot/Network.html index feec221ef..9d19518b2 100644 --- a/htroot/Network.html +++ b/htroot/Network.html @@ -165,21 +165,6 @@
- #(comment)# - :: - - :: - - #(/comment)#
diff --git a/htroot/Network.java b/htroot/Network.java index d00bf4a77..9ec5b46f1 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -164,16 +164,6 @@ public class Network { prop.putNum("table_gppm", otherppm + ((iAmActive) ? myppm : 0)); prop.putNum("table_gqph", Math.round(6000d * otherqpm + 100d * ((iAmActive) ? myqph : 0d)) / 100d); - -// String comment = ""; - prop.put("table_comment", 0); - if (conCount == 0) { - if (Integer.parseInt(sb.getConfig("onlineMode", "1")) == 2) { - prop.put("table_comment", 1);//in onlinemode, but not online - } else { - prop.put("table_comment", 2);//not in online mode, and not online - } - } prop.put("table", 2); // triggers overview prop.put("page", 0); } else if (post != null && Integer.parseInt(post.get("page", "1")) == 4) { diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java index 357ba3a68..76fc88ecb 100644 --- a/htroot/SettingsAck_p.java +++ b/htroot/SettingsAck_p.java @@ -288,28 +288,6 @@ public class SettingsAck_p { prop.putHTML("info_filter", filter); return prop; } - - if (post.containsKey("pmode")) { - env.setConfig("onlineMode", "2"); - prop.put("info", "11");//permanent online mode - yacyCore.setOnlineMode(2); - yacyCore.triggerOnlineAction(); - return prop; - } - - if (post.containsKey("emode")) { - env.setConfig("onlineMode", "1"); - prop.put("info", "24");//event-based online mode - yacyCore.setOnlineMode(1); - return prop; - } - - if (post.containsKey("cmode")) { - env.setConfig("onlineMode", "0"); - prop.put("info", "25");//cache mode - yacyCore.setOnlineMode(0); - return prop; - } if (post.containsKey("proxysettings")) { diff --git a/htroot/Status.java b/htroot/Status.java index e7bf09338..4582f6816 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -270,14 +270,6 @@ public class Status { prop.put("tray", "1"); } - if (sb.getConfig("onlineMode", "1").equals("0")) { - prop.put("omode", "0"); - } else if (sb.getConfig("onlineMode", "1").equals("1")) { - prop.put("omode", "1"); - } else { - prop.put("omode", "2"); - } - // memory usage and system attributes prop.put("freeMemory", serverMemory.bytesToString(serverMemory.free())); prop.put("totalMemory", serverMemory.bytesToString(serverMemory.total())); diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 9dcc71b33..1fbf60eb8 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -476,6 +476,8 @@ public final class CrawlStacker extends Thread { // store information final boolean local = entry.initiator().equals(sb.webIndex.seedDB.mySeed().hash); + final boolean proxy = (entry.initiator() == null || entry.initiator().equals("------------")) && profile.handle().equals(this.sb.webIndex.defaultProxyProfile.handle()); + final boolean remote = profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle()); final boolean global = (profile.remoteIndexing()) /* granted */ && (entry.depth() == profile.generalDepth()) /* leaf node */ && @@ -485,15 +487,29 @@ public final class CrawlStacker extends Thread { (sb.webIndex.seedDB.mySeed().isPrincipal()) ) /* qualified */; - if (!local && !global && !profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle())) { - this.log.logSevere("URL '" + entry.url().toString() + "' can neither be crawled local nor global."); + if (!local && !global && !remote && !proxy) { + this.log.logSevere("URL '" + entry.url().toString() + "' cannot be crawled. initiator = " + entry.initiator() + ", profile.handle = " + profile.handle()); + } else { + if (global) { + // it may be possible that global == true and local == true, so do not check an error case against it + if (proxy) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, proxy = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle()); + if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle()); + sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_LIMIT, entry); + } + if (local) { + if (proxy) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, proxy = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle()); + if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle()); + sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_CORE, entry); + } + if (proxy) { + if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: proxy = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle()); + sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_CORE, entry); + } + if (remote) { + sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_REMOTE, entry); + } + } - - // add the url into the crawling queue - sb.crawlQueues.noticeURL.push( - ((global) ? NoticedURL.STACK_TYPE_LIMIT : - ((local) ? NoticedURL.STACK_TYPE_CORE : NoticedURL.STACK_TYPE_REMOTE)) /*local/remote stack*/, - entry); return null; } diff --git a/source/de/anomic/crawler/IndexingStack.java b/source/de/anomic/crawler/IndexingStack.java index e22b89b9f..66c7bd634 100644 --- a/source/de/anomic/crawler/IndexingStack.java +++ b/source/de/anomic/crawler/IndexingStack.java @@ -318,7 +318,7 @@ public class IndexingStack { } public String initiator() { - return (initiator == null) ? "------------" : initiator; + return (initiator == null) ? "------------" : initiator; // TODO: this may cause problems for methods that check if the initiator is the proxy } public yacySeed initiatorPeer() { @@ -396,10 +396,10 @@ public class IndexingStack { // 6) local fetching for global crawling (other known or unknwon initiator) int processCase = plasmaSwitchboardConstants.PROCESSCASE_0_UNKNOWN; // FIXME the equals seems to be incorrect: String.equals(boolean) - if ((initiator == null) || (initiator.equals(initiator.length() == 0))) { + if ((initiator == null) || initiator.length() == 0 || initiator.equals("------------")) { // proxy-load processCase = plasmaSwitchboardConstants.PROCESSCASE_4_PROXY_LOAD; - } else if ((initiator != null) && (initiator.equals(wordIndex.seedDB.mySeed().hash))) { + } else if (initiator.equals(wordIndex.seedDB.mySeed().hash)) { // normal crawling processCase = plasmaSwitchboardConstants.PROCESSCASE_5_LOCAL_CRAWLING; } else { diff --git a/source/de/anomic/http/JakartaCommonsHttpClient.java b/source/de/anomic/http/JakartaCommonsHttpClient.java index bfeb96b5e..e5374d685 100644 --- a/source/de/anomic/http/JakartaCommonsHttpClient.java +++ b/source/de/anomic/http/JakartaCommonsHttpClient.java @@ -446,6 +446,10 @@ public class JakartaCommonsHttpClient { // cleanUp statistics HttpConnectionInfo.removeConnection(generateConInfo(method)); throw e; + } catch (final IllegalStateException e) { + // cleanUp statistics + HttpConnectionInfo.removeConnection(generateConInfo(method)); + throw new IOException(e.getMessage()); } if (serverLog.isFinest("HTTPC")) serverLog.logFinest("HTTPC", "<-" + method.hashCode() + " response headers " + Arrays.toString(method.getResponseHeaders())); diff --git a/source/de/anomic/http/httpdProxyCacheEntry.java b/source/de/anomic/http/httpdProxyCacheEntry.java index 190029d6f..8a8edd744 100755 --- a/source/de/anomic/http/httpdProxyCacheEntry.java +++ b/source/de/anomic/http/httpdProxyCacheEntry.java @@ -158,6 +158,13 @@ public class httpdProxyCacheEntry implements indexDocumentMetadata { this.depth = depth; this.responseStatus = responseStatus; this.profile = profile; + + // the initiator is the hash of the peer that caused the hash entry + // it is stored here only to track processed in the peer and this + // information is not permanently stored in the web index after the queue has + // been processed + // in case of proxy usage, the initiator hash is null, + // which distinguishes local crawling from proxy indexing this.initiator = (initiator == null) ? null : ((initiator.length() == 0) ? null : initiator); this.language = yacyURL.language(url); diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 4a9d1d4f7..0146ac7bb 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -93,7 +93,6 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverMiniLogFormatter; -import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyURL; public final class httpdProxyHandler { @@ -415,35 +414,32 @@ public final class httpdProxyHandler { // 4. cache stale - refill - superfluous // in two of these cases we trigger a scheduler to handle newly arrived files: // case 1 and case 3 - final indexDocumentMetadata cacheEntry = (cachedResponseHeader == null) ? null : - new httpdProxyCacheEntry( - 0, // crawling depth - url, // url - "", // name of the url is unknown - //requestHeader, // request headers - "200 OK", // request status - requestHeader, - cachedResponseHeader, - null, // initiator - switchboard.webIndex.defaultProxyProfile // profile - ); - if (cacheEntry != null) plasmaHTCache.storeMetadata(cachedResponseHeader, cacheEntry); - - if (yacyCore.getOnlineMode() == 0) { - if (cacheExists) { - if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from cache"); - fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond); - } else { - theLogger.logInfo("URL not availabe in Cache"+" and not in online-mode!"); - httpd.sendRespondError(conProp,countedRespond,4,404,null,"URL not availabe in Cache",null); - } - } else if (cacheExists && cacheEntry.shallUseCacheForProxy()) { - if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from cache"); - fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond); - } else { - if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from web"); - fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond); + if (cachedResponseHeader == null) { + if (theLogger.isFinest()) theLogger.logFinest(reqID + " page not in cache: fulfill request from web"); + fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond); + } else { + final indexDocumentMetadata cacheEntry = new httpdProxyCacheEntry( + 0, // crawling depth + url, // url + "", // name of the url is unknown + //requestHeader, // request headers + "200 OK", // request status + requestHeader, + cachedResponseHeader, + null, // initiator + switchboard.webIndex.defaultProxyProfile // profile + ); + plasmaHTCache.storeMetadata(cachedResponseHeader, cacheEntry); // TODO: check if this storeMetadata is necessary + + if (cacheExists && cacheEntry.shallUseCacheForProxy()) { + if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from cache"); + fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond); + } else { + if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from web"); + fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond); + } } + } catch (final Exception e) { try { diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index e81a99ac8..a6df6224b 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -85,6 +85,8 @@ public class rssParser extends AbstractParser implements Parser { final serverCharBuffer authors = new serverCharBuffer(); final RSSFeed feed = new RSSReader(source).getFeed(); + if (feed == null) throw new ParserException("no feed in document",location); + if (feed.getChannel() == null) throw new ParserException("no channel in document",location); // getting the rss feed title and description final String feedTitle = feed.getChannel().getTitle(); diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index cdb61a53e..2a6d201fd 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -699,7 +699,6 @@ public final class plasmaHTCache { * ACCESS METHODS */ - // Store to Cache public static void storeMetadata( @@ -712,7 +711,6 @@ public final class plasmaHTCache { hm.putAll(responseHeader); hm.put("@@URL", metadata.url().toNormalform(false, false)); hm.put("@@DEPTH", Integer.toString(metadata.depth())); - if (metadata.initiator() != null) hm.put("@@INITIATOR", metadata.initiator()); responseHeaderDB.put(metadata.urlHash(), hm); } catch (final Exception e) { log.logWarning("could not write ResourceInfo: " diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index 11bf9c55e..f38bbc9ea 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -81,7 +81,6 @@ public class yacyCore { // public static boolean terminate = false; // class variables - private static int onlineMode = 1; plasmaSwitchboard sb; public static int yacyTime() { @@ -115,41 +114,12 @@ public class yacyCore { // ATTENTION, VERY IMPORTANT: before starting the thread, the httpd yacy server must be running! speedKey = System.currentTimeMillis() - time; - - // start with a seedList update to propagate out peer, if possible - onlineMode = Integer.parseInt(sb.getConfig("onlineMode", "1")); - //lastSeedUpdate = universalTime(); lastOnlineTime = 0; - - // cycle - // within cycle: update seed file, strengthen network, pass news (new, old seed's) - if (online()) { - log.logConfig("you are in online mode"); - } else { - log.logConfig("YOU ARE OFFLINE! ---"); - log.logConfig("--- TO START BOOTSTRAPING, YOU MUST USE THE PROXY,"); - log.logConfig("--- OR HIT THE BUTTON 'go online'"); - log.logConfig("--- ON THE STATUS PAGE http://localhost:" + serverCore.getPortNr(sb.getConfig("port", "8080")) + "/Status.html"); - } } synchronized static public void triggerOnlineAction() { lastOnlineTime = System.currentTimeMillis(); } - - public final boolean online() { - onlineMode = Integer.parseInt(sb.getConfig("onlineMode", "1")); - return ((onlineMode == 2) || ((System.currentTimeMillis() - lastOnlineTime) < 10000)); - } - - public static int getOnlineMode() { - return onlineMode; - } - - public static void setOnlineMode(final int newOnlineMode) { - onlineMode = newOnlineMode; - return; - } public final void publishSeedList() { if (log.isFine()) log.logFine("yacyCore.publishSeedList: Triggered Seed Publish"); @@ -201,7 +171,6 @@ public class yacyCore { } public final void peerPing() { - if (!online()) { return; } if ((sb.isRobinsonMode()) && (sb.getConfig("cluster.mode", "").equals("privatepeer"))) { // in case this peer is a privat peer we omit the peer ping // all other robinson peer types do a peer ping: