diff --git a/defaults/yacy.init b/defaults/yacy.init
index cb938392b..62245f709 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -431,13 +431,6 @@ peerName=anomic
# of the period here (minutes)
peerCycle=2
-# The p2p maintenance can run in either of two online modes:
-# - don't process jobs and only access available in cache -> mode 0
-# - process any job only if we are online, which is technically only the case
-# if the proxy is used -> mode 1
-# - process jobs periodically, with periodes according to peerCycle -> mode 2
-onlineMode=2
-
# Debug mode for YACY network: this will trigger that also local ip's are
# accepted as peer addresses
yacyDebugMode=false
diff --git a/htroot/Network.html b/htroot/Network.html
index feec221ef..9d19518b2 100644
--- a/htroot/Network.html
+++ b/htroot/Network.html
@@ -165,21 +165,6 @@
- #(comment)#
- ::
-
-
- You are in online mode, but probably no internet resource is available.
- Please check your internet connection.
-
- ::
-
- You are not in online mode. To get online, press this button:
-
-
- #(/comment)#
diff --git a/htroot/Network.java b/htroot/Network.java
index d00bf4a77..9ec5b46f1 100644
--- a/htroot/Network.java
+++ b/htroot/Network.java
@@ -164,16 +164,6 @@ public class Network {
prop.putNum("table_gppm", otherppm + ((iAmActive) ? myppm : 0));
prop.putNum("table_gqph", Math.round(6000d * otherqpm + 100d * ((iAmActive) ? myqph : 0d)) / 100d);
-
-// String comment = "";
- prop.put("table_comment", 0);
- if (conCount == 0) {
- if (Integer.parseInt(sb.getConfig("onlineMode", "1")) == 2) {
- prop.put("table_comment", 1);//in onlinemode, but not online
- } else {
- prop.put("table_comment", 2);//not in online mode, and not online
- }
- }
prop.put("table", 2); // triggers overview
prop.put("page", 0);
} else if (post != null && Integer.parseInt(post.get("page", "1")) == 4) {
diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java
index 357ba3a68..76fc88ecb 100644
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@@ -288,28 +288,6 @@ public class SettingsAck_p {
prop.putHTML("info_filter", filter);
return prop;
}
-
- if (post.containsKey("pmode")) {
- env.setConfig("onlineMode", "2");
- prop.put("info", "11");//permanent online mode
- yacyCore.setOnlineMode(2);
- yacyCore.triggerOnlineAction();
- return prop;
- }
-
- if (post.containsKey("emode")) {
- env.setConfig("onlineMode", "1");
- prop.put("info", "24");//event-based online mode
- yacyCore.setOnlineMode(1);
- return prop;
- }
-
- if (post.containsKey("cmode")) {
- env.setConfig("onlineMode", "0");
- prop.put("info", "25");//cache mode
- yacyCore.setOnlineMode(0);
- return prop;
- }
if (post.containsKey("proxysettings")) {
diff --git a/htroot/Status.java b/htroot/Status.java
index e7bf09338..4582f6816 100644
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -270,14 +270,6 @@ public class Status {
prop.put("tray", "1");
}
- if (sb.getConfig("onlineMode", "1").equals("0")) {
- prop.put("omode", "0");
- } else if (sb.getConfig("onlineMode", "1").equals("1")) {
- prop.put("omode", "1");
- } else {
- prop.put("omode", "2");
- }
-
// memory usage and system attributes
prop.put("freeMemory", serverMemory.bytesToString(serverMemory.free()));
prop.put("totalMemory", serverMemory.bytesToString(serverMemory.total()));
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 9dcc71b33..1fbf60eb8 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -476,6 +476,8 @@ public final class CrawlStacker extends Thread {
// store information
final boolean local = entry.initiator().equals(sb.webIndex.seedDB.mySeed().hash);
+ final boolean proxy = (entry.initiator() == null || entry.initiator().equals("------------")) && profile.handle().equals(this.sb.webIndex.defaultProxyProfile.handle());
+ final boolean remote = profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle());
final boolean global =
(profile.remoteIndexing()) /* granted */ &&
(entry.depth() == profile.generalDepth()) /* leaf node */ &&
@@ -485,15 +487,29 @@ public final class CrawlStacker extends Thread {
(sb.webIndex.seedDB.mySeed().isPrincipal())
) /* qualified */;
- if (!local && !global && !profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle())) {
- this.log.logSevere("URL '" + entry.url().toString() + "' can neither be crawled local nor global.");
+ if (!local && !global && !remote && !proxy) {
+ this.log.logSevere("URL '" + entry.url().toString() + "' cannot be crawled. initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
+ } else {
+ if (global) {
+ // global and local may both be true at the same time, so that combination is not reported as a conflict
+ if (proxy) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, proxy = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
+ if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
+ sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_LIMIT, entry);
+ }
+ if (local) {
+ if (proxy) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, proxy = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
+ if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
+ sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_CORE, entry);
+ }
+ if (proxy) {
+ if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: proxy = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
+ sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_CORE, entry);
+ }
+ if (remote) {
+ sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_REMOTE, entry);
+ }
+
}
-
- // add the url into the crawling queue
- sb.crawlQueues.noticeURL.push(
- ((global) ? NoticedURL.STACK_TYPE_LIMIT :
- ((local) ? NoticedURL.STACK_TYPE_CORE : NoticedURL.STACK_TYPE_REMOTE)) /*local/remote stack*/,
- entry);
return null;
}
diff --git a/source/de/anomic/crawler/IndexingStack.java b/source/de/anomic/crawler/IndexingStack.java
index e22b89b9f..66c7bd634 100644
--- a/source/de/anomic/crawler/IndexingStack.java
+++ b/source/de/anomic/crawler/IndexingStack.java
@@ -318,7 +318,7 @@ public class IndexingStack {
}
public String initiator() {
- return (initiator == null) ? "------------" : initiator;
+ return (initiator == null) ? "------------" : initiator; // TODO: this may cause problems for methods that check if the initiator is the proxy
}
public yacySeed initiatorPeer() {
@@ -396,10 +396,10 @@ public class IndexingStack {
// 6) local fetching for global crawling (other known or unknwon initiator)
int processCase = plasmaSwitchboardConstants.PROCESSCASE_0_UNKNOWN;
// FIXME the equals seems to be incorrect: String.equals(boolean)
- if ((initiator == null) || (initiator.equals(initiator.length() == 0))) {
+ if ((initiator == null) || initiator.length() == 0 || initiator.equals("------------")) {
// proxy-load
processCase = plasmaSwitchboardConstants.PROCESSCASE_4_PROXY_LOAD;
- } else if ((initiator != null) && (initiator.equals(wordIndex.seedDB.mySeed().hash))) {
+ } else if (initiator.equals(wordIndex.seedDB.mySeed().hash)) {
// normal crawling
processCase = plasmaSwitchboardConstants.PROCESSCASE_5_LOCAL_CRAWLING;
} else {
diff --git a/source/de/anomic/http/JakartaCommonsHttpClient.java b/source/de/anomic/http/JakartaCommonsHttpClient.java
index bfeb96b5e..e5374d685 100644
--- a/source/de/anomic/http/JakartaCommonsHttpClient.java
+++ b/source/de/anomic/http/JakartaCommonsHttpClient.java
@@ -446,6 +446,10 @@ public class JakartaCommonsHttpClient {
// cleanUp statistics
HttpConnectionInfo.removeConnection(generateConInfo(method));
throw e;
+ } catch (final IllegalStateException e) {
+ // cleanUp statistics
+ HttpConnectionInfo.removeConnection(generateConInfo(method));
+ throw new IOException(e.getMessage());
}
if (serverLog.isFinest("HTTPC")) serverLog.logFinest("HTTPC", "<-" + method.hashCode() + " response headers " +
Arrays.toString(method.getResponseHeaders()));
diff --git a/source/de/anomic/http/httpdProxyCacheEntry.java b/source/de/anomic/http/httpdProxyCacheEntry.java
index 190029d6f..8a8edd744 100755
--- a/source/de/anomic/http/httpdProxyCacheEntry.java
+++ b/source/de/anomic/http/httpdProxyCacheEntry.java
@@ -158,6 +158,13 @@ public class httpdProxyCacheEntry implements indexDocumentMetadata {
this.depth = depth;
this.responseStatus = responseStatus;
this.profile = profile;
+
// the initiator is the hash of the peer that caused this entry to be created;
// it is stored here only to track processing within the peer, and this
// information is not permanently stored in the web index after the queue has
// been processed.
// in case of proxy usage, the initiator hash is null,
// which distinguishes local crawling from proxy indexing
this.initiator = (initiator == null) ? null : ((initiator.length() == 0) ? null : initiator);
this.language = yacyURL.language(url);
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index 4a9d1d4f7..0146ac7bb 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -93,7 +93,6 @@ import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.server.logging.serverMiniLogFormatter;
-import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
public final class httpdProxyHandler {
@@ -415,35 +414,32 @@ public final class httpdProxyHandler {
// 4. cache stale - refill - superfluous
// in two of these cases we trigger a scheduler to handle newly arrived files:
// case 1 and case 3
- final indexDocumentMetadata cacheEntry = (cachedResponseHeader == null) ? null :
- new httpdProxyCacheEntry(
- 0, // crawling depth
- url, // url
- "", // name of the url is unknown
- //requestHeader, // request headers
- "200 OK", // request status
- requestHeader,
- cachedResponseHeader,
- null, // initiator
- switchboard.webIndex.defaultProxyProfile // profile
- );
- if (cacheEntry != null) plasmaHTCache.storeMetadata(cachedResponseHeader, cacheEntry);
-
- if (yacyCore.getOnlineMode() == 0) {
- if (cacheExists) {
- if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from cache");
- fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
- } else {
- theLogger.logInfo("URL not availabe in Cache"+" and not in online-mode!");
- httpd.sendRespondError(conProp,countedRespond,4,404,null,"URL not availabe in Cache",null);
- }
- } else if (cacheExists && cacheEntry.shallUseCacheForProxy()) {
- if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from cache");
- fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
- } else {
- if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from web");
- fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
+ if (cachedResponseHeader == null) {
+ if (theLogger.isFinest()) theLogger.logFinest(reqID + " page not in cache: fulfill request from web");
+ fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
+ } else {
+ final indexDocumentMetadata cacheEntry = new httpdProxyCacheEntry(
+ 0, // crawling depth
+ url, // url
+ "", // name of the url is unknown
+ //requestHeader, // request headers
+ "200 OK", // request status
+ requestHeader,
+ cachedResponseHeader,
+ null, // initiator
+ switchboard.webIndex.defaultProxyProfile // profile
+ );
+ plasmaHTCache.storeMetadata(cachedResponseHeader, cacheEntry); // TODO: check if this storeMetadata is necessary
+
+ if (cacheExists && cacheEntry.shallUseCacheForProxy()) {
+ if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from cache");
+ fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
+ } else {
+ if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from web");
+ fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
+ }
}
+
} catch (final Exception e) {
try {
diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java
index e81a99ac8..a6df6224b 100644
--- a/source/de/anomic/plasma/parser/rss/rssParser.java
+++ b/source/de/anomic/plasma/parser/rss/rssParser.java
@@ -85,6 +85,8 @@ public class rssParser extends AbstractParser implements Parser {
final serverCharBuffer authors = new serverCharBuffer();
final RSSFeed feed = new RSSReader(source).getFeed();
+ if (feed == null) throw new ParserException("no feed in document",location);
+ if (feed.getChannel() == null) throw new ParserException("no channel in document",location);
// getting the rss feed title and description
final String feedTitle = feed.getChannel().getTitle();
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index cdb61a53e..2a6d201fd 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -699,7 +699,6 @@ public final class plasmaHTCache {
* ACCESS METHODS
*/
-
// Store to Cache
public static void storeMetadata(
@@ -712,7 +711,6 @@ public final class plasmaHTCache {
hm.putAll(responseHeader);
hm.put("@@URL", metadata.url().toNormalform(false, false));
hm.put("@@DEPTH", Integer.toString(metadata.depth()));
- if (metadata.initiator() != null) hm.put("@@INITIATOR", metadata.initiator());
responseHeaderDB.put(metadata.urlHash(), hm);
} catch (final Exception e) {
log.logWarning("could not write ResourceInfo: "
diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java
index 11bf9c55e..f38bbc9ea 100644
--- a/source/de/anomic/yacy/yacyCore.java
+++ b/source/de/anomic/yacy/yacyCore.java
@@ -81,7 +81,6 @@ public class yacyCore {
// public static boolean terminate = false;
// class variables
- private static int onlineMode = 1;
plasmaSwitchboard sb;
public static int yacyTime() {
@@ -115,41 +114,12 @@ public class yacyCore {
// ATTENTION, VERY IMPORTANT: before starting the thread, the httpd yacy server must be running!
speedKey = System.currentTimeMillis() - time;
-
- // start with a seedList update to propagate out peer, if possible
- onlineMode = Integer.parseInt(sb.getConfig("onlineMode", "1"));
- //lastSeedUpdate = universalTime();
lastOnlineTime = 0;
-
- // cycle
- // within cycle: update seed file, strengthen network, pass news (new, old seed's)
- if (online()) {
- log.logConfig("you are in online mode");
- } else {
- log.logConfig("YOU ARE OFFLINE! ---");
- log.logConfig("--- TO START BOOTSTRAPING, YOU MUST USE THE PROXY,");
- log.logConfig("--- OR HIT THE BUTTON 'go online'");
- log.logConfig("--- ON THE STATUS PAGE http://localhost:" + serverCore.getPortNr(sb.getConfig("port", "8080")) + "/Status.html");
- }
}
synchronized static public void triggerOnlineAction() {
lastOnlineTime = System.currentTimeMillis();
}
-
- public final boolean online() {
- onlineMode = Integer.parseInt(sb.getConfig("onlineMode", "1"));
- return ((onlineMode == 2) || ((System.currentTimeMillis() - lastOnlineTime) < 10000));
- }
-
- public static int getOnlineMode() {
- return onlineMode;
- }
-
- public static void setOnlineMode(final int newOnlineMode) {
- onlineMode = newOnlineMode;
- return;
- }
public final void publishSeedList() {
if (log.isFine()) log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
@@ -201,7 +171,6 @@ public class yacyCore {
}
public final void peerPing() {
- if (!online()) { return; }
if ((sb.isRobinsonMode()) && (sb.getConfig("cluster.mode", "").equals("privatepeer"))) {
// in case this peer is a privat peer we omit the peer ping
// all other robinson peer types do a peer ping: