fixed bug with wrong proxy result enqueueing. See:

http://forum.yacy-websuche.de/viewtopic.php?p=8130#p8130
- removed the online status property. This influenced the proxy behavior and created some complexity that was not needed, because the online status was never used for the purpose it was created for (offline browsing)
- checked all proxy identification procedures during crawling and enhanced transparency and error checking
- fixed a proxy identification routine that caused the wrong selection of the proxy result queue

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5173 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent e071f759d2
commit 0cd0fee546

@ -431,13 +431,6 @@ peerName=anomic
# of the period here (minutes)
peerCycle=2
# The p2p maintenance can run in one of three online modes:
# - don't process jobs and only access what is available in the cache -> mode 0
# - process any job only if we are online, which is technically only the case
# if the proxy is used -> mode 1
# - process jobs periodically, with periods according to peerCycle -> mode 2
onlineMode=2
# Debug mode for YACY network: this will trigger that also local ip's are
# accepted as peer addresses
yacyDebugMode=false

@ -165,21 +165,6 @@
</div>
<div id="left">
#(comment)#
::
<ul><li>
You are in online mode, but probably no internet resource is available.
Please check your internet connection.
</li></ul>
::
<ul><li>You are not in online mode. To get online, press this button:
<form action="SettingsAck_p.html" method="get">
<fieldset>
<input type="submit" name="pmode" value="go online" />
</fieldset>
</form>
</li></ul>
#(/comment)#
</div>
<div id="left">

@ -164,16 +164,6 @@ public class Network {
prop.putNum("table_gppm", otherppm + ((iAmActive) ? myppm : 0));
prop.putNum("table_gqph", Math.round(6000d * otherqpm + 100d * ((iAmActive) ? myqph : 0d)) / 100d);
// String comment = "";
prop.put("table_comment", 0);
if (conCount == 0) {
if (Integer.parseInt(sb.getConfig("onlineMode", "1")) == 2) {
prop.put("table_comment", 1);//in onlinemode, but not online
} else {
prop.put("table_comment", 2);//not in online mode, and not online
}
}
prop.put("table", 2); // triggers overview
prop.put("page", 0);
} else if (post != null && Integer.parseInt(post.get("page", "1")) == 4) {

@ -288,28 +288,6 @@ public class SettingsAck_p {
prop.putHTML("info_filter", filter);
return prop;
}
if (post.containsKey("pmode")) {
env.setConfig("onlineMode", "2");
prop.put("info", "11");//permanent online mode
yacyCore.setOnlineMode(2);
yacyCore.triggerOnlineAction();
return prop;
}
if (post.containsKey("emode")) {
env.setConfig("onlineMode", "1");
prop.put("info", "24");//event-based online mode
yacyCore.setOnlineMode(1);
return prop;
}
if (post.containsKey("cmode")) {
env.setConfig("onlineMode", "0");
prop.put("info", "25");//cache mode
yacyCore.setOnlineMode(0);
return prop;
}
if (post.containsKey("proxysettings")) {

@ -270,14 +270,6 @@ public class Status {
prop.put("tray", "1");
}
if (sb.getConfig("onlineMode", "1").equals("0")) {
prop.put("omode", "0");
} else if (sb.getConfig("onlineMode", "1").equals("1")) {
prop.put("omode", "1");
} else {
prop.put("omode", "2");
}
// memory usage and system attributes
prop.put("freeMemory", serverMemory.bytesToString(serverMemory.free()));
prop.put("totalMemory", serverMemory.bytesToString(serverMemory.total()));

@ -476,6 +476,8 @@ public final class CrawlStacker extends Thread {
// store information
final boolean local = entry.initiator().equals(sb.webIndex.seedDB.mySeed().hash);
final boolean proxy = (entry.initiator() == null || entry.initiator().equals("------------")) && profile.handle().equals(this.sb.webIndex.defaultProxyProfile.handle());
final boolean remote = profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle());
final boolean global =
(profile.remoteIndexing()) /* granted */ &&
(entry.depth() == profile.generalDepth()) /* leaf node */ &&
@ -485,15 +487,29 @@ public final class CrawlStacker extends Thread {
(sb.webIndex.seedDB.mySeed().isPrincipal())
) /* qualified */;
if (!local && !global && !profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle())) {
this.log.logSevere("URL '" + entry.url().toString() + "' can neither be crawled local nor global.");
if (!local && !global && !remote && !proxy) {
this.log.logSevere("URL '" + entry.url().toString() + "' cannot be crawled. initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
} else {
if (global) {
// it may be possible that global == true and local == true, so do not check an error case against it
if (proxy) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, proxy = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_LIMIT, entry);
}
if (local) {
if (proxy) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, proxy = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_CORE, entry);
}
if (proxy) {
if (remote) this.log.logWarning("URL '" + entry.url().toString() + "' has conflicting initiator properties: proxy = true, remote = true, initiator = " + entry.initiator() + ", profile.handle = " + profile.handle());
sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_CORE, entry);
}
if (remote) {
sb.crawlQueues.noticeURL.push(NoticedURL.STACK_TYPE_REMOTE, entry);
}
}
// add the url into the crawling queue
sb.crawlQueues.noticeURL.push(
((global) ? NoticedURL.STACK_TYPE_LIMIT :
((local) ? NoticedURL.STACK_TYPE_CORE : NoticedURL.STACK_TYPE_REMOTE)) /*local/remote stack*/,
entry);
return null;
}

@ -318,7 +318,7 @@ public class IndexingStack {
}
public String initiator() {
return (initiator == null) ? "------------" : initiator;
return (initiator == null) ? "------------" : initiator; // TODO: this may cause problems for methods that check if the initiator is the proxy
}
public yacySeed initiatorPeer() {
@ -396,10 +396,10 @@ public class IndexingStack {
// 6) local fetching for global crawling (other known or unknown initiator)
int processCase = plasmaSwitchboardConstants.PROCESSCASE_0_UNKNOWN;
// FIXME the equals seems to be incorrect: String.equals(boolean)
if ((initiator == null) || (initiator.equals(initiator.length() == 0))) {
if ((initiator == null) || initiator.length() == 0 || initiator.equals("------------")) {
// proxy-load
processCase = plasmaSwitchboardConstants.PROCESSCASE_4_PROXY_LOAD;
} else if ((initiator != null) && (initiator.equals(wordIndex.seedDB.mySeed().hash))) {
} else if (initiator.equals(wordIndex.seedDB.mySeed().hash)) {
// normal crawling
processCase = plasmaSwitchboardConstants.PROCESSCASE_5_LOCAL_CRAWLING;
} else {

@ -446,6 +446,10 @@ public class JakartaCommonsHttpClient {
// cleanUp statistics
HttpConnectionInfo.removeConnection(generateConInfo(method));
throw e;
} catch (final IllegalStateException e) {
// cleanUp statistics
HttpConnectionInfo.removeConnection(generateConInfo(method));
throw new IOException(e.getMessage());
}
if (serverLog.isFinest("HTTPC")) serverLog.logFinest("HTTPC", "<-" + method.hashCode() + " response headers " +
Arrays.toString(method.getResponseHeaders()));

@ -158,6 +158,13 @@ public class httpdProxyCacheEntry implements indexDocumentMetadata {
this.depth = depth;
this.responseStatus = responseStatus;
this.profile = profile;
// the initiator is the hash of the peer that caused the hash entry
// it is stored here only to track processing within the peer, and this
// information is not permanently stored in the web index after the queue has
// been processed
// in case of proxy usage, the initiator hash is null,
// which distinguishes local crawling from proxy indexing
this.initiator = (initiator == null) ? null : ((initiator.length() == 0) ? null : initiator);
this.language = yacyURL.language(url);

@ -93,7 +93,6 @@ import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.server.logging.serverMiniLogFormatter;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
public final class httpdProxyHandler {
@ -415,35 +414,32 @@ public final class httpdProxyHandler {
// 4. cache stale - refill - superfluous
// in two of these cases we trigger a scheduler to handle newly arrived files:
// case 1 and case 3
final indexDocumentMetadata cacheEntry = (cachedResponseHeader == null) ? null :
new httpdProxyCacheEntry(
0, // crawling depth
url, // url
"", // name of the url is unknown
//requestHeader, // request headers
"200 OK", // request status
requestHeader,
cachedResponseHeader,
null, // initiator
switchboard.webIndex.defaultProxyProfile // profile
);
if (cacheEntry != null) plasmaHTCache.storeMetadata(cachedResponseHeader, cacheEntry);
if (yacyCore.getOnlineMode() == 0) {
if (cacheExists) {
if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from cache");
fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
} else {
theLogger.logInfo("URL not availabe in Cache"+" and not in online-mode!");
httpd.sendRespondError(conProp,countedRespond,4,404,null,"URL not availabe in Cache",null);
}
} else if (cacheExists && cacheEntry.shallUseCacheForProxy()) {
if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from cache");
fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
} else {
if (theLogger.isFinest()) theLogger.logFinest(reqID +" fulfill request from web");
fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
if (cachedResponseHeader == null) {
if (theLogger.isFinest()) theLogger.logFinest(reqID + " page not in cache: fulfill request from web");
fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
} else {
final indexDocumentMetadata cacheEntry = new httpdProxyCacheEntry(
0, // crawling depth
url, // url
"", // name of the url is unknown
//requestHeader, // request headers
"200 OK", // request status
requestHeader,
cachedResponseHeader,
null, // initiator
switchboard.webIndex.defaultProxyProfile // profile
);
plasmaHTCache.storeMetadata(cachedResponseHeader, cacheEntry); // TODO: check if this storeMetadata is necessary
if (cacheExists && cacheEntry.shallUseCacheForProxy()) {
if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from cache");
fulfillRequestFromCache(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
} else {
if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from web");
fulfillRequestFromWeb(conProp,url,ext,requestHeader,cachedResponseHeader,cacheFile,countedRespond);
}
}
} catch (final Exception e) {
try {

@ -85,6 +85,8 @@ public class rssParser extends AbstractParser implements Parser {
final serverCharBuffer authors = new serverCharBuffer();
final RSSFeed feed = new RSSReader(source).getFeed();
if (feed == null) throw new ParserException("no feed in document",location);
if (feed.getChannel() == null) throw new ParserException("no channel in document",location);
// getting the rss feed title and description
final String feedTitle = feed.getChannel().getTitle();

@ -699,7 +699,6 @@ public final class plasmaHTCache {
* ACCESS METHODS
*/
// Store to Cache
public static void storeMetadata(
@ -712,7 +711,6 @@ public final class plasmaHTCache {
hm.putAll(responseHeader);
hm.put("@@URL", metadata.url().toNormalform(false, false));
hm.put("@@DEPTH", Integer.toString(metadata.depth()));
if (metadata.initiator() != null) hm.put("@@INITIATOR", metadata.initiator());
responseHeaderDB.put(metadata.urlHash(), hm);
} catch (final Exception e) {
log.logWarning("could not write ResourceInfo: "

@ -81,7 +81,6 @@ public class yacyCore {
// public static boolean terminate = false;
// class variables
private static int onlineMode = 1;
plasmaSwitchboard sb;
public static int yacyTime() {
@ -115,41 +114,12 @@ public class yacyCore {
// ATTENTION, VERY IMPORTANT: before starting the thread, the httpd yacy server must be running!
speedKey = System.currentTimeMillis() - time;
// start with a seedList update to propagate out peer, if possible
onlineMode = Integer.parseInt(sb.getConfig("onlineMode", "1"));
//lastSeedUpdate = universalTime();
lastOnlineTime = 0;
// cycle
// within cycle: update seed file, strengthen network, pass news (new, old seed's)
if (online()) {
log.logConfig("you are in online mode");
} else {
log.logConfig("YOU ARE OFFLINE! ---");
log.logConfig("--- TO START BOOTSTRAPING, YOU MUST USE THE PROXY,");
log.logConfig("--- OR HIT THE BUTTON 'go online'");
log.logConfig("--- ON THE STATUS PAGE http://localhost:" + serverCore.getPortNr(sb.getConfig("port", "8080")) + "/Status.html");
}
}
synchronized static public void triggerOnlineAction() {
lastOnlineTime = System.currentTimeMillis();
}
public final boolean online() {
onlineMode = Integer.parseInt(sb.getConfig("onlineMode", "1"));
return ((onlineMode == 2) || ((System.currentTimeMillis() - lastOnlineTime) < 10000));
}
public static int getOnlineMode() {
return onlineMode;
}
public static void setOnlineMode(final int newOnlineMode) {
onlineMode = newOnlineMode;
return;
}
public final void publishSeedList() {
if (log.isFine()) log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
@ -201,7 +171,6 @@ public class yacyCore {
}
public final void peerPing() {
if (!online()) { return; }
if ((sb.isRobinsonMode()) && (sb.getConfig("cluster.mode", "").equals("privatepeer"))) {
// in case this peer is a private peer we omit the peer ping
// all other robinson peer types do a peer ping:

Loading…
Cancel
Save