Removed the directly linked servlets that had been added to test memory failures which appeared in those servlets

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6612 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 69c29acb6e
commit 23bcca07a3

@ -93,8 +93,6 @@ import net.yacy.kelondro.util.MemoryControl;
import net.yacy.visualization.RasterPlotter;
import de.anomic.data.MimeTable;
import de.anomic.http.server.servlets.crawlReceipt;
import de.anomic.http.server.servlets.transferURL;
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
import de.anomic.server.serverClassLoader;
@ -1162,6 +1160,7 @@ public final class HTTPDFileHandler {
public static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
// debug functions: for special servlets call them without reflection to get better stack trace results
/*
if (targetClass.getName().equals("transferURL.class")) {
try {
return transferURL.respond(request, args, switchboard);
@ -1180,7 +1179,7 @@ public final class HTTPDFileHandler {
throw new InvocationTargetException(e);
}
}
*/
Object result;
if (safeServletsMode) synchronized (switchboard) {
result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});

@ -1,172 +0,0 @@
//crawlReceipt.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
//$LastChangedDate: 2009-11-05 21:28:37 +0100 (Do, 05 Nov 2009) $
//$LastChangedRevision: 6458 $
//$LastChangedBy: orbiter $
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//You must compile this file with
//javac -classpath .:../classes crawlReceipt.java
package de.anomic.http.server.servlets;
import java.io.IOException;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import de.anomic.crawler.retrieval.EventOrigin;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
import de.anomic.yacy.yacyNetwork;
import de.anomic.yacy.yacySeed;
/**
 * Servlet that handles the receipt a remote peer sends back in response to a
 * remote crawling request.
 */
public final class crawlReceipt {

    /**
     * Processes a crawl receipt and returns the properties to send back.
     *
     * <p>The receipt is rejected (and nothing is stored) when the request is
     * not authenticated, is addressed to a different peer, cannot be decoded,
     * comes from outside our cluster while in Robinson mode, carries a
     * malformed URL entry, or names a URL outside the accepted network domain.
     * A receipt with result {@code "fill"} is stored as a new URL metadata
     * entry; any other result is recorded as an error for the delegated URL.
     *
     * @param header the HTTP request header (not read by this method)
     * @param post   the request parameters; the keys read are {@code iam},
     *               {@code youare}, {@code key}, {@code result}, {@code reason}
     *               and {@code lurlEntry}
     * @param env    the server environment; must be the {@link Switchboard}
     * @return the response properties. They are empty when {@code post} or
     *         {@code env} is {@code null} or authentication fails; otherwise
     *         they carry a {@code delay} entry (presumably the number of
     *         seconds the sender should wait before contacting us again --
     *         TODO confirm against the caller)
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
        if ((post == null) || (env == null)) {
            return prop;
        }
        if (!yacyNetwork.authentifyRequest(post, env)) {
            return prop;
        }

        final Log log = sb.getLog();

        // request values
        final String iam = post.get("iam", "");       // seed hash of requester
        final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability
        final String key = post.get("key", "");       // transmission key
        final String result = post.get("result", ""); // the result; either "ok" or "fail"
        final String reason = post.get("reason", ""); // the reason for that result
        final String propStr = crypt.simpleDecode(post.get("lurlEntry", ""), key);

        /*
         the result can have one of the following values:
         negative cases, no retry
           unavailable - the resource is not available (a broken link); not found or interrupted
           exception   - an exception occurred
           robot       - a robot-file has denied to crawl that resource

         negative cases, retry possible
           rejected    - the peer has rejected to load the resource
           dequeue     - peer too busy - rejected to crawl

         positive cases with crawling
           fill        - the resource was loaded and processed
           update      - the resource was already in database but re-loaded and processed

         positive cases without crawling
           known       - the resource is already in database, believed to be fresh and not reloaded
           stale       - the resource was reloaded but not processed because source had no changes
        */

        final yacySeed otherPeer = sb.peers.get(iam);
        final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));

        if ((sb.peers.mySeed() == null) || (!(sb.peers.mySeed().hash.equals(youare)))) {
            // no yacy connection / unknown peers
            prop.put("delay", "3600");
            return prop;
        }

        if (propStr == null) {
            // error with url / wrong key
            prop.put("delay", "3600");
            return prop;
        }

        if ((sb.isRobinsonMode()) && (!sb.isInMyCluster(otherPeer))) {
            // we reject urls that are from outside our cluster;
            // returning here is essential: without it the entry below would
            // still be stored and the delay value would be overwritten
            prop.put("delay", "9999");
            return prop;
        }

        // generating a new loaded URL entry
        final URIMetadataRow entry = URIMetadataRow.importEntry(propStr);
        if (entry == null) {
            log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (entry null) from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }

        final URIMetadataRow.Components metadata = entry.metadata();
        if (metadata.url() == null) {
            log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }

        // check if the entry is in our network domain
        final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
        if (urlRejectReason != null) {
            log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "9999");
            return prop;
        }

        if (result.equals("fill")) {
            try {
                // put new entry into database
                sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry);
                sb.crawlResults.stack(entry, youare, iam, EventOrigin.REMOTE_RECEIPTS);
                sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
                log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + entry.hash() + ":" + metadata.url().toNormalform(false, true));

                // ready for more
                prop.put("delay", "10");
                return prop;
            } catch (final IOException e) {
                Log.logException(e);
                prop.put("delay", "3600");
                return prop;
            }
        }

        // every other result: the delegated work is transformed into an error case
        sb.crawlQueues.delegatedURL.remove(entry.hash());
        sb.crawlQueues.errorURL.push(
                entry.toBalancerEntry(iam),
                youare,
                null,
                0,
                result + ":" + reason);

        prop.put("delay", "3600");
        return prop;
    }
}

@ -1,169 +0,0 @@
// this is a temporary 1-to-1 copy of the transferURL servlet
package de.anomic.http.server.servlets;
import java.io.IOException;
import java.text.ParseException;
import net.yacy.document.content.RSSMessage;
import net.yacy.document.parser.xml.RSSFeed;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.repository.Blacklist;
import de.anomic.crawler.retrieval.EventOrigin;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNetwork;
import de.anomic.yacy.yacySeed;
public final class transferURL {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException {
final long start = System.currentTimeMillis();
long freshdate = 0;
try {freshdate = DateFormatter.parseShortDay("20061101").getTime();} catch (final ParseException e1) {}
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
if ((post == null) || (env == null)) return prop;
if (!yacyNetwork.authentifyRequest(post, env)) return prop;
// request values
final String iam = post.get("iam", ""); // seed hash of requester
final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability
// final String key = post.get("key", ""); // transmission key
final int urlc = post.getInt("urlc", 0); // number of transported urls
final boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
final boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
// response values
String result = "";
String doublevalues = "0";
final yacySeed otherPeer = sb.peers.get(iam);
final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
yacyCore.log.logInfo("Rejecting URLs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
result = "wrong_target";
} else if ((!granted) || (sb.isRobinsonMode())) {
yacyCore.log.logInfo("Rejecting URLs from peer " + otherPeerName + ". Not granted.");
result = "error_not_granted";
} else {
int received = 0;
int blocked = 0;
final int sizeBefore = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size();
// read the urls from the other properties and store
String urls;
URIMetadataRow lEntry;
for (int i = 0; i < urlc; i++) {
serverCore.checkInterruption();
// read new lurl-entry
urls = post.get("url" + i);
if (urls == null) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: got null URL-string from peer " + otherPeerName);
blocked++;
continue;
}
// parse new lurl-entry
lEntry = URIMetadataRow.importEntry(urls);
if (lEntry == null) {
yacyCore.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
}
// check if entry is well-formed
final URIMetadataRow.Components metadata = lEntry.metadata();
if (metadata.url() == null) {
yacyCore.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
}
// check whether entry is too old
if (lEntry.freshdate().getTime() <= freshdate) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transerURL: received too old URL from peer " + otherPeerName + ": " + lEntry.freshdate());
blocked++;
continue;
}
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()))) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
if (urlRejectReason != null) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: blocked URL '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}
// write entry to database
yacyCore.log.logInfo("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.metadata().url().toNormalform(true, false));
try {
sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry); // OOM here!
sb.crawlResults.stack(lEntry, iam, iam, EventOrigin.DHT_TRANSFER);
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: received URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName);
received++;
} catch (final IOException e) {
Log.logException(e);
}
}
sb.peers.mySeed().incRU(received);
// return rewrite properties
final int more = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size() - sizeBefore;
doublevalues = Integer.toString(received - more);
yacyCore.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs");
RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " URLs from peer " + otherPeerName + ", blocked " + blocked, "", ""));
if ((received - more) > 0) yacyCore.log.logSevere("Received " + doublevalues + " double URLs from peer " + otherPeerName);
result = "ok";
}
prop.put("double", doublevalues);
prop.put("result", result);
return prop;
}
}
Loading…
Cancel
Save