fixed unlimited FileSizeLimit

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4954 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
danielr 17 years ago
parent 2dc7c00c1c
commit 63eadfdf84

@ -587,6 +587,9 @@ public class SettingsAck_p {
long maxHttpSize;
try {
maxHttpSize = Integer.valueOf(maxSizeStr).intValue();
if(maxHttpSize < 0) {
maxHttpSize = -1;
}
env.setConfig("crawler.http.maxFileSize", Long.toString(maxHttpSize));
} catch (NumberFormatException e) {
prop.put("info", "30");

@ -59,6 +59,7 @@ import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpHeader;
import de.anomic.http.httpdBoundedSizeOutputStream;
import de.anomic.http.httpdByteCountOutputStream;
import de.anomic.http.httpdLimitExceededException;
import de.anomic.index.indexReferenceBlacklist;
import de.anomic.plasma.plasmaHTCache;
@ -154,6 +155,7 @@ public final class HTTPLoader {
// take a file from the net
plasmaHTCache.Entry htCache = null;
final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE);
try {
// create a request header
httpHeader requestHeader = new httpHeader();
@ -217,10 +219,16 @@ public final class HTTPLoader {
long contentLength = res.getResponseHeader().contentLength();
// check the maximum allowed file size
if (contentLength == -1) {
fos = new httpdBoundedSizeOutputStream(fos, sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE));
} else if (contentLength > sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE)) {
this.log.logInfo("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE) + " bytes.");
if (contentLength == -1 || maxFileSize == -1) {
if(maxFileSize == -1) {
// unlimited
fos = new httpdByteCountOutputStream(fos);
} else {
// check filesize while loading page
fos = new httpdBoundedSizeOutputStream(fos, maxFileSize);
}
} else if (contentLength > maxFileSize) {
this.log.logInfo("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_FILESIZE_LIMIT_EXCEEDED);
return null;
}
@ -322,7 +330,7 @@ public final class HTTPLoader {
this.log.logInfo("CRAWLER Interruption detected because of server shutdown.");
failreason = ErrorURL.DENIED_SERVER_SHUTDOWN;
} else if (e instanceof httpdLimitExceededException) {
this.log.logWarning("CRAWLER Max file size limit '" + sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE) + "' exceeded while downloading URL " + entry.url());
this.log.logWarning("CRAWLER Max file size limit '" + maxFileSize + "' exceeded while downloading URL " + entry.url());
failreason = ErrorURL.DENIED_FILESIZE_LIMIT_EXCEEDED;
} else if (e instanceof MalformedURLException) {
this.log.logWarning("CRAWLER Malformed URL '" + entry.url().toString() + "' detected. ");

@ -48,7 +48,7 @@ import java.io.OutputStream;
public class httpdBoundedSizeOutputStream extends httpdByteCountOutputStream {
protected long maxSize = 0;
protected final long maxSize;
public httpdBoundedSizeOutputStream(OutputStream outputStream, long sizeLimit) {
this(outputStream,0,sizeLimit);

@ -499,13 +499,11 @@ nextLine:
while((line = buffer.readLine()) != null) {
output.add(line);
}
log.logInfo("logpoint 4 output done of '"+ name +"'");
done = true;
} catch(final IOException ix) { log.logWarning("logpoint 5 " + ix.getMessage());}
} catch(final IOException ix) { log.logWarning("logpoint 4 " + ix.getMessage());}
}
public List<String> getOutput(){
log.logInfo("logpoint 6 getOutput() of '"+ name +"' requested");
while(!isDone()) {
try {
Thread.sleep(1);

Loading…
Cancel
Save