- code cleanup; added a debug line for further investigation in HTTPDemon.parseMultipart

- changed the data structure used for sorting in search to one that performs better in that specific case (too many updates)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7150 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent ac1c08924e
commit 5870b13f3a

@ -57,15 +57,6 @@ public final class HTTPLoader {
* The socket timeout that should be used * The socket timeout that should be used
*/ */
private final int socketTimeout; private final int socketTimeout;
/**
* The maximum allowed file size
*/
//private long maxFileSize = -1;
//private String acceptEncoding;
//private String acceptLanguage;
//private String acceptCharset;
private final Switchboard sb; private final Switchboard sb;
private final Log log; private final Log log;
@ -119,27 +110,20 @@ public final class HTTPLoader {
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING)); requestHeader.put(HeaderFramework.ACCEPT_ENCODING, sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));
// HTTP-Client // HTTP-Client
// final Client client = new Client(socketTimeout, requestHeader);
// ResponseContainer res = null;
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient();
client.setTimout(socketTimeout); client.setTimout(socketTimeout);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
// try {
// send request // send request
// res = client.GET(request.url().toString(), maxFileSize);
final byte[] responseBody = client.GETbytes(request.url().toString(), maxFileSize); final byte[] responseBody = client.GETbytes(request.url().toString(), maxFileSize);
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders()); final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode(); final int code = client.getHttpResponse().getStatusLine().getStatusCode();
// FIXME: 30*-handling (bottom) is never reached // FIXME: 30*-handling (bottom) is never reached
// we always get the final content because httpClient.followRedirects = true // we always get the final content because httpClient.followRedirects = true
// if (res.getStatusCode() == 200 || res.getStatusCode() == 203) {
if (responseBody != null && (code == 200 || code == 203)) { if (responseBody != null && (code == 200 || code == 203)) {
// the transfer is ok // the transfer is ok
// we write the new cache entry to file system directly // we write the new cache entry to file system directly
// res.setAccountingName("CRAWLER");
// final byte[] responseBody = res.getData();
long contentLength = responseBody.length; long contentLength = responseBody.length;
ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength); ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength);
@ -154,8 +138,6 @@ public final class HTTPLoader {
response = new Response( response = new Response(
request, request,
requestHeader, requestHeader,
// res.getResponseHeader(),
// res.getStatusLine(),
header, header,
Integer.toString(code), Integer.toString(code),
mp == null ? null : new CrawlProfile(mp), mp == null ? null : new CrawlProfile(mp),
@ -163,12 +145,9 @@ public final class HTTPLoader {
); );
return response; return response;
// } else if (res.getStatusLine().startsWith("30")) {
// if (res.getResponseHeader().containsKey(HeaderFramework.LOCATION)) {
} else if (code > 299 && code < 310) { } else if (code > 299 && code < 310) {
if (header.containsKey(HeaderFramework.LOCATION)) { if (header.containsKey(HeaderFramework.LOCATION)) {
// getting redirection URL // getting redirection URL
// String redirectionUrlString = res.getResponseHeader().get(HeaderFramework.LOCATION);
String redirectionUrlString = header.get(HeaderFramework.LOCATION); String redirectionUrlString = header.get(HeaderFramework.LOCATION);
redirectionUrlString = redirectionUrlString.trim(); redirectionUrlString = redirectionUrlString.trim();
@ -181,7 +160,6 @@ public final class HTTPLoader {
final DigestURI redirectionUrl = new DigestURI(MultiProtocolURI.newURL(request.url(), redirectionUrlString)); final DigestURI redirectionUrl = new DigestURI(MultiProtocolURI.newURL(request.url(), redirectionUrlString));
// restart crawling with new url // restart crawling with new url
// this.log.logInfo("CRAWLER Redirection detected ('" + res.getStatusLine() + "') for URL " + request.url().toString());
this.log.logInfo("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + request.url().toString()); this.log.logInfo("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + request.url().toString());
this.log.logInfo("CRAWLER ..Redirecting request to: " + redirectionUrl); this.log.logInfo("CRAWLER ..Redirecting request to: " + redirectionUrl);
@ -204,17 +182,9 @@ public final class HTTPLoader {
} }
} else { } else {
// if the response has not the right response type then reject file // if the response has not the right response type then reject file
// sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code " + res.getStatusCode() + ")");
// throw new IOException("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + request.url().toString());
sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code " + code + ")"); sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code " + code + ")");
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString()); throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
} }
// } finally {
// if(res != null) {
// // release connection
// res.closeStream();
// }
// }
return response; return response;
} }
@ -251,22 +221,15 @@ public final class HTTPLoader {
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET); requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING); requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);
// HTTP-Client
// final Client client = new Client(20000, requestHeader);
// ResponseContainer res = null;
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient();
client.setTimout(20000); client.setTimout(20000);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
// try {
// send request
// res = client.GET(request.url().toString(), Long.MAX_VALUE);
final byte[] responseBody = client.GETbytes(request.url().toString(), Long.MAX_VALUE); final byte[] responseBody = client.GETbytes(request.url().toString(), Long.MAX_VALUE);
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders()); final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode(); final int code = client.getHttpResponse().getStatusLine().getStatusCode();
// FIXME: 30*-handling (bottom) is never reached // FIXME: 30*-handling (bottom) is never reached
// we always get the final content because httpClient.followRedirects = true // we always get the final content because httpClient.followRedirects = true
// if (res.getStatusCode() == 200 || res.getStatusCode() == 203) {
if (responseBody != null && (code == 200 || code == 203)) { if (responseBody != null && (code == 200 || code == 203)) {
// the transfer is ok // the transfer is ok
@ -274,15 +237,11 @@ public final class HTTPLoader {
ByteCount.addAccountCount(ByteCount.CRAWLER, responseBody.length); ByteCount.addAccountCount(ByteCount.CRAWLER, responseBody.length);
// we write the new cache entry to file system directly // we write the new cache entry to file system directly
// res.setAccountingName("CRAWLER");
// final byte[] responseBody = res.getData();
// create a new cache entry // create a new cache entry
response = new Response( response = new Response(
request, request,
requestHeader, requestHeader,
// res.getResponseHeader(),
// res.getStatusLine(),
header, header,
Integer.toString(code), Integer.toString(code),
null, null,
@ -290,12 +249,9 @@ public final class HTTPLoader {
); );
return response; return response;
// } else if (res.getStatusLine().startsWith("30")) {
// if (res.getResponseHeader().containsKey(HeaderFramework.LOCATION)) {
} else if (code > 299 && code < 310) { } else if (code > 299 && code < 310) {
if (header.containsKey(HeaderFramework.LOCATION)) { if (header.containsKey(HeaderFramework.LOCATION)) {
// getting redirection URL // getting redirection URL
// String redirectionUrlString = res.getResponseHeader().get(HeaderFramework.LOCATION);
String redirectionUrlString = header.get(HeaderFramework.LOCATION); String redirectionUrlString = header.get(HeaderFramework.LOCATION);
redirectionUrlString = redirectionUrlString.trim(); redirectionUrlString = redirectionUrlString.trim();
@ -318,15 +274,8 @@ public final class HTTPLoader {
} }
} else { } else {
// if the response has not the right response type then reject file // if the response has not the right response type then reject file
// throw new IOException("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + request.url().toString());
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString()); throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
} }
// } finally {
// if(res != null) {
// // release connection
// res.closeStream();
// }
// }
return response; return response;
} }

@ -1032,22 +1032,7 @@ public final class HTTPDFileHandler {
// flush all // flush all
try {newOut.flush();}catch (final Exception e) {} try {newOut.flush();}catch (final Exception e) {}
/*
// wait a little time until everything closes so that clients can read from the streams/sockets
if ((contentLength >= 0) && (requestHeader.get(RequestHeader.CONNECTION, "close")).indexOf("keep-alive") == -1) {
// in case that the client knows the size in advance (contentLength present) the waiting will have no effect on the interface performance
// but if the client waits on a connection interruption this will slow down.
try {Thread.sleep(2000);} catch (final InterruptedException e) {} // FIXME: is this necessary?
}
*/
} }
// check mime type again using the result array: these are 'magics'
// if (serverByteBuffer.equals(result, 1, "PNG".getBytes())) mimeType = mimeTable.getProperty("png","text/html");
// else if (serverByteBuffer.equals(result, 0, "GIF89".getBytes())) mimeType = mimeTable.getProperty("gif","text/html");
// else if (serverByteBuffer.equals(result, 6, "JFIF".getBytes())) mimeType = mimeTable.getProperty("jpg","text/html");
//System.out.print("MAGIC:"); for (int i = 0; i < 10; i++) System.out.print(Integer.toHexString((int) result[i]) + ","); System.out.println();
} }
} else { } else {
HTTPDemon.sendRespondError(conProp,out,3,404,"File not Found",null,null); HTTPDemon.sendRespondError(conProp,out,3,404,"File not Found",null,null);
@ -1055,8 +1040,7 @@ public final class HTTPDFileHandler {
} }
} catch (final Exception e) { } catch (final Exception e) {
try { try {
// doing some errorhandling ... // error handling
//Log.logException(e);
int httpStatusCode = 400; int httpStatusCode = 400;
final String httpStatusText = null; final String httpStatusText = null;
final StringBuilder errorMessage = new StringBuilder(2000); final StringBuilder errorMessage = new StringBuilder(2000);

@ -484,21 +484,16 @@ public final class HTTPDProxyHandler {
// send request // send request
try { try {
// res = client.GET(getUrl);
// if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine());
client.GET(getUrl); client.GET(getUrl);
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine()); if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader); conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader);
// final ResponseHeader responseHeader = res.getResponseHeader();
final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders()); final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders());
// determine if it's an internal error of the httpc // determine if it's an internal error of the httpc
if (responseHeader.isEmpty()) { if (responseHeader.isEmpty()) {
// throw new Exception(res.getStatusLine());
throw new Exception(client.getHttpResponse().getStatusLine().toString()); throw new Exception(client.getHttpResponse().getStatusLine().toString());
} }
// final ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, res.getStatusCode(), respond);
final ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond); final ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond);
// the cache does either not exist or is (supposed to be) stale // the cache does either not exist or is (supposed to be) stale
@ -539,13 +534,6 @@ public final class HTTPDProxyHandler {
} }
if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader);
// HTTPDemon.sendRespondHeader(
// conProp,
// respond,
// httpVer,
// res.getStatusCode(),
// res.getStatusLine().substring(4), // status text
// responseHeader);
HTTPDemon.sendRespondHeader( HTTPDemon.sendRespondHeader(
conProp, conProp,
respond, respond,
@ -554,7 +542,6 @@ public final class HTTPDProxyHandler {
client.getHttpResponse().getStatusLine().toString(), // status text client.getHttpResponse().getStatusLine().toString(), // status text
responseHeader); responseHeader);
// if (hasBody(res.getStatusCode())) {
if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) { if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) {
final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond); final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);
@ -562,7 +549,6 @@ public final class HTTPDProxyHandler {
request, request,
requestHeader, requestHeader,
responseHeader, responseHeader,
// res.getStatusLine(),
Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()), Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()),
sb.crawler.defaultProxyProfile sb.crawler.defaultProxyProfile
); );
@ -940,47 +926,19 @@ public final class HTTPDProxyHandler {
if(body == null) { if(body == null) {
log.logSevere("no body to POST!"); log.logSevere("no body to POST!");
} }
// from old httpc:
// "if there is a body to the call, we would have a CONTENT-LENGTH tag in the requestHeader"
// it seems that it is a HTTP/1.1 connection which stays open (the inputStream) and endlessly waits for
// input so we have to end it to do the request
// this should not be needed anymore - see org.apache.http.entity.InputStreamEntity
// final int contentLength = requestHeader.getContentLength();
// if (contentLength > -1) {
// final byte[] bodyData;
// if(contentLength == 0) {
// // no body
// bodyData = new byte[0];
// } else {
// // read content-length bytes into memory
// bodyData = new byte[contentLength];
// int bytes_read = 0;
// while(bytes_read < contentLength) {
// bytes_read += body.read(bodyData, bytes_read, contentLength-bytes_read);
// }
// }
// body = new ByteArrayInputStream(bodyData);
// }
// ResponseContainer res = null;
try { try {
// sending the request // sending the request
// res = client.POST(getUrl, body);
// if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine());
client.POST(getUrl, body, contentLength); client.POST(getUrl, body, contentLength);
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine()); if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
// final ResponseHeader responseHeader = res.getResponseHeader();
final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders()); final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders());
// determine if it's an internal error of the httpc // determine if it's an internal error of the httpc
if (responseHeader.isEmpty()) { if (responseHeader.isEmpty()) {
// throw new Exception(res.getStatusLine());
throw new Exception(client.getHttpResponse().getStatusLine().toString()); throw new Exception(client.getHttpResponse().getStatusLine().toString());
} }
// final ChunkedOutputStream chunked = setTransferEncoding(conProp, responseHeader, res.getStatusCode(), countedRespond);
final ChunkedOutputStream chunked = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), countedRespond); final ChunkedOutputStream chunked = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), countedRespond);
// prepareResponseHeader(responseHeader, res.getHttpVer());
prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString()); prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString());
// sending the respond header back to the client // sending the respond header back to the client
@ -990,12 +948,6 @@ public final class HTTPDProxyHandler {
// sending response headers // sending response headers
if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader);
// HTTPDemon.sendRespondHeader(conProp,
// countedRespond,
// httpVer,
// res.getStatusCode(),
// res.getStatusLine().substring(4), // status text
// responseHeader);
HTTPDemon.sendRespondHeader(conProp, HTTPDemon.sendRespondHeader(conProp,
countedRespond, countedRespond,
httpVer, httpVer,
@ -1003,19 +955,7 @@ public final class HTTPDProxyHandler {
client.getHttpResponse().getStatusLine().toString(), // status text client.getHttpResponse().getStatusLine().toString(), // status text
responseHeader); responseHeader);
// respondHeader(respond, res.status, res.responseHeader);
// Saver.writeContent(res, (chunked != null) ? new BufferedOutputStream(chunked) : new BufferedOutputStream(respond));
/*
// *** (Uebernommen aus Saver-Klasse: warum ist dies hier die einzige Methode, die einen OutputStream statt einen Writer benutzt?)
try {
serverFileUtils.copyToStream(new BufferedInputStream(res.getDataAsStream()), (chunked != null) ? new BufferedOutputStream(chunked) : new BufferedOutputStream(respond));
} finally {
res.closeStream();
}
if (chunked != null) chunked.finish();
*/
final OutputStream outStream = (chunked != null) ? chunked : countedRespond; final OutputStream outStream = (chunked != null) ? chunked : countedRespond;
// FileUtils.copy(res.getDataAsStream(), outStream);
client.writeTo(outStream); client.writeTo(outStream);
if (chunked != null) { if (chunked != null) {
@ -1024,14 +964,8 @@ public final class HTTPDProxyHandler {
outStream.flush(); outStream.flush();
} catch(SocketException se) { } catch(SocketException se) {
// connection closed by client, abort download // connection closed by client, abort download
// res.abort();
client.finish(); client.finish();
} finally { } finally {
// if opened ...
// if(res != null) {
// // ... close connection
// res.closeStream();
// }
client.finish(); client.finish();
} }
} catch (final Exception e) { } catch (final Exception e) {
@ -1118,8 +1052,6 @@ public final class HTTPDProxyHandler {
*/ */
private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) { private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) {
// setup HTTP-client // setup HTTP-client
// final Client client = new Client(timeout, requestHeader);
// client.setFollowRedirects(false);
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient();
client.setTimout(timeout); client.setTimout(timeout);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
@ -1293,20 +1225,13 @@ public final class HTTPDProxyHandler {
// possibly branch into PROXY-PROXY connection // possibly branch into PROXY-PROXY connection
if (ProxySettings.use && ProxySettings.use4ssl) { if (ProxySettings.use && ProxySettings.use4ssl) {
// final Client remoteProxy = new Client(timeout, requestHeader);
// remoteProxy.setFollowRedirects(false); // should not be needed, but safe is safe
final HTTPClient remoteProxy = setupHttpClient(requestHeader, host); final HTTPClient remoteProxy = setupHttpClient(requestHeader, host);
// ResponseContainer response = null;
try { try {
// response = remoteProxy.CONNECT(host, port);
remoteProxy.HEADResponse("http://" + host + ":" + port); remoteProxy.HEADResponse("http://" + host + ":" + port);
ResponseHeader header = new ResponseHeader(remoteProxy.getHttpResponse().getAllHeaders()); ResponseHeader header = new ResponseHeader(remoteProxy.getHttpResponse().getAllHeaders());
// outputs a logline to the serverlog with the current status // outputs a logline to the serverlog with the current status
// log.logInfo("CONNECT-RESPONSE: status=" + response.getStatusLine() + ", header=" + response.getResponseHeader().toString());
// // (response.getStatusLine().charAt(0) == '2') || (response.getStatusLine().charAt(0) == '3')
// final boolean success = response.getStatusCode() >= 200 && response.getStatusCode() <= 399;
log.logInfo("CONNECT-RESPONSE: status=" + remoteProxy.getHttpResponse().getStatusLine() + ", header=" + header.toString()); log.logInfo("CONNECT-RESPONSE: status=" + remoteProxy.getHttpResponse().getStatusLine() + ", header=" + header.toString());
final boolean success = remoteProxy.getHttpResponse().getStatusLine().getStatusCode() >= 200 && remoteProxy.getHttpResponse().getStatusLine().getStatusCode() <= 399; final boolean success = remoteProxy.getHttpResponse().getStatusLine().getStatusCode() >= 200 && remoteProxy.getHttpResponse().getStatusLine().getStatusCode() <= 399;
if (success) { if (success) {
@ -1316,7 +1241,6 @@ public final class HTTPDProxyHandler {
// go on (see below) // go on (see below)
} else { } else {
// pass error response back to client // pass error response back to client
// HTTPDemon.sendRespondHeader(conProp,clientOut,httpVersion,response.getStatusCode(),response.getStatusLine().substring(4),response.getResponseHeader());
HTTPDemon.sendRespondHeader( HTTPDemon.sendRespondHeader(
conProp, conProp,
clientOut, clientOut,
@ -1328,16 +1252,8 @@ public final class HTTPDProxyHandler {
forceConnectionClose(conProp); forceConnectionClose(conProp);
return; return;
} }
// } catch (SocketException se) {
// // connection closed by client, abort download
// response.abort();
} catch (final Exception e) { } catch (final Exception e) {
throw new IOException(e.getMessage()); throw new IOException(e.getMessage());
// } finally {
// if(response != null) {
// // release connection
// response.closeStream();
// }
} }
} }

@ -802,8 +802,8 @@ public final class HTTPDemon implements serverHandler, Cloneable {
* @throws IOException * @throws IOException
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static Map<String, byte[]> parseMultipart(final RequestHeader header, final serverObjects args, final InputStream in) public static Map<String, byte[]> parseMultipart(final RequestHeader header, final serverObjects args, final InputStream in) throws IOException {
throws IOException { //ByteArrayInputStream in = new ByteArrayInputStream(FileUtils.read(inx));
final InputStream body = prepareBody(header, in); final InputStream body = prepareBody(header, in);
RequestContext request = new yacyContextRequest(header, body); RequestContext request = new yacyContextRequest(header, body);
@ -821,13 +821,15 @@ public final class HTTPDemon implements serverHandler, Cloneable {
// parse data in memory // parse data in memory
FileUpload upload = new FileUpload(diskFileItemFactory); FileUpload upload = new FileUpload(diskFileItemFactory);
List<FileItem> items; List<FileItem> items;
long time = System.currentTimeMillis();
try { try {
items = upload.parseRequest(request); items = upload.parseRequest(request);
} catch (FileUploadException e) { } catch (FileUploadException e) {
//Log.logException(e); //Log.logException(e);
throw new IOException("FileUploadException " + e.getMessage()); throw new IOException("FileUploadException " + e.getMessage());
} }
System.out.println("**** FileUploadBase.parseRequest time = " + (System.currentTimeMillis() - time));
// format information for further usage // format information for further usage
final HashMap<String, byte[]> files = new HashMap<String, byte[]>(); final HashMap<String, byte[]> files = new HashMap<String, byte[]>();
for (FileItem item : items) { for (FileItem item : items) {

@ -35,6 +35,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -50,8 +51,6 @@ import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.BinSearch; import net.yacy.kelondro.index.BinSearch;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
@ -69,9 +68,9 @@ public final class RankingProcess extends Thread {
private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000; private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000;
private final QueryParams query; private final QueryParams query;
private final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final TreeSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter private final int[] flagcount; // flag counter
private final HandleSet misses; // contains url-hashes that could not been found in the LURL-DB private final TreeSet<byte[]> misses; // contains url-hashes that could not been found in the LURL-DB
//private final int[] domZones; //private final int[] domZones;
private TreeMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion; private TreeMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
@ -102,8 +101,10 @@ public final class RankingProcess extends Thread {
this.remote_indexCount = 0; this.remote_indexCount = 0;
this.local_resourceSize = 0; this.local_resourceSize = 0;
this.local_indexCount = 0; this.local_indexCount = 0;
this.urlhashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.urlhashes = new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder);
this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); //this.urlhashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
this.misses = new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder);
//this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
this.flagcount = new int[32]; this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new Navigator(); this.hostNavigator = new Navigator();
@ -221,13 +222,8 @@ public final class RankingProcess extends Thread {
this.hostNavigator.inc(domhash, uhb); this.hostNavigator.inc(domhash, uhb);
} }
// accept; insert to ranked stack with double-check if (urlhashes.add(iEntry.metadataHash())) {
try { stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
if (!urlhashes.put(iEntry.metadataHash())) {
stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
}
} catch (RowSpaceExceededException e) {
Log.logException(e);
} }
// increase counter for statistics // increase counter for statistics
@ -364,11 +360,7 @@ public final class RankingProcess extends Thread {
urlhash = obrwi.getElement().metadataHash(); urlhash = obrwi.getElement().metadataHash();
final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight()); final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight());
if (page == null) { if (page == null) {
try { misses.add(obrwi.getElement().metadataHash());
misses.put(obrwi.getElement().metadataHash());
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
continue; continue;
} }

@ -155,8 +155,7 @@ public final class SearchEvent {
} else { } else {
// do a local search // do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2); this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);
this.rankedCache.run(); this.rankedCache.run(); // this is not started concurrently here on purpose!
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
if (generateAbstracts) { if (generateAbstracts) {
// compute index abstracts // compute index abstracts

@ -1133,6 +1133,7 @@ public final class yacyClient {
} }
} }
} catch (IOException e) { } catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
} }
System.out.println("Search Time: " + (System.currentTimeMillis() - time)); System.out.println("Search Time: " + (System.currentTimeMillis() - time));

@ -422,8 +422,12 @@ public class Domains {
if ((host == null) || (host.length() == 0)) return null; if ((host == null) || (host.length() == 0)) return null;
host = host.toLowerCase().trim(); host = host.toLowerCase().trim();
// try to simply parse the address
InetAddress ip = parseInetAddress(host);
if (ip != null) return ip;
// trying to resolve host by doing a name cache lookup // trying to resolve host by doing a name cache lookup
final InetAddress ip = nameCacheHit.get(host); ip = nameCacheHit.get(host);
if (ip != null) return ip; if (ip != null) return ip;
if (nameCacheMiss.containsKey(host)) return null; if (nameCacheMiss.containsKey(host)) return null;

Loading…
Cancel
Save