diff --git a/htroot/api/yacydoc.html b/htroot/api/yacydoc.html index 475bf2eef..e1509ebdf 100644 --- a/htroot/api/yacydoc.html +++ b/htroot/api/yacydoc.html @@ -12,8 +12,8 @@ you can validate it with http://validator.w3.org/ - #[dc_title]# #%env/templates/metas.template%# + #[dc_title]# diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index d961d9afb..bb18d4882 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -1073,7 +1073,7 @@ public final class HTTPDFileHandler { errorMessage.append("\nSession: ").append(Thread.currentThread().getName()) .append("\nQuery: ").append(path) .append("\nClient: ").append(conProp.getProperty(HeaderFramework.CONNECTION_PROP_CLIENTIP,"unknown")) - .append("\nReason: ").append(e.toString()); + .append("\nReason: ").append(e.getMessage()); if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { // sending back an error message to the client diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 49d6c074d..4e00b3ebf 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -775,7 +775,7 @@ public final class serverCore extends AbstractBusyThread implements BusyThread { // wrong parameters: this can only be an internal problem writeLine(this.commandObj.error(e)); break; - } catch (final java.lang.ClassCastException e) { + } catch (final ClassCastException e) { log.logSevere("command execution, cast exception " + e.getMessage() + " for client " + this.userAddress.getHostAddress(), e); // ?? writeLine(this.commandObj.error(e)); diff --git a/source/net/yacy/cora/protocol/HttpConnector.java b/source/net/yacy/cora/protocol/HttpConnector.java index 6e72d421d..ac1d17cd9 100644 --- a/source/net/yacy/cora/protocol/HttpConnector.java +++ b/source/net/yacy/cora/protocol/HttpConnector.java @@ -24,6 +24,8 @@ package net.yacy.cora.protocol; import java.io.IOException; import java.util.List; +import net.yacy.cora.document.MultiProtocolURI; + import org.apache.commons.httpclient.methods.multipart.Part; import de.anomic.crawler.retrieval.HTTPLoader; @@ -79,5 +81,30 @@ public class HttpConnector { } return content; } + + public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException { + return wget(url.toNormalform(false, false), url.getHost(), timeout); + } + + public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException { + final RequestHeader header = new RequestHeader(); + header.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); + header.put(HeaderFramework.HOST, vhost); + final Client client = new Client(timeout, header); + + ResponseContainer res = null; + byte[] content = null; + try { + // send request/data + res = client.GET(url); + content = res.getData(); + } finally { + if(res != null) { + // release connection + res.closeStream(); + } + } + return content; + } } diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index 3919d72ae..b201baab4 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -26,8 +26,10 @@ package net.yacy.document.parser; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.MalformedURLException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; @@ -35,6 +37,7 @@ import java.util.HashSet; import java.util.Set; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.HttpConnector; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Idiom; @@ -103,7 +106,7 @@ public class htmlParser extends AbstractParser implements Idiom { charset = patchCharsetEncoding(charset); } - if (!documentCharset.equalsIgnoreCase(charset)) { + if (documentCharset == null || !documentCharset.equalsIgnoreCase(charset)) { theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true)); } @@ -247,4 +250,25 @@ public class htmlParser extends AbstractParser implements Idiom { public boolean indexingDenied() { return false; } + + public static void main(String[] args) { + // test parsing of a url + MultiProtocolURI url; + try { + url = new MultiProtocolURI(args[0]); + byte[] content = HttpConnector.wget(url, 3000); + Document document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content)); + String title = document.dc_title(); + System.out.println(title); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (ParserException e) { + e.printStackTrace(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } diff --git a/source/net/yacy/kelondro/blob/ArrayStack.java b/source/net/yacy/kelondro/blob/ArrayStack.java index a4d8d01db..b5aebf580 100755 --- a/source/net/yacy/kelondro/blob/ArrayStack.java +++ b/source/net/yacy/kelondro/blob/ArrayStack.java @@ -607,7 +607,6 @@ public class ArrayStack implements BLOB { public BlobValues(byte[] key) { this.bii = blobs.iterator(); this.key = key; - next0(); } protected byte[] next0() { diff --git a/source/net/yacy/kelondro/index/RowSet.java b/source/net/yacy/kelondro/index/RowSet.java index 20b38acae..9e4b9b584 100644 --- a/source/net/yacy/kelondro/index/RowSet.java +++ b/source/net/yacy/kelondro/index/RowSet.java @@ -292,16 +292,17 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable