diff --git a/htroot/api/yacydoc.html b/htroot/api/yacydoc.html
index 475bf2eef..e1509ebdf 100644
--- a/htroot/api/yacydoc.html
+++ b/htroot/api/yacydoc.html
@@ -12,8 +12,8 @@ you can validate it with http://validator.w3.org/
- #[dc_title]#
#%env/templates/metas.template%#
+ #[dc_title]#
diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java
index d961d9afb..bb18d4882 100644
--- a/source/de/anomic/http/server/HTTPDFileHandler.java
+++ b/source/de/anomic/http/server/HTTPDFileHandler.java
@@ -1073,7 +1073,7 @@ public final class HTTPDFileHandler {
errorMessage.append("\nSession: ").append(Thread.currentThread().getName())
.append("\nQuery: ").append(path)
.append("\nClient: ").append(conProp.getProperty(HeaderFramework.CONNECTION_PROP_CLIENTIP,"unknown"))
- .append("\nReason: ").append(e.toString());
+ .append("\nReason: ").append(e.getMessage());
if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
// sending back an error message to the client
diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java
index 49d6c074d..4e00b3ebf 100644
--- a/source/de/anomic/server/serverCore.java
+++ b/source/de/anomic/server/serverCore.java
@@ -775,7 +775,7 @@ public final class serverCore extends AbstractBusyThread implements BusyThread {
// wrong parameters: this can only be an internal problem
writeLine(this.commandObj.error(e));
break;
- } catch (final java.lang.ClassCastException e) {
+ } catch (final ClassCastException e) {
log.logSevere("command execution, cast exception " + e.getMessage() + " for client " + this.userAddress.getHostAddress(), e);
// ??
writeLine(this.commandObj.error(e));
diff --git a/source/net/yacy/cora/protocol/HttpConnector.java b/source/net/yacy/cora/protocol/HttpConnector.java
index 6e72d421d..ac1d17cd9 100644
--- a/source/net/yacy/cora/protocol/HttpConnector.java
+++ b/source/net/yacy/cora/protocol/HttpConnector.java
@@ -24,6 +24,8 @@ package net.yacy.cora.protocol;
import java.io.IOException;
import java.util.List;
+import net.yacy.cora.document.MultiProtocolURI;
+
import org.apache.commons.httpclient.methods.multipart.Part;
import de.anomic.crawler.retrieval.HTTPLoader;
@@ -79,5 +81,30 @@ public class HttpConnector {
}
return content;
}
+
+    /**
+     * Convenience overload that downloads the resource behind a URL object.
+     * The request target is {@code url.toNormalform(false, false)} and the
+     * virtual host is {@code url.getHost()}; both are handed to the
+     * string-based {@code wget} overload of this class.
+     *
+     * @param url     the resource to fetch
+     * @param timeout passed through unchanged to the string-based overload
+     * @return the value returned by the string-based overload
+     * @throws IOException propagated from the string-based overload
+     */
+    public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException {
+        final String target = url.toNormalform(false, false);
+        final String vhost = url.getHost();
+        return wget(target, vhost, timeout);
+    }
+
+    /**
+     * Issues a GET request for {@code url} and returns the response data.
+     * The request carries the YaCy user agent ({@code HTTPLoader.yacyUserAgent})
+     * and a Host header set to {@code vhost}.
+     *
+     * @param url     the address to request
+     * @param vhost   value sent as the Host header
+     * @param timeout handed to the {@code Client} constructor
+     *                (NOTE(review): unit presumably milliseconds - confirm against Client)
+     * @return the data obtained from the response container
+     * @throws IOException if the request or reading the response fails
+     */
+    public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException {
+        // assemble the request header: user agent plus virtual host
+        final RequestHeader requestHeader = new RequestHeader();
+        requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
+        requestHeader.put(HeaderFramework.HOST, vhost);
+        final Client httpClient = new Client(timeout, requestHeader);
+
+        ResponseContainer response = null;
+        try {
+            // send the request and hand back the received data
+            response = httpClient.GET(url);
+            return response.getData();
+        } finally {
+            // release the connection whenever a response object was obtained
+            if (response != null) {
+                response.closeStream();
+            }
+        }
+    }
}
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index 3919d72ae..b201baab4 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -26,8 +26,10 @@
package net.yacy.document.parser;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
@@ -35,6 +37,7 @@ import java.util.HashSet;
import java.util.Set;
import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.protocol.HttpConnector;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Idiom;
@@ -103,7 +106,7 @@ public class htmlParser extends AbstractParser implements Idiom {
charset = patchCharsetEncoding(charset);
}
- if (!documentCharset.equalsIgnoreCase(charset)) {
+ if (documentCharset == null || !documentCharset.equalsIgnoreCase(charset)) {
theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
}
@@ -247,4 +250,25 @@ public class htmlParser extends AbstractParser implements Idiom {
public boolean indexingDenied() {
return false;
}
+
+    /**
+     * Command-line smoke test: loads the URL given as first argument, parses the
+     * downloaded bytes as "text/html" and prints the resulting document title
+     * to standard output. Failures are reported as stack traces on standard error.
+     *
+     * @param args {@code args[0]} is the URL to load and parse
+     */
+    public static void main(String[] args) {
+        // a missing argument used to end in an ArrayIndexOutOfBoundsException
+        if (args == null || args.length < 1) {
+            System.err.println("usage: htmlParser <url>");
+            return;
+        }
+        try {
+            final MultiProtocolURI url = new MultiProtocolURI(args[0]);
+            final byte[] content = HttpConnector.wget(url, 3000);
+            final Document document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
+            System.out.println(document.dc_title());
+        } catch (MalformedURLException e) {
+            e.printStackTrace();
+        } catch (IOException e) {
+            e.printStackTrace();
+        } catch (ParserException e) {
+            e.printStackTrace();
+        } catch (InterruptedException e) {
+            // restore the interrupt flag so the interruption is not silently lost
+            Thread.currentThread().interrupt();
+            e.printStackTrace();
+        }
+    }
+
}
diff --git a/source/net/yacy/kelondro/blob/ArrayStack.java b/source/net/yacy/kelondro/blob/ArrayStack.java
index a4d8d01db..b5aebf580 100755
--- a/source/net/yacy/kelondro/blob/ArrayStack.java
+++ b/source/net/yacy/kelondro/blob/ArrayStack.java
@@ -607,7 +607,6 @@ public class ArrayStack implements BLOB {
public BlobValues(byte[] key) {
this.bii = blobs.iterator();
this.key = key;
- next0();
}
protected byte[] next0() {
diff --git a/source/net/yacy/kelondro/index/RowSet.java b/source/net/yacy/kelondro/index/RowSet.java
index 20b38acae..9e4b9b584 100644
--- a/source/net/yacy/kelondro/index/RowSet.java
+++ b/source/net/yacy/kelondro/index/RowSet.java
@@ -292,16 +292,17 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable