git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6922 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 989819a28c
commit 3a1cebb598

@ -12,8 +12,8 @@ you can validate it with http://validator.w3.org/
<head profile="http://www.w3.org/2003/g/data-view">
<link rel="transformation" href="http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl"/>
<title>#[dc_title]#</title>
#%env/templates/metas.template%#
<title>#[dc_title]#</title>
</head>
<body>

@ -1073,7 +1073,7 @@ public final class HTTPDFileHandler {
errorMessage.append("\nSession: ").append(Thread.currentThread().getName())
.append("\nQuery: ").append(path)
.append("\nClient: ").append(conProp.getProperty(HeaderFramework.CONNECTION_PROP_CLIENTIP,"unknown"))
.append("\nReason: ").append(e.toString());
.append("\nReason: ").append(e.getMessage());
if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
// sending back an error message to the client

@ -775,7 +775,7 @@ public final class serverCore extends AbstractBusyThread implements BusyThread {
// wrong parameters: this can only be an internal problem
writeLine(this.commandObj.error(e));
break;
} catch (final java.lang.ClassCastException e) {
} catch (final ClassCastException e) {
log.logSevere("command execution, cast exception " + e.getMessage() + " for client " + this.userAddress.getHostAddress(), e);
// ??
writeLine(this.commandObj.error(e));

@ -24,6 +24,8 @@ package net.yacy.cora.protocol;
import java.io.IOException;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import org.apache.commons.httpclient.methods.multipart.Part;
import de.anomic.crawler.retrieval.HTTPLoader;
@ -79,5 +81,30 @@ public class HttpConnector {
}
return content;
}
/**
 * Convenience overload: downloads the content behind a {@link MultiProtocolURI}.
 * The URL is converted with {@code toNormalform(false, false)} and its host
 * is passed on as the virtual host of the request.
 *
 * @param url     the resource to fetch
 * @param timeout timeout handed to the underlying client (unit not visible here; presumably milliseconds - confirm)
 * @return whatever {@link #wget(String, String, int)} returns for that URL
 * @throws IOException propagated from {@link #wget(String, String, int)}
 */
public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException {
    final String normalizedUrl = url.toNormalform(false, false);
    final String virtualHost = url.getHost();
    return wget(normalizedUrl, virtualHost, timeout);
}
/**
 * Issues a GET request for {@code url} and returns the data of the response.
 * The request carries the YaCy user agent and a {@code Host} header set to
 * {@code vhost}. The response stream is closed before this method returns,
 * whether or not reading the data succeeded.
 *
 * @param url     the address to request
 * @param vhost   value placed in the Host header of the request
 * @param timeout timeout handed to the client constructor (unit not visible here; presumably milliseconds - confirm)
 * @return the data obtained from the response container
 * @throws IOException if the request or reading the response fails
 */
public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException {
    // assemble the request header first, then build a client around it
    final RequestHeader requestHeader = new RequestHeader();
    requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
    requestHeader.put(HeaderFramework.HOST, vhost);
    final Client httpClient = new Client(timeout, requestHeader);

    ResponseContainer response = null;
    try {
        response = httpClient.GET(url);
        return response.getData();
    } finally {
        // always give the connection back, even when the request failed half-way
        if (response != null) {
            response.closeStream();
        }
    }
}
}

@ -26,8 +26,10 @@
package net.yacy.document.parser;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
@ -35,6 +37,7 @@ import java.util.HashSet;
import java.util.Set;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Idiom;
@ -103,7 +106,7 @@ public class htmlParser extends AbstractParser implements Idiom {
charset = patchCharsetEncoding(charset);
}
if (!documentCharset.equalsIgnoreCase(charset)) {
if (documentCharset == null || !documentCharset.equalsIgnoreCase(charset)) {
theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
}
@ -247,4 +250,25 @@ public class htmlParser extends AbstractParser implements Idiom {
/**
 * Tells the caller whether indexing is denied.
 * This implementation unconditionally answers {@code false}.
 *
 * @return always {@code false}
 */
public boolean indexingDenied() {
return false;
}
/**
 * Small manual test driver: downloads the URL given as first command line
 * argument, runs it through a fresh {@code htmlParser} as {@code text/html}
 * and prints the parsed document title to standard output.
 * <p>
 * Failures are reported on standard error; the method never throws because
 * of a missing argument.
 *
 * @param args command line arguments; {@code args[0]} must be the URL to parse
 */
public static void main(String[] args) {
    // guard against an empty command line instead of failing with an
    // ArrayIndexOutOfBoundsException on args[0]
    if (args == null || args.length == 0) {
        System.err.println("usage: htmlParser <url>");
        return;
    }
    try {
        // test parsing of a url
        final MultiProtocolURI url = new MultiProtocolURI(args[0]);
        final byte[] content = HttpConnector.wget(url, 3000);
        final Document document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
        final String title = document.dc_title();
        System.out.println(title);
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParserException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // restore the interrupt status so it is not silently lost
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
}

@ -607,7 +607,6 @@ public class ArrayStack implements BLOB {
public BlobValues(byte[] key) {
this.bii = blobs.iterator();
this.key = key;
next0();
}
protected byte[] next0() {

@ -292,16 +292,17 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.E
private int p;
final int bound;
public keyIterator(final boolean up, final byte[] firstKey) {
public keyIterator(final boolean up, byte[] firstKey) {
// see that all elements are sorted
sort();
this.up = up;
if (firstKey != null && firstKey.length == 0) firstKey = null;
this.first = firstKey;
this.bound = sortBound;
if (first == null) {
p = 0;
} else {
assert first.length == rowdef.primaryKeyLength;
assert first.length == rowdef.primaryKeyLength : "first.length = " + first.length + ", rowdef.primaryKeyLength = " + rowdef.primaryKeyLength;
p = binaryPosition(first, 0); // check this to find bug in DHT selection enumeration
}
}

@ -45,7 +45,7 @@ public abstract class AbstractBufferedIndex<ReferenceType extends Reference> ext
// creates a set of indexContainers
// this does not use the cache
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.ordering().clone());
if (startHash.length == 0) startHash = null;
if (startHash != null && startHash.length == 0) startHash = null;
ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startHash);
containerOrder.rotate(emptyContainer);
final TreeSet<ReferenceContainer<ReferenceType>> containers = new TreeSet<ReferenceContainer<ReferenceType>>(containerOrder);

Loading…
Cancel
Save