From d3f8aa5a2a49cfc570f23b06a08a053f677dee47 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 29 Apr 2009 21:36:20 +0000 Subject: [PATCH] set of small fixes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5903 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/transferRWI.java | 16 +++++++++++++--- .../htmlFilter/htmlFilterCharacterCoding.java | 6 +++++- source/de/anomic/plasma/plasmaParser.java | 15 +++++++++++++-- source/de/anomic/yacy/yacyURL.java | 5 ++++- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 42cf569ba..40deb0bbf 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -96,11 +96,21 @@ public final class transferRWI { sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.webIndex.peers().mySeed().hash); result = "wrong_target"; pause = 0; - } else if ((!granted) || (sb.isRobinsonMode())) { + } else if (otherPeer == null) { // we dont want to receive indexes - sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted."); + sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. Other Peer is unknown"); result = "not_granted"; - pause = 0; + pause = 60000; + } else if (!granted) { + // we dont want to receive indexes + sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Granted is false"); + result = "not_granted"; + pause = 60000; + } else if (sb.isRobinsonMode()) { + // we dont want to receive indexes + sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode"); + result = "not_granted"; + pause = 60000; } else if (sb.webIndex.index().getBufferSize() > cachelimit) { // we are too busy to receive indexes sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.index().getBufferSize() + ")."); diff --git a/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java b/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java index e8fb12f12..701a6d879 100644 --- a/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java +++ b/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java @@ -262,7 +262,11 @@ public class htmlFilterCharacterCoding { sb.append(new char[] {(char) Integer.parseInt(s.substring(3, s.length() - 1), 16)}); continue; } - sb.append(new char[] {(char) Integer.parseInt(s.substring(2, s.length() - 1))}); + String ucs = s.substring(2, s.length() - 1); + try { + int uc = Integer.parseInt(ucs); + sb.append(new char[] {(char) uc}); + } catch (NumberFormatException e) {} continue; } // the entity is unknown, skip it diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index e76b1dced..ce9aebe9c 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -36,6 +36,7 @@ import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -695,7 +696,11 @@ public final class plasmaParser { } } - private plasmaParserDocument parseHtml(final yacyURL location, final String mimeType, final String documentCharset, final InputStream sourceStream) throws IOException, ParserException { + private plasmaParserDocument parseHtml( + final yacyURL location, + final String mimeType, + final String documentCharset, + final InputStream sourceStream) throws IOException, ParserException { // make a scraper and transformer final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(sourceStream,documentCharset,location,null,false); @@ -710,10 +715,16 @@ public final class plasmaParser { theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true)); } + Charset c; + try { + c = Charset.forName(charset); + } catch (IllegalCharsetNameException e) { + c = Charset.defaultCharset(); + } // parsing the content final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location); final htmlFilterWriter writer = new htmlFilterWriter(null,null,scraper,null,false); - FileUtils.copy(htmlFilter, writer, Charset.forName(charset)); + FileUtils.copy(htmlFilter, writer, c); writer.close(); //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false); //serverFileUtils.copy(sourceFile, hfos); diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index 1efde1dda..ff669d7b5 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -754,7 +754,10 @@ public class yacyURL implements Serializable { // combine the attributes final StringBuilder hash = new StringBuilder(12); // form the 'local' part of the hash - hash.append(Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(toNormalform(true, true))).substring(0, 5)); // 5 chars + String normalform = toNormalform(true, true); + String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform)); + if (b64l.length() < 5) return null; + hash.append(b64l.substring(0, 5)); // 5 chars hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char // form the 'global' part of the hash hash.append(hosthash5(this.protocol, host, port)); // 5 chars