From d3f8aa5a2a49cfc570f23b06a08a053f677dee47 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Wed, 29 Apr 2009 21:36:20 +0000
Subject: [PATCH] set of small fixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5903 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/yacy/transferRWI.java                     | 16 +++++++++++++---
 .../htmlFilter/htmlFilterCharacterCoding.java    |  6 +++++-
 source/de/anomic/plasma/plasmaParser.java        | 15 +++++++++++++--
 source/de/anomic/yacy/yacyURL.java               |  5 ++++-
 4 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java
index 42cf569ba..40deb0bbf 100644
--- a/htroot/yacy/transferRWI.java
+++ b/htroot/yacy/transferRWI.java
@@ -96,11 +96,21 @@ public final class transferRWI {
         	sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.webIndex.peers().mySeed().hash);
             result = "wrong_target";
             pause = 0;
-        } else if ((!granted) || (sb.isRobinsonMode())) {
+        } else if (otherPeer == null) {
             // we dont want to receive indexes
-            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted.");
+            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. Other Peer is unknown");
             result = "not_granted";
-            pause = 0;
+            pause = 60000;
+        } else if (!granted) {
+            // the 'granted' check failed: we don't want to receive indexes
+            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Granted is false");
+            result = "not_granted";
+            pause = 60000;
+        } else if (sb.isRobinsonMode()) {
+            // this peer is in robinson mode: we don't want to receive indexes
+            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode");
+            result = "not_granted";
+            pause = 60000;
         } else if (sb.webIndex.index().getBufferSize() > cachelimit) {
             // we are too busy to receive indexes
             sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.index().getBufferSize() + ").");
diff --git a/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java b/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java
index e8fb12f12..701a6d879 100644
--- a/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java
+++ b/source/de/anomic/htmlFilter/htmlFilterCharacterCoding.java
@@ -262,7 +262,11 @@ public class htmlFilterCharacterCoding {
                     sb.append(new char[] {(char) Integer.parseInt(s.substring(3, s.length() - 1), 16)});
                     continue;
                 }
-                sb.append(new char[] {(char) Integer.parseInt(s.substring(2, s.length() - 1))});
+                String ucs = s.substring(2, s.length() - 1); // text between the 2-char prefix and the last char
+                try {
+                	int uc = Integer.parseInt(ucs); // decimal parse; throws NumberFormatException on malformed input
+                	sb.append(new char[] {(char) uc}); // NOTE(review): the cast keeps only the low 16 bits of uc
+                } catch (NumberFormatException e) {} // deliberately ignored: nothing is appended for an unparseable entity
                 continue;
             }
             // the entity is unknown, skip it
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index e76b1dced..ce9aebe9c 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -36,6 +36,7 @@ import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -695,7 +696,11 @@ public final class plasmaParser {
         }        
     }
     
-    private plasmaParserDocument parseHtml(final yacyURL location, final String mimeType, final String documentCharset, final InputStream sourceStream) throws IOException, ParserException {
+    private plasmaParserDocument parseHtml(
+    		final yacyURL location, 
+    		final String mimeType, 
+    		final String documentCharset, 
+    		final InputStream sourceStream) throws IOException, ParserException {
         
         // make a scraper and transformer
         final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(sourceStream,documentCharset,location,null,false);
@@ -710,10 +715,16 @@ public final class plasmaParser {
             theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
         }
         
+        Charset c; // charset handed to FileUtils.copy below
+        try {
+        	c = Charset.forName(charset);
+        } catch (IllegalArgumentException e) { // also covers IllegalCharsetNameException and UnsupportedCharsetException
+        	c = Charset.defaultCharset(); // unusable charset name: fall back to the platform default
+        }
         // parsing the content
         final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);        
         final htmlFilterWriter writer = new htmlFilterWriter(null,null,scraper,null,false);
-        FileUtils.copy(htmlFilter, writer, Charset.forName(charset));
+        FileUtils.copy(htmlFilter, writer, c);
         writer.close();
         //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);            
         //serverFileUtils.copy(sourceFile, hfos);
diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java
index 1efde1dda..ff669d7b5 100644
--- a/source/de/anomic/yacy/yacyURL.java
+++ b/source/de/anomic/yacy/yacyURL.java
@@ -754,7 +754,10 @@ public class yacyURL implements Serializable {
         // combine the attributes
         final StringBuilder hash = new StringBuilder(12);
         // form the 'local' part of the hash
-        hash.append(Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(toNormalform(true, true))).substring(0, 5)); // 5 chars
+        String normalform = toNormalform(true, true);
+        String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
+        if (b64l.length() < 5) return null;
+        hash.append(b64l.substring(0, 5)); // 5 chars
         hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char
         // form the 'global' part of the hash
         hash.append(hosthash5(this.protocol, host, port)); // 5 chars