set of small fixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5903 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 78ffb61297
commit d3f8aa5a2a

@ -96,11 +96,21 @@ public final class transferRWI {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.webIndex.peers().mySeed().hash);
result = "wrong_target";
pause = 0;
} else if ((!granted) || (sb.isRobinsonMode())) {
} else if (otherPeer == null) {
// we don't want to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted.");
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. Other Peer is unknown");
result = "not_granted";
pause = 0;
pause = 60000;
} else if (!granted) {
// we don't want to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Granted is false");
result = "not_granted";
pause = 60000;
} else if (sb.isRobinsonMode()) {
// we don't want to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode");
result = "not_granted";
pause = 60000;
} else if (sb.webIndex.index().getBufferSize() > cachelimit) {
// we are too busy to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.index().getBufferSize() + ").");

@ -262,7 +262,11 @@ public class htmlFilterCharacterCoding {
sb.append(new char[] {(char) Integer.parseInt(s.substring(3, s.length() - 1), 16)});
continue;
}
sb.append(new char[] {(char) Integer.parseInt(s.substring(2, s.length() - 1))});
String ucs = s.substring(2, s.length() - 1);
try {
int uc = Integer.parseInt(ucs);
sb.append(new char[] {(char) uc});
} catch (NumberFormatException e) {}
continue;
}
// the entity is unknown, skip it

@ -36,6 +36,7 @@ import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -695,7 +696,11 @@ public final class plasmaParser {
}
}
private plasmaParserDocument parseHtml(final yacyURL location, final String mimeType, final String documentCharset, final InputStream sourceStream) throws IOException, ParserException {
private plasmaParserDocument parseHtml(
final yacyURL location,
final String mimeType,
final String documentCharset,
final InputStream sourceStream) throws IOException, ParserException {
// make a scraper and transformer
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(sourceStream,documentCharset,location,null,false);
@ -710,10 +715,16 @@ public final class plasmaParser {
theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
}
Charset c;
try {
c = Charset.forName(charset);
} catch (IllegalCharsetNameException e) {
c = Charset.defaultCharset();
}
// parsing the content
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);
final htmlFilterWriter writer = new htmlFilterWriter(null,null,scraper,null,false);
FileUtils.copy(htmlFilter, writer, Charset.forName(charset));
FileUtils.copy(htmlFilter, writer, c);
writer.close();
//OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);
//serverFileUtils.copy(sourceFile, hfos);

@ -754,7 +754,10 @@ public class yacyURL implements Serializable {
// combine the attributes
final StringBuilder hash = new StringBuilder(12);
// form the 'local' part of the hash
hash.append(Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(toNormalform(true, true))).substring(0, 5)); // 5 chars
String normalform = toNormalform(true, true);
String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
if (b64l.length() < 5) return null;
hash.append(b64l.substring(0, 5)); // 5 chars
hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char
// form the 'global' part of the hash
hash.append(hosthash5(this.protocol, host, port)); // 5 chars

Loading…
Cancel
Save