From d7cbf4cdd41d86ad582340556ebcfc95639dad20 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Fri, 17 Apr 2009 13:47:06 +0000
Subject: [PATCH] more performance hacks: less overhead in word hash computation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5825 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/http/httpClient.java         |  5 +--
 .../de/anomic/kelondro/order/Base64Order.java | 61 +++++++++++++++++++
 source/de/anomic/plasma/parser/Word.java      |  2 +-
 3 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/source/de/anomic/http/httpClient.java b/source/de/anomic/http/httpClient.java
index c02050645..14b9c8322 100644
--- a/source/de/anomic/http/httpClient.java
+++ b/source/de/anomic/http/httpClient.java
@@ -554,9 +554,8 @@ public class httpClient {
                     Log.logWarning("HTTPC", "Proxy authentication contains invalid characters, trying anyway");
                 }
                 final String remoteProxyPwd = hostProxyConfig.getProxyPwd();
-                final String credentials = Base64Order.standardCoder.encodeString(remoteProxyUser.replace(":",
-                        "") +
-                        ":" + remoteProxyPwd);
+                final String credentials = Base64Order.standardCoder.encodeString(
+                        remoteProxyUser.replace(":", "") + ":" + remoteProxyPwd);
                 method.setRequestHeader(httpRequestHeader.PROXY_AUTHORIZATION, "Basic " + credentials);
             }
         }
diff --git a/source/de/anomic/kelondro/order/Base64Order.java b/source/de/anomic/kelondro/order/Base64Order.java
index b2ff31bea..0c7fdf7b9 100644
--- a/source/de/anomic/kelondro/order/Base64Order.java
+++ b/source/de/anomic/kelondro/order/Base64Order.java
@@ -154,6 +154,24 @@ public class Base64Order extends AbstractOrder implements ByteOrder, Cod
         return new String(s);
     }
 
+    /**
+     * Encodes c into length symbols taken from alpha and returns them as bytes.
+     * Six bits are consumed per symbol, starting at the low end of c, and the
+     * symbols are written from the last array position to the first.
+     *
+     * @param c      value to encode
+     * @param length number of symbols to produce
+     * @return a new array holding exactly length encoded bytes
+     */
+    public final byte[] encodeLongSubstr(long c, int length) {
+        final byte[] s = new byte[length];
+        while (length > 0) {
+            s[--length] = (byte) alpha[(byte) (c & 0x3F)];
+            c >>= 6;
+        }
+        return s;
+    }
+
     public final void encodeLong(long c, final byte[] b, final int offset, int length) {
         assert offset + length <= b.length;
         while (length > 0) {
@@ -213,6 +231,49 @@ public class Base64Order extends AbstractOrder implements ByteOrder, Cod
         //assert lene == out.length() : "lene = " + lene + ", out.len = " + out.length();
         return new String(out);
     }
+
+    /**
+     * Encodes in and returns only the first sublen symbols of the encoding as
+     * bytes, without building the complete encoded String first.
+     *
+     * @param in     raw bytes to encode
+     * @param sublen number of leading encoded symbols wanted; also the result length
+     * @return the encoded prefix, or null if in is empty; positions past the end of
+     *         the complete encoding keep their initial zero value
+     */
+    public final byte[] encodeSubstring(final byte[] in, int sublen) {
+        if (in.length == 0) return null;
+        byte[] out = new byte[sublen];
+        int writepos = 0;
+        int pos = 0;
+        long l;
+        int n;
+        while (in.length - pos >= 3 && writepos < sublen) {
+            l = ((((0XffL & in[pos]) << 8) + (0XffL & in[pos + 1])) << 8) + (0XffL & in[pos + 2]);
+            pos += 3;
+            // clip the last group: sublen need not be a multiple of 4
+            n = Math.min(4, sublen - writepos);
+            System.arraycopy(encodeLongSubstr(l, 4), 0, out, writepos, n);
+            writepos += n;
+        }
+        // now there may be remaining bytes
+        if (in.length % 3 != 0 && writepos < sublen) {
+            // use the byte[] encoder here: System.arraycopy rejects a String source
+            if (in.length % 3 == 2) {
+                n = Math.min(3, sublen - writepos);
+                System.arraycopy(encodeLongSubstr((((0XffL & in[pos]) << 8) + (0XffL & in[pos + 1])) << 8, 4), 0, out, writepos, n);
+            } else {
+                n = Math.min(2, sublen - writepos);
+                System.arraycopy(encodeLongSubstr((((0XffL & in[pos])) << 8) << 8, 4), 0, out, writepos, n);
+            }
+            writepos += n;
+        }
+
+        // writepos must advance while padding, otherwise this loop never ends
+        if (rfc1113compliant) while (writepos % 4 > 0 && writepos < sublen) out[writepos++] = '=';
+        //assert encode(in).substring(0, sublen).equals(new String(out));
+        return out;
+    }
 
     public final String decodeString(final String in, final String info) {
         try {
diff --git a/source/de/anomic/plasma/parser/Word.java b/source/de/anomic/plasma/parser/Word.java
index 66bb017c0..bbbebbf51 100644
--- a/source/de/anomic/plasma/parser/Word.java
+++ b/source/de/anomic/plasma/parser/Word.java
@@ -84,7 +84,7 @@ public class Word {
     public static final byte[] word2hash(final String word) {
         byte[] h = hashCache.get(word);
         if (h != null) return h;
-        h = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(word.toLowerCase(Locale.ENGLISH))).substring(0, yacySeedDB.commonHashLength).getBytes();
+        h = Base64Order.enhancedCoder.encodeSubstring(Digest.encodeMD5Raw(word.toLowerCase(Locale.ENGLISH)), yacySeedDB.commonHashLength);
         hashCache.put(word, h); // prevent expensive MD5 computation and encoding
         return h;
     }