From 1db9cdd4e44b1055c47d965727c2755281bbf4f3 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 3 May 2009 19:35:10 +0000 Subject: [PATCH] fixed bug in writing of robots.txt entries in case that host names exceeded 64 characters and some other problems git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5918 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/kelondro/blob/BLOBHeap.java | 75 ++++++++++++++++--- source/de/anomic/kelondro/blob/MapView.java | 15 +++- .../text/ReferenceContainerCache.java | 1 + source/de/anomic/kelondro/util/FileUtils.java | 2 +- source/de/anomic/yacy/yacyURL.java | 2 +- 5 files changed, 79 insertions(+), 16 deletions(-) diff --git a/source/de/anomic/kelondro/blob/BLOBHeap.java b/source/de/anomic/kelondro/blob/BLOBHeap.java index 382445e7f..fca51877d 100755 --- a/source/de/anomic/kelondro/blob/BLOBHeap.java +++ b/source/de/anomic/kelondro/blob/BLOBHeap.java @@ -116,7 +116,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { */ public synchronized boolean has(final byte[] key) { assert index != null; - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; // check the buffer if (this.buffer.containsKey(new String(key))) return true; @@ -132,7 +132,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { private void add(final byte[] key, final byte[] blob) throws IOException { assert blob.length > 0; assert key.length == this.keylength; - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; if ((blob == null) || (blob.length == 0)) return; final int pos = (int) file.length(); file.seek(pos); @@ -154,6 +154,60 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { while (i.hasNext()) l += i.next().getValue().length; assert l == this.buffersize; + // simulate write: this whole code block is only here to test the assert at the end of the block; remove after testing + i = this.buffer.entrySet().iterator(); + int posBuffer = 0; + Map.Entry entry; + byte[] key, blob; + while (i.hasNext()) { + entry = i.next(); + key = entry.getKey().getBytes(); + assert key.length == this.keylength : "key.length = " + key.length + ", this.keylength = " + this.keylength; + blob = entry.getValue(); + posBuffer += 4 + key.length + blob.length; + } + assert l + (4 + this.keylength) * this.buffer.size() == posBuffer : "l = " + l + ", this.keylength = " + this.keylength + ", this.buffer.size() = " + this.buffer.size() + ", posBuffer = " + posBuffer; + + // append all contents of the buffer into one byte[] + i = this.buffer.entrySet().iterator(); + final long pos = file.length(); + long posFile = pos; + posBuffer = 0; + byte[] ba = new byte[l + (4 + this.keylength) * this.buffer.size()]; + byte[] b; + while (i.hasNext()) { + entry = i.next(); + key = entry.getKey().getBytes(); + assert key.length == this.keylength : "key.length = " + key.length + ", this.keylength = " + this.keylength; + blob = entry.getValue(); + index.put(key, posFile); + b = AbstractRandomAccess.int2array(key.length + blob.length); + assert b.length == 4; + assert posBuffer + 4 < ba.length : "posBuffer = " + posBuffer + ", ba.length = " + ba.length; + System.arraycopy(b, 0, ba, posBuffer, 4); + assert posBuffer + 4 + key.length <= ba.length : "posBuffer = " + posBuffer + ", key.length = " + key.length + ", ba.length = " + ba.length; + System.arraycopy(key, 0, ba, posBuffer + 4, key.length); + assert posBuffer + 4 + key.length + blob.length <= ba.length : "posBuffer = " + posBuffer + ", key.length = " + key.length + ", blob.length = " + blob.length + ", ba.length = " + ba.length; + System.out.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length); + System.err.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length); + System.arraycopy(blob, 0, ba, posBuffer + 4 + this.keylength, blob.length); //java.lang.ArrayIndexOutOfBoundsException here + posFile += 4 + key.length + blob.length; + posBuffer += 4 + key.length + blob.length; + } + assert ba.length == posBuffer; // must fit exactly + this.file.seek(pos); + this.file.write(ba); + this.buffer.clear(); + this.buffersize = 0; + } + /* + private void flushBuffer() throws IOException { + // check size of buffer + Iterator> i = this.buffer.entrySet().iterator(); + int l = 0; + while (i.hasNext()) l += i.next().getValue().length; + assert l == this.buffersize; + // append all contents of the buffer into one byte[] i = this.buffer.entrySet().iterator(); final long pos = file.length(); @@ -175,8 +229,8 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { assert posBuffer + 4 + key.length <= ba.length : "posBuffer = " + posBuffer + ", key.length = " + key.length + ", ba.length = " + ba.length; System.arraycopy(key, 0, ba, posBuffer + 4, key.length); assert posBuffer + 4 + key.length + blob.length <= ba.length : "posBuffer = " + posBuffer + ", key.length = " + key.length + ", blob.length = " + blob.length + ", ba.length = " + ba.length; - //System.out.println("*** DEBUG blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length + ",blob.length=" + blob.length); - //System.err.println("*** DEBUG blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length + ",blob.length=" + blob.length); + System.out.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length); + System.err.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length); System.arraycopy(blob, 0, ba, posBuffer + 4 + key.length, blob.length); //java.lang.ArrayIndexOutOfBoundsException here posFile += 4 + key.length + blob.length; posBuffer += 4 + key.length + blob.length; @@ -187,6 +241,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { this.buffer.clear(); this.buffersize = 0; } + */ /** * read a blob from the heap @@ -195,7 +250,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { * @throws IOException */ public synchronized byte[] get(final byte[] key) throws IOException { - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; // check the buffer byte[] blob = this.buffer.get(new String(key)); @@ -211,7 +266,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { * @throws IOException */ public synchronized long length(byte[] key) throws IOException { - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; // check the buffer byte[] blob = this.buffer.get(new String(key)); @@ -253,7 +308,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { * @throws IOException */ public synchronized void put(final byte[] key, final byte[] b) throws IOException { - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; // we do not write records of length 0 into the BLOB if (b.length == 0) return; @@ -284,7 +339,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { } private boolean putToGap(final byte[] key, final byte[] b) throws IOException { - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; // we do not write records of length 0 into the BLOB if (b.length == 0) return true; @@ -295,7 +350,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { // find the largest entry long lseek = -1; int lsize = 0; - final int reclen = b.length + index.row().primaryKeyLength; + final int reclen = b.length + this.keylength; Map.Entry entry; Iterator> i = this.free.entrySet().iterator(); while (i.hasNext()) { @@ -369,7 +424,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { * @throws IOException */ public synchronized void remove(final byte[] key) throws IOException { - assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; + assert this.keylength == key.length : this.keylength + "!=" + key.length; // check the buffer byte[] blob = this.buffer.remove(new String(key)); diff --git a/source/de/anomic/kelondro/blob/MapView.java b/source/de/anomic/kelondro/blob/MapView.java index 093e6aefa..8d0e181bf 100644 --- a/source/de/anomic/kelondro/blob/MapView.java +++ b/source/de/anomic/kelondro/blob/MapView.java @@ -144,7 +144,7 @@ public class MapView { assert (key.length() > 0); assert (newMap != null); if (cacheScore == null) return; // may appear during shutdown - while (key.length() < blob.keylength()) key += "_"; + key = normalizeKey(key); // write entry blob.put(key.getBytes(), map2string(newMap, "W" + DateFormatter.formatShortSecond() + " ").getBytes()); @@ -165,7 +165,7 @@ public class MapView { public synchronized void remove(String key) throws IOException { // update elementCount if (key == null) return; - while (key.length() < blob.keylength()) key += "_"; + key = normalizeKey(key); // remove from cache cacheScore.deleteScore(key); @@ -184,7 +184,8 @@ public class MapView { public synchronized boolean has(String key) throws IOException { assert key != null; if (cache == null) return false; // case may appear during shutdown - while (key.length() < blob.keylength()) key += "_"; + key = normalizeKey(key); + if (this.cache.containsKey(key)) return true; return this.blob.has(key.getBytes()); } @@ -198,12 +199,18 @@ public class MapView { if (key == null) return null; return get(key, true); } + + private String normalizeKey(String key) { + if (key.length() > blob.keylength()) key = key.substring(0, blob.keylength()); + while (key.length() < blob.keylength()) key += "_"; + return key; + } protected synchronized Map get(String key, final boolean storeCache) throws IOException { // load map from cache assert key != null; if (cache == null) return null; // case may appear during shutdown - while (key.length() < blob.keylength()) key += "_"; + key = normalizeKey(key); Map map = cache.get(key); if (map != null) return map; diff --git a/source/de/anomic/kelondro/text/ReferenceContainerCache.java b/source/de/anomic/kelondro/text/ReferenceContainerCache.java index b20a0aa61..255328f37 100644 --- a/source/de/anomic/kelondro/text/ReferenceContainerCache.java +++ b/source/de/anomic/kelondro/text/ReferenceContainerCache.java @@ -468,6 +468,7 @@ public final class ReferenceContainerCache exte public void add(final ReferenceContainer container) { // this puts the entries into the cache + assert this.cache != null; if (this.cache == null || container == null || container.size() == 0) return; // put new words into cache diff --git a/source/de/anomic/kelondro/util/FileUtils.java b/source/de/anomic/kelondro/util/FileUtils.java index e905c9ce2..912e8da37 100644 --- a/source/de/anomic/kelondro/util/FileUtils.java +++ b/source/de/anomic/kelondro/util/FileUtils.java @@ -82,7 +82,7 @@ public final class FileUtils { * @see #copy(File source, File dest) */ public static long copy(final InputStream source, final OutputStream dest, final long count) throws IOException { - assert count == -1 || count > 0 : "precondition violated: count == -1 || count > 0 (nothing to copy)"; + assert count == -1 || count > 0 : "precondition violated: count == " + count + " (nothing to copy)"; if(count == 0) { // no bytes to copy return 0; diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index d78cb29cf..188582ea8 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -893,7 +893,7 @@ public class yacyURL implements Serializable { if (this.hash == null) { if (this.host.startsWith("127.") || this.host.equals("localhost") || this.host.startsWith("0:0:0:0:0:0:0:1")) return true; synchronized (this) { - this.hash = urlHashComputation(); + if (this.hash == null) this.hash = urlHashComputation(); } } return domDomain(this.hash) == 7;