a collection of

- small bug fixes
- better/more comments
- more asserts
- fixed synchronization
- test case enhancements
- code cleanup
- performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6073 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 39779e4796
commit b8e738a7be

@ -197,7 +197,13 @@ public class RobotsTxt {
}
// store the data into the robots DB
int sz = this.robotsTable.size();
addEntry(robotsTxt4Host);
if (this.robotsTable.size() <= sz) {
Log.logSevere("RobotsTxt", "new entry in robots.txt table failed, reseing database");
this.resetDatabase();
addEntry(robotsTxt4Host);
}
} else {
final robotsParser parserResult = new robotsParser((byte[]) result[DOWNLOAD_ROBOTS_TXT]);
ArrayList<String> denyPath = parserResult.denyList();

@ -359,7 +359,7 @@ public final class httpTemplate {
if (java.util.Arrays.equals(keyStream.toByteArray(),appendBytes(slash, key, null,null))) {
pis2 = new PushbackInputStream(new ByteArrayInputStream(text.getBytes()));
//this maybe the wrong, but its the last
structure.append('<').append(key).append(" type=\"alternative\" which=\"".getBytes()).append(Integer.toString(whichPattern).getBytes("UTF-8")).append("\" found=\"0\">\n".getBytes());
structure.append('<').append(key).append(" type=\"alternative\" which=\"".getBytes()).append(Integer.toString(whichPattern).getBytes()).append("\" found=\"0\">\n".getBytes());
structure.append(writeTemplate(pis2, out, pattern, dflt, newPrefix(prefix,key)));
structure.append("</".getBytes()).append(key).append(">\n".getBytes());
found=true;
@ -381,7 +381,7 @@ public final class httpTemplate {
if ((bb & 0xFF) == ':'){
if(currentPattern == whichPattern){ //found the pattern
pis2 = new PushbackInputStream(new ByteArrayInputStream(text.getBytes()));
structure.append("<".getBytes()).append(key).append(" type=\"alternative\" which=\"".getBytes()).append(Integer.toString(whichPattern).getBytes("UTF-8")).append("\" found=\"0\">\n".getBytes());
structure.append('<').append(key).append(" type=\"alternative\" which=\"".getBytes()).append(Integer.toString(whichPattern).getBytes()).append("\" found=\"0\">\n".getBytes());
structure.append(writeTemplate(pis2, out, pattern, dflt, newPrefix(prefix,key)));
structure.append("</".getBytes()).append(key).append(">\n".getBytes());

@ -418,7 +418,7 @@ public class BLOBArray implements BLOB {
* ask for the number of blob entries in each blob of the blob array
* @return the number of entries in each blob
*/
public synchronized int[] sizes() {
public int[] sizes() {
int[] s = new int[blobs.size()];
int c = 0;
for (blobItem bi: blobs) s[c++] = bi.blob.size();

@ -272,6 +272,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB {
if (b.length == 0) return;
// first remove the old entry (removes from buffer and file)
// TODO: this can be enhanced!
this.remove(key);
// then look if we can use a free entry

@ -120,6 +120,9 @@ public class BLOBHeapModifier extends HeapReader implements BLOB {
final long seek = index.get(key);
if (seek < 0) return;
// check consistency of the index
assert (checkKey(key, seek)) : "key compare failed; key = " + new String(key) + ", seek = " + seek;
// access the file and read the container
this.file.seek(seek);
int size = file.readInt();
@ -248,13 +251,16 @@ public class BLOBHeapModifier extends HeapReader implements BLOB {
throw new UnsupportedOperationException("put is not supported in BLOBHeapModifier");
}
public int replace(byte[] key, Rewriter rewriter) throws IOException {
public synchronized int replace(byte[] key, Rewriter rewriter) throws IOException {
assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;
// check if the index contains the key
final long pos = index.get(key);
if (pos < 0) return 0;
// check consistency of the index
assert (checkKey(key, pos)) : "key compare failed; key = " + new String(key) + ", seek = " + pos;
// access the file and read the container
file.seek(pos);
final int len = file.readInt() - index.row().primaryKeyLength;

@ -29,7 +29,6 @@ import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
@ -49,7 +48,7 @@ public class HeapReader {
public final static long keepFreeMem = 20 * 1024 * 1024;
protected int keylength; // the length of the primary key
protected HandleMap index; // key/seek relation for used records
protected HandleMap index; // key/seek relation for used records
protected Gap free; // set of {seek, size} pairs denoting space and position of free records
protected File heapFile; // the file of the heap
protected final ByteOrder ordering; // the ordering on keys
@ -260,7 +259,7 @@ public class HeapReader {
file.readFully(keyf, 0, keyf.length);
if (!this.ordering.equal(key, keyf)) {
// verification of the indexed access failed. we must re-read the index
Log.logWarning("kelondroBLOBHeap", "verification indexed access for " + heapFile.toString() + " failed, re-building index");
Log.logSevere("kelondroBLOBHeap", "verification indexed access for " + heapFile.toString() + " failed, re-building index");
// this is a severe operation, it should never happen.
// but if the process ends in this state, it would completely fail
// if the index is not rebuild now at once
@ -273,9 +272,19 @@ public class HeapReader {
return blob;
}
/**
 * Consistency check for an index entry: reads the key that is stored in the
 * heap file at the given position and compares it with the expected key.
 * The record is read as an int (the size value, which is discarded here)
 * followed by index.row().primaryKeyLength key bytes.
 * Note that this method moves the read position of the file.
 * @param key the key that the index associates with the position
 * @param pos the seek position of the record in the heap file
 * @return true if the key found in the file equals the given key according to the ordering
 * @throws IOException if seeking or reading the file fails
 */
protected boolean checkKey(final byte[] key, final long pos) throws IOException {
file.seek(pos);
file.readInt(); // skip the size value
// read the key
final byte[] keyf = new byte[index.row().primaryKeyLength];
file.readFully(keyf, 0, keyf.length);
// compare using the ordering of this heap, not a plain byte-wise comparison
return this.ordering.equal(key, keyf);
}
/**
* retrieve the size of the BLOB
* retrieve the size of the BLOB. This should not be used excessively, because it depends on IO operations.
* @param key
* @return the size of the BLOB or -1 if the BLOB does not exist
* @throws IOException
@ -362,17 +371,6 @@ public class HeapReader {
public long length() throws IOException {
return this.heapFile.length();
}
/**
 * Converts a raw key into a String and cuts off trailing fill characters.
 * At most this.keylength bytes of the raw key are considered.
 * @param rawKey the key bytes, possibly padded at the end with the fill character
 * @param fillChar the padding character; it is compared as a single byte
 * @return the key as String without the trailing padding
 */
public String excave(final byte[] rawKey, char fillChar) {
// n is the index of the last byte that will be kept
int n = this.keylength - 1;
if (n >= rawKey.length) n = rawKey.length - 1;
// step backwards over the padding; the loop stops at n == 0, so the first byte is never stripped
while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--;
try {
return new String(rawKey, 0, n + 1, "UTF-8");
} catch (UnsupportedEncodingException e) {
// fall back to the platform default charset if UTF-8 is reported as unsupported
return new String(rawKey, 0, n + 1);
}
}
/**
* static iterator of entries in BLOBHeap files:

@ -57,14 +57,9 @@ public class IndexTest {
public static final long mb = 1024 * 1024;
public static void main(String[] args) {
System.out.println("Performance test: comparing HashMap, TreeMap and kelondroRow");
if (args.length == 0) {
System.out.println("use one parameter: number of test entries");
System.exit(0);
}
// pre-generate test data so it will not influence test case time
int count = Integer.parseInt(args[0]);
int count = args.length == 0 ? 1000000 : Integer.parseInt(args[0]);
byte[][] tests = new byte[count][];
Random r = new Random(0);
for (int i = 0; i < count; i++) tests[i] = randomHash(r);
@ -120,7 +115,7 @@ public class IndexTest {
for (int i = 0; i < count; i++) ii.putUnique(tests[i], 1);
ii.get(randomHash(r)); // trigger sort
long t6 = System.currentTimeMillis();
System.out.println("time for kelondroMap<byte[]> generation: " + (t6 - t5));
System.out.println("time for HandleMap<byte[]> generation: " + (t6 - t5));
bugs = 0;
for (int i = 0; i < count; i++) if (ii.get(tests[i]) != 1) bugs++;
@ -128,8 +123,8 @@ public class IndexTest {
long freeEndKelondro = MemoryControl.available();
ii.clear(); ii = null;
long t7 = System.currentTimeMillis();
System.out.println("time for kelondroMap<byte[]> test: " + (t7 - t6) + ", " + bugs + " bugs");
System.out.println("memory for kelondroMap<byte[]>: " + (freeStartKelondro - freeEndKelondro) / mb + " MB\n");
System.out.println("time for HandleMap<byte[]> test: " + (t7 - t6) + ", " + bugs + " bugs");
System.out.println("memory for HandleMap<byte[]>: " + (freeStartKelondro - freeEndKelondro) / mb + " MB\n");
// test ByteArray
System.out.println("unsorted map");

@ -37,6 +37,7 @@ import java.util.TreeMap;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowSet;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.ByteBuffer;
@ -380,6 +381,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
assert (keylength == i2.rowdef.width(0));
final ReferenceContainer<ReferenceType> conj = new ReferenceContainer<ReferenceType>(factory, null, i1.rowdef, 0); // start with empty search result
if (!((i1.rowdef.getOrdering().signature().equals(i2.rowdef.getOrdering().signature())))) return conj; // ordering must be equal
ByteOrder ordering = i1.rowdef.getOrdering();
final Iterator<ReferenceType> e1 = i1.entries();
final Iterator<ReferenceType> e2 = i2.entries();
int c;
@ -392,7 +394,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
while (true) {
assert (ie1.metadataHash().length() == keylength) : "ie1.urlHash() = " + ie1.metadataHash();
assert (ie2.metadataHash().length() == keylength) : "ie2.urlHash() = " + ie2.metadataHash();
c = i1.rowdef.getOrdering().compare(ie1.metadataHash().getBytes(), ie2.metadataHash().getBytes());
c = ordering.compare(ie1.metadataHash().getBytes(), ie2.metadataHash().getBytes());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = e1.next(); else break;

@ -203,14 +203,14 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
int k = 1;
ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(a, payloadrow));
if (System.currentTimeMillis() > timeout) {
Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000");
Log.logWarning("ReferenceContainerArray", "timout in index retrieval (1): " + k + " tables searched. timeout = 1000");
return c;
}
while (entries.hasNext()) {
c = c.merge(new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(entries.next(), payloadrow)));
k++;
if (System.currentTimeMillis() > timeout) {
Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000");
Log.logWarning("ReferenceContainerArray", "timout in index retrieval (2): " + k + " tables searched. timeout = 1000");
return c;
}
}

@ -178,7 +178,7 @@ public final class ByteBuffer extends OutputStream {
}
public ByteBuffer append(final byte[] bb) {
write(bb);
write(bb, 0, bb.length);
return this;
}

@ -1855,7 +1855,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
private static SimpleDateFormat DateFormatter822 = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", Locale.US);
public static String dateString822(final Date date) {
if (date == null) return "";
return DateFormatter822.format(date);
try {
return DateFormatter822.format(date);
} catch (Exception e) {
e.printStackTrace();
return DateFormatter822.format(new Date());
}
}

Loading…
Cancel
Save