From dbdd10da8444139754bdb39d1714421635eb4271 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 9 Mar 2009 22:32:04 +0000 Subject: [PATCH] better logging and startup behaviour for referenceHash computation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5690 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/data/URLAnalysis.java | 2 ++ .../anomic/kelondro/table/AbstractRecords.java | 8 ++++---- .../anomic/kelondro/text/IndexCollection.java | 18 +++++++++++------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java index dfab5fa58..ee5697c61 100644 --- a/source/de/anomic/data/URLAnalysis.java +++ b/source/de/anomic/data/URLAnalysis.java @@ -393,7 +393,9 @@ public class URLAnalysis { 12, Base64Order.enhancedCoder, ReferenceRow.urlEntryRow); + System.out.println("COLLECTION INDEX REFERENCE COLLECTION starting dump of statistics"); idx.dump(new File(statisticPath)); + System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished dump"); } catch (IOException e) { e.printStackTrace(); } diff --git a/source/de/anomic/kelondro/table/AbstractRecords.java b/source/de/anomic/kelondro/table/AbstractRecords.java index 2caffc474..514250472 100644 --- a/source/de/anomic/kelondro/table/AbstractRecords.java +++ b/source/de/anomic/kelondro/table/AbstractRecords.java @@ -723,10 +723,10 @@ public abstract class AbstractRecords implements RandomAccessRecords { return USAGE.FREEC; } - protected final Set deletedHandles(final long maxTime) throws kelondroException, IOException { + protected final Set deletedHandles(final long maxTime) throws IOException { // initialize set with deleted nodes; the set contains Handle-Objects // this may last only the given maxInitTime - // if the initTime is exceeded, the method throws an kelondroException + // if the initTime is exceeded, the method returns what it found so far final TreeSet markedDeleted = new TreeSet(); final long timeLimit = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; long seekp; @@ -761,7 +761,7 @@ public abstract class AbstractRecords implements RandomAccessRecords { } // this appears to be correct. go on. - if (System.currentTimeMillis() > timeLimit) throw new kelondroException(filename, "time limit of " + maxTime + " exceeded; > " + markedDeleted.size() + " deleted entries"); + if (System.currentTimeMillis() > timeLimit) return markedDeleted; } System.out.println("\nDEBUG: " + markedDeleted.size() + " deleted entries in " + entryFile.name()); } @@ -956,7 +956,7 @@ public abstract class AbstractRecords implements RandomAccessRecords { private final boolean fullyMarked; private Node next; - public contentNodeIterator(final long maxInitTime) throws IOException, kelondroException { + public contentNodeIterator(final long maxInitTime) throws IOException { // initialize markedDeleted set of deleted Handles markedDeleted = deletedHandles(maxInitTime); fullyMarked = (maxInitTime < 0); diff --git a/source/de/anomic/kelondro/text/IndexCollection.java b/source/de/anomic/kelondro/text/IndexCollection.java index d3d6fea40..852a7bf9a 100644 --- a/source/de/anomic/kelondro/text/IndexCollection.java +++ b/source/de/anomic/kelondro/text/IndexCollection.java @@ -331,7 +331,7 @@ public class IndexCollection implements Index { // loop over all elements in array and create index entry for each row Row.EntryIndex aentry; Row.Entry ientry; - final Iterator ei = array.contentRows(-1); + final Iterator ei = array.contentRows(10000); byte[] key; final long start = System.currentTimeMillis(); long lastlog = start; @@ -359,7 +359,7 @@ public class IndexCollection implements Index { // write a log if (System.currentTimeMillis() - lastlog > 30000) { - Log.logFine("COLLECTION INDEX STARTUP", "created " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); + Log.logInfo("COLLECTION INDEX STARTUP", "created " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); lastlog = System.currentTimeMillis(); } } @@ -410,6 +410,7 @@ public class IndexCollection implements Index { final String[] list = path.list(); FixedWidthArray array; + System.out.println("COLLECTION INDEX REFERENCE COLLECTION startup"); IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000); for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) { // open array @@ -417,18 +418,20 @@ public class IndexCollection implements Index { if (pos < 0) continue; final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16); final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16); + System.out.println("COLLECTION INDEX REFERENCE COLLECTION opening partition " + partitionNumber + ", " + i + " of " + list.length); try { array = openArrayFile(path, filenameStub, keylength, partitionNumber, serialNumber, indexOrder, payloadrow.objectsize, true); } catch (final IOException e) { e.printStackTrace(); continue; } - + System.out.println("COLLECTION INDEX REFERENCE COLLECTION opened partition " + partitionNumber + ", initializing iterator"); // loop over all elements in array and collect reference hashes Row.EntryIndex arrayrow; - final Iterator ei = array.contentRows(-1); + final Iterator ei = array.contentRows(10000); + System.out.println("COLLECTION INDEX REFERENCE COLLECTION opened partition " + partitionNumber + ", starting reference scanning"); final long start = System.currentTimeMillis(); - long lastlog = start; + long lastlog = start - 27000; int count = 0; while (ei.hasNext()) { arrayrow = ei.next(); @@ -441,12 +444,13 @@ public class IndexCollection implements Index { count++; // write a log if (System.currentTimeMillis() - lastlog > 30000) { - Log.logFine("COLLECTION INDEX REFERENCE COLLECTION", "scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); + System.out.println("COLLECTION INDEX REFERENCE COLLECTION scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); + //Log.logInfo("COLLECTION INDEX REFERENCE COLLECTION", "scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); lastlog = System.currentTimeMillis(); } } - } + System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished with reference collection"); return references; }