better logging and startup behaviour for referenceHash computation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5690 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent d612430fce
commit dbdd10da84

@ -393,7 +393,9 @@ public class URLAnalysis {
12, 12,
Base64Order.enhancedCoder, Base64Order.enhancedCoder,
ReferenceRow.urlEntryRow); ReferenceRow.urlEntryRow);
System.out.println("COLLECTION INDEX REFERENCE COLLECTION starting dump of statistics");
idx.dump(new File(statisticPath)); idx.dump(new File(statisticPath));
System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished dump");
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }

@ -723,10 +723,10 @@ public abstract class AbstractRecords implements RandomAccessRecords {
return USAGE.FREEC; return USAGE.FREEC;
} }
protected final Set<RecordHandle> deletedHandles(final long maxTime) throws kelondroException, IOException { protected final Set<RecordHandle> deletedHandles(final long maxTime) throws IOException {
// initialize set with deleted nodes; the set contains Handle-Objects // initialize set with deleted nodes; the set contains Handle-Objects
// this may last only the given maxInitTime // this may last only the given maxInitTime
// if the initTime is exceeded, the method throws an kelondroException // if the initTime is exceeded, the method returns what it found so far
final TreeSet<RecordHandle> markedDeleted = new TreeSet<RecordHandle>(); final TreeSet<RecordHandle> markedDeleted = new TreeSet<RecordHandle>();
final long timeLimit = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; final long timeLimit = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long seekp; long seekp;
@ -761,7 +761,7 @@ public abstract class AbstractRecords implements RandomAccessRecords {
} }
// this appears to be correct. go on. // this appears to be correct. go on.
if (System.currentTimeMillis() > timeLimit) throw new kelondroException(filename, "time limit of " + maxTime + " exceeded; > " + markedDeleted.size() + " deleted entries"); if (System.currentTimeMillis() > timeLimit) return markedDeleted;
} }
System.out.println("\nDEBUG: " + markedDeleted.size() + " deleted entries in " + entryFile.name()); System.out.println("\nDEBUG: " + markedDeleted.size() + " deleted entries in " + entryFile.name());
} }
@ -956,7 +956,7 @@ public abstract class AbstractRecords implements RandomAccessRecords {
private final boolean fullyMarked; private final boolean fullyMarked;
private Node next; private Node next;
public contentNodeIterator(final long maxInitTime) throws IOException, kelondroException { public contentNodeIterator(final long maxInitTime) throws IOException {
// initialize markedDeleted set of deleted Handles // initialize markedDeleted set of deleted Handles
markedDeleted = deletedHandles(maxInitTime); markedDeleted = deletedHandles(maxInitTime);
fullyMarked = (maxInitTime < 0); fullyMarked = (maxInitTime < 0);

@ -331,7 +331,7 @@ public class IndexCollection implements Index {
// loop over all elements in array and create index entry for each row // loop over all elements in array and create index entry for each row
Row.EntryIndex aentry; Row.EntryIndex aentry;
Row.Entry ientry; Row.Entry ientry;
final Iterator<EntryIndex> ei = array.contentRows(-1); final Iterator<EntryIndex> ei = array.contentRows(10000);
byte[] key; byte[] key;
final long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
long lastlog = start; long lastlog = start;
@ -359,7 +359,7 @@ public class IndexCollection implements Index {
// write a log // write a log
if (System.currentTimeMillis() - lastlog > 30000) { if (System.currentTimeMillis() - lastlog > 30000) {
Log.logFine("COLLECTION INDEX STARTUP", "created " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); Log.logInfo("COLLECTION INDEX STARTUP", "created " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array");
lastlog = System.currentTimeMillis(); lastlog = System.currentTimeMillis();
} }
} }
@ -410,6 +410,7 @@ public class IndexCollection implements Index {
final String[] list = path.list(); final String[] list = path.list();
FixedWidthArray array; FixedWidthArray array;
System.out.println("COLLECTION INDEX REFERENCE COLLECTION startup");
IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000); IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000);
for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) { for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) {
// open array // open array
@ -417,18 +418,20 @@ public class IndexCollection implements Index {
if (pos < 0) continue; if (pos < 0) continue;
final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16); final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16);
final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16); final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16);
System.out.println("COLLECTION INDEX REFERENCE COLLECTION opening partition " + partitionNumber + ", " + i + " of " + list.length);
try { try {
array = openArrayFile(path, filenameStub, keylength, partitionNumber, serialNumber, indexOrder, payloadrow.objectsize, true); array = openArrayFile(path, filenameStub, keylength, partitionNumber, serialNumber, indexOrder, payloadrow.objectsize, true);
} catch (final IOException e) { } catch (final IOException e) {
e.printStackTrace(); e.printStackTrace();
continue; continue;
} }
System.out.println("COLLECTION INDEX REFERENCE COLLECTION opened partition " + partitionNumber + ", initializing iterator");
// loop over all elements in array and collect reference hashes // loop over all elements in array and collect reference hashes
Row.EntryIndex arrayrow; Row.EntryIndex arrayrow;
final Iterator<EntryIndex> ei = array.contentRows(-1); final Iterator<EntryIndex> ei = array.contentRows(10000);
System.out.println("COLLECTION INDEX REFERENCE COLLECTION opened partition " + partitionNumber + ", starting reference scanning");
final long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
long lastlog = start; long lastlog = start - 27000;
int count = 0; int count = 0;
while (ei.hasNext()) { while (ei.hasNext()) {
arrayrow = ei.next(); arrayrow = ei.next();
@ -441,12 +444,13 @@ public class IndexCollection implements Index {
count++; count++;
// write a log // write a log
if (System.currentTimeMillis() - lastlog > 30000) { if (System.currentTimeMillis() - lastlog > 30000) {
Log.logFine("COLLECTION INDEX REFERENCE COLLECTION", "scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); System.out.println("COLLECTION INDEX REFERENCE COLLECTION scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array");
//Log.logInfo("COLLECTION INDEX REFERENCE COLLECTION", "scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array");
lastlog = System.currentTimeMillis(); lastlog = System.currentTimeMillis();
} }
} }
} }
System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished with reference collection");
return references; return references;
} }

Loading…
Cancel
Save