From b856e377a9a4449c6dba6b9ae890a078a9672ee0 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 21 Oct 2007 23:26:22 +0000 Subject: [PATCH] some additions and a small bugfix to SVN 4158 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4173 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../kelondro/kelondroCollectionIndex.java | 43 ++++++++----------- .../kelondro/kelondroRowCollection.java | 4 +- source/de/anomic/plasma/plasmaGrafics.java | 4 +- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 27f7918e5..1ee25c74b 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -598,8 +598,7 @@ public class kelondroCollectionIndex { // to avoid that this grows too big int newPartitionNumber; while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) { - kelondroRowSet newcollection = shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions)); - oldcollection = newcollection; + shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions)); } // work on with oldcollection @@ -712,8 +711,7 @@ public class kelondroCollectionIndex { // to avoid that this grows too big int newPartitionNumber; while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) { - kelondroRowSet newcollection = shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions)); - oldcollection = newcollection; + shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions)); } // work on with oldcollection @@ -744,15 +742,16 @@ public class kelondroCollectionIndex { } } - private kelondroRowSet shrinkCollection(byte[] key, kelondroRowSet collection, int targetSize) { + private void shrinkCollection(byte[] key, kelondroRowSet collection, int targetSize) { //TODO Remove timing before release // removes entries from collection // the removed entries are stored in a 'commons' dump file + if (key.length != 12) return; // check if the collection is already small enough int oldsize = collection.size(); - kelondroRowSet survival = new kelondroRowSet(collection.rowdef, 0); - if (oldsize <= targetSize) return survival; + if (oldsize <= targetSize) return; + kelondroRowSet newcommon = new kelondroRowSet(collection.rowdef, 0); long sadd1 = 0, srem1 = 0, sadd2 = 0, srem2 = 0, tot1 = 0, tot2 = 0; long t1 = 0, t2 = 0; @@ -764,30 +763,30 @@ public class kelondroCollectionIndex { while (i.hasNext()) { entry = (kelondroRow.Entry) i.next(); ref = entry.getColBytes(0); - if ((ref.length == 12) && (yacyURL.probablyRootURL(new String(ref)))) { + if ((ref.length != 12) || (!yacyURL.probablyRootURL(new String(ref)))) { t2 = System.currentTimeMillis(); - survival.addUnique(entry); + newcommon.addUnique(entry); sadd1 += System.currentTimeMillis() - t2; t2 = System.currentTimeMillis(); i.remove(); srem1 += System.currentTimeMillis() - t2; } } - int firstSurvival = survival.size(); + int firstnewcommon = newcommon.size(); tot1 = System.currentTimeMillis() - t1; // check if we shrinked enough Random rand = new Random(System.currentTimeMillis()); t1 = System.currentTimeMillis(); - while (survival.size() > targetSize) { + while (collection.size() > targetSize) { // now delete randomly more entries from the survival collection - i = survival.rows(); + i = collection.rows(); while (i.hasNext()) { entry = (kelondroRow.Entry) i.next(); ref = entry.getColBytes(0); if (rand.nextInt() % 4 != 0) { t2 = System.currentTimeMillis(); - collection.addUnique(entry); + newcommon.addUnique(entry); sadd2 += System.currentTimeMillis() - t2; t2 = System.currentTimeMillis(); i.remove(); @@ -798,16 +797,10 @@ public class kelondroCollectionIndex { tot2 = System.currentTimeMillis() - t1; serverLog.logFine("kelondroCollectionIndex", "tot= "+tot1+'/'+tot2+" # add/rem(1)= "+sadd1+'/'+srem1+" # add/rem(2)= "+sadd2+'/'+srem2); - serverLog.logInfo("kelondroCollectionIndex", "shrinked common word " + new String(key) + "; old size = " + oldsize + ", new size = " + collection.size() + ", maximum size = " + targetSize + ", survival size = " + survival.size() + ", first survival = " + firstSurvival); + serverLog.logInfo("kelondroCollectionIndex", "shrinked common word " + new String(key) + "; old size = " + oldsize + ", new size = " + collection.size() + ", maximum size = " + targetSize + ", newcommon size = " + newcommon.size() + ", first newcommon = " + firstnewcommon); - //finally dump the removed entries to a file - saveCommons(key, collection); - return survival; - } - - private void saveCommons(byte[] key, kelondroRowSet collection) { - if (key.length != 12) return; - collection.sort(); + // finally dump the removed entries to a file + newcommon.sort(); TimeZone GMTTimeZone = TimeZone.getTimeZone("GMT"); Calendar gregorian = new GregorianCalendar(GMTTimeZone); SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss"); @@ -816,11 +809,11 @@ public class kelondroCollectionIndex { storagePath.mkdirs(); File file = new File(storagePath, filename); try { - collection.saveCollection(file); - serverLog.logInfo("kelondroCollectionIndex", "dumped common word " + new String(key) + " to " + file.toString() + "; size = " + collection.size()); + newcommon.saveCollection(file); + serverLog.logInfo("kelondroCollectionIndex", "dumped common word " + new String(key) + " to " + file.toString() + "; size = " + newcommon.size()); } catch (IOException e) { e.printStackTrace(); - serverLog.logWarning("kelondroCollectionIndex", "failed to dump common word " + new String(key) + " to " + file.toString() + "; size = " + collection.size()); + serverLog.logWarning("kelondroCollectionIndex", "failed to dump common word " + new String(key) + " to " + file.toString() + "; size = " + newcommon.size()); } } diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 86ec84448..786e2bf1c 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -394,8 +394,8 @@ public class kelondroRowCollection { this.rowdef.objectsize()); } // we moved the last element to the remove position: (p+1)st element - // only the first p elements keep their order - if (sortBound > p) sortBound = p; + // only the first p elements keep their order (element p is already outside the order) + if (sortBound >= p) sortBound = p; } chunkcount--; this.lastTimeWrote = System.currentTimeMillis(); diff --git a/source/de/anomic/plasma/plasmaGrafics.java b/source/de/anomic/plasma/plasmaGrafics.java index 0bb761976..5693ff05e 100644 --- a/source/de/anomic/plasma/plasmaGrafics.java +++ b/source/de/anomic/plasma/plasmaGrafics.java @@ -403,10 +403,12 @@ public class plasmaGrafics { } //[MN] + /* quoted method because it is not used anywhere (and creates a warning in eclipse) private static String addBlanksAndDots(int input, int length) { return addBlanksAndDots(input + "", length); } - + */ + //[MN] private static String addBlanksAndDots(long input, int length) { return addBlanksAndDots(input + "", length);