Some additions and a small bug fix on top of SVN revision 4158

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4173 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 501a7aae90
commit b856e377a9

@ -598,8 +598,7 @@ public class kelondroCollectionIndex {
// to avoid that this grows too big // to avoid that this grows too big
int newPartitionNumber; int newPartitionNumber;
while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) { while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) {
kelondroRowSet newcollection = shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions)); shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions));
oldcollection = newcollection;
} }
// work on with oldcollection // work on with oldcollection
@ -712,8 +711,7 @@ public class kelondroCollectionIndex {
// to avoid that this grows too big // to avoid that this grows too big
int newPartitionNumber; int newPartitionNumber;
while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) { while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) {
kelondroRowSet newcollection = shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions)); shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions));
oldcollection = newcollection;
} }
// work on with oldcollection // work on with oldcollection
@ -744,15 +742,16 @@ public class kelondroCollectionIndex {
} }
} }
private kelondroRowSet shrinkCollection(byte[] key, kelondroRowSet collection, int targetSize) { private void shrinkCollection(byte[] key, kelondroRowSet collection, int targetSize) {
//TODO Remove timing before release //TODO Remove timing before release
// removes entries from collection // removes entries from collection
// the removed entries are stored in a 'commons' dump file // the removed entries are stored in a 'commons' dump file
if (key.length != 12) return;
// check if the collection is already small enough // check if the collection is already small enough
int oldsize = collection.size(); int oldsize = collection.size();
kelondroRowSet survival = new kelondroRowSet(collection.rowdef, 0); if (oldsize <= targetSize) return;
if (oldsize <= targetSize) return survival; kelondroRowSet newcommon = new kelondroRowSet(collection.rowdef, 0);
long sadd1 = 0, srem1 = 0, sadd2 = 0, srem2 = 0, tot1 = 0, tot2 = 0; long sadd1 = 0, srem1 = 0, sadd2 = 0, srem2 = 0, tot1 = 0, tot2 = 0;
long t1 = 0, t2 = 0; long t1 = 0, t2 = 0;
@ -764,30 +763,30 @@ public class kelondroCollectionIndex {
while (i.hasNext()) { while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next(); entry = (kelondroRow.Entry) i.next();
ref = entry.getColBytes(0); ref = entry.getColBytes(0);
if ((ref.length == 12) && (yacyURL.probablyRootURL(new String(ref)))) { if ((ref.length != 12) || (!yacyURL.probablyRootURL(new String(ref)))) {
t2 = System.currentTimeMillis(); t2 = System.currentTimeMillis();
survival.addUnique(entry); newcommon.addUnique(entry);
sadd1 += System.currentTimeMillis() - t2; sadd1 += System.currentTimeMillis() - t2;
t2 = System.currentTimeMillis(); t2 = System.currentTimeMillis();
i.remove(); i.remove();
srem1 += System.currentTimeMillis() - t2; srem1 += System.currentTimeMillis() - t2;
} }
} }
int firstSurvival = survival.size(); int firstnewcommon = newcommon.size();
tot1 = System.currentTimeMillis() - t1; tot1 = System.currentTimeMillis() - t1;
// check if we shrank enough // check if we shrank enough
Random rand = new Random(System.currentTimeMillis()); Random rand = new Random(System.currentTimeMillis());
t1 = System.currentTimeMillis(); t1 = System.currentTimeMillis();
while (survival.size() > targetSize) { while (collection.size() > targetSize) {
// now delete randomly more entries from the survival collection // now delete randomly more entries from the survival collection
i = survival.rows(); i = collection.rows();
while (i.hasNext()) { while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next(); entry = (kelondroRow.Entry) i.next();
ref = entry.getColBytes(0); ref = entry.getColBytes(0);
if (rand.nextInt() % 4 != 0) { if (rand.nextInt() % 4 != 0) {
t2 = System.currentTimeMillis(); t2 = System.currentTimeMillis();
collection.addUnique(entry); newcommon.addUnique(entry);
sadd2 += System.currentTimeMillis() - t2; sadd2 += System.currentTimeMillis() - t2;
t2 = System.currentTimeMillis(); t2 = System.currentTimeMillis();
i.remove(); i.remove();
@ -798,16 +797,10 @@ public class kelondroCollectionIndex {
tot2 = System.currentTimeMillis() - t1; tot2 = System.currentTimeMillis() - t1;
serverLog.logFine("kelondroCollectionIndex", "tot= "+tot1+'/'+tot2+" # add/rem(1)= "+sadd1+'/'+srem1+" # add/rem(2)= "+sadd2+'/'+srem2); serverLog.logFine("kelondroCollectionIndex", "tot= "+tot1+'/'+tot2+" # add/rem(1)= "+sadd1+'/'+srem1+" # add/rem(2)= "+sadd2+'/'+srem2);
serverLog.logInfo("kelondroCollectionIndex", "shrinked common word " + new String(key) + "; old size = " + oldsize + ", new size = " + collection.size() + ", maximum size = " + targetSize + ", survival size = " + survival.size() + ", first survival = " + firstSurvival); serverLog.logInfo("kelondroCollectionIndex", "shrinked common word " + new String(key) + "; old size = " + oldsize + ", new size = " + collection.size() + ", maximum size = " + targetSize + ", newcommon size = " + newcommon.size() + ", first newcommon = " + firstnewcommon);
//finally dump the removed entries to a file // finally dump the removed entries to a file
saveCommons(key, collection); newcommon.sort();
return survival;
}
private void saveCommons(byte[] key, kelondroRowSet collection) {
if (key.length != 12) return;
collection.sort();
TimeZone GMTTimeZone = TimeZone.getTimeZone("GMT"); TimeZone GMTTimeZone = TimeZone.getTimeZone("GMT");
Calendar gregorian = new GregorianCalendar(GMTTimeZone); Calendar gregorian = new GregorianCalendar(GMTTimeZone);
SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss"); SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
@ -816,11 +809,11 @@ public class kelondroCollectionIndex {
storagePath.mkdirs(); storagePath.mkdirs();
File file = new File(storagePath, filename); File file = new File(storagePath, filename);
try { try {
collection.saveCollection(file); newcommon.saveCollection(file);
serverLog.logInfo("kelondroCollectionIndex", "dumped common word " + new String(key) + " to " + file.toString() + "; size = " + collection.size()); serverLog.logInfo("kelondroCollectionIndex", "dumped common word " + new String(key) + " to " + file.toString() + "; size = " + newcommon.size());
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
serverLog.logWarning("kelondroCollectionIndex", "failed to dump common word " + new String(key) + " to " + file.toString() + "; size = " + collection.size()); serverLog.logWarning("kelondroCollectionIndex", "failed to dump common word " + new String(key) + " to " + file.toString() + "; size = " + newcommon.size());
} }
} }

@ -394,8 +394,8 @@ public class kelondroRowCollection {
this.rowdef.objectsize()); this.rowdef.objectsize());
} }
// we moved the last element to the remove position: (p+1)st element // we moved the last element to the remove position: (p+1)st element
// only the first p elements keep their order // only the first p elements keep their order (element p is already outside the order)
if (sortBound > p) sortBound = p; if (sortBound >= p) sortBound = p;
} }
chunkcount--; chunkcount--;
this.lastTimeWrote = System.currentTimeMillis(); this.lastTimeWrote = System.currentTimeMillis();

@ -403,10 +403,12 @@ public class plasmaGrafics {
} }
//[MN] //[MN]
/* method commented out because it is not used anywhere (and causes a warning in Eclipse)
private static String addBlanksAndDots(int input, int length) { private static String addBlanksAndDots(int input, int length) {
return addBlanksAndDots(input + "", length); return addBlanksAndDots(input + "", length);
} }
*/
//[MN] //[MN]
private static String addBlanksAndDots(long input, int length) { private static String addBlanksAndDots(long input, int length) {
return addBlanksAndDots(input + "", length); return addBlanksAndDots(input + "", length);

Loading…
Cancel
Save