From 6d1de8abfd793dd55771db2d4fc013663938a7fb Mon Sep 17 00:00:00 2001 From: borg-0300 Date: Tue, 20 Sep 2005 15:43:31 +0000 Subject: [PATCH] finals; cleaned; Properties; git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@756 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/kelondro/kelondroMap.java | 185 ++++++++++--------- source/de/anomic/plasma/plasmaWordIndex.java | 124 ++++++------- 2 files changed, 157 insertions(+), 152 deletions(-) diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java index 38e333f32..ec7f9e89f 100644 --- a/source/de/anomic/kelondro/kelondroMap.java +++ b/source/de/anomic/kelondro/kelondroMap.java @@ -4,7 +4,10 @@ // (C) by Michael Peter Christen; mc@anomic.de // first published on http://www.anomic.de // Frankfurt, Germany, 2004 -// last major change: 26.10.2004 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -39,7 +42,6 @@ // the intact and unchanged copyright notice. // Contributions and changes to the program code must be marked as such. - package de.anomic.kelondro; import java.io.IOException; @@ -48,9 +50,9 @@ import java.util.Iterator; import java.util.Map; public class kelondroMap { - + private static final int cachesize = 500; - + private kelondroDyn dyn; private kelondroMScoreCluster cacheScore; private HashMap cache; @@ -59,7 +61,7 @@ public class kelondroMap { private HashMap sortClusterMap; // a String-kelondroMScoreCluster - relation private HashMap accMap; // to store accumulations of specific fields private int elementCount; - + public kelondroMap(kelondroDyn dyn) { this(dyn, null, null); } @@ -70,23 +72,27 @@ public class kelondroMap { this.cacheScore = new kelondroMScoreCluster(); this.startup = System.currentTimeMillis(); this.elementCount = 0; - + // create fast ordering clusters and acc fields this.sortfields = sortfields; this.accfields = accfields; - + kelondroMScoreCluster[] cluster = null; if (sortfields == null) sortClusterMap = null; else { sortClusterMap = new HashMap(); cluster = new kelondroMScoreCluster[sortfields.length]; - for (int i = 0; i < sortfields.length; i++) cluster[i] = new kelondroMScoreCluster(); + for (int i = 0; i < sortfields.length; i++) { + cluster[i] = new kelondroMScoreCluster(); + } } - + Long[] accumulator = null; if (accfields == null) accMap = null; else { accMap = new HashMap(); accumulator = new Long[accfields.length]; - for (int i = 0; i < accfields.length; i++) accumulator[i] = new Long(0); + for (int i = 0; i < accfields.length; i++) { + accumulator[i] = new Long(0); + } } // fill cluster and accumulator with values @@ -97,14 +103,14 @@ public class kelondroMap { Map map; while (it.hasNext()) { key = (String) it.next(); - //System.out.println("kelondroMap: enumerating key " + key); +// System.out.println("kelondroMap: enumerating key " + key); map = get(key); - + if (sortfields != null) for (int i = 0; i < sortfields.length; i++) { value = (String) map.get(sortfields[i]); if (value != null) cluster[i].setScore(key, kelondroMScoreCluster.string2score(value)); } - + if (accfields != null) for (int i = 0; i < accfields.length; i++) { value = (String) map.get(accfields[i]); if (value != null) try { @@ -115,7 +121,7 @@ public class kelondroMap { elementCount++; } } catch (IOException e) {} - + // fill cluster if (sortfields != null) for (int i = 0; i < sortfields.length; i++) sortClusterMap.put(sortfields[i], cluster[i]); @@ -126,19 +132,19 @@ public class kelondroMap { public int keySize() { return dyn.columnSize(0); } - + public int cacheChunkSize() { return dyn.cacheChunkSize(); } - + public int[] cacheFillStatus() { return dyn.cacheFillStatus(); } - + public synchronized void set(String key, Map newMap) throws IOException { // update elementCount if ((sortfields != null) || (accfields != null)) { - Map oldMap = get(key, false); + final Map oldMap = get(key, false); if (oldMap == null) { // new element elementCount++; @@ -147,49 +153,51 @@ public class kelondroMap { if (accfields != null) updateAcc(oldMap, false); } } - // write entry writeKra(key, newMap, ""); - + // check for space in cache checkCacheSpace(); - + // write map to cache cacheScore.setScore(key, (int) ((System.currentTimeMillis() - startup) / 1000)); cache.put(key, newMap); - + // update sortCluster if (sortClusterMap != null) updateSortCluster(key, newMap); - + // update accumulators with new values (add) if (accfields != null) updateAcc(newMap, true); } - - private synchronized void writeKra(String key, Map newMap, String comment) throws IOException { + + private synchronized void writeKra(final String key, final Map newMap, String comment) throws IOException { // write map to kra - kelondroRA kra = dyn.getRA(key); - kra.writeMap(newMap, comment); - kra.close(); + final kelondroRA kra = dyn.getRA(key); + kra.writeMap(newMap, comment); + kra.close(); } - + private void updateAcc(Map map, boolean add) { String value; long valuel; Long accumulator; for (int i = 0; i < accfields.length; i++) { value = (String) map.get(accfields[i]); - if (value != null) try { - valuel = Long.parseLong(value); - accumulator = (Long) accMap.get(accfields[i]); - if (add) - accMap.put(accfields[i], new Long(accumulator.longValue() + ((long) valuel))); - else - accMap.put(accfields[i], new Long(accumulator.longValue() - ((long) valuel))); - } catch (NumberFormatException e) {} + if (value != null) { + try { + valuel = Long.parseLong(value); + accumulator = (Long) accMap.get(accfields[i]); + if (add) { + accMap.put(accfields[i], new Long(accumulator.longValue() + ((long) valuel))); + } else { + accMap.put(accfields[i], new Long(accumulator.longValue() - ((long) valuel))); + } + } catch (NumberFormatException e) {} + } } } - private void updateSortCluster(String key, Map map) { + private void updateSortCluster(final String key, final Map map) { String value; kelondroMScoreCluster cluster; for (int i = 0; i < sortfields.length; i++) { @@ -201,16 +209,16 @@ public class kelondroMap { } } } - + public synchronized void remove(String key) throws IOException { // update elementCount if (key == null) return; if ((sortfields != null) || (accfields != null)) { - Map map = get(key); + final Map map = get(key); if (map != null) { // update count elementCount--; - + // update accumulators (subtract) if (accfields != null) updateAcc(map, false); @@ -218,16 +226,15 @@ public class kelondroMap { if (sortfields != null) deleteSortCluster(key); } } - // remove from cache cacheScore.deleteScore(key); cache.remove(key); - + // remove from file dyn.remove(key); } - - private void deleteSortCluster(String key) { + + private void deleteSortCluster(final String key) { if (key == null) return; kelondroMScoreCluster cluster; for (int i = 0; i < sortfields.length; i++) { @@ -236,23 +243,23 @@ public class kelondroMap { sortClusterMap.put(sortfields[i], cluster); } } - - public synchronized Map get(String key) throws IOException { + + public synchronized Map get(final String key) throws IOException { if (key == null) return null; return get(key, true); } - - private synchronized Map get(String key, boolean storeCache) throws IOException { - // load map from cache + + private synchronized Map get(final String key, final boolean storeCache) throws IOException { + // load map from cache Map map = (Map) cache.get(key); if (map != null) return map; - - // load map from kra + + // load map from kra if (!(dyn.existsDyn(key))) return null; - kelondroRA kra = dyn.getRA(key); - map = kra.readMap(); - kra.close(); - + final kelondroRA kra = dyn.getRA(key); + map = kra.readMap(); + kra.close(); + if (storeCache) { // cache it also checkCacheSpace(); @@ -260,66 +267,65 @@ public class kelondroMap { cacheScore.setScore(key, (int) ((System.currentTimeMillis() - startup) / 1000)); cache.put(key, map); } - + // return value - return map; + return map; } - private synchronized void checkCacheSpace() { // check for space in cache if (cache.size() >= cachesize) { // delete one entry - String delkey = (String) cacheScore.getMinObject(); + final String delkey = (String) cacheScore.getMinObject(); cacheScore.deleteScore(delkey); cache.remove(delkey); } } - public synchronized kelondroDyn.dynKeyIterator keys(boolean up, boolean rotating) throws IOException { + public synchronized kelondroDyn.dynKeyIterator keys(final boolean up, final boolean rotating) throws IOException { // simple enumeration of key names without special ordering return dyn.dynKeys(up, rotating); } - - public synchronized kelondroDyn.dynKeyIterator keys(boolean up, boolean rotating, byte[] firstKey) throws IOException { + + public synchronized kelondroDyn.dynKeyIterator keys(final boolean up, final boolean rotating, final byte[] firstKey) throws IOException { // simple enumeration of key names without special ordering return dyn.dynKeys(up, rotating, firstKey); } - - public synchronized Iterator keys(boolean up, /* sorted by */ String field) { + + public synchronized Iterator keys(final boolean up, /* sorted by */ String field) { // sorted iteration using the sortClusters if (sortClusterMap == null) return null; - kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field); + final kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field); if (cluster == null) return null; // sort field does not exist //System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString()); return cluster.scores(up); } - - public synchronized mapIterator maps(boolean up, boolean rotating) throws IOException { + + public synchronized mapIterator maps(final boolean up, final boolean rotating) throws IOException { return new mapIterator(keys(up, rotating)); } - - public synchronized mapIterator maps(boolean up, boolean rotating, byte[] firstKey) throws IOException { + + public synchronized mapIterator maps(final boolean up, final boolean rotating, final byte[] firstKey) throws IOException { return new mapIterator(keys(up, rotating, firstKey)); } - - public synchronized mapIterator maps(boolean up, String field) { + + public synchronized mapIterator maps(final boolean up, final String field) { return new mapIterator(keys(up, field)); } - - public synchronized long getAcc(String field) { - Long accumulator = (Long) accMap.get(field); + + public synchronized long getAcc(final String field) { + final Long accumulator = (Long) accMap.get(field); if (accumulator == null) return -1; else return accumulator.longValue(); } - + public synchronized int size() { if ((sortfields != null) || (accfields != null)) return elementCount; else return dyn.size(); } - + public void close() throws IOException { // finish queue //writeWorker.terminate(true); - + // close cluster if (sortClusterMap != null) { for (int i = 0; i < sortfields.length; i++) sortClusterMap.remove(sortfields[i]); @@ -331,32 +337,31 @@ public class kelondroMap { // close file dyn.close(); } - - + public class mapIterator implements Iterator { // enumerates Map-Type elements // the key is also included in every map that is returned; it's key is 'key' - + Iterator keyIterator; boolean finish; - + public mapIterator(Iterator keyIterator) { this.keyIterator = keyIterator; this.finish = false; } - + public boolean hasNext() { return (!(finish)) && (keyIterator.hasNext()); } - + public Object next() { - String nextKey = (String) keyIterator.next(); + final String nextKey = (String) keyIterator.next(); if (nextKey == null) { finish = true; return null; } try { - Map map = get(nextKey); + final Map map = get(nextKey); if (map == null) throw new kelondroException(dyn.filename, "no more elements available"); map.put("key", nextKey); return map; @@ -365,10 +370,10 @@ public class kelondroMap { return null; } } - + public void remove() { throw new UnsupportedOperationException(); } - - } -} + } // class mapIterator + +} // class kelondroMap diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 8799b3a2f..40a0f0da1 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -4,7 +4,10 @@ // (C) by Michael Peter Christen; mc@anomic.de // first published on http://www.anomic.de // Frankfurt, Germany, 2005 -// last major change: 02.02.2005 +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -42,7 +45,6 @@ // compile with // javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java - package de.anomic.plasma; import java.io.File; @@ -57,28 +59,28 @@ import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; public final class plasmaWordIndex { - + final File databaseRoot; final plasmaWordIndexCache ramCache; - + public plasmaWordIndex(File databaseRoot, int bufferkb, serverLog log) throws IOException { this.databaseRoot = databaseRoot; plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(databaseRoot, log); this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, bufferkb, log); } - + public File getRoot() { return databaseRoot; } - + public int maxURLinWordCache() { return ramCache.maxURLinWordCache(); } - + public int wordCacheRAMSize() { return ramCache.wordCacheRAMSize(); } - + public int[] assortmentSizes() { return ramCache.assortmentsSizes(); } @@ -94,48 +96,47 @@ public final class plasmaWordIndex { public void setMaxWords(int maxWords) { ramCache.setMaxWords(maxWords); } - + public int addEntries(plasmaWordIndexEntryContainer entries) { return ramCache.addEntries(entries, System.currentTimeMillis()); } - + public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) { return ramCache.getIndex(wordHash, deleteIfEmpty); } - + public int size() { return ramCache.size(); } - + public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { return ramCache.removeEntries(wordHash, urlHashes, deleteComplete); } - + public void intermission(long pause) { - this.ramCache.intermission(pause); + this.ramCache.intermission(pause); } public void close(int waitingBoundSeconds) { ramCache.close(waitingBoundSeconds); } - + public void deleteIndex(String wordHash) { ramCache.deleteIndex(wordHash); } - + public Iterator wordHashes(String startHash, boolean up, boolean rot) { //return ramCache.wordHashes(startHash, up); return new correctedWordIterator(up, rot, startHash); // use correction until bug is found } - private class correctedWordIterator implements Iterator { - - Iterator ii; + private class correctedWordIterator implements Iterator { + Iterator iter; String nextWord; - + public correctedWordIterator(boolean up, boolean rotating, String firstWord) { - ii = ramCache.wordHashes(firstWord, up); - nextWord = (ii.hasNext()) ? (String) ii.next() : null; + iter = ramCache.wordHashes(firstWord, up); + nextWord = (iter.hasNext()) ? (String) iter.next() : null; boolean corrected = true; int cc = 0; // to avoid rotation loops while ((nextWord != null) && (corrected) && (cc < 50)) { @@ -144,60 +145,59 @@ public final class plasmaWordIndex { if ((c > 0) && (up)) { // firstKey > nextNode.getKey() //System.out.println("CORRECTING WORD ITERATOR: firstWord=" + firstWord + ", nextWord=" + nextWord); - nextWord = (ii.hasNext()) ? (String) ii.next() : null; + nextWord = (iter.hasNext()) ? (String) iter.next() : null; corrected = true; cc++; } if ((c < 0) && (!(up))) { - nextWord = (ii.hasNext()) ? (String) ii.next() : null; + nextWord = (iter.hasNext()) ? (String) iter.next() : null; corrected = true; cc++; } } } - + public void finalize() { - ii = null; + iter = null; nextWord = null; } - - public boolean hasNext() { + + public boolean hasNext() { return nextWord != null; - } + } public Object next() { String r = nextWord; - nextWord = (ii.hasNext()) ? (String) ii.next() : null; + nextWord = (iter.hasNext()) ? (String) iter.next() : null; return r; } - + public void remove() { throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported"); - } - } - + } + } // correctedWordIterator + public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) { return new iterateFiles(startHash, up, deleteEmpty); } - + public class iterateFiles implements Iterator { // Iterator of hash-strings in WORDS path - + private ArrayList hierarchy; // contains TreeSet elements, earch TreeSet contains File Entries private Comparator comp; // for string-compare private String buffer; // the prefetch-buffer private boolean delete; - - + public iterateFiles(String startHash, boolean up, boolean deleteEmpty) { this.hierarchy = new ArrayList(); this.comp = kelondroMSetTools.fastStringComparator(up); this.delete = deleteEmpty; - + // the we initially fill the hierarchy with the content of the root folder String path = "WORDS"; TreeSet list = list(new File(databaseRoot, path)); - + // if we have a start hash then we find the appropriate subdirectory to start if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) { delete(startHash.substring(0, 1), list); @@ -221,22 +221,22 @@ public final class plasmaWordIndex { buffer = next0(); } } - + private synchronized void delete(String pattern, TreeSet names) { String name; while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name); } - + private TreeSet list(File path) { - //System.out.println("PATH: " + path); +// System.out.println("PATH: " + path); TreeSet t = new TreeSet(comp); String[] l = path.list(); if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]); - //else System.out.println("DEBUG: wrong path " + path); - //System.out.println(t); +// else System.out.println("DEBUG: wrong path " + path); +// System.out.println(t); return t; } - + private synchronized String next0() { // the object is a File pointing to the corresponding file File f; @@ -267,36 +267,36 @@ public final class plasmaWordIndex { } } while (f == null); // thats it - if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) { - return null; - } else { - return n.substring(0, yacySeedDB.commonHashLength); - } + if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) { + return null; + } else { + return n.substring(0, yacySeedDB.commonHashLength); + } } - + public boolean hasNext() { return buffer != null; } - + public Object next() { String r = buffer; while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {}; return r; } - + public void remove() { - } } - + public static void main(String[] args) { - //System.out.println(kelondroMSetTools.fastStringComparator(true).compare("RwGeoUdyDQ0Y", "rwGeoUdyDQ0Y")); +// System.out.println(kelondroMSetTools.fastStringComparator(true).compare("RwGeoUdyDQ0Y", "rwGeoUdyDQ0Y")); try { - plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555, new serverLog("TESTAPP")); - Iterator i = index.wordHashes("5A8yhZMh_Kmv", true, true); - while (i.hasNext()) { - System.out.println("File: " + (String) i.next()); - } + plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555, new serverLog("TESTAPP")); + Iterator iter = index.wordHashes("5A8yhZMh_Kmv", true, true); + while (iter.hasNext()) { + System.out.println("File: " + (String) iter.next()); + } } catch (IOException e) {} } + }