diff --git a/source/net/yacy/kelondro/blob/MapColumnIndex.java b/source/net/yacy/kelondro/blob/MapColumnIndex.java
new file mode 100644
index 000000000..ed1582b4c
--- /dev/null
+++ b/source/net/yacy/kelondro/blob/MapColumnIndex.java
@@ -0,0 +1,168 @@
+/**
+ * MapColumnIndex
+ * Copyright 2012 by Michael Christen
+ * First released 01.02.2012 at http://yacy.net
+ *
+ * $LastChangedDate$
+ * $LastChangedRevision$
+ * $LastChangedBy$
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.kelondro.blob;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import net.yacy.cora.document.ASCII;
+import net.yacy.kelondro.order.NaturalOrder;
+
+/**
+ * A mapping from a column name to a map that leads from each value of that column to the primary keys of the table rows holding that value.
+ */
+public class MapColumnIndex extends HashMap>> implements Map>> {
+
+ private static final long serialVersionUID=-424741536889467566L;
+
+ public MapColumnIndex() {
+ super();
+ }
+
+ public Collection getIndex(final String whereKey, final String isValue) throws UnsupportedOperationException { // primary keys of all rows where column whereKey holds isValue
+ Map> references = this.get(whereKey); // value -> primary keys map of the requested column
+ if (references == null) throw new UnsupportedOperationException(); // the column has no index (yet); callers in this file react by calling init() and retrying
+ Collection indexes = references.get(isValue);
+ if (indexes == null) return new ArrayList(0); // column is indexed but no row holds this value: fresh empty collection
+ return indexes; // this is the collection stored inside the index, not a copy
+ }
+
+ /**
+ * Create a full index for the column whereKey by scanning the whole table; an index already stored for that column is replaced.
+ * @param whereKey the name of the column to be indexed
+ * @param isValue not read by this method
+ * @param table iterator over (primary key, row) entries of the table; rows without a value for whereKey are skipped
+ */
+ public void init(final String whereKey, final String isValue, final Iterator>> table) {
+ Map> valueIdxMap = new HashMap>(); // value -> primary keys, filled below
+ this.put(whereKey, valueIdxMap); // registered before the scan, so the column counts as indexed even if the table is empty
+ Map.Entry> line;
+ while (table.hasNext()) {
+ line = table.next();
+ String value = line.getValue().get(whereKey);
+ if (value == null) continue; // row has no such column: nothing to index
+ indexupdate(line.getKey(), valueIdxMap, value);
+ }
+ }
+
+ /**
+ * Re-index one row: drop every reference to the primary key from all tracked columns, then file the key under the row's current values.
+ * @param primarykey the primary key for the row that is updated
+ * @param row the row that was updated (a mapping from column names to values); tracked columns missing from the row end up without a reference to the key
+ */
+ public void update(final byte[] primarykey, final Map row) {
+ for (Map.Entry>> entry: this.entrySet()) {
+ indexdelete(primarykey, entry.getValue()); // remove references filed under a previous value, otherwise getIndex() keeps returning this row for values it no longer has
+ String value = row.get(entry.getKey());
+ if (value == null) continue; // we don't need to remember that
+ indexupdate(primarykey, entry.getValue(), value);
+ }
+ }
+
+ private void indexupdate(final byte[] primarykey, final Map> valueIdxMap, final String value) { // file primarykey under value in the index map of one column
+ Collection indexes = valueIdxMap.get(value);
+ if (indexes == null) {
+ // first row with this value: create a new key list holding only this primary key
+ indexes = new ArrayList(1);
+ indexes.add(primarykey);
+ valueIdxMap.put(value, indexes);
+ } else {
+ // the value is already indexed: extend the existing key list
+ // add the primary key only if ByteBuffer.contains does not find it in the list, so a key is not filed twice
+ if (!net.yacy.kelondro.util.ByteBuffer.contains(indexes, primarykey)) {
+ indexes.add(primarykey);
+ }
+ }
+ }
+
+ /**
+ * Delete all references to the primary key from the indexes of all tracked columns.
+ * @param primarykey the primary key of the row that shall not be referenced any more
+ */
+ public void delete(final byte[] primarykey) {
+ for (Map.Entry>> entry: this.entrySet()) {
+ // the value under which the key was filed is not known here, so every column index is searched completely
+ indexdelete(primarykey, entry.getValue());
+ }
+ }
+
+ private void indexdelete(final byte[] index, final Map> valueIdxMap) { // remove the primary key 'index' from every key list of one column
+ Iterator>> i = valueIdxMap.entrySet().iterator(); // explicit iterator so that entries can be removed while iterating
+ Map.Entry> ref;
+ while (i.hasNext()) {
+ ref = i.next();
+ net.yacy.kelondro.util.ByteBuffer.remove(ref.getValue(), index); // removes all matching keys from this value's key list
+ if (ref.getValue().isEmpty()) {
+ i.remove(); // no row holds this value any more: drop the value entry as well
+ }
+ }
+ }
+
+ private static Collection getIndexWithExceptionHandler(final MapColumnIndex idx, final String whereKey, final String isValue, Map> table) { // helper for main(): query the index, building it from the table on first use
+ try {
+ return idx.getIndex(whereKey, isValue);
+ } catch (UnsupportedOperationException e) { // the column is not indexed yet
+ idx.init(whereKey, isValue, table.entrySet().iterator()); // build the column index from the complete table
+ try {
+ return idx.getIndex(whereKey, isValue); // init() has registered the column, so this lookup finds an index
+ } catch (UnsupportedOperationException ee) {
+ throw ee; // rethrown unchanged; this inner try/catch adds no handling of its own
+ }
+ }
+ }
+
+ private static void printIndex(Collection index) { // helper for main(): print the keys as idx{k1, k2, ...} to stdout, without a trailing newline
+ System.out.print("idx{");
+ int c = 0; // number of keys printed so far, used to place the separators
+ for (byte[] a: index) {
+ if (c++ != 0) System.out.print(", "); // separator before every key except the first
+ System.out.print(ASCII.String(a)); // the key bytes converted to a String by ASCII.String
+ }
+ System.out.print("}");
+ }
+
+ public static void main(String[] args) { // demo: build a small table, query several column indexes, then add a row through update()
+ Map> table = new TreeMap>(NaturalOrder.naturalOrder);
+ Map row;
+ row = new HashMap(); row.put("a", "1"); row.put("b", "2"); row.put("c", "2"); table.put("line1".getBytes(), row);
+ row = new HashMap(); row.put("a", "3"); row.put("b", "2"); row.put("c", "4"); table.put("line2".getBytes(), row);
+ row = new HashMap(); row.put("a", "5"); row.put("b", "2"); row.put("c", "4"); table.put("line3".getBytes(), row);
+ row = new HashMap(); row.put("a", "6"); row.put("b", "7"); row.put("c", "8"); table.put("line4".getBytes(), row);
+ MapColumnIndex idx = new MapColumnIndex(); // starts without any column index; each column is indexed on its first query
+ System.out.print("colum b, value 2: "); printIndex(getIndexWithExceptionHandler(idx, "b", "2", table)); System.out.println();
+ System.out.print("colum c, value 4: "); printIndex(getIndexWithExceptionHandler(idx, "c", "4", table)); System.out.println();
+ System.out.print("colum b, value 7: "); printIndex(getIndexWithExceptionHandler(idx, "b", "7", table)); System.out.println();
+ System.out.print("colum d, value 0: "); printIndex(getIndexWithExceptionHandler(idx, "d", "0", table)); System.out.println(); // no row has a column "d": prints an empty index
+ row = new HashMap(); row.put("a", "9"); row.put("b", "9"); row.put("c", "4"); table.put("line5".getBytes(), row);
+ idx.update("line5".getBytes(), row); // file the new row in the already existing column indexes
+ System.out.print("colum c, value 4: "); printIndex(getIndexWithExceptionHandler(idx, "c", "4", table)); System.out.println(); // the result now includes line5
+ }
+
+}
diff --git a/source/net/yacy/kelondro/blob/MapDataMining.java b/source/net/yacy/kelondro/blob/MapDataMining.java
index 1fc87c0ef..80098b618 100644
--- a/source/net/yacy/kelondro/blob/MapDataMining.java
+++ b/source/net/yacy/kelondro/blob/MapDataMining.java
@@ -29,9 +29,11 @@ package net.yacy.kelondro.blob;
import java.io.File;
import java.io.IOException;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.UTF8;
@@ -55,6 +57,7 @@ public class MapDataMining extends MapHeap {
private Map> sortClusterMap; // a String-kelondroMScoreCluster - relation
private Map accLong; // to store accumulations of Long cells
private Map accFloat; // to store accumulations of Float cells
+ private final MapColumnIndex columnIndex; // to store fast select-where indexes
@SuppressWarnings("unchecked")
public MapDataMining(final File heapFile,
@@ -73,6 +76,8 @@ public class MapDataMining extends MapHeap {
this.longaccfields = longaccfields;
this.floataccfields = floataccfields;
+ this.columnIndex = new MapColumnIndex();
+
ScoreMap[] cluster = null;
if (sortfields == null) this.sortClusterMap = null; else {
this.sortClusterMap = new ConcurrentHashMap>();
@@ -192,6 +197,8 @@ public class MapDataMining extends MapHeap {
this.accFloat.put(floataccfield, FLOAT0);
}
}
+
+ this.columnIndex.clear();
}
@Override
@@ -216,6 +223,8 @@ public class MapDataMining extends MapHeap {
// update sortCluster
if (this.sortClusterMap != null) updateSortCluster(UTF8.String(key), newMap);
+
+ this.columnIndex.update(key, newMap);
}
private void updateAcc(final Map map, final boolean add) {
@@ -294,6 +303,8 @@ public class MapDataMining extends MapHeap {
}
}
super.delete(key);
+
+ this.columnIndex.delete(key);
}
private void deleteSortCluster(final String key) {
@@ -315,6 +326,10 @@ public class MapDataMining extends MapHeap {
return new string2bytearrayIterator(cluster.keys(up));
}
+ private synchronized Iterator keys() throws IOException { // key iterator used for the full table scan that builds a column index
+ return super.keys(true, null); // MapHeap.keys(up, firstKey) with up = true and no first key
+ }
+
private static class string2bytearrayIterator implements Iterator {
private final Iterator s;
@@ -342,15 +357,35 @@ public class MapDataMining extends MapHeap {
}
- @Override
public synchronized Iterator>> entries(final String whereKey, final String isValue) throws IOException {
- return super.entries(whereKey, isValue);
+ Collection idx = null; // primary keys of the rows where column whereKey holds isValue
+ try {
+ idx = this.columnIndex.getIndex(whereKey, isValue);
+ } catch (UnsupportedOperationException e) { // the column has no index yet
+ this.columnIndex.init(whereKey, isValue, new FullMapIterator(keys())); // build the column index with one scan over all rows
+ try {
+ idx = this.columnIndex.getIndex(whereKey, isValue);
+ } catch (UnsupportedOperationException ee) {
+ throw ee; // rethrown unchanged; this inner try/catch adds no handling of its own
+ }
+ }
+ Map> resultMap = new TreeMap>(this.ordering()); // result rows, sorted by the primary key ordering
+ for (byte[] pk: idx) {
+ try {
+ resultMap.put(pk, this.get(pk)); // load the complete row for every indexed primary key
+ } catch (final IOException e) {
+ Log.logException(e); // the failure is logged and the row is left out of the result
+ } catch (final RowSpaceExceededException e) {
+ Log.logException(e); // the failure is logged and the row is left out of the result
+ }
+ }
+ return resultMap.entrySet().iterator();
}
-
+
public synchronized Iterator>> entries(final boolean up, final String field) {
- return new MapIterator(keys(up, field), null, null);
+ return new FullMapIterator(keys(up, field)); // iterate the rows for the keys delivered by keys(up, field); no where-filter is involved
}
-
+
public synchronized long getLongAcc(final String field) {
final Long accumulator = this.accLong.get(field);
if (accumulator == null) return -1;
diff --git a/source/net/yacy/kelondro/blob/MapHeap.java b/source/net/yacy/kelondro/blob/MapHeap.java
index 42073f9c7..5d5049f7f 100644
--- a/source/net/yacy/kelondro/blob/MapHeap.java
+++ b/source/net/yacy/kelondro/blob/MapHeap.java
@@ -82,6 +82,14 @@ public class MapHeap implements Map> {
return this.blob.keylength();
}
+ /**
+ * Get the ordering of the primary keys.
+ * @return the ordering reported by the underlying blob
+ */
+ public ByteOrder ordering() {
+ return this.blob.ordering();
+ }
+
/**
* clears the content of the database
* @throws IOException
@@ -366,6 +374,10 @@ public class MapHeap implements Map> {
return new KeyIterator(up, rotating, firstKey, secondKey);
}
+ public synchronized CloneableIterator keys(boolean up, byte[] firstKey) throws IOException { // key enumeration taken directly from the blob
+ return this.blob.keys(up, firstKey); // both arguments are handed to the blob unchanged
+ }
+
public class KeyIterator implements CloneableIterator, Iterator {
final boolean up, rotating;
@@ -406,17 +418,13 @@ public class MapHeap implements Map> {
}
}
-
- public synchronized Iterator>> entries(final String whereKey, final String isValue) throws IOException {
- return new MapIterator(this.blob.keys(true, null), whereKey, isValue);
- }
public synchronized Iterator>> entries(final boolean up, final boolean rotating) throws IOException {
- return new MapIterator(keys(up, rotating), null, null);
+ return new FullMapIterator(keys(up, rotating)); // iterate the rows for the keys delivered by keys(up, rotating); no where-filter is involved
}
public synchronized Iterator>> entries(final boolean up, final boolean rotating, final byte[] firstKey, final byte[] secondKey) throws IOException {
- return new MapIterator(keys(up, rotating, firstKey, secondKey), null, null);
+ return new FullMapIterator(keys(up, rotating, firstKey, secondKey)); // iterate the rows for the keys delivered by the four-argument keys(); no where-filter is involved
}
/**
@@ -448,18 +456,15 @@ public class MapHeap implements Map> {
public void finalize() {
close();
}
-
- public class MapIterator extends LookAheadIterator>> implements Iterator>> {
+
+ protected class FullMapIterator extends LookAheadIterator>> implements Iterator>> {
// enumerates Map-Type elements
// the key is also included in every map that is returned; it's key is 'key'
private final Iterator keyIterator;
- private final String whereKey, isValue;
- MapIterator(final Iterator keyIterator, final String whereKey, final String isValue) {
+ FullMapIterator(final Iterator keyIterator) {
this.keyIterator = keyIterator;
- this.whereKey = whereKey;
- this.isValue = isValue;
}
@Override
@@ -479,19 +484,14 @@ public class MapHeap implements Map> {
continue;
}
if (map == null) continue; // circumvention of a modified exception
- // check if the where case holds
- if (this.whereKey != null && this.isValue != null) {
- String v = map.get(this.whereKey);
- if (v == null) continue;
- if (!v.equals(this.isValue)) continue;
- }
// produce entry
Map.Entry> entry = new AbstractMap.SimpleImmutableEntry>(nextKey, map);
return entry;
}
return null;
}
- } // class mapIterator
+ } // class FullMapIterator
+
@Override
public void putAll(final Map extends byte[], ? extends Map> map) {
diff --git a/source/net/yacy/kelondro/util/ByteBuffer.java b/source/net/yacy/kelondro/util/ByteBuffer.java
index 051467e61..bd0985a60 100644
--- a/source/net/yacy/kelondro/util/ByteBuffer.java
+++ b/source/net/yacy/kelondro/util/ByteBuffer.java
@@ -32,6 +32,7 @@ import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Iterator;
import java.util.List;
import java.util.Properties;
@@ -140,6 +141,7 @@ public final class ByteBuffer extends OutputStream {
this.offset = 0;
}
+ @Override
public void write(final int b) {
write((byte) (b & 0xff));
}
@@ -518,6 +520,20 @@ public final class ByteBuffer extends OutputStream {
return false;
}
+ public static int remove(final Collection collection, final byte[] key) { // remove every element of the collection that equals(v, key) accepts; returns the number of removed elements
+ Iterator i = collection.iterator(); // explicit iterator so that elements can be removed while iterating
+ byte[] v;
+ int c = 0; // count of removed elements
+ while (i.hasNext()) {
+ v = i.next();
+ if (equals(v, key)) {
+ i.remove(); // the scan continues after a hit, so duplicates are removed as well
+ c++;
+ }
+ }
+ return c;
+ }
+
public static List split(final byte[] b, final byte s) {
final ArrayList a = new ArrayList();
int c = 0;
diff --git a/source/net/yacy/peers/SeedDB.java b/source/net/yacy/peers/SeedDB.java
index 29f67e052..a94b5e321 100644
--- a/source/net/yacy/peers/SeedDB.java
+++ b/source/net/yacy/peers/SeedDB.java
@@ -29,10 +29,8 @@ import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
-import java.lang.ref.SoftReference;
import java.net.InetAddress;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
@@ -98,8 +96,6 @@ public final class SeedDB implements AlternativeDomainNames {
private Seed mySeed; // my own seed
private final Set myBotIDs; // list of id's that this bot accepts as robots.txt identification
- private final Map nameLookupCache; // a name-to-hash relation
- private final Map> ipLookupCache;
public SeedDB(
final File networkRoot,
@@ -128,12 +124,6 @@ public final class SeedDB implements AlternativeDomainNames {
this.seedPassiveDB = openSeedTable(this.seedPassiveDBFile);
this.seedPotentialDB = openSeedTable(this.seedPotentialDBFile);
- // start our virtual DNS service for yacy peers with empty cache
- this.nameLookupCache = new HashMap();
-
- // cache for reverse name lookup
- this.ipLookupCache = new HashMap>();
-
// check if we are in the seedCaches: this can happen if someone else published our seed
removeMySeed();
@@ -184,12 +174,6 @@ public final class SeedDB implements AlternativeDomainNames {
this.seedPassiveDB = openSeedTable(this.seedPassiveDBFile);
this.seedPotentialDB = openSeedTable(this.seedPotentialDBFile);
- // start our virtual DNS service for yacy peers with empty cache
- this.nameLookupCache.clear();
-
- // cache for reverse name lookup
- this.ipLookupCache.clear();
-
// check if we are in the seedCaches: this can happen if someone else published our seed
removeMySeed();
@@ -497,7 +481,6 @@ public final class SeedDB implements AlternativeDomainNames {
//seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
synchronized (this) {
try {
- this.nameLookupCache.put(seed.getName(), seed.hash);
final ConcurrentMap seedPropMap = seed.getMap();
this.seedActiveDB.insert(ASCII.getBytes(seed.hash), seedPropMap);
this.seedPassiveDB.delete(ASCII.getBytes(seed.hash));
@@ -513,7 +496,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (seed.isProper(false) != null) return;
synchronized (this) {
try {
- this.nameLookupCache.remove(seed.getName());
this.seedActiveDB.delete(ASCII.getBytes(seed.hash));
this.seedPotentialDB.delete(ASCII.getBytes(seed.hash));
} catch (final Exception e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
@@ -532,7 +514,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (seed.isProper(false) != null) return;
synchronized (this) {
try {
- this.nameLookupCache.remove(seed.getName());
this.seedActiveDB.delete(ASCII.getBytes(seed.hash));
this.seedPassiveDB.delete(ASCII.getBytes(seed.hash));
} catch (final Exception e) { Log.logWarning("yacySeedDB", "could not remove hash ("+ e.getClass() +"): "+ e.getMessage()); }
@@ -637,17 +618,8 @@ public final class SeedDB implements AlternativeDomainNames {
return this.mySeed;
}
- // then try to use the cache
peerName = peerName.toLowerCase();
- final String seedhash = this.nameLookupCache.get(peerName);
Seed seed;
- if (seedhash != null) {
- seed = this.get(seedhash);
- if (seed != null) {
- //System.out.println("*** found lookupByName in cache: " + peerName);
- return seed;
- }
- }
// enumerate the cache
String name = Seed.checkPeerName(peerName);
@@ -659,7 +631,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (entry == null) break;
seed = this.getConnected(ASCII.String(entry.getKey()));
if (seed == null) continue;
- if (seed.isProper(false) == null) this.nameLookupCache.put(seed.getName().toLowerCase(), seed.hash);
//System.out.println("*** found lookupByName in seedActiveDB: " + peerName);
return seed;
}
@@ -672,7 +643,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (entry == null) break;
seed = this.getConnected(ASCII.String(entry.getKey()));
if (seed == null) continue;
- if (seed.isProper(false) == null) this.nameLookupCache.put(seed.getName().toLowerCase(), seed.hash);
//System.out.println("*** found lookupByName in seedPassiveDB: " + peerName);
return seed;
}
@@ -682,7 +652,6 @@ public final class SeedDB implements AlternativeDomainNames {
// check local seed
if (this.mySeed == null) initMySeed();
name = this.mySeed.getName().toLowerCase();
- if (this.mySeed.isProper(false) == null) this.nameLookupCache.put(name, this.mySeed.hash);
if (name.equals(peerName)) return this.mySeed;
// nothing found
return null;
@@ -705,16 +674,7 @@ public final class SeedDB implements AlternativeDomainNames {
}
// then try to use the cache
- final SoftReference ref = this.ipLookupCache.get(peerIP);
Seed seed = null;
- if (ref != null) {
- seed = ref.get();
- if (seed != null) {
- //System.out.println("*** found lookupByIP in cache: " + peerIP.toString() + " -> " + this.mySeed.getName());
- return seed;
- }
- }
-
String ipString = peerIP.getHostAddress();
Map.Entry> entry;
@@ -729,7 +689,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (port > 0 && Integer.parseInt(p) != port) continue;
seed = this.getConnected(ASCII.String(entry.getKey()));
if (seed == null) continue;
- this.ipLookupCache.put(peerIP, new SoftReference(seed));
//System.out.println("*** found lookupByIP in connected: " + peerIP.toString() + " -> " + seed.getName());
return seed;
}
@@ -748,7 +707,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (port > 0 && Integer.parseInt(p) != port) continue;
seed = this.getDisconnected(ASCII.String(entry.getKey()));
if (seed == null) continue;
- this.ipLookupCache.put(peerIP, new SoftReference(seed));
//System.out.println("*** found lookupByIP in disconnected: " + peerIP.toString() + " -> " + seed.getName());
return seed;
}
@@ -767,7 +725,6 @@ public final class SeedDB implements AlternativeDomainNames {
if (port > 0 && Integer.parseInt(p) != port) continue;
seed = this.getPotential(ASCII.String(entry.getKey()));
if (seed == null) continue;
- this.ipLookupCache.put(peerIP, new SoftReference(seed));
//System.out.println("*** found lookupByIP in potential: " + peerIP.toString() + " -> " + seed.getName());
return seed;
}