* implemented (finished) class indexRowSetContainer

* replaced indexTreeMapContainer by indexRowSetContainer
* deleted indexTreeMapContainer and abstract class
This is another step to the new database structure

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2343 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 9183d21f25
commit ebc2233092

@ -1,64 +0,0 @@
// indexAbstractContainer.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 20.05.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
import de.anomic.kelondro.kelondroBase64Order;
// Base implementation shared by indexContainer implementations: stores the word hash
// and provides generic helpers built on top of the abstract per-entry operations
// (add(entry, time) and remove(urlHash)) declared in indexContainer.
public abstract class indexAbstractContainer implements indexContainer {
// hash of the word that all entries in this container belong to
private String wordHash;
// NOTE(review): updateTime is private and never assigned anywhere in this class,
// so updated() always returns 0 here -- subclasses appear to shadow it with their
// own field (see indexTreeMapContainer); verify before relying on updated().
private long updateTime;
public void setWordHash(String newWordHash) {
// this is used to replicate a container for different word indexes during global search
this.wordHash = newWordHash;
}
// returns the epoch-millis timestamp of the last update (see NOTE on updateTime above)
public long updated() {
return updateTime;
}
// returns the hash of the word this container indexes
public String getWordHash() {
return wordHash;
}
// convenience overload: add an entry stamped with the current time
public int add(indexEntry entry) {
return add(entry, System.currentTimeMillis());
}
// removes each given url hash, but only when the given word hash matches this
// container; returns the number of entries that were actually present and removed
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
if (!wordHash.equals(this.wordHash)) return 0;
int count = 0;
// remove(...) returns null when the url hash was not in the container
for (int i = 0; i < urlHashes.length; i++) count += (remove(urlHashes[i]) == null) ? 0 : 1;
return count;
}
// hash derived from the first 4 base64 characters of the word hash
// NOTE(review): throws NullPointerException if wordHash was never set; presumably
// containers always receive a word hash before use in hashed collections -- verify.
public int hashCode() {
return (int) kelondroBase64Order.enhancedCoder.decodeLong(this.wordHash.substring(0, 4));
}
}

@ -30,7 +30,7 @@ package de.anomic.index;
public abstract class indexAbstractRI implements indexRI {
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
indexContainer container = new indexRowSetContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}

@ -53,10 +53,7 @@ public interface indexContainer {
public int add(indexEntry[] entries, long updateTime);
public int add(indexContainer c, long maxTime);
public Set urlHashes();
public boolean contains(String urlHash) ;
public indexEntry get(String urlHash);
public indexEntry[] getEntryArray() ;
public indexEntry remove(String urlHash);
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
@ -66,6 +63,4 @@ public interface indexContainer {
public String toString();
public int hashCode();
//public void joinConstructive(indexContainer c, long time, int maxDistance);
}

@ -101,7 +101,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
Map.Entry entry;
String wordHash;
indexTreeMapContainer container;
indexContainer container;
long updateTime;
indexEntry iEntry;
kelondroRow.Entry row = dumpArray.row().newEntry();
@ -110,7 +110,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
synchronized (kCache) {
Iterator i = kCache.values().iterator();
while (i.hasNext()) {
container = (indexTreeMapContainer) i.next();
container = (indexContainer) i.next();
// put entries on stack
if (container != null) {
@ -139,7 +139,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
entry = (Map.Entry) i.next();
wordHash = (String) entry.getKey();
updateTime = getUpdateTime(wordHash);
container = (indexTreeMapContainer) entry.getValue();
container = (indexContainer) entry.getValue();
// put entries on stack
if (container != null) {
@ -269,7 +269,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public int indexSize(String wordHash) {
int size = 0;
indexTreeMapContainer cacheIndex = (indexTreeMapContainer) wCache.get(wordHash);
indexContainer cacheIndex = (indexContainer) wCache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size();
return size;
}
@ -326,13 +326,13 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
// find entries in kCache that are too old for that place and shift them to the wCache
long time;
Long l;
indexTreeMapContainer container;
indexContainer container;
synchronized (kCache) {
while (kCache.size() > 0) {
l = (Long) kCache.firstKey();
time = l.longValue();
if (System.currentTimeMillis() - time < kCacheMaxAge) return;
container = (indexTreeMapContainer) kCache.remove(l);
container = (indexContainer) kCache.remove(l);
addEntries(container, container.updated(), false);
}
}
@ -386,13 +386,13 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime_dummy) {
return (indexTreeMapContainer) wCache.get(wordHash);
return (indexContainer) wCache.get(wordHash);
}
public indexContainer deleteContainer(String wordHash) {
// returns the index that had been deleted
synchronized (wCache) {
indexTreeMapContainer container = (indexTreeMapContainer) wCache.remove(wordHash);
indexContainer container = (indexContainer) wCache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
@ -401,7 +401,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
synchronized (wCache) {
indexTreeMapContainer c = (indexTreeMapContainer) deleteContainer(wordHash);
indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) {
if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true;
this.addEntries(c, System.currentTimeMillis(), false);
@ -414,7 +414,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
if (urlHashes.size() == 0) return 0;
int count = 0;
synchronized (wCache) {
indexTreeMapContainer c = (indexTreeMapContainer) deleteContainer(wordHash);
indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) {
count = c.removeEntries(wordHash, urlHashes, deleteComplete);
if (c.size() != 0) this.addEntries(c, System.currentTimeMillis(), false);
@ -432,13 +432,13 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
Iterator i = kCache.entrySet().iterator();
Map.Entry entry;
Long l;
indexTreeMapContainer c;
indexContainer c;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
l = (Long) entry.getKey();
// get container
c = (indexTreeMapContainer) entry.getValue();
c = (indexContainer) entry.getValue();
if (c.remove(urlHash) != null) {
if (c.size() == 0) {
i.remove();
@ -466,8 +466,8 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} else synchronized (wCache) {
// put container into wCache
String wordHash = container.getWordHash();
indexTreeMapContainer entries = (indexTreeMapContainer) wCache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new indexTreeMapContainer(wordHash);
indexContainer entries = (indexContainer) wCache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new indexRowSetContainer(wordHash);
added = entries.add(container, -1);
if (added > 0) {
wCache.put(wordHash, entries);
@ -482,15 +482,15 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
if (dhtCase) synchronized (kCache) {
// put container into kCache
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
indexContainer container = new indexRowSetContainer(wordHash);
container.add(newEntry);
kCache.put(new Long(updateTime + kCacheInc), container);
kCacheInc++;
if (kCacheInc > 10000) kCacheInc = 0;
return null;
} else synchronized (wCache) {
indexTreeMapContainer container = (indexTreeMapContainer) wCache.get(wordHash);
if (container == null) container = new indexTreeMapContainer(wordHash);
indexContainer container = (indexContainer) wCache.get(wordHash);
if (container == null) container = new indexRowSetContainer(wordHash);
indexEntry[] entries = new indexEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
wCache.put(wordHash, container);

@ -30,7 +30,11 @@ import java.lang.reflect.Method;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
@ -38,14 +42,19 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
private String wordHash;
public indexRowSetContainer(kelondroRow rowdef) {
super(rowdef);
public indexRowSetContainer(String wordHash) {
this(wordHash, new kelondroNaturalOrder(true), 0);
}
public indexRowSetContainer(String wordHash, kelondroOrder ordering, int column) {
super(indexURLEntry.urlEntryRow);
this.wordHash = wordHash;
this.lastTimeWrote = 0;
this.setOrdering(ordering, column);
}
public indexContainer topLevelClone() {
indexContainer newContainer = new indexRowSetContainer(this.rowdef);
newContainer.setWordHash(this.wordHash);
newContainer.setOrdering(this.sortOrder, this.sortColumn);
indexContainer newContainer = new indexRowSetContainer(this.wordHash, this.sortOrder, this.sortColumn);
newContainer.add(this, -1);
return newContainer;
}
@ -97,47 +106,72 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
private boolean addi(indexEntry entry) {
// returns true if the new entry was added, false if it already existed
indexEntry oldEntry = new indexURLEntry(this.put(entry.toKelondroEntry())); // FIXME: see if cloning is necessary
if ((oldEntry != null) && (entry.isOlder(oldEntry))) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back
return false;
kelondroRow.Entry oldEntryRow = this.put(entry.toKelondroEntry());
if (oldEntryRow == null) {
return true;
} else {
indexEntry oldEntry = new indexURLEntry(oldEntryRow); // FIXME: see if cloning is necessary
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back
return false;
} else {
return true;
}
}
return (oldEntry == null);
}
public boolean contains(String urlHash) {
// TODO Auto-generated method stub
return false;
}
public indexEntry get(String urlHash) {
// TODO Auto-generated method stub
return null;
}
public indexEntry[] getEntryArray() {
// TODO Auto-generated method stub
return null;
kelondroRow.Entry entry = this.get(urlHash.getBytes());
if (entry == null) return null;
return new indexURLEntry(entry);
}
public indexEntry remove(String urlHash) {
// TODO Auto-generated method stub
return null;
kelondroRow.Entry entry = this.remove(urlHash.getBytes());
if (entry == null) return null;
return new indexURLEntry(entry);
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
// TODO Auto-generated method stub
return false;
if (!wordHash.equals(this.wordHash)) return false;
return remove(urlHash) != null;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
// TODO Auto-generated method stub
return 0;
if (!wordHash.equals(this.wordHash)) return 0;
int count = 0;
Iterator i = urlHashes.iterator();
while (i.hasNext()) count += (remove((String) i.next()) == null) ? 0 : 1;
return count;
}
public Iterator entries() {
// TODO Auto-generated method stub
return null;
// returns an iterator of indexEntry objects
return new entryIterator();
}
public class entryIterator implements Iterator {
Iterator rowEntryIterator;
public entryIterator() {
rowEntryIterator = rows();
}
public boolean hasNext() {
return rowEntryIterator.hasNext();
}
public Object next() {
kelondroRow.Entry rentry = (kelondroRow.Entry) rowEntryIterator.next();
if (rentry == null) return null;
return new indexURLEntry(rentry);
}
public void remove() {
rowEntryIterator.remove();
}
}
public static Method containerMergeMethod = null;
@ -163,9 +197,137 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
return c;
}
public Set urlHashes() {
// TODO Auto-generated method stub
return null;
public static indexContainer joinContainer(Set containers, long time, int maxDistance) {
long stamp = System.currentTimeMillis();
// order entities by their size
TreeMap map = new TreeMap();
indexContainer singleContainer;
Iterator i = containers.iterator();
int count = 0;
while (i.hasNext()) {
// get next entity:
singleContainer = (indexContainer) i.next();
// check result
if ((singleContainer == null) || (singleContainer.size() == 0)) return new indexRowSetContainer(null); // as this is a conjunction of searches, we have no result if any word is not known
// store result in order of result size
map.put(new Long(singleContainer.size() * 1000 + count), singleContainer);
count++;
}
// check if there is any result
if (map.size() == 0) return new indexRowSetContainer(null); // no result, nothing found
// the map now holds the search results in order of number of hits per word
// we now must pairwise build up a conjunction of these sets
Long k = (Long) map.firstKey(); // the smallest, which means, the one with the least entries
indexContainer searchA, searchB, searchResult = (indexContainer) map.remove(k);
while ((map.size() > 0) && (searchResult.size() > 0)) {
// take the first element of map which is a result and combine it with result
k = (Long) map.firstKey(); // the next smallest...
time -= (System.currentTimeMillis() - stamp); stamp = System.currentTimeMillis();
searchA = searchResult;
searchB = (indexContainer) map.remove(k);
searchResult = indexRowSetContainer.joinConstructive(searchA, searchB, 2 * time / (map.size() + 1), maxDistance);
// free resources
searchA = null;
searchB = null;
}
// in 'searchResult' is now the combined search result
if (searchResult.size() == 0) return new indexRowSetContainer(null);
return searchResult;
}
// join methods
private static int log2(int x) {
int l = 0;
while (x > 0) {x = x >> 1; l++;}
return l;
}
public static indexContainer joinConstructive(indexContainer i1, indexContainer i2, long time, int maxDistance) {
if ((i1 == null) || (i2 == null)) return null;
if ((i1.size() == 0) || (i2.size() == 0)) return new indexRowSetContainer(null);
// decide which method to use
int high = ((i1.size() > i2.size()) ? i1.size() : i2.size());
int low = ((i1.size() > i2.size()) ? i2.size() : i1.size());
int stepsEnum = 10 * (high + low - 1);
int stepsTest = 12 * log2(high) * low;
// start most efficient method
if (stepsEnum > stepsTest) {
if (i1.size() < i2.size())
return joinConstructiveByTest(i1, i2, time, maxDistance);
else
return joinConstructiveByTest(i2, i1, time, maxDistance);
} else {
return joinConstructiveByEnumeration(i1, i2, time, maxDistance);
}
}
private static indexContainer joinConstructiveByTest(indexContainer small, indexContainer large, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY TEST");
indexContainer conj = new indexRowSetContainer(null); // start with empty search result
Iterator se = small.entries();
indexEntry ie0, ie1;
long stamp = System.currentTimeMillis();
while ((se.hasNext()) && ((System.currentTimeMillis() - stamp) < time)) {
ie0 = (indexEntry) se.next();
ie1 = large.get(ie0.urlHash());
if (ie1 != null) {
// this is a hit. Calculate word distance:
ie0.combineDistance(ie1);
if (ie0.worddistance() <= maxDistance) conj.add(ie0);
}
}
return conj;
}
private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
indexContainer conj = new indexRowSetContainer(null); // start with empty search result
if (!((i1.getOrdering().signature().equals(i2.getOrdering().signature())) &&
(i1.getOrderColumn() == i2.getOrderColumn()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Iterator e2 = i2.entries();
int c;
if ((e1.hasNext()) && (e2.hasNext())) {
indexEntry ie1;
indexEntry ie2;
ie1 = (indexEntry) e1.next();
ie2 = (indexEntry) e2.next();
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = i1.getOrdering().compare(ie1.urlHash(), ie2.urlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
} else if (c > 0) {
if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break;
} else {
// we have found the same urls in different searches!
ie1.combineDistance(ie2);
if (ie1.worddistance() <= maxDistance) conj.add(ie1);
if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break;
}
}
}
return conj;
}
public String toString() {
return "C[" + wordHash + "] has " + this.size() + " entries";
}
public int hashCode() {
return (int) kelondroBase64Order.enhancedCoder.decodeLong(this.wordHash.substring(0, 4));
}
}

@ -1,322 +0,0 @@
// indexTreeMapContainer.java
// (C) 2005, 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 07.05.2005 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
an indexContainer is a bag of indexEntries for a single word
such an container represents a RWI snippet:
it collects a new RWI until it is so big that it should be flushed to either
- an indexAssortment: collection of indexContainers of same size or
- the backend storage
the creationTime is necessary to organize caching of containers
*/
package de.anomic.index;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
// A TreeMap-backed bag of indexEntry objects for a single word (an RWI snippet).
// Entries are keyed by their url hash; key ordering is the given kelondroOrder.
// NOTE(review): this class is removed by this changeset and replaced by indexRowSetContainer.
public final class indexTreeMapContainer extends indexAbstractContainer implements indexContainer {
// hash of the word all entries belong to (shadows the field in indexAbstractContainer)
private String wordHash;
private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
// epoch millis of the last write (shadows the field in indexAbstractContainer)
private long updateTime;
// ordering applied to the url-hash keys of the TreeMap
private kelondroOrder ordering;
// ordering column, kept only for getOrderColumn() and the equality check in joins
private int order_column;
// default: natural base64 ordering over column 0
public indexTreeMapContainer(String wordHash) {
this(wordHash, new kelondroNaturalOrder(true), 0);
}
public indexTreeMapContainer(String wordHash, kelondroOrder ordering, int column) {
this.wordHash = wordHash;
this.updateTime = 0;
this.ordering = ordering;
this.order_column = column;
container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
}
// deep-copies this container (same word hash, ordering and entries); the -1 maxTime
// means the copy loop is not time-limited
public indexContainer topLevelClone() {
indexContainer newContainer = new indexTreeMapContainer(this.wordHash, this.ordering, this.order_column);
newContainer.add(this, -1);
return newContainer;
}
public void setWordHash(String newWordHash) {
// this is used to replicate a container for different word indexes during global search
this.wordHash = newWordHash;
}
// removes all entries; word hash and ordering are kept
public void clear() {
container.clear();
}
// number of entries (url hashes) in this container
public int size() {
return container.size();
}
// epoch millis of the most recent write to this container
public long updated() {
return updateTime;
}
// NOTE(review): this only swaps the ordering fields; the TreeMap was constructed
// with the original comparator and is NOT re-sorted, so ordering reported by
// getOrdering() can disagree with the actual key order after this call -- verify intent.
public void setOrdering(kelondroOrder newOrder, int newColumn) {
this.ordering = newOrder;
this.order_column = newColumn;
}
public kelondroOrder getOrdering() {
return this.ordering;
}
public int getOrderColumn() {
return this.order_column;
}
public String getWordHash() {
return wordHash;
}
// convenience overload: add an entry stamped with the current time
public int add(indexEntry entry) {
return add(entry, System.currentTimeMillis());
}
// returns 1 if the entry was new, 0 if a more recent entry was already present
public int add(indexEntry entry, long updateTime) {
this.updateTime = java.lang.Math.max(this.updateTime, updateTime);
return (addi(entry)) ? 1 : 0;
}
// adds an array of entries; returns the number of entries that were actually new
public int add(indexEntry[] entries, long updateTime) {
int c = 0;
for (int i = 0; i < entries.length; i++) if (addi(entries[i])) c++;
this.updateTime = java.lang.Math.max(this.updateTime, updateTime);
return c;
}
// copies entries from another container, optionally bounded by maxTime milliseconds
// (maxTime < 0 means unbounded)
public int add(indexContainer c, long maxTime) {
// returns the number of new elements
long startTime = System.currentTimeMillis();
if (c == null) return 0;
int x = 0;
synchronized (c) {
Iterator i = c.entries();
while ((i.hasNext()) && ((maxTime < 0) || ((startTime + maxTime) > System.currentTimeMillis()))) {
try {
// best-effort copy: a concurrent modification of the source just skips this step
if (addi((indexEntry) i.next())) x++;
} catch (ConcurrentModificationException e) {}
}
}
this.updateTime = java.lang.Math.max(this.updateTime, c.updated());
return x;
}
private boolean addi(indexEntry entry) {
// returns true if the new entry was added, false if it already existed
indexEntry oldEntry = (indexEntry) container.put(entry.urlHash(), entry);
if ((oldEntry != null) && (entry.isOlder(oldEntry))) { // A more recent Entry is already in this container
container.put(entry.urlHash(), oldEntry); // put it back
return false;
}
return (oldEntry == null);
}
public boolean contains(String urlHash) {
return container.containsKey(urlHash);
}
// returns the entry for the given url hash, or null if not present
public indexEntry get(String urlHash) {
return (indexEntry) container.get(urlHash);
}
// NOTE(review): values().toArray() returns Object[]; this cast to indexEntry[]
// throws ClassCastException at runtime -- should be toArray(new indexEntry[size()]).
public indexEntry[] getEntryArray() {
return (indexEntry[]) container.values().toArray();
}
// removes and returns the entry for the given url hash, or null if not present
public indexEntry remove(String urlHash) {
return (indexEntry) container.remove(urlHash);
}
// removes a single url hash, but only when the given word hash matches this container
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
if (!wordHash.equals(this.wordHash)) return false;
return remove(urlHash) != null;
}
// removes all given url hashes (if the word hash matches); returns the number
// of entries that were actually present and removed
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
if (!wordHash.equals(this.wordHash)) return 0;
int count = 0;
Iterator i = urlHashes.iterator();
while (i.hasNext()) count += (remove((String) i.next()) == null) ? 0 : 1;
return count;
}
public Iterator entries() {
// returns an iterator of indexEntry objects
return container.values().iterator();
}
public String toString() {
return "C[" + wordHash + "] has " + container.size() + " entries";
}
// hash derived from the first 4 base64 characters of the word hash
public int hashCode() {
return (int) kelondroBase64Order.enhancedCoder.decodeLong(this.wordHash.substring(0, 4));
}
// intersects all given containers (conjunctive search), smallest-first, within the
// given time budget; returns an empty container as soon as any input is empty
public static indexContainer joinContainer(Set containers, long time, int maxDistance) {
long stamp = System.currentTimeMillis();
// order entities by their size
TreeMap map = new TreeMap();
indexTreeMapContainer singleContainer;
Iterator i = containers.iterator();
int count = 0;
while (i.hasNext()) {
// get next entity:
singleContainer = (indexTreeMapContainer) i.next();
// check result
if ((singleContainer == null) || (singleContainer.size() == 0)) return new indexTreeMapContainer(null); // as this is a conjunction of searches, we have no result if any word is not known
// store result in order of result size
// key sorts by size; "+ count" disambiguates equal sizes (assumes < 1000 containers)
map.put(new Long(singleContainer.size() * 1000 + count), singleContainer);
count++;
}
// check if there is any result
if (map.size() == 0) return new indexTreeMapContainer(null); // no result, nothing found
// the map now holds the search results in order of number of hits per word
// we now must pairwise build up a conjunction of these sets
Long k = (Long) map.firstKey(); // the smallest, which means, the one with the least entries
indexContainer searchA, searchB, searchResult = (indexContainer) map.remove(k);
while ((map.size() > 0) && (searchResult.size() > 0)) {
// take the first element of map which is a result and combine it with result
k = (Long) map.firstKey(); // the next smallest...
time -= (System.currentTimeMillis() - stamp); stamp = System.currentTimeMillis();
searchA = searchResult;
searchB = (indexContainer) map.remove(k);
// remaining time budget is split over the remaining pairwise joins
searchResult = indexTreeMapContainer.joinConstructive(searchA, searchB, 2 * time / (map.size() + 1), maxDistance);
// free resources
searchA = null;
searchB = null;
}
// in 'searchResult' is now the combined search result
if (searchResult.size() == 0) return new indexTreeMapContainer(null);
return searchResult;
}
// join methods
// integer log2 (number of bits needed), used for the join cost estimate below
private static int log2(int x) {
int l = 0;
while (x > 0) {x = x >> 1; l++;}
return l;
}
// intersects two containers, choosing between lookup-based and merge-based join
// by an estimated step count
public static indexContainer joinConstructive(indexContainer i1, indexContainer i2, long time, int maxDistance) {
if ((i1 == null) || (i2 == null)) return null;
if ((i1.size() == 0) || (i2.size() == 0)) return new indexTreeMapContainer(null);
// decide which method to use
int high = ((i1.size() > i2.size()) ? i1.size() : i2.size());
int low = ((i1.size() > i2.size()) ? i2.size() : i1.size());
int stepsEnum = 10 * (high + low - 1);
int stepsTest = 12 * log2(high) * low;
// start most efficient method
if (stepsEnum > stepsTest) {
if (i1.size() < i2.size())
return joinConstructiveByTest(i1, i2, time, maxDistance);
else
return joinConstructiveByTest(i2, i1, time, maxDistance);
} else {
return joinConstructiveByEnumeration(i1, i2, time, maxDistance);
}
}
// intersection by probing: iterate the small container, look each url hash up in
// the large one; time-bounded, so the result may be partial when time runs out
private static indexContainer joinConstructiveByTest(indexContainer small, indexContainer large, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY TEST");
indexTreeMapContainer conj = new indexTreeMapContainer(null); // start with empty search result
Iterator se = small.entries();
indexEntry ie0, ie1;
long stamp = System.currentTimeMillis();
while ((se.hasNext()) && ((System.currentTimeMillis() - stamp) < time)) {
ie0 = (indexEntry) se.next();
ie1 = large.get(ie0.urlHash());
if (ie1 != null) {
// this is a hit. Calculate word distance:
ie0.combineDistance(ie1);
if (ie0.worddistance() <= maxDistance) conj.add(ie0);
}
}
return conj;
}
// intersection by parallel enumeration (merge walk); requires both containers to
// use the same ordering; time-bounded, so the result may be partial
private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
indexTreeMapContainer conj = new indexTreeMapContainer(null); // start with empty search result
if (!((i1.getOrdering().signature().equals(i2.getOrdering().signature())) &&
(i1.getOrderColumn() == i2.getOrderColumn()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Iterator e2 = i2.entries();
int c;
if ((e1.hasNext()) && (e2.hasNext())) {
indexEntry ie1;
indexEntry ie2;
ie1 = (indexEntry) e1.next();
ie2 = (indexEntry) e2.next();
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = i1.getOrdering().compare(ie1.urlHash(), ie2.urlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
} else if (c > 0) {
if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break;
} else {
// we have found the same urls in different searches!
ie1.combineDistance(ie2);
if (ie1.worddistance() <= maxDistance) conj.add(ie1);
if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
if (e2.hasNext()) ie2 = (indexEntry) e2.next(); else break;
}
}
}
return conj;
}
// returns a live view of the url hashes in this container (backed by the TreeMap)
public Set urlHashes() {
return container.keySet();
}
}

@ -49,7 +49,6 @@ import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.serverCodings;
@ -75,22 +74,22 @@ public class plasmaDHTChunk {
private int status = chunkStatus_UNDEFINED;
private String startPointHash;
private indexTreeMapContainer[] indexContainers = null;
private indexContainer[] indexContainers = null;
private HashMap urlCache; // String (url-hash) / plasmaCrawlLURL.Entry
private int idxCount;
private long selectionStartTime = 0;
private long selectionEndTime = 0;
public indexTreeMapContainer firstContainer() {
public indexContainer firstContainer() {
return indexContainers[0];
}
public indexTreeMapContainer lastContainer() {
public indexContainer lastContainer() {
return indexContainers[indexContainers.length - 1];
}
public indexTreeMapContainer[] containers() {
public indexContainer[] containers() {
return indexContainers;
}
@ -200,7 +199,7 @@ public class plasmaDHTChunk {
double maximumDistance = ((double) peerRedundancy * 2) / ((double) yacyCore.seedDB.sizeConnected());
while ((maxcount > refcount) && (indexContainerIterator.hasNext()) && ((container = (indexContainer) indexContainerIterator.next()) != null) && (container.size() > 0)
&& ((tmpContainers.size() == 0) || (yacyDHTAction.dhtDistance(container.getWordHash(), ((indexTreeMapContainer) tmpContainers.get(0)).getWordHash()) < maximumDistance))) {
&& ((tmpContainers.size() == 0) || (yacyDHTAction.dhtDistance(container.getWordHash(), ((indexContainer) tmpContainers.get(0)).getWordHash()) < maximumDistance))) {
// make an on-the-fly entity and insert values
int notBoundCounter = 0;
try {
@ -243,7 +242,7 @@ public class plasmaDHTChunk {
}
}
// create result
indexContainers = (indexTreeMapContainer[]) tmpContainers.toArray(new indexTreeMapContainer[tmpContainers.size()]);
indexContainers = (indexContainer[]) tmpContainers.toArray(new indexContainer[tmpContainers.size()]);
if ((indexContainers == null) || (indexContainers.length == 0)) {
log.logFine("No index available for index transfer, hash start-point " + startPointHash);
@ -256,13 +255,13 @@ public class plasmaDHTChunk {
return refcount;
} catch (kelondroException e) {
log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
indexContainers = new indexTreeMapContainer[0];
indexContainers = new indexContainer[0];
urlCache = new HashMap();
this.status = chunkStatus_FAILED;
return 0;
} catch (IOException e) {
log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
indexContainers = new indexTreeMapContainer[0];
indexContainers = new indexContainer[0];
urlCache = new HashMap();
this.status = chunkStatus_FAILED;
return 0;

@ -53,7 +53,7 @@ import de.anomic.server.serverInstantThread;
import de.anomic.yacy.yacySearch;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.index.indexRowSetContainer;
public final class plasmaSearchEvent extends Thread implements Runnable {
@ -86,8 +86,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.ranking = ranking;
this.urlStore = urlStore;
this.snippetCache = snippetCache;
this.rcLocal = new indexTreeMapContainer(null);
this.rcGlobal = new indexTreeMapContainer(null);
this.rcLocal = new indexRowSetContainer(null);
this.rcGlobal = new indexRowSetContainer(null);
this.rcGlobalCount = 0;
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
@ -178,13 +178,13 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// since this is a conjunction we return an empty entity if any word
// is not known
if (containers == null) {
rcLocal = new indexTreeMapContainer(null);
rcLocal = new indexRowSetContainer(null);
return 0;
}
// join the result
profileLocal.startTimer();
rcLocal = indexTreeMapContainer.joinContainer(containers,
rcLocal = indexRowSetContainer.joinContainer(containers,
profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_JOIN),
query.maxDistance);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_JOIN);
@ -220,7 +220,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
indexTreeMapContainer searchResult = new indexTreeMapContainer(null);
indexContainer searchResult = new indexRowSetContainer(null);
long preorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.startTimer();

@ -49,8 +49,8 @@ import java.util.Iterator;
import de.anomic.server.serverCodings;
import de.anomic.server.serverFileUtils;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.kelondro.kelondroBinSearch;
public final class plasmaSearchPreOrder {
@ -123,7 +123,7 @@ public final class plasmaSearchPreOrder {
return (indexEntry) pageAcc.remove(top);
}
public void addContainer(indexTreeMapContainer container, long maxTime) {
public void addContainer(indexContainer container, long maxTime) {
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
indexEntry iEntry;

@ -130,9 +130,10 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexURL;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
@ -1487,7 +1488,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String word = (String) wentry.getKey();
wordStat = (plasmaCondenser.wordStatProp) wentry.getValue();
String wordHash = indexEntryAttribute.word2hash(word);
indexTreeMapContainer wordIdxContainer = new indexTreeMapContainer(wordHash);
indexContainer wordIdxContainer = new indexRowSetContainer(wordHash);
indexEntry wordIdxEntry = new indexURLEntry(urlHash,
urlLength, urlComps,
wordStat.count,
@ -1517,7 +1518,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// transfering the index to the storage peer
HashMap resultObj = yacyClient.transferIndex(
seed,
(indexTreeMapContainer[])tmpContainers.toArray(new indexTreeMapContainer[tmpContainers.size()]),
(indexContainer[]) tmpContainers.toArray(new indexContainer[tmpContainers.size()]),
urlCache,
true,
120000);

@ -66,7 +66,6 @@ import de.anomic.index.indexRAMCacheRI;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
@ -371,7 +370,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public synchronized indexContainer deleteContainer(String wordHash) {
indexContainer c = ramCache.deleteContainer(wordHash);
if (c == null) c = new indexTreeMapContainer(wordHash);
if (c == null) c = new indexRowSetContainer(wordHash);
c.add(assortmentCluster.deleteContainer(wordHash, -1), -1);
c.add(backend.deleteContainer(wordHash), -1);
return c;

@ -59,7 +59,7 @@ import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroTree;
@ -246,7 +246,7 @@ public final class plasmaWordIndexAssortment {
if (row == null) return null;
String wordHash = row.getColString(0, null);
final long updateTime = row.getColLongB256(2);
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
indexContainer container = new indexRowSetContainer(wordHash);
int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) {
container.add(

@ -58,7 +58,6 @@ import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroObjectCache;
import de.anomic.kelondro.kelondroRecords;
@ -160,10 +159,10 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
int clusterStart = clusterCount - (int) (Math.random() * (clusterCount - clusterMinStart));
// do the insert
indexTreeMapContainer c;
indexContainer c;
Iterator i = newContainer.entries();
for (int j = clusterStart; j >= 1; j--) {
c = new indexTreeMapContainer(newContainer.getWordHash());
c = new indexRowSetContainer(newContainer.getWordHash());
for (int k = 0; k < j; k++) {
if (i.hasNext()) {
c.add((indexEntry) i.next(), newContainer.updated());
@ -202,11 +201,11 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
if (need == 0) {
// we found spaces so that we can put in the newContainer into these spaces
indexTreeMapContainer c;
indexContainer c;
Iterator i = newContainer.entries();
for (int j = testsize - 1; j >= 0; j--) {
if (spaces[j] == 0) continue;
c = new indexTreeMapContainer(newContainer.getWordHash());
c = new indexRowSetContainer(newContainer.getWordHash());
for (int k = 0; k <= j; k++) {
assert (i.hasNext());
c.add((indexEntry) i.next(), newContainer.updated());
@ -232,7 +231,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
public indexContainer deleteContainer(String wordHash, long maxTime) {
// removes all records from all the assortments and return them
indexContainer buffer, record = new indexTreeMapContainer(wordHash);
indexContainer buffer, record = new indexRowSetContainer(wordHash);
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long remainingTime;
for (int i = 0; i < clusterCount; i++) {
@ -257,7 +256,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
*/
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
indexContainer buffer, record = new indexTreeMapContainer(wordHash);
indexContainer buffer, record = new indexRowSetContainer(wordHash);
boolean found = false;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
@ -273,7 +272,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
indexContainer buffer, record = new indexTreeMapContainer(wordHash);
indexContainer buffer, record = new indexRowSetContainer(wordHash);
int initialSize = urlHashes.size();
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
@ -298,7 +297,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
// collect all records from all the assortments and return them
indexContainer buffer, record = new indexTreeMapContainer(wordHash);
indexContainer buffer, record = new indexRowSetContainer(wordHash);
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long remainingTime;
for (int i = 0; i < clusterCount; i++) {

@ -54,7 +54,7 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.index.indexRowSetContainer;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
@ -230,7 +230,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
if (plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists()) {
plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
indexContainer container = new indexRowSetContainer(wordHash);
indexEntry entry;
Iterator i = entity.elements(true);
while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) {
@ -239,7 +239,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
}
return container;
} else {
return new indexTreeMapContainer(wordHash);
return new indexRowSetContainer(wordHash);
}
}
@ -254,7 +254,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
public indexContainer deleteContainer(String wordHash) {
plasmaWordIndexFile.removePlasmaIndex(databaseRoot, wordHash);
return new indexTreeMapContainer(wordHash);
return new indexRowSetContainer(wordHash);
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {

@ -56,7 +56,7 @@ import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexTreeMapContainer;
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.plasma.plasmaCrawlLURL;
@ -468,9 +468,9 @@ public final class yacyClient {
// create containers
final int words = wordhashes.length() / indexEntryAttribute.wordHashLength;
indexTreeMapContainer[] container = new indexTreeMapContainer[words];
indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) {
container[i] = new indexTreeMapContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength));
container[i] = new indexRowSetContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength));
}
// insert results to containers

Loading…
Cancel
Save