diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 4d34a1430..640fee2fa 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -305,7 +305,7 @@ public class IndexControlRWIs_p { // generate list if (post.containsKey("keyhashsimilar")) try { - final Iterator> containerIt = segment.termIndex().references(keyhash, true, 256, false).iterator(); + final Iterator> containerIt = segment.termIndex().referenceContainer(keyhash, true, 256, false).iterator(); ReferenceContainer container; i = 0; int rows = 0, cols = 0; diff --git a/source/de/anomic/search/Segment.java b/source/de/anomic/search/Segment.java index c6d2445f8..7ac19d40f 100644 --- a/source/de/anomic/search/Segment.java +++ b/source/de/anomic/search/Segment.java @@ -470,7 +470,7 @@ public class Segment { DigestURI url = null; final HandleSet urlHashs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); try { - Iterator> indexContainerIterator = Segment.this.termIndex.references(this.startHash, false, 100, false).iterator(); + Iterator> indexContainerIterator = Segment.this.termIndex.referenceContainer(this.startHash, false, 100, false).iterator(); while (indexContainerIterator.hasNext() && this.run) { waiter(); container = indexContainerIterator.next(); @@ -503,7 +503,7 @@ public class Segment { if (!containerIterator.hasNext()) { // We may not be finished yet, try to get the next chunk of wordHashes - final TreeSet> containers = Segment.this.termIndex.references(container.getTermHash(), false, 100, false); + final TreeSet> containers = Segment.this.termIndex.referenceContainer(container.getTermHash(), false, 100, false); indexContainerIterator = containers.iterator(); // Make sure we don't get the same wordhash twice, but don't skip a word if ((indexContainerIterator.hasNext()) && (!container.getTermHash().equals(indexContainerIterator.next().getTermHash()))) { diff --git 
a/source/de/anomic/yacy/dht/Dispatcher.java b/source/de/anomic/yacy/dht/Dispatcher.java index 776f6f956..33bb42742 100755 --- a/source/de/anomic/yacy/dht/Dispatcher.java +++ b/source/de/anomic/yacy/dht/Dispatcher.java @@ -168,7 +168,7 @@ public class Dispatcher { final ArrayList> containers = new ArrayList>(maxContainerCount); - final Iterator> indexContainerIterator = this.segment.termIndex().references(hash, true, ram); + final Iterator> indexContainerIterator = this.segment.termIndex().referenceContainerIterator(hash, true, ram); ReferenceContainer container; int refcount = 0; diff --git a/source/net/yacy/cora/ranking/AbstractOrder.java b/source/net/yacy/cora/ranking/AbstractOrder.java new file mode 100644 index 000000000..cbc74d556 --- /dev/null +++ b/source/net/yacy/cora/ranking/AbstractOrder.java @@ -0,0 +1,77 @@ +/** + * AbstractOrder + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 25.08.2011 at http://yacy.net + * + * $LastChangedDate$ + * $LastChangedRevision$ + * $LastChangedBy$ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . 
+ */ + +package net.yacy.cora.ranking; + +public abstract class AbstractOrder implements Order { + + protected A zero = null; + protected boolean asc = true; + + @Override + abstract public Order clone(); + + public A zero() { + return this.zero; + } + + public void direction(final boolean ascending) { + this.asc = ascending; + } + + public long partition(final A key, final int forks) { + final long d = (Long.MAX_VALUE / forks) + ((Long.MAX_VALUE % forks) + 1) / forks; + return cardinal(key) / d; + } + + public void rotate(final A newzero) { + this.zero = newzero; + } + + @SuppressWarnings("unchecked") + @Override + public boolean equals(final Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (!(obj instanceof Order)) return false; + final Order other = (Order) obj; + final String thisSig = signature(); + final String otherSig = other.signature(); + if ((thisSig == null) || (otherSig == null)) return false; + return thisSig.equals(otherSig); + } + + @Override + public int hashCode() { + return signature().hashCode(); + } + + public A smallest(final A a, final A b) { + return (compare(a, b) > 0) ? b : a; + } + + public A largest(final A a, final A b) { + return (compare(a, b) > 0) ? a : b; + } +} diff --git a/source/net/yacy/cora/ranking/Order.java b/source/net/yacy/cora/ranking/Order.java new file mode 100644 index 000000000..6b66cd157 --- /dev/null +++ b/source/net/yacy/cora/ranking/Order.java @@ -0,0 +1,85 @@ +/** + * Order + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 25.08.2011 at http://yacy.net + * + * $LastChangedDate$ + * $LastChangedRevision$ + * $LastChangedBy$ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
/**
 * A {@link Comparator} with additional operations: well-formedness check,
 * direction switch, signature, cardinal mapping, partitioning and rotation
 * around a zero point.
 *
 * @param <A> the type of the objects that are ordered
 */
public interface Order<A> extends Comparator<A> {

    /**
     * @param a the object to check
     * @return true if and only if a has only characters that belong to the implemented order
     */
    public boolean wellformed(A a);

    /**
     * @return an order object for the same ordering
     */
    public Order<A> clone();

    /**
     * the ordering direction can be changed at any time
     * @param ascending true for ascending order
     */
    public void direction(boolean ascending);

    /**
     * @return a signature String so that different orderings have different signatures
     */
    public String signature();

    /**
     * @param key the key to place
     * @param forks the number of partitions
     * @return the partition number computed for the key
     */
    public long partition(A key, int forks);

    /**
     * @param key the key to map
     * @return a cardinal number in the range of 0 .. Long.MAX_VALUE
     */
    public long cardinal(A key);

    @Override
    public int compare(A a, A b);

    /**
     * @param a first object
     * @param b second object
     * @return true if a and b are equal with respect to this order
     */
    public boolean equal(A a, A b);

    /**
     * @return the zero point of the Ordering; null if not defined
     */
    public A zero();

    /**
     * defines that the ordering rotates, and sets the zero point for the rotation
     * @param zero the new zero point
     */
    public void rotate(A zero);

    /**
     * used to compare different order objects; they may define the same ordering
     */
    @Override
    public boolean equals(Object o);

    @Override
    public int hashCode();
}
+ */ + +package net.yacy.cora.ranking; + +import java.util.Comparator; + +public class Rating { + + private final A object; + private long score; + + public Rating(final A o, final long score) { + this.object = o; + this.score = score; + } + + public void setScore(final long score) { + this.score = score; + } + + public long getScore() { + return this.score; + } + + public A getObject() { + return this.object; + } + + @SuppressWarnings("rawtypes") + public final static ObjectComparator objectComparator = new ObjectComparator(); + public final static ScoreComparator scoreComparator = new ScoreComparator(); + + public static class ObjectComparator implements Comparator> { + + @SuppressWarnings("unchecked") + public int compare(final Rating arg0, final Rating arg1) { + if (!(arg0 instanceof Comparable)) throw new UnsupportedOperationException("object class must implement comparable"); + return ((Comparable) arg0.getObject()).compareTo(arg1.getObject()); + } + } + + public static class ScoreComparator implements Comparator> { + + public int compare(final Rating arg0, final Rating arg1) { + if (arg0.getScore() < arg1.getScore()) return -1; + if (arg0.getScore() > arg1.getScore()) return 1; + return 0; + } + } + + public static class FoldedScoreComparator> implements Comparator> { + + public int compare(final Rating arg0, final Rating arg1) { + final int c = scoreComparator.compare(arg0, arg1); + if (c != 0) return c; + return arg0.getObject().compareTo(arg1.getObject()); + } + } + +} diff --git a/source/net/yacy/cora/ranking/RatingOrder.java b/source/net/yacy/cora/ranking/RatingOrder.java new file mode 100644 index 000000000..5d85680ba --- /dev/null +++ b/source/net/yacy/cora/ranking/RatingOrder.java @@ -0,0 +1,66 @@ +// RatingOrder.java +// ----------------------- +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://yacy.net +// Frankfurt, Germany, 2011 +// created 25.08.2011 +// +// $LastChangedDate: 2011-03-08 02:51:51 +0100 (Di, 08 Mrz 2011) 
$ +// $LastChangedRevision: 7567 $ +// $LastChangedBy: low012 $ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +package net.yacy.cora.ranking; + + +public class RatingOrder extends AbstractOrder> implements Order> { + + Order ordering; + + public RatingOrder(final Order ordering) { + this.ordering = ordering; + } + + public int compare(final Rating a, final Rating b) { + return this.ordering.compare(a.getObject(), b.getObject()); + } + + @Override + public boolean wellformed(final Rating a) { + return true; + } + + @Override + public String signature() { + return "RA"; + } + + @Override + public long cardinal(final Rating key) { + return key.getScore(); + } + + @Override + public boolean equal(final Rating a, final Rating b) { + return this.ordering.compare(a.getObject(), b.getObject()) == 1; + } + + @Override + public Order> clone() { + return this; + } +} diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java index e199bf740..f5cd3cea5 100644 --- a/source/net/yacy/kelondro/index/Row.java +++ b/source/net/yacy/kelondro/index/Row.java @@ -37,13 +37,13 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; import net.yacy.cora.document.UTF8; +import net.yacy.cora.ranking.AbstractOrder; +import net.yacy.cora.ranking.Order; import 
net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.AbstractOrder; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.NaturalOrder; -import net.yacy.kelondro.order.Order; import net.yacy.kelondro.util.ByteBuffer; import net.yacy.kelondro.util.kelondroException; diff --git a/source/net/yacy/kelondro/order/AbstractOrder.java b/source/net/yacy/kelondro/order/AbstractOrder.java deleted file mode 100644 index 19839565f..000000000 --- a/source/net/yacy/kelondro/order/AbstractOrder.java +++ /dev/null @@ -1,79 +0,0 @@ -// AbstractOrder.java -// ----------------------- -// part of The Kelondro Database -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// created 29.12.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.order; - -public abstract class AbstractOrder implements Order { - - protected A zero = null; - protected boolean asc = true; - - @Override - abstract public Order clone(); - - public A zero() { - return zero; - } - - public void direction(final boolean ascending) { - asc = ascending; - } - - public long partition(final A key, final int forks) { - final long d = (Long.MAX_VALUE / forks) + ((Long.MAX_VALUE % forks) + 1) / forks; - return cardinal(key) / d; - } - - public void rotate(final A newzero) { - this.zero = newzero; - } - - @SuppressWarnings("unchecked") - @Override - public boolean equals(final Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (!(obj instanceof Order)) return false; - Order other = (Order) obj; - final String thisSig = this.signature(); - final String otherSig = other.signature(); - if ((thisSig == null) || (otherSig == null)) return false; - return thisSig.equals(otherSig); - } - - @Override - public int hashCode() { - return this.signature().hashCode(); - } - - public A smallest(A a, A b) { - return (compare(a, b) > 0) ? b : a; - } - - public A largest(A a, A b) { - return (compare(a, b) > 0) ? 
a : b; - } -} diff --git a/source/net/yacy/kelondro/order/Base64Order.java b/source/net/yacy/kelondro/order/Base64Order.java index 0121bd04a..15c5f91fa 100644 --- a/source/net/yacy/kelondro/order/Base64Order.java +++ b/source/net/yacy/kelondro/order/Base64Order.java @@ -30,6 +30,8 @@ package net.yacy.kelondro.order; import java.util.Comparator; import net.yacy.cora.document.UTF8; +import net.yacy.cora.ranking.AbstractOrder; +import net.yacy.cora.ranking.Order; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.RowSpaceExceededException; diff --git a/source/net/yacy/kelondro/order/ByteOrder.java b/source/net/yacy/kelondro/order/ByteOrder.java index 4018a6810..972471560 100644 --- a/source/net/yacy/kelondro/order/ByteOrder.java +++ b/source/net/yacy/kelondro/order/ByteOrder.java @@ -27,6 +27,7 @@ package net.yacy.kelondro.order; +import net.yacy.cora.ranking.Order; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.RowSpaceExceededException; diff --git a/source/net/yacy/kelondro/order/MergeIterator.java b/source/net/yacy/kelondro/order/MergeIterator.java index 836af11c5..95aa695cf 100644 --- a/source/net/yacy/kelondro/order/MergeIterator.java +++ b/source/net/yacy/kelondro/order/MergeIterator.java @@ -29,6 +29,7 @@ import java.util.Comparator; import java.util.ConcurrentModificationException; import java.util.Iterator; +import net.yacy.cora.ranking.Order; import net.yacy.kelondro.logging.Log; diff --git a/source/net/yacy/kelondro/order/NaturalOrder.java b/source/net/yacy/kelondro/order/NaturalOrder.java index 88950f62b..9a76c682d 100644 --- a/source/net/yacy/kelondro/order/NaturalOrder.java +++ b/source/net/yacy/kelondro/order/NaturalOrder.java @@ -29,6 +29,8 @@ package net.yacy.kelondro.order; import java.util.Comparator; import java.util.Iterator; +import net.yacy.cora.ranking.AbstractOrder; +import net.yacy.cora.ranking.Order; import net.yacy.kelondro.index.HandleSet; import 
net.yacy.kelondro.index.RowSpaceExceededException; diff --git a/source/net/yacy/kelondro/order/Order.java b/source/net/yacy/kelondro/order/Order.java deleted file mode 100644 index 7b15f65d5..000000000 --- a/source/net/yacy/kelondro/order/Order.java +++ /dev/null @@ -1,58 +0,0 @@ -// Order.java -// ----------------------- -// part of The Kelondro Database -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// created 29.12.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.order; - -import java.util.Comparator; - -public interface Order extends Comparator { - - public boolean wellformed(A a); // returns true if and only if a has only characters that belong to the implemented order - - public Order clone(); - - public void direction(boolean ascending); // the ordering direction can be changed at any time - - public String signature(); // returns a signature String so that different orderings have different signatures - - public long partition(A key, int forkes); - - public long cardinal(A key); // returns a cardinal number in the range of 0 .. 
Long.MAX_VALUE - - public int compare(A a, A b); - - public boolean equal(A a, A b); - - public A zero(); // returns the zero point of the Ordering; null if not defined - - public void rotate(A zero); // defines that the ordering rotates, and sets the zero point for the rotation - - @Override - public boolean equals(Object o); // used to compare different order objects; they may define the same ordering - - @Override - public int hashCode(); -} diff --git a/source/net/yacy/kelondro/order/StringOrder.java b/source/net/yacy/kelondro/order/StringOrder.java index 0106e2454..a92561de4 100644 --- a/source/net/yacy/kelondro/order/StringOrder.java +++ b/source/net/yacy/kelondro/order/StringOrder.java @@ -30,6 +30,7 @@ package net.yacy.kelondro.order; import java.util.Comparator; import net.yacy.cora.document.UTF8; +import net.yacy.cora.ranking.Order; public class StringOrder implements Comparator { diff --git a/source/net/yacy/kelondro/rwi/AbstractBufferedIndex.java b/source/net/yacy/kelondro/rwi/AbstractBufferedIndex.java index 58d15f1ff..8acec27c7 100644 --- a/source/net/yacy/kelondro/rwi/AbstractBufferedIndex.java +++ b/source/net/yacy/kelondro/rwi/AbstractBufferedIndex.java @@ -10,7 +10,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -31,8 +31,7 @@ import java.io.IOException; import java.util.Iterator; import java.util.TreeSet; - -import net.yacy.kelondro.order.Order; +import net.yacy.cora.ranking.Order; public abstract class AbstractBufferedIndex extends AbstractIndex implements BufferedIndex { @@ -40,16 +39,16 @@ public abstract class AbstractBufferedIndex ext public AbstractBufferedIndex(final ReferenceFactory factory) { super(factory); } - - public synchronized TreeSet> references(byte[] startHash, final boolean rot, int count, boolean ram) throws 
IOException { + + public synchronized TreeSet> referenceContainer(byte[] startHash, final boolean rot, int count, final boolean ram) throws IOException { // creates a set of indexContainers // this does not use the cache - final Order> containerOrder = new ReferenceContainerOrder(factory, this.termKeyOrdering().clone()); + final Order> containerOrder = new ReferenceContainerOrder(this.factory, termKeyOrdering().clone()); if (startHash != null && startHash.length == 0) startHash = null; - ReferenceContainer emptyContainer = ReferenceContainer.emptyContainer(factory, startHash); + final ReferenceContainer emptyContainer = ReferenceContainer.emptyContainer(this.factory, startHash); containerOrder.rotate(emptyContainer); final TreeSet> containers = new TreeSet>(containerOrder); - final Iterator> i = references(startHash, rot, ram); + final Iterator> i = referenceContainerIterator(startHash, rot, ram); if (ram) count = Math.min(size(), count); ReferenceContainer container; // this loop does not terminate using the i.hasNex() predicate when rot == true diff --git a/source/net/yacy/kelondro/rwi/AbstractIndex.java b/source/net/yacy/kelondro/rwi/AbstractIndex.java index 4fc575986..06e826f06 100644 --- a/source/net/yacy/kelondro/rwi/AbstractIndex.java +++ b/source/net/yacy/kelondro/rwi/AbstractIndex.java @@ -10,7 +10,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -32,44 +32,44 @@ import java.util.Iterator; import java.util.TreeMap; import java.util.TreeSet; +import net.yacy.cora.ranking.Order; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.Order; public abstract class AbstractIndex implements Index { 
- + final protected ReferenceFactory factory; public AbstractIndex(final ReferenceFactory factory) { this.factory = factory; } - + /** * merge this index with another index * @param otherIndex - * @throws IOException - * @throws RowSpaceExceededException + * @throws IOException + * @throws RowSpaceExceededException */ - public void merge(Index otherIndex) throws IOException, RowSpaceExceededException { + public void merge(final Index otherIndex) throws IOException, RowSpaceExceededException { byte[] term; - for (ReferenceContainer otherContainer: otherIndex) { + for (final ReferenceContainer otherContainer: otherIndex) { term = otherContainer.getTermHash(); synchronized (this) { - ReferenceContainer container = this.get(term, null); + final ReferenceContainer container = get(term, null); if (container == null) { this.add(otherContainer); } else { container.merge(otherContainer); - this.delete(term); // in some file-based environments we cannot just change the container + delete(term); // in some file-based environments we cannot just change the container this.add(container); } } } } - + public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException { // remove the same url hashes for multiple words // this is mainly used when correcting a index after a search @@ -78,7 +78,7 @@ public abstract class AbstractIndex implements removeDelayed(i.next(), urlHashBytes); } } - + public int remove(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException { // remove the same url hashes for multiple words // this is mainly used when correcting a index after a search @@ -89,15 +89,15 @@ public abstract class AbstractIndex implements } return c; } - - public synchronized TreeSet> references(final byte[] startHash, final boolean rot, int count) throws IOException { + + public synchronized TreeSet> referenceContainer(final byte[] startHash, final boolean rot, int count) throws IOException { // creates a set of indexContainers // 
this does not use the cache - final Order> containerOrder = new ReferenceContainerOrder(factory, this.termKeyOrdering().clone()); - final ReferenceContainer emptyContainer = ReferenceContainer.emptyContainer(factory, startHash); + final Order> containerOrder = new ReferenceContainerOrder(this.factory, termKeyOrdering().clone()); + final ReferenceContainer emptyContainer = ReferenceContainer.emptyContainer(this.factory, startHash); containerOrder.rotate(emptyContainer); final TreeSet> containers = new TreeSet>(containerOrder); - final Iterator> i = references(startHash, rot); + final Iterator> i = referenceContainerIterator(startHash, rot); //if (ram) count = Math.min(size(), count); ReferenceContainer container; // this loop does not terminate using the i.hasNex() predicate when rot == true @@ -113,10 +113,10 @@ public abstract class AbstractIndex implements } return containers; // this may return less containers as demanded } - - + + // methods to search in the index - + /** * collect containers for given word hashes. * This collection stops if a single container does not contain any references. @@ -139,26 +139,26 @@ public abstract class AbstractIndex implements ReferenceContainer singleContainer; final Iterator i = wordHashes.iterator(); while (i.hasNext()) { - + // get next word hash: singleHash = i.next(); - + // retrieve index try { - singleContainer = this.get(singleHash, urlselection); - } catch (IOException e) { + singleContainer = get(singleHash, urlselection); + } catch (final IOException e) { Log.logException(e); continue; } - + // check result if ((singleContainer == null || singleContainer.isEmpty())) return new TreeMap>(Base64Order.enhancedCoder); - + containers.put(singleHash, singleContainer); } return containers; } - + /** * collect containers for given word hashes and join them as they are retrieved. 
* This collection stops if a single container does not contain any references @@ -168,39 +168,39 @@ public abstract class AbstractIndex implements * @param urlselection * @param maxDistance the maximum distance that the words in the result may have * @return ReferenceContainer the join result - * @throws RowSpaceExceededException + * @throws RowSpaceExceededException */ public ReferenceContainer searchJoin(final HandleSet wordHashes, final HandleSet urlselection, final int maxDistance) throws RowSpaceExceededException { // first check if there is any entry that has no match; // this uses only operations in ram - for (byte[] wordHash: wordHashes) { - if (!this.has(wordHash)) return ReferenceContainer.emptyContainer(factory, null, 0); + for (final byte[] wordHash: wordHashes) { + if (!has(wordHash)) return ReferenceContainer.emptyContainer(this.factory, null, 0); } - + // retrieve entities that belong to the hashes ReferenceContainer resultContainer = null; ReferenceContainer singleContainer; - for (byte[] wordHash: wordHashes) { + for (final byte[] wordHash: wordHashes) { // retrieve index try { - singleContainer = this.get(wordHash, urlselection); - } catch (IOException e) { + singleContainer = get(wordHash, urlselection); + } catch (final IOException e) { Log.logException(e); continue; } - + // check result - if ((singleContainer == null || singleContainer.isEmpty())) return ReferenceContainer.emptyContainer(factory, null, 0); + if ((singleContainer == null || singleContainer.isEmpty())) return ReferenceContainer.emptyContainer(this.factory, null, 0); if (resultContainer == null) resultContainer = singleContainer; else { - resultContainer = ReferenceContainer.joinConstructive(factory, resultContainer, singleContainer, maxDistance); + resultContainer = ReferenceContainer.joinConstructive(this.factory, resultContainer, singleContainer, maxDistance); } - + // finish if the result is empty if (resultContainer.isEmpty()) return resultContainer; } return 
resultContainer; } - + public TermSearch query( final HandleSet queryHashes, final HandleSet excludeHashes, diff --git a/source/net/yacy/kelondro/rwi/BufferedIndex.java b/source/net/yacy/kelondro/rwi/BufferedIndex.java index c5843c2c9..b4e056547 100644 --- a/source/net/yacy/kelondro/rwi/BufferedIndex.java +++ b/source/net/yacy/kelondro/rwi/BufferedIndex.java @@ -10,7 +10,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -50,7 +50,7 @@ public interface BufferedIndex extends Index extends Index extends Index extends Index extends Index> references( + public CloneableIterator> referenceContainerIterator( byte[] startHash, boolean rot, boolean buffer ) throws IOException; - + /** * collect reference container in index. this method differs from the collector in an Index @@ -124,11 +124,11 @@ public interface BufferedIndex extends Index> references( + public TreeSet> referenceContainer( byte[] startHash, boolean rot, int count, boolean buffer ) throws IOException; - + } diff --git a/source/net/yacy/kelondro/rwi/Index.java b/source/net/yacy/kelondro/rwi/Index.java index 552448e43..d08978a90 100644 --- a/source/net/yacy/kelondro/rwi/Index.java +++ b/source/net/yacy/kelondro/rwi/Index.java @@ -10,7 +10,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -32,6 +32,7 @@ import java.io.IOException; import java.util.TreeMap; import java.util.TreeSet; +import net.yacy.cora.ranking.Rating; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.order.ByteOrder; @@ -39,19 +40,19 @@ import 
net.yacy.kelondro.order.CloneableIterator; public interface Index extends Iterable> { - + /** * every index entry is made for a term which has a fixed size * @return the size of the term */ public int termKeyLength(); - + /** * merge this index with another index * @param otherIndex */ public void merge(Index otherIndex) throws IOException, RowSpaceExceededException; - + /** * add references to the reverse index * if no references to the word are stored, the new Entries are added, @@ -59,7 +60,7 @@ public interface Index extends Iterable newEntries) throws IOException, RowSpaceExceededException; @@ -71,17 +72,17 @@ public interface Index extends Iterable extends Iterable extends Iterable get(byte[] termHash, HandleSet referenceselection) throws IOException; - + /** * delete all references for a word * @param termHash @@ -109,7 +110,7 @@ public interface Index extends Iterable delete(byte[] termHash) throws IOException; - + /** * remove a specific reference entry * @param termHash @@ -119,7 +120,7 @@ public interface Index extends Iterable extends Iterable> referenceCountIterator( + byte[] startHash, + boolean rot + ) throws IOException; + /** * iterate all references from the beginning of a specific word hash * @param startHash @@ -141,13 +156,13 @@ public interface Index extends Iterable> references( - byte[] startHash, - boolean rot - ) throws IOException; - + public CloneableIterator> referenceContainerIterator( + byte[] startHash, + boolean rot + ) throws IOException; + - public TreeSet> references( + public TreeSet> referenceContainer( byte[] startHash, boolean rot, int count @@ -160,31 +175,31 @@ public interface Index extends Iterable> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection); - + public TreeMap> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection); + /** * delete all references entries * @throws IOException */ public void clear() throws IOException; - + /** * close the reverse index */ public void 
close(); - + /** * the number of all references * @return the nnumber of all references */ public int size(); - + /** * calculate needed memory * @return the memory needed to operate the object */ public int minMem(); - + /** * return the order that is used for the storage of the word hashes * @return diff --git a/source/net/yacy/kelondro/rwi/IndexCell.java b/source/net/yacy/kelondro/rwi/IndexCell.java index 349133d5d..de48ee3fc 100644 --- a/source/net/yacy/kelondro/rwi/IndexCell.java +++ b/source/net/yacy/kelondro/rwi/IndexCell.java @@ -32,6 +32,9 @@ import java.util.Iterator; import java.util.Map; import java.util.TreeMap; +import net.yacy.cora.ranking.Order; +import net.yacy.cora.ranking.Rating; +import net.yacy.cora.ranking.RatingOrder; import net.yacy.cora.storage.ComparableARC; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.index.HandleSet; @@ -40,7 +43,6 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.MergeIterator; -import net.yacy.kelondro.order.Order; import net.yacy.kelondro.util.EventTracker; import net.yacy.kelondro.util.MemoryControl; @@ -447,16 +449,32 @@ public final class IndexCell extends AbstractBu } public Iterator> iterator() { - return references(null, false); + return referenceContainerIterator(null, false); } - public CloneableIterator> references(final byte[] starttermHash, final boolean rot) { + public CloneableIterator> referenceCountIterator(final byte[] starttermHash, final boolean rot) { + final RatingOrder containerOrder = new RatingOrder(this.ram.rowdef().getOrdering()); + containerOrder.rotate(new Rating(starttermHash, 0)); + return new MergeIterator>( + this.ram.referenceCountIterator(starttermHash, rot), + new MergeIterator>( + this.ram.referenceCountIterator(starttermHash, false), + this.array.referenceCountIterator(starttermHash, false), + containerOrder, + 
ReferenceContainer.containerMergeMethod, + true), + containerOrder, + ReferenceContainer.containerMergeMethod, + true); + } + + public CloneableIterator> referenceContainerIterator(final byte[] starttermHash, final boolean rot) { final Order> containerOrder = new ReferenceContainerOrder(this.factory, this.ram.rowdef().getOrdering().clone()); containerOrder.rotate(new ReferenceContainer(this.factory, starttermHash)); return new MergeIterator>( - this.ram.references(starttermHash, rot), + this.ram.referenceContainerIterator(starttermHash, rot), new MergeIterator>( - this.ram.references(starttermHash, false), + this.ram.referenceContainerIterator(starttermHash, false), this.array.referenceContainerIterator(starttermHash, false), containerOrder, ReferenceContainer.containerMergeMethod, @@ -466,14 +484,14 @@ public final class IndexCell extends AbstractBu true); } - public CloneableIterator> references(final byte[] startTermHash, final boolean rot, final boolean ram) { + public CloneableIterator> referenceContainerIterator(final byte[] startTermHash, final boolean rot, final boolean ram) { final Order> containerOrder = new ReferenceContainerOrder(this.factory, this.ram.rowdef().getOrdering().clone()); containerOrder.rotate(new ReferenceContainer(this.factory, startTermHash)); if (ram) { - return this.ram.references(startTermHash, rot); + return this.ram.referenceContainerIterator(startTermHash, rot); } return new MergeIterator>( - this.ram.references(startTermHash, false), + this.ram.referenceContainerIterator(startTermHash, false), this.array.referenceContainerIterator(startTermHash, false), containerOrder, ReferenceContainer.containerMergeMethod, diff --git a/source/net/yacy/kelondro/rwi/IndexReader.java b/source/net/yacy/kelondro/rwi/IndexReader.java index 0b9c8697e..e1db98ecc 100644 --- a/source/net/yacy/kelondro/rwi/IndexReader.java +++ b/source/net/yacy/kelondro/rwi/IndexReader.java @@ -9,7 +9,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free 
software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -34,8 +34,8 @@ public interface IndexReader { public int size(); public boolean has(byte[] wordHash); // should only be used if in case that true is returned the getContainer is NOT called - public ReferenceContainer get(byte[] wordHash, HandleSet urlselection); - public CloneableIterator> references(byte[] startWordHash, boolean rot); + public ReferenceContainer get(byte[] wordHash, HandleSet urlselection); + public CloneableIterator> referenceContainerIterator(byte[] startWordHash, boolean rot); public void close(); - + } diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java b/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java index 9f12e5112..0bf8834e1 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java @@ -7,7 +7,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -29,6 +29,7 @@ import java.io.IOException; import java.util.Date; import java.util.Iterator; +import net.yacy.cora.ranking.Rating; import net.yacy.kelondro.blob.ArrayStack; import net.yacy.kelondro.blob.BLOB; import net.yacy.kelondro.index.HandleMap; @@ -45,7 +46,7 @@ public final class ReferenceContainerArray { protected final ReferenceFactory factory; protected final ArrayStack array; private final IODispatcher merger; - + /** * open a index container array based on BLOB dumps. The content of the BLOBs will not be read * unless a .idx file exists. 
Only the .idx file is opened to get a fast read access to @@ -54,7 +55,7 @@ public final class ReferenceContainerArray { * is still possible * @param payloadrow the row definition for the BLOB data structure * @param log - * @throws IOException + * @throws IOException */ public ReferenceContainerArray( final File heapLocation, @@ -62,7 +63,7 @@ public final class ReferenceContainerArray { final ReferenceFactory factory, final ByteOrder termOrder, final int termSize, - IODispatcher merger) throws IOException { + final IODispatcher merger) throws IOException { this.factory = factory; this.array = new ArrayStack( heapLocation, @@ -74,49 +75,49 @@ public final class ReferenceContainerArray { assert merger != null; this.merger = merger; } - + public void close() { this.array.close(true); } - + public void clear() throws IOException { this.array.clear(); } - + public long mem() { - return array.mem(); + return this.array.mem(); } - + public int[] sizes() { return (this.array == null) ? new int[0] : this.array.sizes(); } - + public ByteOrder ordering() { return this.array.ordering(); } - + public File newContainerBLOBFile() { return this.array.newBLOB(new Date()); } - - public void mountBLOBFile(File location) throws IOException { + + public void mountBLOBFile(final File location) throws IOException { this.array.mountBLOB(location, false); } - + public Row rowdef() { return this.factory.getRow(); } - + /** * return an iterator object that creates top-level-clones of the indexContainers * in the cache, so that manipulations of the iterated objects do not change * objects in the cache. 
- * @throws IOException + * @throws IOException */ public CloneableIterator> referenceContainerIterator(final byte[] startWordHash, final boolean rot) { try { return new ReferenceContainerIterator(startWordHash, rot); - } catch (IOException e) { + } catch (final IOException e) { Log.logException(e); return null; } @@ -128,59 +129,132 @@ public final class ReferenceContainerArray { // and because every indexContainer Object that is iterated must be returned as top-level-clone // so this class simulates wCache.tailMap(startWordHash).values().iterator() // plus the mentioned features - + private final boolean rot; protected CloneableIterator iterator; - + public ReferenceContainerIterator(final byte[] startWordHash, final boolean rot) throws IOException { this.rot = rot; - this.iterator = array.keys(true, startWordHash); + this.iterator = ReferenceContainerArray.this.array.keys(true, startWordHash); // The collection's iterator will return the values in the order that their corresponding keys appear in the tree. 
} - + public ReferenceContainerIterator clone(final Object secondWordHash) { try { - return new ReferenceContainerIterator((byte[]) secondWordHash, rot); - } catch (IOException e) { + return new ReferenceContainerIterator((byte[]) secondWordHash, this.rot); + } catch (final IOException e) { Log.logException(e); return null; } } - + public boolean hasNext() { if (this.iterator == null) return false; - if (rot) return true; - return iterator.hasNext(); + if (this.rot) return true; + return this.iterator.hasNext(); } public ReferenceContainer next() { - if (iterator.hasNext()) try { - return get(iterator.next()); - } catch (Exception e) { + if (this.iterator.hasNext()) try { + return get(this.iterator.next()); + } catch (final Exception e) { Log.logException(e); return null; } // rotation iteration - if (!rot) { + if (!this.rot) { return null; } try { - iterator = array.keys(true, null); - return get(iterator.next()); - } catch (Exception e) { + this.iterator = ReferenceContainerArray.this.array.keys(true, null); + return get(this.iterator.next()); + } catch (final Exception e) { Log.logException(e); return null; } } public void remove() { - iterator.remove(); + this.iterator.remove(); } public Iterator> iterator() { return this; } - + + } + + /** + * return an iterator object that counts the number of references in indexContainers + * the startWordHash may be null to iterate all from the beginning + * @throws IOException + */ + public CloneableIterator> referenceCountIterator(final byte[] startWordHash, final boolean rot) { + try { + return new ReferenceCountIterator(startWordHash, rot); + } catch (final IOException e) { + Log.logException(e); + return null; + } + } + + public class ReferenceCountIterator implements CloneableIterator>, Iterable> { + + private final boolean rot; + protected CloneableIterator iterator; + + public ReferenceCountIterator(final byte[] startWordHash, final boolean rot) throws IOException { + this.rot = rot; + this.iterator = 
ReferenceContainerArray.this.array.keys(true, startWordHash); + // The collection's iterator will return the values in the order that their corresponding keys appear in the tree. + } + + public ReferenceCountIterator clone(final Object secondWordHash) { + try { + return new ReferenceCountIterator((byte[]) secondWordHash, this.rot); + } catch (final IOException e) { + Log.logException(e); + return null; + } + } + + public boolean hasNext() { + if (this.iterator == null) return false; + if (this.rot) return true; + return this.iterator.hasNext(); + } + + public Rating next() { + byte[] reference; + if (this.iterator.hasNext()) try { + reference = this.iterator.next(); + return new Rating(reference, count(reference)); + } catch (final Exception e) { + Log.logException(e); + return null; + } + // rotation iteration + if (!this.rot) { + return null; + } + try { + this.iterator = ReferenceContainerArray.this.array.keys(true, null); + reference = this.iterator.next(); + return new Rating(reference, count(reference)); + } catch (final Exception e) { + Log.logException(e); + return null; + } + } + + public void remove() { + this.iterator.remove(); + } + + public Iterator> iterator() { + return this; + } + } /** @@ -188,24 +262,24 @@ public final class ReferenceContainerArray { * this works with heaps in write- and read-mode * @param key * @return true, if the key is used in the heap; false otherwise - * @throws IOException + * @throws IOException */ public boolean has(final byte[] termHash) { return this.array.containsKey(termHash); } - + /** * get a indexContainer from a heap * @param key * @return the indexContainer if one exist, null otherwise - * @throws IOException - * @throws RowSpaceExceededException + * @throws IOException + * @throws RowSpaceExceededException */ public ReferenceContainer get(final byte[] termHash) throws IOException, RowSpaceExceededException { - long timeout = System.currentTimeMillis() + 3000; - Iterator entries = 
this.array.getAll(termHash).iterator(); + final long timeout = System.currentTimeMillis() + 3000; + final Iterator entries = this.array.getAll(termHash).iterator(); if (entries == null || !entries.hasNext()) return null; - byte[] a = entries.next(); + final byte[] a = entries.next(); int k = 1; ReferenceContainer c = new ReferenceContainer(this.factory, termHash, RowSet.importRowSet(a, this.factory.getRow())); if (System.currentTimeMillis() > timeout) { @@ -222,12 +296,12 @@ public final class ReferenceContainerArray { } return c; } - + public int count(final byte[] termHash) throws IOException { - long timeout = System.currentTimeMillis() + 3000; - Iterator entries = this.array.lengthAll(termHash).iterator(); + final long timeout = System.currentTimeMillis() + 3000; + final Iterator entries = this.array.lengthAll(termHash).iterator(); if (entries == null || !entries.hasNext()) return 0; - Long a = entries.next(); + final Long a = entries.next(); int k = 1; int c = RowSet.importRowCount(a, this.factory.getRow()); assert c >= 0; @@ -247,7 +321,7 @@ public final class ReferenceContainerArray { assert c >= 0; return c; } - + /** * calculate an upper limit for a ranking number of the container size * the returned number is not a counter. It can only be used to compare the @@ -259,110 +333,110 @@ public final class ReferenceContainerArray { public long lenghtRankingUpperLimit(final byte[] termHash) throws IOException { return this.array.lengthAdd(termHash); } - + /** * delete a indexContainer from the heap cache. 
This can only be used for write-enabled heaps * @param wordHash * @return the indexContainer if the cache contained the container, null otherwise - * @throws IOException + * @throws IOException */ public void delete(final byte[] termHash) throws IOException { // returns the index that had been deleted - array.delete(termHash); + this.array.delete(termHash); } - - public int reduce(final byte[] termHash, ContainerReducer reducer) throws IOException, RowSpaceExceededException { - return array.reduce(termHash, new BLOBReducer(termHash, reducer)); + + public int reduce(final byte[] termHash, final ContainerReducer reducer) throws IOException, RowSpaceExceededException { + return this.array.reduce(termHash, new BLOBReducer(termHash, reducer)); } - + public class BLOBReducer implements BLOB.Reducer { ContainerReducer rewriter; byte[] wordHash; - - public BLOBReducer(byte[] wordHash, ContainerReducer rewriter) { + + public BLOBReducer(final byte[] wordHash, final ContainerReducer rewriter) { this.rewriter = rewriter; this.wordHash = wordHash; } - - public byte[] rewrite(byte[] b) throws RowSpaceExceededException { + + public byte[] rewrite(final byte[] b) throws RowSpaceExceededException { if (b == null) return null; - ReferenceContainer c = rewriter.reduce(new ReferenceContainer(factory, this.wordHash, RowSet.importRowSet(b, factory.getRow()))); + final ReferenceContainer c = this.rewriter.reduce(new ReferenceContainer(ReferenceContainerArray.this.factory, this.wordHash, RowSet.importRowSet(b, ReferenceContainerArray.this.factory.getRow()))); if (c == null) return null; - byte bb[] = c.exportCollection(); + final byte bb[] = c.exportCollection(); assert bb.length <= b.length; return bb; } } public interface ContainerReducer { - + public ReferenceContainer reduce(ReferenceContainer container); - + } - + public int entries() { return this.array.entries(); } - - public boolean shrink(long targetFileSize, long maxFileSize) { + + public boolean shrink(final long 
targetFileSize, final long maxFileSize) { if (this.array.entries() < 2) return false; boolean donesomething = false; - + // first try to merge small files that match while (this.merger.queueLength() < 3 || this.array.entries() >= 50) { - File[] ff = this.array.unmountBestMatch(2.0f, targetFileSize); + final File[] ff = this.array.unmountBestMatch(2.0f, targetFileSize); if (ff == null) break; Log.logInfo("RICELL-shrink1", "unmountBestMatch(2.0, " + targetFileSize + ")"); - merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile()); + this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile()); donesomething = true; } - + // then try to merge simply any small file while (this.merger.queueLength() < 2) { - File[] ff = this.array.unmountSmallest(targetFileSize); + final File[] ff = this.array.unmountSmallest(targetFileSize); if (ff == null) break; Log.logInfo("RICELL-shrink2", "unmountSmallest(" + targetFileSize + ")"); - merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile()); + this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile()); donesomething = true; } - + // if there is no small file, then merge matching files up to limit while (this.merger.queueLength() < 1) { - File[] ff = this.array.unmountBestMatch(2.0f, maxFileSize); + final File[] ff = this.array.unmountBestMatch(2.0f, maxFileSize); if (ff == null) break; Log.logInfo("RICELL-shrink3", "unmountBestMatch(2.0, " + maxFileSize + ")"); - merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile()); + this.merger.merge(ff[0], ff[1], this.factory, this.array, newContainerBLOBFile()); donesomething = true; } // rewrite old files (hack from sixcooler, see http://forum.yacy-websuche.de/viewtopic.php?p=15004#p15004) while (this.merger.queueLength() < 1) { - File ff = this.array.unmountOldest(); + final File ff = this.array.unmountOldest(); if (ff == null) break; Log.logInfo("RICELL-shrink4/rewrite", 
"unmountOldest()"); - merger.merge(ff, null, this.factory, this.array, newContainerBLOBFile()); + this.merger.merge(ff, null, this.factory, this.array, newContainerBLOBFile()); donesomething = true; } return donesomething; } - + public static HandleMap referenceHashes( final File heapLocation, final ReferenceFactory factory, final ByteOrder termOrder, final Row payloadrow) throws IOException, RowSpaceExceededException { - + System.out.println("CELL REFERENCE COLLECTION startup"); - HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath()); - String[] files = heapLocation.list(); - for (String f: files) { + final HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath()); + final String[] files = heapLocation.list(); + for (final String f: files) { if (f.length() < 22 || !f.startsWith("text.index") || !f.endsWith(".blob")) continue; - File fl = new File(heapLocation, f); + final File fl = new File(heapLocation, f); System.out.println("CELL REFERENCE COLLECTION opening blob " + fl); - CloneableIterator> ei = new ReferenceIterator(fl, factory); - + final CloneableIterator> ei = new ReferenceIterator(fl, factory); + ReferenceContainer container; final long start = System.currentTimeMillis(); long lastlog = start - 27000; @@ -372,7 +446,7 @@ public final class ReferenceContainerArray { while (ei.hasNext()) { container = ei.next(); if (container == null) continue; - Iterator refi = container.entries(); + final Iterator refi = container.entries(); while (refi.hasNext()) { reference = refi.next(); if (reference == null) continue; @@ -392,5 +466,5 @@ public final class ReferenceContainerArray { return references; } - + } diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java index 33623e67b..95dd7bf3b 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java +++ 
b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java @@ -36,6 +36,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import net.yacy.cora.ranking.Rating; import net.yacy.kelondro.blob.HeapWriter; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.Row; @@ -177,6 +178,17 @@ public final class ReferenceContainerCache exte return cachecopy; } + protected List> ratingList() { + final List> list = new ArrayList>(this.cache.size()); + synchronized (this.cache) { + for (final Map.Entry> entry: this.cache.entrySet()) { + if (entry.getValue() != null && entry.getValue().getTermHash() != null) list.add(new Rating(entry.getKey(), entry.getValue().size())); + } + } + Collections.sort(list, new Rating.ObjectComparator()); + return list; + } + public int size() { return (this.cache == null) ? 0 : this.cache.size(); } @@ -195,26 +207,24 @@ public final class ReferenceContainerCache exte return max; } + public Iterator> iterator() { + return referenceContainerIterator(null, false); + } + /** * return an iterator object that creates top-level-clones of the indexContainers * in the cache, so that manipulations of the iterated objects do not change * objects in the cache. */ - public synchronized CloneableIterator> references(final byte[] startWordHash, final boolean rot) { - return new heapCacheIterator(startWordHash, rot); + public synchronized CloneableIterator> referenceContainerIterator(final byte[] startWordHash, final boolean rot) { + return new ReferenceContainerIterator(startWordHash, rot); } - - public Iterator> iterator() { - return references(null, false); - } - - /** * cache iterator: iterates objects within the heap cache. 
This can only be used * for write-enabled heaps, read-only heaps do not have a heap cache */ - public class heapCacheIterator implements CloneableIterator>, Iterable> { + public class ReferenceContainerIterator implements CloneableIterator>, Iterable> { // this class exists, because the wCache cannot be iterated with rotation // and because every indexContainer Object that is iterated must be returned as top-level-clone @@ -226,7 +236,7 @@ public final class ReferenceContainerCache exte private int p; private byte[] latestTermHash; - public heapCacheIterator(byte[] startWordHash, final boolean rot) { + public ReferenceContainerIterator(byte[] startWordHash, final boolean rot) { this.rot = rot; if (startWordHash != null && startWordHash.length == 0) startWordHash = null; this.cachecopy = sortedClone(); @@ -242,8 +252,8 @@ public final class ReferenceContainerCache exte // The collection's iterator will return the values in the order that their corresponding keys appear in the tree. } - public heapCacheIterator clone(final Object secondWordHash) { - return new heapCacheIterator((byte[]) secondWordHash, this.rot); + public ReferenceContainerIterator clone(final Object secondWordHash) { + return new ReferenceContainerIterator((byte[]) secondWordHash, this.rot); } public boolean hasNext() { @@ -289,6 +299,75 @@ public final class ReferenceContainerCache exte } + @Override + public CloneableIterator> referenceCountIterator(final byte[] startHash, final boolean rot) { + return new ReferenceCountIterator(startHash, rot); + } + + /** + * cache iterator: iterates objects within the heap cache. 
This can only be used + * for write-enabled heaps, read-only heaps do not have a heap cache + */ + public class ReferenceCountIterator implements CloneableIterator>, Iterable> { + + private final boolean rot; + private final List> cachecounts; + private int p; + private byte[] latestTermHash; + + public ReferenceCountIterator(byte[] startWordHash, final boolean rot) { + this.rot = rot; + if (startWordHash != null && startWordHash.length == 0) startWordHash = null; + this.cachecounts = ratingList(); + assert this.cachecounts != null; + assert ReferenceContainerCache.this.termOrder != null; + this.p = 0; + if (startWordHash != null) { + while ( this.p < this.cachecounts.size() && + ReferenceContainerCache.this.termOrder.compare(this.cachecounts.get(this.p).getObject().asBytes(), startWordHash) < 0 + ) this.p++; + } + this.latestTermHash = null; + // The collection's iterator will return the values in the order that their corresponding keys appear in the tree. + } + + public ReferenceCountIterator clone(final Object secondWordHash) { + return new ReferenceCountIterator((byte[]) secondWordHash, this.rot); + } + + public boolean hasNext() { + if (this.rot) return this.cachecounts.size() > 0; + return this.p < this.cachecounts.size(); + } + + public Rating next() { + if (this.p < this.cachecounts.size()) { + final Rating c = this.cachecounts.get(this.p++); + this.latestTermHash = c.getObject().asBytes(); + return new Rating(c.getObject().asBytes(), c.getScore()); + } + // rotation iteration + if (!this.rot) { + return null; + } + if (this.cachecounts.isEmpty()) return null; + this.p = 0; + final Rating c = this.cachecounts.get(this.p++); + this.latestTermHash = c.getObject().asBytes(); + return new Rating(c.getObject().asBytes(), c.getScore()); + } + + public void remove() { + this.cachecounts.remove(--this.p); /* drop the rating last returned by next() and step the cursor back so the following element is not skipped; System.arraycopy only accepts arrays and throws ArrayStoreException when handed a List */ + ReferenceContainerCache.this.cache.remove(new ByteArray(this.latestTermHash)); + } + + 
public Iterator> iterator() { + return this; + } + + } + /** * test if a given key is in the heap * this works with heaps in write- and read-mode diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerOrder.java b/source/net/yacy/kelondro/rwi/ReferenceContainerOrder.java index e7bbd56f7..22be80d09 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerOrder.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainerOrder.java @@ -26,8 +26,8 @@ package net.yacy.kelondro.rwi; -import net.yacy.kelondro.order.AbstractOrder; -import net.yacy.kelondro.order.Order; +import net.yacy.cora.ranking.AbstractOrder; +import net.yacy.cora.ranking.Order; public class ReferenceContainerOrder extends AbstractOrder> implements Order>, Cloneable { diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java index d1602f1b5..008f4c12e 100644 --- a/source/net/yacy/kelondro/table/SplitTable.java +++ b/source/net/yacy/kelondro/table/SplitTable.java @@ -44,6 +44,7 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import net.yacy.cora.date.GenericFormatter; +import net.yacy.cora.ranking.Order; import net.yacy.kelondro.blob.ArrayStack; import net.yacy.kelondro.index.Cache; import net.yacy.kelondro.index.HandleSet; @@ -55,7 +56,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.MergeIterator; -import net.yacy.kelondro.order.Order; import net.yacy.kelondro.order.StackIterator; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.NamePrefixThreadFactory; diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index e9770ae0d..2fe10b096 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -209,7 +209,7 @@ public final class yacy { } sb = new Switchboard(dataHome, appHome, "defaults/yacy.init".replace("/", File.separator), 
newconf); //sbSync.V(); // signal that the sb reference was set - + // switch the memory strategy MemoryControl.setStandardStrategy(sb.getConfigBool("memory.standardStrategy", true)); @@ -648,7 +648,7 @@ public final class yacy { new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"), 10000, (long) Integer.MAX_VALUE, false, false); - final Iterator> indexContainerIterator = wordIndex.termIndex().references("AAAAAAAAAAAA".getBytes(), false, false); + final Iterator> indexContainerIterator = wordIndex.termIndex().referenceContainerIterator("AAAAAAAAAAAA".getBytes(), false, false); long urlCounter = 0, wordCounter = 0; long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0; @@ -828,7 +828,7 @@ public final class yacy { new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"), 10000, (long) Integer.MAX_VALUE, false, false); - indexContainerIterator = WordIndex.termIndex().references(wordChunkStartHash.getBytes(), false, false); + indexContainerIterator = WordIndex.termIndex().referenceContainerIterator(wordChunkStartHash.getBytes(), false, false); } int counter = 0; ReferenceContainer container = null;