- added memory computation to termlist_p.xml

- added option to delete terms in termlist_p.xml

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7901 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 45e497a9bd
commit a5541751a8

@ -21,11 +21,14 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.ranking.Rating; import net.yacy.cora.ranking.Rating;
import net.yacy.kelondro.index.Row;
import de.anomic.search.Segment; import de.anomic.search.Segment;
import de.anomic.search.Segments; import de.anomic.search.Segments;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
@ -39,7 +42,7 @@ public class termlist_p {
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
Segment segment = null; Segment segment = null;
final boolean html = post != null && post.containsKey("html"); final boolean delete = post != null && post.containsKey("delete");
final long mincount = post == null ? 10000 : post.getLong("mincount", 10000); final long mincount = post == null ? 10000 : post.getLong("mincount", 10000);
if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header, false)) { if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header, false)) {
segment = sb.indexSegments.segment(post.get("segment")); segment = sb.indexSegments.segment(post.get("segment"));
@ -49,9 +52,14 @@ public class termlist_p {
Rating<byte[]> e; Rating<byte[]> e;
int c = 0; int c = 0;
byte[] termhash, maxterm = null; byte[] termhash, maxterm = null;
long count, maxcount = 0; long count, maxcount = 0, totalmemory = 0;
int termnumber = 0;
final Row referenceRow = segment.termIndex().referenceRow();
final int rowsize = referenceRow.objectsize;
final ArrayList<byte[]> deleteterms = new ArrayList<byte[]>();
while (i.hasNext()) { while (i.hasNext()) {
e = i.next(); e = i.next();
termnumber++;
count = e.getScore(); count = e.getScore();
if (count > maxcount) { if (count > maxcount) {
maxcount = count; maxcount = count;
@ -59,13 +67,27 @@ public class termlist_p {
} }
if (count < mincount) continue; if (count < mincount) continue;
termhash = e.getObject(); termhash = e.getObject();
if (delete) deleteterms.add(termhash);
prop.put("terms_" + c + "_termhash", ASCII.String(termhash)); prop.put("terms_" + c + "_termhash", ASCII.String(termhash));
prop.put("terms_" + c + "_count", count); prop.put("terms_" + c + "_count", count);
prop.put("terms_" + c + "_memory", 20 + count * rowsize);
c++; c++;
totalmemory += 20 + count * rowsize;
}
if (delete) {
for (final byte[] t: deleteterms) {
try {
segment.termIndex().delete(t);
} catch (final IOException e1) {
e1.printStackTrace();
}
}
} }
prop.put("terms", c); prop.put("terms", c);
prop.put("maxterm", maxterm == null ? "" : ASCII.String(maxterm)); prop.put("maxterm", maxterm == null ? "" : ASCII.String(maxterm));
prop.put("maxcount", maxcount); prop.put("maxcount", maxcount);
prop.put("termnumber", termnumber);
prop.put("totalmemory", totalmemory);
// return rewrite properties // return rewrite properties
return prop; return prop;

@ -1,11 +1,12 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<terms> <terms allcount="#[termnumber]#" selectcount="#[terms]#" selectmemory="#[totalmemory]#">
<maxterm hash="#[maxterm]#"> <maxterm hash="#[maxterm]#">
<count>#[maxcount]#</count> <count>#[maxcount]#</count>
</maxterm> </maxterm>
#{terms}# #{terms}#
<term hash="#[termhash]#" id="#[termhash]#"> <term hash="#[termhash]#" id="#[termhash]#">
<count>#[count]#</count> <count>#[count]#</count>
<memory>#[memory]#</memory>
</term> </term>
#{/terms}# #{/terms}#
</terms> </terms>

@ -34,6 +34,7 @@ import java.util.TreeSet;
import net.yacy.cora.ranking.Order; import net.yacy.cora.ranking.Order;
import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
@ -210,4 +211,8 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
return new TermSearch<ReferenceType>(this, queryHashes, excludeHashes, urlselection, termFactory, maxDistance); return new TermSearch<ReferenceType>(this, queryHashes, excludeHashes, urlselection, termFactory, maxDistance);
} }
/**
 * Hand out the Row reported by this index's reference factory.
 *
 * @return the value of {@code this.factory.getRow()}
 */
public Row referenceRow() {
    final Row row = this.factory.getRow();
    return row;
}
} }

@ -34,6 +34,7 @@ import java.util.TreeSet;
import net.yacy.cora.ranking.Rating; import net.yacy.cora.ranking.Rating;
import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.CloneableIterator;
@ -205,4 +206,10 @@ public interface Index <ReferenceType extends Reference> extends Iterable<Refere
* @return * @return
*/ */
public ByteOrder termKeyOrdering(); public ByteOrder termKeyOrdering();
/**
 * Ask for the Row that is used to construct one reference.
 *
 * @return the Row that is used to construct one reference
 */
public Row referenceRow();
} }

Loading…
Cancel
Save