From cec3836e73996bd791b5279e1d28f33e757acec4 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 7 Sep 2011 21:47:54 +0000 Subject: [PATCH] added reference limitation to IndexControlRWIs_p.html servlet git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7936 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControlRWIs_p.html | 16 +++++++++++++++- htroot/IndexControlRWIs_p.java | 19 ++++++++++++++++--- source/de/anomic/search/Switchboard.java | 4 ++++ .../yacy/kelondro/rwi/ReferenceContainer.java | 16 +++++++--------- 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/htroot/IndexControlRWIs_p.html b/htroot/IndexControlRWIs_p.html index 7e4368b63..617db8ecc 100644 --- a/htroot/IndexControlRWIs_p.html +++ b/htroot/IndexControlRWIs_p.html @@ -49,12 +49,26 @@


-


+

+ +
+
Limitations +
+
Index Reference Size
+
+ No reference size limitation (this may cause strong CPU load when words are searched that appear very often)
+ + Limitation of number of references per word: (this causes old references to be deleted if that limit is reached)

+ +
+
+
+
#(/cleanup)# #(searchresult)#:: diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 640fee2fa..a8541602c 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -85,7 +85,7 @@ public class IndexControlRWIs_p { prop.putHTML("keystring", ""); prop.put("keyhash", ""); prop.put("result", ""); - prop.put("cleanup", post == null ? 1 : 0); + prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0); prop.put("cleanup_solr", sb.solrConnector == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1); String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); @@ -151,7 +151,7 @@ public class IndexControlRWIs_p { } } - // delete everything + // delete everything if (post.containsKey("deletecomplete")) { if (post.get("deleteIndex", "").equals("on")) { segment.clear(); @@ -173,11 +173,21 @@ public class IndexControlRWIs_p { sb.robots.clear(); } if (post.get("deleteSearchFl", "").equals("on")) { - sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME); + sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME); } post.remove("deletecomplete"); } + // set reference limitation + if (post.containsKey("maxReferencesLimit")) { + if (post.get("maxReferencesRadio", "").equals("on")) { + ReferenceContainer.maxReferences = post.getInt("maxReferences", 0); + } else { + ReferenceContainer.maxReferences = 0; + } + sb.setConfig("index.maxReferences", ReferenceContainer.maxReferences); + } + // delete word if (post.containsKey("keyhashdeleteall")) try { if (delurl || delurlref) { @@ -407,6 +417,9 @@ public class IndexControlRWIs_p { // insert constants prop.putNum("wcount", segment.termIndex().sizesMax()); + prop.put("cleanup_maxReferencesRadioChecked", ReferenceContainer.maxReferences > 0 ? 1 : 0); + prop.put("cleanup_maxReferences", ReferenceContainer.maxReferences > 0 ? 
ReferenceContainer.maxReferences : 100000); + // return rewrite properties return prop; } diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index e5ea8f545..2a0d818e9 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -115,6 +115,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.order.NaturalOrder; +import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.util.EventTracker; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; @@ -358,6 +359,9 @@ public final class Switchboard extends serverSwitch { partitionExponent, this.useTailCache, this.exceed134217727); + + // initialize index + ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0); final File oldSingleSegment = new File(new File(indexPath, networkName), "TEXT"); final File newSegmentsPath = new File(new File(indexPath, networkName), "SEGMENTS"); Segments.migrateOld(oldSingleSegment, newSegmentsPath, getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default")); diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainer.java b/source/net/yacy/kelondro/rwi/ReferenceContainer.java index 7264673db..d97bb67c6 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainer.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainer.java @@ -35,8 +35,6 @@ import java.util.Iterator; import java.util.List; import java.util.TreeMap; -import de.anomic.search.Switchboard; - import net.yacy.cora.document.ASCII; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.Row; @@ -58,7 +56,7 @@ public class ReferenceContainer extends RowSet private byte[] termHash; protected ReferenceFactory factory; - private static int maxReferences = Switchboard.getSwitchboard().getConfigInt("index.maxReferences", 0); + public static int maxReferences = 0; // 
overwrite this to enable automatic index shrinking. 0 means no shrinking public ReferenceContainer(final ReferenceFactory factory, final byte[] termHash, final RowSet collection) { super(collection); @@ -191,19 +189,19 @@ public class ReferenceContainer extends RowSet while (i.hasNext()) count += (delete(i.next())) ? 1 : 0; return count; } - + public void shrinkReferences() { - final int diff = this.size() - maxReferences; + final int diff = size() - maxReferences; if (maxReferences <= 0 || diff <= 0) return; final int[] indexes = oldPostions(diff); Arrays.sort(indexes); for (int i = indexes.length - 1; i >= 0; i--) { if (indexes[i] < 0) break; - this.removeRow(indexes[i], false); + removeRow(indexes[i], false); } - this.sort(); + sort(); } - + private int[] oldPostions(final int count) { final int[] indexes = new int[count]; int i = 0; @@ -215,7 +213,7 @@ public class ReferenceContainer extends RowSet } return indexes; } - + private Collection> positionsByLastMod() { long mod; List positions;