added reference limitation to IndexControlRWIs_p.html servlet

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7936 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent ecb4986b38
commit cec3836e73

@ -49,12 +49,26 @@
<input type="checkbox" name="deleteCache" id="deleteCache" disabled="disabled" /><label for="deleteCache">Delete HTTP &amp; FTP Cache</label><br/> <input type="checkbox" name="deleteCache" id="deleteCache" disabled="disabled" /><label for="deleteCache">Delete HTTP &amp; FTP Cache</label><br/>
<input type="checkbox" name="deleteCrawlQueues" id="deleteCrawlQueues" disabled="disabled" /><label for="deleteCrawlQueues">Stop Crawler and delete Crawl Queues</label><br/> <input type="checkbox" name="deleteCrawlQueues" id="deleteCrawlQueues" disabled="disabled" /><label for="deleteCrawlQueues">Stop Crawler and delete Crawl Queues</label><br/>
<input type="checkbox" name="deleteRobots" id="deleteRobots" disabled="disabled" /><label for="deleteRobots">Delete robots.txt Cache</label><br/> <input type="checkbox" name="deleteRobots" id="deleteRobots" disabled="disabled" /><label for="deleteRobots">Delete robots.txt Cache</label><br/>
<input type="checkbox" name="deleteSearchFl" id="deleteSearchFl" disabled="disabled" /><label for="deleteSearchFl">Delete cached snippet-fetching failures during search</label><br/><br/><br/> <input type="checkbox" name="deleteSearchFl" id="deleteSearchFl" disabled="disabled" /><label for="deleteSearchFl">Delete cached snippet-fetching failures during search</label><br/><br/>
<input type="submit" name="deletecomplete" id="deletecomplete" value="Delete" disabled="disabled"/> <input type="submit" name="deletecomplete" id="deletecomplete" value="Delete" disabled="disabled"/>
</dd> </dd>
</dl> </dl>
</fieldset> </fieldset>
</form> </form>
</form>
<form action="IndexControlRWIs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Limitations</legend>
<dl>
<dt class="TableCellDark">Index Reference Size</dt>
<dd><input type="radio" name="maxReferencesRadio" id="maxReferencesRadioOff" value="off" #(maxReferencesRadioChecked)#checked="checked"::#(/maxReferencesRadioChecked)# onclick="window.location.href='IndexControlRWIs_p.html?maxReferencesRadio=off&maxReferencesLimit='"/>
No reference size limitation (this may cause high CPU load when searching for words that appear very often)<br/>
<input type="radio" name="maxReferencesRadio" id="maxReferencesRadioOn" value="on" #(maxReferencesRadioChecked)#::checked="checked"#(/maxReferencesRadioChecked)# onclick="window.location.href='IndexControlRWIs_p.html?maxReferencesRadio=on&maxReferencesLimit=&maxReferences=#[maxReferences]#'"/>
Limitation of number of references per word: <input type="text" name="maxReferences" value="#[maxReferences]#" size="9" maxlength="12" onfocus="document.getElementById('maxReferencesRadioOff').checked = false;document.getElementById('maxReferencesRadioOn').checked = true"/> (this causes old references to be deleted once that limit is reached)<br/><br/>
<input type="submit" name="maxReferencesLimit" id="maxReferencesLimit" value="Set References Limit"/>
</dd>
</dl>
</fieldset>
</form>
#(/cleanup)# #(/cleanup)#
#(searchresult)#:: #(searchresult)#::

@ -85,7 +85,7 @@ public class IndexControlRWIs_p {
prop.putHTML("keystring", ""); prop.putHTML("keystring", "");
prop.put("keyhash", ""); prop.put("keyhash", "");
prop.put("result", ""); prop.put("result", "");
prop.put("cleanup", post == null ? 1 : 0); prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0);
prop.put("cleanup_solr", sb.solrConnector == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1); prop.put("cleanup_solr", sb.solrConnector == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1);
String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
@ -151,7 +151,7 @@ public class IndexControlRWIs_p {
} }
} }
// delete everything // delete everything
if (post.containsKey("deletecomplete")) { if (post.containsKey("deletecomplete")) {
if (post.get("deleteIndex", "").equals("on")) { if (post.get("deleteIndex", "").equals("on")) {
segment.clear(); segment.clear();
@ -173,11 +173,21 @@ public class IndexControlRWIs_p {
sb.robots.clear(); sb.robots.clear();
} }
if (post.get("deleteSearchFl", "").equals("on")) { if (post.get("deleteSearchFl", "").equals("on")) {
sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME); sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME);
} }
post.remove("deletecomplete"); post.remove("deletecomplete");
} }
// set reference limitation
if (post.containsKey("maxReferencesLimit")) {
if (post.get("maxReferencesRadio", "").equals("on")) {
ReferenceContainer.maxReferences = post.getInt("maxReferences", 0);
} else {
ReferenceContainer.maxReferences = 0;
}
sb.setConfig("index.maxReferences", ReferenceContainer.maxReferences);
}
// delete word // delete word
if (post.containsKey("keyhashdeleteall")) try { if (post.containsKey("keyhashdeleteall")) try {
if (delurl || delurlref) { if (delurl || delurlref) {
@ -407,6 +417,9 @@ public class IndexControlRWIs_p {
// insert constants // insert constants
prop.putNum("wcount", segment.termIndex().sizesMax()); prop.putNum("wcount", segment.termIndex().sizesMax());
prop.put("cleanup_maxReferencesRadioChecked", ReferenceContainer.maxReferences > 0 ? 1 : 0);
prop.put("cleanup_maxReferences", ReferenceContainer.maxReferences > 0 ? ReferenceContainer.maxReferences : 100000);
// return rewrite properties // return rewrite properties
return prop; return prop;
} }

@ -115,6 +115,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.EventTracker; import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
@ -358,6 +359,9 @@ public final class Switchboard extends serverSwitch {
partitionExponent, partitionExponent,
this.useTailCache, this.useTailCache,
this.exceed134217727); this.exceed134217727);
// initialize index
ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
final File oldSingleSegment = new File(new File(indexPath, networkName), "TEXT"); final File oldSingleSegment = new File(new File(indexPath, networkName), "TEXT");
final File newSegmentsPath = new File(new File(indexPath, networkName), "SEGMENTS"); final File newSegmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
Segments.migrateOld(oldSingleSegment, newSegmentsPath, getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default")); Segments.migrateOld(oldSingleSegment, newSegmentsPath, getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"));

@ -35,8 +35,6 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.TreeMap; import java.util.TreeMap;
import de.anomic.search.Switchboard;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
@ -58,7 +56,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
private byte[] termHash; private byte[] termHash;
protected ReferenceFactory<ReferenceType> factory; protected ReferenceFactory<ReferenceType> factory;
private static int maxReferences = Switchboard.getSwitchboard().getConfigInt("index.maxReferences", 0); public static int maxReferences = 0; // overwrite this to enable automatic index shrinking. 0 means no shrinking
public ReferenceContainer(final ReferenceFactory<ReferenceType> factory, final byte[] termHash, final RowSet collection) { public ReferenceContainer(final ReferenceFactory<ReferenceType> factory, final byte[] termHash, final RowSet collection) {
super(collection); super(collection);
@ -191,19 +189,19 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
while (i.hasNext()) count += (delete(i.next())) ? 1 : 0; while (i.hasNext()) count += (delete(i.next())) ? 1 : 0;
return count; return count;
} }
// Trims this container so that it holds at most maxReferences entries.
// Does nothing when maxReferences <= 0 (0 means no shrinking) or when the
// container is not above the limit.
// NOTE(review): this listing is a side-by-side diff rendering; every line
// below carries the old column followed by the new column.
public void shrinkReferences() { public void shrinkReferences() {
// diff = number of entries above the configured limit
final int diff = this.size() - maxReferences; final int diff = size() - maxReferences;
if (maxReferences <= 0 || diff <= 0) return; if (maxReferences <= 0 || diff <= 0) return;
// row positions to drop; presumably those of the oldest entries (helper body
// is only partly visible here) -- TODO confirm
final int[] indexes = oldPostions(diff); final int[] indexes = oldPostions(diff);
// ascending order, then walk from the end: rows are removed highest position first
// (presumably so that a removal does not shift positions still pending -- verify against removeRow)
Arrays.sort(indexes); Arrays.sort(indexes);
for (int i = indexes.length - 1; i >= 0; i--) { for (int i = indexes.length - 1; i >= 0; i--) {
// a negative position ends the loop; because the array is sorted, all remaining entries are negative too
if (indexes[i] < 0) break; if (indexes[i] < 0) break;
this.removeRow(indexes[i], false); removeRow(indexes[i], false);
} }
// sort once after all removals
// NOTE(review): presumably needed because removeRow(..., false) does not keep the order -- confirm
this.sort(); sort();
} }
private int[] oldPostions(final int count) { private int[] oldPostions(final int count) {
final int[] indexes = new int[count]; final int[] indexes = new int[count];
int i = 0; int i = 0;
@ -215,7 +213,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
} }
return indexes; return indexes;
} }
private Collection<List<Integer>> positionsByLastMod() { private Collection<List<Integer>> positionsByLastMod() {
long mod; long mod;
List<Integer> positions; List<Integer> positions;

Loading…
Cancel
Save