Added separate delete commands for the local and remote Solr indexes, the old

metadata, the old RWI and the citation index. The important
improvement is that the citation index can now be deleted separately, because
that index is responsible for the link depth (click depth) calculation. A search
index can now be deleted while keeping the citation index, which should mean
that fewer click depths have to be post-processed.
pull/1/head
Michael Peter Christen 12 years ago
parent 6f0baaa309
commit 38d3feae65

@ -89,13 +89,13 @@ function updatepage(str) {
<dl>
<dt class="TableCellDark">Index Deletion</dt>
<dd><input type="checkbox" name="deleteIndex" id="deleteIndex"
onclick="x=document.getElementById('deleteIndex').checked;document.getElementById('deleteTriplestore').checked=x;document.getElementById('deleteRobots').checked=x;document.getElementById('deleteRobots').checked=x;document.getElementById('deleteCrawlQueues').checked=x;c='disabled';document.getElementById('deleteSearchFl').checked=x;if(x){c='';}document.getElementById('deleteTriplestore').disabled=c;document.getElementById('deletecomplete').disabled=c;document.getElementById('deleteCache').disabled=c;document.getElementById('deleteRobots').disabled=c;document.getElementById('deleteCrawlQueues').disabled=c;document.getElementById('deleteSearchFl').disabled=c;"
/><label for="deleteIndex">Delete local search index (including local RWI, Metadata and embedded solr)</label><br/>
#(solr)#::<input type="checkbox" name="deleteRemoteSolr" id="deleteRemoteSolr"
onclick="x=document.getElementById('deleteRemoteSolr').checked;document.getElementById('deleteRobots').checked=x;document.getElementById('deleteCrawlQueues').checked=x;c='disabled';document.getElementById('deleteSearchFl').checked=x;if(x){c='';}document.getElementById('deletecomplete').disabled=c;document.getElementById('deleteCache').disabled=c;document.getElementById('deleteRobots').disabled=c;document.getElementById('deleteCrawlQueues').disabled=c;document.getElementById('deleteSearchFl').disabled=c;"
/><label for="deleteRemoteSolr">Delete remote solr index</label><br/>#(/solr)#
<input type="checkbox" name="deleteTriplestore" id="deleteTriplestore" disabled="disabled" /><label for="deleteTriplestore">Delete RDF Triplestore</label><br/>
onclick="x=document.getElementById('deleteIndex').checked;#(rwi)#::document.getElementById('deleteRWI').checked=x;#(/rwi)#document.getElementById('deleteTriplestore').checked=x;document.getElementById('deleteRobots').checked=x;document.getElementById('deleteRobots').checked=x;document.getElementById('deleteCrawlQueues').checked=x;c='disabled';document.getElementById('deleteSearchFl').checked=x;if(x){c='';};document.getElementById('deleteTriplestore').disabled=c;document.getElementById('deletecomplete').disabled=c;document.getElementById('deleteCache').disabled=c;document.getElementById('deleteRobots').disabled=c;document.getElementById('deleteCrawlQueues').disabled=c;document.getElementById('deleteSearchFl').disabled=c;"
/><label for="deleteIndex">Delete local search index (embedded Solr and old Metadata)</label><br/>
#(solr)#::<input type="checkbox" name="deleteRemoteSolr" id="deleteRemoteSolr" onclick="x=document.getElementById('deleteRemoteSolr').checked;c='disabled';if(x){c='';};document.getElementById('deletecomplete').disabled=c;" /><label for="deleteRemoteSolr">Delete remote solr index</label><br/>#(/solr)#
#(rwi)#::<input type="checkbox" name="deleteRWI" id="deleteRWI" onclick="x=document.getElementById('deleteRWI').checked;c='disabled';if(x){c='';};document.getElementById('deletecomplete').disabled=c;" /><label for="deleteRWI">Delete RWI Index (DHT transmission words)</label><br/>#(/rwi)#
#(citation)#::<input type="checkbox" name="deleteCitation" id="deleteCitation" onclick="x=document.getElementById('deleteCitation').checked;c='disabled';if(x){c='';};document.getElementById('deletecomplete').disabled=c;" /><label for="deleteCitation">Delete Citation Index (linking between URLs)</label><br/>#(/citation)#
<input type="checkbox" name="deleteCache" id="deleteCache" disabled="disabled" /><label for="deleteCache">Delete HTTP &amp; FTP Cache</label><br/>
<input type="checkbox" name="deleteTriplestore" id="deleteTriplestore" disabled="disabled" /><label for="deleteTriplestore">Delete RDF Triplestore</label><br/>
<input type="checkbox" name="deleteCrawlQueues" id="deleteCrawlQueues" disabled="disabled" /><label for="deleteCrawlQueues">Stop Crawler and delete Crawl Queues</label><br/>
<input type="checkbox" name="deleteRobots" id="deleteRobots" disabled="disabled" /><label for="deleteRobots">Delete robots.txt Cache</label><br/>
<input type="checkbox" name="deleteSearchFl" id="deleteSearchFl" disabled="disabled" /><label for="deleteSearchFl">Delete cached snippet-fetching failures during search</label><br/><br/>

@ -79,8 +79,10 @@ public class IndexControlURLs_p {
List<File> dumpFiles = segment.fulltext().dumpFiles();
prop.put("dumprestore_dumpfile", dumpFiles.size() == 0 ? "" : dumpFiles.get(dumpFiles.size() - 1).getAbsolutePath());
prop.put("cleanup", post == null ? 1 : 0);
prop.put("cleanup_solr", sb.index.fulltext().connectedRemoteSolr() ? 1 : 0);
prop.put("cleanup_solr", segment.fulltext().connectedRemoteSolr() ? 1 : 0);
prop.put("cleanup_rwi", segment.termIndex() != null && !segment.termIndex().isEmpty() ? 1 : 0);
prop.put("cleanup_citation", segment.urlCitation() != null && !segment.urlCitation().isEmpty() ? 1 : 0);
// show export messages
final Fulltext.Export export = segment.fulltext().export();
if ((export != null) && (export.isAlive())) {
@ -140,14 +142,17 @@ public class IndexControlURLs_p {
// delete everything
if ( post.containsKey("deletecomplete") ) {
if ( post.get("deleteIndex", "").equals("on") ) {
segment.clear();
try {segment.fulltext().clearURLIndex();} catch (IOException e) {}
try {segment.fulltext().clearLocalSolr();} catch (IOException e) {}
}
if ( post.get("deleteRemoteSolr", "").equals("on")) {
try {
sb.index.fulltext().getSolr().clear();
} catch ( final Exception e ) {
Log.logException(e);
}
try {segment.fulltext().clearRemoteSolr();} catch (IOException e) {}
}
if ( post.get("deleteRWI", "").equals("on")) {
if (segment.termIndex() != null) try {segment.termIndex().clear();} catch (IOException e) {}
}
if ( post.get("deleteCitation", "").equals("on")) {
if (segment.urlCitation() != null) try {segment.urlCitation().clear();} catch (IOException e) {}
}
if ( post.get("deleteCrawlQueues", "").equals("on") ) {
sb.crawlQueues.clear();

@ -182,6 +182,24 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
if (this.solr1 != null) this.solr1.close();
}
/**
 * Deletes everything in the local solr index.
 * <p>
 * The cache is cleared first in every case; the {@code solr0} connector is
 * only cleared when it is present.
 *
 * @throws IOException if one of the underlying clear operations reports an I/O failure
 */
public void clear0() throws IOException {
    this.clearCache();
    if (this.solr0 == null) {
        // no local connector attached, nothing more to delete
        return;
    }
    this.solr0.clear();
}
/**
 * Deletes everything in the remote solr index.
 * <p>
 * The cache is cleared first in every case; the {@code solr1} connector is
 * only cleared when it is present.
 *
 * @throws IOException if one of the underlying clear operations reports an I/O failure
 */
public void clear1() throws IOException {
    this.clearCache();
    if (this.solr1 == null) {
        // no remote connector attached, nothing more to delete
        return;
    }
    this.solr1.clear();
}
/**
* delete everything in the solr index
* @throws IOException

@ -193,14 +193,6 @@ public final class CrawlSwitchboard {
return new CrawlProfile(m);
}
/**
 * Reports how many entries {@code profilesActiveCrawls} currently holds.
 *
 * @return the value of {@code profilesActiveCrawls.size()}
 */
public int getActiveSize() {
    final int activeCount = this.profilesActiveCrawls.size();
    return activeCount;
}
/**
 * Reports how many entries {@code profilesPassiveCrawls} currently holds.
 *
 * @return the value of {@code profilesPassiveCrawls.size()}
 */
public int getPassiveSize() {
    final int passiveCount = this.profilesPassiveCrawls.size();
    return passiveCount;
}
/**
 * Provides the keys of {@code profilesActiveCrawls}.
 *
 * @return the key set exactly as returned by {@code profilesActiveCrawls.keySet()}
 */
public Set<byte[]> getActive() {
    final Set<byte[]> activeKeys = this.profilesActiveCrawls.keySet();
    return activeKeys;
}

@ -49,7 +49,7 @@ public abstract class AbstractBufferedIndex<ReferenceType extends Reference> ext
containerOrder.rotate(emptyContainer);
final TreeSet<ReferenceContainer<ReferenceType>> containers = new TreeSet<ReferenceContainer<ReferenceType>>(containerOrder);
final Iterator<ReferenceContainer<ReferenceType>> i = referenceContainerIterator(startHash, rot, excludePrivate, ram);
if (ram) count = Math.min(size(), count);
if (ram) count = (this instanceof IndexCell) ? count : Math.min(size(), count);
ReferenceContainer<ReferenceType> container;
// this loop does not terminate using the i.hasNex() predicate when rot == true
// because then the underlying iterator is a rotating iterator without termination

@ -45,6 +45,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.MergeIterator;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
/*
@ -537,6 +538,9 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
this.removeDelayedURLs.clear();
this.ram.clear();
this.array.clear();
if (Switchboard.getSwitchboard() != null &&
Switchboard.getSwitchboard().peers != null &&
Switchboard.getSwitchboard().peers.mySeed() != null) Switchboard.getSwitchboard().peers.mySeed().resetCounters();
}
/**
@ -557,9 +561,18 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
this.array.close();
}
/**
 * Checks whether this cell holds no entries at all.
 * <p>
 * The RAM part is looked at first; the values from {@code this.array.sizes()}
 * are only inspected when the RAM part reports nothing.
 *
 * @return {@code true} if neither the RAM part nor any value of
 *         {@code this.array.sizes()} is greater than zero
 */
public boolean isEmpty() {
    boolean empty = this.ram.size() <= 0;
    if (empty) {
        for (int count : this.array.sizes()) {
            if (count > 0) {
                // a single positive size is enough to decide
                empty = false;
                break;
            }
        }
    }
    return empty;
}
/**
 * Not supported by this class.
 * <p>
 * Summing {@code this.ram.size()} and all values of {@code this.array.sizes()}
 * is deliberately not done here; the exception message states the reason.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
public int size() {
    final String reason = "an accumulated size of index entries would not reflect the real number of words, which cannot be computed easily";
    throw new UnsupportedOperationException(reason);
}
private int[] sizes() {

@ -174,18 +174,24 @@ public final class Fulltext implements Iterable<byte[]> {
this.solr.clearCache();
}
public void clear() throws IOException {
public void clearURLIndex() throws IOException {
if (this.exportthread != null) this.exportthread.interrupt();
if (this.urlIndexFile == null) {
SplitTable.delete(this.location, this.tablename);
} else {
this.urlIndexFile.clear();
}
this.solr.clear();
// the remote solr is not cleared here because that shall be done separately
this.statsDump = null;
}
/**
 * Clears the local solr index by delegating to {@code clear0()} of the
 * solr connector held by this object.
 *
 * @throws IOException if the delegated call fails
 */
public void clearLocalSolr() throws IOException {
    solr.clear0();
}
/**
 * Clears the remote solr index by delegating to {@code clear1()} of the
 * solr connector held by this object.
 *
 * @throws IOException if the delegated call fails
 */
public void clearRemoteSolr() throws IOException {
    solr.clear1();
}
public int size() {
int size = 0;
size += this.urlIndexFile == null ? 0 : this.urlIndexFile.size();

@ -244,14 +244,13 @@ public class Segment {
/**
 * Clears the indexes held by this segment: the term index, the fulltext
 * (URL index, local solr, remote solr) and the URL citation index.
 * Every part is skipped when its field is null.
 * An IOException is logged and not rethrown. Because all clear calls share
 * one try block, the first failure skips the calls that follow it.
 * Afterwards resetCounters() is called on peers.mySeed() of the Switchboard,
 * provided the Switchboard, its peers and the seed are all non-null.
 */
public void clear() {
try {
if (this.termIndex != null) this.termIndex.clear();
// NOTE(review): fulltext.clear() is followed by the three finer-grained
// clear calls below; this looks like overlapping work - confirm whether
// the clear() call is still intended.
if (this.fulltext != null) this.fulltext.clear();
if (this.fulltext != null) this.fulltext.clearURLIndex();
if (this.fulltext != null) this.fulltext.clearLocalSolr();
if (this.fulltext != null) this.fulltext.clearRemoteSolr();
if (this.urlCitationIndex != null) this.urlCitationIndex.clear();
} catch (final IOException e) {
// best effort: the failure is only logged, the caller is not informed
Log.logException(e);
}
// reset the counters of the own seed only when the whole chain is available
if (Switchboard.getSwitchboard() != null &&
Switchboard.getSwitchboard().peers != null &&
Switchboard.getSwitchboard().peers.mySeed() != null) Switchboard.getSwitchboard().peers.mySeed().resetCounters();
}
public File getLocation() {

Loading…
Cancel
Save