- added index deletion to index administration submenu

- added index deletion processes to the process scheduler/recorder
pull/1/head
Michael Peter Christen 12 years ago
parent ee95e772cf
commit 1b102d98d8

@ -7,7 +7,7 @@
<body id="IndexDeletion">
#%env/templates/header.template%#
#%env/templates/submenuIndexCreate.template%#
#%env/templates/submenuIndexControl.template%#
<h2>Index Deletion</h2>
<p>The search index contains #[doccount]# documents. You can delete them here. Deletions run concurrently, so recently deleted documents may not yet be reflected in the document count.</p>
<form id="IndexDeletionPath" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
@ -84,12 +84,12 @@
<p>Delete all documents which are inside specific collections. This is the list of known collections: #[collectionlist]#</p>
<dl>
<dt>Not Assigned</dt>
<dd><input type="radio" name="collectiondelete-mode" value="unassigned" #(collectiondelete-mode-unassigned-checked)#::checked="checked"#(/collectiondelete-mode-unassigned-checked)# onClick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are not assigned to any collection
<dd><input type="radio" name="collectiondelete-mode" id="collectiondelete-mode-unassigned" value="unassigned" #(collectiondelete-mode-unassigned-checked)#::checked="checked"#(/collectiondelete-mode-unassigned-checked)# onClick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are not assigned to any collection
</dd>
<dt>Assigned</dt>
<dd><input type="radio" name="collectiondelete-mode" value="assigned" #(collectiondelete-mode-assigned-checked)#::checked="checked"#(/collectiondelete-mode-assigned-checked)# onClick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are assigned to the following collection(s), separated by ',' (comma) or '|' (vertical bar)<br/>
<input type="text" name="collectiondelete" id="collections" value="#[collectiondelete]#" size="96" maxlength="1024" onClick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>
<dd><input type="radio" name="collectiondelete-mode" id="collectiondelete-mode-assigned" value="assigned" #(collectiondelete-mode-assigned-checked)#::checked="checked"#(/collectiondelete-mode-assigned-checked)# onClick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are assigned to the following collection(s), separated by ',' (comma) or '|' (vertical bar)<br/>
<input type="text" name="collectiondelete" id="collections" value="#[collectiondelete]#" size="96" maxlength="1024" onClick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';document.getElementById('collectiondelete-mode-assigned').checked=true;"/>
</dd>
<dt><input type="submit" name="simulate-collectiondelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
<dd><input type="submit" name="engage-collectiondelete" id="engage-collectiondelete" value="Engage Deletion" #(collectiondelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/collectiondelete-active)#/>

@ -32,6 +32,7 @@ import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.WorkTables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
import net.yacy.search.query.QueryModifier;
@ -95,6 +96,48 @@ public class IndexDeletion_p {
int count = post == null ? -1 : post.getInt("count", -1);
// Handler for the "delete by URL stub" form: either simulates the deletion
// (only counts the matching documents) or engages it (deletes them).
// 'urldelete' holds one or more URL stubs, separated by line breaks or by '|'.
// Every document on a stub's host whose sku (url) starts with the stub is selected.
// Writes "urldelete-active" (1 if a simulation found at least one document, otherwise 2)
// and "urldelete-active_count" into prop.
if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) {
    boolean simulate = post.containsKey("simulate-urldelete");
    // parse the input: line breaks take precedence as separator, otherwise split at '|'
    urldelete = urldelete.trim();
    String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|"));
    final String idField = CollectionSchema.id.getSolrFieldName();
    final String skuField = CollectionSchema.sku.getSolrFieldName();
    Set<String> ids = new HashSet<String>();
    for (String urlStub: stubURLs) {
        if (urlStub == null) continue;
        // stubs may carry stray blanks around the separators
        urlStub = urlStub.trim();
        if (urlStub.length() == 0) continue;
        // complete stubs that were entered without a protocol
        if (urlStub.indexOf("://") == -1) {
            if (urlStub.startsWith("www")) urlStub = "http://" + urlStub;
            else if (urlStub.startsWith("ftp")) urlStub = "ftp://" + urlStub;
        }
        try {
            DigestURI u = new DigestURI(urlStub);
            String host = u.getHost();
            // without a host there is nothing meaningful to query for
            if (host == null) continue;
            // fetch id and sku of all documents of that host, then filter by the stub prefix
            BlockingQueue<SolrDocument> dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":" + host, 0, 100000000, Long.MAX_VALUE, 100, idField, skuField);
            SolrDocument doc;
            try {
                while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                    String url = (String) doc.getFieldValue(skuField);
                    // documents without a sku value cannot match a stub
                    if (url != null && url.startsWith(urlStub)) ids.add((String) doc.getFieldValue(idField));
                }
            } catch (InterruptedException e) {
                // restore the interrupt flag for the caller and stop collecting;
                // any further take() would fail immediately anyway
                Thread.currentThread().interrupt();
                break;
            }
        } catch (MalformedURLException e) {
            // the stub is not a parseable url: skip it and continue with the next one
        }
    }
    if (simulate) {
        count = ids.size();
        prop.put("urldelete-active", count == 0 ? 2 : 1);
    } else {
        try {
            defaultConnector.deleteByIds(ids);
            // NOTE: the webgraph index is not cleaned up here (no webgraph query is available for url stubs)
            // record the call once and only after an engaged deletion, like the other deletion handlers do
            sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs matching with " + urldelete);
        } catch (IOException e) {
            // deletion failed: nothing is recorded; the page still reports the state below
        }
        prop.put("urldelete-active", 2);
    }
    prop.put("urldelete-active_count", count);
}
if (post != null && (post.containsKey("simulate-timedelete") || post.containsKey("engage-timedelete"))) {
boolean simulate = post.containsKey("simulate-timedelete");
Date deleteageDate = null;
@ -112,6 +155,7 @@ public class IndexDeletion_p {
try {
defaultConnector.deleteByQuery(collection1Query);
webgraphConnector.deleteByQuery(webgraphQuery);
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs older than " + timedelete_number + " " + timedelete_unit);
} catch (IOException e) {
}
prop.put("timedelete-active", 2);
@ -132,6 +176,7 @@ public class IndexDeletion_p {
} else {
try {
defaultConnector.deleteByQuery(query);
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, collection " + collectiondelete);
} catch (IOException e) {
}
prop.put("collectiondelete-active", 2);
@ -150,6 +195,7 @@ public class IndexDeletion_p {
} else {
try {
defaultConnector.deleteByQuery(querydelete);
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, solr query, q = " + querydelete);
} catch (IOException e) {
}
prop.put("querydelete-active", 2);
@ -157,47 +203,6 @@ public class IndexDeletion_p {
prop.put("querydelete-active_count", count);
}
// Handler for the "delete by URL stub" form (position of this block before it was moved up):
// simulates or engages the deletion of all documents whose sku starts with one of the stubs.
if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) {
boolean simulate = post.containsKey("simulate-urldelete");
// parse the input
// stubs are separated by line breaks if there are any, otherwise by '|'
urldelete = urldelete.trim();
String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|"));
Set<String> ids = new HashSet<String>();
for (String urlStub: stubURLs) {
if (urlStub == null || urlStub.length() == 0) continue;
// complete stubs that were entered without a protocol
int pos = urlStub.indexOf("://",0);
if (pos == -1) {
if (urlStub.startsWith("www")) urlStub = "http://" + urlStub;
if (urlStub.startsWith("ftp")) urlStub = "ftp://" + urlStub;
}
try {
DigestURI u = new DigestURI(urlStub);
// query id and sku of the documents of the stub's host; the prefix filter is applied below
BlockingQueue<SolrDocument> dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":" + u.getHost(), 0, 100000000, Long.MAX_VALUE, 100, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
SolrDocument doc;
try {
// the queue is read until the poison document is taken
while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
if (url.startsWith(urlStub)) ids.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
}
} catch (InterruptedException e) {
// NOTE(review): the interruption is swallowed here and the interrupt flag is not restored
}
} catch (MalformedURLException e) {}
}
if (simulate) {
// simulation: only report how many documents would be deleted
count = ids.size();
prop.put("urldelete-active", count == 0 ? 2 : 1);
} else {
try {
defaultConnector.deleteByIds(ids);
//webgraphConnector.deleteByQuery(webgraphQuery);
} catch (IOException e) {
// NOTE(review): a failed deletion is silently ignored
}
prop.put("urldelete-active", 2);
}
prop.put("urldelete-active_count", count);
}
// return rewrite properties
return prop;
}

@ -2,6 +2,7 @@
<h3>Index Administration</h3>
<ul class="SubMenu">
<li><a href="/IndexControlURLs_p.html" class="MenuItemLink lock">URL Database Administration</a></li>
<li><a href="/IndexDeletion_p.html" class="MenuItemLink lock">Index Deletion</a></li>
<li><a href="/IndexFederated_p.html" class="MenuItemLink lock">Index Sources &amp; Targets</a></li>
<li><a href="/IndexSchema_p.html" class="MenuItemLink lock">Solr Schema Editor</a></li>
#(p2p)#::<li><a href="/IndexControlRWIs_p.html" class="MenuItemLink lock">Reverse Word Index</a></li>#(/p2p)#

@ -61,6 +61,7 @@ public class WorkTables extends Tables {
public final static String TABLE_API_TYPE_STEERING = "steering";
public final static String TABLE_API_TYPE_CONFIGURATION = "configuration";
public final static String TABLE_API_TYPE_CRAWLER = "crawler";
public final static String TABLE_API_TYPE_DELETION = "deletion";
public final static String TABLE_API_COL_TYPE = "type";
public final static String TABLE_API_COL_COMMENT = "comment";

Loading…
Cancel
Save