added an index deletion servlet and some style changes for the

'dangerous' engage-button
pull/1/head
Michael Peter Christen 12 years ago
parent 1aac722cc6
commit 0e2ee00fea

@ -0,0 +1,121 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >
<head>
<title>YaCy '#[clientname]#': Index Deletion</title>
#%env/templates/metas.template%#
</head>
<body id="IndexDeletion">
#%env/templates/header.template%#
#%env/templates/submenuIndexCreate.template%#
<h2>Index Deletion</h2>
<p>The search index contains #[doccount]# documents. You can delete them here. Deletions run concurrently, so recently deleted documents may not yet be reflected in the document count.</p>
<!-- Delete by URL matching. Workflow: "Simulate Deletion" computes a count and arms the
     "Engage Deletion" button; touching any input disarms it again so that only the
     simulated selection can be engaged. -->
<form id="IndexDeletionPath" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <fieldset>
    <legend><label>Delete by URL Matching</label></legend>
    <p>Delete all documents within a sub-path of the given urls. That means all documents must start with one of the url stubs as given here.</p>
    <dl>
      <dt>One URL stub, a list of URL stubs<br/>or a regular expression</dt>
      <dd>
        <textarea name="urldelete" id="crawlingURL" cols="64" rows="3" onclick="d=document.getElementById('engage-urldelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-urldelete');d.disabled=true;d.className='dangerdisarmed';">#[urldelete]#</textarea>
      </dd>
      <dt>Matching Method</dt>
      <dd>
        <input type="radio" name="urldelete-mm" value="subpath" #(urldelete-mm-subpath-checked)#::checked="checked"#(/urldelete-mm-subpath-checked)# onclick="d=document.getElementById('engage-urldelete');d.disabled=true;d.className='dangerdisarmed';"/>sub-path of given URLs&nbsp;&nbsp;&nbsp;
        <input type="radio" name="urldelete-mm" value="regexp" #(urldelete-mm-regexp-checked)#::checked="checked"#(/urldelete-mm-regexp-checked)# disabled="disabled"/>matching with regular expression
      </dd>
      <dt><input type="submit" name="simulate-urldelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-urldelete" id="engage-urldelete" value="Engage Deletion" #(urldelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/urldelete-active)#/>
        #(urldelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/urldelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
<!-- Delete by age. Any change to the period or the date source disarms the "Engage Deletion"
     button; onchange is wired in addition to onclick so that keyboard changes of the
     select boxes disarm it as well. -->
<form id="IndexDeletionAge" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <fieldset>
    <legend><label>Delete by Age</label></legend>
    <p>Delete all documents which are older than a given time period.</p>
    <dl>
      <dt>Time Period</dt>
      <dd>All documents older than
        <select name="timedelete-number" id="timedelete-number" onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';">
          <option value="1" #(timedelete-n-1)#::selected="selected"#(/timedelete-n-1)#>1</option>
          <option value="2" #(timedelete-n-2)#::selected="selected"#(/timedelete-n-2)#>2</option>
          <option value="3" #(timedelete-n-3)#::selected="selected"#(/timedelete-n-3)#>3</option>
          <option value="4" #(timedelete-n-4)#::selected="selected"#(/timedelete-n-4)#>4</option>
          <option value="5" #(timedelete-n-5)#::selected="selected"#(/timedelete-n-5)#>5</option>
          <option value="6" #(timedelete-n-6)#::selected="selected"#(/timedelete-n-6)#>6</option>
          <option value="7" #(timedelete-n-7)#::selected="selected"#(/timedelete-n-7)#>7</option>
          <option value="8" #(timedelete-n-8)#::selected="selected"#(/timedelete-n-8)#>8</option>
          <option value="9" #(timedelete-n-9)#::selected="selected"#(/timedelete-n-9)#>9</option>
          <option value="10" #(timedelete-n-10)#::selected="selected"#(/timedelete-n-10)#>10</option>
          <option value="12" #(timedelete-n-12)#::selected="selected"#(/timedelete-n-12)#>12</option>
          <option value="14" #(timedelete-n-14)#::selected="selected"#(/timedelete-n-14)#>14</option>
          <option value="21" #(timedelete-n-21)#::selected="selected"#(/timedelete-n-21)#>21</option>
          <option value="24" #(timedelete-n-24)#::selected="selected"#(/timedelete-n-24)#>24</option>
          <option value="28" #(timedelete-n-28)#::selected="selected"#(/timedelete-n-28)#>28</option>
          <option value="30" #(timedelete-n-30)#::selected="selected"#(/timedelete-n-30)#>30</option>
          <option value="60" #(timedelete-n-60)#::selected="selected"#(/timedelete-n-60)#>60</option>
          <option value="90" #(timedelete-n-90)#::selected="selected"#(/timedelete-n-90)#>90</option>
        </select>
        <select name="timedelete-unit" id="timedelete-unit" onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';">
          <option value="year" #(timedelete-u-year)#::selected="selected"#(/timedelete-u-year)#>years</option>
          <option value="month" #(timedelete-u-month)#::selected="selected"#(/timedelete-u-month)#>months</option>
          <option value="day" #(timedelete-u-day)#::selected="selected"#(/timedelete-u-day)#>days</option>
          <option value="hour" #(timedelete-u-hour)#::selected="selected"#(/timedelete-u-hour)#>hours</option>
        </select>
      </dd>
      <dt>Age Identification</dt>
      <dd>
        <input type="radio" name="timedelete-source" value="loaddate" #(timedelete-source-loaddate-checked)#::checked="checked"#(/timedelete-source-loaddate-checked)# onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';"/>load date&nbsp;&nbsp;&nbsp;
        <input type="radio" name="timedelete-source" value="lastmodified" #(timedelete-source-lastmodified-checked)#::checked="checked"#(/timedelete-source-lastmodified-checked)# onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';"/>last-modified
      </dd>
      <dt><input type="submit" name="simulate-timedelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-timedelete" id="engage-timedelete" value="Engage Deletion" #(timedelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/timedelete-active)#/>
        #(timedelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/timedelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
<!-- Delete by collection membership. Changing the mode or the collection list disarms the
     "Engage Deletion" button; onchange on the text field also catches keyboard-only edits. -->
<form id="IndexDeletionCollection" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <fieldset>
    <legend><label>Delete Collections</label></legend>
    <p>Delete all documents which are inside specific collections. This is the list of known collections: #[collectionlist]#</p>
    <dl>
      <dt>Not Assigned</dt>
      <dd><input type="radio" name="collectiondelete-mode" value="unassigned" #(collectiondelete-mode-unassigned-checked)#::checked="checked"#(/collectiondelete-mode-unassigned-checked)# onclick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are not assigned to any collection
      </dd>
      <dt>Assigned</dt>
      <dd><input type="radio" name="collectiondelete-mode" value="assigned" #(collectiondelete-mode-assigned-checked)#::checked="checked"#(/collectiondelete-mode-assigned-checked)# onclick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are assigned to the following collection(s), separated by ',' (comma) or '|' (vertical bar)<br/>
        <input type="text" name="collectiondelete" id="collections" value="#[collectiondelete]#" size="96" maxlength="1024" onclick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>
      </dd>
      <dt><input type="submit" name="simulate-collectiondelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-collectiondelete" id="engage-collectiondelete" value="Engage Deletion" #(collectiondelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/collectiondelete-active)#/>
        #(collectiondelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/collectiondelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
<!-- Delete by free-form Solr query. Editing the query disarms the "Engage Deletion" button;
     onchange also catches edits made without clicking into the field. -->
<form id="IndexDeletionQuery" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <fieldset>
    <legend><label>Delete by Solr Query</label></legend>
    <p>This is the most generic option: select a set of documents using a solr query.</p>
    <dl>
      <dt>q=</dt>
      <dd>
        <input type="text" name="querydelete" id="querydelete" value="#[querydelete]#" size="96" maxlength="1024" onclick="d=document.getElementById('engage-querydelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-querydelete');d.disabled=true;d.className='dangerdisarmed';"/>
      </dd>
      <dt><input type="submit" name="simulate-querydelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-querydelete" id="engage-querydelete" value="Engage Deletion" #(querydelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/querydelete-active)#/>
        #(querydelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/querydelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
#%env/templates/footer.template%#
</body>
</html>

@ -0,0 +1,214 @@
/**
* IndexDeletion_p
* Copyright 2013 by Michael Peter Christen
* First released 29.04.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
import net.yacy.search.query.QueryModifier;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet behind IndexDeletion_p.html.
 *
 * <p>The page offers four ways to select documents for deletion (URL stub, age, collection,
 * Solr query). Each of them has a "simulate" submit button, which only counts the matching
 * documents, and an "engage" submit button, which actually deletes them. The template
 * alternative <code>*-active</code> carries the state of each form:
 * 0 = disarmed (nothing happened or the request failed), 1 = simulated with a non-zero count
 * (engage button armed), 2 = done (deleted, or the simulation matched nothing).</p>
 */
public class IndexDeletion_p {

    /** JDK logger; referenced by its fully-qualified name so that no extra import is needed. */
    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(IndexDeletion_p.class.getName());

    /**
     * Builds the template properties for the index deletion page and executes the requested
     * simulation or deletion, if any.
     *
     * @param header the request header (not used by this servlet)
     * @param post   the submitted form fields; <code>null</code> when the page is opened without
     *               a form submission, in which case only default values are produced
     * @param env    the server switch; it is cast to {@link Switchboard}
     * @return the properties used to fill the template
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();

        final SolrConnector defaultConnector = sb.index.fulltext().getDefaultConnector();
        final SolrConnector webgraphConnector = sb.index.fulltext().getWebgraphConnector();
        defaultConnector.commit(false); // we must do a commit here because the user cannot see a proper count.
        prop.put("doccount", defaultConnector.getSize());

        final String collectionField = CollectionSchema.collection_sxt.getSolrFieldName();
        String collectionlist = "[]";
        try {
            // the facet for the collection field may be missing; keep the "[]" fallback then
            final Object facet = defaultConnector.getFacets("*:*", 1000, collectionField).get(collectionField);
            if (facet != null) {
                collectionlist = facet.toString();
            }
        } catch (final IOException e) {
            LOG.log(java.util.logging.Level.WARNING, "cannot read the list of collections", e);
        }
        // NOTE(review): putHTML is expected to be the HTML-escaping variant of put in serverObjects;
        // it is used for every value below that originates from user input or index content.
        prop.putHTML("collectionlist", collectionlist);

        // Delete by URL Matching
        String urldelete = post == null ? "" : post.get("urldelete", "");
        final boolean urldelete_mm_subpath_checked = post == null || "subpath".equals(post.get("urldelete-mm", "subpath"));
        prop.putHTML("urldelete", urldelete);
        prop.put("urldelete-mm-subpath-checked", urldelete_mm_subpath_checked ? 1 : 0);
        prop.put("urldelete-mm-regexp-checked", urldelete_mm_subpath_checked ? 0 : 1);
        prop.put("urldelete-active", 0);

        // Delete by Age
        final int timedelete_number = post == null ? 14 : post.getInt("timedelete-number", 14);
        final String timedelete_unit = post == null ? "day" : post.get("timedelete-unit", "day");
        final boolean timedelete_source_loaddate_checked = post == null || "loaddate".equals(post.get("timedelete-source", "loaddate"));
        for (int i = 1; i <= 90; i++) {
            prop.put("timedelete-n-" + i, 0);
        }
        // the template alternative has exactly two cases: 0 = not selected, 1 = selected
        prop.put("timedelete-n-" + timedelete_number, 1);
        prop.put("timedelete-u-year", "year".equals(timedelete_unit) ? 1 : 0);
        prop.put("timedelete-u-month", "month".equals(timedelete_unit) ? 1 : 0);
        prop.put("timedelete-u-day", "day".equals(timedelete_unit) ? 1 : 0);
        prop.put("timedelete-u-hour", "hour".equals(timedelete_unit) ? 1 : 0);
        prop.put("timedelete-source-loaddate-checked", timedelete_source_loaddate_checked ? 1 : 0);
        prop.put("timedelete-source-lastmodified-checked", timedelete_source_loaddate_checked ? 0 : 1);
        prop.put("timedelete-active", 0);

        // Delete Collections
        final boolean collectiondelete_mode_unassigned_checked = post == null || "unassigned".equals(post.get("collectiondelete-mode", "unassigned"));
        String collectiondelete = post == null ? "" : post.get("collectiondelete", "");
        prop.put("collectiondelete-mode-unassigned-checked", collectiondelete_mode_unassigned_checked ? 1 : 0);
        prop.put("collectiondelete-mode-assigned-checked", collectiondelete_mode_unassigned_checked ? 0 : 1);
        prop.putHTML("collectiondelete", collectiondelete);
        prop.put("collectiondelete-active", 0);

        // Delete by Solr Query
        final String querydelete = post == null ? "" : post.get("querydelete", "");
        prop.putHTML("querydelete", querydelete);
        prop.put("querydelete-active", 0);

        // the count of the previous simulation is posted back by the armed form
        int count = post == null ? -1 : post.getInt("count", -1);

        if (post != null && (post.containsKey("simulate-timedelete") || post.containsKey("engage-timedelete"))) {
            final boolean simulate = post.containsKey("simulate-timedelete");
            // a non-positive number would move the cut-off into the future; an unknown unit yields 0
            final long t = timedelete_number > 0 ? timeParser(timedelete_number, timedelete_unit) : 0L; // year, month, day, hour
            if (t > 0) {
                final String range = ":[* TO " + ISO8601Formatter.FORMATTER.format(new Date(t)) + "]";
                final String collection1Query = (timedelete_source_loaddate_checked ? CollectionSchema.load_date_dt : CollectionSchema.last_modified).getSolrFieldName() + range;
                final String webgraphQuery = (timedelete_source_loaddate_checked ? WebgraphSchema.load_date_dt : WebgraphSchema.last_modified).getSolrFieldName() + range;
                if (simulate) {
                    try {
                        count = clampToInt(defaultConnector.getCountByQuery(collection1Query));
                        prop.put("timedelete-active", count == 0 ? 2 : 1);
                    } catch (final IOException e) {
                        // leave the form disarmed: there is no reliable count to show
                        LOG.log(java.util.logging.Level.WARNING, "count failed for query " + collection1Query, e);
                    }
                } else {
                    try {
                        defaultConnector.deleteByQuery(collection1Query);
                        prop.put("timedelete-active", 2);
                    } catch (final IOException e) {
                        // do not claim a deletion that did not happen
                        LOG.log(java.util.logging.Level.WARNING, "deletion failed for query " + collection1Query, e);
                    }
                    if (webgraphConnector != null) {
                        try {
                            webgraphConnector.deleteByQuery(webgraphQuery);
                        } catch (final IOException e) {
                            LOG.log(java.util.logging.Level.WARNING, "webgraph deletion failed for query " + webgraphQuery, e);
                        }
                    }
                }
            } else {
                LOG.warning("ignoring time deletion request with invalid period: " + timedelete_number + " " + timedelete_unit);
            }
            prop.put("timedelete-active_count", count);
        }

        if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) {
            final boolean simulate = post.containsKey("simulate-collectiondelete");
            // normalize the list: no blanks, '|' as the only separator
            collectiondelete = collectiondelete.replace(" ", "").replace(',', '|');
            final String query;
            if (collectiondelete_mode_unassigned_checked) {
                query = "-" + collectionField + ":[* TO *]";
            } else if (collectiondelete.length() == 0) {
                query = collectionField + ":\"\"";
            } else {
                query = QueryModifier.parseCollectionExpression(collectiondelete);
            }
            if (simulate) {
                try {
                    count = clampToInt(defaultConnector.getCountByQuery(query));
                    prop.put("collectiondelete-active", count == 0 ? 2 : 1);
                } catch (final IOException e) {
                    LOG.log(java.util.logging.Level.WARNING, "count failed for query " + query, e);
                }
            } else {
                try {
                    defaultConnector.deleteByQuery(query);
                    prop.put("collectiondelete-active", 2);
                } catch (final IOException e) {
                    LOG.log(java.util.logging.Level.WARNING, "deletion failed for query " + query, e);
                }
            }
            prop.put("collectiondelete-active_count", count);
        }

        if (post != null && (post.containsKey("simulate-querydelete") || post.containsKey("engage-querydelete"))) {
            final boolean simulate = post.containsKey("simulate-querydelete");
            if (querydelete.trim().length() == 0) {
                // an empty query selects nothing meaningful; leave the form disarmed
                LOG.warning("ignoring query deletion request with empty query");
            } else if (simulate) {
                try {
                    count = clampToInt(defaultConnector.getCountByQuery(querydelete));
                    prop.put("querydelete-active", count == 0 ? 2 : 1);
                } catch (final IOException e) {
                    LOG.log(java.util.logging.Level.WARNING, "count failed for query " + querydelete, e);
                }
            } else {
                try {
                    defaultConnector.deleteByQuery(querydelete);
                    prop.put("querydelete-active", 2);
                } catch (final IOException e) {
                    LOG.log(java.util.logging.Level.WARNING, "deletion failed for query " + querydelete, e);
                }
            }
            prop.put("querydelete-active_count", count);
        }

        if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) {
            final boolean simulate = post.containsKey("simulate-urldelete");
            // parse the input: one stub per line, or '|'-separated stubs on a single line
            urldelete = urldelete.trim();
            final String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|"));
            final String hostField = CollectionSchema.host_s.getSolrFieldName();
            final String idField = CollectionSchema.id.getSolrFieldName();
            final String skuField = CollectionSchema.sku.getSolrFieldName();
            final Set<String> ids = new HashSet<String>();
            boolean interrupted = false;
            for (final String rawStub : stubURLs) {
                String urlStub = rawStub == null ? "" : rawStub.trim();
                if (urlStub.length() == 0) {
                    continue;
                }
                if (urlStub.indexOf("://") == -1) {
                    // guess the protocol from the host name prefix
                    if (urlStub.startsWith("www")) {
                        urlStub = "http://" + urlStub;
                    }
                    if (urlStub.startsWith("ftp")) {
                        urlStub = "ftp://" + urlStub;
                    }
                }
                try {
                    final DigestURI u = new DigestURI(urlStub);
                    final String host = u.getHost();
                    if (host == null) {
                        continue;
                    }
                    // fetch id and url of all documents of the host, then filter by the stub prefix
                    final BlockingQueue<SolrDocument> dq = defaultConnector.concurrentDocumentsByQuery(hostField + ":" + host, 0, 100000000, Long.MAX_VALUE, 100, idField, skuField);
                    SolrDocument doc;
                    while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                        final String url = (String) doc.getFieldValue(skuField);
                        final String id = (String) doc.getFieldValue(idField);
                        if (url != null && id != null && url.startsWith(urlStub)) {
                            ids.add(id);
                        }
                    }
                } catch (final MalformedURLException e) {
                    LOG.warning("skipping malformed url stub: " + urlStub);
                } catch (final InterruptedException e) {
                    // restore the interrupt flag and stop: the id set is incomplete
                    Thread.currentThread().interrupt();
                    interrupted = true;
                    break;
                }
            }
            if (interrupted) {
                LOG.warning("url deletion request was interrupted while collecting documents; nothing done");
            } else if (simulate) {
                count = ids.size();
                prop.put("urldelete-active", count == 0 ? 2 : 1);
            } else {
                try {
                    if (!ids.isEmpty()) {
                        defaultConnector.deleteByIds(ids);
                    }
                    //webgraphConnector.deleteByQuery(webgraphQuery);
                    prop.put("urldelete-active", 2);
                } catch (final IOException e) {
                    LOG.log(java.util.logging.Level.WARNING, "deletion by id failed for " + ids.size() + " documents", e);
                }
            }
            prop.put("urldelete-active_count", count);
        }

        // return rewrite properties
        return prop;
    }

    /**
     * Narrows a document count to <code>int</code> without wrapping around.
     *
     * @param value the count as delivered by the connector
     * @return the value limited to the <code>int</code> range
     */
    private static int clampToInt(final long value) {
        return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, value));
    }

    /**
     * Computes the point in time that lies the given period before now.
     * A year is counted as 365 days and a month as 30 days.
     *
     * @param number the number of units
     * @param unit   one of "year", "month", "day", "hour", "minute"
     * @return the time in milliseconds since the epoch, or 0 if the unit is unknown
     */
    private static long timeParser(final int number, final String unit) {
        if ("year".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L * 24L * 365L;
        if ("month".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L * 24L * 30L;
        if ("day".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L * 24L;
        if ("hour".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L;
        if ("minute".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L;
        return 0L;
    }
}

@ -337,4 +337,48 @@ form dd, dl.pairs dd {
background-size: 16px 16px;
background-repeat:no-repeat;
background-position: 2px center;
}
/* "Engage Deletion" button: geometry shared by the disarmed and the armed state */
.dangerdisarmed,
.dangerready {
    height: 20px;
    width: 160px;
    font-size: 1.2em;
    text-indent: 9px;
    color: white;
    -webkit-border-radius: 20px;
    -moz-border-radius: 20px;
    -khtml-border-radius: 20px;
    border-radius: 20px;
    border: 0px;
}

/* disarmed: plain grey, no icon */
.dangerdisarmed {
    background: #dddddd;
}

/* armed: orange with the construction icon on the left */
.dangerready {
    background: #ff6000 url("/env/grafics/construction.gif") no-repeat 4px center;
    background-size: 24px 16px;
}

/* armed and hovered: same icon on red */
.dangerready:hover {
    background: #ff0000 url("/env/grafics/construction.gif") no-repeat 4px center;
    background-size: 24px 16px;
}
Loading…
Cancel
Save