'dangerous' engage-buttonpull/1/head
parent
1aac722cc6
commit
0e2ee00fea
@ -0,0 +1,121 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >
|
||||
<head>
|
||||
<title>YaCy '#[clientname]#': Index Deletion</title>
|
||||
#%env/templates/metas.template%#
|
||||
</head>
|
||||
<body id="IndexDeletion">
|
||||
|
||||
#%env/templates/header.template%#
|
||||
#%env/templates/submenuIndexCreate.template%#
|
||||
<h2>Index Deletion</h2>
|
||||
<p>The search index contains #[doccount]# documents. You can delete them here. Deletions are performed concurrently, so recently deleted documents may not yet be reflected in the document count.</p>
|
||||
<form id="IndexDeletionPath" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <!-- Delete by URL matching. Two step workflow: "Simulate Deletion" computes a count and arms the
       "Engage Deletion" button; editing the URL list afterwards disarms it again so that the
       displayed count always belongs to the URLs that will actually be deleted. -->
  <fieldset>
    <legend><label>Delete by URL Matching</label></legend>
    <p>Delete all documents within a sub-path of the given urls. That means all documents must start with one of the url stubs as given here.</p>
    <dl>
      <dt>One URL stub, a list of URL stubs<br/>or a regular expression</dt>
      <dd>
        <textarea name="urldelete" id="crawlingURL" cols="64" rows="3" onchange="d=document.getElementById('engage-urldelete');d.disabled=true;d.className='dangerdisarmed';">#[urldelete]#</textarea>
      </dd>
      <dt>Matching Method</dt>
      <dd>
        <input type="radio" name="urldelete-mm" value="subpath" #(urldelete-mm-subpath-checked)#::checked="checked"#(/urldelete-mm-subpath-checked)# />sub-path of given URLs
        <input type="radio" name="urldelete-mm" value="regexp" #(urldelete-mm-regexp-checked)#::checked="checked"#(/urldelete-mm-regexp-checked)# disabled="disabled"/>matching with regular expression
      </dd>
      <dt><input type="submit" name="simulate-urldelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-urldelete" id="engage-urldelete" value="Engage Deletion" #(urldelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/urldelete-active)#/>
        #(urldelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/urldelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
|
||||
|
||||
<form id="IndexDeletionAge" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <!-- Delete by age. Every control that influences the selection disarms the "Engage Deletion"
       button again. The selects listen to change as well as click, because a value changed with
       the keyboard never produces a click event. -->
  <fieldset>
    <legend><label>Delete by Age</label></legend>
    <p>Delete all documents which are older than a given time period.</p>
    <dl>
      <dt>Time Period</dt>
      <dd>All documents older than
        <select name="timedelete-number" id="timedelete-number" onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';">
          <option value="1" #(timedelete-n-1)#::selected="selected"#(/timedelete-n-1)#>1</option>
          <option value="2" #(timedelete-n-2)#::selected="selected"#(/timedelete-n-2)#>2</option>
          <option value="3" #(timedelete-n-3)#::selected="selected"#(/timedelete-n-3)#>3</option>
          <option value="4" #(timedelete-n-4)#::selected="selected"#(/timedelete-n-4)#>4</option>
          <option value="5" #(timedelete-n-5)#::selected="selected"#(/timedelete-n-5)#>5</option>
          <option value="6" #(timedelete-n-6)#::selected="selected"#(/timedelete-n-6)#>6</option>
          <option value="7" #(timedelete-n-7)#::selected="selected"#(/timedelete-n-7)#>7</option>
          <option value="8" #(timedelete-n-8)#::selected="selected"#(/timedelete-n-8)#>8</option>
          <option value="9" #(timedelete-n-9)#::selected="selected"#(/timedelete-n-9)#>9</option>
          <option value="10" #(timedelete-n-10)#::selected="selected"#(/timedelete-n-10)#>10</option>
          <option value="12" #(timedelete-n-12)#::selected="selected"#(/timedelete-n-12)#>12</option>
          <option value="14" #(timedelete-n-14)#::selected="selected"#(/timedelete-n-14)#>14</option>
          <option value="21" #(timedelete-n-21)#::selected="selected"#(/timedelete-n-21)#>21</option>
          <option value="24" #(timedelete-n-24)#::selected="selected"#(/timedelete-n-24)#>24</option>
          <option value="28" #(timedelete-n-28)#::selected="selected"#(/timedelete-n-28)#>28</option>
          <option value="30" #(timedelete-n-30)#::selected="selected"#(/timedelete-n-30)#>30</option>
          <option value="60" #(timedelete-n-60)#::selected="selected"#(/timedelete-n-60)#>60</option>
          <option value="90" #(timedelete-n-90)#::selected="selected"#(/timedelete-n-90)#>90</option>
        </select>
        <select name="timedelete-unit" id="timedelete-unit" onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';">
          <option value="year" #(timedelete-u-year)#::selected="selected"#(/timedelete-u-year)#>years</option>
          <option value="month" #(timedelete-u-month)#::selected="selected"#(/timedelete-u-month)#>months</option>
          <option value="day" #(timedelete-u-day)#::selected="selected"#(/timedelete-u-day)#>days</option>
          <option value="hour" #(timedelete-u-hour)#::selected="selected"#(/timedelete-u-hour)#>hours</option>
        </select>
      </dd>
      <dt>Age Identification</dt>
      <dd>
        <input type="radio" name="timedelete-source" value="loaddate" #(timedelete-source-loaddate-checked)#::checked="checked"#(/timedelete-source-loaddate-checked)# onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';"/>load date
        <input type="radio" name="timedelete-source" value="lastmodified" #(timedelete-source-lastmodified-checked)#::checked="checked"#(/timedelete-source-lastmodified-checked)# onclick="d=document.getElementById('engage-timedelete');d.disabled=true;d.className='dangerdisarmed';"/>last-modified
      </dd>
      <dt><input type="submit" name="simulate-timedelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-timedelete" id="engage-timedelete" value="Engage Deletion" #(timedelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/timedelete-active)#/>
        #(timedelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/timedelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
|
||||
|
||||
<form id="IndexDeletionCollection" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <!-- Delete by collection membership. Changing the mode or the collection list disarms the
       "Engage Deletion" button; the text field also listens to change so that edits made with
       the keyboard only (no mouse click) disarm it as well. -->
  <fieldset>
    <legend><label>Delete Collections</label></legend>
    <p>Delete all documents which are inside specific collections. This is the list of known collections: #[collectionlist]#</p>
    <dl>
      <dt>Not Assigned</dt>
      <dd><input type="radio" name="collectiondelete-mode" value="unassigned" #(collectiondelete-mode-unassigned-checked)#::checked="checked"#(/collectiondelete-mode-unassigned-checked)# onclick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are not assigned to any collection
      </dd>
      <dt>Assigned</dt>
      <dd><input type="radio" name="collectiondelete-mode" value="assigned" #(collectiondelete-mode-assigned-checked)#::checked="checked"#(/collectiondelete-mode-assigned-checked)# onclick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>Delete all documents which are assigned to the following collection(s), separated by ',' (comma) or '|' (vertical bar)<br/>
        <input type="text" name="collectiondelete" id="collections" value="#[collectiondelete]#" size="96" maxlength="1024" onclick="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-collectiondelete');d.disabled=true;d.className='dangerdisarmed';"/>
      </dd>
      <dt><input type="submit" name="simulate-collectiondelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-collectiondelete" id="engage-collectiondelete" value="Engage Deletion" #(collectiondelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/collectiondelete-active)#/>
        #(collectiondelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/collectiondelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
|
||||
|
||||
<form id="IndexDeletionQuery" action="IndexDeletion_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
  <!-- Delete by an arbitrary Solr query. Editing the query disarms the "Engage Deletion" button;
       the change handler covers edits made without a mouse click (keyboard focus, paste). -->
  <fieldset>
    <legend><label>Delete by Solr Query</label></legend>
    <p>This is the most generic option: select a set of documents using a solr query.</p>
    <dl>
      <dt>q=</dt>
      <dd>
        <input type="text" name="querydelete" id="querydelete" value="#[querydelete]#" size="96" maxlength="1024" onclick="d=document.getElementById('engage-querydelete');d.disabled=true;d.className='dangerdisarmed';" onchange="d=document.getElementById('engage-querydelete');d.disabled=true;d.className='dangerdisarmed';"/>
      </dd>
      <dt><input type="submit" name="simulate-querydelete" value="Simulate Deletion" class="submitready" title="no actual deletion, generates only a deletion count"/></dt>
      <dd><input type="submit" name="engage-querydelete" id="engage-querydelete" value="Engage Deletion" #(querydelete-active)#class="dangerdisarmed" disabled="disabled" title="simulate a deletion first to calculate the deletion count"::class="dangerready"::class="dangerdisarmed" disabled="disabled" title="engaged"#(/querydelete-active)#/>
        #(querydelete-active)#::<span class="pending">selected #[count]# documents for deletion</span><input type="hidden" name="count" id="count" value="#[count]#" />::<span class="commit">deleted #[count]# documents</span>#(/querydelete-active)#
      </dd>
    </dl>
  </fieldset>
</form>
|
||||
|
||||
#%env/templates/footer.template%#
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,214 @@
|
||||
/**
|
||||
* IndexDeletion_p
|
||||
* Copyright 2013 by Michael Peter Christen
|
||||
* First released 29.04.2013 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
|
||||
import net.yacy.cora.date.ISO8601Formatter;
|
||||
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.query.QueryModifier;
|
||||
import net.yacy.search.schema.CollectionSchema;
|
||||
import net.yacy.search.schema.WebgraphSchema;
|
||||
import net.yacy.server.serverObjects;
|
||||
import net.yacy.server.serverSwitch;
|
||||
|
||||
public class IndexDeletion_p {
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
final serverObjects prop = new serverObjects();
|
||||
|
||||
SolrConnector defaultConnector = sb.index.fulltext().getDefaultConnector();
|
||||
SolrConnector webgraphConnector = sb.index.fulltext().getWebgraphConnector();
|
||||
defaultConnector.commit(false); // we must do a commit here because the user cannot see a proper count.
|
||||
prop.put("doccount", defaultConnector.getSize());
|
||||
try {
|
||||
prop.put("collectionlist", defaultConnector.getFacets("*:*", 1000, CollectionSchema.collection_sxt.getSolrFieldName()).get(CollectionSchema.collection_sxt.getSolrFieldName()).toString());
|
||||
} catch (IOException e1) {
|
||||
prop.put("collectionlist", "[]");
|
||||
}
|
||||
|
||||
// Delete by URL Matching
|
||||
String urldelete = post == null ? "" : post.get("urldelete", "");
|
||||
boolean urldelete_mm_subpath_checked = post == null ? true : post.get("urldelete-mm", "subpath").equals("subpath");
|
||||
prop.put("urldelete", urldelete);
|
||||
prop.put("urldelete-mm-subpath-checked", urldelete_mm_subpath_checked ? 1 : 0);
|
||||
prop.put("urldelete-mm-regexp-checked", urldelete_mm_subpath_checked ? 0 : 1);
|
||||
prop.put("urldelete-active", 0);
|
||||
|
||||
// Delete by Age
|
||||
int timedelete_number = post == null ? 14 : post.getInt("timedelete-number", 14);
|
||||
String timedelete_unit = post == null ? "day" : post.get("timedelete-unit", "day");
|
||||
boolean timedelete_source_loaddate_checked = post == null ? true : post.get("timedelete-source", "loaddate").equals("loaddate");
|
||||
for (int i = 1; i <= 90; i++) prop.put("timedelete-n-" + i, 0);
|
||||
prop.put("timedelete-n-" + timedelete_number, timedelete_number);
|
||||
prop.put("timedelete-u-year", timedelete_unit.equals("year") ? 1 : 0);
|
||||
prop.put("timedelete-u-month", timedelete_unit.equals("month") ? 1 : 0);
|
||||
prop.put("timedelete-u-day", timedelete_unit.equals("day") ? 1 : 0);
|
||||
prop.put("timedelete-u-hour", timedelete_unit.equals("hour") ? 1 : 0);
|
||||
prop.put("timedelete-source-loaddate-checked", timedelete_source_loaddate_checked ? 1 : 0);
|
||||
prop.put("timedelete-source-lastmodified-checked", timedelete_source_loaddate_checked ? 0 : 1);
|
||||
prop.put("timedelete-active", 0);
|
||||
|
||||
// Delete Collections
|
||||
boolean collectiondelete_mode_unassigned_checked = post == null ? true : post.get("collectiondelete-mode", "unassigned").equals("unassigned");
|
||||
String collectiondelete = post == null ? "" : post.get("collectiondelete", "");
|
||||
prop.put("collectiondelete-mode-unassigned-checked", collectiondelete_mode_unassigned_checked ? 1 : 0);
|
||||
prop.put("collectiondelete-mode-assigned-checked", collectiondelete_mode_unassigned_checked ? 0 : 1);
|
||||
prop.put("collectiondelete", collectiondelete);
|
||||
prop.put("collectiondelete-active", 0);
|
||||
|
||||
// Delete by Solr Query
|
||||
prop.put("querydelete", "");
|
||||
String querydelete = post == null ? "" : post.get("querydelete", "");
|
||||
prop.put("querydelete", querydelete);
|
||||
prop.put("querydelete-active", 0);
|
||||
|
||||
int count = post == null ? -1 : post.getInt("count", -1);
|
||||
|
||||
if (post != null && (post.containsKey("simulate-timedelete") || post.containsKey("engage-timedelete"))) {
|
||||
boolean simulate = post.containsKey("simulate-timedelete");
|
||||
Date deleteageDate = null;
|
||||
long t = timeParser(timedelete_number, timedelete_unit); // year, month, day, hour
|
||||
if (t > 0) deleteageDate = new Date(t);
|
||||
final String collection1Query = (timedelete_source_loaddate_checked ? CollectionSchema.load_date_dt : CollectionSchema.last_modified).getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]";
|
||||
final String webgraphQuery = (timedelete_source_loaddate_checked ? WebgraphSchema.load_date_dt : WebgraphSchema.last_modified).getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]";
|
||||
if (simulate) {
|
||||
try {
|
||||
count = (int) defaultConnector.getCountByQuery(collection1Query);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("timedelete-active", count == 0 ? 2 : 1);
|
||||
} else {
|
||||
try {
|
||||
defaultConnector.deleteByQuery(collection1Query);
|
||||
webgraphConnector.deleteByQuery(webgraphQuery);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("timedelete-active", 2);
|
||||
}
|
||||
prop.put("timedelete-active_count", count);
|
||||
}
|
||||
|
||||
if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) {
|
||||
boolean simulate = post.containsKey("simulate-collectiondelete");
|
||||
collectiondelete = collectiondelete.replaceAll(" ","").replaceAll(",", "|");
|
||||
String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + ":[* TO *]" : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete);
|
||||
if (simulate) {
|
||||
try {
|
||||
count = (int) defaultConnector.getCountByQuery(query);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("collectiondelete-active", count == 0 ? 2 : 1);
|
||||
} else {
|
||||
try {
|
||||
defaultConnector.deleteByQuery(query);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("collectiondelete-active", 2);
|
||||
}
|
||||
prop.put("collectiondelete-active_count", count);
|
||||
}
|
||||
|
||||
if (post != null && (post.containsKey("simulate-querydelete") || post.containsKey("engage-querydelete"))) {
|
||||
boolean simulate = post.containsKey("simulate-querydelete");
|
||||
if (simulate) {
|
||||
try {
|
||||
count = (int) defaultConnector.getCountByQuery(querydelete);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("querydelete-active", count == 0 ? 2 : 1);
|
||||
} else {
|
||||
try {
|
||||
defaultConnector.deleteByQuery(querydelete);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("querydelete-active", 2);
|
||||
}
|
||||
prop.put("querydelete-active_count", count);
|
||||
}
|
||||
|
||||
if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) {
|
||||
boolean simulate = post.containsKey("simulate-urldelete");
|
||||
// parse the input
|
||||
urldelete = urldelete.trim();
|
||||
String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|"));
|
||||
Set<String> ids = new HashSet<String>();
|
||||
for (String urlStub: stubURLs) {
|
||||
if (urlStub == null || urlStub.length() == 0) continue;
|
||||
int pos = urlStub.indexOf("://",0);
|
||||
if (pos == -1) {
|
||||
if (urlStub.startsWith("www")) urlStub = "http://" + urlStub;
|
||||
if (urlStub.startsWith("ftp")) urlStub = "ftp://" + urlStub;
|
||||
}
|
||||
try {
|
||||
DigestURI u = new DigestURI(urlStub);
|
||||
BlockingQueue<SolrDocument> dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":" + u.getHost(), 0, 100000000, Long.MAX_VALUE, 100, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
|
||||
SolrDocument doc;
|
||||
try {
|
||||
while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
|
||||
String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
|
||||
if (url.startsWith(urlStub)) ids.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
} catch (MalformedURLException e) {}
|
||||
}
|
||||
|
||||
if (simulate) {
|
||||
count = ids.size();
|
||||
prop.put("urldelete-active", count == 0 ? 2 : 1);
|
||||
} else {
|
||||
try {
|
||||
defaultConnector.deleteByIds(ids);
|
||||
//webgraphConnector.deleteByQuery(webgraphQuery);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
prop.put("urldelete-active", 2);
|
||||
}
|
||||
prop.put("urldelete-active_count", count);
|
||||
}
|
||||
|
||||
// return rewrite properties
|
||||
return prop;
|
||||
}
|
||||
|
||||
private static long timeParser(final int number, final String unit) {
|
||||
if ("year".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L * 24L * 365L;
|
||||
if ("month".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L * 24L * 30L;
|
||||
if ("day".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L * 24L;
|
||||
if ("hour".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L * 60L;
|
||||
if ("minute".equals(unit)) return System.currentTimeMillis() - number * 1000L * 60L;
|
||||
return 0L;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue