From 1b102d98d84cc6cb19f731f66d26a9e7036af2c2 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 30 Apr 2013 02:11:28 +0200 Subject: [PATCH] - added index deletion to index administration submenu - added index deletion processes to the process scheduler/recorder --- htroot/IndexDeletion_p.html | 8 +- htroot/IndexDeletion_p.java | 87 ++++++++++--------- .../templates/submenuIndexControl.template | 1 + source/net/yacy/data/WorkTables.java | 1 + 4 files changed, 52 insertions(+), 45 deletions(-) diff --git a/htroot/IndexDeletion_p.html b/htroot/IndexDeletion_p.html index 0cc9f1f63..0c6e79291 100644 --- a/htroot/IndexDeletion_p.html +++ b/htroot/IndexDeletion_p.html @@ -7,7 +7,7 @@ #%env/templates/header.template%# - #%env/templates/submenuIndexCreate.template%# + #%env/templates/submenuIndexControl.template%#

Index Deletion

The search index contains #[doccount]# documents. You can delete them here. Deletions are performed concurrently, so recently deleted documents may not yet be reflected in the document count.

@@ -84,12 +84,12 @@

Delete all documents which are inside specific collections. This is the list of known collections: #[collectionlist]#

Not Assigned
-
Delete all documents which are not assigned to any collection +
Delete all documents which are not assigned to any collection
Assigned
-
Delete all documents which are assigned to the following collection(s), separated by ',' (comma) or '|' (vertical bar)
- +
Delete all documents which are assigned to the following collection(s), separated by ',' (comma) or '|' (vertical bar)
+
diff --git a/htroot/IndexDeletion_p.java b/htroot/IndexDeletion_p.java index 42fe017bc..dccf96c57 100644 --- a/htroot/IndexDeletion_p.java +++ b/htroot/IndexDeletion_p.java @@ -32,6 +32,7 @@ import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.protocol.RequestHeader; +import net.yacy.data.WorkTables; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.search.Switchboard; import net.yacy.search.query.QueryModifier; @@ -95,6 +96,48 @@ public class IndexDeletion_p { int count = post == null ? -1 : post.getInt("count", -1); + if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) { + boolean simulate = post.containsKey("simulate-urldelete"); + // parse the input + urldelete = urldelete.trim(); + String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|")); + Set ids = new HashSet(); + for (String urlStub: stubURLs) { + if (urlStub == null || urlStub.length() == 0) continue; + int pos = urlStub.indexOf("://",0); + if (pos == -1) { + if (urlStub.startsWith("www")) urlStub = "http://" + urlStub; + if (urlStub.startsWith("ftp")) urlStub = "ftp://" + urlStub; + } + try { + DigestURI u = new DigestURI(urlStub); + BlockingQueue dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":" + u.getHost(), 0, 100000000, Long.MAX_VALUE, 100, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); + SolrDocument doc; + try { + while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) { + String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); + if (url.startsWith(urlStub)) ids.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())); + } + } catch (InterruptedException e) { + } + 
sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs matching with " + urldelete); + } catch (MalformedURLException e) {} + } + + if (simulate) { + count = ids.size(); + prop.put("urldelete-active", count == 0 ? 2 : 1); + } else { + try { + defaultConnector.deleteByIds(ids); + //webgraphConnector.deleteByQuery(webgraphQuery); + } catch (IOException e) { + } + prop.put("urldelete-active", 2); + } + prop.put("urldelete-active_count", count); + } + if (post != null && (post.containsKey("simulate-timedelete") || post.containsKey("engage-timedelete"))) { boolean simulate = post.containsKey("simulate-timedelete"); Date deleteageDate = null; @@ -112,6 +155,7 @@ public class IndexDeletion_p { try { defaultConnector.deleteByQuery(collection1Query); webgraphConnector.deleteByQuery(webgraphQuery); + sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs older than " + timedelete_number + " " + timedelete_unit); } catch (IOException e) { } prop.put("timedelete-active", 2); @@ -132,6 +176,7 @@ public class IndexDeletion_p { } else { try { defaultConnector.deleteByQuery(query); + sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, collection " + collectiondelete); } catch (IOException e) { } prop.put("collectiondelete-active", 2); @@ -150,53 +195,13 @@ public class IndexDeletion_p { } else { try { defaultConnector.deleteByQuery(querydelete); + sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, solr query, q = " + querydelete); } catch (IOException e) { } prop.put("querydelete-active", 2); } prop.put("querydelete-active_count", count); } - - if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) { - boolean simulate = post.containsKey("simulate-urldelete"); - // parse the input - urldelete = urldelete.trim(); - String[] stubURLs = 
urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|")); - Set ids = new HashSet(); - for (String urlStub: stubURLs) { - if (urlStub == null || urlStub.length() == 0) continue; - int pos = urlStub.indexOf("://",0); - if (pos == -1) { - if (urlStub.startsWith("www")) urlStub = "http://" + urlStub; - if (urlStub.startsWith("ftp")) urlStub = "ftp://" + urlStub; - } - try { - DigestURI u = new DigestURI(urlStub); - BlockingQueue dq = defaultConnector.concurrentDocumentsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":" + u.getHost(), 0, 100000000, Long.MAX_VALUE, 100, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); - SolrDocument doc; - try { - while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) { - String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); - if (url.startsWith(urlStub)) ids.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())); - } - } catch (InterruptedException e) { - } - } catch (MalformedURLException e) {} - } - - if (simulate) { - count = ids.size(); - prop.put("urldelete-active", count == 0 ? 2 : 1); - } else { - try { - defaultConnector.deleteByIds(ids); - //webgraphConnector.deleteByQuery(webgraphQuery); - } catch (IOException e) { - } - prop.put("urldelete-active", 2); - } - prop.put("urldelete-active_count", count); - } // return rewrite properties return prop; diff --git a/htroot/env/templates/submenuIndexControl.template b/htroot/env/templates/submenuIndexControl.template index a60975436..ad6faf09f 100644 --- a/htroot/env/templates/submenuIndexControl.template +++ b/htroot/env/templates/submenuIndexControl.template @@ -2,6 +2,7 @@

Index Administration