diff --git a/htroot/IndexCreateWWWLocalQueue_p.html b/htroot/IndexCreateWWWLocalQueue_p.html index fe83d7c4a..1d1025f65 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.html +++ b/htroot/IndexCreateWWWLocalQueue_p.html @@ -18,9 +18,20 @@ It may also contain urls that are computed by the proxy-prefetch. The local crawler queue is empty

::
- + Delete Enties: + + + This may take a quite long time

+
There are #[num]# entries in the local crawler queue. Showing #[show-num]# most recent entries: diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index 7a099eb92..e7de920a7 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -45,13 +45,18 @@ import java.text.SimpleDateFormat; import java.util.Date; +import java.util.Iterator; import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaCrawlNURL.Entry; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; @@ -70,10 +75,65 @@ public class IndexCreateWWWLocalQueue_p { serverObjects prop = new serverObjects(); if (post != null) { - if (post.containsKey("clearcrawlqueue")) { - int c = switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE); - switchboard.urlPool.noticeURL.clear(plasmaCrawlNURL.STACK_TYPE_CORE); - switchboard.cleanProfiles(); + if (post.containsKey("deleteEntries")) { + int c = 0; + + String pattern = post.get("pattern", ".*").trim(); + String option = post.get("option", ".*").trim(); + if (pattern.equals(".*")) { + c = switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE); + switchboard.urlPool.noticeURL.clear(plasmaCrawlNURL.STACK_TYPE_CORE); + switchboard.cleanProfiles(); + } else{ + Pattern compiledPattern = null; + try { + // compiling the regular expression + compiledPattern = Pattern.compile(pattern); + + // iterating through the list of URLs + Iterator iter = switchboard.urlPool.noticeURL.iterator(plasmaCrawlNURL.STACK_TYPE_CORE); + while (iter.hasNext()) { + String value = null; + String nextHash = new String((byte[]) iter.next()); + Entry entry = switchboard.urlPool.noticeURL.getEntry(nextHash); + if (entry == null) continue; + + if ((option.equals("URL")&&(entry.url() != null))) { + value = entry.url().toString(); + } else if ((option.equals("AnchorName"))) { + value = entry.name(); + } else if ((option.equals("Profile"))) { + String profileHandle = entry.profileHandle(); + if (profileHandle == null) { + value = "unknown"; + } else { + plasmaCrawlProfile.entry profile = switchboard.profiles.getEntry(profileHandle); + if (profile == null) { + value = "unknown"; + } else { + value = profile.name(); + } + } + } else if ((option.equals("Depth"))) { + value = Integer.toString(entry.depth()); + } else if ((option.equals("Initiator"))) { + value = (entry.initiator()==null)?"proxy":wikiCode.replaceHTML(entry.initiator()); + } else if ((option.equals("ModifiedDate"))) { + value = daydate(entry.loaddate()); + } + + if (value != null) { + Matcher matcher = compiledPattern.matcher(value); + if (matcher.find()) { + switchboard.urlPool.noticeURL.remove(nextHash); + } + } + + } + } catch (PatternSyntaxException e) { + e.printStackTrace(); + } + } prop.put("info", 3);//crawling queue cleared prop.put("info_numEntries", c); diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index a6dc8f7bd..c26336be8 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -251,6 +251,19 @@ public class plasmaCrawlNURL extends plasmaURL { default: return null; } } + + public Iterator iterator(int stackType) { + switch (stackType) { + case STACK_TYPE_CORE: return coreStack.iterator(); + case STACK_TYPE_LIMIT: return limitStack.iterator(); + case STACK_TYPE_OVERHANG: return overhangStack.iterator(); + case STACK_TYPE_REMOTE: return remoteStack.iterator(); + case STACK_TYPE_IMAGE: return imageStack.iterator(); + case STACK_TYPE_MOVIE: return movieStack.iterator(); + case STACK_TYPE_MUSIC: return musicStack.iterator(); + default: return null; + } + } public Entry pop(int stackType) { switch (stackType) {