add to blacklist button in CrawlResults

http://bugs.yacy.net/view.php?id=220
Introduced a Blacklist.add variant that takes only the blacklist source file name (plus host and path).
pull/1/head
reger 11 years ago
parent 17b454f957
commit 58ecf5e4dd

@ -73,9 +73,19 @@
<p><em>Statistics about #[domains]# domains in this stack:</em></p>
<table cellpadding="2" cellspacing="1" >
<tr class="TableHeader">
<td align="center"></td>
<td align="center"></td>
<td><strong>Domain</strong></td>
<td><strong>URLs</strong></td>
<td>Blacklist to use
<form name="selectblacklistform" action="#[feedbackpage]#">
<select name="selectedblacklist" onchange="forms.selectblacklistform.submit();">
#{blacklists}#
<option #[selected]# value="#[name]#">#[name]#</option>
#{/blacklists}#
</select>
<input type="hidden" name="process" value="#[tabletype]#" />
</form>
</td>
</tr>
#{domains}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
@ -84,12 +94,23 @@
<div>
<input type="hidden" name="process" value="#[tabletype]#" />
<input type="hidden" name="domain" value="#[domain]#" />
<input type="hidden" name="blacklistname" value="#[blacklistname]#" />
<input type="submit" name="deletedomain" value="delete all" />
</div>
</form>
</td>
<td><a href="http://#[domain]#/" target="_blank">#[domain]#</a></td>
<td>#[count]#</td>
<td>
  <!-- Per-domain action: delete the domain's entries and add the domain to the selected blacklist.
       The ampersand in the button label is written as &amp; so the attribute value is well-formed markup;
       browsers decode it back to "&", so the displayed and submitted value is unchanged. -->
  <form action="#[feedbackpage]#" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
    <div align="center">
      <input type="hidden" name="process" value="#[tabletype]#" />
      <input type="hidden" name="domain" value="#[domain]#" />
      <input type="hidden" name="blacklistname" value="#[blacklistname]#" />
      <input type="submit" name="delandaddtoblacklist" value="del &amp; blacklist" />
    </div>
  </form>
</td>
</tr>
#{/domains}#
</table><br />

@ -29,6 +29,7 @@ import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@ -40,8 +41,11 @@ import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.ResultURLs;
import net.yacy.crawler.data.ResultURLs.EventOrigin;
import net.yacy.crawler.data.ResultURLs.InitExecEntry;
import net.yacy.data.ListManager;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
@ -105,7 +109,8 @@ public class CrawlResults {
return prop;
}
}
String selectedblacklist = post.get("selectedblacklist",Blacklist.defaultBlacklist(ListManager.listsPath));
if (post != null) {
// custom number of lines
if (post.containsKey("count")) {
@ -123,13 +128,19 @@ public class CrawlResults {
}
}
if (post.containsKey("deletedomain")) {
if (post.containsKey("deletedomain") || post.containsKey("delandaddtoblacklist")) {
final String domain = post.get("domain", null);
if (domain != null) {
selectedblacklist = post.get("blacklistname");
Set<String> hostnames = new HashSet<String>();
hostnames.add(domain);
sb.index.fulltext().deleteStaleDomainNames(hostnames, null);
ResultURLs.deleteDomain(tabletype, domain);
// handle addtoblacklist
if (post.containsKey("delandaddtoblacklist")) {
Switchboard.urlBlacklist.add(selectedblacklist, domain, ".*");
}
}
}
@ -297,11 +308,33 @@ public class CrawlResults {
prop.put("table_domains_" + cnt + "_tabletype", tabletype.getCode());
prop.put("table_domains_" + cnt + "_domain", domain);
prop.put("table_domains_" + cnt + "_count", ResultURLs.domainCount(tabletype, domain));
prop.put("table_domains_" + cnt + "_blacklistname", selectedblacklist);
dark = !dark;
cnt++;
}
prop.put("table_domains", cnt);
// load all blacklist files located in the directory
List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
int blacklistCount = 0;
if (dirlist != null) {
for (final String element : dirlist) {
if (element.equals(selectedblacklist)) {
prop.put("table_blacklists_" + blacklistCount + "_selected", "selected");
} else {
prop.put("table_blacklists_" + blacklistCount + "_selected", "");
}
prop.putXML("table_blacklists_" + blacklistCount + "_name", element);
blacklistCount++;
}
prop.put("table_blacklists", blacklistCount);
}
}
prop.put("process", tabletype.getCode());
// return rewrite properties
return prop;

@ -354,6 +354,53 @@ public class Blacklist {
}
}
/**
 * Appends an entry of the form {@code host/path} to a blacklist source file.
 * <p>
 * A single leading {@code '/'} is removed from {@code path}, the host is converted to
 * lower case, and a leading {@code '*'} is turned into {@code ".*"} (for the host only
 * when {@code isMatchable(host)} is false) so that the stored expression can be compiled.
 * The line is appended only if {@code blacklistFileContains} does not already report it
 * for the file. I/O problems are logged and not propagated to the caller.
 *
 * @param blacklistSourcefile name of the blacklist file (LISTS/*.black)
 * @param host host or host pattern
 * @param path path or path pattern
 * @throws IllegalArgumentException if {@code host} or {@code path} is {@code null}
 * @throws java.util.regex.PatternSyntaxException if the resulting path expression is not a valid regular expression
 */
public final void add (final String blacklistSourcefile, final String host, final String path) {
    // TODO: check sourcefile synced with cache.ser files ?
    if (host == null) {
        throw new IllegalArgumentException("host may not be null");
    }
    if (path == null) {
        throw new IllegalArgumentException("path may not be null");
    }

    // the entry is stored as host + "/" + path, so drop one leading slash of the path
    String p = (!path.isEmpty() && path.charAt(0) == '/') ? path.substring(1) : path;

    // avoid PatternSyntaxException: a bare leading '*' is not a valid expression, ".*" is
    final String h = ((!isMatchable(host) && !host.isEmpty() && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
    if (!p.isEmpty() && p.charAt(0) == '*') {
        p = "." + p;
    }

    // compiling validates the path expression; concatenating the Pattern yields its source string
    final Pattern pattern = Pattern.compile(p, Pattern.CASE_INSENSITIVE);
    final String newEntry = h + "/" + pattern;

    // Append the line to the file.
    PrintWriter pw = null;
    try {
        if (!blacklistFileContains(blacklistRootPath, blacklistSourcefile, newEntry)) {
            pw = new PrintWriter(new FileWriter(new File(blacklistRootPath, blacklistSourcefile), true));
            pw.println(newEntry);
            // PrintWriter never throws on write errors; checkError() flushes and reports them
            if (pw.checkError()) {
                ConcurrentLog.warn("Blacklist", "could not write entry to "
                        + blacklistSourcefile + "!");
            }
        }
    } catch (final IOException e) {
        ConcurrentLog.logException(e);
    } finally {
        // single close point; PrintWriter.close() does not throw
        if (pw != null) {
            pw.close();
        }
    }
}
public final int blacklistCacheSize() {
int size = 0;
final Iterator<BlacklistType> iter = this.cachedUrlHashs.keySet().iterator();

Loading…
Cancel
Save