From ca93835713b9070866b61b17ecb07175c4b99a8a Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sun, 10 Jun 2012 23:17:21 +0200 Subject: [PATCH 1/8] removed usage of deprecated methods --- source/de/anomic/http/server/HTTPDFileHandler.java | 6 +++--- source/net/yacy/interaction/TripleStore.java | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index b643fc81f..c889b483d 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -1427,11 +1427,11 @@ public final class HTTPDFileHandler { final String strARGS = (String) conProp.get("ARGS"); if(strARGS.startsWith("url=")) { final String strUrl = strARGS.substring(4); // strip url= - + try { proxyurl = new URL(strUrl); } catch (MalformedURLException e) { - proxyurl = new URL (URLDecoder.decode(strUrl)); + proxyurl = new URL (URLDecoder.decode(strUrl, UTF8.charset.name())); } } @@ -1486,7 +1486,7 @@ public final class HTTPDFileHandler { String directory = ""; if (proxyurl.getPath().lastIndexOf('/') > 0) directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/')); - + String location = ""; if (outgoingHeader.containsKey("Location")) { diff --git a/source/net/yacy/interaction/TripleStore.java b/source/net/yacy/interaction/TripleStore.java index b2791e774..f56d1845f 100644 --- a/source/net/yacy/interaction/TripleStore.java +++ b/source/net/yacy/interaction/TripleStore.java @@ -2,15 +2,16 @@ package net.yacy.interaction; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.StringBufferInputStream; import java.util.Iterator; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; @@ -32,7 +33,7 @@ public class TripleStore { if (filename.endsWith(".nt")) LoadNTriples(filename); else LoadRDF(filename); } - + public static void LoadRDF(String fileNameOrUri) throws IOException { Model tmp = ModelFactory.createDefaultModel(); Log.logInfo("TRIPLESTORE", "Loading from " + fileNameOrUri); @@ -67,8 +68,7 @@ public class TripleStore { try { - @SuppressWarnings("deprecation") - InputStream in = new StringBufferInputStream(rdffile); + InputStream in = new ByteArrayInputStream(UTF8.getBytes(rdffile)); // read the RDF/XML file tmp.read(in, null); From 7a4dab6d1d88efdf704c55b1ece4d42e5a3b6434 Mon Sep 17 00:00:00 2001 From: cominch Date: Sun, 10 Jun 2012 23:33:09 +0200 Subject: [PATCH 2/8] - removed unused variables - do not replace malformed or invalid URLs in urlproxy git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7835 6c8d7289-2bf4-0310-a012-ef5d649a1542 Conflicts: source/de/anomic/http/server/HTTPDFileHandler.java --- htroot/Triple_p.java | 77 ++++++++---------- htroot/interaction/GetRDF.java | 79 +++++++++---------- htroot/interaction/PutRDF.java | 68 +++++++--------- htroot/interaction/Table.java | 49 ++++++------ htroot/interaction/Triple.java | 49 ++++++------ htroot/interaction/UploadSingleFile.java | 73 ++++++++--------- .../http/server/AugmentedHtmlStream.java | 53 +++++++------ .../anomic/http/server/HTTPDProxyHandler.java | 24 +++--- 8 files changed, 219 insertions(+), 253 deletions(-) diff --git a/htroot/Triple_p.java b/htroot/Triple_p.java index c80126b2e..5295c34d0 100644 --- a/htroot/Triple_p.java +++ b/htroot/Triple_p.java @@ -2,46 +2,33 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.net.MalformedURLException; -import java.util.List; import net.yacy.cora.protocol.RequestHeader; import net.yacy.interaction.TripleStore; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; -import net.yacy.search.Switchboard; - - -import de.anomic.http.server.HTTPDemon; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory ; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSetFormatter; -import com.hp.hpl.jena.sparql.algebra.Algebra ; -import com.hp.hpl.jena.sparql.algebra.Op ; -import com.hp.hpl.jena.sparql.engine.QueryIterator ; -import com.hp.hpl.jena.sparql.engine.binding.Binding ; -import com.hp.hpl.jena.rdf.model.*; -import com.hp.hpl.jena.util.FileManager; +import de.anomic.http.server.HTTPDemon; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; public class Triple_p { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); - boolean hasRights = sb.verifyAuthentication(header); - prop.put("display", 1); // Fixed to 1 prop.putHTML("mode_output", "no query performed"); - + String q = "PREFIX lln: \n"+ "PREFIX rdf: \n"+ "PREFIX xsd: \n"+ @@ -52,21 +39,21 @@ public class Triple_p { "}"; if (post != null) { - + if (post.containsKey("submit")) { // System.out.println (post.get("submit")); } - + if (post.containsKey("rdffileslist")) { String list = post.get("rdffileslist"); - + for (String s: list.split("\n")) { String newurl = s; try { DigestURI d = new DigestURI (s); - + if (d.getHost().endsWith(".yacy")) { newurl = d.getProtocol()+"://"+HTTPDemon.getAlternativeResolver().resolve(d.getHost())+d.getPath(); System.out.println (newurl); @@ -77,38 +64,38 @@ public class Triple_p { } catch (IOException e) { Log.logException(e); } - } + } } - + if (post.containsKey("rdffile")) { TripleStore.Add(post.get("rdffile$file")); } - + if (post.containsKey("query")) { // Create a new query String queryString = post.get("query"); - + q = queryString; - + int count = 0; - + try { - + com.hp.hpl.jena.query.Query query = QueryFactory.create(queryString); - + // Execute the query and obtain results QueryExecution qe = QueryExecutionFactory.create(query, TripleStore.model); ResultSet resultSet = qe.execSelect(); - + ByteArrayOutputStream sos = new ByteArrayOutputStream(); - + ResultSetFormatter.outputAsRDF(sos, "", resultSet); prop.putHTML("mode_rdfdump", sos.toString()); - + int scount = 0; while (resultSet.hasNext()) { QuerySolution s = resultSet.next(); @@ -117,34 +104,34 @@ public class Triple_p { prop.put("entries_"+scount+"_o", s.getResource(null).getURI()); scount ++; } - + prop.putHTML("entries", ""+scount); - + for (String s: resultSet.getResultVars()) { - + prop.putHTML("mode_output_"+count+"_caption", s); count ++; } - - + + } catch (Exception e) { prop.putHTML("mode_rdfdump", "error"); } - - - + + + prop.putHTML("mode_output", ""+count); } - + } - + prop.putHTML("mode_query", q); - + // return rewrite properties return prop; } - + } diff --git a/htroot/interaction/GetRDF.java b/htroot/interaction/GetRDF.java index f9466d27f..c2ebf8049 100644 --- a/htroot/interaction/GetRDF.java +++ b/htroot/interaction/GetRDF.java @@ -1,6 +1,6 @@ package interaction; -//ViewLog_p.java +//ViewLog_p.java //----------------------- //part of the AnomicHTTPD caching proxy //(C) by Michael Peter Christen; mc@yacy.net @@ -30,75 +30,72 @@ package interaction; //if the shell's current path is HTROOT import java.io.ByteArrayOutputStream; -import java.io.FileOutputStream; +import java.io.UnsupportedEncodingException; -import com.hp.hpl.jena.rdf.model.Model; - -import net.yacy.cora.protocol.HeaderFramework; +import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.RequestHeader; import net.yacy.interaction.Interaction; import net.yacy.interaction.TripleStore; -import net.yacy.kelondro.logging.Log; -import net.yacy.search.Switchboard; -import de.anomic.data.UserDB; + +import com.hp.hpl.jena.rdf.model.Model; + import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class GetRDF { - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - - final Switchboard sb = (Switchboard) env; - + final serverObjects prop = new serverObjects(); - String url = ""; - String s = ""; - String p = ""; - String o = ""; - Boolean global = false; - + if(post != null){ - + global = post.containsKey("global"); - + } - + if (global) { - + ByteArrayOutputStream fout; - - + + fout = new ByteArrayOutputStream(); - + TripleStore.model.write(fout); - - prop.put("resultXML", fout.toString()); - + + try { + prop.put("resultXML", fout.toString(UTF8.charset.name())); + } catch (UnsupportedEncodingException e) { + } + } else { - + Model tmp = TripleStore.privatestorage.get(Interaction.GetLoggedOnUser(header)); - + if (tmp != null) { - - ByteArrayOutputStream fout; - fout = new ByteArrayOutputStream(); + + ByteArrayOutputStream fout; + fout = new ByteArrayOutputStream(); tmp.write(fout); - - prop.put("resultXML", fout.toString()); - + + try { + prop.put("resultXML", fout.toString(UTF8.charset.name())); + } catch (UnsupportedEncodingException e) { + } + } else { - + prop.put("resultXML", ""); } } - - - - + + + + return prop; } } diff --git a/htroot/interaction/PutRDF.java b/htroot/interaction/PutRDF.java index d591f877e..3806ee4c6 100644 --- a/htroot/interaction/PutRDF.java +++ b/htroot/interaction/PutRDF.java @@ -1,6 +1,6 @@ package interaction; -//ViewLog_p.java +//ViewLog_p.java //----------------------- //part of the AnomicHTTPD caching proxy //(C) by Michael Peter Christen; mc@yacy.net @@ -30,75 +30,65 @@ package interaction; //if the shell's current path is HTROOT import java.io.ByteArrayOutputStream; -import java.io.FileOutputStream; -import com.hp.hpl.jena.rdf.model.Model; - -import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.interaction.Interaction; import net.yacy.interaction.TripleStore; -import net.yacy.kelondro.logging.Log; -import net.yacy.search.Switchboard; -import de.anomic.data.UserDB; + +import com.hp.hpl.jena.rdf.model.Model; + import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class PutRDF { - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - - final Switchboard sb = (Switchboard) env; - + + final serverObjects prop = new serverObjects(); - String url = ""; - String s = ""; - String p = ""; - String o = ""; - Boolean global = false; - + if(post != null){ - + global = post.containsKey("global"); - + } - + if (global) { - + ByteArrayOutputStream fout; - - + + fout = new ByteArrayOutputStream(); - + TripleStore.model.write(fout); - + prop.put("resultXML", fout.toString()); - + } else { - + Model tmp = TripleStore.privatestorage.get(Interaction.GetLoggedOnUser(header)); - + if (tmp != null) { - - ByteArrayOutputStream fout; - fout = new ByteArrayOutputStream(); + + ByteArrayOutputStream fout; + fout = new ByteArrayOutputStream(); tmp.write(fout); - + prop.put("resultXML", fout.toString()); - + } else { - + prop.put("resultXML", ""); } } - - - - + + + + return prop; } } diff --git a/htroot/interaction/Table.java b/htroot/interaction/Table.java index e4ccecfab..b25c1ab4f 100644 --- a/htroot/interaction/Table.java +++ b/htroot/interaction/Table.java @@ -1,6 +1,6 @@ package interaction; -//ViewLog_p.java +//ViewLog_p.java //----------------------- //part of the AnomicHTTPD caching proxy //(C) by Michael Peter Christen; mc@yacy.net @@ -32,23 +32,22 @@ package interaction; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.interaction.Interaction; -import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import de.anomic.data.UserDB; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class Table { - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - + final Switchboard sb = (Switchboard) env; - + final serverObjects prop = new serverObjects(); - + UserDB.Entry entry=null; - - + + //default values prop.put("enabled_logged_in", "0"); @@ -71,7 +70,7 @@ public class Table { } } } - + //identified via userDB if(entry != null){ prop.put("enabled_logged-in", "1"); @@ -97,48 +96,48 @@ public class Table { } String url = ""; - String s = ""; + String s = ""; String p = ""; String o = ""; - + Boolean global = false; - + if(post != null){ if(post.containsKey("url")){ url = post.get("url"); } - + if(post.containsKey("s")){ s = post.get("s"); } - + if(post.containsKey("p")){ p = post.get("p"); } - + if(post.containsKey("o")){ o = post.get("o"); } - + global = post.containsKey("global"); - + } - + if (post.containsKey("load")) { - + o = Interaction.GetTableentry(s, p, global ? "global" : username, ""); - + } else { - + Interaction.Tableentry(s, p, o, global ? "global" : username, ""); - } - + } + prop.put("result", o); - - + + return prop; } } diff --git a/htroot/interaction/Triple.java b/htroot/interaction/Triple.java index fa1a3c2fb..407b76c34 100644 --- a/htroot/interaction/Triple.java +++ b/htroot/interaction/Triple.java @@ -1,6 +1,6 @@ package interaction; -//ViewLog_p.java +//ViewLog_p.java //----------------------- //part of the AnomicHTTPD caching proxy //(C) by Michael Peter Christen; mc@yacy.net @@ -32,23 +32,22 @@ package interaction; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.interaction.Interaction; -import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import de.anomic.data.UserDB; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class Triple { - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - + final Switchboard sb = (Switchboard) env; - + final serverObjects prop = new serverObjects(); - + UserDB.Entry entry=null; - - + + //default values prop.put("enabled_logged_in", "0"); @@ -71,7 +70,7 @@ public class Triple { } } } - + //identified via userDB if(entry != null){ prop.put("enabled_logged-in", "1"); @@ -97,47 +96,47 @@ public class Triple { } String url = ""; - String s = ""; + String s = ""; String p = ""; String o = ""; - + Boolean global = false; - + if(post != null){ if(post.containsKey("url")){ url = post.get("url"); } - + if(post.containsKey("s")){ s = post.get("s"); } - + if(post.containsKey("p")){ p = post.get("p"); } - + if(post.containsKey("o")){ o = post.get("o"); } - + global = post.containsKey("global"); - + } - + if (post.containsKey("load")) { - + o = Interaction.TripleGet(s, p, global ? "" : username); - + } else { - + Interaction.Triple(url, s, p, o, global ? "" : username); - } - + } + prop.put("result", o); - - + + return prop; } } diff --git a/htroot/interaction/UploadSingleFile.java b/htroot/interaction/UploadSingleFile.java index fe5a1f06b..995d43c6d 100644 --- a/htroot/interaction/UploadSingleFile.java +++ b/htroot/interaction/UploadSingleFile.java @@ -1,24 +1,17 @@ package interaction; -import java.io.BufferedOutputStream; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.OutputStreamWriter; import net.yacy.yacy; -import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.document.content.SurrogateReader; import net.yacy.interaction.Interaction; import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.Digest; import net.yacy.search.Switchboard; import de.anomic.data.UserDB; -import de.anomic.data.UserDB.AccessRight; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -31,67 +24,67 @@ public class UploadSingleFile { final serverObjects prop = new serverObjects(); - - + + if (post != null){ if (post.containsKey("uploadfile") && !post.get("uploadfile").isEmpty()) { - + UserDB.Entry entry = sb.userDB.getEntry(Interaction.GetLoggedOnUser(header)); if (entry != null) { - + if (entry.hasRight(UserDB.AccessRight.UPLOAD_RIGHT)) { - + // the user has the upload right - + } - + } - + String targetfilename = post.get("uploadfile", "target.file"); - + String targetfolder = "/upload/"+Interaction.GetLoggedOnUser(header); - + if (post.containsKey("targetfilename")) { targetfilename = post.get("targetfilename"); - + } - + if (post.containsKey("targetfolder")) { targetfolder = post.get("targetfolder"); - + if (!targetfolder.startsWith("/")) { targetfolder = "/" + targetfolder; } - + } - + File f = new File(yacy.dataHome_g, "DATA/HTDOCS"+targetfolder+"/"); - + yacy.mkdirsIfNeseccary (f); - + f = new File(f, targetfilename); - + Log.logInfo ("FILEUPLOAD", f.toString()); - - + + try { - + ByteArrayInputStream stream = new ByteArrayInputStream(post .get("uploadfile$file").getBytes()); - - + + if (stream != null) { - + OutputStream out; - - + + out = new FileOutputStream(f.toString()); - - + + byte[] buf = new byte[1024]; int len; while ((len = stream.read(buf)) > 0) { @@ -100,7 +93,7 @@ public class UploadSingleFile { stream.close(); out.close(); } - + } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -108,8 +101,8 @@ public class UploadSingleFile { // TODO Auto-generated catch block e.printStackTrace(); } - - + + } } @@ -117,9 +110,9 @@ public class UploadSingleFile { // return rewrite properties return prop; } - - + + } diff --git a/source/de/anomic/http/server/AugmentedHtmlStream.java b/source/de/anomic/http/server/AugmentedHtmlStream.java index 82d944e9c..884798b82 100644 --- a/source/de/anomic/http/server/AugmentedHtmlStream.java +++ b/source/de/anomic/http/server/AugmentedHtmlStream.java @@ -15,55 +15,56 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.search.Switchboard; public class AugmentedHtmlStream extends FilterOutputStream { - private Writer out; - private ByteArrayOutputStream buffer; - private Charset charset; - private DigestURI url; - private byte[] urlhash; - private RequestHeader requestHeader; + private final Writer out; + private final ByteArrayOutputStream buffer; + private final Charset charset; + private final DigestURI url; + private final RequestHeader requestHeader; - public AugmentedHtmlStream(OutputStream out, Charset charset, DigestURI url, byte[] urlhash, RequestHeader requestHeader) { + public AugmentedHtmlStream(OutputStream out, Charset charset, DigestURI url, RequestHeader requestHeader) { super(out); this.out = new BufferedWriter(new OutputStreamWriter(out, charset)); this.buffer = new ByteArrayOutputStream(); this.charset = charset; this.url = url; - this.urlhash = urlhash; this.requestHeader = requestHeader; } - - public void write(int b) throws IOException { + + @Override + public void write(int b) throws IOException { this.buffer.write(b); } - - public void write(byte[] b, int off, int len) throws IOException { + + @Override + public void write(byte[] b, int off, int len) throws IOException { this.buffer.write(b, off, len); } - - public void close() throws IOException { - StringBuffer b = new StringBuffer(this.buffer.toString(charset.name())); + + @Override + public void close() throws IOException { + StringBuffer b = new StringBuffer(this.buffer.toString(this.charset.name())); b = process(b); - out.write(b.toString()); - out.close(); + this.out.write(b.toString()); + this.out.close(); } - + public StringBuffer process(StringBuffer data) { - + if (Switchboard.getSwitchboard().getConfigBool("proxyAugmentation", false) == true) { - + if (!this.url.toNormalform(false, true).contains("currentyacypeer/")) { - - return AugmentHtmlStream.process (data, charset, url, requestHeader); - + + return AugmentHtmlStream.process (data, this.charset, this.url, this.requestHeader); + } else { return data; } - - } else { + + } else { return data; } } - + public static boolean supportsMime(String mime) { // System.out.println("mime" +mime); return mime.split(";")[0].equals("text/html"); diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index c0bb0768d..ad901bdb0 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -467,8 +467,8 @@ public final class HTTPDProxyHandler { port = sb.peers.myPort(); path = path.substring(16); } - - // point virtual directory to my peer + + // point virtual directory to my peer if (path.startsWith("/currentyacypeer/")) { host = sb.peers.myIP(); port = sb.peers.myPort(); @@ -492,7 +492,7 @@ public final class HTTPDProxyHandler { final String connectHost = hostPart(host, port, yAddress); final String getUrl = "http://"+ connectHost + remotePath; - + requestHeader.remove(HeaderFramework.HOST); final HTTPClient client = setupHttpClient(requestHeader, connectHost); @@ -508,13 +508,13 @@ public final class HTTPDProxyHandler { if (responseHeader.isEmpty()) { throw new Exception(client.getHttpResponse().getStatusLine().toString()); } - + if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) { // enable chunk encoding, because we don't know the length after annotating responseHeader.remove(HeaderFramework.CONTENT_LENGTH); responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked"); - } + } ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond); @@ -555,7 +555,7 @@ public final class HTTPDProxyHandler { // chunked encoding disables somewhere, add it again responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked"); } - + // sending the respond header back to the client if (chunkedOut != null) { responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked"); @@ -584,10 +584,10 @@ public final class HTTPDProxyHandler { final String storeError = response.shallStoreCacheForProxy(); final boolean storeHTCache = response.profile().storeHTCache(); final String supportError = TextParser.supports(response.url(), response.getMimeType()); - + if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) { - outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, url.hash(), requestHeader); - } + outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, requestHeader); + } if ( /* * Now we store the response into the htcache directory if @@ -655,7 +655,7 @@ public final class HTTPDProxyHandler { conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); } - + outStream.close(); if (chunkedOut != null) { @@ -745,9 +745,9 @@ public final class HTTPDProxyHandler { //respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative' if(AugmentedHtmlStream.supportsMime(cachedResponseHeader.mime())) { - respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, url.hash(), requestHeader); + respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, requestHeader); } - + // send also the complete body now from the cache // simply read the file and transfer to out socket FileUtils.copy(cacheEntry, respond); From af5a597e47cdec9d5d8061b47f12508acb67df85 Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Sun, 10 Jun 2012 23:38:41 +0200 Subject: [PATCH 3/8] Scroogle is not comming back, remove dead code Conflicts: source/net/yacy/search/Switchboard.java --- defaults/yacy.init | 1 - htroot/ConfigHeuristics_p.html | 16 +------ htroot/ConfigHeuristics_p.java | 3 -- htroot/ConfigNetwork_p.java | 1 - source/net/yacy/search/Switchboard.java | 59 ++----------------------- 5 files changed, 4 insertions(+), 76 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 309531a50..9d8ceda59 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1011,7 +1011,6 @@ about.body = # search heuristics heuristic.site = false -heuristic.scroogle = false heuristic.blekko = false # colours for generic design diff --git a/htroot/ConfigHeuristics_p.html b/htroot/ConfigHeuristics_p.html index 91cc76746..0778c9396 100644 --- a/htroot/ConfigHeuristics_p.html +++ b/htroot/ConfigHeuristics_p.html @@ -43,20 +43,6 @@

-
@@ -73,4 +59,4 @@ #%env/templates/footer.template%# - \ No newline at end of file + diff --git a/htroot/ConfigHeuristics_p.java b/htroot/ConfigHeuristics_p.java index f60abe374..3ea6031cd 100644 --- a/htroot/ConfigHeuristics_p.java +++ b/htroot/ConfigHeuristics_p.java @@ -45,14 +45,11 @@ public class ConfigHeuristics_p { if (post.containsKey("site_on")) sb.setConfig("heuristic.site", true); if (post.containsKey("site_off")) sb.setConfig("heuristic.site", false); - if (post.containsKey("scroogle_on")) sb.setConfig("heuristic.scroogle", true); - if (post.containsKey("scroogle_off")) sb.setConfig("heuristic.scroogle", false); if (post.containsKey("blekko_on")) sb.setConfig("heuristic.blekko", true); if (post.containsKey("blekko_off")) sb.setConfig("heuristic.blekko", false); } prop.put("site.checked", sb.getConfigBool("heuristic.site", false) ? 1 : 0); - prop.put("scroogle.checked", sb.getConfigBool("heuristic.scroogle", false) ? 1 : 0); prop.put("blekko.checked", sb.getConfigBool("heuristic.blekko", false) ? 1 : 0); return prop; diff --git a/htroot/ConfigNetwork_p.java b/htroot/ConfigNetwork_p.java index e78ab69d0..385abe40d 100644 --- a/htroot/ConfigNetwork_p.java +++ b/htroot/ConfigNetwork_p.java @@ -87,7 +87,6 @@ public class ConfigNetwork_p if ( !indexReceive ) { // remove heuristics sb.setConfig("heuristic.site", false); - sb.setConfig("heuristic.scroogle", false); sb.setConfig("heuristic.blekko", false); } final boolean robinsonmode = "robinson".equals(post.get("network", "")); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 7e40457be..b3452c5e4 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -1190,7 +1190,6 @@ public final class Switchboard extends serverSwitch // remove heuristics setConfig("heuristic.site", false); - setConfig("heuristic.scroogle", false); setConfig("heuristic.blekko", false); // relocate @@ -3152,8 +3151,8 @@ public final class Switchboard extends serverSwitch final Map links; searchEvent.getRankingResult().oneFeederStarted(); - try { - links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE); + try { + links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE); if ( links != null ) { final Iterator i = links.keySet().iterator(); while ( i.hasNext() ) { @@ -3166,59 +3165,7 @@ public final class Switchboard extends serverSwitch addAllToIndex(url, links, searchEvent, "site"); } } catch ( final Throwable e ) { - Log.logException(e); - } finally { - searchEvent.getRankingResult().oneFeederTerminated(); - } - } - }.start(); - } - - public final void heuristicScroogle(final SearchEvent searchEvent) { - new Thread() { - @Override - public void run() { - QueryParams query = searchEvent.getQuery(); - String queryString = query.queryString(true); - final int meta = queryString.indexOf("heuristic:", 0); - if ( meta >= 0 ) { - final int q = queryString.indexOf(' ', meta); - queryString = - (q >= 0) - ? queryString.substring(0, meta) + queryString.substring(q + 1) - : queryString.substring(0, meta); - } - final String urlString = - "http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=" - + queryString.trim().replaceAll(" ", "+") - + "&n=2"; - final DigestURI url; - try { - url = new DigestURI(MultiProtocolURI.unescape(urlString)); - } catch ( final MalformedURLException e1 ) { - Log.logWarning("heuristicScroogle", "url not well-formed: '" + urlString + "'"); - return; - } - - Map links = null; - searchEvent.getRankingResult().oneFeederStarted(); - try { - links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE); - if ( links != null ) { - final Iterator i = links.keySet().iterator(); - while ( i.hasNext() ) { - if ( i.next().toNormalform(false, false).indexOf("scroogle", 0) >= 0 ) { - i.remove(); - } - } - Switchboard.this.log.logInfo("Heuristic: adding " - + links.size() - + " links from scroogle"); - // add all pages to the index - addAllToIndex(null, links, searchEvent, "scroogle"); - } - } catch ( final Throwable e ) { - //Log.logException(e); + Log.logException(e); } finally { searchEvent.getRankingResult().oneFeederTerminated(); } From 213f006bf14a47bae5a0f801481eef0e89667f8f Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Sun, 10 Jun 2012 23:40:07 +0200 Subject: [PATCH 4/8] One is okay ... Conflicts: htroot/Trails.html --- htroot/Trails.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htroot/Trails.html b/htroot/Trails.html index 8110ed7cf..b01e39607 100644 --- a/htroot/Trails.html +++ b/htroot/Trails.html @@ -63,5 +63,5 @@ search();
#%env/templates/footer.template%# - + \ No newline at end of file From edaa09b9b12651ef3a211ff653040aab5becf68e Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Mon, 11 Jun 2012 00:17:30 +0200 Subject: [PATCH 5/8] Rewrote all String blacklist types to enum 'BlacklistType', closes bug #143 Conflicts: htroot/Supporter.java htroot/yacy/crawlReceipt.java htroot/yacy/transferRWI.java htroot/yacy/transferURL.java source/de/anomic/crawler/CrawlStacker.java source/de/anomic/data/ListManager.java source/net/yacy/peers/Protocol.java source/net/yacy/repository/Blacklist.java source/net/yacy/repository/LoaderDispatcher.java source/net/yacy/search/Switchboard.java source/net/yacy/search/index/MetadataRepository.java source/net/yacy/search/index/Segment.java source/net/yacy/search/query/RWIProcess.java source/net/yacy/search/snippet/MediaSnippet.java --- htroot/BlacklistCleaner_p.java | 17 +-- htroot/BlacklistTest_p.java | 18 +-- htroot/Blacklist_p.java | 63 ++++---- htroot/IndexControlRWIs_p.java | 9 +- htroot/Supporter.java | 8 +- htroot/Surftips.java | 4 +- htroot/api/blacklists_p.java | 9 +- htroot/sharedBlacklist_p.java | 7 +- htroot/yacy/crawlReceipt.java | 13 +- htroot/yacy/transferRWI.java | 8 +- htroot/yacy/transferURL.java | 8 +- source/de/anomic/crawler/CrawlStacker.java | 8 +- .../anomic/crawler/retrieval/HTTPLoader.java | 6 +- source/de/anomic/data/ListManager.java | 62 ++++---- .../anomic/http/server/HTTPDProxyHandler.java | 8 +- source/net/yacy/peers/NewsPool.java | 6 +- source/net/yacy/peers/Protocol.java | 7 +- source/net/yacy/repository/Blacklist.java | 136 +++++++++++------- source/net/yacy/repository/BlacklistFile.java | 8 +- .../net/yacy/repository/LoaderDispatcher.java | 5 +- source/net/yacy/search/Switchboard.java | 2 +- .../yacy/search/index/MetadataRepository.java | 5 +- source/net/yacy/search/index/Segment.java | 4 +- source/net/yacy/search/query/RWIProcess.java | 9 +- .../net/yacy/search/snippet/MediaSnippet.java | 14 +- 25 files changed, 241 insertions(+), 203 deletions(-) diff --git a/htroot/BlacklistCleaner_p.java b/htroot/BlacklistCleaner_p.java index 5e92b3782..3f2d45387 100644 --- a/htroot/BlacklistCleaner_p.java +++ b/htroot/BlacklistCleaner_p.java @@ -49,6 +49,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistError; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; import de.anomic.data.ListManager; @@ -76,10 +77,6 @@ public class BlacklistCleaner_p { ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS")); String blacklistToUse = null; - // get the list of supported blacklist types - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - prop.put(DISABLED+"checked", "1"); if (post != null) { @@ -102,10 +99,10 @@ public class BlacklistCleaner_p { if (post.containsKey("delete")) { prop.put(RESULTS + "modified", "1"); - prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", true))); + prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", true))); } else if (post.containsKey("alter")) { prop.put(RESULTS + "modified", "2"); - prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false))); + prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false))); } // list illegal entries @@ -275,7 +272,7 @@ public class BlacklistCleaner_p { * @param entries Array of entries to be deleted. * @return Length of the list of entries to be removed. */ - private static int removeEntries(final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) { + private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) { // load blacklist data from file final List list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); @@ -299,7 +296,7 @@ public class BlacklistCleaner_p { } // remove the entry from the running blacklist engine - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0)); final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1); @@ -328,7 +325,7 @@ public class BlacklistCleaner_p { */ private static int alterEntries( final String blacklistToUse, - final String[] supportedBlacklistTypes, + final BlacklistType[] supportedBlacklistTypes, final String[] oldEntry, final String[] newEntry) { removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry); @@ -346,7 +343,7 @@ public class BlacklistCleaner_p { path = n.substring(pos + 1); } pw.println(host + "/" + path); - for (final String s : supportedBlacklistTypes) { + for (final BlacklistType s : supportedBlacklistTypes) { if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) { Switchboard.urlBlacklist.add( s, diff --git a/htroot/BlacklistTest_p.java b/htroot/BlacklistTest_p.java index 584754b78..915a657eb 100644 --- a/htroot/BlacklistTest_p.java +++ b/htroot/BlacklistTest_p.java @@ -34,7 +34,7 @@ import java.net.MalformedURLException; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.DigestURI; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.data.ListManager; @@ -64,21 +64,23 @@ public class BlacklistTest_p { DigestURI testurl = null; try { testurl = new DigestURI(urlstring); - } catch (final MalformedURLException e) { testurl = null; } + } catch (final MalformedURLException e) { + testurl = null; + } if(testurl != null) { prop.putHTML("url",testurl.toString()); prop.putHTML("testlist_url",testurl.toString()); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) prop.put("testlist_listedincrawler", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) prop.put("testlist_listedindht", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) prop.put("testlist_listedinnews", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) prop.put("testlist_listedinproxy", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) prop.put("testlist_listedinsearch", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) prop.put("testlist_listedinsurftips", "1"); } else { diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java index 0476e00d3..862372117 100644 --- a/htroot/Blacklist_p.java +++ b/htroot/Blacklist_p.java @@ -43,6 +43,7 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; import de.anomic.data.ListManager; @@ -64,10 +65,6 @@ public class Blacklist_p { ListManager.switchboard = (Switchboard) env; ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); - // get the list of supported blacklist types - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - // load all blacklist files located in the directory List dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER); @@ -98,22 +95,22 @@ public class Blacklist_p { } if(testurl != null) { prop.putHTML("testlist_url",testurl.toString()); - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) { prop.put("testlist_listedincrawler", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) { prop.put("testlist_listedindht", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) { prop.put("testlist_listedinnews", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) { prop.put("testlist_listedinproxy", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) { prop.put("testlist_listedinsearch", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) { prop.put("testlist_listedinsurftips", "1"); } } else { @@ -159,7 +156,7 @@ public class Blacklist_p { ListManager.updateListSet(BLACKLIST_SHARED, blacklistToUse); // activate it for all known blacklist types - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { ListManager.updateListSet(supportedBlacklistType + ".BlackLists", blacklistToUse); } } catch (final IOException e) {/* */} @@ -189,7 +186,7 @@ public class Blacklist_p { Log.logWarning("Blacklist", "file "+ blackListFile +" could not be deleted!"); } - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { ListManager.removeFromListSet(supportedBlacklistType + ".BlackLists",blacklistToUse); } @@ -212,7 +209,7 @@ public class Blacklist_p { return prop; } - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (post.containsKey("activateList4" + supportedBlacklistType)) { ListManager.updateListSet(supportedBlacklistType + ".BlackLists",blacklistToUse); } else { @@ -253,7 +250,7 @@ public class Blacklist_p { if (selectedBlacklistEntries.length > 0) { String temp = null; for (final String selectedBlacklistEntry : selectedBlacklistEntries) { - if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { + if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } @@ -273,7 +270,7 @@ public class Blacklist_p { // store this call as api call ListManager.switchboard.tables.recordAPICall(post, "Blacklist_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "add to blacklist: " + blentry); - final String temp = addBlacklistEntry(blacklistToUse, blentry, header, supportedBlacklistTypes); + final String temp = addBlacklistEntry(blacklistToUse, blentry, header, BlacklistType.values()); if (temp != null) { prop.put("LOCATION", temp); return prop; @@ -298,12 +295,12 @@ public class Blacklist_p { !targetBlacklist.equals(blacklistToUse)) { String temp; for (final String selectedBlacklistEntry : selectedBlacklistEntries) { - if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { + if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } - if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { + if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; @@ -338,12 +335,12 @@ public class Blacklist_p { if (!selectedBlacklistEntries[i].equals(editedBlacklistEntries[i])) { - if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) { + if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } - if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) { + if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } @@ -475,12 +472,12 @@ public class Blacklist_p { if (element.equals(blacklistToUse)) { //current List prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1"); - for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { - prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]); + for (int blTypes=0; blTypes < BlacklistType.values().length; blTypes++) { + prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",BlacklistType.values()[blTypes].toString()); prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked", - ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists", element) ? "0" : "1"); + ListManager.listSetContains(BlacklistType.values()[blTypes] + ".BlackLists", element) ? "0" : "1"); } - prop.put(DISABLED + "currentActiveFor", supportedBlacklistTypes.length); + prop.put(DISABLED + "currentActiveFor", BlacklistType.values().length); } else { prop.putXML(DISABLED + EDIT + BLACKLIST_MOVE + blacklistMoveCount + "_name", element); @@ -494,9 +491,9 @@ public class Blacklist_p { } int activeCount = 0; - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", element)) { - prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType); + prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType.toString()); activeCount++; } } @@ -521,13 +518,13 @@ public class Blacklist_p { * @param newEntry the entry that is to be added * @param header * @param supportedBlacklistTypes - * @return null if no error occured, else a String to put into LOCATION + * @return null if no error occurred, else a String to put into LOCATION */ private static String addBlacklistEntry( final String blacklistToUse, final String newEntry, final RequestHeader header, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { if (blacklistToUse == null || blacklistToUse.length() == 0) { return ""; @@ -555,7 +552,7 @@ public class Blacklist_p { final String blacklistToUse, final String oldEntry, final RequestHeader header, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { if (blacklistToUse == null || blacklistToUse.length() == 0) { return ""; @@ -580,7 +577,7 @@ public class Blacklist_p { final File listsPath, final String blacklistToUse, String oldEntry, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { // load blacklist data from file final List list = FileUtils.getListArray(new File(listsPath, blacklistToUse)); @@ -603,7 +600,7 @@ public class Blacklist_p { pos = oldEntry.length(); oldEntry = oldEntry + "/.*"; } - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) { Switchboard.urlBlacklist.remove(supportedBlacklistType,oldEntry.substring(0, pos), oldEntry.substring(pos + 1)); } @@ -622,7 +619,7 @@ public class Blacklist_p { final File listsPath, final String blacklistToUse, String newEntry, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { // ignore empty entries if(newEntry == null || newEntry.isEmpty()) { @@ -659,7 +656,7 @@ public class Blacklist_p { final File listsPath, final String blacklistToUse, String newEntry, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { if (!Blacklist.blacklistFileContains(listsPath, blacklistToUse, newEntry)) { // append the line to the file @@ -683,7 +680,7 @@ public class Blacklist_p { // add to blacklist int pos = newEntry.indexOf('/',0); - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { Switchboard.urlBlacklist.add(supportedBlacklistType, newEntry.substring(0, pos), newEntry.substring(pos + 1)); } diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 19b05b2b0..19288a0df 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -58,7 +58,7 @@ import net.yacy.kelondro.util.FileUtils; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.peers.dht.PeerSelection; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; @@ -441,7 +441,7 @@ public class IndexControlRWIs_p supportedBlacklistType + ".BlackLists", blacklist) ) { Switchboard.urlBlacklist.add( - supportedBlacklistType, + BlacklistType.valueOf(supportedBlacklistType), url.getHost(), url.getFile()); } @@ -457,7 +457,6 @@ public class IndexControlRWIs_p if ( post.containsKey("blacklistdomains") ) { PrintWriter pw; try { - final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(","); pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true)); DigestURI url; @@ -472,7 +471,7 @@ public class IndexControlRWIs_p if ( e != null ) { url = e.url(); pw.println(url.getHost() + "/.*"); - for ( final String supportedBlacklistType : supportedBlacklistTypes ) { + for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) { if ( ListManager.listSetContains( supportedBlacklistType + ".BlackLists", blacklist) ) { @@ -623,7 +622,7 @@ public class IndexControlRWIs_p ? "appears emphasized, " : "") + ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : "")); - if ( Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, url) ) { + if ( Switchboard.urlBlacklist.isListed(BlacklistType.DHT, url) ) { prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1"); } i++; diff --git a/htroot/Supporter.java b/htroot/Supporter.java index 767bdadcc..2d75ee4c9 100644 --- a/htroot/Supporter.java +++ b/htroot/Supporter.java @@ -43,7 +43,7 @@ import net.yacy.kelondro.order.NaturalOrder; import net.yacy.peers.NewsDB; import net.yacy.peers.NewsPool; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -129,8 +129,10 @@ public class Supporter { url = row.getPrimaryKeyUTF8().trim(); try { - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue; - } catch(final MalformedURLException e) {continue;} + if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue; + } catch (final MalformedURLException e) { + continue; + } title = row.getColUTF8(1); description = row.getColUTF8(2); if ((url == null) || (title == null) || (description == null)) continue; diff --git a/htroot/Surftips.java b/htroot/Surftips.java index 682f66ee4..63d4efa1a 100644 --- a/htroot/Surftips.java +++ b/htroot/Surftips.java @@ -42,7 +42,7 @@ import net.yacy.kelondro.order.NaturalOrder; import net.yacy.peers.NewsDB; import net.yacy.peers.NewsPool; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -136,7 +136,7 @@ public class Surftips { url = row.getPrimaryKeyUTF8().trim(); try{ - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url))) + if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS ,new DigestURI(url))) continue; }catch(final MalformedURLException e){continue;}; title = row.getColUTF8(1); diff --git a/htroot/api/blacklists_p.java b/htroot/api/blacklists_p.java index 180c3d4e8..6f907ef79 100644 --- a/htroot/api/blacklists_p.java +++ b/htroot/api/blacklists_p.java @@ -4,7 +4,7 @@ import java.util.List; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.util.FileUtils; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import de.anomic.data.ListManager; import de.anomic.server.serverObjects; @@ -35,15 +35,14 @@ public class blacklists_p { prop.put("lists_" + blacklistCount + "_shared", "0"); } - final String[] types = Blacklist.BLACKLIST_TYPES_STRING.split(","); int j = 0; - for (final String type : types) { - prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type); + for (final BlacklistType type : BlacklistType.values()) { + prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type.toString()); prop.put("lists_" + blacklistCount + "_types_" + j + "_value", ListManager.listSetContains(type + ".BlackLists", element) ? 1 : 0); j++; } - prop.put("lists_" + blacklistCount + "_types", types.length); + prop.put("lists_" + blacklistCount + "_types", BlacklistType.values().length); if (!"1".equals(attrOnly) && !"true".equals(attrOnly)) { final List list = FileUtils.getListArray(new File(ListManager.listsPath, element)); diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index 2704cc9d5..1bbf28b55 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -47,7 +47,7 @@ import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.util.FileUtils; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; @@ -238,10 +238,7 @@ public class sharedBlacklist_p { pw.println(newItem); if (Switchboard.urlBlacklist != null) { - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) { Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1)); } diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index fabcabe00..58dbea76a 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -35,6 +35,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; @@ -133,8 +134,16 @@ public final class crawlReceipt { // check if the entry is in our network domain final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url()); - if (urlRejectReason != null) { - if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); + if (urlRejectReason != null) { + log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); + prop.put("delay", "9999"); + return prop; + } + + // Check URL against DHT blacklist + if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) { + // URL is blacklisted + log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam); prop.put("delay", "9999"); return prop; } diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index e20418823..b29b732b2 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -46,7 +46,7 @@ import net.yacy.peers.Network; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.peers.dht.FlatWordPartitionScheme; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segments; @@ -171,9 +171,9 @@ public final class transferRWI { iEntry = new WordReferenceRow(estring.substring(p)); urlHash = iEntry.urlhash(); - // block blacklisted entries - if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) { - if (Network.log.isFine()) Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); + // block blacklisted entries + if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(BlacklistType.DHT, urlHash))) { + Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); blocked++; continue; } diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index 944c96d00..a9b43c7a2 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -38,7 +38,7 @@ import net.yacy.peers.EventChannel; import net.yacy.peers.Network; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; @@ -121,9 +121,9 @@ public final class transferURL { continue; } - // check if the entry is blacklisted - if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) { - if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); + // check if the entry is blacklisted + if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) { + Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); lEntry = null; blocked++; continue; diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 966ecabac..35d1a3f11 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.peers.SeedDB; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.FilterEngine; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; @@ -404,9 +404,9 @@ public final class CrawlStacker { return "denied_(" + urlRejectReason + ")"; } - // check blacklist - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { - if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is in blacklist."); + // check blacklist + if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) { + this.log.logFine("URL '" + urlstring + "' is in blacklist."); return "url in blacklist"; } diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index a3547feda..2a349a601 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -36,7 +36,7 @@ import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.io.ByteCount; import net.yacy.kelondro.logging.Log; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segments; @@ -95,7 +95,7 @@ public final class HTTPLoader { // check if url is in blacklist final String hostlow = host.toLowerCase(); - if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) { + if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) { this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } @@ -236,7 +236,7 @@ public final class HTTPLoader { // check if url is in blacklist final String hostlow = host.toLowerCase(); - if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) { + if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) { throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } diff --git a/source/de/anomic/data/ListManager.java b/source/de/anomic/data/ListManager.java index 47a38ff67..051580a8c 100644 --- a/source/de/anomic/data/ListManager.java +++ b/source/de/anomic/data/ListManager.java @@ -1,10 +1,10 @@ // listManager.java // ------------------------------------- // part of YACY -// +// // (C) 2005, 2006 by Alexander Schier // (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com -// +// // last change: $LastChangedDate$ by $LastChangedBy$ // $LastChangedRevision$ // @@ -30,35 +30,34 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; import java.util.Vector; +import java.util.regex.Pattern; import net.yacy.kelondro.util.FileUtils; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.BlacklistFile; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; -import java.util.List; -import java.util.regex.Pattern; - // The Naming of the functions is a bit strange... public class ListManager { - + private final static Pattern commaPattern = Pattern.compile(","); - + public static Switchboard switchboard = null; public static File listsPath = null; /** * Get ListSet from configuration file and return it as a unified Set. - * + * * Meaning of ListSet: There are various "lists" in YaCy which are * actually disjunct (pairwise unequal) sets which themselves can be seperated * into different subsets. E.g., there can be more than one blacklist of a type. - * A ListSet is the set of all those "lists" (subsets) of an equal type. - * + * A ListSet is the set of all those "lists" (subsets) of an equal type. + * * @param setName name of the ListSet * @return a ListSet from configuration file */ @@ -69,13 +68,13 @@ public class ListManager { /** * Removes an element from a ListSet and updates the configuration file * accordingly. If the element doesn't exist, then nothing will be changed. - * + * * @param setName name of the ListSet. * @param listName name of the element to remove from the ListSet. */ public static void removeFromListSet(final String setName, final String listName) { final Set listSet = getListSet(setName); - + if (!listSet.isEmpty()) { listSet.remove(listName); switchboard.setConfig(setName, collection2string(listSet)); @@ -86,9 +85,9 @@ public class ListManager { * Adds an element to an existing ListSet. If the ListSet doesn't exist yet, * a new one will be added. If the ListSet already contains an identical element, * then nothing happens. - * + * * The new list will be written to the configuartion file. - * + * * @param setName * @param newListName */ @@ -101,7 +100,7 @@ public class ListManager { /** * @param setName ListSet in which to search for an element. - * @param listName the element to search for. + * @param listName the element to search for. * @return true if the ListSet "setName" contains an element * "listName", false otherwise. */ @@ -112,23 +111,23 @@ public class ListManager { //================general Lists================== - public static String getListString(final String filename, final boolean withcomments) { + public static String getListString(final String filename, final boolean withcomments) { return FileUtils.getListString(new File(listsPath ,filename), withcomments); } - + //================Helper functions for collection conversion================== - + /** * Simple conversion of a Collection of Strings to a comma separated String. * If the implementing Collection subclass guaranties an order of its elements, * the substrings of the result will have the same order. - * + * * @param col a Collection of Strings. * @return String with elements from set separated by comma. */ public static String collection2string(final Collection col){ final StringBuilder str = new StringBuilder(col.size() * 40); - + if (col != null && !col.isEmpty()) { final Iterator it = col.iterator(); str.append(it.next()); @@ -137,7 +136,7 @@ public class ListManager { str.append(it.next()); } } - + return str.toString(); } @@ -158,13 +157,13 @@ public class ListManager { /** * Simple conversion of a comma separated list to a unified Set. - * + * * @param string list of comma separated Strings * @return resulting Set or empty Set if string is null */ public static Set string2set(final String string){ HashSet set; - + if (string != null) { set = new HashSet(Arrays.asList(commaPattern.split(string, 0))); } else { @@ -177,7 +176,7 @@ public class ListManager { /** * Simple conversion of a comma separated list to a Vector containing * the order of the substrings. - * + * * @param string list of comma separated Strings * @return resulting Vector or empty Vector if string is null */ @@ -198,19 +197,16 @@ public class ListManager { /** * Load or reload all active Blacklists */ - public static void reloadBlacklists(){ - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - - final List blacklistFiles = new ArrayList(supportedBlacklistTypes.length); - for (String supportedBlacklistType : supportedBlacklistTypes) { + public static void reloadBlacklists(){ + final List blacklistFiles = new ArrayList(BlacklistType.values().length); + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { final BlacklistFile blFile = new BlacklistFile( switchboard.getConfig( - supportedBlacklistType + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")), + supportedBlacklistType.toString() + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")), supportedBlacklistType); blacklistFiles.add(blFile); } - + Switchboard.urlBlacklist.clear(); Switchboard.urlBlacklist.loadList( blacklistFiles.toArray(new BlacklistFile[blacklistFiles.size()]), diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index ad901bdb0..30e958cd8 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -84,7 +84,7 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.io.ByteCountOutputStream; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import de.anomic.crawler.Cache; @@ -350,7 +350,7 @@ public final class HTTPDProxyHandler { // respond a 404 for all AGIS ("all you get is shit") servers final String hostlow = host.toLowerCase(); if (args != null) { path = path + "?" + args; } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) { log.logInfo("AGIS blocking of host '" + hostlow + "'"); HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); @@ -814,7 +814,7 @@ public final class HTTPDProxyHandler { // re-calc the url path final String remotePath = (args == null) ? path : (path + "?" + args); - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, remotePath)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) { HTTPDemon.sendRespondError(conProp,respond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); log.logInfo("AGIS blocking of host '" + hostlow + "'"); @@ -1243,7 +1243,7 @@ public final class HTTPDProxyHandler { // blacklist idea inspired by [AS]: // respond a 404 for all AGIS ("all you get is shit") servers final String hostlow = host.toLowerCase(); - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) { HTTPDemon.sendRespondError(conProp,clientOut,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); log.logInfo("AGIS blocking of host '" + hostlow + "'"); diff --git a/source/net/yacy/peers/NewsPool.java b/source/net/yacy/peers/NewsPool.java index c75f10c14..33b87b676 100644 --- a/source/net/yacy/peers/NewsPool.java +++ b/source/net/yacy/peers/NewsPool.java @@ -56,7 +56,7 @@ import java.util.Set; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; public class NewsPool { @@ -343,13 +343,13 @@ public class NewsPool { if (record.created().getTime() == 0) return; final Map attributes = record.attributes(); if (attributes.containsKey("url")){ - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){ + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("url")))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url")); return; } } if (attributes.containsKey("startURL")){ - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){ + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("startURL")))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL")); return; } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 298fa8052..b1681d722 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -96,6 +96,7 @@ import net.yacy.peers.graphics.WebStructureGraph; import net.yacy.peers.graphics.WebStructureGraph.HostReference; import net.yacy.peers.operation.yacyVersion; import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; @@ -699,9 +700,9 @@ public final class Protocol assert (urlEntry.hash().length == 12) : "urlEntry.hash() = " + ASCII.String(urlEntry.hash()); if ( urlEntry.hash().length != 12 ) { continue; // bad url hash - } - if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) { - if ( Network.log.isInfo() ) { + } + if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry) ) { + if ( Network.log.isInfo() ) { Network.log.logInfo("remote search: filtered blacklisted url " + urlEntry.url() + " from peer " diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 1f23b9eca..472921d84 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -26,9 +26,13 @@ package net.yacy.repository; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -50,12 +54,12 @@ import net.yacy.kelondro.util.SetTools; public class Blacklist { - public static final String BLACKLIST_DHT = "dht"; - public static final String BLACKLIST_CRAWLER = "crawler"; - public static final String BLACKLIST_PROXY = "proxy"; - public static final String BLACKLIST_SEARCH = "search"; - public static final String BLACKLIST_SURFTIPS = "surftips"; - public static final String BLACKLIST_NEWS = "news"; + private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/BlacklistCache_DHT.ser"); + + public enum BlacklistType { + DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS + } + public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$"; public static enum BlacklistError { @@ -82,33 +86,31 @@ public class Blacklist { return this.errorCode; } } - protected static final Set BLACKLIST_TYPES = new HashSet(Arrays.asList(new String[]{ - Blacklist.BLACKLIST_CRAWLER, - Blacklist.BLACKLIST_PROXY, - Blacklist.BLACKLIST_DHT, - Blacklist.BLACKLIST_SEARCH, - Blacklist.BLACKLIST_SURFTIPS, - Blacklist.BLACKLIST_NEWS - })); - public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news"; + private File blacklistRootPath = null; - private final ConcurrentMap cachedUrlHashs; - private final ConcurrentMap>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here - private final ConcurrentMap>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here + private final ConcurrentMap cachedUrlHashs; + private final ConcurrentMap>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here + private final ConcurrentMap>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here + public Blacklist(final File rootPath) { setRootPath(rootPath); // prepare the data structure - this.hostpaths_matchable = new ConcurrentHashMap>>(); - this.hostpaths_notmatchable = new ConcurrentHashMap>>(); - this.cachedUrlHashs = new ConcurrentHashMap(); + this.hostpaths_matchable = new ConcurrentHashMap>>(); + this.hostpaths_notmatchable = new ConcurrentHashMap>>(); + this.cachedUrlHashs = new ConcurrentHashMap(); - for (final String blacklistType : BLACKLIST_TYPES) { + for (final BlacklistType blacklistType : BlacklistType.values()) { this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap>()); this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap>()); - this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap>()); + if (blacklistType.equals(BlacklistType.DHT)) { + loadDHTCache(); + } else { + this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + } } } @@ -126,30 +128,16 @@ public class Blacklist { this.blacklistRootPath = rootPath; } - protected Map> getBlacklistMap(final String blacklistType, final boolean matchable) { - if (blacklistType == null) { - throw new IllegalArgumentException("Blacklist type not set."); - } - if (!BLACKLIST_TYPES.contains(blacklistType)) { - throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + "."); - } - + protected ConcurrentMap> getBlacklistMap(final BlacklistType blacklistType, final boolean matchable) { return (matchable) ? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType); } - protected HandleSet getCacheUrlHashsSet(final String blacklistType) { - if (blacklistType == null) { - throw new IllegalArgumentException("Blacklist type not set."); - } - if (!BLACKLIST_TYPES.contains(blacklistType)) { - throw new IllegalArgumentException("Unknown backlist type."); - } - + protected HandleSet getCacheUrlHashsSet(final BlacklistType blacklistType) { return this.cachedUrlHashs.get(blacklistType); } public void clear() { - for (final Map> entry : this.hostpaths_matchable.values()) { + for (final ConcurrentMap> entry : this.hostpaths_matchable.values()) { entry.clear(); } for (final Map> entry : this.hostpaths_notmatchable.values()) { @@ -162,12 +150,12 @@ public class Blacklist { public int size() { int size = 0; - for (final String entry : this.hostpaths_matchable.keySet()) { + for (final BlacklistType entry : this.hostpaths_matchable.keySet()) { for (final List ientry : this.hostpaths_matchable.get(entry).values()) { size += ientry.size(); } } - for (final String entry : this.hostpaths_notmatchable.keySet()) { + for (final BlacklistType entry : this.hostpaths_notmatchable.keySet()) { for (final List ientry : this.hostpaths_notmatchable.get(entry).values()) { size += ientry.size(); } @@ -188,8 +176,8 @@ public class Blacklist { * @param sep */ private void loadList(final BlacklistFile blFile, final String sep) { - final Map> blacklistMapMatch = getBlacklistMap(blFile.getType(), true); - final Map> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false); + final ConcurrentMap> blacklistMapMatch = getBlacklistMap(blFile.getType(), true); + final ConcurrentMap> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false); Set>> loadedBlacklist; Map.Entry> loadedEntry; List paths; @@ -240,18 +228,18 @@ public class Blacklist { } } - public void loadList(final String blacklistType, final String fileNames, final String sep) { + public void loadList(final BlacklistType blacklistType, final String fileNames, final String sep) { // method for not breaking older plasmaURLPattern interface final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType); loadList(blFile, sep); } - public void removeAll(final String blacklistType, final String host) { + public void removeAll(final BlacklistType blacklistType, final String host) { getBlacklistMap(blacklistType, true).remove(host); getBlacklistMap(blacklistType, false).remove(host); } - public void remove(final String blacklistType, final String host, final String path) { + public void remove(final BlacklistType blacklistType, final String host, final String path) { final Map> blacklistMap = getBlacklistMap(blacklistType, true); List hostList = blacklistMap.get(host); @@ -272,7 +260,7 @@ public class Blacklist { } } - public void add(final String blacklistType, final String host, final String path) { + public void add(final BlacklistType blacklistType, final String host, final String path) { if (host == null) { throw new IllegalArgumentException("host may not be null"); } @@ -296,18 +284,18 @@ public class Blacklist { public int blacklistCacheSize() { int size = 0; - final Iterator iter = this.cachedUrlHashs.keySet().iterator(); + final Iterator iter = this.cachedUrlHashs.keySet().iterator(); while (iter.hasNext()) { size += this.cachedUrlHashs.get(iter.next()).size(); } return size; } - public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) { + public boolean hashInBlacklistedCache(final BlacklistType blacklistType, final byte[] urlHash) { return getCacheUrlHashsSet(blacklistType).has(urlHash); } - public boolean contains(final String blacklistType, final String host, final String path) { + public boolean contains(final BlacklistType blacklistType, final String host, final String path) { boolean ret = false; if (blacklistType != null && host != null && path != null) { @@ -324,7 +312,18 @@ public class Blacklist { return ret; } - public boolean isListed(final String blacklistType, final DigestURI url) { + /** + * Checks whether the given entry is listed in given blacklist type + * @param blacklistType The used blacklist + * @param entry Entry to be checked + * @return Whether the given entry is blacklisted + */ + public boolean isListed(final BlacklistType blacklistType, final URIMetadataRow entry) { + // Call inner method + return isListed(blacklistType, entry.url()); + } + + public boolean isListed(final BlacklistType blacklistType, final DigestURI url) { if (url == null) { throw new IllegalArgumentException("url may not be null"); } @@ -358,7 +357,7 @@ public class Blacklist { return "Default YaCy Blacklist Engine"; } - public boolean isListed(final String blacklistType, final String hostlow, final String path) { + public boolean isListed(final BlacklistType blacklistType, final String hostlow, final String path) { if (hostlow == null) { throw new IllegalArgumentException("hostlow may not be null"); } @@ -509,4 +508,33 @@ public class Blacklist { final Set blacklist = new HashSet(FileUtils.getListArray(new File(listsPath, blacklistToUse))); return blacklist != null && blacklist.contains(newEntry); } + + public final void saveDHTCache() { + try { + final ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(BLACKLIST_DHT_CACHEFILE)); + out.writeObject(getCacheUrlHashsSet(BlacklistType.DHT)); + out.close(); + + } catch (final IOException e) { + Log.logException(e); + } + } + + public final void loadDHTCache() { + try { + if (BLACKLIST_DHT_CACHEFILE.exists()) { + final ObjectInputStream in = new ObjectInputStream(new FileInputStream(BLACKLIST_DHT_CACHEFILE)); + this.cachedUrlHashs.put(BlacklistType.DHT, (HandleSet) in.readObject()); + in.close(); + } else { + this.cachedUrlHashs.put(BlacklistType.DHT, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + } + } catch (final ClassNotFoundException e) { + Log.logException(e); + } catch (final FileNotFoundException e) { + Log.logException(e); + } catch (final IOException e) { + Log.logException(e); + } + } } diff --git a/source/net/yacy/repository/BlacklistFile.java b/source/net/yacy/repository/BlacklistFile.java index 5c516ae09..00493ddd8 100644 --- a/source/net/yacy/repository/BlacklistFile.java +++ b/source/net/yacy/repository/BlacklistFile.java @@ -30,12 +30,14 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import net.yacy.repository.Blacklist.BlacklistType; + public class BlacklistFile { private final String filename; - private final String type; + private final BlacklistType type; - public BlacklistFile(final String filename, final String type) { + public BlacklistFile(final String filename, final BlacklistType type) { this.filename = filename; this.type = type; } @@ -53,5 +55,5 @@ public class BlacklistFile { return new HashSet(Arrays.asList(this.filename.split(","))); } - public String getType() { return this.type; } + public BlacklistType getType() { return this.type; } } diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 82cef5fd4..24f8ebbcf 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -52,6 +52,7 @@ import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.Cache; @@ -188,8 +189,8 @@ public final class LoaderDispatcher { final String protocol = url.getProtocol(); final String host = url.getHost(); - // check if url is in blacklist - if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) { + // check if url is in blacklist + if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) { this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index b3452c5e4..195f38266 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2597,7 +2597,7 @@ public final class Switchboard extends serverSwitch "denied by profile rule, process case=" + processCase + ", profile name = " - + queueEntry.profile().name()); + + queueEntry.profile().name()); return; } diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java index 61f6c82d6..0ce804a23 100644 --- a/source/net/yacy/search/index/MetadataRepository.java +++ b/source/net/yacy/search/index/MetadataRepository.java @@ -60,6 +60,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.util.MemoryControl; import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import de.anomic.crawler.CrawlStacker; public final class MetadataRepository implements /*Metadata,*/ Iterable { @@ -408,8 +409,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable remove(entry.hash()); continue; } - if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) || - this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) || + if (this.blacklist.isListed(BlacklistType.CRAWLER, entry) || + this.blacklist.isListed(BlacklistType.DHT, entry) || (this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) { this.lastBlacklistedUrl = entry.url().toNormalform(true, true); this.lastBlacklistedHash = ASCII.String(entry.hash()); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 221bf4ad8..fd90400c6 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -62,7 +62,7 @@ import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.util.ISO639; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; import net.yacy.search.query.RWIProcess; @@ -536,7 +536,7 @@ public class Segment { urlHashs.put(entry.urlhash()); } else { url = ue.url(); - if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { + if (url == null || Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) { urlHashs.put(entry.urlhash()); } } diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index 43b189c7e..d1046693e 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -64,6 +64,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.TermSearch; import net.yacy.peers.graphics.ProfilingGraph; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; @@ -624,7 +625,13 @@ public final class RWIProcess extends Thread this.sortout++; continue; } - + + // Check for blacklist + if ( Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page) ) { + this.sortout++; + continue; + } + final String pageurl = page.url().toNormalform(true, true); final String pageauthor = page.dc_creator(); final String pagetitle = page.dc_title().toLowerCase(); diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java index 7f6707601..16765e59f 100644 --- a/source/net/yacy/search/snippet/MediaSnippet.java +++ b/source/net/yacy/search/snippet/MediaSnippet.java @@ -51,7 +51,7 @@ import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.util.ByteArray; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.crawler.ZURL.FailCategory; import de.anomic.crawler.retrieval.Request; @@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable, Comparator, Comparator= 0 || u.indexOf("favicon",0) >= 0) continue; if (ientry.height() > 0 && ientry.height() < 32) continue; if (ientry.width() > 0 && ientry.width() < 32) continue; @@ -251,8 +251,8 @@ public class MediaSnippet implements Comparable, Comparator Date: Wed, 30 May 2012 16:57:54 +0200 Subject: [PATCH 6/8] Fix to make all values lower-case (this should make all existing blacklists compatible with the new enum) --- source/net/yacy/repository/Blacklist.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 472921d84..f993f8739 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -57,7 +57,12 @@ public class Blacklist { private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/BlacklistCache_DHT.ser"); public enum BlacklistType { - DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS + DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS; + + @Override + public final String toString () { + return super.toString().toLowerCase(); + } } public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$"; From 4ee6fb1de9c8a9226d75c9857f399ce5984fd845 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 11 Jun 2012 00:38:02 +0200 Subject: [PATCH 7/8] added missing blacklist dht cache storage (maybe due to mistakes in cherry picking) --- source/net/yacy/repository/Blacklist.java | 20 +++++++++++++++++--- source/net/yacy/search/Switchboard.java | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index f993f8739..7675d7ada 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -54,7 +54,7 @@ import net.yacy.kelondro.util.SetTools; public class Blacklist { - private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/BlacklistCache_DHT.ser"); + private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/blacklistCache_DHT.ser"); public enum BlacklistType { DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS; @@ -119,6 +119,20 @@ public class Blacklist { } } + /** + * Close (shutdown) this "sub-system", add more here for shutdown. + * + * @return void + */ + public synchronized void close() { + Log.logFine("Blacklist", "Shutting down blacklists ..."); + + // Save DHT cache + saveDHTCache(); + + Log.logFine("Blacklist", "All blacklists has been shutdown."); + } + public final void setRootPath(final File rootPath) { if (rootPath == null) { throw new NullPointerException("The blacklist root path must not be null."); @@ -531,8 +545,7 @@ public class Blacklist { final ObjectInputStream in = new ObjectInputStream(new FileInputStream(BLACKLIST_DHT_CACHEFILE)); this.cachedUrlHashs.put(BlacklistType.DHT, (HandleSet) in.readObject()); in.close(); - } else { - this.cachedUrlHashs.put(BlacklistType.DHT, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + return; } } catch (final ClassNotFoundException e) { Log.logException(e); @@ -541,5 +554,6 @@ public class Blacklist { } catch (final IOException e) { Log.logException(e); } + this.cachedUrlHashs.put(BlacklistType.DHT, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); } } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 195f38266..c811cb26c 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -1600,6 +1600,7 @@ public final class Switchboard extends serverSwitch this.tables.close(); Domains.close(); AccessTracker.dumpLog(new File("DATA/LOG/queries.log")); + Switchboard.urlBlacklist.close(); UPnP.deletePortMapping(); this.tray.remove(); try { From bef823c247142ce142798987618b125236e53098 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 11 Jun 2012 01:20:54 +0200 Subject: [PATCH 8/8] close the reader if finished --- source/net/yacy/kelondro/blob/HeapReader.java | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/source/net/yacy/kelondro/blob/HeapReader.java b/source/net/yacy/kelondro/blob/HeapReader.java index 2be59f361..251e5d435 100644 --- a/source/net/yacy/kelondro/blob/HeapReader.java +++ b/source/net/yacy/kelondro/blob/HeapReader.java @@ -774,6 +774,7 @@ public class HeapReader { if (len > 1) { if (len - 1 != this.is.skipBytes(len - 1)) { // all that is remaining Log.logWarning("HeapReader", "problem skiping " + + len + " bytes in " + this.blobFile.getName()); + try {this.is.close();} catch (IOException e) {} return null; } } @@ -782,23 +783,33 @@ public class HeapReader { // we are now ahead of remaining this.keylen - 1 bytes of the key key = new byte[this.keylen]; key[0] = b; // the first entry that we know already - if (this.is.read(key, 1, keylen1) < keylen1) return null; // read remaining key bytes + if (this.is.read(key, 1, keylen1) < keylen1) { + try {this.is.close();} catch (IOException e) {} + return null; // read remaining key bytes + } // so far we have read this.keylen - 1 + 1 = this.keylen bytes. // there must be a remaining number of len - this.keylen bytes left for the BLOB - if (len < this.keylen) return null; // a strange case that can only happen in case of corrupted data + if (len < this.keylen) { + try {this.is.close();} catch (IOException e) {} + return null; // a strange case that can only happen in case of corrupted data + } try { payload = new byte[len - this.keylen]; // the remaining record entries - if (this.is.read(payload) < payload.length) return null; + if (this.is.read(payload) < payload.length) { + try {this.is.close();} catch (IOException e) {} + return null; + } return new entry(key, payload); - } catch (OutOfMemoryError e) { + } catch (OutOfMemoryError ee) { // the allocation of memory for the payload may fail // this is bad because we must interrupt the iteration here but the // process that uses the iteration may think that the iteraton has just been completed - Log.logSevere("HeapReader", "out of memory in LookAheadIterator.next0", e); + Log.logSevere("HeapReader", "out of memory in LookAheadIterator.next0", ee); + try {this.is.close();} catch (IOException e) {} return null; } } - } catch (final IOException e) { + } catch (IOException e) { return null; } } @@ -807,11 +818,6 @@ public class HeapReader { if (this.is != null) try { this.is.close(); } catch (final IOException e) {Log.logException(e);} this.is = null; } - - @Override - protected void finalize() { - this.close(); - } } public static class entry implements Map.Entry {