Merge branch 'master' of git://gitorious.org/yacy/rc1.git

pull/1/head
reger 13 years ago
commit cac2c7ee34

@ -1011,7 +1011,6 @@ about.body =
# search heuristics
heuristic.site = false
heuristic.scroogle = false
heuristic.blekko = false
# colours for generic design

@ -49,6 +49,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistError;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import de.anomic.data.ListManager;
@ -76,10 +77,6 @@ public class BlacklistCleaner_p {
ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS"));
String blacklistToUse = null;
// get the list of supported blacklist types
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
prop.put(DISABLED+"checked", "1");
if (post != null) {
@ -102,10 +99,10 @@ public class BlacklistCleaner_p {
if (post.containsKey("delete")) {
prop.put(RESULTS + "modified", "1");
prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", true)));
prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", true)));
} else if (post.containsKey("alter")) {
prop.put(RESULTS + "modified", "2");
prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false)));
prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false)));
}
// list illegal entries
@ -275,7 +272,7 @@ public class BlacklistCleaner_p {
* @param entries Array of entries to be deleted.
* @return Length of the list of entries to be removed.
*/
private static int removeEntries(final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) {
private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) {
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
@ -299,7 +296,7 @@ public class BlacklistCleaner_p {
}
// remove the entry from the running blacklist engine
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0));
final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1);
@ -328,7 +325,7 @@ public class BlacklistCleaner_p {
*/
private static int alterEntries(
final String blacklistToUse,
final String[] supportedBlacklistTypes,
final BlacklistType[] supportedBlacklistTypes,
final String[] oldEntry,
final String[] newEntry) {
removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
@ -346,7 +343,7 @@ public class BlacklistCleaner_p {
path = n.substring(pos + 1);
}
pw.println(host + "/" + path);
for (final String s : supportedBlacklistTypes) {
for (final BlacklistType s : supportedBlacklistTypes) {
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(
s,

@ -34,7 +34,7 @@ import java.net.MalformedURLException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.data.ListManager;
@ -64,21 +64,23 @@ public class BlacklistTest_p {
DigestURI testurl = null;
try {
testurl = new DigestURI(urlstring);
} catch (final MalformedURLException e) { testurl = null; }
} catch (final MalformedURLException e) {
testurl = null;
}
if(testurl != null) {
prop.putHTML("url",testurl.toString());
prop.putHTML("testlist_url",testurl.toString());
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl))
prop.put("testlist_listedincrawler", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl))
prop.put("testlist_listedindht", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl))
prop.put("testlist_listedinnews", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl))
prop.put("testlist_listedinproxy", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl))
prop.put("testlist_listedinsearch", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl))
prop.put("testlist_listedinsurftips", "1");
}
else {

@ -43,6 +43,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import de.anomic.data.ListManager;
@ -64,10 +65,6 @@ public class Blacklist_p {
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// get the list of supported blacklist types
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
// load all blacklist files located in the directory
List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
@ -98,22 +95,22 @@ public class Blacklist_p {
}
if(testurl != null) {
prop.putHTML("testlist_url",testurl.toString());
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) {
prop.put("testlist_listedincrawler", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) {
prop.put("testlist_listedindht", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) {
prop.put("testlist_listedinnews", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) {
prop.put("testlist_listedinproxy", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) {
prop.put("testlist_listedinsearch", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) {
prop.put("testlist_listedinsurftips", "1");
}
} else {
@ -159,7 +156,7 @@ public class Blacklist_p {
ListManager.updateListSet(BLACKLIST_SHARED, blacklistToUse);
// activate it for all known blacklist types
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
ListManager.updateListSet(supportedBlacklistType + ".BlackLists", blacklistToUse);
}
} catch (final IOException e) {/* */}
@ -189,7 +186,7 @@ public class Blacklist_p {
Log.logWarning("Blacklist", "file "+ blackListFile +" could not be deleted!");
}
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
ListManager.removeFromListSet(supportedBlacklistType + ".BlackLists",blacklistToUse);
}
@ -212,7 +209,7 @@ public class Blacklist_p {
return prop;
}
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (post.containsKey("activateList4" + supportedBlacklistType)) {
ListManager.updateListSet(supportedBlacklistType + ".BlackLists",blacklistToUse);
} else {
@ -253,7 +250,7 @@ public class Blacklist_p {
if (selectedBlacklistEntries.length > 0) {
String temp = null;
for (final String selectedBlacklistEntry : selectedBlacklistEntries) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
@ -273,7 +270,7 @@ public class Blacklist_p {
// store this call as api call
ListManager.switchboard.tables.recordAPICall(post, "Blacklist_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "add to blacklist: " + blentry);
final String temp = addBlacklistEntry(blacklistToUse, blentry, header, supportedBlacklistTypes);
final String temp = addBlacklistEntry(blacklistToUse, blentry, header, BlacklistType.values());
if (temp != null) {
prop.put("LOCATION", temp);
return prop;
@ -298,12 +295,12 @@ public class Blacklist_p {
!targetBlacklist.equals(blacklistToUse)) {
String temp;
for (final String selectedBlacklistEntry : selectedBlacklistEntries) {
if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) {
if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
@ -338,12 +335,12 @@ public class Blacklist_p {
if (!selectedBlacklistEntries[i].equals(editedBlacklistEntries[i])) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) {
if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
@ -475,12 +472,12 @@ public class Blacklist_p {
if (element.equals(blacklistToUse)) { //current List
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
for (int blTypes=0; blTypes < BlacklistType.values().length; blTypes++) {
prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",BlacklistType.values()[blTypes].toString());
prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked",
ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists", element) ? "0" : "1");
ListManager.listSetContains(BlacklistType.values()[blTypes] + ".BlackLists", element) ? "0" : "1");
}
prop.put(DISABLED + "currentActiveFor", supportedBlacklistTypes.length);
prop.put(DISABLED + "currentActiveFor", BlacklistType.values().length);
} else {
prop.putXML(DISABLED + EDIT + BLACKLIST_MOVE + blacklistMoveCount + "_name", element);
@ -494,9 +491,9 @@ public class Blacklist_p {
}
int activeCount = 0;
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", element)) {
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType);
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType.toString());
activeCount++;
}
}
@ -521,13 +518,13 @@ public class Blacklist_p {
* @param newEntry the entry that is to be added
* @param header
* @param supportedBlacklistTypes
* @return null if no error occured, else a String to put into LOCATION
* @return null if no error occurred, else a String to put into LOCATION
*/
private static String addBlacklistEntry(
final String blacklistToUse,
final String newEntry,
final RequestHeader header,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
if (blacklistToUse == null || blacklistToUse.length() == 0) {
return "";
@ -555,7 +552,7 @@ public class Blacklist_p {
final String blacklistToUse,
final String oldEntry,
final RequestHeader header,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
if (blacklistToUse == null || blacklistToUse.length() == 0) {
return "";
@ -580,7 +577,7 @@ public class Blacklist_p {
final File listsPath,
final String blacklistToUse,
String oldEntry,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(listsPath, blacklistToUse));
@ -603,7 +600,7 @@ public class Blacklist_p {
pos = oldEntry.length();
oldEntry = oldEntry + "/.*";
}
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.remove(supportedBlacklistType,oldEntry.substring(0, pos), oldEntry.substring(pos + 1));
}
@ -622,7 +619,7 @@ public class Blacklist_p {
final File listsPath,
final String blacklistToUse,
String newEntry,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
// ignore empty entries
if(newEntry == null || newEntry.isEmpty()) {
@ -659,7 +656,7 @@ public class Blacklist_p {
final File listsPath,
final String blacklistToUse,
String newEntry,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
if (!Blacklist.blacklistFileContains(listsPath, blacklistToUse, newEntry)) {
// append the line to the file
@ -683,7 +680,7 @@ public class Blacklist_p {
// add to blacklist
int pos = newEntry.indexOf('/',0);
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
Switchboard.urlBlacklist.add(supportedBlacklistType, newEntry.substring(0, pos), newEntry.substring(pos + 1));
}

@ -43,20 +43,6 @@
</p>
</fieldset>
</form>
<!--
<form id="HeuristicFormScroogle" method="post" action="ConfigHeuristics_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<legend>
<input type="checkbox" name="scroogle_check" id="scroogle" onclick="window.location.href='ConfigHeuristics_p.html?#(scroogle.checked)#scroogle_on=::scroogle_off=#(/scroogle.checked)#'" value="scroogle"#(scroogle.checked)#:: checked="checked"#(/scroogle.checked)# />
<label for="scroogle">scroogle: load external search result list from <a href="http://scroogle.org">scroogle</a></label>
</legend>
<p>
When using this heuristic, then every search request line is used for a call to scroogle.
20 results are taken from scroogle and loaded simultanously, parsed and indexed immediately.
</p>
</fieldset>
</form>
-->
<form id="HeuristicFormBlekko" method="post" action="ConfigHeuristics_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<legend>
@ -73,4 +59,4 @@
#%env/templates/footer.template%#
</body>
</html>
</html>

@ -45,14 +45,11 @@ public class ConfigHeuristics_p {
if (post.containsKey("site_on")) sb.setConfig("heuristic.site", true);
if (post.containsKey("site_off")) sb.setConfig("heuristic.site", false);
if (post.containsKey("scroogle_on")) sb.setConfig("heuristic.scroogle", true);
if (post.containsKey("scroogle_off")) sb.setConfig("heuristic.scroogle", false);
if (post.containsKey("blekko_on")) sb.setConfig("heuristic.blekko", true);
if (post.containsKey("blekko_off")) sb.setConfig("heuristic.blekko", false);
}
prop.put("site.checked", sb.getConfigBool("heuristic.site", false) ? 1 : 0);
prop.put("scroogle.checked", sb.getConfigBool("heuristic.scroogle", false) ? 1 : 0);
prop.put("blekko.checked", sb.getConfigBool("heuristic.blekko", false) ? 1 : 0);
return prop;

@ -87,7 +87,6 @@ public class ConfigNetwork_p
if ( !indexReceive ) {
// remove heuristics
sb.setConfig("heuristic.site", false);
sb.setConfig("heuristic.scroogle", false);
sb.setConfig("heuristic.blekko", false);
}
final boolean robinsonmode = "robinson".equals(post.get("network", ""));

@ -58,7 +58,7 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.peers.dht.PeerSelection;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
@ -441,7 +441,7 @@ public class IndexControlRWIs_p
supportedBlacklistType + ".BlackLists",
blacklist) ) {
Switchboard.urlBlacklist.add(
supportedBlacklistType,
BlacklistType.valueOf(supportedBlacklistType),
url.getHost(),
url.getFile());
}
@ -457,7 +457,6 @@ public class IndexControlRWIs_p
if ( post.containsKey("blacklistdomains") ) {
PrintWriter pw;
try {
final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(",");
pw =
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
DigestURI url;
@ -472,7 +471,7 @@ public class IndexControlRWIs_p
if ( e != null ) {
url = e.url();
pw.println(url.getHost() + "/.*");
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists",
blacklist) ) {
@ -623,7 +622,7 @@ public class IndexControlRWIs_p
? "appears emphasized, "
: "")
+ ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : ""));
if ( Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, url) ) {
if ( Switchboard.urlBlacklist.isListed(BlacklistType.DHT, url) ) {
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1");
}
i++;

@ -43,7 +43,7 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.peers.NewsDB;
import net.yacy.peers.NewsPool;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -129,8 +129,10 @@ public class Supporter {
url = row.getPrimaryKeyUTF8().trim();
try {
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch(final MalformedURLException e) {continue;}
if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch (final MalformedURLException e) {
continue;
}
title = row.getColUTF8(1);
description = row.getColUTF8(2);
if ((url == null) || (title == null) || (description == null)) continue;

@ -42,7 +42,7 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.peers.NewsDB;
import net.yacy.peers.NewsPool;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -136,7 +136,7 @@ public class Surftips {
url = row.getPrimaryKeyUTF8().trim();
try{
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url)))
if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS ,new DigestURI(url)))
continue;
}catch(final MalformedURLException e){continue;};
title = row.getColUTF8(1);

@ -63,5 +63,5 @@ search();
<div id="trails"></div>
#%env/templates/footer.template%#
</body>
</body>
</html>

@ -2,46 +2,33 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.TripleStore;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.http.server.HTTPDemon;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory ;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFormatter;
import com.hp.hpl.jena.sparql.algebra.Algebra ;
import com.hp.hpl.jena.sparql.algebra.Op ;
import com.hp.hpl.jena.sparql.engine.QueryIterator ;
import com.hp.hpl.jena.sparql.engine.binding.Binding ;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.util.FileManager;
import de.anomic.http.server.HTTPDemon;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Triple_p {
public static serverObjects respond(final RequestHeader header,
final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
boolean hasRights = sb.verifyAuthentication(header);
prop.put("display", 1); // Fixed to 1
prop.putHTML("mode_output", "no query performed");
String q = "PREFIX lln: <http://virtual.x/>\n"+
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"+
"PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n"+
@ -52,21 +39,21 @@ public class Triple_p {
"}";
if (post != null) {
if (post.containsKey("submit")) {
//
System.out.println (post.get("submit"));
}
if (post.containsKey("rdffileslist")) {
String list = post.get("rdffileslist");
for (String s: list.split("\n")) {
String newurl = s;
try {
DigestURI d = new DigestURI (s);
if (d.getHost().endsWith(".yacy")) {
newurl = d.getProtocol()+"://"+HTTPDemon.getAlternativeResolver().resolve(d.getHost())+d.getPath();
System.out.println (newurl);
@ -77,38 +64,38 @@ public class Triple_p {
} catch (IOException e) {
Log.logException(e);
}
}
}
}
if (post.containsKey("rdffile")) {
TripleStore.Add(post.get("rdffile$file"));
}
if (post.containsKey("query")) {
// Create a new query
String queryString = post.get("query");
q = queryString;
int count = 0;
try {
com.hp.hpl.jena.query.Query query = QueryFactory.create(queryString);
// Execute the query and obtain results
QueryExecution qe = QueryExecutionFactory.create(query, TripleStore.model);
ResultSet resultSet = qe.execSelect();
ByteArrayOutputStream sos = new ByteArrayOutputStream();
ResultSetFormatter.outputAsRDF(sos, "", resultSet);
prop.putHTML("mode_rdfdump", sos.toString());
int scount = 0;
while (resultSet.hasNext()) {
QuerySolution s = resultSet.next();
@ -117,34 +104,34 @@ public class Triple_p {
prop.put("entries_"+scount+"_o", s.getResource(null).getURI());
scount ++;
}
prop.putHTML("entries", ""+scount);
for (String s: resultSet.getResultVars()) {
prop.putHTML("mode_output_"+count+"_caption", s);
count ++;
}
} catch (Exception e) {
prop.putHTML("mode_rdfdump", "error");
}
prop.putHTML("mode_output", ""+count);
}
}
prop.putHTML("mode_query", q);
// return rewrite properties
return prop;
}
}

@ -4,7 +4,7 @@ import java.util.List;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import de.anomic.data.ListManager;
import de.anomic.server.serverObjects;
@ -35,15 +35,14 @@ public class blacklists_p {
prop.put("lists_" + blacklistCount + "_shared", "0");
}
final String[] types = Blacklist.BLACKLIST_TYPES_STRING.split(",");
int j = 0;
for (final String type : types) {
prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type);
for (final BlacklistType type : BlacklistType.values()) {
prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type.toString());
prop.put("lists_" + blacklistCount + "_types_" + j + "_value",
ListManager.listSetContains(type + ".BlackLists", element) ? 1 : 0);
j++;
}
prop.put("lists_" + blacklistCount + "_types", types.length);
prop.put("lists_" + blacklistCount + "_types", BlacklistType.values().length);
if (!"1".equals(attrOnly) && !"true".equals(attrOnly)) {
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, element));

@ -1,6 +1,6 @@
package interaction;
//ViewLog_p.java
//ViewLog_p.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@yacy.net
@ -30,75 +30,72 @@ package interaction;
//if the shell's current path is HTROOT
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.UnsupportedEncodingException;
import com.hp.hpl.jena.rdf.model.Model;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.Interaction;
import net.yacy.interaction.TripleStore;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import com.hp.hpl.jena.rdf.model.Model;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Request handler that serialises an RDF model into the {@code resultXML}
 * response property.
 *
 * NOTE(review): this listing appears to be a rendered diff without +/- markers,
 * so some statements below occur twice (pre-change and post-change form of the
 * same line). Confirm against the checked-out file before relying on any
 * single statement.
 */
public class GetRDF {
/**
 * Builds the response for one request.
 *
 * @param header request header; handed to {@code Interaction.GetLoggedOnUser}
 *               to select the per-user model
 * @param post   request parameters; may be {@code null}, in which case the
 *               per-user branch is taken
 * @param env    server environment; cast to {@code Switchboard}
 * @return properties holding {@code resultXML}: the serialised model, or an
 *         empty string when no per-user model is found
 */
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// sb, url, s, p and o are assigned here but never read in the visible body.
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
String url = "";
String s = "";
String p = "";
String o = "";
// "global" is a presence flag: only the existence of the key matters, not its value.
Boolean global = false;
if(post != null){
global = post.containsKey("global");
}
if (global) {
// Global request: dump the shared TripleStore.model.
ByteArrayOutputStream fout;
fout = new ByteArrayOutputStream();
TripleStore.model.write(fout);
// No-argument toString() decodes with the platform default charset;
// the variant below names the charset explicitly.
prop.put("resultXML", fout.toString());
try {
prop.put("resultXML", fout.toString(UTF8.charset.name()));
} catch (UnsupportedEncodingException e) {
// NOTE(review): exception is silently swallowed here; nothing is logged
// and this branch writes no fallback value for resultXML.
}
} else {
// Per-user request: look up the model stored for the logged-on user.
Model tmp = TripleStore.privatestorage.get(Interaction.GetLoggedOnUser(header));
if (tmp != null) {
ByteArrayOutputStream fout;
fout = new ByteArrayOutputStream();
ByteArrayOutputStream fout;
fout = new ByteArrayOutputStream();
tmp.write(fout);
// Same default-charset vs. explicit-charset pair as in the global branch.
prop.put("resultXML", fout.toString());
try {
prop.put("resultXML", fout.toString(UTF8.charset.name()));
} catch (UnsupportedEncodingException e) {
// NOTE(review): exception is silently swallowed here as well.
}
} else {
// No model for this user: answer with an empty document.
prop.put("resultXML", "");
}
}
return prop;
}
}

@ -1,6 +1,6 @@
package interaction;
//ViewLog_p.java
//ViewLog_p.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@yacy.net
@ -30,75 +30,65 @@ package interaction;
//if the shell's current path is HTROOT
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import com.hp.hpl.jena.rdf.model.Model;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.Interaction;
import net.yacy.interaction.TripleStore;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import com.hp.hpl.jena.rdf.model.Model;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Request handler that serialises an RDF model into the {@code resultXML}
 * response property.
 *
 * NOTE(review): despite the class name, nothing in the visible body stores or
 * modifies RDF data; it only reads and serialises a model. Confirm whether the
 * write path is still to be implemented.
 *
 * NOTE(review): this listing appears to be a rendered diff without +/- markers,
 * so some statements below occur twice (pre-change and post-change form of the
 * same line). Confirm against the checked-out file.
 */
public class PutRDF {
/**
 * Builds the response for one request.
 *
 * @param header request header; handed to {@code Interaction.GetLoggedOnUser}
 *               to select the per-user model
 * @param post   request parameters; may be {@code null}, in which case the
 *               per-user branch is taken
 * @param env    server environment; cast to {@code Switchboard}
 * @return properties holding {@code resultXML}: the serialised model, or an
 *         empty string when no per-user model is found
 */
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// sb, url, s, p and o are assigned here but never read in the visible body.
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
String url = "";
String s = "";
String p = "";
String o = "";
// "global" is a presence flag: only the existence of the key matters, not its value.
Boolean global = false;
if(post != null){
global = post.containsKey("global");
}
if (global) {
// Global request: dump the shared TripleStore.model.
ByteArrayOutputStream fout;
fout = new ByteArrayOutputStream();
TripleStore.model.write(fout);
// No-argument toString() decodes the bytes with the platform default charset.
prop.put("resultXML", fout.toString());
} else {
// Per-user request: look up the model stored for the logged-on user.
Model tmp = TripleStore.privatestorage.get(Interaction.GetLoggedOnUser(header));
if (tmp != null) {
ByteArrayOutputStream fout;
fout = new ByteArrayOutputStream();
ByteArrayOutputStream fout;
fout = new ByteArrayOutputStream();
tmp.write(fout);
// Platform default charset again; see the global branch.
prop.put("resultXML", fout.toString());
} else {
// No model for this user: answer with an empty document.
prop.put("resultXML", "");
}
}
return prop;
}
}

@ -1,6 +1,6 @@
package interaction;
//ViewLog_p.java
//ViewLog_p.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@yacy.net
@ -32,23 +32,22 @@ package interaction;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.Interaction;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Table {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
UserDB.Entry entry=null;
//default values
prop.put("enabled_logged_in", "0");
@ -71,7 +70,7 @@ public class Table {
}
}
}
//identified via userDB
if(entry != null){
prop.put("enabled_logged-in", "1");
@ -97,48 +96,48 @@ public class Table {
}
String url = "";
String s = "";
String s = "";
String p = "";
String o = "";
Boolean global = false;
if(post != null){
if(post.containsKey("url")){
url = post.get("url");
}
if(post.containsKey("s")){
s = post.get("s");
}
if(post.containsKey("p")){
p = post.get("p");
}
if(post.containsKey("o")){
o = post.get("o");
}
global = post.containsKey("global");
}
if (post.containsKey("load")) {
o = Interaction.GetTableentry(s, p, global ? "global" : username, "");
} else {
Interaction.Tableentry(s, p, o, global ? "global" : username, "");
}
}
prop.put("result", o);
return prop;
}
}

@ -1,6 +1,6 @@
package interaction;
//ViewLog_p.java
//ViewLog_p.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@yacy.net
@ -32,23 +32,22 @@ package interaction;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.Interaction;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Triple {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
UserDB.Entry entry=null;
//default values
prop.put("enabled_logged_in", "0");
@ -71,7 +70,7 @@ public class Triple {
}
}
}
//identified via userDB
if(entry != null){
prop.put("enabled_logged-in", "1");
@ -97,47 +96,47 @@ public class Triple {
}
String url = "";
String s = "";
String s = "";
String p = "";
String o = "";
Boolean global = false;
if(post != null){
if(post.containsKey("url")){
url = post.get("url");
}
if(post.containsKey("s")){
s = post.get("s");
}
if(post.containsKey("p")){
p = post.get("p");
}
if(post.containsKey("o")){
o = post.get("o");
}
global = post.containsKey("global");
}
if (post.containsKey("load")) {
o = Interaction.TripleGet(s, p, global ? "" : username);
} else {
Interaction.Triple(url, s, p, o, global ? "" : username);
}
}
prop.put("result", o);
return prop;
}
}

@ -1,24 +1,17 @@
package interaction;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import net.yacy.yacy;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.content.SurrogateReader;
import net.yacy.interaction.Interaction;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Digest;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.data.UserDB.AccessRight;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -31,67 +24,67 @@ public class UploadSingleFile {
final serverObjects prop = new serverObjects();
if (post != null){
if (post.containsKey("uploadfile") && !post.get("uploadfile").isEmpty()) {
UserDB.Entry entry = sb.userDB.getEntry(Interaction.GetLoggedOnUser(header));
if (entry != null) {
if (entry.hasRight(UserDB.AccessRight.UPLOAD_RIGHT)) {
// the user has the upload right
}
}
String targetfilename = post.get("uploadfile", "target.file");
String targetfolder = "/upload/"+Interaction.GetLoggedOnUser(header);
if (post.containsKey("targetfilename")) {
targetfilename = post.get("targetfilename");
}
if (post.containsKey("targetfolder")) {
targetfolder = post.get("targetfolder");
if (!targetfolder.startsWith("/")) {
targetfolder = "/" + targetfolder;
}
}
File f = new File(yacy.dataHome_g, "DATA/HTDOCS"+targetfolder+"/");
yacy.mkdirsIfNeseccary (f);
f = new File(f, targetfilename);
Log.logInfo ("FILEUPLOAD", f.toString());
try {
ByteArrayInputStream stream = new ByteArrayInputStream(post
.get("uploadfile$file").getBytes());
if (stream != null) {
OutputStream out;
out = new FileOutputStream(f.toString());
byte[] buf = new byte[1024];
int len;
while ((len = stream.read(buf)) > 0) {
@ -100,7 +93,7 @@ public class UploadSingleFile {
stream.close();
out.close();
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
@ -108,8 +101,8 @@ public class UploadSingleFile {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
@ -117,9 +110,9 @@ public class UploadSingleFile {
// return rewrite properties
return prop;
}
}

@ -47,7 +47,7 @@ import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
@ -238,10 +238,7 @@ public class sharedBlacklist_p {
pw.println(newItem);
if (Switchboard.urlBlacklist != null) {
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) {
Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1));
}

@ -35,6 +35,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
@ -133,8 +134,16 @@ public final class crawlReceipt {
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url());
if (urlRejectReason != null) {
if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
if (urlRejectReason != null) {
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "9999");
return prop;
}
// Check URL against DHT blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) {
// URL is blacklisted
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam);
prop.put("delay", "9999");
return prop;
}

@ -46,7 +46,7 @@ import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.peers.dht.FlatWordPartitionScheme;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
@ -171,9 +171,9 @@ public final class transferRWI {
iEntry = new WordReferenceRow(estring.substring(p));
urlHash = iEntry.urlhash();
// block blacklisted entries
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
if (Network.log.isFine()) Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
// block blacklisted entries
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(BlacklistType.DHT, urlHash))) {
Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
blocked++;
continue;
}

@ -38,7 +38,7 @@ import net.yacy.peers.EventChannel;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
@ -121,9 +121,9 @@ public final class transferURL {
continue;
}
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;

@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.SeedDB;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
@ -404,9 +404,9 @@ public final class CrawlStacker {
return "denied_(" + urlRejectReason + ")";
}
// check blacklist
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is in blacklist.");
// check blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
this.log.logFine("URL '" + urlstring + "' is in blacklist.");
return "url in blacklist";
}

@ -36,7 +36,7 @@ import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
@ -95,7 +95,7 @@ public final class HTTPLoader {
// check if url is in blacklist
final String hostlow = host.toLowerCase();
if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}
@ -236,7 +236,7 @@ public final class HTTPLoader {
// check if url is in blacklist
final String hostlow = host.toLowerCase();
if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) {
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}

@ -1,10 +1,10 @@
// listManager.java
// -------------------------------------
// part of YACY
//
//
// (C) 2005, 2006 by Alexander Schier
// (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com
//
//
// last change: $LastChangedDate$ by $LastChangedBy$
// $LastChangedRevision$
//
@ -30,35 +30,34 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.BlacklistFile;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import java.util.List;
import java.util.regex.Pattern;
// The Naming of the functions is a bit strange...
public class ListManager {
private final static Pattern commaPattern = Pattern.compile(",");
public static Switchboard switchboard = null;
public static File listsPath = null;
/**
* Get ListSet from configuration file and return it as a unified Set.
*
*
* <b>Meaning of ListSet</b>: There are various "lists" in YaCy which are
* actually disjoint (pairwise unequal) sets which themselves can be separated
* into different subsets. E.g., there can be more than one blacklist of a type.
* A ListSet is the set of all those "lists" (subsets) of an equal type.
*
* A ListSet is the set of all those "lists" (subsets) of an equal type.
*
* @param setName name of the ListSet
* @return a ListSet from configuration file
*/
@ -69,13 +68,13 @@ public class ListManager {
/**
* Removes an element from a ListSet and updates the configuration file
* accordingly. If the element doesn't exist, then nothing will be changed.
*
*
* @param setName name of the ListSet.
* @param listName name of the element to remove from the ListSet.
*/
public static void removeFromListSet(final String setName, final String listName) {
final Set<String> listSet = getListSet(setName);
if (!listSet.isEmpty()) {
listSet.remove(listName);
switchboard.setConfig(setName, collection2string(listSet));
@ -86,9 +85,9 @@ public class ListManager {
* Adds an element to an existing ListSet. If the ListSet doesn't exist yet,
* a new one will be added. If the ListSet already contains an identical element,
* then nothing happens.
*
*
* The new list will be written to the configuration file.
*
*
* @param setName
* @param newListName
*/
@ -101,7 +100,7 @@ public class ListManager {
/**
* @param setName ListSet in which to search for an element.
* @param listName the element to search for.
* @param listName the element to search for.
* @return <code>true</code> if the ListSet "setName" contains an element
* "listName", <code>false</code> otherwise.
*/
@ -112,23 +111,23 @@ public class ListManager {
//================general Lists==================
public static String getListString(final String filename, final boolean withcomments) {
public static String getListString(final String filename, final boolean withcomments) {
return FileUtils.getListString(new File(listsPath ,filename), withcomments);
}
//================Helper functions for collection conversion==================
/**
* Simple conversion of a Collection of Strings to a comma separated String.
* If the implementing Collection subclass guarantees an order of its elements,
* the substrings of the result will have the same order.
*
*
* @param col a Collection of Strings.
* @return String with elements from set separated by comma.
*/
public static String collection2string(final Collection<String> col){
final StringBuilder str = new StringBuilder(col.size() * 40);
if (col != null && !col.isEmpty()) {
final Iterator<String> it = col.iterator();
str.append(it.next());
@ -137,7 +136,7 @@ public class ListManager {
str.append(it.next());
}
}
return str.toString();
}
@ -158,13 +157,13 @@ public class ListManager {
/**
* Simple conversion of a comma separated list to a unified Set.
*
*
* @param string list of comma separated Strings
* @return resulting Set or empty Set if string is <code>null</code>
*/
public static Set<String> string2set(final String string){
HashSet<String> set;
if (string != null) {
set = new HashSet<String>(Arrays.asList(commaPattern.split(string, 0)));
} else {
@ -177,7 +176,7 @@ public class ListManager {
/**
* Simple conversion of a comma separated list to a Vector containing
* the order of the substrings.
*
*
* @param string list of comma separated Strings
* @return resulting Vector or empty Vector if string is <code>null</code>
*/
@ -198,19 +197,16 @@ public class ListManager {
/**
* Load or reload all active Blacklists
*/
public static void reloadBlacklists(){
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
final List<BlacklistFile> blacklistFiles = new ArrayList<BlacklistFile>(supportedBlacklistTypes.length);
for (String supportedBlacklistType : supportedBlacklistTypes) {
public static void reloadBlacklists(){
final List<BlacklistFile> blacklistFiles = new ArrayList<BlacklistFile>(BlacklistType.values().length);
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
final BlacklistFile blFile = new BlacklistFile(
switchboard.getConfig(
supportedBlacklistType + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
supportedBlacklistType.toString() + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
supportedBlacklistType);
blacklistFiles.add(blFile);
}
Switchboard.urlBlacklist.clear();
Switchboard.urlBlacklist.loadList(
blacklistFiles.toArray(new BlacklistFile[blacklistFiles.size()]),

@ -15,55 +15,56 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
public class AugmentedHtmlStream extends FilterOutputStream {
private Writer out;
private ByteArrayOutputStream buffer;
private Charset charset;
private DigestURI url;
private byte[] urlhash;
private RequestHeader requestHeader;
private final Writer out;
private final ByteArrayOutputStream buffer;
private final Charset charset;
private final DigestURI url;
private final RequestHeader requestHeader;
public AugmentedHtmlStream(OutputStream out, Charset charset, DigestURI url, byte[] urlhash, RequestHeader requestHeader) {
public AugmentedHtmlStream(OutputStream out, Charset charset, DigestURI url, RequestHeader requestHeader) {
super(out);
this.out = new BufferedWriter(new OutputStreamWriter(out, charset));
this.buffer = new ByteArrayOutputStream();
this.charset = charset;
this.url = url;
this.urlhash = urlhash;
this.requestHeader = requestHeader;
}
public void write(int b) throws IOException {
@Override
public void write(int b) throws IOException {
this.buffer.write(b);
}
public void write(byte[] b, int off, int len) throws IOException {
@Override
public void write(byte[] b, int off, int len) throws IOException {
this.buffer.write(b, off, len);
}
public void close() throws IOException {
StringBuffer b = new StringBuffer(this.buffer.toString(charset.name()));
@Override
public void close() throws IOException {
StringBuffer b = new StringBuffer(this.buffer.toString(this.charset.name()));
b = process(b);
out.write(b.toString());
out.close();
this.out.write(b.toString());
this.out.close();
}
public StringBuffer process(StringBuffer data) {
if (Switchboard.getSwitchboard().getConfigBool("proxyAugmentation", false) == true) {
if (!this.url.toNormalform(false, true).contains("currentyacypeer/")) {
return AugmentHtmlStream.process (data, charset, url, requestHeader);
return AugmentHtmlStream.process (data, this.charset, this.url, this.requestHeader);
} else {
return data;
}
} else {
} else {
return data;
}
}
public static boolean supportsMime(String mime) {
// System.out.println("mime" +mime);
return mime.split(";")[0].equals("text/html");

@ -1427,11 +1427,11 @@ public final class HTTPDFileHandler {
final String strARGS = (String) conProp.get("ARGS");
if(strARGS.startsWith("url=")) {
final String strUrl = strARGS.substring(4); // strip url=
try {
proxyurl = new URL(strUrl);
} catch (MalformedURLException e) {
proxyurl = new URL (URLDecoder.decode(strUrl));
proxyurl = new URL (URLDecoder.decode(strUrl, UTF8.charset.name()));
}
}
@ -1486,7 +1486,7 @@ public final class HTTPDFileHandler {
String directory = "";
if (proxyurl.getPath().lastIndexOf('/') > 0)
directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/'));
String location = "";
if (outgoingHeader.containsKey("Location")) {

@ -84,7 +84,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCountOutputStream;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache;
@ -350,7 +350,7 @@ public final class HTTPDProxyHandler {
// respond a 403 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
log.logInfo("AGIS blocking of host '" + hostlow + "'");
HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
@ -467,8 +467,8 @@ public final class HTTPDProxyHandler {
port = sb.peers.myPort();
path = path.substring(16);
}
// point virtual directory to my peer
// point virtual directory to my peer
if (path.startsWith("/currentyacypeer/")) {
host = sb.peers.myIP();
port = sb.peers.myPort();
@ -492,7 +492,7 @@ public final class HTTPDProxyHandler {
final String connectHost = hostPart(host, port, yAddress);
final String getUrl = "http://"+ connectHost + remotePath;
requestHeader.remove(HeaderFramework.HOST);
final HTTPClient client = setupHttpClient(requestHeader, connectHost);
@ -508,13 +508,13 @@ public final class HTTPDProxyHandler {
if (responseHeader.isEmpty()) {
throw new Exception(client.getHttpResponse().getStatusLine().toString());
}
if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
// enable chunk encoding, because we don't know the length after annotating
responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
}
}
ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond);
@ -555,7 +555,7 @@ public final class HTTPDProxyHandler {
// chunked encoding disables somewhere, add it again
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
}
// sending the respond header back to the client
if (chunkedOut != null) {
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
@ -584,10 +584,10 @@ public final class HTTPDProxyHandler {
final String storeError = response.shallStoreCacheForProxy();
final boolean storeHTCache = response.profile().storeHTCache();
final String supportError = TextParser.supports(response.url(), response.getMimeType());
if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, url.hash(), requestHeader);
}
outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, requestHeader);
}
if (
/*
* Now we store the response into the htcache directory if
@ -655,7 +655,7 @@ public final class HTTPDProxyHandler {
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
}
outStream.close();
if (chunkedOut != null) {
@ -745,9 +745,9 @@ public final class HTTPDProxyHandler {
//respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative'
if(AugmentedHtmlStream.supportsMime(cachedResponseHeader.mime())) {
respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, url.hash(), requestHeader);
respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, requestHeader);
}
// send also the complete body now from the cache
// simply read the file and transfer to out socket
FileUtils.copy(cacheEntry, respond);
@ -814,7 +814,7 @@ public final class HTTPDProxyHandler {
// re-calc the url path
final String remotePath = (args == null) ? path : (path + "?" + args);
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, remotePath)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) {
HTTPDemon.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
log.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -1243,7 +1243,7 @@ public final class HTTPDProxyHandler {
// blacklist idea inspired by [AS]:
// respond a 403 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase();
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
HTTPDemon.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
log.logInfo("AGIS blocking of host '" + hostlow + "'");

@ -2,15 +2,16 @@
package net.yacy.interaction;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringBufferInputStream;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
@ -32,7 +33,7 @@ public class TripleStore {
if (filename.endsWith(".nt")) LoadNTriples(filename);
else LoadRDF(filename);
}
public static void LoadRDF(String fileNameOrUri) throws IOException {
Model tmp = ModelFactory.createDefaultModel();
Log.logInfo("TRIPLESTORE", "Loading from " + fileNameOrUri);
@ -67,8 +68,7 @@ public class TripleStore {
try {
@SuppressWarnings("deprecation")
InputStream in = new StringBufferInputStream(rdffile);
InputStream in = new ByteArrayInputStream(UTF8.getBytes(rdffile));
// read the RDF/XML file
tmp.read(in, null);

@ -774,6 +774,7 @@ public class HeapReader {
if (len > 1) {
if (len - 1 != this.is.skipBytes(len - 1)) { // all that is remaining
Log.logWarning("HeapReader", "problem skiping " + + len + " bytes in " + this.blobFile.getName());
try {this.is.close();} catch (IOException e) {}
return null;
}
}
@ -782,23 +783,33 @@ public class HeapReader {
// we are now ahead of remaining this.keylen - 1 bytes of the key
key = new byte[this.keylen];
key[0] = b; // the first entry that we know already
if (this.is.read(key, 1, keylen1) < keylen1) return null; // read remaining key bytes
if (this.is.read(key, 1, keylen1) < keylen1) {
try {this.is.close();} catch (IOException e) {}
return null; // read remaining key bytes
}
// so far we have read this.keylen - 1 + 1 = this.keylen bytes.
// there must be a remaining number of len - this.keylen bytes left for the BLOB
if (len < this.keylen) return null; // a strange case that can only happen in case of corrupted data
if (len < this.keylen) {
try {this.is.close();} catch (IOException e) {}
return null; // a strange case that can only happen in case of corrupted data
}
try {
payload = new byte[len - this.keylen]; // the remaining record entries
if (this.is.read(payload) < payload.length) return null;
if (this.is.read(payload) < payload.length) {
try {this.is.close();} catch (IOException e) {}
return null;
}
return new entry(key, payload);
} catch (OutOfMemoryError e) {
} catch (OutOfMemoryError ee) {
// the allocation of memory for the payload may fail
// this is bad because we must interrupt the iteration here but the
// process that uses the iteration may think that the iteration has just been completed
Log.logSevere("HeapReader", "out of memory in LookAheadIterator.next0", e);
Log.logSevere("HeapReader", "out of memory in LookAheadIterator.next0", ee);
try {this.is.close();} catch (IOException e) {}
return null;
}
}
} catch (final IOException e) {
} catch (IOException e) {
return null;
}
}
@ -807,11 +818,6 @@ public class HeapReader {
if (this.is != null) try { this.is.close(); } catch (final IOException e) {Log.logException(e);}
this.is = null;
}
@Override
protected void finalize() {
this.close();
}
}
public static class entry implements Map.Entry<byte[], byte[]> {

@ -56,7 +56,7 @@ import java.util.Set;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
public class NewsPool {
@ -343,13 +343,13 @@ public class NewsPool {
if (record.created().getTime() == 0) return;
final Map<String, String> attributes = record.attributes();
if (attributes.containsKey("url")){
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){
if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("url")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url"));
return;
}
}
if (attributes.containsKey("startURL")){
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){
if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("startURL")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL"));
return;
}

@ -96,6 +96,7 @@ import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.peers.graphics.WebStructureGraph.HostReference;
import net.yacy.peers.operation.yacyVersion;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
@ -699,9 +700,9 @@ public final class Protocol
assert (urlEntry.hash().length == 12) : "urlEntry.hash() = " + ASCII.String(urlEntry.hash());
if ( urlEntry.hash().length != 12 ) {
continue; // bad url hash
}
if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) {
if ( Network.log.isInfo() ) {
}
if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry) ) {
if ( Network.log.isInfo() ) {
Network.log.logInfo("remote search: filtered blacklisted url "
+ urlEntry.url()
+ " from peer "

@ -26,9 +26,13 @@
package net.yacy.repository;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -50,12 +54,17 @@ import net.yacy.kelondro.util.SetTools;
public class Blacklist {
public static final String BLACKLIST_DHT = "dht";
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_SEARCH = "search";
public static final String BLACKLIST_SURFTIPS = "surftips";
public static final String BLACKLIST_NEWS = "news";
private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/blacklistCache_DHT.ser");
/**
 * The kinds of blacklist that can be maintained and queried.
 *
 * The string form of each constant (see {@link #toString()}) is its name in
 * lower case, e.g. <code>dht</code> or <code>surftips</code>; callers
 * concatenate it with suffixes such as <code>".BlackLists"</code> to build
 * configuration keys, so it must be stable on every platform.
 */
public enum BlacklistType {
    DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS;

    /**
     * Returns the constant's name in lower case, independent of the JVM's
     * default locale.
     *
     * @return the lower-case name of this blacklist type
     */
    @Override
    public final String toString() {
        // Locale.ROOT is required here: with a Turkish/Azeri default locale a
        // plain toLowerCase() maps 'I' to the dotless 'ı', turning SURFTIPS
        // into "surftıps" and breaking every key derived from this string.
        return name().toLowerCase(java.util.Locale.ROOT);
    }
}
public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static enum BlacklistError {
@ -82,36 +91,48 @@ public class Blacklist {
return this.errorCode;
}
}
protected static final Set<String> BLACKLIST_TYPES = new HashSet<String>(Arrays.asList(new String[]{
Blacklist.BLACKLIST_CRAWLER,
Blacklist.BLACKLIST_PROXY,
Blacklist.BLACKLIST_DHT,
Blacklist.BLACKLIST_SEARCH,
Blacklist.BLACKLIST_SURFTIPS,
Blacklist.BLACKLIST_NEWS
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
private File blacklistRootPath = null;
private final ConcurrentMap<String, HandleSet> cachedUrlHashs;
private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<BlacklistType, HandleSet> cachedUrlHashs;
private final ConcurrentMap<BlacklistType, ConcurrentMap<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<BlacklistType, ConcurrentMap<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public Blacklist(final File rootPath) {
setRootPath(rootPath);
// prepare the data structure
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>();
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>();
this.hostpaths_matchable = new ConcurrentHashMap<BlacklistType, ConcurrentMap<String, List<Pattern>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<BlacklistType, ConcurrentMap<String, List<Pattern>>>();
this.cachedUrlHashs = new ConcurrentHashMap<BlacklistType, HandleSet>();
for (final String blacklistType : BLACKLIST_TYPES) {
for (final BlacklistType blacklistType : BlacklistType.values()) {
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
if (blacklistType.equals(BlacklistType.DHT)) {
loadDHTCache();
} else {
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}
}
/**
 * Shuts down this blacklist "sub-system". At present the only shutdown step
 * is saving the DHT cache through saveDHTCache(); any further shutdown work
 * should be added here. Progress is reported on the "Blacklist" log channel
 * at fine level.
 */
public synchronized void close() {
    final String logChannel = "Blacklist";

    Log.logFine(logChannel, "Shutting down blacklists ...");

    // the DHT cache is written out before completion is reported
    saveDHTCache();

    Log.logFine(logChannel, "All blacklists has been shutdown.");
}
public final void setRootPath(final File rootPath) {
if (rootPath == null) {
throw new NullPointerException("The blacklist root path must not be null.");
@ -126,30 +147,16 @@ public class Blacklist {
this.blacklistRootPath = rootPath;
}
protected Map<String, List<Pattern>> getBlacklistMap(final String blacklistType, final boolean matchable) {
if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
}
/**
 * Looks up the host-to-path-patterns map registered for a blacklist type.
 *
 * @param blacklistType the blacklist type used as lookup key
 * @param matchable {@code true} to read from the "matchable" table,
 *        {@code false} to read from the "not matchable" table
 * @return the map stored for {@code blacklistType} in the selected table
 */
protected ConcurrentMap<String, List<Pattern>> getBlacklistMap(final BlacklistType blacklistType, final boolean matchable) {
    if (matchable) {
        return this.hostpaths_matchable.get(blacklistType);
    }
    return this.hostpaths_notmatchable.get(blacklistType);
}
protected HandleSet getCacheUrlHashsSet(final String blacklistType) {
if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown backlist type.");
}
/**
 * Returns the set of cached URL hashes kept for the given blacklist type.
 *
 * @param blacklistType the blacklist type used as lookup key
 * @return the entry stored in {@code cachedUrlHashs} for {@code blacklistType}
 */
protected HandleSet getCacheUrlHashsSet(final BlacklistType blacklistType) {
    final HandleSet cachedHashes = this.cachedUrlHashs.get(blacklistType);
    return cachedHashes;
}
public void clear() {
for (final Map<String, List<Pattern>> entry : this.hostpaths_matchable.values()) {
for (final ConcurrentMap<String, List<Pattern>> entry : this.hostpaths_matchable.values()) {
entry.clear();
}
for (final Map<String, List<Pattern>> entry : this.hostpaths_notmatchable.values()) {
@ -162,12 +169,12 @@ public class Blacklist {
public int size() {
int size = 0;
for (final String entry : this.hostpaths_matchable.keySet()) {
for (final BlacklistType entry : this.hostpaths_matchable.keySet()) {
for (final List<Pattern> ientry : this.hostpaths_matchable.get(entry).values()) {
size += ientry.size();
}
}
for (final String entry : this.hostpaths_notmatchable.keySet()) {
for (final BlacklistType entry : this.hostpaths_notmatchable.keySet()) {
for (final List<Pattern> ientry : this.hostpaths_notmatchable.get(entry).values()) {
size += ientry.size();
}
@ -188,8 +195,8 @@ public class Blacklist {
* @param sep
*/
private void loadList(final BlacklistFile blFile, final String sep) {
final Map<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
final ConcurrentMap<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final ConcurrentMap<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, List<String>>> loadedBlacklist;
Map.Entry<String, List<String>> loadedEntry;
List<Pattern> paths;
@ -240,18 +247,18 @@ public class Blacklist {
}
}
public void loadList(final String blacklistType, final String fileNames, final String sep) {
/**
 * Loads the given blacklist file(s) for one blacklist type.
 * <p>
 * Kept so that the older plasmaURLPattern interface is not broken; it simply
 * wraps its arguments in a {@link BlacklistFile} and delegates.
 *
 * @param blacklistType the blacklist type the files belong to
 * @param fileNames the file name(s) handed to {@link BlacklistFile}
 * @param sep the separator passed through to the file-based loader
 */
public void loadList(final BlacklistType blacklistType, final String fileNames, final String sep) {
    loadList(new BlacklistFile(fileNames, blacklistType), sep);
}
public void removeAll(final String blacklistType, final String host) {
/**
 * Removes every entry stored for a host from both tables of a blacklist type.
 *
 * @param blacklistType the blacklist type to modify
 * @param host the host key to remove
 */
public void removeAll(final BlacklistType blacklistType, final String host) {
    // Same order as before: the matchable table first, then the not-matchable one.
    for (final boolean matchable : new boolean[] {true, false}) {
        getBlacklistMap(blacklistType, matchable).remove(host);
    }
}
public void remove(final String blacklistType, final String host, final String path) {
public void remove(final BlacklistType blacklistType, final String host, final String path) {
final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
List<Pattern> hostList = blacklistMap.get(host);
@ -272,7 +279,7 @@ public class Blacklist {
}
}
public void add(final String blacklistType, final String host, final String path) {
public void add(final BlacklistType blacklistType, final String host, final String path) {
if (host == null) {
throw new IllegalArgumentException("host may not be null");
}
@ -296,18 +303,18 @@ public class Blacklist {
public int blacklistCacheSize() {
int size = 0;
final Iterator<String> iter = this.cachedUrlHashs.keySet().iterator();
final Iterator<BlacklistType> iter = this.cachedUrlHashs.keySet().iterator();
while (iter.hasNext()) {
size += this.cachedUrlHashs.get(iter.next()).size();
}
return size;
}
public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) {
/**
 * Tells whether a URL hash is present in the hash cache of a blacklist type.
 *
 * @param blacklistType the blacklist type whose cache is consulted
 * @param urlHash the URL hash to look for
 * @return the result of {@code has(urlHash)} on that cache
 */
public boolean hashInBlacklistedCache(final BlacklistType blacklistType, final byte[] urlHash) {
    final HandleSet cachedHashes = getCacheUrlHashsSet(blacklistType);
    return cachedHashes.has(urlHash);
}
public boolean contains(final String blacklistType, final String host, final String path) {
public boolean contains(final BlacklistType blacklistType, final String host, final String path) {
boolean ret = false;
if (blacklistType != null && host != null && path != null) {
@ -324,7 +331,18 @@ public class Blacklist {
return ret;
}
public boolean isListed(final String blacklistType, final DigestURI url) {
/**
* Checks whether the URL of the given metadata entry is listed in the given
* blacklist type.
* <p>
* Convenience overload: it extracts the URL via {@code entry.url()} and
* delegates to the URL-based {@code isListed} overload.
*
* @param blacklistType The blacklist type to check against
* @param entry Entry whose URL is checked; must not be {@code null} because
*        {@code entry.url()} is called on it
* @return Whether the given entry is blacklisted
*/
public boolean isListed(final BlacklistType blacklistType, final URIMetadataRow entry) {
// Delegate to the overload that takes the entry's URL
return isListed(blacklistType, entry.url());
}
public boolean isListed(final BlacklistType blacklistType, final DigestURI url) {
if (url == null) {
throw new IllegalArgumentException("url may not be null");
}
@ -358,7 +376,7 @@ public class Blacklist {
return "Default YaCy Blacklist Engine";
}
public boolean isListed(final String blacklistType, final String hostlow, final String path) {
public boolean isListed(final BlacklistType blacklistType, final String hostlow, final String path) {
if (hostlow == null) {
throw new IllegalArgumentException("hostlow may not be null");
}
@ -509,4 +527,33 @@ public class Blacklist {
final Set<String> blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
return blacklist != null && blacklist.contains(newEntry);
}
/**
 * Serializes the URL-hash cache of the DHT blacklist type to
 * {@code BLACKLIST_DHT_CACHEFILE}.
 * <p>
 * I/O failures are logged and not propagated. The underlying file stream is
 * always closed, even when opening the object stream or writing fails.
 */
public final void saveDHTCache() {
    FileOutputStream fileOut = null;
    try {
        fileOut = new FileOutputStream(BLACKLIST_DHT_CACHEFILE);
        final ObjectOutputStream out = new ObjectOutputStream(fileOut);
        out.writeObject(getCacheUrlHashsSet(BlacklistType.DHT));
        // Push the object stream's buffered bytes into the file stream
        // before the file stream is closed in the finally block.
        out.flush();
    } catch (final IOException e) {
        Log.logException(e);
    } finally {
        if (fileOut != null) {
            try {
                fileOut.close();
            } catch (final IOException e) {
                Log.logException(e);
            }
        }
    }
}
/**
 * Restores the URL-hash cache of the DHT blacklist type from
 * {@code BLACKLIST_DHT_CACHEFILE}.
 * <p>
 * If the cache file does not exist, cannot be read, or does not yield a
 * cache object, a fresh empty {@code HandleSet} is registered instead.
 * Read failures are logged and not propagated. The file stream is always
 * closed, and a failure while closing no longer discards a cache that was
 * loaded successfully.
 */
public final void loadDHTCache() {
    if (BLACKLIST_DHT_CACHEFILE.exists()) {
        FileInputStream fileIn = null;
        try {
            fileIn = new FileInputStream(BLACKLIST_DHT_CACHEFILE);
            // NOTE(review): this uses Java-native deserialization; the cache
            // file must only ever be written by this application.
            final ObjectInputStream in = new ObjectInputStream(fileIn);
            final HandleSet cached = (HandleSet) in.readObject();
            if (cached != null) {
                this.cachedUrlHashs.put(BlacklistType.DHT, cached);
                return;
            }
        } catch (final ClassNotFoundException e) {
            Log.logException(e);
        } catch (final FileNotFoundException e) {
            Log.logException(e);
        } catch (final IOException e) {
            Log.logException(e);
        } finally {
            if (fileIn != null) {
                try {
                    fileIn.close();
                } catch (final IOException e) {
                    Log.logException(e);
                }
            }
        }
    }
    // Fallback: start with an empty cache.
    this.cachedUrlHashs.put(BlacklistType.DHT, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}

@ -30,12 +30,14 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import net.yacy.repository.Blacklist.BlacklistType;
public class BlacklistFile {
private final String filename;
private final String type;
private final BlacklistType type;
public BlacklistFile(final String filename, final String type) {
/**
 * Creates a descriptor pairing a file name string with a blacklist type.
 *
 * @param filename the file name string to store
 * @param type the blacklist type to store
 */
public BlacklistFile(final String filename, final BlacklistType type) {
    this.type = type;
    this.filename = filename;
}
@ -53,5 +55,5 @@ public class BlacklistFile {
return new HashSet<String>(Arrays.asList(this.filename.split(",")));
}
public String getType() { return this.type; }
/**
 * @return the blacklist type this object was constructed with
 */
public BlacklistType getType() {
    return this.type;
}
}

@ -52,6 +52,7 @@ import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.Cache;
@ -188,8 +189,8 @@ public final class LoaderDispatcher {
final String protocol = url.getProtocol();
final String host = url.getHost();
// check if url is in blacklist
if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) {
// check if url is in blacklist
if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}

@ -1190,7 +1190,6 @@ public final class Switchboard extends serverSwitch
// remove heuristics
setConfig("heuristic.site", false);
setConfig("heuristic.scroogle", false);
setConfig("heuristic.blekko", false);
// relocate
@ -1601,6 +1600,7 @@ public final class Switchboard extends serverSwitch
this.tables.close();
Domains.close();
AccessTracker.dumpLog(new File("DATA/LOG/queries.log"));
Switchboard.urlBlacklist.close();
UPnP.deletePortMapping();
this.tray.remove();
try {
@ -2598,7 +2598,7 @@ public final class Switchboard extends serverSwitch
"denied by profile rule, process case="
+ processCase
+ ", profile name = "
+ queueEntry.profile().name());
+ queueEntry.profile().name());
return;
}
@ -3152,8 +3152,8 @@ public final class Switchboard extends serverSwitch
final Map<MultiProtocolURI, String> links;
searchEvent.getRankingResult().oneFeederStarted();
try {
links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE);
try {
links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE);
if ( links != null ) {
final Iterator<MultiProtocolURI> i = links.keySet().iterator();
while ( i.hasNext() ) {
@ -3166,59 +3166,7 @@ public final class Switchboard extends serverSwitch
addAllToIndex(url, links, searchEvent, "site");
}
} catch ( final Throwable e ) {
Log.logException(e);
} finally {
searchEvent.getRankingResult().oneFeederTerminated();
}
}
}.start();
}
public final void heuristicScroogle(final SearchEvent searchEvent) {
new Thread() {
@Override
public void run() {
QueryParams query = searchEvent.getQuery();
String queryString = query.queryString(true);
final int meta = queryString.indexOf("heuristic:", 0);
if ( meta >= 0 ) {
final int q = queryString.indexOf(' ', meta);
queryString =
(q >= 0)
? queryString.substring(0, meta) + queryString.substring(q + 1)
: queryString.substring(0, meta);
}
final String urlString =
"http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw="
+ queryString.trim().replaceAll(" ", "+")
+ "&n=2";
final DigestURI url;
try {
url = new DigestURI(MultiProtocolURI.unescape(urlString));
} catch ( final MalformedURLException e1 ) {
Log.logWarning("heuristicScroogle", "url not well-formed: '" + urlString + "'");
return;
}
Map<MultiProtocolURI, String> links = null;
searchEvent.getRankingResult().oneFeederStarted();
try {
links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE);
if ( links != null ) {
final Iterator<MultiProtocolURI> i = links.keySet().iterator();
while ( i.hasNext() ) {
if ( i.next().toNormalform(false, false).indexOf("scroogle", 0) >= 0 ) {
i.remove();
}
}
Switchboard.this.log.logInfo("Heuristic: adding "
+ links.size()
+ " links from scroogle");
// add all pages to the index
addAllToIndex(null, links, searchEvent, "scroogle");
}
} catch ( final Throwable e ) {
//Log.logException(e);
Log.logException(e);
} finally {
searchEvent.getRankingResult().oneFeederTerminated();
}

@ -60,6 +60,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import de.anomic.crawler.CrawlStacker;
public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]> {
@ -408,8 +409,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
remove(entry.hash());
continue;
}
if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) ||
this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) ||
if (this.blacklist.isListed(BlacklistType.CRAWLER, entry) ||
this.blacklist.isListed(BlacklistType.DHT, entry) ||
(this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) {
this.lastBlacklistedUrl = entry.url().toNormalform(true, true);
this.lastBlacklistedHash = ASCII.String(entry.hash());

@ -62,7 +62,7 @@ import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.util.ISO639;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.RWIProcess;
@ -536,7 +536,7 @@ public class Segment {
urlHashs.put(entry.urlhash());
} else {
url = ue.url();
if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
if (url == null || Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
urlHashs.put(entry.urlhash());
}
}

@ -64,6 +64,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.TermSearch;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
@ -624,7 +625,13 @@ public final class RWIProcess extends Thread
this.sortout++;
continue;
}
// Check for blacklist
if ( Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page) ) {
this.sortout++;
continue;
}
final String pageurl = page.url().toNormalform(true, true);
final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase();

@ -51,7 +51,7 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.crawler.retrieval.Request;
@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) {
entry = i.next();
url = new DigestURI(entry.getKey());
desc = entry.getValue();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
desc = entry.getValue();
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
@ -202,8 +202,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) {
ientry = i.next();
url = new DigestURI(ientry.url());
final String u = url.toString();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
final String u = url.toString();
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -251,8 +251,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
* @param url The URL to check
* @param blacklistType Type of blacklist (see class Blacklist, BLACKLIST_FOO)
* @return isBlacklisted Whether the given URL is blacklisted
*/
private static boolean isUrlBlacklisted (DigestURI url, String blacklistType) {
*/
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURI url) {
// Default is not blacklisted
boolean isBlacklisted = false;

Loading…
Cancel
Save