*) cleaning up the code a little bit

*) program to interface, not implementation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7345 6c8d7289-2bf4-0310-a012-ef5d649a1542
low012 14 years ago
parent 7bb4b001ed
commit 9b3fae9496
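
The principle in the commit message is what every hunk below applies: variables, fields, parameters and return types are re-declared against collection interfaces (List, SortedSet, SortedMap, ConcurrentMap), while the concrete classes (ArrayList, TreeSet, TreeMap, ConcurrentHashMap) remain only at the point of construction. A minimal sketch of the idiom; the class and field names are illustrative, not taken from the YaCy sources:

import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class InterfaceIdiom {
    // before: private final TreeSet<String> words = new TreeSet<String>();
    private final SortedSet<String> words = new TreeSet<String>();

    // before: public static final ConcurrentHashMap<String, String> cache = ...;
    public static final ConcurrentMap<String, String> cache =
            new ConcurrentHashMap<String, String>();

    // callers see only the SortedSet contract, so the backing class can be
    // swapped (e.g. for a ConcurrentSkipListSet) without touching call sites
    public SortedSet<String> words() {
        return this.words;
    }
}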

@ -30,6 +30,8 @@
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;
import net.yacy.cora.document.RSSMessage;
@ -412,7 +414,7 @@ public class yacysearch {
int maxDistance = (querystring.indexOf('"') >= 0) ? query.length - 1 : Integer.MAX_VALUE;
// filter out stopwords
final TreeSet<String> filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords);
final SortedSet<String> filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords);
if (!filtered.isEmpty()) {
SetTools.excludeDestructive(query[0], Switchboard.stopwords);
}
@ -576,7 +578,7 @@ public class yacysearch {
}
// find geographic info
TreeSet<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
SortedSet<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
if (coordinates == null || coordinates.isEmpty() || offset > 0) {
prop.put("geoinfo", "0");
} else {

@ -25,7 +25,7 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;
import net.yacy.cora.protocol.HeaderFramework;
@ -209,11 +209,11 @@ public class yacysearchitem {
if (result == null) return prop; // no content
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
final ArrayList<MediaSnippet> media = result.mediaSnippets();
final List<MediaSnippet> media = result.mediaSnippets();
if (item == 0) col = true;
if (media != null) {
int c = 0;
for (MediaSnippet ms : media) {
for (final MediaSnippet ms : media) {
prop.putHTML("content_items_" + c + "_href", ms.href.toNormalform(true, false));
prop.putHTML("content_items_" + c + "_hrefshort", nxTools.shortenURLString(ms.href.toNormalform(true, false), urllength));
prop.putHTML("content_items_" + c + "_name", shorten(ms.name, namelength));

@ -79,6 +79,8 @@ import de.anomic.server.serverHandler;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverCore.Session;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
/**
@ -105,13 +107,13 @@ public final class HTTPDemon implements serverHandler, Cloneable {
private static AlternativeDomainNames alternativeResolver = null;
/**
* A hashset containing extensions that indicate content that should not be transported
* A Set containing extensions that indicate content that should not be transported
* using zipped content encoding
* @see #shallTransportZipped(String)
*/
//TODO: Load this from a file
private static final HashSet<String> disallowZippedContentEncoding = new HashSet<String>(Arrays.asList(new String[]{
private static final Set<String> disallowZippedContentEncoding = new HashSet<String>(Arrays.asList(new String[]{
".gz", ".tgz", ".jpg", ".jpeg", ".png", ".mp3", ".mov", ".avi", ".gif", ".zip", ".rar", ".bz2", ".lha", ".jar", ".rpm", ".arc", ".arj", ".wmv", ".ico", ".bmp"
}));
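
The method referenced by the @see tag above is not part of this diff; a plausible shape for it, assuming it simply tests a path's extension against the set, is sketched here (hypothetical body, not the committed one):

private static boolean shallTransportZipped(final String path) {
    final int dot = path.lastIndexOf('.');
    if (dot < 0) return true; // no extension: zipped transport is fine
    // skip compression for media/archive types that are already packed
    return !disallowZippedContentEncoding.contains(path.substring(dot).toLowerCase());
}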
@ -120,13 +122,13 @@ public final class HTTPDemon implements serverHandler, Cloneable {
public static final String copyright = "[ HTTP SERVER: AnomicHTTPD v" + vDATE + " by Michael Christen / www.anomic.de ]";
public static final String hline = "-------------------------------------------------------------------------------";
public static final Map<String, String> reverseMappingCache = new ConcurrentHashMap<String, String>();
public static final ConcurrentMap<String, String> reverseMappingCache = new ConcurrentHashMap<String, String>();
private static volatile Switchboard switchboard = null;
private static String virtualHost = null;
public static boolean keepAliveSupport = false;
private static Map<String, Long> YaCyHopAccessRequester = new ConcurrentHashMap<String, Long>();
private static Map<String, Long> YaCyHopAccessTargets = new ConcurrentHashMap<String, Long>();
private static ConcurrentMap<String, Long> YaCyHopAccessRequester = new ConcurrentHashMap<String, Long>();
private static ConcurrentMap<String, Long> YaCyHopAccessTargets = new ConcurrentHashMap<String, Long>();
// for authentication
private boolean use_proxyAccounts = false;
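
Re-declaring reverseMappingCache and the two YaCyHop maps as ConcurrentMap, rather than plain Map or the concrete ConcurrentHashMap, keeps the concurrency contract in the declared type: atomic operations such as putIfAbsent are declared on ConcurrentMap, and Map itself only gained them later, in Java 8. A small self-contained sketch; the field and method names are illustrative:

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class AccessTracking {
    private static final ConcurrentMap<String, Long> firstSeen =
            new ConcurrentHashMap<String, Long>();

    public static void recordAccess(final String requesterIP) {
        // atomic check-then-put without external synchronization;
        // declared on ConcurrentMap, so the type documents the guarantee
        firstSeen.putIfAbsent(requesterIP, Long.valueOf(System.currentTimeMillis()));
    }
}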

@ -28,8 +28,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;
import de.anomic.crawler.CrawlProfile;
@ -96,10 +98,12 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return ByteArray.hashCode(href.hash());
}
@Override
public String toString() {
return new String(href.hash());
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
if (obj == null) return false;
@ -116,7 +120,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return o1.compareTo(o2);
}
public static ArrayList<MediaSnippet> retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final ContentDomain mediatype, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) {
public static List<MediaSnippet> retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final ContentDomain mediatype, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) {
if (queryhashes.isEmpty()) {
Log.logFine("snippet fetch", "no query hashes given for url " + url);
return new ArrayList<MediaSnippet>();
@ -142,7 +146,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return a;
}
public static ArrayList<MediaSnippet> computeMediaSnippets(final DigestURI source, final Document document, final HandleSet queryhashes, final ContentDomain mediatype) {
public static List<MediaSnippet> computeMediaSnippets(final DigestURI source, final Document document, final HandleSet queryhashes, final ContentDomain mediatype) {
if (document == null) return new ArrayList<MediaSnippet>();
Map<MultiProtocolURI, String> media = null;
@ -155,7 +159,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
Map.Entry<MultiProtocolURI, String> entry;
DigestURI url;
String desc;
final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
final List<MediaSnippet> result = new ArrayList<MediaSnippet>();
while (i.hasNext()) {
entry = i.next();
url = new DigestURI(entry.getKey());
@ -169,9 +173,9 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return result;
}
public static ArrayList<MediaSnippet> computeImageSnippets(final DigestURI source, final Document document, final HandleSet queryhashes) {
public static List<MediaSnippet> computeImageSnippets(final DigestURI source, final Document document, final HandleSet queryhashes) {
final TreeSet<ImageEntry> images = new TreeSet<ImageEntry>();
final SortedSet<ImageEntry> images = new TreeSet<ImageEntry>();
images.addAll(document.getImages().values()); // iterates images in descending size order!
// a measurement for the size of the images can be retrieved using the htmlFilterImageEntry.hashCode()
@ -179,7 +183,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
ImageEntry ientry;
DigestURI url;
String desc;
final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
final List<MediaSnippet> result = new ArrayList<MediaSnippet>();
while (i.hasNext()) {
ientry = i.next();
url = new DigestURI(ientry.url());
@ -206,7 +210,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
// remove all hashes that appear in the sentence
if (sentence == null) return queryhashes;
final TreeMap<byte[], Integer> hs = Condenser.hashSentence(sentence, null);
final SortedMap<byte[], Integer> hs = Condenser.hashSentence(sentence, null);
final Iterator<byte[]> j = queryhashes.iterator();
byte[] hash;
Integer pos;

@ -1,4 +1,4 @@
// plasmaSearchQuery.java
// QueryParams.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@yacy.net
@ -31,6 +31,7 @@ import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;
@ -331,21 +332,21 @@ public final class QueryParams {
String s;
int l;
// the string is clean now, but we must generate a set out of it
final String[] a = querystring.split(" ");
for (int i = 0; i < a.length; i++) {
if (a[i].startsWith("-")) {
exclude.add(a[i].substring(1));
final String[] queries = querystring.split(" ");
for (int i = 0; i < queries.length; i++) {
if (queries[i].startsWith("-")) {
exclude.add(queries[i].substring(1));
} else {
while ((c = a[i].indexOf('-')) >= 0) {
s = a[i].substring(0, c);
while ((c = queries[i].indexOf('-')) >= 0) {
s = queries[i].substring(0, c);
l = s.length();
if (l >= Condenser.wordminsize) {query.add(s);}
if (l > 0) {fullquery.add(s);}
a[i] = a[i].substring(c + 1);
queries[i] = queries[i].substring(c + 1);
}
l = a[i].length();
if (l >= Condenser.wordminsize) {query.add(a[i]);}
if (l > 0) {fullquery.add(a[i]);}
l = queries[i].length();
if (l >= Condenser.wordminsize) {query.add(queries[i]);}
if (l > 0) {fullquery.add(queries[i]);}
}
}
}
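
The loop above, with a renamed to queries, splits the cleaned query string on spaces, treats a leading '-' as an exclusion marker, and breaks hyphenated tokens into their parts. A self-contained sketch of that tokenization under hypothetical names, with a constant standing in for Condenser.wordminsize:

import java.util.SortedSet;
import java.util.TreeSet;

public class QueryTokenizerSketch {
    private static final int WORD_MIN_SIZE = 2; // stand-in for Condenser.wordminsize

    public static void tokenize(final String querystring, final SortedSet<String> query,
            final SortedSet<String> fullquery, final SortedSet<String> exclude) {
        for (String token : querystring.split(" ")) {
            if (token.startsWith("-")) {
                exclude.add(token.substring(1)); // "-foo" excludes "foo"
                continue;
            }
            int c;
            while ((c = token.indexOf('-')) >= 0) { // "a-b" yields "a" and "b"
                final String s = token.substring(0, c);
                if (s.length() >= WORD_MIN_SIZE) query.add(s);
                if (s.length() > 0) fullquery.add(s);
                token = token.substring(c + 1);
            }
            if (token.length() >= WORD_MIN_SIZE) query.add(token);
            if (token.length() > 0) fullquery.add(token);
        }
    }

    public static void main(String[] args) {
        final SortedSet<String> q = new TreeSet<String>(), fq = new TreeSet<String>(), ex = new TreeSet<String>();
        tokenize("open-source search -proprietary", q, fq, ex);
        System.out.println(q + " / " + fq + " / " + ex);
    }
}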
@ -364,18 +365,18 @@ public final class QueryParams {
public String queryStringForUrl() {
try {
return URLEncoder.encode(this.queryString, "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
return this.queryString;
}
return URLEncoder.encode(this.queryString, "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
return this.queryString;
}
}
public TreeSet<String>[] queryWords() {
return cleanQuery(this.queryString);
}
public void filterOut(final TreeSet<String> blueList) {
public void filterOut(final SortedSet<String> blueList) {
// filter out words that appear in this set
// this is applied to the queryHashes
final HandleSet blues = Word.words2hashesHandles(blueList);

@ -33,7 +33,8 @@ import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
@ -68,11 +69,11 @@ public final class RankingProcess extends Thread {
private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000;
private final QueryParams query;
private final TreeSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final SortedSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter
private final TreeSet<byte[]> misses; // contains url-hashes that could not been found in the LURL-DB
private final SortedSet<byte[]> misses; // contains url-hashes that could not been found in the LURL-DB
//private final int[] domZones;
private TreeMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private int remote_resourceSize, remote_indexCount, remote_peerCount;
private int local_resourceSize, local_indexCount;
@ -126,6 +127,7 @@ public final class RankingProcess extends Thread {
return this.order;
}
@Override
public void run() {
// do a search
@ -176,76 +178,74 @@ public final class RankingProcess extends Thread {
// iterate over normalized entries and select some that are better than currently stored
timer = System.currentTimeMillis();
String domhash;
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
// apply all constraints
try {
WordReferenceVars iEntry;
while (true) {
iEntry = decodedEntries.poll(1, TimeUnit.SECONDS);
if (iEntry == null || iEntry == WordReferenceVars.poison) break;
assert (iEntry.metadataHash().length == index.row().primaryKeyLength);
//if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue;
iEntry = decodedEntries.poll(1, TimeUnit.SECONDS);
if (iEntry == null || iEntry == WordReferenceVars.poison) break;
assert (iEntry.metadataHash().length == index.row().primaryKeyLength);
//if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue;
// increase flag counts
for (int j = 0; j < 32; j++) {
if (iEntry.flags().get(j)) {flagcount[j]++;}
}
// check constraints
if (!testFlags(iEntry)) {
continue;
}
// check document domain
if (query.contentdom != ContentDomain.TEXT) {
if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue;
if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue;
if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue;
if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue;
}
// increase flag counts
for (int j = 0; j < 32; j++) {
if (iEntry.flags().get(j)) {flagcount[j]++;}
}
// check tld domain
/*
if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) {
// filter out all tld that do not match with wanted tld domain
continue;
}
*/
// check constraints
if (!testFlags(iEntry)) {
continue;
}
// check document domain
if (query.contentdom != ContentDomain.TEXT) {
if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue;
if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue;
if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue;
if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue;
}
// check tld domain
/*
if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) {
// filter out all tld that do not match with wanted tld domain
continue;
}
*/
// count domZones
//this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;
// check site constraints
domhash = new String(iEntry.metadataHash(), 6, 6);
if (query.sitehash == null) {
// no site constraint there; maybe collect host navigation information
if (nav_hosts && query.urlMask_isCatchall) {
this.hostNavigator.inc(domhash);
this.hostResolver.put(domhash, new String(iEntry.metadataHash()));
}
} else {
if (!domhash.equals(query.sitehash)) {
// filter out all domains that do not match with the site constraint
continue;
}
}
// check site constraints
String domhash = new String(iEntry.metadataHash(), 6, 6);
if (query.sitehash == null) {
// no site constraint there; maybe collect host navigation information
if (nav_hosts && query.urlMask_isCatchall) {
this.hostNavigator.inc(domhash);
this.hostResolver.put(domhash, new String(iEntry.metadataHash()));
}
} else {
if (!domhash.equals(query.sitehash)) {
// filter out all domains that do not match with the site constraint
continue;
}
}
// finally make a double-check and insert result to stack
// finally make a double-check and insert result to stack
if (urlhashes.add(iEntry.metadataHash())) {
stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
//System.out.println("stack.put: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
// increase counter for statistics
if (local) this.local_indexCount++; else this.remote_indexCount++;
}
}
}
} catch (InterruptedException e) {}
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.PRESORT, resourceName, index.size(), System.currentTimeMillis() - timer), false);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.PRESORT, resourceName, index.size(), System.currentTimeMillis() - timer), false);
}
/**
@ -261,7 +261,6 @@ public final class RankingProcess extends Thread {
}
public boolean feedingIsFinished() {
//System.out.println("feedingIsFinished: this.feeders == " + this.feeders);
return System.currentTimeMillis() - this.startTime > 50 && this.feeders == 0;
}
@ -288,7 +287,7 @@ public final class RankingProcess extends Thread {
return localSearchInclusion;
}
private WeakPriorityBlockingQueue.Element<WordReferenceVars> takeRWI(final boolean skipDoubleDom, long waitingtime) {
private WeakPriorityBlockingQueue.Element<WordReferenceVars> takeRWI(final boolean skipDoubleDom, final long waitingtime) {
// returns from the current RWI list the best entry and removes this entry from the list
WeakPriorityBlockingQueue<WordReferenceVars> m;
@ -348,7 +347,7 @@ public final class RankingProcess extends Thread {
}
}
} catch (InterruptedException e1) {}
if (this.doubleDomCache.size() == 0) return null;
if (this.doubleDomCache.isEmpty()) return null;
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches
@ -395,7 +394,7 @@ public final class RankingProcess extends Thread {
*/
public URIMetadataRow takeURL(final boolean skipDoubleDom, final long waitingtime) {
// returns from the current RWI list the best URL entry and removes this entry from the list
long timeout = System.currentTimeMillis() + Math.max(10, waitingtime);
final long timeout = System.currentTimeMillis() + Math.max(10, waitingtime);
int p = -1;
byte[] urlhash;
long timeleft;
@ -470,8 +469,7 @@ public final class RankingProcess extends Thread {
if (pageauthor != null && pageauthor.length() > 0) {
// add author to the author navigator
String authorhash = new String(Word.word2hash(pageauthor));
//System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author);
// check if we already are filtering for authors
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
continue;
@ -581,7 +579,7 @@ public final class RankingProcess extends Thread {
ScoreCluster<String> result = new ScoreCluster<String>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;
Iterator<String> domhashs = this.hostNavigator.keys(false);
final Iterator<String> domhashs = this.hostNavigator.keys(false);
URIMetadataRow row;
String domhash, urlhash, hostname;
while (domhashs.hasNext() && result.size() < 30) {
@ -606,11 +604,11 @@ public final class RankingProcess extends Thread {
public StaticScore<String> getTopicNavigator(int count) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
ScoreCluster<String> result = new ScoreCluster<String>();
final ScoreCluster<String> result = new ScoreCluster<String>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return result;
if (this.ref.size() < 2) this.ref.clear(); // navigators with one entry are not useful
Map<String, Double> counts = new HashMap<String, Double>();
Iterator<String> i = this.ref.keys(false);
final Map<String, Double> counts = new HashMap<String, Double>();
final Iterator<String> i = this.ref.keys(false);
String word;
byte[] termHash;
int c;
@ -635,8 +633,8 @@ public final class RankingProcess extends Thread {
public void addTopic(final String[] words) {
String word;
for (int i = 0; i < words.length; i++) {
word = words[i].toLowerCase();
for (final String w : words) {
word = w.toLowerCase();
if (word.length() > 2 &&
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off".indexOf(word) < 0 &&
!query.queryHashes.has(Word.word2hash(word)) &&
@ -712,11 +710,9 @@ public final class RankingProcess extends Thread {
final int m = Math.min(maxYBR, ybrTables.length);
for (int i = 0; i < m; i++) {
if ((ybrTables[i] != null) && (ybrTables[i].contains(domhash))) {
//System.out.println("YBR FOUND: " + urlHash + " (" + i + ")");
return i;
}
}
//System.out.println("NOT FOUND: " + urlHash);
return 15;
}

@ -27,9 +27,9 @@
package de.anomic.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.Condenser;
@ -54,7 +54,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
private String alternative_urlstring;
private String alternative_urlname;
private final TextSnippet textSnippet;
private final ArrayList<MediaSnippet> mediaSnippets;
private final List<MediaSnippet> mediaSnippets;
// statistic objects
public long dbRetrievalTime, snippetComputationTime;
@ -63,7 +63,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
final Segment indexSegment,
yacySeedDB peers,
final TextSnippet textSnippet,
final ArrayList<MediaSnippet> mediaSnippets,
final List<MediaSnippet> mediaSnippets,
final long dbRetrievalTime, final long snippetComputationTime) {
this.urlentry = urlentry;
this.urlcomps = urlentry.metadata();
@ -102,9 +102,11 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
if ((p = alternative_urlname.indexOf('?')) > 0) alternative_urlname = alternative_urlname.substring(0, p);
}
}
@Override
public int hashCode() {
return ByteArray.hashCode(urlentry.hash());
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
if (obj == null) return false;
@ -145,7 +147,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
public TextSnippet textSnippet() {
return this.textSnippet;
}
public ArrayList<MediaSnippet> mediaSnippets() {
public List<MediaSnippet> mediaSnippets() {
return this.mediaSnippets;
}
public Date modified() {

@ -28,6 +28,8 @@ package de.anomic.search;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.storage.StaticScore;
@ -43,7 +45,6 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.CrawlProfile;
import de.anomic.search.MediaSnippet;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.graphics.ProfilingGraph;
@ -153,6 +154,7 @@ public class ResultFetcher {
this.neededResults = neededResults;
}
@Override
public void run() {
// start fetching urls and snippets
@ -163,18 +165,18 @@ public class ResultFetcher {
//System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
int loops = 0;
while (System.currentTimeMillis() < this.timeout) {
this.lastLifeSign = System.currentTimeMillis();
this.lastLifeSign = System.currentTimeMillis();
// check if we have enough
if (result.sizeAvailable() >= this.neededResults) {
if (result.sizeAvailable() >= this.neededResults) {
//System.out.println("result.sizeAvailable() >= this.neededResults");
break;
}
// check if we can succeed if we try to take another url
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) {
break;
}
// check if we can succeed if we try to take another url
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) {
break;
}
// get next entry
page = rankingProcess.takeURL(true, this.timeout - System.currentTimeMillis());
@ -266,7 +268,7 @@ public class ResultFetcher {
} else {
// attach media information
startTime = System.currentTimeMillis();
final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
final List<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);
@ -369,9 +371,9 @@ public class ResultFetcher {
int c = 0;
if (result == null) return c;
// iterate over all images in the result
final ArrayList<MediaSnippet> imagemedia = result.mediaSnippets();
final List<MediaSnippet> imagemedia = result.mediaSnippets();
if (imagemedia != null) {
for (MediaSnippet ms: imagemedia) {
for (final MediaSnippet ms: imagemedia) {
images.put(new ReverseElement<MediaSnippet>(ms, ms.ranking)); // remove smallest in case of overflow
c++;
//System.out.println("*** image " + new String(ms.href.hash()) + " images.size = " + images.size() + "/" + images.size());

@ -29,6 +29,8 @@ package de.anomic.search;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Semaphore;
@ -73,19 +75,19 @@ public final class SearchEvent {
// class variables for remote searches
private yacySearch[] primarySearchThreads, secondarySearchThreads;
private final TreeMap<byte[], String> preselectedPeerHashes;
private final SortedMap<byte[], String> preselectedPeerHashes;
private final ResultURLs crawlResults;
private final Thread localSearchThread;
private final TreeMap<byte[], Integer> IACount;
private final TreeMap<byte[], String> IAResults;
private final TreeMap<byte[], HeuristicResult> heuristics;
private final SortedMap<byte[], Integer> IACount;
private final SortedMap<byte[], String> IAResults;
private final SortedMap<byte[], HeuristicResult> heuristics;
private byte[] IAmaxcounthash, IAneardhthash;
private final ReferenceOrder order;
public SearchEvent(final QueryParams query,
final yacySeedDB peers,
final ResultURLs crawlResults,
final TreeMap<byte[], String> preselectedPeerHashes,
final SortedMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts,
final LoaderDispatcher loader) {
this.eventTime = System.currentTimeMillis(); // for lifetime check
@ -164,7 +166,7 @@ public final class SearchEvent {
long mindhtdistance = Long.MAX_VALUE, l;
byte[] wordhash;
assert this.rankingProcess.searchContainerMap() != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankingProcess.searchContainerMap().entrySet()) {
for (final Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankingProcess.searchContainerMap().entrySet()) {
wordhash = entry.getKey();
final ReferenceContainer<WordReference> container = entry.getValue();
assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + new String(container.getTermHash()) + ", wordhash = " + new String(wordhash);
@ -231,14 +233,14 @@ public final class SearchEvent {
public void cleanup() {
// stop all threads
if (primarySearchThreads != null) {
for (yacySearch search : this.primarySearchThreads) {
for (final yacySearch search : this.primarySearchThreads) {
if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt();
}
}
}
if (secondarySearchThreads != null) {
for (yacySearch search : this.secondarySearchThreads) {
for (final yacySearch search : this.secondarySearchThreads) {
if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt();
}
@ -304,14 +306,14 @@ public final class SearchEvent {
boolean anyRemoteSearchAlive() {
// check primary search threads
if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) {
for (int i = 0; i < this.primarySearchThreads.length; i++) {
if ((this.primarySearchThreads[i] != null) && (this.primarySearchThreads[i].isAlive())) return true;
for (final yacySearch primarySearchThread : primarySearchThreads) {
if ((primarySearchThread != null) && (primarySearchThread.isAlive())) return true;
}
}
// maybe a secondary search thread is alive, check this
if ((this.secondarySearchThreads != null) && (this.secondarySearchThreads.length != 0)) {
for (int i = 0; i < this.secondarySearchThreads.length; i++) {
if ((this.secondarySearchThreads[i] != null) && (this.secondarySearchThreads[i].isAlive())) return true;
for (final yacySearch secondarySearchThread : this.secondarySearchThreads) {
if ((secondarySearchThread != null) && (secondarySearchThread.isAlive())) return true;
}
}
return false;
@ -395,12 +397,12 @@ public final class SearchEvent {
// cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
// this relation contains the information where specific urls can be found in specific peers
TreeMap<String, TreeMap<String, String>> abstractsCache;
TreeSet<String> checkedPeers;
SortedMap<String, SortedMap<String, String>> abstractsCache;
SortedSet<String> checkedPeers;
Semaphore trigger;
public SecondarySearchSuperviser() {
this.abstractsCache = new TreeMap<String, TreeMap<String, String>>();
this.abstractsCache = new TreeMap<String, SortedMap<String, String>>();
this.checkedPeers = new TreeSet<String>();
this.trigger = new Semaphore(0);
}
@ -412,16 +414,16 @@ public final class SearchEvent {
*/
public void addAbstract(String wordhash, TreeMap<String, String> singleAbstract) {
synchronized (abstractsCache) {
TreeMap<String, String> oldAbstract = abstractsCache.get(wordhash);
SortedMap<String, String> oldAbstract = abstractsCache.get(wordhash);
if (oldAbstract == null) {
// new abstracts in the cache
abstractsCache.put(wordhash, singleAbstract);
} else synchronized (oldAbstract) {
// extend the abstracts in the cache: join the single abstracts
for (Map.Entry<String, String> oneref: singleAbstract.entrySet()) {
String urlhash = oneref.getKey();
String peerlistNew = oneref.getValue();
String peerlistOld = oldAbstract.get(urlhash);
for (final Map.Entry<String, String> oneref: singleAbstract.entrySet()) {
final String urlhash = oneref.getKey();
final String peerlistNew = oneref.getValue();
final String peerlistOld = oldAbstract.get(urlhash);
if (peerlistOld == null) {
oldAbstract.put(urlhash, peerlistNew);
} else {
@ -438,13 +440,13 @@ public final class SearchEvent {
}
private String wordsFromPeer(final String peerhash, final String urls) {
Map.Entry<String, TreeMap<String, String>> entry;
Map.Entry<String, SortedMap<String, String>> entry;
String word, peerlist, url, wordlist = "";
TreeMap<String, String> urlPeerlist;
SortedMap<String, String> urlPeerlist;
int p;
boolean hasURL;
synchronized (this) {
final Iterator<Map.Entry <String, TreeMap<String, String>>> i = this.abstractsCache.entrySet().iterator();
final Iterator<Map.Entry <String, SortedMap<String, String>>> i = this.abstractsCache.entrySet().iterator();
while (i.hasNext()) {
entry = i.next();
word = entry.getKey();
@ -465,6 +467,7 @@ public final class SearchEvent {
return wordlist;
}
@Override
public void run() {
try {
int t = 0;
@ -496,12 +499,12 @@ public final class SearchEvent {
if (abstractsCache.size() != query.queryHashes.size()) return;
// join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list
final TreeMap<String, String> abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true);
final SortedMap<String, String> abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true);
if (abstractJoin.isEmpty()) return;
// the join result is now a urlhash: peer-list relation
// generate a list of peers that have the urls for the joined search result
final TreeMap<String, String> secondarySearchURLs = new TreeMap<String, String>(); // a (peerhash:urlhash-liststring) mapping
final SortedMap<String, String> secondarySearchURLs = new TreeMap<String, String>(); // a (peerhash:urlhash-liststring) mapping
String url, urls, peer, peerlist;
final String mypeerhash = peers.mySeed().hash;
boolean mypeerinvolved = false;

@ -30,7 +30,8 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.kelondro.util.MemoryControl;
@ -41,7 +42,7 @@ import de.anomic.yacy.yacySeedDB;
public class SearchEventCache {
private static ConcurrentHashMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
private static ConcurrentMap<String, SearchEvent> lastEvents = new ConcurrentHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetimeBigMem = 600000; // the time an event will stay in the cache when available memory is high, 10 Minutes
public static final long eventLifetimeMediumMem = 60000; // the time an event will stay in the cache when available memory is medium, 1 Minute
public static final long eventLifetimeShortMem = 10000; // the time an event will stay in the cache when memory is low, 10 seconds
@ -82,6 +83,7 @@ public class SearchEventCache {
* in case of failed words
*/
new Thread(){
@Override
public void run() {
for (SearchEvent k: delete) {
k.cleanup();
@ -100,7 +102,7 @@ public class SearchEventCache {
final QueryParams query,
final yacySeedDB peers,
final ResultURLs crawlResults,
final TreeMap<byte[], String> preselectedPeerHashes,
final SortedMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts,
final LoaderDispatcher loader) {

@ -62,6 +62,8 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
@ -173,9 +175,9 @@ public final class Switchboard extends serverSwitch {
private int dhtMaxReferenceCount = 1000;
// colored list management
public static TreeSet<String> badwords = new TreeSet<String>(NaturalOrder.naturalComparator);
public static TreeSet<String> stopwords = new TreeSet<String>(NaturalOrder.naturalComparator);
public static TreeSet<String> blueList = null;
public static SortedSet<String> badwords = new TreeSet<String>(NaturalOrder.naturalComparator);
public static SortedSet<String> stopwords = new TreeSet<String>(NaturalOrder.naturalComparator);
public static SortedSet<String> blueList = null;
public static HandleSet badwordHashes = null;
public static HandleSet blueListHashes = null;
public static HandleSet stopwordHashes = null;
@ -224,7 +226,7 @@ public final class Switchboard extends serverSwitch {
public int searchQueriesRobinsonFromLocal = 0; // absolute counter of all local queries submitted on this peer from a local or authenticated user
public int searchQueriesRobinsonFromRemote = 0; // absolute counter of all local queries submitted on this peer from a remote IP without authentication
public double searchQueriesGlobal = 0d; // partial counter of remote queries (1/number-of-requested-peers)
public TreeMap<byte[], String> clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used
public SortedMap<byte[], String> clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used
public URLLicense licensedURLs;
public List<Pattern> networkWhitelist, networkBlacklist;
public FilterEngine domainList;
@ -232,7 +234,7 @@ public final class Switchboard extends serverSwitch {
public LinkedBlockingQueue<String> trail;
public yacySeedDB peers;
public WorkTables tables;
public TreeMap<byte[], DigestURI> intranetURLs = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
public SortedMap<byte[], DigestURI> intranetURLs = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
public WorkflowProcessor<indexingQueueEntry> indexingDocumentProcessor;
public WorkflowProcessor<indexingQueueEntry> indexingCondensementProcessor;
@ -256,7 +258,7 @@ public final class Switchboard extends serverSwitch {
public Switchboard(final File dataPath, final File appPath, final String initPath, final String configPath) throws IOException {
super(dataPath, appPath, initPath, configPath);
MemoryTracker.startSystemProfiling();
sb=this;
sb = this;
// set loglevel and log
setLog(new Log("PLASMA"));
@ -751,7 +753,7 @@ public final class Switchboard extends serverSwitch {
netdef = netdef.trim();
try {
netdefmap = Switchboard.loadFileAsMap(new DigestURI(netdef));
if (netdefmap == null || netdefmap.size() == 0) continue netload;
if (netdefmap == null || netdefmap.isEmpty()) continue netload;
setConfig(netdefmap);
break netload;
} catch (final Exception e) {
@ -1891,7 +1893,7 @@ public final class Switchboard extends serverSwitch {
doclist.add(document);
}
if (doclist.size() == 0) return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
if (doclist.isEmpty()) return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
in.documents = doclist.toArray(new Document[doclist.size()]);
Condenser[] condenser = new Condenser[in.documents.length];
if (this.log.isFine()) log.logFine("Condensing for '" + in.queueEntry.url().toNormalform(false, true) + "'");
@ -1981,8 +1983,8 @@ public final class Switchboard extends serverSwitch {
}
// store rss feeds in document into rss table
for (Map.Entry<MultiProtocolURI, String> rssEntry : document.getRSS().entrySet()) {
Tables.Data rssRow = new Tables.Data();
for (final Map.Entry<MultiProtocolURI, String> rssEntry : document.getRSS().entrySet()) {
final Tables.Data rssRow = new Tables.Data();
rssRow.put("referrer", queueEntry.url().hash());
rssRow.put("url", rssEntry.getKey().toNormalform(true, false).getBytes());
rssRow.put("title", rssEntry.getValue().getBytes());
@ -2036,14 +2038,14 @@ public final class Switchboard extends serverSwitch {
Map<MultiProtocolURI, String> matcher = searchEvent.getQuery().separateMatches(links);
// take the matcher and load them all
for (Map.Entry<MultiProtocolURI, String> entry: matcher.entrySet()) {
for (final Map.Entry<MultiProtocolURI, String> entry: matcher.entrySet()) {
try {
this.addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName);
} catch (IOException e) {} catch (Parser.Failure e) {}
}
// take then the no-matcher and load them also
for (Map.Entry<MultiProtocolURI, String> entry: links.entrySet()) {
for (final Map.Entry<MultiProtocolURI, String> entry: links.entrySet()) {
try {
this.addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName);
} catch (IOException e) {} catch (Parser.Failure e) {}
@ -2069,32 +2071,35 @@ public final class Switchboard extends serverSwitch {
log.logWarning("addToIndex: cannot load " + url.toNormalform(false, false) + ": " + acceptedError);
return;
}
new Thread() {public void run() {
try {
Response response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE);
if (response == null) throw new IOException("response == null");
if (response.getContent() == null) throw new IOException("content == null");
if (response.getResponseHeader() == null) throw new IOException("header == null");
Document[] documents = response.parse();
if (documents != null) for (Document document: documents) {
if (document.indexingDenied()) throw new Parser.Failure("indexing is denied", url);
Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib);
ResultImages.registerImages(url, document, true);
webStructure.generateCitationReference(url, document, condenser, response.lastModified());
storeDocumentIndex(process, response, document, condenser, searchEvent, "heuristic:" + heuristicName);
log.logInfo("addToIndex fill of url " + url.toNormalform(true, true) + " finished");
new Thread() {
@Override
public void run() {
try {
final Response response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE);
if (response == null) throw new IOException("response == null");
if (response.getContent() == null) throw new IOException("content == null");
if (response.getResponseHeader() == null) throw new IOException("header == null");
final Document[] documents = response.parse();
if (documents != null) for (final Document document: documents) {
if (document.indexingDenied()) throw new Parser.Failure("indexing is denied", url);
final Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib);
ResultImages.registerImages(url, document, true);
webStructure.generateCitationReference(url, document, condenser, response.lastModified());
storeDocumentIndex(process, response, document, condenser, searchEvent, "heuristic:" + heuristicName);
log.logInfo("addToIndex fill of url " + url.toNormalform(true, true) + " finished");
}
} catch (IOException e) {
log.logWarning("addToIndex: failed loading " + url.toNormalform(false, false) + ": " + e.getMessage());
} catch (Parser.Failure e) {
log.logWarning("addToIndex: failed parsing " + url.toNormalform(false, false) + ": " + e.getMessage());
}
} catch (IOException e) {
log.logWarning("addToIndex: failed loading " + url.toNormalform(false, false) + ": " + e.getMessage());
} catch (Parser.Failure e) {
log.logWarning("addToIndex: failed parsing " + url.toNormalform(false, false) + ": " + e.getMessage());
}
}}.start();
}.start();
}
public class receiptSending implements Runnable {
yacySeed initiatorPeer;
URIMetadataRow reference;
private yacySeed initiatorPeer;
private URIMetadataRow reference;
public receiptSending(final yacySeed initiatorPeer, final URIMetadataRow reference) {
this.initiatorPeer = initiatorPeer;
@ -2124,7 +2129,7 @@ public final class Switchboard extends serverSwitch {
public int adminAuthenticated(final RequestHeader requestHeader) {
// authorization for localhost, only if flag is set to grant localhost access as admin
boolean accessFromLocalhost = accessFromLocalhost(requestHeader);
final boolean accessFromLocalhost = accessFromLocalhost(requestHeader);
if (getConfigBool("adminAccountForLocalhost", false) && accessFromLocalhost) return 3; // soft-authenticated for localhost
// get the authorization string from the header
@ -2187,16 +2192,16 @@ public final class Switchboard extends serverSwitch {
}
}
public static int accessFrequency(final HashMap<String, TreeSet<Long>> tracker, final String host) {
public static int accessFrequency(final Map<String, SortedSet<Long>> tracker, final String host) {
// returns the access frequency in queries per hour for a given host and a specific tracker
final long timeInterval = 1000 * 60 * 60;
final TreeSet<Long> accessSet = tracker.get(host);
final SortedSet<Long> accessSet = tracker.get(host);
if (accessSet == null) return 0;
return accessSet.tailSet(Long.valueOf(System.currentTimeMillis() - timeInterval)).size();
}
public String dhtShallTransfer(final String segment) {
String cautionCause = onlineCaution();
final String cautionCause = onlineCaution();
if (cautionCause != null) {
return "online caution for " + cautionCause + ", dht transmission";
}
@ -2218,7 +2223,7 @@ public final class Switchboard extends serverSwitch {
if (getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false")) {
return "no DHT distribution: not enabled (per setting)";
}
Segment indexSegment = this.indexSegments.segment(segment);
final Segment indexSegment = this.indexSegments.segment(segment);
if (indexSegment.urlMetadata().size() < 10) {
return "no DHT distribution: loadedURL.size() = " + indexSegment.urlMetadata().size();
}
@ -2322,71 +2327,73 @@ public final class Switchboard extends serverSwitch {
}
public final void heuristicSite(final SearchEvent searchEvent, final String host) {
new Thread() {public void run() {
String r = host;
if (r.indexOf("//") < 0) r = "http://" + r;
// get the links for a specific site
DigestURI url;
try {
url = new DigestURI(r);
} catch (MalformedURLException e) {
Log.logException(e);
return;
}
Map<MultiProtocolURI, String> links = null;
try {
links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE);
} catch (IOException e) {
Log.logException(e);
return;
}
Iterator<MultiProtocolURI> i = links.keySet().iterator();
MultiProtocolURI u;
while (i.hasNext()) {
u = i.next();
if (!u.getHost().endsWith(host)) i.remove();
new Thread() {
@Override
public void run() {
String r = host;
if (r.indexOf("//") < 0) r = "http://" + r;
// get the links for a specific site
DigestURI url;
try {
url = new DigestURI(r);
} catch (MalformedURLException e) {
Log.logException(e);
return;
}
final Map<MultiProtocolURI, String> links;
try {
links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE);
} catch (IOException e) {
Log.logException(e);
return;
}
final Iterator<MultiProtocolURI> i = links.keySet().iterator();
while (i.hasNext()) {
if (!i.next().getHost().endsWith(host)) i.remove();
}
// add all pages to the index
addAllToIndex(url, links, searchEvent, "site");
}
// add all pages to the index
addAllToIndex(url, links, searchEvent, "site");
}}.start();
}.start();
}
public final void heuristicScroogle(final SearchEvent searchEvent) {
new Thread() {public void run() {
String query = searchEvent.getQuery().queryString(true);
int meta = query.indexOf("heuristic:");
if (meta >= 0) {
int q = query.indexOf(' ', meta);
if (q >= 0) query = query.substring(0, meta) + query.substring(q + 1); else query = query.substring(0, meta);
}
final String urlString = "http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=" + query.trim().replaceAll(" ", "+") + "&n=2";
DigestURI url;
try {
url = new DigestURI(MultiProtocolURI.unescape(urlString));
} catch (MalformedURLException e1) {
return;
}
Map<MultiProtocolURI, String> links = null;
try {
links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE);
} catch (IOException e) {
Log.logException(e);
return;
}
Iterator<MultiProtocolURI> i = links.keySet().iterator();
MultiProtocolURI u;
while (i.hasNext()) {
u = i.next();
if (u.toNormalform(false, false).indexOf("scroogle") >= 0) i.remove();
new Thread() {
@Override
public void run() {
String query = searchEvent.getQuery().queryString(true);
int meta = query.indexOf("heuristic:");
if (meta >= 0) {
final int q = query.indexOf(' ', meta);
if (q >= 0) query = query.substring(0, meta) + query.substring(q + 1); else query = query.substring(0, meta);
}
final String urlString = "http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=" + query.trim().replaceAll(" ", "+") + "&n=2";
final DigestURI url;
try {
url = new DigestURI(MultiProtocolURI.unescape(urlString));
} catch (MalformedURLException e1) {
return;
}
Map<MultiProtocolURI, String> links = null;
try {
links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE);
} catch (IOException e) {
Log.logException(e);
return;
}
Iterator<MultiProtocolURI> i = links.keySet().iterator();
while (i.hasNext()) {
if (i.next().toNormalform(false, false).indexOf("scroogle") >= 0) i.remove();
}
log.logInfo("Heuristic: adding " + links.size() + " links from scroogle");
// add all pages to the index
addAllToIndex(null, links, searchEvent, "scroogle");
}
log.logInfo("Heuristic: adding " + links.size() + " links from scroogle");
// add all pages to the index
addAllToIndex(null, links, searchEvent, "scroogle");
}}.start();
}.start();
}
public int currentPPM() {
@ -2542,14 +2549,11 @@ public final class Switchboard extends serverSwitch {
port = 3128;
}
// create new config
ProxySettings.use = true;
ProxySettings.use4ssl = true;
ProxySettings.use4YaCy = true;
ProxySettings.port = port;
ProxySettings.host = host;
if ((ProxySettings.host == null) || (ProxySettings.host.length() == 0)) {
ProxySettings.use = false;
}
ProxySettings.use = ((ProxySettings.host != null) && (ProxySettings.host.length() > 0));
// determining if remote proxy usage is enabled
ProxySettings.use = getConfigBool("remoteProxyUse", false);

@ -24,11 +24,11 @@
package de.anomic.search;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.List;
import java.util.SortedMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -95,7 +95,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
public void put(final String wordhashes, final String urlhash, final String snippet) {
// generate key
String key = urlhash + wordhashes;
final String key = urlhash + wordhashes;
// do nothing if snippet is known
if (cache.containsKey(key)) return;
@ -139,10 +139,10 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
int source = SOURCE_CACHE;
final String wordhashes = yacySearch.set2string(queryhashes);
final String urls = new String(url.hash());
String line = snippetsCache.get(wordhashes, urls);
if (line != null) {
String snippetLine = snippetsCache.get(wordhashes, urls);
if (snippetLine != null) {
// found the snippet
init(url.hash(), line, source, null);
init(url.hash(), snippetLine, source, null);
return;
}
@ -151,7 +151,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
* LOAD RESOURCE DATA
* =========================================================================== */
// if the snippet is not in the cache, we can try to get it from the htcache
Response response;
final Response response;
try {
// first try to get the snippet from metadata
String loc;
@ -245,24 +245,24 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
//String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
//String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
line = "";
snippetLine = "";
//if (audioline != null) line += (line.length() == 0) ? audioline : "<br />" + audioline;
//if (videoline != null) line += (line.length() == 0) ? videoline : "<br />" + videoline;
//if (appline != null) line += (line.length() == 0) ? appline : "<br />" + appline;
//if (hrefline != null) line += (line.length() == 0) ? hrefline : "<br />" + hrefline;
if (textline != null) line += (line.length() == 0) ? textline : "<br />" + textline;
if (textline != null) snippetLine += (snippetLine.length() == 0) ? textline : "<br />" + textline;
if (line == null || !remainingHashes.isEmpty()) {
if (snippetLine == null || !remainingHashes.isEmpty()) {
init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found");
return;
}
if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength);
if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
// finally store this snippet in our own cache
snippetsCache.put(wordhashes, urls, line);
snippetsCache.put(wordhashes, urls, snippetLine);
document.close();
init(url.hash(), line, source, null);
init(url.hash(), snippetLine, source, null);
}
private void init(final byte[] urlhash, final String line, final int errorCode, final String errortext) {
@ -294,24 +294,24 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
if (line.endsWith(".")) line = line.substring(0, line.length() - 1);
final Iterator<byte[]> i = queryHashes.iterator();
byte[] h;
final String[] w = line.split(" ");
final String[] words = line.split(" ");
while (i.hasNext()) {
h = i.next();
for (int j = 0; j < w.length; j++) {
final ArrayList<String> al = markedWordArrayList(w[j]); // mark special character separated words correctly if more than 1 word has to be marked
w[j] = "";
for (int j = 0; j < words.length; j++) {
final List<String> al = markedWordArrayList(words[j]); // mark special character separated words correctly if more than 1 word has to be marked
words[j] = "";
for (int k = 0; k < al.size(); k++) {
if(k % 2 == 0){ // word has not been marked
w[j] += getWordMarked(al.get(k), h);
words[j] += getWordMarked(al.get(k), h);
} else { // word has been marked, do not encode again
w[j] += al.get(k);
words[j] += al.get(k);
}
}
}
}
final StringBuilder l = new StringBuilder(line.length() + queryHashes.size() * 8);
for (int j = 0; j < w.length; j++) {
l.append(w[j]);
for (int j = 0; j < words.length; j++) {
l.append(words[j]);
l.append(' ');
}
return l.toString().trim();
@ -325,6 +325,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return o1.compareTo(o2);
}
@Override
public int hashCode() {
return ByteArray.hashCode(this.urlhash);
}
@ -341,59 +342,67 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
* @return the marked word if hash matches, else the unmarked word
* @see #getLineMarked(Set)
*/
private static String getWordMarked(String word, byte[] h){
private static String getWordMarked(final String word, final byte[] h){
//ignore punctuation marks (contrib [MN])
//note to myself:
//For details on regex see "Mastering regular expressions" by J.E.F. Friedl
//especially p. 123 and p. 390/391 (in the German version of the 2nd edition)
String prefix = "";
String postfix = "";
StringBuilder theWord = new StringBuilder(word);
StringBuilder prefix = new StringBuilder();
StringBuilder postfix = new StringBuilder();
int len = 0;
// cut off prefix if it consists of non-characters or non-numbers
while(p1.matcher(word).find()) {
prefix = prefix + word.substring(0,1);
word = word.substring(1);
while(p1.matcher(theWord).find()) {
prefix.append(theWord.substring(0,1));
theWord = theWord.delete(0, 1);
}
// cut off postfix if it consists of non-characters or non-numbers
while(p2.matcher(word).find()) {
len = word.length();
postfix = word.substring(len-1,len) + postfix;
word = word.substring(0,len-1);
while(p2.matcher(theWord).find()) {
len = theWord.length();
postfix.insert(0, theWord.substring(len-1,len));
theWord = theWord.delete(len - 1, len);
}
//special treatment if there is a special character in the word
if(p3.matcher(word).find()) {
String out = "";
if(p3.matcher(theWord).find()) {
StringBuilder out = new StringBuilder();
String temp = "";
for(int k=0; k < word.length(); k++) {
for(int k=0; k < theWord.length(); k++) {
//is character a special character?
if(p4.matcher(word.substring(k,k+1)).find()) {
if(p4.matcher(theWord.substring(k,k+1)).find()) {
if (new String(Word.word2hash(temp)).equals(new String(h))) temp = "<b>" + CharacterCoding.unicode2html(temp, false) + "</b>";
out = out + temp + CharacterCoding.unicode2html(word.substring(k,k+1), false);
out.append(temp);
out.append(CharacterCoding.unicode2html(theWord.substring(k,k+1), false));
temp = "";
}
//last character
else if(k == (word.length()-1)) {
temp = temp + word.substring(k,k+1);
else if(k == (theWord.length()-1)) {
temp = temp + theWord.substring(k,k+1);
if (new String(Word.word2hash(temp)).equals(new String(h))) temp = "<b>" + CharacterCoding.unicode2html(temp, false) + "</b>";
out = out + temp;
out.append(temp);
temp = "";
}
else temp = temp + word.substring(k,k+1);
else {
temp = temp + theWord.substring(k,k+1);
}
}
word = out;
theWord = out;
}
//end contrib [MN]
else if (new String(Word.word2hash(word)).equals(new String(h))) word = "<b>" + CharacterCoding.unicode2html(word, false) + "</b>";
else if (new String(Word.word2hash(theWord)).equals(new String(h))) {
theWord.replace(0, theWord.length(), CharacterCoding.unicode2html(theWord.toString(), false));
theWord.insert(0, "<b>");
theWord.append("</b>");
}
word = CharacterCoding.unicode2html(prefix, false)
+ word
+ CharacterCoding.unicode2html(postfix, false);
return word;
theWord.insert(0, CharacterCoding.unicode2html(prefix.toString(), false));
theWord.append(CharacterCoding.unicode2html(postfix.toString(), false));
return theWord.toString();
}
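
The getWordMarked rewrite above trades String concatenation for StringBuilder: each '+' on a String inside a loop copies everything accumulated so far, so building the marked word character by character costs O(n^2), while StringBuilder.append amortizes to O(n). A minimal illustration of the two shapes:

public class ConcatCost {
    static String quadratic(final char[] chars) {
        String out = "";
        for (final char c : chars) out = out + c; // allocates a new String per pass
        return out;
    }

    static String linear(final char[] chars) {
        final StringBuilder out = new StringBuilder(chars.length);
        for (final char c : chars) out.append(c); // appends in place
        return out.toString();
    }
}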
/**
@ -403,8 +412,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
* @return words that already has and has not yet been marked
* @author [DW], 08.11.2008
*/
private static ArrayList<String> markedWordArrayList(String string){
ArrayList<String> al = new java.util.ArrayList<String>(1);
private static List<String> markedWordArrayList(String string){
List<String> al = new java.util.ArrayList<String>(1);
Matcher m = p01.matcher(string);
while (m.find()) {
al.add(m.group(1));
@ -417,8 +426,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
private static boolean containsAllHashes(final String sentence, final HandleSet queryhashes) {
final TreeMap<byte[], Integer> m = Condenser.hashSentence(sentence, null);
for (byte[] b: queryhashes) {
final SortedMap<byte[], Integer> m = Condenser.hashSentence(sentence, null);
for (final byte[] b: queryhashes) {
if (!(m.containsKey(b))) return false;
}
return true;

@@ -44,6 +44,7 @@ import net.yacy.kelondro.workflow.WorkflowProcessor;
import de.anomic.search.Segment;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
import java.util.List;
public class Dispatcher {
@@ -53,8 +54,8 @@ public class Dispatcher {
* Before a RWI is sent, the following process is applied:
* - (1) a number of RWIs are selected and accumulated.
* When they are selected, they are removed from the index
* - (2) the RWI collection is splitted into a number of partitions according to the vertical DHT.
* - (3) the splitted RWIs are enqueued as Entry object in the entry 'cloud' of the dispatcher
* - (2) the RWI collection is split into a number of partitions according to the vertical DHT.
* - (3) the split RWIs are enqueued as Entry objects in the entry 'cloud' of the dispatcher
* - (4) more entries may be enqueued to the dispatcher and entries with the same primary target
* are accumulated.
* - (5) the largest entries are selected from the dispatcher cloud and enqueued to the 'next' array
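// Step (2) above is the heart of this class. As a rough, self-contained sketch of the
// idea (hypothetical names; the real code derives the partition count from
// seeds.scheme.verticalPartitions() and routes by DHT position, not by hashCode):
import java.util.ArrayList;
import java.util.List;

final class PartitionSketch {
    static List<List<String>> split(final List<String> items, final int partitionCount) {
        final List<List<String>> partitions = new ArrayList<List<String>>(partitionCount);
        for (int i = 0; i < partitionCount; i++) partitions.add(new ArrayList<String>());
        for (final String item : items) {
            // route every item to exactly one vertical partition
            partitions.get((item.hashCode() & 0x7fffffff) % partitionCount).add(item);
        }
        return partitions;
    }
}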
@@ -114,8 +115,6 @@ public class Dispatcher {
seeds,
gzipBody,
timeout);
//this.selectedContainerCache = null;
//this.splittedContainerCache = null;
int concurrentSender = Math.min(25, Math.max(10, WorkflowProcessor.useCPU * 2 + 1));
indexingTransmissionProcessor = new WorkflowProcessor<Transmission.Chunk>(
@@ -229,11 +228,11 @@ public class Dispatcher {
* @throws RowSpaceExceededException
*/
@SuppressWarnings("unchecked")
private ArrayList<ReferenceContainer<WordReference>>[] splitContainers(ArrayList<ReferenceContainer<WordReference>> containers) throws RowSpaceExceededException {
private List<ReferenceContainer<WordReference>>[] splitContainers(List<ReferenceContainer<WordReference>> containers) throws RowSpaceExceededException {
// init the result vector
int partitionCount = this.seeds.scheme.verticalPartitions();
ArrayList<ReferenceContainer<WordReference>>[] partitions = (ArrayList<ReferenceContainer<WordReference>>[]) new ArrayList[partitionCount];
List<ReferenceContainer<WordReference>>[] partitions = (ArrayList<ReferenceContainer<WordReference>>[]) new ArrayList[partitionCount];
for (int i = 0; i < partitions.length; i++) partitions[i] = new ArrayList<ReferenceContainer<WordReference>>();
// check all entries and split them to the partitions
@@ -271,7 +270,7 @@ public class Dispatcher {
* stored in a cache of the Entry for later transmission to the targets, which means that
* no additional IO is necessary afterwards.
*/
private void enqueueContainersToCloud(final ArrayList<ReferenceContainer<WordReference>>[] containers) {
private void enqueueContainersToCloud(final List<ReferenceContainer<WordReference>>[] containers) {
if (transmissionCloud == null) return;
ReferenceContainer<WordReference> lastContainer;
byte[] primaryTarget;
@@ -286,7 +285,7 @@ public class Dispatcher {
// get or make an entry object
entry = this.transmissionCloud.get(pTArray); // if this is not null, the entry is extended here
ArrayList<yacySeed> targets = PeerSelection.getAcceptRemoteIndexSeedsList(
List<yacySeed> targets = PeerSelection.getAcceptRemoteIndexSeedsList(
seeds,
primaryTarget,
seeds.redundancy() * 3,
@@ -327,7 +326,7 @@ public class Dispatcher {
final int maxtime) {
if (this.transmissionCloud == null) return false;
ArrayList<ReferenceContainer<WordReference>> selectedContainerCache;
List<ReferenceContainer<WordReference>> selectedContainerCache;
try {
selectedContainerCache = selectContainers(hash, limitHash, maxContainerCount, maxReferenceCount, maxtime);
} catch (IOException e) {
@@ -341,25 +340,25 @@ public class Dispatcher {
return false;
}
ArrayList<ReferenceContainer<WordReference>>[] splittedContainerCache;
List<ReferenceContainer<WordReference>>[] splitContainerCache;
try {
splittedContainerCache = splitContainers(selectedContainerCache);
splitContainerCache = splitContainers(selectedContainerCache);
} catch (RowSpaceExceededException e) {
this.log.logSevere("selectContainersEnqueueToCloud: splitContainers failed because of too low RAM", e);
return false;
}
selectedContainerCache = null;
if (splittedContainerCache == null) {
this.log.logInfo("selectContainersEnqueueToCloud: splittedContainerCache is empty, cannot do anything here.");
if (splitContainerCache == null) {
this.log.logInfo("selectContainersEnqueueToCloud: splitContainerCache is empty, cannot do anything here.");
return false;
}
this.log.logInfo("splitContainersFromCache: splittedContainerCache filled with " + splittedContainerCache.length + " partitions, deleting selectedContainerCache");
if (splittedContainerCache.length != this.seeds.scheme.verticalPartitions()) {
this.log.logWarning("selectContainersEnqueueToCloud: splittedContainerCache has wrong length.");
this.log.logInfo("splitContainersFromCache: splitContainerCache filled with " + splitContainerCache.length + " partitions, deleting selectedContainerCache");
if (splitContainerCache.length != this.seeds.scheme.verticalPartitions()) {
this.log.logWarning("selectContainersEnqueueToCloud: splitContainerCache has wrong length.");
return false;
}
enqueueContainersToCloud(splittedContainerCache);
splittedContainerCache = null;
enqueueContainersToCloud(splitContainerCache);
splitContainerCache = null;
this.log.logInfo("selectContainersEnqueueToCloud: splittedContainerCache enqueued to cloud array which has now " + this.transmissionCloud.size() + " entries.");
return true;
}

@@ -27,6 +27,7 @@ package de.anomic.yacy.dht;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import net.yacy.cora.storage.DynamicScore;
@@ -57,13 +58,13 @@ public class PeerSelection {
final yacySeedDB seedDB,
byte[] wordhash,
int redundancy,
HashMap<String, yacySeed> regularSeeds,
Map<String, yacySeed> regularSeeds,
DynamicScore<String> ranking) {
// this method is called from the search target computation
long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash);
final long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash);
yacySeed seed;
for (int v = 0; v < dhtVerticalTargets.length; v++) {
wordhash = FlatWordPartitionScheme.positionToHash(dhtVerticalTargets[v]);
for (long dhtVerticalTarget : dhtVerticalTargets) {
wordhash = FlatWordPartitionScheme.positionToHash(dhtVerticalTarget);
Iterator<yacySeed> dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, false);
int c = Math.min(seedDB.sizeConnected(), redundancy);
int cc = 3; // select a maximum of 3, this is enough redundancy
@@ -81,7 +82,7 @@ public class PeerSelection {
private static int guessedOwn = 0;
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final byte[] wordhash, String urlhash, int redundancy) {
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final byte[] wordhash, final String urlhash, final int redundancy) {
// guessIfOwnWord is a fast check that may only err by returning 'true' incorrectly; a 'false' result is always correct
if (guessIfOwnWord(seedDB, wordhash, urlhash)) {
// this case must be verified, because it can be wrong.
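// shallBeOwnWord follows a guess-then-verify fast path: the guess is cheap and may only
// be wrong when it says 'true', so a positive guess is re-checked exactly. A minimal
// sketch of the pattern (hypothetical predicates, not the PeerSelection API):
final class GuessVerifySketch {
    static boolean shallBeOwn(final long wordPos, final long myPos, final long tolerance) {
        if (!guess(wordPos, myPos, tolerance)) return false; // a negative guess is always correct
        return verify(wordPos, myPos, tolerance);            // a positive guess must be verified
    }
    private static boolean guess(final long wordPos, final long myPos, final long tolerance) {
        return Math.abs(wordPos - myPos) <= tolerance * 2;   // cheap, optimistic distance test
    }
    private static boolean verify(final long wordPos, final long myPos, final long tolerance) {
        return Math.abs(wordPos - myPos) <= tolerance;       // exact stand-in check
    }
}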
@@ -108,7 +109,7 @@ public class PeerSelection {
private static boolean verifyIfOwnWord(final yacySeedDB seedDB, byte[] wordhash, String urlhash, int redundancy) {
String myHash = seedDB.mySeed().hash;
wordhash = FlatWordPartitionScheme.positionToHash(seedDB.scheme.dhtPosition(wordhash, urlhash));
Iterator<yacySeed> dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true);
final Iterator<yacySeed> dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true);
while (dhtEnum.hasNext()) {
if (dhtEnum.next().hash.equals(myHash)) return true;
}
@@ -120,18 +121,18 @@ public class PeerSelection {
}
public static byte[] limitOver(final yacySeedDB seedDB, final byte[] startHash) {
Iterator<yacySeed> seeds = getAcceptRemoteIndexSeeds(seedDB, startHash, 1, false);
final Iterator<yacySeed> seeds = getAcceptRemoteIndexSeeds(seedDB, startHash, 1, false);
if (seeds.hasNext()) return seeds.next().hash.getBytes();
return null;
}
protected static ArrayList<yacySeed> getAcceptRemoteIndexSeedsList(
protected static List<yacySeed> getAcceptRemoteIndexSeedsList(
yacySeedDB seedDB,
final byte[] starthash,
int max,
boolean alsoMyOwn) {
final Iterator<yacySeed> seedIter = PeerSelection.getAcceptRemoteIndexSeeds(seedDB, starthash, max, alsoMyOwn);
ArrayList<yacySeed> targets = new ArrayList<yacySeed>();
final ArrayList<yacySeed> targets = new ArrayList<yacySeed>();
while (seedIter.hasNext() && max-- > 0) targets.add(seedIter.next());
return targets;
}
@@ -145,7 +146,7 @@ public class PeerSelection {
* @param alsoMyOwn
* @return
*/
public static Iterator<yacySeed> getAcceptRemoteIndexSeeds(yacySeedDB seedDB, final byte[] starthash, int max, boolean alsoMyOwn) {
public static Iterator<yacySeed> getAcceptRemoteIndexSeeds(final yacySeedDB seedDB, final byte[] starthash, final int max, final boolean alsoMyOwn) {
return new acceptRemoteIndexSeedEnum(seedDB, starthash, Math.min(max, seedDB.sizeConnected()), alsoMyOwn);
}
@@ -225,19 +226,19 @@ public class PeerSelection {
* @param minVersion
* @return
*/
protected static Iterator<yacySeed> getDHTSeeds(yacySeedDB seedDB, final byte[] firstHash, final float minVersion) {
protected static Iterator<yacySeed> getDHTSeeds(final yacySeedDB seedDB, final byte[] firstHash, final float minVersion) {
// enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning
return new seedDHTEnum(seedDB, firstHash, minVersion);
}
private static class seedDHTEnum implements Iterator<yacySeed> {
Iterator<yacySeed> e1, e2;
int steps;
float minVersion;
yacySeedDB seedDB;
private Iterator<yacySeed> e1, e2;
private int steps;
private float minVersion;
private yacySeedDB seedDB;
public seedDHTEnum(yacySeedDB seedDB, final byte[] firstHash, final float minVersion) {
public seedDHTEnum(final yacySeedDB seedDB, final byte[] firstHash, final float minVersion) {
this.seedDB = seedDB;
this.steps = seedDB.sizeConnected();
this.minVersion = minVersion;
@@ -279,17 +280,17 @@ public class PeerSelection {
* @param seedDB
* @return an iterator of seed objects
*/
public static Iterator<yacySeed> getProvidesRemoteCrawlURLs(yacySeedDB seedDB) {
public static Iterator<yacySeed> getProvidesRemoteCrawlURLs(final yacySeedDB seedDB) {
return new providesRemoteCrawlURLsEnum(seedDB);
}
private static class providesRemoteCrawlURLsEnum implements Iterator<yacySeed> {
Iterator<yacySeed> se;
yacySeed nextSeed;
yacySeedDB seedDB;
private Iterator<yacySeed> se;
private yacySeed nextSeed;
private yacySeedDB seedDB;
public providesRemoteCrawlURLsEnum(yacySeedDB seedDB) {
public providesRemoteCrawlURLsEnum(final yacySeedDB seedDB) {
this.seedDB = seedDB;
se = getDHTSeeds(seedDB, null, yacyVersion.YACY_POVIDES_REMOTECRAWL_LISTS);
nextSeed = nextInternal();
@@ -335,7 +336,7 @@ public class PeerSelection {
* @param count number of wanted peers
* @return a hash map of peer hashes to seed object
*/
public static Map<String, yacySeed> seedsByAge(yacySeedDB seedDB, final boolean up, int count) {
public static Map<String, yacySeed> seedsByAge(final yacySeedDB seedDB, final boolean up, int count) {
if (count > seedDB.sizeConnected()) count = seedDB.sizeConnected();

@@ -44,6 +44,9 @@ import de.anomic.search.Segment;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
public class Transmission {
@@ -68,11 +71,11 @@ public class Transmission {
public Chunk newChunk(
byte[] primaryTarget,
final ArrayList<yacySeed> targets,
final List<yacySeed> targets,
final Row payloadrow) {
return new Chunk(primaryTarget, targets, payloadrow);
}
public class Chunk extends WorkflowJob implements Iterable<ReferenceContainer<WordReference>> {
/**
* a dispatcher entry contains
@@ -86,9 +89,9 @@ public class Transmission {
*/
private final byte[] primaryTarget;
private final ReferenceContainerCache<WordReference> containers;
private final TreeMap<byte[], URIMetadataRow> references;
private final SortedMap<byte[], URIMetadataRow> references;
private final HandleSet badReferences;
private final ArrayList<yacySeed> targets;
private final List<yacySeed> targets;
private int hit, miss;
/**
@@ -101,7 +104,7 @@ public class Transmission {
*/
public Chunk(
byte[] primaryTarget,
final ArrayList<yacySeed> targets,
final List<yacySeed> targets,
final Row payloadrow) {
super();
this.primaryTarget = primaryTarget;
@@ -122,7 +125,7 @@ public class Transmission {
public void add(ReferenceContainer<WordReference> container) throws RowSpaceExceededException {
// iterate through the entries in the container and check if the reference is in the repository
Iterator<WordReference> i = container.entries();
ArrayList<byte[]> notFoundx = new ArrayList<byte[]>();
List<byte[]> notFoundx = new ArrayList<byte[]>();
while (i.hasNext()) {
WordReference e = i.next();
if (references.containsKey(e.metadataHash())) continue;
@@ -139,7 +142,7 @@ public class Transmission {
}
}
// now delete all references that were not found
for (byte[] b : notFoundx) container.removeReference(b);
for (final byte[] b : notFoundx) container.removeReference(b);
// finally add the remaining container to the cache
containers.add(container);
}
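// Chunk.add first collects the unknown references and only then removes them, so the
// container is never modified while it is being iterated. The same pattern with plain
// collections (hypothetical data, not the reference/metadata types used above):
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

final class CollectThenRemoveSketch {
    public static void main(final String[] args) {
        final Map<String, String> repository = new TreeMap<String, String>();
        repository.put("url1", "metadata");
        final List<String> container = new ArrayList<String>();
        container.add("url1");
        container.add("url2");
        final List<String> notFound = new ArrayList<String>();
        for (final String ref : container) {
            if (!repository.containsKey(ref)) notFound.add(ref); // collect first ...
        }
        container.removeAll(notFound); // ... then delete, avoiding concurrent modification
        System.out.println(container); // prints [url1]
    }
}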

@@ -53,6 +53,7 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Pattern;
@@ -97,10 +98,10 @@ import de.anomic.tools.crypt;
public final class yacyClient {
private static byte[] postToFile(final yacySeed target, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
private static byte[] postToFile(final yacySeed target, final String filename, final Map<String,ContentBody> parts, final int timeout) throws IOException {
return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
}
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final Map<String,ContentBody> parts, final int timeout) throws IOException {
return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
}
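// Widening these parameters from LinkedHashMap to Map means the methods depend only on
// the interface while callers stay free to choose the implementation (and its ordering).
// A minimal illustration of the principle (hypothetical names, not the yacyClient API):
import java.util.LinkedHashMap;
import java.util.Map;

final class InterfaceTypingSketch {
    // the method only needs Map operations, so it should only ask for a Map
    static int countParts(final Map<String, String> parts) {
        return parts.size();
    }
    public static void main(final String[] args) {
        final Map<String, String> parts = new LinkedHashMap<String, String>(); // caller picks insertion order
        parts.put("object", "seed");
        System.out.println(countParts(parts)); // prints 1
    }
}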
@@ -132,7 +133,7 @@ public final class yacyClient {
final String salt = crypt.randomSalt();
try {
// generate request
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("count", new StringBody("20"));
parts.put("seed", new StringBody(mySeed.genSeedStr(salt)));
// send request
@@ -256,7 +257,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", new StringBody("seed"));
parts.put("env", new StringBody(seedHash));
final byte[] content = postToFile(target, "query.html", parts, 10000);
@@ -277,7 +278,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", new StringBody("rwicount"));
parts.put("ttl", new StringBody("0"));
parts.put("env", new StringBody(wordHash));
@@ -300,7 +301,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", new StringBody("lurlcount"));
parts.put("ttl", new StringBody("0"));
parts.put("env", new StringBody(""));
@@ -337,7 +338,7 @@ public final class yacyClient {
// send request
try {
/* a long time-out is needed */
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("call", new StringBody("remotecrawl"));
parts.put("count", new StringBody(Integer.toString(maxCount)));
parts.put("time", new StringBody(Long.toString(maxTime)));
@@ -585,7 +586,7 @@ public final class yacyClient {
public Map<byte[], String> indexabstract; // index abstracts, a collection of url-hashes per word
public SearchResult(
LinkedHashMap<String,ContentBody> parts,
Map<String,ContentBody> parts,
final yacySeed mySeed,
final String wordhashes,
final String excludehashes,
@@ -695,7 +696,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
parts.put("process", new StringBody("permission"));
final byte[] content = postToFile(seedDB, targetHash, "message.html", parts, 5000);
final Map<String, String> result = FileUtils.table(content);
@@ -715,7 +716,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
parts.put("process", new StringBody("post"));
parts.put("myseed", new StringBody(seedDB.mySeed().genSeedStr(salt)));
parts.put("subject", new StringBody(subject));
@@ -754,7 +755,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("process", new StringBody("permission"));
parts.put("purpose", new StringBody("crcon"));
parts.put("filename", new StringBody(filename));
@@ -777,7 +778,7 @@ public final class yacyClient {
// send request
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("process", new StringBody("store"));
parts.put("purpose", new StringBody("crcon"));
parts.put("filesize", new StringBody(Long.toString(file.length)));
@@ -853,7 +854,7 @@ public final class yacyClient {
// send request
try {
// prepare request
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("process", new StringBody(process));
parts.put("urlhash", new StringBody(((entry == null) ? "" : new String(entry.hash()))));
parts.put("result", new StringBody(result));
@@ -883,7 +884,7 @@ public final class yacyClient {
public static String transferIndex(
final yacySeed targetSeed,
final ReferenceContainerCache<WordReference> indexes,
final TreeMap<byte[], URIMetadataRow> urlCache,
final SortedMap<byte[], URIMetadataRow> urlCache,
final boolean gzipBody,
final int timeout) {
@@ -1006,13 +1007,13 @@ public final class yacyClient {
if (indexcount == 0) {
// nothing to do but everything ok
final HashMap<String, String> result = new HashMap<String, String>(2);
final Map<String, String> result = new HashMap<String, String>(2);
result.put("result", "ok");
result.put("unknownURL", "");
return result;
}
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
parts.put("wordc", new StringBody(Integer.toString(indexes.size())));
parts.put("entryc", new StringBody(Integer.toString(indexcount)));
parts.put("indexes", new StringBody(entrypost.toString()));
@@ -1037,7 +1038,7 @@ public final class yacyClient {
// prepare post values
final String salt = crypt.randomSalt();
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
// enabling gzip compression for post request body
if (gzipBody && (targetSeed.getVersion() < yacyVersion.YACY_SUPPORTS_GZIP_POST_REQUESTS_CHUNKED)) {
@@ -1081,7 +1082,7 @@ public final class yacyClient {
String address = targetSeed.getClusterAddress();
if (address == null) { address = "localhost:8080"; }
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
return FileUtils.table(content);
} catch (final Exception e) {
@@ -1112,7 +1113,7 @@ public final class yacyClient {
} else {
searchlines.add(args[2]);
}
for (String line: searchlines) {
for (final String line: searchlines) {
final byte[] wordhashe = QueryParams.hashSet2hashString(Word.words2hashesHandles(QueryParams.cleanQuery(line)[0])).getBytes();
long time = System.currentTimeMillis();
SearchResult result;
@@ -1164,7 +1165,7 @@ public final class yacyClient {
final String vhost = url.getHost();
final int timeout = 10000;
// new data
final LinkedHashMap<String,ContentBody> newpost = new LinkedHashMap<String,ContentBody>();
final Map<String,ContentBody> newpost = new LinkedHashMap<String,ContentBody>();
try {
newpost.put("process", new StringBody("permission"));
newpost.put("purpose", new StringBody("crcon"));

@@ -27,8 +27,9 @@ package de.anomic.yacy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.SortedMap;
import java.util.regex.Pattern;
import net.yacy.cora.storage.DynamicScore;
@@ -112,6 +113,7 @@ public class yacySearch extends Thread {
this.constraint = constraint;
}
@Override
public void run() {
try {
this.urls = yacyClient.search(
@@ -137,10 +139,10 @@ public class yacySearch extends Thread {
}
public static String set2string(final HandleSet hashes) {
String wh = "";
StringBuilder wh = new StringBuilder();
final Iterator<byte[]> iter = hashes.iterator();
while (iter.hasNext()) { wh = wh + new String(iter.next()); }
return wh;
while (iter.hasNext()) { wh.append(new String(iter.next())); }
return wh.toString();
}
public int links() {
@@ -155,25 +157,25 @@ public class yacySearch extends Thread {
return targetPeer;
}
private static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final TreeMap<byte[], String> peerhashes) {
private static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final SortedMap<byte[], String> peerhashes) {
final Iterator<Map.Entry<byte[], String>> i = peerhashes.entrySet().iterator();
final ArrayList<yacySeed> l = new ArrayList<yacySeed>();
final List<yacySeed> l = new ArrayList<yacySeed>();
Map.Entry<byte[], String> entry;
yacySeed s;
while (i.hasNext()) {
    entry = i.next();
    s = seedDB.get(new String(entry.getKey())); // should be getConnected; get only during testing time
    if (s != null) {
        s.setAlternativeAddress(entry.getValue());
        l.add(s);
    }
}
final yacySeed[] result = new yacySeed[l.size()];
for (int j = 0; j < l.size(); j++) {
    result[j] = l.get(j);
}
return result;
//return (yacySeed[]) l.toArray();
// final yacySeed[] result = new yacySeed[l.size()];
// for (int j = 0; j < l.size(); j++) {
//     result[j] = l.get(j);
// }
// return result;
return l.toArray(new yacySeed[0]);
}
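// The toArray(new T[0]) call above replaces the commented-out manual copy loop; the
// list sizes the typed array itself. A quick stand-alone illustration:
import java.util.ArrayList;
import java.util.List;

final class ToArraySketch {
    public static void main(final String[] args) {
        final List<String> l = new ArrayList<String>();
        l.add("peer-a");
        l.add("peer-b");
        final String[] result = l.toArray(new String[0]); // typed copy without an index loop
        System.out.println(result.length); // prints 2
    }
}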
private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final HandleSet wordhashes, int seedcount, int redundancy) {
@@ -187,8 +189,8 @@ public class yacySearch extends Thread {
// put in seeds according to dht
final DynamicScore<String> ranking = new ScoreCluster<String>();
final HashMap<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>();
final Map<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
final Map<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>();
yacySeed seed;
Iterator<yacySeed> dhtEnum;
Iterator<byte[]> iter = wordhashes.iterator();
@@ -235,7 +237,7 @@ public class yacySearch extends Thread {
seedcount = Math.min(ranking.size(), seedcount);
final yacySeed[] result = new yacySeed[seedcount + matchingSeeds.size()];
c = 0;
Iterator<String> iters = ranking.keys(false); // higher are better
final Iterator<String> iters = ranking.keys(false); // higher are better
while (iters.hasNext() && c < seedcount) {
seed = regularSeeds.get(iters.next());
seed.selectscore = c;
@@ -267,7 +269,7 @@ public class yacySearch extends Thread {
final Blacklist blacklist,
final RankingProfile rankingProfile,
final Bitfield constraint,
final TreeMap<byte[], String> clusterselection) {
final SortedMap<byte[], String> clusterselection) {
// check own peer status
//if (wordIndex.seedDB.mySeed() == null || wordIndex.seedDB.mySeed().getPublicAddress() == null) { return null; }
@@ -310,7 +312,7 @@ public class yacySearch extends Thread {
final RankingProcess containerCache,
final String targethash, final Blacklist blacklist,
final RankingProfile rankingProfile,
final Bitfield constraint, final TreeMap<byte[], String> clusterselection) {
final Bitfield constraint, final SortedMap<byte[], String> clusterselection) {
assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes;
// check own peer status
@@ -332,23 +334,25 @@ public class yacySearch extends Thread {
public static int remainingWaiting(final yacySearch[] searchThreads) {
if (searchThreads == null) return 0;
int alive = 0;
for (int i = 0; i < searchThreads.length; i++) {
if (searchThreads[i].isAlive()) alive++;
for (final yacySearch searchThread : searchThreads) {
if (searchThread.isAlive()) alive++;
}
return alive;
}
public static int collectedLinks(final yacySearch[] searchThreads) {
int links = 0;
for (int i = 0; i < searchThreads.length; i++) {
if (!(searchThreads[i].isAlive()) && searchThreads[i].urls > 0) links += searchThreads[i].urls;
for (final yacySearch searchThread : searchThreads) {
if (!(searchThread.isAlive()) && searchThread.urls > 0) {
links += searchThread.urls;
}
}
return links;
}
public static void interruptAlive(final yacySearch[] searchThreads) {
for (int i = 0; i < searchThreads.length; i++) {
if (searchThreads[i].isAlive()) searchThreads[i].interrupt();
for (final yacySearch searchThread : searchThreads) {
if (searchThread.isAlive()) searchThread.interrupt();
}
}

@@ -34,6 +34,7 @@ import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
@@ -337,11 +338,11 @@ public class HTTPClient {
* @return content bytes
* @throws IOException
*/
public byte[] POSTbytes(final String uri, final LinkedHashMap<String, ContentBody> parts, final boolean usegzip) throws IOException {
public byte[] POSTbytes(final String uri, final Map<String, ContentBody> parts, final boolean usegzip) throws IOException {
final HttpPost httpPost = new HttpPost(uri);
final MultipartEntity multipartEntity = new MultipartEntity();
for (Entry<String,ContentBody> part : parts.entrySet())
for (final Entry<String,ContentBody> part : parts.entrySet())
multipartEntity.addPart(part.getKey(), part.getValue());
// statistics
upbytes = multipartEntity.getContentLength();
@@ -371,100 +372,100 @@ public class HTTPClient {
return httpResponse.getStatusLine().getStatusCode();
}
/**
* This method gets direct access to the content-stream
 * Since this way is not controlled by the client, consider using 'writeTo' instead!
* Please take care to call finish()!
*
* @return the content as InputStream
* @throws IOException
*/
public InputStream getContentstream() throws IOException {
if (httpResponse != null && currentRequest != null) {
final HttpEntity httpEntity = httpResponse.getEntity();
if (httpEntity != null) try {
return httpEntity.getContent();
} catch (final IOException e) {
ConnectionInfo.removeConnection(currentRequest.hashCode());
currentRequest.abort();
currentRequest = null;
throw e;
}
}
return null;
}
/**
* This method streams the content to the outputStream
* Please take care to call finish()!
*
* @param outputStream
* @throws IOException
*/
public void writeTo(final OutputStream outputStream) throws IOException {
if (httpResponse != null && currentRequest != null) {
final HttpEntity httpEntity = httpResponse.getEntity();
if (httpEntity != null) try {
httpEntity.writeTo(outputStream);
outputStream.flush();
// TODO: The name of this method is a misnomer.
// It will be renamed to #finish() in the next major release of httpcore
httpEntity.consumeContent();
ConnectionInfo.removeConnection(currentRequest.hashCode());
currentRequest = null;
} catch (final IOException e) {
ConnectionInfo.removeConnection(currentRequest.hashCode());
currentRequest.abort();
currentRequest = null;
throw e;
}
}
}
/**
 * This method ensures the correct finishing of client connections
* This method should be used after every use of GET or POST and writeTo or getContentstream!
*
* @throws IOException
*/
public void finish() throws IOException {
if (httpResponse != null) {
final HttpEntity httpEntity = httpResponse.getEntity();
if (httpEntity != null && httpEntity.isStreaming()) {
// TODO: The name of this method is a misnomer.
// It will be renamed to #finish() in the next major release of httpcore
httpEntity.consumeContent();
}
}
if (currentRequest != null) {
ConnectionInfo.removeConnection(currentRequest.hashCode());
currentRequest.abort();
currentRequest = null;
}
}
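// Given the lifecycle described above, a typical call sequence would look like the
// following sketch (only GETbytes and finish are taken from this class; the URL is a
// made-up example):
import java.io.IOException;

final class FinishSketch {
    public static void main(final String[] args) {
        final HTTPClient client = new HTTPClient();
        try {
            final byte[] body = client.GETbytes("http://localhost:8080/yacy/seed.html");
            if (body != null) System.out.println(body.length + " bytes received");
        } catch (final IOException e) {
            e.printStackTrace();
        } finally {
            // release the connection no matter how the request ended
            try { client.finish(); } catch (final IOException e) { e.printStackTrace(); }
        }
    }
}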
private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final long maxBytes) throws IOException {
byte[] content = null;
try {
execute(httpUriRequest);
if (httpResponse == null) return null;
// get the response body
final HttpEntity httpEntity = httpResponse.getEntity();
if (httpEntity != null) {
if (getStatusCode() == 200 && httpEntity.getContentLength() < maxBytes) {
content = EntityUtils.toByteArray(httpEntity);
}
// TODO: The name of this method is a misnomer.
// It will be renamed to #finish() in the next major release of httpcore
httpEntity.consumeContent();
}
} catch (final IOException e) {
ConnectionInfo.removeConnection(httpUriRequest.hashCode());
httpUriRequest.abort();
throw e;
}
ConnectionInfo.removeConnection(httpUriRequest.hashCode());
return content;
}
private void execute(final HttpUriRequest httpUriRequest) throws IOException {
@@ -485,19 +486,19 @@ public class HTTPClient {
assert !hrequest.expectContinue();
}
httpResponse = httpClient.execute(httpUriRequest, httpContext);
} catch (Exception e) {
//e.printStackTrace();
ConnectionInfo.removeConnection(httpUriRequest.hashCode());
httpUriRequest.abort();
throw new IOException("Client can't execute: " + e.getMessage());
}
}
private void setHeaders(final HttpUriRequest httpUriRequest) {
if (headers != null) {
for (Header header : headers) {
httpUriRequest.addHeader(header);
}
for (final Header header : headers) {
httpUriRequest.addHeader(header);
}
}
if (realm != null)
httpUriRequest.setHeader("Authorization", "realm=" + realm);
@@ -535,92 +536,92 @@ public class HTTPClient {
private static SSLSocketFactory getSSLSocketFactory() {
final TrustManager trustManager = new X509TrustManager() {
public void checkClientTrusted(X509Certificate[] chain, String authType)
throws CertificateException {
}
public void checkServerTrusted(X509Certificate[] chain, String authType)
throws CertificateException {
}
public X509Certificate[] getAcceptedIssuers() {
return null;
}
};
SSLContext sslContext = null;
try {
sslContext = SSLContext.getInstance("TLS");
sslContext.init(null, new TrustManager[] { trustManager }, null);
} catch (NoSuchAlgorithmException e) {
// should not happen
// e.printStackTrace();
} catch (KeyManagementException e) {
// should not happen
// e.printStackTrace();
}
final SSLSocketFactory sslSF = new SSLSocketFactory(sslContext);
sslSF.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
return sslSF;
}
/**
 * testing
 *
 * @param args urls to test
 */
public static void main(final String[] args) {
    String url = null;
    // prepare Parts
    final LinkedHashMap<String,ContentBody> newparts = new LinkedHashMap<String,ContentBody>();
    final Map<String,ContentBody> newparts = new LinkedHashMap<String,ContentBody>();
    try {
        newparts.put("foo", new StringBody("FooBar"));
        newparts.put("bar", new StringBody("BarFoo"));
    } catch (UnsupportedEncodingException e) {
        System.out.println(e.getStackTrace());
    }
    HTTPClient client = new HTTPClient();
    client.setUserAgent("foobar");
    client.setRedirecting(false);
    // Get some
    for (int i = 0; i < args.length; i++) {
        url = args[i];
    for (final String arg : args) {
        url = arg;
        if (!url.toUpperCase().startsWith("HTTP://")) {
            url = "http://" + url;
        }
        try {
            System.out.println(new String(client.GETbytes(url)));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    // Head some
// try {
// client.HEADResponse(url);
// } catch (IOException e) {
// e.printStackTrace();
// }
for (Header header: client.getHttpResponse().getAllHeaders()) {
System.out.println("Header " + header.getName() + " : " + header.getValue());
for (final Header header: client.getHttpResponse().getAllHeaders()) {
System.out.println("Header " + header.getName() + " : " + header.getValue());
// for (HeaderElement element: header.getElements())
// System.out.println("Element " + element.getName() + " : " + element.getValue());
}
System.out.println(client.getHttpResponse().getLocale());
System.out.println(client.getHttpResponse().getProtocolVersion());
System.out.println(client.getHttpResponse().getStatusLine());
// Post some
// try {
// System.out.println(new String(client.POSTbytes(url, newparts)));
// } catch (IOException e1) {
// e1.printStackTrace();
// }
// Close out connection manager
try {
HTTPClient.closeConnectionManager();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
/**

@@ -25,6 +25,7 @@ import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import net.yacy.cora.document.MultiProtocolURI;
@@ -35,7 +36,7 @@ import org.apache.http.entity.mime.content.ContentBody;
*/
public class HTTPConnector {
private static final Map<String, HTTPConnector> cons = new ConcurrentHashMap<String, HTTPConnector>();
private static final ConcurrentMap<String, HTTPConnector> cons = new ConcurrentHashMap<String, HTTPConnector>();
private String userAgent;
private HTTPConnector(String userAgent) {
@@ -59,7 +60,7 @@ public class HTTPConnector {
* @return response body
* @throws IOException
*/
public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap<String, ContentBody> post) throws IOException {
public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, final Map<String, ContentBody> post) throws IOException {
return post(url, timeout, vhost, post, false);
}
@@ -74,7 +75,7 @@ public class HTTPConnector {
* @return response body
* @throws IOException
*/
public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap<String, ContentBody> post, final boolean usegzip) throws IOException {
public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, final Map<String, ContentBody> post, final boolean usegzip) throws IOException {
final HTTPClient client = new HTTPClient();
client.setTimout(timeout);
client.setUserAgent(this.userAgent);

@@ -36,11 +36,14 @@ import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import de.anomic.data.DidYouMeanLibrary;
@@ -233,8 +236,8 @@ public final class Condenser {
final int phrase,
final int flagpos,
final Bitfield flagstemplate,
boolean useForLanguageIdentification,
DidYouMeanLibrary meaningLib) {
final boolean useForLanguageIdentification,
final DidYouMeanLibrary meaningLib) {
String word;
Word wprop;
sievedWordsEnum wordenum;
@@ -259,14 +262,14 @@ public final class Condenser {
}
}
public Condenser(final InputStream text, DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException {
public Condenser(final InputStream text, final DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException {
this.languageIdentificator = null; // we don't need that here
// analysis = new Properties();
words = new TreeMap<String, Word>();
createCondensement(text, meaningLib);
}
public int excludeWords(final TreeSet<String> stopwords) {
public int excludeWords(final SortedSet<String> stopwords) {
// subtracts the given stopwords from the word list
// the word list shrinks; this returns the number of removed words
final int oldsize = words.size();
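// The contract of excludeWords in miniature (hypothetical word list, plain collections):
import java.util.SortedSet;
import java.util.TreeSet;

final class StopwordSketch {
    static int excludeWords(final SortedSet<String> words, final SortedSet<String> stopwords) {
        final int oldsize = words.size();
        words.removeAll(stopwords);    // subtract the stopwords ...
        return oldsize - words.size(); // ... and report how many words were removed
    }
    public static void main(final String[] args) {
        final SortedSet<String> words = new TreeSet<String>();
        words.add("the");
        words.add("peer");
        words.add("index");
        final SortedSet<String> stopwords = new TreeSet<String>();
        stopwords.add("the");
        System.out.println(excludeWords(words, stopwords)); // prints 1
    }
}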
@@ -283,8 +286,8 @@ public final class Condenser {
return this.languageIdentificator.getLanguage();
}
private void createCondensement(final InputStream is, DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException {
final HashSet<String> currsentwords = new HashSet<String>();
private void createCondensement(final InputStream is, final DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException {
final Set<String> currsentwords = new HashSet<String>();
StringBuilder sentence = new StringBuilder(100);
String word = "";
String k;
@@ -299,7 +302,7 @@ public final class Condenser {
int idx;
int wordInSentenceCounter = 1;
boolean comb_indexof = false, last_last = false, last_index = false;
final HashMap<StringBuilder, Phrase> sentences = new HashMap<StringBuilder, Phrase>(100);
final Map<StringBuilder, Phrase> sentences = new HashMap<StringBuilder, Phrase>(100);
// read source
final sievedWordsEnum wordenum = new sievedWordsEnum(is, meaningLib);
@@ -458,17 +461,13 @@ public final class Condenser {
public final static boolean invisible(final char c) {
final int type = Character.getType(c);
if (
type == Character.LOWERCASE_LETTER
|| type == Character.DECIMAL_DIGIT_NUMBER
|| type == Character.UPPERCASE_LETTER
|| type == Character.MODIFIER_LETTER
|| type == Character.OTHER_LETTER
|| type == Character.TITLECASE_LETTER
|| ContentScraper.punctuation(c)) {
return false;
}
return true;
return !(type == Character.LOWERCASE_LETTER
|| type == Character.DECIMAL_DIGIT_NUMBER
|| type == Character.UPPERCASE_LETTER
|| type == Character.MODIFIER_LETTER
|| type == Character.OTHER_LETTER
|| type == Character.TITLECASE_LETTER
|| ContentScraper.punctuation(c));
}
/**
@@ -476,8 +475,8 @@ public final class Condenser {
* @param sentence the sentence to be tokenized
* @return an ordered map containing word hashes as keys and positions as values. The map is ordered by the hash ordering
*/
public static TreeMap<byte[], Integer> hashSentence(final String sentence, DidYouMeanLibrary meaningLib) {
final TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
public static SortedMap<byte[], Integer> hashSentence(final String sentence, final DidYouMeanLibrary meaningLib) {
final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
final Enumeration<String> words = wordTokenizer(sentence, "UTF-8", meaningLib);
int pos = 0;
String word;
@@ -489,14 +488,16 @@ public final class Condenser {
// don't overwrite old values; that would lead to overly large word distances
oldpos = map.put(hash, LargeNumberCache.valueOf(pos));
if (oldpos != null) map.put(hash, oldpos);
if (oldpos != null) {
map.put(hash, oldpos);
}
pos += word.length() + 1;
}
return map;
}
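// The "keep the old value" trick above means the first occurrence of a word wins.
// The same behaviour with a plain map and a trivial whitespace tokenizer (an
// assumption; Condenser's real tokenizer is sievedWordsEnum):
import java.util.LinkedHashMap;
import java.util.Map;

final class FirstPositionSketch {
    public static void main(final String[] args) {
        final Map<String, Integer> firstPos = new LinkedHashMap<String, Integer>();
        int pos = 0;
        for (final String w : "to be or not to be".split(" ")) {
            final Integer oldpos = firstPos.put(w, pos);
            if (oldpos != null) firstPos.put(w, oldpos); // restore the first position
            pos += w.length() + 1;
        }
        System.out.println(firstPos); // prints {to=0, be=3, or=6, not=9}
    }
}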
public static Enumeration<String> wordTokenizer(final String s, final String charset, DidYouMeanLibrary meaningLib) {
public static Enumeration<String> wordTokenizer(final String s, final String charset, final DidYouMeanLibrary meaningLib) {
try {
return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes(charset)), meaningLib);
} catch (final Exception e) {
@@ -507,11 +508,11 @@ public final class Condenser {
public static class sievedWordsEnum implements Enumeration<String> {
// this enumeration removes all words that contain either wrong characters or are too short
StringBuilder buffer = null;
unsievedWordsEnum e;
DidYouMeanLibrary meaningLib;
private StringBuilder buffer = null;
private unsievedWordsEnum e;
private DidYouMeanLibrary meaningLib;
public sievedWordsEnum(final InputStream is, DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException {
public sievedWordsEnum(final InputStream is, final DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException {
this.e = new unsievedWordsEnum(is);
this.buffer = nextElement0();
this.meaningLib = meaningLib;
@@ -550,10 +551,10 @@ public final class Condenser {
private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
// returns an enumeration of StringBuilder Objects
StringBuilder buffer = null;
sentencesFromInputStreamEnum e;
ArrayList<StringBuilder> s;
int sIndex;
private StringBuilder buffer = null;
private sentencesFromInputStreamEnum e;
private List<StringBuilder> s;
private int sIndex;
public unsievedWordsEnum(final InputStream is) throws UnsupportedEncodingException {
e = new sentencesFromInputStreamEnum(is);
@@ -616,11 +617,19 @@ public final class Condenser {
static StringBuilder trim(StringBuilder sb) {
int i = 0;
while (i < sb.length() && sb.charAt(i) <= ' ') i++;
if (i > 0) sb.delete(0, i);
while (i < sb.length() && sb.charAt(i) <= ' ') {
i++;
}
if (i > 0) {
sb.delete(0, i);
}
i = sb.length() - 1;
while (i >= 0 && i < sb.length() && sb.charAt(i) <= ' ') i--;
if (i > 0) sb.delete(i + 1, sb.length());
while (i >= 0 && i < sb.length() && sb.charAt(i) <= ' ') {
i--;
}
if (i < sb.length() - 1) { // there is trailing whitespace to delete, even when only the first character remains
sb.delete(i + 1, sb.length());
}
return sb;
}
@@ -636,10 +645,10 @@ public final class Condenser {
// read sentences from a given input stream
// this enumerates StringBuilder objects
StringBuilder buffer = null;
BufferedReader raf;
int counter = 0;
boolean pre = false;
private StringBuilder buffer;
private BufferedReader raf;
private int counter = 0;
private boolean pre = false;
public sentencesFromInputStreamEnum(final InputStream is) throws UnsupportedEncodingException {
raf = new BufferedReader(new InputStreamReader(is, "UTF-8"));
@@ -723,7 +732,7 @@ public final class Condenser {
return s;
}
public static Map<String, Word> getWords(final String text, DidYouMeanLibrary meaningLib) {
public static Map<String, Word> getWords(final String text, final DidYouMeanLibrary meaningLib) {
// returns a word/indexWord relation map
if (text == null) return null;
ByteArrayInputStream buffer;

@@ -22,6 +22,8 @@ package net.yacy.document;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -37,7 +39,7 @@ public class SnippetExtractor {
public SnippetExtractor(final Collection<StringBuilder> sentences, final HandleSet queryhashes, int maxLength) throws UnsupportedOperationException {
if (sentences == null) throw new UnsupportedOperationException("sentence == null");
if (queryhashes == null || queryhashes.isEmpty()) throw new UnsupportedOperationException("queryhashes == null");
TreeMap<byte[], Integer> hs;
SortedMap<byte[], Integer> hs;
final TreeMap<Long, StringBuilder> order = new TreeMap<Long, StringBuilder>();
long uniqCounter = 999L;
Integer pos;
@@ -124,7 +126,7 @@ public class SnippetExtractor {
byte[] hash;
// find all hashes that appear in the sentence
final TreeMap<byte[], Integer> hs = Condenser.hashSentence(sentence, null);
final Map<byte[], Integer> hs = Condenser.hashSentence(sentence, null);
final Iterator<byte[]> j = queryhashes.iterator();
Integer pos;
int p, minpos = sentence.length(), maxpos = -1;

@@ -31,6 +31,7 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import net.yacy.kelondro.index.RowSpaceExceededException;
@@ -43,7 +44,7 @@ import net.yacy.kelondro.order.NaturalOrder;
public final class Heap extends HeapModifier implements BLOB {
private TreeMap<byte[], byte[]> buffer; // a write buffer to limit IO to the file
private SortedMap<byte[], byte[]> buffer; // a write buffer to limit IO to the file
private int buffersize; // bytes that are buffered in buffer
private final int buffermax; // maximum size of the buffer
@@ -65,7 +66,7 @@ public final class Heap extends HeapModifier implements BLOB {
*
* If a record is removed, it becomes a free record.
* New records are either appended to the end of the file or filled into a free record.
* A free record must either fit exactly to the size of the new record, or an old record is splitted
* A free record must either fit exactly to the size of the new record, or an old record is split
* into a filled and a new, smaller empty record.
*/
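// The free-record rule in miniature: an exact fit consumes the slot, anything smaller
// splits it into a used part and a smaller free remainder (sketch with plain sizes,
// not the Heap's on-disk structures):
final class FreeRecordSketch {
    /** returns {usedBytes, remainingFreeBytes}, or null if the record does not fit */
    static int[] place(final int freeSize, final int recordSize) {
        if (recordSize > freeSize) return null;                       // does not fit
        if (recordSize == freeSize) return new int[] {recordSize, 0}; // exact fit
        return new int[] {recordSize, freeSize - recordSize};         // split the slot
    }
    public static void main(final String[] args) {
        final int[] r = place(100, 60);
        System.out.println(r[0] + " bytes used, " + r[1] + " bytes remain free"); // 60 used, 40 free
    }
}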
@@ -191,7 +192,7 @@ public final class Heap extends HeapModifier implements BLOB {
posBuffer = 0;
byte[] ba = new byte[l + (4 + this.keylength) * this.buffer.size()];
byte[] b;
TreeMap<byte[], byte[]> nextBuffer = new TreeMap<byte[], byte[]>(ordering);
SortedMap<byte[], byte[]> nextBuffer = new TreeMap<byte[], byte[]>(ordering);
flush: while (i.hasNext()) {
entry = i.next();
key = normalizeKey(entry.getKey());

@@ -63,7 +63,7 @@ public class Word {
public int posInText; // unique handle, is initialized with word position (excluding double occurring words)
public int posInPhrase; // position of word in phrase
public int numOfPhrase; // number of phrase. 'normal' phrases begin with number 100
HashSet<Integer> phrases; // a set of handles to all phrases where this word appears
Set<Integer> phrases; // a set of handles to all phrases where this word appears
public Bitfield flags; // the flag bits for each word
public Word(final int handle, final int pip, final int nop) {
@@ -92,6 +92,7 @@ public class Word {
return phrases.iterator();
}
@Override
public String toString() {
// this is here for debugging
return "{count=" + count + ", posInText=" + posInText + ", posInPhrase=" + posInPhrase + ", numOfPhrase=" + numOfPhrase + "}";
@@ -99,6 +100,9 @@ public class Word {
// static methods
public static byte[] word2hash(final StringBuilder word) {
return word2hash(word.toString());
}
// create a word hash
public static final byte[] word2hash(final String word) {
@@ -114,7 +118,7 @@ public class Word {
public static final HandleSet words2hashesHandles(final Set<String> words) {
final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size());
for (String word: words)
for (final String word: words)
try {
hashes.put(word2hash(word));
} catch (RowSpaceExceededException e) {
@@ -126,7 +130,7 @@ public class Word {
public static final HandleSet words2hashesHandles(final String[] words) {
final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.length);
for (String word: words)
for (final String word: words)
try {
hashes.put(word2hash(word));
} catch (RowSpaceExceededException e) {

@@ -35,15 +35,18 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.logging.Log;
public class SetTools {
public final class SetTools {
//public static Comparator fastStringComparator = fastStringComparator(true);
@@ -51,7 +54,7 @@ public class SetTools {
// ------------------------------------------------------------------------------------------------
// helper methods
public final static int log2a(int x) {
public static int log2a(int x) {
// this computes 1 + log2(x):
// it is the number of bits needed to represent x, not the base-2 logarithm
int l = 0;
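// The hunk cuts the body off here; one straightforward implementation of the stated
// contract ("the number of bits in x") would be the following sketch, which is not
// necessarily the code in this file:
final class Log2aSketch {
    static int log2a(int x) {
        int l = 0;
        while (x > 0) { // one shift per bit
            l++;
            x >>= 1;
        }
        return l;
    }
    public static void main(final String[] args) {
        System.out.println(log2a(1)); // 1 = 0b1    -> 1
        System.out.println(log2a(5)); // 5 = 0b101  -> 3
        System.out.println(log2a(8)); // 8 = 0b1000 -> 4
    }
}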
@@ -69,13 +72,13 @@ public class SetTools {
// - join by iterative tests (where we distinguish left-right and right-left tests)
public final static <A, B> TreeMap<A, B> joinConstructive(final Collection<TreeMap<A, B>> maps, final boolean concatStrings) {
public static <A, B> SortedMap<A, B> joinConstructive(final Collection<SortedMap<A, B>> maps, final boolean concatStrings) {
// this joins all SortedMap(s) contained in maps
// first order entities by their size
final TreeMap<Long, TreeMap<A, B>> orderMap = new TreeMap<Long, TreeMap<A, B>>();
TreeMap<A, B> singleMap;
final Iterator<TreeMap<A, B>> i = maps.iterator();
final SortedMap<Long, SortedMap<A, B>> orderMap = new TreeMap<Long, SortedMap<A, B>>();
SortedMap<A, B> singleMap;
final Iterator<SortedMap<A, B>> i = maps.iterator();
int count = 0;
while (i.hasNext()) {
// get next entity:
@@ -94,7 +97,7 @@ public class SetTools {
// we now must pairwise build up a conjunction of these maps
Long k = orderMap.firstKey(); // the smallest, i.e. the one with the fewest entries
TreeMap<A, B> mapA, mapB, joinResult = orderMap.remove(k);
SortedMap<A, B> mapA, mapB, joinResult = orderMap.remove(k);
while (!orderMap.isEmpty() && !joinResult.isEmpty()) {
// take the first element of map which is a result and combine it with result
k = orderMap.firstKey(); // the next smallest...
@@ -111,7 +114,7 @@ public class SetTools {
return joinResult;
}
public final static <A, B> TreeMap<A, B> joinConstructive(final TreeMap<A, B> map1, final TreeMap<A, B> map2, final boolean concatStrings) {
public static <A, B> SortedMap<A, B> joinConstructive(final SortedMap<A, B> map1, final SortedMap<A, B> map2, final boolean concatStrings) {
// comparators must be equal
if ((map1 == null) || (map2 == null)) return null;
if (map1.comparator() != map2.comparator()) return null;
@@ -132,9 +135,9 @@ public class SetTools {
}
@SuppressWarnings("unchecked")
private final static <A, B> TreeMap<A, B> joinConstructiveByTest(final TreeMap<A, B> small, final TreeMap<A, B> large, final boolean concatStrings) {
private static <A, B> SortedMap<A, B> joinConstructiveByTest(final SortedMap<A, B> small, final SortedMap<A, B> large, final boolean concatStrings) {
final Iterator<Map.Entry<A, B>> mi = small.entrySet().iterator();
final TreeMap<A, B> result = new TreeMap<A, B>(large.comparator());
final SortedMap<A, B> result = new TreeMap<A, B>(large.comparator());
synchronized (mi) {
Map.Entry<A, B> mentry1;
B mobj2;
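// Illustrative sketch, not part of this patch (the hunk is truncated here):
// the test-based join iterates the smaller map and probes the larger one,
// roughly |small| * log |large| comparisons; concatStrings handling omitted:
static <A, B> SortedMap<A, B> joinByProbe(final SortedMap<A, B> small, final SortedMap<A, B> large) {
    final SortedMap<A, B> result = new TreeMap<A, B>(large.comparator());
    for (final Map.Entry<A, B> e : small.entrySet()) {
        if (large.containsKey(e.getKey())) result.put(e.getKey(), e.getValue()); // keep matches only
    }
    return result;
}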
@ -159,12 +162,12 @@ public class SetTools {
}
@SuppressWarnings("unchecked")
private final static <A, B> TreeMap<A, B> joinConstructiveByEnumeration(final TreeMap<A, B> map1, final TreeMap<A, B> map2, final boolean concatStrings) {
private static <A, B> SortedMap<A, B> joinConstructiveByEnumeration(final SortedMap<A, B> map1, final SortedMap<A, B> map2, final boolean concatStrings) {
// implement pairwise enumeration
final Comparator<? super A> comp = map1.comparator();
final Iterator<Map.Entry<A, B>> mi1 = map1.entrySet().iterator();
final Iterator<Map.Entry<A, B>> mi2 = map2.entrySet().iterator();
final TreeMap<A, B> result = new TreeMap<A, B>(map1.comparator());
final SortedMap<A, B> result = new TreeMap<A, B>(map1.comparator());
int c;
if ((mi1.hasNext()) && (mi2.hasNext())) {
Map.Entry<A, B> mentry1 = mi1.next();
@ -190,7 +193,7 @@ public class SetTools {
}
// now the same for set-set
public final static <A> TreeSet<A> joinConstructive(final TreeSet<A> set1, final TreeSet<A> set2) {
public static <A> SortedSet<A> joinConstructive(final SortedSet<A> set1, final SortedSet<A> set2) {
// comparators must be equal
if ((set1 == null) || (set2 == null)) return null;
if (set1.comparator() != set2.comparator()) return null;
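// Illustrative usage, not part of this patch (assumes import java.util.Arrays):
// both sets use natural ordering (comparator() == null for each), so the join is allowed:
final SortedSet<String> s1 = new TreeSet<String>(Arrays.asList("index", "search", "web"));
final SortedSet<String> s2 = new TreeSet<String>(Arrays.asList("peer", "search", "web"));
final SortedSet<String> common = SetTools.joinConstructive(s1, s2); // -> [search, web]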
@ -210,9 +213,9 @@ public class SetTools {
return joinConstructiveByEnumeration(set1, set2);
}
private final static <A> TreeSet<A> joinConstructiveByTest(final TreeSet<A> small, final TreeSet<A> large) {
private static <A> SortedSet<A> joinConstructiveByTest(final SortedSet<A> small, final SortedSet<A> large) {
final Iterator<A> mi = small.iterator();
final TreeSet<A> result = new TreeSet<A>(small.comparator());
final SortedSet<A> result = new TreeSet<A>(small.comparator());
A o;
while (mi.hasNext()) {
o = mi.next();
@ -221,12 +224,12 @@ public class SetTools {
return result;
}
private final static <A> TreeSet<A> joinConstructiveByEnumeration(final TreeSet<A> set1, final TreeSet<A> set2) {
private static <A> SortedSet<A> joinConstructiveByEnumeration(final SortedSet<A> set1, final SortedSet<A> set2) {
// implement pairwise enumeration
final Comparator<? super A> comp = set1.comparator();
final Iterator<A> mi = set1.iterator();
final Iterator<A> si = set2.iterator();
final TreeSet<A> result = new TreeSet<A>(set1.comparator());
final SortedSet<A> result = new TreeSet<A>(set1.comparator());
int c;
if ((mi.hasNext()) && (si.hasNext())) {
A mobj = mi.next();
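// Illustrative sketch, not part of this patch (the hunk is truncated here):
// pairwise enumeration walks both sorted sets in lockstep, like the merge step
// of mergesort, collecting elements present in both:
@SuppressWarnings("unchecked")
static <A> SortedSet<A> intersectByEnumeration(final SortedSet<A> s1, final SortedSet<A> s2) {
    final Comparator<? super A> comp = s1.comparator();
    final SortedSet<A> result = new TreeSet<A>(comp);
    final Iterator<A> i1 = s1.iterator(), i2 = s2.iterator();
    if (!i1.hasNext() || !i2.hasNext()) return result;
    A a = i1.next(), b = i2.next();
    while (true) {
        final int c = (comp == null) ? ((Comparable<A>) a).compareTo(b) : comp.compare(a, b);
        if (c < 0) { if (!i1.hasNext()) break; a = i1.next(); }      // advance the smaller side
        else if (c > 0) { if (!i2.hasNext()) break; b = i2.next(); }
        else {                                                       // match: keep it, advance both
            result.add(a);
            if (!i1.hasNext() || !i2.hasNext()) break;
            a = i1.next(); b = i2.next();
        }
    }
    return result;
}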
@ -254,7 +257,7 @@ public class SetTools {
* @param large
* @return true if the small set is completely included in the large set
*/
public final static <A> boolean totalInclusion(final Set<A> small, final Set<A> large) {
public static <A> boolean totalInclusion(final Set<A> small, final Set<A> large) {
for (A o: small) {
if (!large.contains(o)) return false;
}
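// Illustrative note, not part of this patch: for plain java.util.Sets the same
// check is available in the standard library:
// large.containsAll(small) gives the same answer as totalInclusion(small, large)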
@ -267,7 +270,7 @@ public class SetTools {
* @param large
* @return true if the small set is completely included in the large set
*/
public final static boolean totalInclusion(final HandleSet small, final HandleSet large) {
public static boolean totalInclusion(final HandleSet small, final HandleSet large) {
for (byte[] handle: small) {
if (!large.has(handle)) return false;
}
@ -281,7 +284,7 @@ public class SetTools {
* @param set2
* @return true if any element of the first set is part of the second set or vice-versa
*/
public final static <A> boolean anymatch(final TreeSet<A> set1, final TreeSet<A> set2) {
public static <A> boolean anymatch(final SortedSet<A> set1, final SortedSet<A> set2) {
// comparators must be equal
if ((set1 == null) || (set2 == null)) return false;
if (set1.comparator() != set2.comparator()) return false;
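// Illustrative note, not part of this patch: for sets ordered the same way,
// anymatch answers the negation of the stock disjointness test:
// !java.util.Collections.disjoint(set1, set2) == anymatch(set1, set2)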
@ -307,7 +310,7 @@ public class SetTools {
* @param set2
* @return true if any element of the first set is part of the second set or vice-versa
*/
public final static boolean anymatch(final HandleSet set1, final HandleSet set2) {
public static boolean anymatch(final HandleSet set1, final HandleSet set2) {
// comparators must be equal
if ((set1 == null) || (set2 == null)) return false;
if (set1.comparator() != set2.comparator()) return false;
@ -327,7 +330,7 @@ public class SetTools {
return anymatchByEnumeration(set1, set2);
}
private final static <A> boolean anymatchByTest(final TreeSet<A> small, final TreeSet<A> large) {
private static <A> boolean anymatchByTest(final SortedSet<A> small, final SortedSet<A> large) {
final Iterator<A> mi = small.iterator();
A o;
while (mi.hasNext()) {
@ -337,7 +340,7 @@ public class SetTools {
return false;
}
private final static boolean anymatchByTest(final HandleSet small, final HandleSet large) {
private static boolean anymatchByTest(final HandleSet small, final HandleSet large) {
final Iterator<byte[]> mi = small.iterator();
byte[] o;
while (mi.hasNext()) {
@ -347,7 +350,7 @@ public class SetTools {
return false;
}
private final static <A> boolean anymatchByEnumeration(final TreeSet<A> set1, final TreeSet<A> set2) {
private static <A> boolean anymatchByEnumeration(final SortedSet<A> set1, final SortedSet<A> set2) {
// implement pairwise enumeration
final Comparator<? super A> comp = set1.comparator();
final Iterator<A> mi = set1.iterator();
@ -370,7 +373,7 @@ public class SetTools {
return false;
}
private final static boolean anymatchByEnumeration(final HandleSet set1, final HandleSet set2) {
private static boolean anymatchByEnumeration(final HandleSet set1, final HandleSet set2) {
// implement pairwise enumeration
final Comparator<byte[]> comp = set1.comparator();
final Iterator<byte[]> mi = set1.iterator();
@ -418,11 +421,11 @@ public class SetTools {
}
*/
public final static <A, B> void excludeDestructive(final Map<A, B> map, final Set<A> set) {
public static <A, B> void excludeDestructive(final Map<A, B> map, final Set<A> set) {
// comparators must be equal
if (map == null) return;
if (set == null) return;
assert !(map instanceof TreeMap<?,?> && set instanceof TreeSet<?>) || ((TreeMap<A, B>) map).comparator() == ((TreeSet<A>) set).comparator();
assert !(map instanceof SortedMap<?,?> && set instanceof SortedSet<?>) || ((SortedMap<A, B>) map).comparator() == ((SortedSet<A>) set).comparator();
if (map.isEmpty() || set.isEmpty()) return;
if (map.size() < set.size())
@ -431,21 +434,21 @@ public class SetTools {
excludeDestructiveByTestSetInMap(map, set);
}
private final static <A, B> void excludeDestructiveByTestMapInSet(final Map<A, B> map, final Set<A> set) {
private static <A, B> void excludeDestructiveByTestMapInSet(final Map<A, B> map, final Set<A> set) {
final Iterator<A> mi = map.keySet().iterator();
while (mi.hasNext()) if (set.contains(mi.next())) mi.remove();
}
private final static <A, B> void excludeDestructiveByTestSetInMap(final Map<A, B> map, final Set<A> set) {
private static <A, B> void excludeDestructiveByTestSetInMap(final Map<A, B> map, final Set<A> set) {
final Iterator<A> si = set.iterator();
while (si.hasNext()) map.remove(si.next());
}
// and the same again with set-set
public final static <A> void excludeDestructive(final Set<A> set1, final Set<A> set2) {
public static <A> void excludeDestructive(final Set<A> set1, final Set<A> set2) {
if (set1 == null) return;
if (set2 == null) return;
assert !(set1 instanceof TreeSet<?> && set2 instanceof TreeSet<?>) || ((TreeSet<A>) set1).comparator() == ((TreeSet<A>) set2).comparator();
assert !(set1 instanceof SortedSet<?> && set2 instanceof SortedSet<?>) || ((SortedSet<A>) set1).comparator() == ((SortedSet<A>) set2).comparator();
if (set1.isEmpty() || set2.isEmpty()) return;
if (set1.size() < set2.size())
@ -454,20 +457,20 @@ public class SetTools {
excludeDestructiveByTestLargeInSmall(set1, set2);
}
private final static <A> void excludeDestructiveByTestSmallInLarge(final Set<A> small, final Set<A> large) {
private static <A> void excludeDestructiveByTestSmallInLarge(final Set<A> small, final Set<A> large) {
final Iterator<A> mi = small.iterator();
while (mi.hasNext()) if (large.contains(mi.next())) mi.remove();
}
private final static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Set<A> small) {
private static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Set<A> small) {
final Iterator<A> si = small.iterator();
while (si.hasNext()) large.remove(si.next());
}
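// Illustrative note, not part of this patch: both destructive exclusions mirror
// stock bulk operations; the hand-rolled versions above only add the choice of
// iterating the smaller side:
// map.keySet().removeAll(set); // same effect as excludeDestructive(map, set)
// set1.removeAll(set2);        // same effect as excludeDestructive(set1, set2)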
// ------------------------------------------------------------------------------------------------
public final static TreeMap<String, String> loadMap(final String filename, final String sep) {
final TreeMap<String, String> map = new TreeMap<String, String>();
public static SortedMap<String, String> loadMap(final String filename, final String sep) {
final SortedMap<String, String> map = new TreeMap<String, String>();
BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
@ -485,8 +488,8 @@ public class SetTools {
return map;
}
public final static TreeMap<String, ArrayList<String>> loadMapMultiValsPerKey(final String filename, final String sep) {
final TreeMap<String, ArrayList<String>> map = new TreeMap<String, ArrayList<String>>();
public static SortedMap<String, List<String>> loadMapMultiValsPerKey(final String filename, final String sep) {
final SortedMap<String, List<String>> map = new TreeMap<String, List<String>>();
BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
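// Illustrative sketch, not part of this patch (the read loop is cut off in this
// hunk): assuming one host<sep>path entry per line with '#' comment lines, a
// minimal version of such a parse loop looks like this — the real one may differ:
String line;
while ((line = br.readLine()) != null) {
    line = line.trim();
    final int pos = line.indexOf(sep);
    if (line.length() == 0 || line.charAt(0) == '#' || pos < 1) continue; // skip comments and malformed lines
    final String key = line.substring(0, pos).trim().toLowerCase();
    final String value = line.substring(pos + sep.length()).trim();
    List<String> vals = map.get(key);
    if (vals == null) map.put(key, vals = new ArrayList<String>());
    vals.add(value); // several path patterns may accumulate under one host key
}
// e.g. the blacklist line "ads.example/.*" with sep = "/" yields {"ads.example" -> [".*"]}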
@ -508,8 +511,8 @@ public class SetTools {
return map;
}
public final static TreeSet<String> loadList(final File file, final Comparator<String> c) {
final TreeSet<String> list = new TreeSet<String>(c);
public static SortedSet<String> loadList(final File file, final Comparator<String> c) {
final SortedSet<String> list = new TreeSet<String>(c);
if (!(file.exists())) return list;
BufferedReader br = null;
@ -528,7 +531,7 @@ public class SetTools {
return list;
}
public final static String setToString(final HandleSet set, final char separator) {
public static String setToString(final HandleSet set, final char separator) {
final Iterator<byte[]> i = set.iterator();
final StringBuilder sb = new StringBuilder(set.size() * 7);
if (i.hasNext()) sb.append(new String(i.next()));
@ -538,7 +541,7 @@ public class SetTools {
return sb.toString();
}
public final static String setToString(final Set<String> set, final char separator) {
public static String setToString(final Set<String> set, final char separator) {
final Iterator<String> i = set.iterator();
final StringBuilder sb = new StringBuilder(set.size() * 7);
if (i.hasNext()) sb.append(i.next());
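// Illustrative usage, not part of this patch:
// setToString(new TreeSet<String>(Arrays.asList("a", "b", "c")), ',') -> "a,b,c"
// (on newer JDKs String.join(",", set) produces the same result)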
@ -552,8 +555,8 @@ public class SetTools {
public static void main(final String[] args) {
final TreeMap<String, String> m = new TreeMap<String, String>();
final TreeMap<String, String> s = new TreeMap<String, String>();
final SortedMap<String, String> m = new TreeMap<String, String>();
final SortedMap<String, String> s = new TreeMap<String, String>();
m.put("a", "a");
m.put("x", "x");
m.put("f", "f");

@ -90,32 +90,28 @@ public class Blacklist {
Blacklist.BLACKLIST_NEWS
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
protected File blacklistRootPath = null;
protected HashMap<String, HandleSet> cachedUrlHashs = null;
//protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths_matchable = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths_notmatchable = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private File blacklistRootPath = null;
private final Map<String, HandleSet> cachedUrlHashs;
private final Map<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final Map<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public Blacklist(final File rootPath) {
this.setRootPath(rootPath);
this.blacklistRootPath = rootPath;
this.setRootPath(rootPath);
// prepare the data structure
//this.hostpaths = new HashMap<String, HashMap<String, ArrayList<String>>>();
this.hostpaths_matchable = new HashMap<String, HashMap<String, ArrayList<String>>>();
this.hostpaths_notmatchable = new HashMap<String, HashMap<String, ArrayList<String>>>();
this.hostpaths_matchable = new HashMap<String, Map<String, List<String>>>();
this.hostpaths_notmatchable = new HashMap<String, Map<String, List<String>>>();
this.cachedUrlHashs = new HashMap<String, HandleSet>();
for (final String blacklistType : BLACKLIST_TYPES) {
//this.hostpaths.put(blacklistType, new HashMap<String, ArrayList<String>>());
this.hostpaths_matchable.put(blacklistType, new HashMap<String, ArrayList<String>>());
this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, ArrayList<String>>());
this.hostpaths_matchable.put(blacklistType, new HashMap<String, List<String>>());
this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, List<String>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}
public void setRootPath(final File rootPath) {
public final void setRootPath(final File rootPath) {
if (rootPath == null) {
throw new NullPointerException("The blacklist root path must not be null.");
}
@ -129,9 +125,9 @@ public class Blacklist {
this.blacklistRootPath = rootPath;
}
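// Illustrative note, not part of this patch: the constructor above calls
// setRootPath, so declaring the method final stops a subclass override from
// running against a half-initialized object:
class StrictBlacklist extends Blacklist { // hypothetical subclass
    StrictBlacklist(final File rootPath) { super(rootPath); }
    // overriding setRootPath(File) would no longer compile: the method is final
}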
protected Map<String, ArrayList<String>> getBlacklistMap(final String blacklistType, final boolean matchable) {
protected Map<String, List<String>> getBlacklistMap(final String blacklistType, final boolean matchable) {
if (blacklistType == null) {
throw new IllegalArgumentException();
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
@ -142,7 +138,7 @@ public class Blacklist {
protected HandleSet getCacheUrlHashsSet(final String blacklistType) {
if (blacklistType == null) {
throw new IllegalArgumentException();
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown blacklist type.");
@ -152,10 +148,10 @@ public class Blacklist {
}
public void clear() {
for (final Map<String, ArrayList<String>> entry : this.hostpaths_matchable.values()) {
for (final Map<String, List<String>> entry : this.hostpaths_matchable.values()) {
entry.clear();
}
for (final Map<String, ArrayList<String>> entry : this.hostpaths_notmatchable.values()) {
for (final Map<String, List<String>> entry : this.hostpaths_notmatchable.values()) {
entry.clear();
}
for (final HandleSet entry : this.cachedUrlHashs.values()) {
@ -166,12 +162,12 @@ public class Blacklist {
public int size() {
int size = 0;
for (final String entry : this.hostpaths_matchable.keySet()) {
for (final ArrayList<String> ientry : this.hostpaths_matchable.get(entry).values()) {
for (final List<String> ientry : this.hostpaths_matchable.get(entry).values()) {
size += ientry.size();
}
}
for (final String entry : this.hostpaths_notmatchable.keySet()) {
for (final ArrayList<String> ientry : this.hostpaths_notmatchable.get(entry).values()) {
for (final List<String> ientry : this.hostpaths_notmatchable.get(entry).values()) {
size += ientry.size();
}
}
@ -179,48 +175,45 @@ public class Blacklist {
}
public void loadList(final BlacklistFile[] blFiles, final String sep) {
for (int j = 0; j < blFiles.length; j++) {
final BlacklistFile blf = blFiles[j];
for (final BlacklistFile blf : blFiles) {
loadList(blf.getType(), blf.getFileName(), sep);
}
}
private void loadList(final BlacklistFile blFile, final String sep) {
final Map<String, ArrayList<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, ArrayList<String>>> loadedBlacklist;
Map.Entry<String, ArrayList<String>> loadedEntry;
ArrayList<String> paths;
ArrayList<String> loadedPaths;
final Map<String, List<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, List<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, List<String>>> loadedBlacklist;
Map.Entry<String, List<String>> loadedEntry;
List<String> paths;
List<String> loadedPaths;
final String[] fileNames = blFile.getFileNamesUnified();
if (fileNames.length > 0) {
for (int i = 0; i < fileNames.length; i++) {
// make sure all requested blacklist files exist
final File file = new File(this.blacklistRootPath, fileNames[i]);
try {
file.createNewFile();
} catch (final IOException e) { /* */ }
// join all blacklists from files into one internal blacklist map
loadedBlacklist = SetTools.loadMapMultiValsPerKey(file.toString(), sep).entrySet();
for (final Iterator<Map.Entry<String, ArrayList<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) {
loadedEntry = mi.next();
loadedPaths = loadedEntry.getValue();
// create new entry if host mask unknown, otherwise merge
// existing one with path patterns from blacklist file
paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey());
if (paths == null) {
if (isMatchable(loadedEntry.getKey())) {
blacklistMapMatch.put(loadedEntry.getKey(), loadedPaths);
} else {
blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPaths);
}
for (final String fileName : fileNames) {
// make sure all requested blacklist files exist
final File file = new File(this.blacklistRootPath, fileName);
try {
file.createNewFile();
} catch (final IOException e) { /* */ }
// join all blacklists from files into one internal blacklist map
loadedBlacklist = SetTools.loadMapMultiValsPerKey(file.toString(), sep).entrySet();
for (final Iterator<Map.Entry<String, List<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) {
loadedEntry = mi.next();
loadedPaths = loadedEntry.getValue();
// create new entry if host mask unknown, otherwise merge
// existing one with path patterns from blacklist file
paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey());
if (paths == null) {
if (isMatchable(loadedEntry.getKey())) {
blacklistMapMatch.put(loadedEntry.getKey(), loadedPaths);
} else {
blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPaths);
}
} else {
// TODO check for duplicates? (refactor List -> Set)
paths.addAll(loadedPaths);
}
}
}
@ -240,15 +233,16 @@ public class Blacklist {
public void remove(final String blacklistType, final String host, final String path) {
final Map<String, ArrayList<String>> blacklistMap = getBlacklistMap(blacklistType, true);
ArrayList<String> hostList = blacklistMap.get(host);
final Map<String, List<String>> blacklistMap = getBlacklistMap(blacklistType, true);
List<String> hostList = blacklistMap.get(host);
if (hostList != null) {
hostList.remove(path);
if (hostList.isEmpty()) {
blacklistMap.remove(host);
}
}
final Map<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
final Map<String, List<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
hostList = blacklistMapNotMatch.get(host);
if (hostList != null) {
hostList.remove(path);
@ -258,61 +252,55 @@ public class Blacklist {
}
}
public void add(final String blacklistType, String host, String path) {
public void add(final String blacklistType, final String host, final String path) {
if (host == null) {
throw new NullPointerException();
throw new IllegalArgumentException("host may not be null");
}
if (path == null) {
throw new NullPointerException();
throw new IllegalArgumentException("path may not be null");
}
if (path.length() > 0 && path.charAt(0) == '/') {
path = path.substring(1);
}
final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path;
Map<String, ArrayList<String>> blacklistMap;
blacklistMap = (isMatchable(host)) ? getBlacklistMap(blacklistType, true) : getBlacklistMap(blacklistType, false);
final Map<String, List<String>> blacklistMap = getBlacklistMap(blacklistType, (isMatchable(host)) ? true : false);
// avoid a PatternSyntaxException
if (!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') {
host = "." + host;
}
final String h =
((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
ArrayList<String> hostList = blacklistMap.get(host.toLowerCase());
if (hostList == null) {
blacklistMap.put(host.toLowerCase(), (hostList = new ArrayList<String>()));
List<String> hostList;
if (!(blacklistMap.containsKey(h) && ((hostList = blacklistMap.get(h)) != null))) {
blacklistMap.put(h, (hostList = new ArrayList<String>()));
}
hostList.add(path);
hostList.add(p);
}
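// Illustrative usage, not part of this patch; the instance and entries are
// hypothetical. A matchable host mask is stored as given; a non-matchable one
// starting with '*' gets a '.' prefix so it compiles as a regex:
// blacklist.add(Blacklist.BLACKLIST_PROXY, "ads.example.com", "/banner/.*"); // stored path: "banner/.*"
// blacklist.add(Blacklist.BLACKLIST_PROXY, "*ads*", ".*");                   // stored host: ".*ads*"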
public int blacklistCacheSize() {
int size = 0;
final Iterator<String> iter = this.cachedUrlHashs.keySet().iterator();
while (iter.hasNext()) {
final HandleSet blacklistMap = this.cachedUrlHashs.get(iter.next());
size += blacklistMap.size();
size += this.cachedUrlHashs.get(iter.next()).size();
}
return size;
}
public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) {
final HandleSet urlHashCache = getCacheUrlHashsSet(blacklistType);
return urlHashCache.has(urlHash);
return getCacheUrlHashsSet(blacklistType).has(urlHash);
}
public boolean contains(final String blacklistType, String host, String path) {
public boolean contains(final String blacklistType, final String host, final String path) {
boolean ret = false;
if (blacklistType != null && host != null && path != null) {
Map<String, ArrayList<String>> blacklistMap;
blacklistMap = (isMatchable(host)) ? getBlacklistMap(blacklistType, true) : getBlacklistMap(blacklistType, false);
final Map<String, List<String>> blacklistMap =
getBlacklistMap(blacklistType, (isMatchable(host)) ? true : false);
// avoid a PatternSyntaxException
if (!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') {
host = "." + host;
}
final String h =
((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
ArrayList<String> hostList = blacklistMap.get(host.toLowerCase());
List<String> hostList = blacklistMap.get(h);
if (hostList != null) {
ret = hostList.contains(path);
}
@ -321,6 +309,10 @@ public class Blacklist {
}
public boolean isListed(final String blacklistType, final DigestURI url) {
if (url == null) {
throw new IllegalArgumentException("url may not be null");
}
if (url.getHost() == null) {
return false;
}
@ -340,44 +332,31 @@ public class Blacklist {
}
public static boolean isMatchable(final String host) {
try {
if (Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
{
return true;
}
if (Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
{
return true;
}
if (Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and befor * must be a dot)
{
return true;
}
} catch (final PatternSyntaxException e) {
//System.out.println(e.toString());
return false;
}
return false;
return (
(Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
|| (Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
|| (Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and before * must be a dot)
);
}
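// Illustrative results, not part of this patch:
// isMatchable("www.yacy.net")  -> true   (simple domain)
// isMatchable("*.yacy.net")    -> true   (leading *. wildcard)
// isMatchable("yacy.*")        -> true   (trailing .* wildcard)
// isMatchable("(ads|track).*") -> false  (full regex, handled by the not-matchable map)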
public String getEngineInfo() {
return "Default YaCy Blacklist Engine";
}
public boolean isListed(final String blacklistType, final String hostlow, String path) {
public boolean isListed(final String blacklistType, final String hostlow, final String path) {
if (hostlow == null) {
throw new NullPointerException();
throw new IllegalArgumentException("hostlow may not be null");
}
if (path == null) {
throw new NullPointerException();
throw new IllegalArgumentException("path may not be null");
}
// getting the proper blacklist
final Map<String, ArrayList<String>> blacklistMapMatched = getBlacklistMap(blacklistType, true);
final Map<String, List<String>> blacklistMapMatched = getBlacklistMap(blacklistType, true);
final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path;
if (path.length() > 0 && path.charAt(0) == '/') {
path = path.substring(1);
}
List<String> app;
boolean matched = false;
String pp = ""; // path-pattern
@ -391,7 +370,7 @@ public class Blacklist {
Log.logWarning("Blacklist", "ignored blacklist path to prevent 'Dangling meta character' exception: " + pp);
continue;
}
matched |= (("*".equals(pp)) || (path.matches(pp)));
matched |= (("*".equals(pp)) || (p.matches(pp)));
}
}
// first try to match the domain with wildcard '*'
@ -430,9 +409,9 @@ public class Blacklist {
// loop over all Regexentrys
if (!matched) {
final Map<String, ArrayList<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false);
final Map<String, List<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false);
String key;
for (final Entry<String, ArrayList<String>> entry : blacklistMapNotMatched.entrySet()) {
for (final Entry<String, List<String>> entry : blacklistMapNotMatched.entrySet()) {
key = entry.getKey();
try {
if (Pattern.matches(key, hostlow)) {
@ -451,11 +430,11 @@ public class Blacklist {
return matched;
}
public BlacklistError checkError(String element, Map<String, String> properties) {
public BlacklistError checkError(final String element, final Map<String, String> properties) {
boolean allowRegex = true;
int slashPos;
String host, path;
final String host, path;
if (properties != null) {
allowRegex = properties.get("allowRegex").equalsIgnoreCase("true") ? true : false;
@ -500,7 +479,7 @@ public class Blacklist {
}
// check for errors on regex-compiling path
if (!isValidRegex(path) && !path.equals("*")) {
if (!isValidRegex(path) && !"*".equals(path)) {
return BlacklistError.PATH_REGEX;
}
@ -512,19 +491,18 @@ public class Blacklist {
* @param expression The expression to be checked.
* @return True if the expression is a valid regular expression, else false.
*/
private static boolean isValidRegex(String expression) {
private static boolean isValidRegex(final String expression) {
boolean ret = true;
try {
Pattern.compile(expression);
} catch (final PatternSyntaxException e) {
ret = false;
}
return ret;
}
public static String defaultBlacklist(final File listsPath) {
List<String> dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
final List<String> dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
if (dirlist.isEmpty()) {
return null;
}
@ -537,12 +515,8 @@ public class Blacklist {
* @param newEntry the entry to look for.
* @return True if file contains entry, else false.
*/
public static boolean blacklistFileContains(final File listsPath, final String blacklistToUse, String newEntry) {
boolean ret = false;
final HashSet<String> Blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
if (Blacklist != null) {
ret = Blacklist.contains(newEntry);
}
return ret;
public static boolean blacklistFileContains(final File listsPath, final String blacklistToUse, final String newEntry) {
final Set<String> blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
return blacklist != null && blacklist.contains(newEntry);
}
}
