diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index b9dd8f0c8..e98e7bd33 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -30,6 +30,8 @@ import java.io.IOException; import java.util.HashMap; import java.util.Iterator; +import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeSet; import net.yacy.cora.document.RSSMessage; @@ -412,7 +414,7 @@ public class yacysearch { int maxDistance = (querystring.indexOf('"') >= 0) ? maxDistance = query.length - 1 : Integer.MAX_VALUE; // filter out stopwords - final TreeSet filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords); + final SortedSet filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords); if (!filtered.isEmpty()) { SetTools.excludeDestructive(query[0], Switchboard.stopwords); } @@ -576,7 +578,7 @@ public class yacysearch { } // find geographic info - TreeSet coordinates = LibraryProvider.geoLoc.find(originalquerystring, false); + SortedSet coordinates = LibraryProvider.geoLoc.find(originalquerystring, false); if (coordinates == null || coordinates.isEmpty() || offset > 0) { prop.put("geoinfo", "0"); } else { diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 060e02218..8639f2d18 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -25,7 +25,7 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import java.net.MalformedURLException; -import java.util.ArrayList; +import java.util.List; import java.util.TreeSet; import net.yacy.cora.protocol.HeaderFramework; @@ -209,11 +209,11 @@ public class yacysearchitem { if (result == null) return prop; // no content prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content - final ArrayList media = result.mediaSnippets(); + final List media = result.mediaSnippets(); if (item == 0) col = true; if (media != null) { int c = 0; - for (MediaSnippet ms : media) { + for (final MediaSnippet ms : media) { prop.putHTML("content_items_" + c + "_href", ms.href.toNormalform(true, false)); prop.putHTML("content_items_" + c + "_hrefshort", nxTools.shortenURLString(ms.href.toNormalform(true, false), urllength)); prop.putHTML("content_items_" + c + "_name", shorten(ms.name, namelength)); diff --git a/source/de/anomic/http/server/HTTPDemon.java b/source/de/anomic/http/server/HTTPDemon.java index 2ddebef17..19801f7a9 100644 --- a/source/de/anomic/http/server/HTTPDemon.java +++ b/source/de/anomic/http/server/HTTPDemon.java @@ -79,6 +79,8 @@ import de.anomic.server.serverHandler; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.serverCore.Session; +import java.util.Set; +import java.util.concurrent.ConcurrentMap; /** @@ -105,13 +107,13 @@ public final class HTTPDemon implements serverHandler, Cloneable { private static AlternativeDomainNames alternativeResolver = null; /** - * A hashset containing extensions that indicate content that should not be transported + * A Set containing extensions that indicate content that should not be transported * using zipped content encoding * @see #shallTransportZipped(String) */ //TODO: Load this from a file - private static final HashSet disallowZippedContentEncoding = new HashSet(Arrays.asList(new String[]{ + private static final Set disallowZippedContentEncoding = new HashSet(Arrays.asList(new String[]{ ".gz", ".tgz", ".jpg", ".jpeg", ".png", ".mp3", ".mov", ".avi", ".gif", ".zip", ".rar", ".bz2", ".lha", ".jar", ".rpm", ".arc", ".arj", ".wmv", ".ico", 
".bmp" })); @@ -120,13 +122,13 @@ public final class HTTPDemon implements serverHandler, Cloneable { public static final String copyright = "[ HTTP SERVER: AnomicHTTPD v" + vDATE + " by Michael Christen / www.anomic.de ]"; public static final String hline = "-------------------------------------------------------------------------------"; - public static final Map reverseMappingCache = new ConcurrentHashMap(); + public static final ConcurrentMap reverseMappingCache = new ConcurrentHashMap(); private static volatile Switchboard switchboard = null; private static String virtualHost = null; public static boolean keepAliveSupport = false; - private static Map YaCyHopAccessRequester = new ConcurrentHashMap(); - private static Map YaCyHopAccessTargets = new ConcurrentHashMap(); + private static ConcurrentMap YaCyHopAccessRequester = new ConcurrentHashMap(); + private static ConcurrentMap YaCyHopAccessTargets = new ConcurrentHashMap(); // for authentication private boolean use_proxyAccounts = false; diff --git a/source/de/anomic/search/MediaSnippet.java b/source/de/anomic/search/MediaSnippet.java index 07c27249c..0af028c7b 100644 --- a/source/de/anomic/search/MediaSnippet.java +++ b/source/de/anomic/search/MediaSnippet.java @@ -28,8 +28,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; +import java.util.List; import java.util.Map; -import java.util.TreeMap; +import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeSet; import de.anomic.crawler.CrawlProfile; @@ -96,10 +98,12 @@ public class MediaSnippet implements Comparable, Comparator, Comparator retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final ContentDomain mediatype, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) { + public static List retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final ContentDomain mediatype, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) { if (queryhashes.isEmpty()) { Log.logFine("snippet fetch", "no query hashes given for url " + url); return new ArrayList(); @@ -142,7 +146,7 @@ public class MediaSnippet implements Comparable, Comparator computeMediaSnippets(final DigestURI source, final Document document, final HandleSet queryhashes, final ContentDomain mediatype) { + public static List computeMediaSnippets(final DigestURI source, final Document document, final HandleSet queryhashes, final ContentDomain mediatype) { if (document == null) return new ArrayList(); Map media = null; @@ -155,7 +159,7 @@ public class MediaSnippet implements Comparable, Comparator entry; DigestURI url; String desc; - final ArrayList result = new ArrayList(); + final List result = new ArrayList(); while (i.hasNext()) { entry = i.next(); url = new DigestURI(entry.getKey()); @@ -169,9 +173,9 @@ public class MediaSnippet implements Comparable, Comparator computeImageSnippets(final DigestURI source, final Document document, final HandleSet queryhashes) { + public static List computeImageSnippets(final DigestURI source, final Document document, final HandleSet queryhashes) { - final TreeSet images = new TreeSet(); + final SortedSet images = new TreeSet(); images.addAll(document.getImages().values()); // iterates images in descending size order! 
// a measurement for the size of the images can be retrieved using the htmlFilterImageEntry.hashCode() @@ -179,7 +183,7 @@ public class MediaSnippet implements Comparable, Comparator result = new ArrayList(); + final List result = new ArrayList(); while (i.hasNext()) { ientry = i.next(); url = new DigestURI(ientry.url()); @@ -206,7 +210,7 @@ public class MediaSnippet implements Comparable, Comparator hs = Condenser.hashSentence(sentence, null); + final SortedMap hs = Condenser.hashSentence(sentence, null); final Iterator j = queryhashes.iterator(); byte[] hash; Integer pos; diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java index 79abf7753..857d35c2a 100644 --- a/source/de/anomic/search/QueryParams.java +++ b/source/de/anomic/search/QueryParams.java @@ -1,4 +1,4 @@ -// plasmaSearchQuery.java +// QueryParams.java // ----------------------- // part of YACY // (C) by Michael Peter Christen; mc@yacy.net @@ -31,6 +31,7 @@ import java.net.URLEncoder; import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.SortedSet; import java.util.TreeSet; import java.util.regex.Pattern; @@ -331,21 +332,21 @@ public final class QueryParams { String s; int l; // the string is clean now, but we must generate a set out of it - final String[] a = querystring.split(" "); - for (int i = 0; i < a.length; i++) { - if (a[i].startsWith("-")) { - exclude.add(a[i].substring(1)); + final String[] queries = querystring.split(" "); + for (int i = 0; i < queries.length; i++) { + if (queries[i].startsWith("-")) { + exclude.add(queries[i].substring(1)); } else { - while ((c = a[i].indexOf('-')) >= 0) { - s = a[i].substring(0, c); + while ((c = queries[i].indexOf('-')) >= 0) { + s = queries[i].substring(0, c); l = s.length(); if (l >= Condenser.wordminsize) {query.add(s);} if (l > 0) {fullquery.add(s);} - a[i] = a[i].substring(c + 1); + queries[i] = queries[i].substring(c + 1); } - l = a[i].length(); - if (l >= Condenser.wordminsize) {query.add(a[i]);} - if (l > 0) {fullquery.add(a[i]);} + l = queries[i].length(); + if (l >= Condenser.wordminsize) {query.add(queries[i]);} + if (l > 0) {fullquery.add(queries[i]);} } } } @@ -364,18 +365,18 @@ public final class QueryParams { public String queryStringForUrl() { try { - return URLEncoder.encode(this.queryString, "UTF-8"); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - return this.queryString; - } + return URLEncoder.encode(this.queryString, "UTF-8"); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + return this.queryString; + } } public TreeSet[] queryWords() { return cleanQuery(this.queryString); } - public void filterOut(final TreeSet blueList) { + public void filterOut(final SortedSet blueList) { // filter out words that appear in this set // this is applied to the queryHashes final HandleSet blues = Word.words2hashesHandles(blueList); diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index bee891395..b36d00f81 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -33,7 +33,8 @@ import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; -import java.util.TreeMap; +import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; @@ -68,11 +69,11 @@ public final class 
RankingProcess extends Thread { private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000; private final QueryParams query; - private final TreeSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) + private final SortedSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final int[] flagcount; // flag counter - private final TreeSet misses; // contains url-hashes that could not been found in the LURL-DB + private final SortedSet misses; // contains url-hashes that could not been found in the LURL-DB //private final int[] domZones; - private TreeMap> localSearchInclusion; + private SortedMap> localSearchInclusion; private int remote_resourceSize, remote_indexCount, remote_peerCount; private int local_resourceSize, local_indexCount; @@ -126,6 +127,7 @@ public final class RankingProcess extends Thread { return this.order; } + @Override public void run() { // do a search @@ -176,76 +178,74 @@ public final class RankingProcess extends Thread { // iterate over normalized entries and select some that are better than currently stored timer = System.currentTimeMillis(); - String domhash; boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0; // apply all constraints try { WordReferenceVars iEntry; while (true) { - iEntry = decodedEntries.poll(1, TimeUnit.SECONDS); - if (iEntry == null || iEntry == WordReferenceVars.poison) break; - assert (iEntry.metadataHash().length == index.row().primaryKeyLength); - //if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue; + iEntry = decodedEntries.poll(1, TimeUnit.SECONDS); + if (iEntry == null || iEntry == WordReferenceVars.poison) break; + assert (iEntry.metadataHash().length == index.row().primaryKeyLength); + //if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue; - // increase flag counts - for (int j = 0; j < 32; j++) { - if (iEntry.flags().get(j)) {flagcount[j]++;} - } - - // check constraints - if (!testFlags(iEntry)) { - continue; - } - - // check document domain - if (query.contentdom != ContentDomain.TEXT) { - if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue; - if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue; - if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue; - if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue; - } + // increase flag counts + for (int j = 0; j < 32; j++) { + if (iEntry.flags().get(j)) {flagcount[j]++;} + } - // check tld domain - /* - if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) { - // filter out all tld that do not match with wanted tld domain - continue; - } - */ + // check constraints + if (!testFlags(iEntry)) { + continue; + } + + // check document domain + if (query.contentdom != ContentDomain.TEXT) { + if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue; + if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue; + if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue; + if ((query.contentdom == ContentDomain.APP ) && 
(!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue; + } + + // check tld domain + /* + if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) { + // filter out all tld that do not match with wanted tld domain + continue; + } + */ // count domZones //this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++; - // check site constraints - domhash = new String(iEntry.metadataHash(), 6, 6); - if (query.sitehash == null) { - // no site constraint there; maybe collect host navigation information - if (nav_hosts && query.urlMask_isCatchall) { - this.hostNavigator.inc(domhash); - this.hostResolver.put(domhash, new String(iEntry.metadataHash())); - } - } else { - if (!domhash.equals(query.sitehash)) { - // filter out all domains that do not match with the site constraint - continue; - } - } + // check site constraints + String domhash = new String(iEntry.metadataHash(), 6, 6); + if (query.sitehash == null) { + // no site constraint there; maybe collect host navigation information + if (nav_hosts && query.urlMask_isCatchall) { + this.hostNavigator.inc(domhash); + this.hostResolver.put(domhash, new String(iEntry.metadataHash())); + } + } else { + if (!domhash.equals(query.sitehash)) { + // filter out all domains that do not match with the site constraint + continue; + } + } - // finally make a double-check and insert result to stack + // finally make a double-check and insert result to stack if (urlhashes.add(iEntry.metadataHash())) { stack.put(new ReverseElement(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest) - //System.out.println("stack.put: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue()); // increase counter for statistics if (local) this.local_indexCount++; else this.remote_indexCount++; } - } + } } catch (InterruptedException e) {} //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true); - EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.PRESORT, resourceName, index.size(), System.currentTimeMillis() - timer), false); + EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.PRESORT, resourceName, index.size(), System.currentTimeMillis() - timer), false); } /** @@ -261,7 +261,6 @@ public final class RankingProcess extends Thread { } public boolean feedingIsFinished() { - //System.out.println("feedingIsFinished: this.feeders == " + this.feeders); return System.currentTimeMillis() - this.startTime > 50 && this.feeders == 0; } @@ -288,7 +287,7 @@ public final class RankingProcess extends Thread { return localSearchInclusion; } - private WeakPriorityBlockingQueue.Element takeRWI(final boolean skipDoubleDom, long waitingtime) { + private WeakPriorityBlockingQueue.Element takeRWI(final boolean skipDoubleDom, final long waitingtime) { // returns from the current RWI list the best entry and removes this entry from the list WeakPriorityBlockingQueue m; @@ -348,7 +347,7 @@ public final class RankingProcess extends Thread { } } } catch (InterruptedException e1) {} - if (this.doubleDomCache.size() == 0) return null; + if (this.doubleDomCache.isEmpty()) return null; // no more entries in sorted RWI entries. 
Now take Elements from the doubleDomCache // find best entry from all caches @@ -395,7 +394,7 @@ public final class RankingProcess extends Thread { */ public URIMetadataRow takeURL(final boolean skipDoubleDom, final long waitingtime) { // returns from the current RWI list the best URL entry and removes this entry from the list - long timeout = System.currentTimeMillis() + Math.max(10, waitingtime); + final long timeout = System.currentTimeMillis() + Math.max(10, waitingtime); int p = -1; byte[] urlhash; long timeleft; @@ -470,8 +469,7 @@ public final class RankingProcess extends Thread { if (pageauthor != null && pageauthor.length() > 0) { // add author to the author navigator String authorhash = new String(Word.word2hash(pageauthor)); - //System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author); - + // check if we already are filtering for authors if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) { continue; @@ -581,7 +579,7 @@ public final class RankingProcess extends Thread { ScoreCluster result = new ScoreCluster(); if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result; - Iterator domhashs = this.hostNavigator.keys(false); + final Iterator domhashs = this.hostNavigator.keys(false); URIMetadataRow row; String domhash, urlhash, hostname; while (domhashs.hasNext() && result.size() < 30) { @@ -606,11 +604,11 @@ public final class RankingProcess extends Thread { public StaticScore getTopicNavigator(int count) { // create a list of words that had been computed by statistics over all // words that appeared in the url or the description of all urls - ScoreCluster result = new ScoreCluster(); + final ScoreCluster result = new ScoreCluster(); if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return result; if (this.ref.size() < 2) this.ref.clear(); // navigators with one entry are not useful - Map counts = new HashMap(); - Iterator i = this.ref.keys(false); + final Map counts = new HashMap(); + final Iterator i = this.ref.keys(false); String word; byte[] termHash; int c; @@ -635,8 +633,8 @@ public final class RankingProcess extends Thread { public void addTopic(final String[] words) { String word; - for (int i = 0; i < words.length; i++) { - word = words[i].toLowerCase(); + for (final String w : words) { + word = w.toLowerCase(); if (word.length() > 2 && "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off".indexOf(word) < 0 && !query.queryHashes.has(Word.word2hash(word)) && @@ -712,11 +710,9 @@ public final class RankingProcess extends Thread { final int m = Math.min(maxYBR, ybrTables.length); for (int i = 0; i < m; i++) { if ((ybrTables[i] != null) && (ybrTables[i].contains(domhash))) { - //System.out.println("YBR FOUND: " + urlHash + " (" + i + ")"); return i; } } - //System.out.println("NOT FOUND: " + urlHash); return 15; } diff --git a/source/de/anomic/search/ResultEntry.java b/source/de/anomic/search/ResultEntry.java index 6f6776828..2a4584947 100644 --- a/source/de/anomic/search/ResultEntry.java +++ b/source/de/anomic/search/ResultEntry.java @@ -27,9 +27,9 @@ package de.anomic.search; import java.io.IOException; -import java.util.ArrayList; import java.util.Comparator; import java.util.Date; +import java.util.List; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.document.Condenser; @@ -54,7 +54,7 @@ 
public class ResultEntry implements Comparable, Comparator mediaSnippets; + private final List mediaSnippets; // statistic objects public long dbRetrievalTime, snippetComputationTime; @@ -63,7 +63,7 @@ public class ResultEntry implements Comparable, Comparator mediaSnippets, + final List mediaSnippets, final long dbRetrievalTime, final long snippetComputationTime) { this.urlentry = urlentry; this.urlcomps = urlentry.metadata(); @@ -102,9 +102,11 @@ public class ResultEntry implements Comparable, Comparator 0) alternative_urlname = alternative_urlname.substring(0, p); } } + @Override public int hashCode() { return ByteArray.hashCode(urlentry.hash()); } + @Override public boolean equals(final Object obj) { if (this == obj) return true; if (obj == null) return false; @@ -145,7 +147,7 @@ public class ResultEntry implements Comparable, Comparator mediaSnippets() { + public List mediaSnippets() { return this.mediaSnippets; } public Date modified() { diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index 657f38e17..7a5821455 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -28,6 +28,8 @@ package de.anomic.search; import java.util.ArrayList; import java.util.Iterator; +import java.util.List; + import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.storage.StaticScore; @@ -43,7 +45,6 @@ import net.yacy.kelondro.util.EventTracker; import net.yacy.repository.LoaderDispatcher; import de.anomic.crawler.CrawlProfile; -import de.anomic.search.MediaSnippet; import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.graphics.ProfilingGraph; @@ -153,6 +154,7 @@ public class ResultFetcher { this.neededResults = neededResults; } + @Override public void run() { // start fetching urls and snippets @@ -163,18 +165,18 @@ public class ResultFetcher { //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis())); int loops = 0; while (System.currentTimeMillis() < this.timeout) { - this.lastLifeSign = System.currentTimeMillis(); - + this.lastLifeSign = System.currentTimeMillis(); + // check if we have enough - if (result.sizeAvailable() >= this.neededResults) { + if (result.sizeAvailable() >= this.neededResults) { //System.out.println("result.sizeAvailable() >= this.neededResults"); break; } - - // check if we can succeed if we try to take another url - if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) { - break; - } + + // check if we can succeed if we try to take another url + if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) { + break; + } // get next entry page = rankingProcess.takeURL(true, this.timeout - System.currentTimeMillis()); @@ -266,7 +268,7 @@ public class ResultFetcher { } else { // attach media information startTime = System.currentTimeMillis(); - final ArrayList mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal()); + final List mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal()); final long snippetComputationTime = System.currentTimeMillis() - startTime; Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime); @@ -369,9 +371,9 @@ public class ResultFetcher { int c = 0; if (result == null) return c; // iterate 
over all images in the result - final ArrayList imagemedia = result.mediaSnippets(); + final List imagemedia = result.mediaSnippets(); if (imagemedia != null) { - for (MediaSnippet ms: imagemedia) { + for (final MediaSnippet ms: imagemedia) { images.put(new ReverseElement(ms, ms.ranking)); // remove smallest in case of overflow c++; //System.out.println("*** image " + new String(ms.href.hash()) + " images.size = " + images.size() + "/" + images.size()); diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index 7f629ff86..5e726fb58 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -29,6 +29,8 @@ package de.anomic.search; import java.io.IOException; import java.util.Iterator; import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.Semaphore; @@ -73,19 +75,19 @@ public final class SearchEvent { // class variables for remote searches private yacySearch[] primarySearchThreads, secondarySearchThreads; - private final TreeMap preselectedPeerHashes; + private final SortedMap preselectedPeerHashes; private final ResultURLs crawlResults; private final Thread localSearchThread; - private final TreeMap IACount; - private final TreeMap IAResults; - private final TreeMap heuristics; + private final SortedMap IACount; + private final SortedMap IAResults; + private final SortedMap heuristics; private byte[] IAmaxcounthash, IAneardhthash; private final ReferenceOrder order; public SearchEvent(final QueryParams query, final yacySeedDB peers, final ResultURLs crawlResults, - final TreeMap preselectedPeerHashes, + final SortedMap preselectedPeerHashes, final boolean generateAbstracts, final LoaderDispatcher loader) { this.eventTime = System.currentTimeMillis(); // for lifetime check @@ -164,7 +166,7 @@ public final class SearchEvent { long mindhtdistance = Long.MAX_VALUE, l; byte[] wordhash; assert this.rankingProcess.searchContainerMap() != null; - for (Map.Entry> entry : this.rankingProcess.searchContainerMap().entrySet()) { + for (final Map.Entry> entry : this.rankingProcess.searchContainerMap().entrySet()) { wordhash = entry.getKey(); final ReferenceContainer container = entry.getValue(); assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + new String(container.getTermHash()) + ", wordhash = " + new String(wordhash); @@ -231,14 +233,14 @@ public final class SearchEvent { public void cleanup() { // stop all threads if (primarySearchThreads != null) { - for (yacySearch search : this.primarySearchThreads) { + for (final yacySearch search : this.primarySearchThreads) { if (search != null) synchronized (search) { if (search.isAlive()) search.interrupt(); } } } if (secondarySearchThreads != null) { - for (yacySearch search : this.secondarySearchThreads) { + for (final yacySearch search : this.secondarySearchThreads) { if (search != null) synchronized (search) { if (search.isAlive()) search.interrupt(); } @@ -304,14 +306,14 @@ public final class SearchEvent { boolean anyRemoteSearchAlive() { // check primary search threads if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) { - for (int i = 0; i < this.primarySearchThreads.length; i++) { - if ((this.primarySearchThreads[i] != null) && (this.primarySearchThreads[i].isAlive())) return true; + for (final yacySearch primarySearchThread : primarySearchThreads) { + if 
((primarySearchThread != null) && (primarySearchThread.isAlive())) return true; } } // maybe a secondary search thread is alive, check this if ((this.secondarySearchThreads != null) && (this.secondarySearchThreads.length != 0)) { - for (int i = 0; i < this.secondarySearchThreads.length; i++) { - if ((this.secondarySearchThreads[i] != null) && (this.secondarySearchThreads[i].isAlive())) return true; + for (final yacySearch secondarySearchThread : this.secondarySearchThreads) { + if ((secondarySearchThread != null) && (secondarySearchThread.isAlive())) return true; } } return false; @@ -395,12 +397,12 @@ public final class SearchEvent { // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation // this relation contains the information where specific urls can be found in specific peers - TreeMap> abstractsCache; - TreeSet checkedPeers; + SortedMap> abstractsCache; + SortedSet checkedPeers; Semaphore trigger; public SecondarySearchSuperviser() { - this.abstractsCache = new TreeMap>(); + this.abstractsCache = new TreeMap>(); this.checkedPeers = new TreeSet(); this.trigger = new Semaphore(0); } @@ -412,16 +414,16 @@ public final class SearchEvent { */ public void addAbstract(String wordhash, TreeMap singleAbstract) { synchronized (abstractsCache) { - TreeMap oldAbstract = abstractsCache.get(wordhash); + SortedMap oldAbstract = abstractsCache.get(wordhash); if (oldAbstract == null) { // new abstracts in the cache abstractsCache.put(wordhash, singleAbstract); } else synchronized (oldAbstract) { // extend the abstracts in the cache: join the single abstracts - for (Map.Entry oneref: singleAbstract.entrySet()) { - String urlhash = oneref.getKey(); - String peerlistNew = oneref.getValue(); - String peerlistOld = oldAbstract.get(urlhash); + for (final Map.Entry oneref: singleAbstract.entrySet()) { + final String urlhash = oneref.getKey(); + final String peerlistNew = oneref.getValue(); + final String peerlistOld = oldAbstract.get(urlhash); if (peerlistOld == null) { oldAbstract.put(urlhash, peerlistNew); } else { @@ -438,13 +440,13 @@ public final class SearchEvent { } private String wordsFromPeer(final String peerhash, final String urls) { - Map.Entry> entry; + Map.Entry> entry; String word, peerlist, url, wordlist = ""; - TreeMap urlPeerlist; + SortedMap urlPeerlist; int p; boolean hasURL; synchronized (this) { - final Iterator>> i = this.abstractsCache.entrySet().iterator(); + final Iterator>> i = this.abstractsCache.entrySet().iterator(); while (i.hasNext()) { entry = i.next(); word = entry.getKey(); @@ -465,6 +467,7 @@ public final class SearchEvent { return wordlist; } + @Override public void run() { try { int t = 0; @@ -496,12 +499,12 @@ public final class SearchEvent { if (abstractsCache.size() != query.queryHashes.size()) return; // join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list list - final TreeMap abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true); + final SortedMap abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true); if (abstractJoin.isEmpty()) return; // the join result is now a urlhash: peer-list relation // generate a list of peers that have the urls for the joined search result - final TreeMap secondarySearchURLs = new TreeMap(); // a (peerhash:urlhash-liststring) mapping + final SortedMap secondarySearchURLs = new TreeMap(); // a (peerhash:urlhash-liststring) mapping String url, urls, peer, peerlist; final String mypeerhash = peers.mySeed().hash; 
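The addAbstract method above merges a freshly received urlhash:peerlist map into the cached one for the same word hash. A simplified, self-contained sketch of that join, assuming the merge concatenates peer lists for url hashes seen before (the real code additionally synchronizes on the cached map):

    import java.util.Map;
    import java.util.SortedMap;
    import java.util.TreeMap;

    final class AbstractJoin {
        // unseen url hashes are added; known ones get their peer lists joined
        static void merge(final SortedMap<String, String> cached,
                          final SortedMap<String, String> incoming) {
            for (final Map.Entry<String, String> e : incoming.entrySet()) {
                final String old = cached.get(e.getKey());
                cached.put(e.getKey(), old == null ? e.getValue() : old + e.getValue());
            }
        }

        public static void main(final String[] args) {
            final SortedMap<String, String> cached = new TreeMap<String, String>();
            cached.put("url1", "peerA");
            final SortedMap<String, String> incoming = new TreeMap<String, String>();
            incoming.put("url1", "peerB");
            incoming.put("url2", "peerC");
            merge(cached, incoming);
            System.out.println(cached); // prints {url1=peerApeerB, url2=peerC}
        }
    }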
boolean mypeerinvolved = false; diff --git a/source/de/anomic/search/SearchEventCache.java b/source/de/anomic/search/SearchEventCache.java index c1aafff88..2ff894226 100644 --- a/source/de/anomic/search/SearchEventCache.java +++ b/source/de/anomic/search/SearchEventCache.java @@ -30,7 +30,8 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.TreeMap; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentHashMap; import net.yacy.kelondro.util.MemoryControl; @@ -41,7 +42,7 @@ import de.anomic.yacy.yacySeedDB; public class SearchEventCache { - private static ConcurrentHashMap lastEvents = new ConcurrentHashMap(); // a cache for objects from this class: re-use old search requests + private static ConcurrentMap lastEvents = new ConcurrentHashMap(); // a cache for objects from this class: re-use old search requests public static final long eventLifetimeBigMem = 600000; // the time an event will stay in the cache when available memory is high, 10 Minutes public static final long eventLifetimeMediumMem = 60000; // the time an event will stay in the cache when available memory is medium, 1 Minute public static final long eventLifetimeShortMem = 10000; // the time an event will stay in the cache when memory is low, 10 seconds @@ -82,6 +83,7 @@ public class SearchEventCache { * in case of failed words */ new Thread(){ + @Override public void run() { for (SearchEvent k: delete) { k.cleanup(); @@ -100,7 +102,7 @@ public class SearchEventCache { final QueryParams query, final yacySeedDB peers, final ResultURLs crawlResults, - final TreeMap preselectedPeerHashes, + final SortedMap preselectedPeerHashes, final boolean generateAbstracts, final LoaderDispatcher loader) { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index d26f6f573..84244ac7a 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -62,6 +62,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; @@ -173,9 +175,9 @@ public final class Switchboard extends serverSwitch { private int dhtMaxReferenceCount = 1000; // colored list management - public static TreeSet badwords = new TreeSet(NaturalOrder.naturalComparator); - public static TreeSet stopwords = new TreeSet(NaturalOrder.naturalComparator); - public static TreeSet blueList = null; + public static SortedSet badwords = new TreeSet(NaturalOrder.naturalComparator); + public static SortedSet stopwords = new TreeSet(NaturalOrder.naturalComparator); + public static SortedSet blueList = null; public static HandleSet badwordHashes = null; public static HandleSet blueListHashes = null; public static HandleSet stopwordHashes = null; @@ -224,7 +226,7 @@ public final class Switchboard extends serverSwitch { public int searchQueriesRobinsonFromLocal = 0; // absolute counter of all local queries submitted on this peer from a local or autheticated used public int searchQueriesRobinsonFromRemote = 0; // absolute counter of all local queries submitted on this peer from a remote IP without authentication public double searchQueriesGlobal = 0d; // partial counter of remote queries (1/number-of-requested-peers) - public TreeMap clusterhashes; // map of 
peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used + public SortedMap clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used public URLLicense licensedURLs; public List networkWhitelist, networkBlacklist; public FilterEngine domainList; @@ -232,7 +234,7 @@ public final class Switchboard extends serverSwitch { public LinkedBlockingQueue trail; public yacySeedDB peers; public WorkTables tables; - public TreeMap intranetURLs = new TreeMap(Base64Order.enhancedCoder); + public SortedMap intranetURLs = new TreeMap(Base64Order.enhancedCoder); public WorkflowProcessor indexingDocumentProcessor; public WorkflowProcessor indexingCondensementProcessor; @@ -256,7 +258,7 @@ public final class Switchboard extends serverSwitch { public Switchboard(final File dataPath, final File appPath, final String initPath, final String configPath) throws IOException { super(dataPath, appPath, initPath, configPath); MemoryTracker.startSystemProfiling(); - sb=this; + sb = this; // set loglevel and log setLog(new Log("PLASMA")); @@ -751,7 +753,7 @@ public final class Switchboard extends serverSwitch { netdef = netdef.trim(); try { netdefmap = Switchboard.loadFileAsMap(new DigestURI(netdef)); - if (netdefmap == null || netdefmap.size() == 0) continue netload; + if (netdefmap == null || netdefmap.isEmpty()) continue netload; setConfig(netdefmap); break netload; } catch (final Exception e) { @@ -1891,7 +1893,7 @@ public final class Switchboard extends serverSwitch { doclist.add(document); } - if (doclist.size() == 0) return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); + if (doclist.isEmpty()) return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); in.documents = doclist.toArray(new Document[doclist.size()]); Condenser[] condenser = new Condenser[in.documents.length]; if (this.log.isFine()) log.logFine("Condensing for '" + in.queueEntry.url().toNormalform(false, true) + "'"); @@ -1981,8 +1983,8 @@ public final class Switchboard extends serverSwitch { } // store rss feeds in document into rss table - for (Map.Entry rssEntry : document.getRSS().entrySet()) { - Tables.Data rssRow = new Tables.Data(); + for (final Map.Entry rssEntry : document.getRSS().entrySet()) { + final Tables.Data rssRow = new Tables.Data(); rssRow.put("referrer", queueEntry.url().hash()); rssRow.put("url", rssEntry.getKey().toNormalform(true, false).getBytes()); rssRow.put("title", rssEntry.getValue().getBytes()); @@ -2036,14 +2038,14 @@ public final class Switchboard extends serverSwitch { Map matcher = searchEvent.getQuery().separateMatches(links); // take the matcher and load them all - for (Map.Entry entry: matcher.entrySet()) { + for (final Map.Entry entry: matcher.entrySet()) { try { this.addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName); } catch (IOException e) {} catch (Parser.Failure e) {} } // take then the no-matcher and load them also - for (Map.Entry entry: links.entrySet()) { + for (final Map.Entry entry: links.entrySet()) { try { this.addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName); } catch (IOException e) {} catch (Parser.Failure e) {} @@ -2069,32 +2071,35 @@ public final class Switchboard extends serverSwitch { log.logWarning("addToIndex: cannot load " + url.toNormalform(false, false) + ": " + acceptedError); return; } - new Thread() {public void run() { - try { - Response 
response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE); - if (response == null) throw new IOException("response == null"); - if (response.getContent() == null) throw new IOException("content == null"); - if (response.getResponseHeader() == null) throw new IOException("header == null"); - Document[] documents = response.parse(); - if (documents != null) for (Document document: documents) { - if (document.indexingDenied()) throw new Parser.Failure("indexing is denied", url); - Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib); - ResultImages.registerImages(url, document, true); - webStructure.generateCitationReference(url, document, condenser, response.lastModified()); - storeDocumentIndex(process, response, document, condenser, searchEvent, "heuristic:" + heuristicName); - log.logInfo("addToIndex fill of url " + url.toNormalform(true, true) + " finished"); + new Thread() { + @Override + public void run() { + try { + final Response response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE); + if (response == null) throw new IOException("response == null"); + if (response.getContent() == null) throw new IOException("content == null"); + if (response.getResponseHeader() == null) throw new IOException("header == null"); + final Document[] documents = response.parse(); + if (documents != null) for (final Document document: documents) { + if (document.indexingDenied()) throw new Parser.Failure("indexing is denied", url); + final Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib); + ResultImages.registerImages(url, document, true); + webStructure.generateCitationReference(url, document, condenser, response.lastModified()); + storeDocumentIndex(process, response, document, condenser, searchEvent, "heuristic:" + heuristicName); + log.logInfo("addToIndex fill of url " + url.toNormalform(true, true) + " finished"); + } + } catch (IOException e) { + log.logWarning("addToIndex: failed loading " + url.toNormalform(false, false) + ": " + e.getMessage()); + } catch (Parser.Failure e) { + log.logWarning("addToIndex: failed parsing " + url.toNormalform(false, false) + ": " + e.getMessage()); } - } catch (IOException e) { - log.logWarning("addToIndex: failed loading " + url.toNormalform(false, false) + ": " + e.getMessage()); - } catch (Parser.Failure e) { - log.logWarning("addToIndex: failed parsing " + url.toNormalform(false, false) + ": " + e.getMessage()); } - }}.start(); + }.start(); } public class receiptSending implements Runnable { - yacySeed initiatorPeer; - URIMetadataRow reference; + private yacySeed initiatorPeer; + private URIMetadataRow reference; public receiptSending(final yacySeed initiatorPeer, final URIMetadataRow reference) { this.initiatorPeer = initiatorPeer; @@ -2124,7 +2129,7 @@ public final class Switchboard extends serverSwitch { public int adminAuthenticated(final RequestHeader requestHeader) { // authorization for localhost, only if flag is set to grant localhost access as admin - boolean accessFromLocalhost = accessFromLocalhost(requestHeader); + final boolean accessFromLocalhost = accessFromLocalhost(requestHeader); if (getConfigBool("adminAccountForLocalhost", false) && accessFromLocalhost) return 3; // soft-authenticated for localhost // get the authorization string from the header @@ -2187,16 +2192,16 @@ public final class Switchboard extends serverSwitch { } } - public static int accessFrequency(final HashMap> tracker, final String host) { + public static int accessFrequency(final Map> 
tracker, final String host) { // returns the access frequency in queries per hour for a given host and a specific tracker final long timeInterval = 1000 * 60 * 60; - final TreeSet accessSet = tracker.get(host); + final SortedSet accessSet = tracker.get(host); if (accessSet == null) return 0; return accessSet.tailSet(Long.valueOf(System.currentTimeMillis() - timeInterval)).size(); } public String dhtShallTransfer(final String segment) { - String cautionCause = onlineCaution(); + final String cautionCause = onlineCaution(); if (cautionCause != null) { return "online caution for " + cautionCause + ", dht transmission"; } @@ -2218,7 +2223,7 @@ public final class Switchboard extends serverSwitch { if (getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false")) { return "no DHT distribution: not enabled (per setting)"; } - Segment indexSegment = this.indexSegments.segment(segment); + final Segment indexSegment = this.indexSegments.segment(segment); if (indexSegment.urlMetadata().size() < 10) { return "no DHT distribution: loadedURL.size() = " + indexSegment.urlMetadata().size(); } @@ -2322,71 +2327,73 @@ public final class Switchboard extends serverSwitch { } public final void heuristicSite(final SearchEvent searchEvent, final String host) { - new Thread() {public void run() { - String r = host; - if (r.indexOf("//") < 0) r = "http://" + r; - - // get the links for a specific site - DigestURI url; - try { - url = new DigestURI(r); - } catch (MalformedURLException e) { - Log.logException(e); - return; - } - - Map links = null; - try { - links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE); - } catch (IOException e) { - Log.logException(e); - return; - } - Iterator i = links.keySet().iterator(); - MultiProtocolURI u; - while (i.hasNext()) { - u = i.next(); - if (!u.getHost().endsWith(host)) i.remove(); + new Thread() { + @Override + public void run() { + String r = host; + if (r.indexOf("//") < 0) r = "http://" + r; + + // get the links for a specific site + DigestURI url; + try { + url = new DigestURI(r); + } catch (MalformedURLException e) { + Log.logException(e); + return; + } + + final Map links; + try { + links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE); + } catch (IOException e) { + Log.logException(e); + return; + } + final Iterator i = links.keySet().iterator(); + while (i.hasNext()) { + if (!i.next().getHost().endsWith(host)) i.remove(); + } + + // add all pages to the index + addAllToIndex(url, links, searchEvent, "site"); } - - // add all pages to the index - addAllToIndex(url, links, searchEvent, "site"); - }}.start(); + }.start(); } public final void heuristicScroogle(final SearchEvent searchEvent) { - new Thread() {public void run() { - String query = searchEvent.getQuery().queryString(true); - int meta = query.indexOf("heuristic:"); - if (meta >= 0) { - int q = query.indexOf(' ', meta); - if (q >= 0) query = query.substring(0, meta) + query.substring(q + 1); else query = query.substring(0, meta); - } - final String urlString = "http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=" + query.trim().replaceAll(" ", "+") + "&n=2"; - DigestURI url; - try { - url = new DigestURI(MultiProtocolURI.unescape(urlString)); - } catch (MalformedURLException e1) { - return; - } - - Map links = null; - try { - links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE); - } catch (IOException e) { - Log.logException(e); - return; - } - Iterator i = links.keySet().iterator(); - MultiProtocolURI u; - while (i.hasNext()) { - u = i.next(); - if 
(u.toNormalform(false, false).indexOf("scroogle") >= 0) i.remove(); + new Thread() { + @Override + public void run() { + String query = searchEvent.getQuery().queryString(true); + int meta = query.indexOf("heuristic:"); + if (meta >= 0) { + final int q = query.indexOf(' ', meta); + if (q >= 0) query = query.substring(0, meta) + query.substring(q + 1); else query = query.substring(0, meta); + } + final String urlString = "http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=" + query.trim().replaceAll(" ", "+") + "&n=2"; + final DigestURI url; + try { + url = new DigestURI(MultiProtocolURI.unescape(urlString)); + } catch (MalformedURLException e1) { + return; + } + + Map links = null; + try { + links = loader.loadLinks(url, CrawlProfile.CacheStrategy.NOCACHE); + } catch (IOException e) { + Log.logException(e); + return; + } + Iterator i = links.keySet().iterator(); + while (i.hasNext()) { + if (i.next().toNormalform(false, false).indexOf("scroogle") >= 0) i.remove(); + } + log.logInfo("Heuristic: adding " + links.size() + " links from scroogle"); + // add all pages to the index + addAllToIndex(null, links, searchEvent, "scroogle"); } - log.logInfo("Heuristic: adding " + links.size() + " links from scroogle"); - // add all pages to the index - addAllToIndex(null, links, searchEvent, "scroogle"); - }}.start(); + }.start(); } public int currentPPM() { @@ -2542,14 +2549,11 @@ public final class Switchboard extends serverSwitch { port = 3128; } // create new config - ProxySettings.use = true; ProxySettings.use4ssl = true; ProxySettings.use4YaCy = true; ProxySettings.port = port; ProxySettings.host = host; - if ((ProxySettings.host == null) || (ProxySettings.host.length() == 0)) { - ProxySettings.use = false; - } + ProxySettings.use = ((ProxySettings.host != null) && (ProxySettings.host.length() > 0)); // determining if remote proxy usage is enabled ProxySettings.use = getConfigBool("remoteProxyUse", false); diff --git a/source/de/anomic/search/TextSnippet.java b/source/de/anomic/search/TextSnippet.java index 150326cbe..e1c1cdf3b 100644 --- a/source/de/anomic/search/TextSnippet.java +++ b/source/de/anomic/search/TextSnippet.java @@ -24,11 +24,11 @@ package de.anomic.search; -import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; -import java.util.TreeMap; +import java.util.List; +import java.util.SortedMap; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -95,7 +95,7 @@ public class TextSnippet implements Comparable, Comparator, Comparator, Comparator, Comparator" + audioline; //if (videoline != null) line += (line.length() == 0) ? videoline : "
" + videoline; //if (appline != null) line += (line.length() == 0) ? appline : "
" + appline; //if (hrefline != null) line += (line.length() == 0) ? hrefline : "
" + hrefline; - if (textline != null) line += (line.length() == 0) ? textline : "
" + textline; + if (textline != null) snippetLine += (snippetLine.length() == 0) ? textline : "
" + textline; - if (line == null || !remainingHashes.isEmpty()) { + if (snippetLine == null || !remainingHashes.isEmpty()) { init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found"); return; } - if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength); + if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength); // finally store this snippet in our own cache - snippetsCache.put(wordhashes, urls, line); + snippetsCache.put(wordhashes, urls, snippetLine); document.close(); - init(url.hash(), line, source, null); + init(url.hash(), snippetLine, source, null); } private void init(final byte[] urlhash, final String line, final int errorCode, final String errortext) { @@ -294,24 +294,24 @@ public class TextSnippet implements Comparable, Comparator i = queryHashes.iterator(); byte[] h; - final String[] w = line.split(" "); + final String[] words = line.split(" "); while (i.hasNext()) { h = i.next(); - for (int j = 0; j < w.length; j++) { - final ArrayList al = markedWordArrayList(w[j]); // mark special character separated words correctly if more than 1 word has to be marked - w[j] = ""; + for (int j = 0; j < words.length; j++) { + final List al = markedWordArrayList(words[j]); // mark special character separated words correctly if more than 1 word has to be marked + words[j] = ""; for (int k = 0; k < al.size(); k++) { if(k % 2 == 0){ // word has not been marked - w[j] += getWordMarked(al.get(k), h); + words[j] += getWordMarked(al.get(k), h); } else { // word has been marked, do not encode again - w[j] += al.get(k); + words[j] += al.get(k); } } } } final StringBuilder l = new StringBuilder(line.length() + queryHashes.size() * 8); - for (int j = 0; j < w.length; j++) { - l.append(w[j]); + for (int j = 0; j < words.length; j++) { + l.append(words[j]); l.append(' '); } return l.toString().trim(); @@ -325,6 +325,7 @@ public class TextSnippet implements Comparable, Comparator, Comparator"; - out = out + temp + CharacterCoding.unicode2html(word.substring(k,k+1), false); + out.append(temp); + out.append(CharacterCoding.unicode2html(theWord.substring(k,k+1), false)); temp = ""; } //last character - else if(k == (word.length()-1)) { - temp = temp + word.substring(k,k+1); + else if(k == (theWord.length()-1)) { + temp = temp + theWord.substring(k,k+1); if (new String(Word.word2hash(temp)).equals(new String(h))) temp = "" + CharacterCoding.unicode2html(temp, false) + ""; - out = out + temp; + out.append(temp); temp = ""; } - else temp = temp + word.substring(k,k+1); + else { + temp = temp + theWord.substring(k,k+1); + } } - word = out; + theWord = out; } //end contrib [MN] - else if (new String(Word.word2hash(word)).equals(new String(h))) word = "" + CharacterCoding.unicode2html(word, false) + ""; + else if (new String(Word.word2hash(theWord)).equals(new String(h))) { + theWord.replace(0, theWord.length(), CharacterCoding.unicode2html(theWord.toString(), false)); + theWord.insert(0, ""); + theWord.append(""); + } - word = CharacterCoding.unicode2html(prefix, false) - + word - + CharacterCoding.unicode2html(postfix, false); - return word; + theWord.insert(0, CharacterCoding.unicode2html(prefix.toString(), false)); + theWord.append(CharacterCoding.unicode2html(postfix.toString(), false)); + return theWord.toString(); } /** @@ -403,8 +412,8 @@ public class TextSnippet implements Comparable, Comparator markedWordArrayList(String string){ - ArrayList al = new java.util.ArrayList(1); + private static List markedWordArrayList(String 
string){ + List al = new java.util.ArrayList(1); Matcher m = p01.matcher(string); while (m.find()) { al.add(m.group(1)); @@ -417,8 +426,8 @@ public class TextSnippet implements Comparable, Comparator m = Condenser.hashSentence(sentence, null); - for (byte[] b: queryhashes) { + final SortedMap m = Condenser.hashSentence(sentence, null); + for (final byte[] b: queryhashes) { if (!(m.containsKey(b))) return false; } return true; diff --git a/source/de/anomic/yacy/dht/Dispatcher.java b/source/de/anomic/yacy/dht/Dispatcher.java index 15687ff40..114a703af 100755 --- a/source/de/anomic/yacy/dht/Dispatcher.java +++ b/source/de/anomic/yacy/dht/Dispatcher.java @@ -44,6 +44,7 @@ import net.yacy.kelondro.workflow.WorkflowProcessor; import de.anomic.search.Segment; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; +import java.util.List; public class Dispatcher { @@ -53,8 +54,8 @@ public class Dispatcher { * Before a RWI is sent, the following process is applied: * - (1) a number of RWIs are selected and accumulated. * When they are selected, they are removed from the index - * - (2) the RWI collection is splitted into a number of partitions according to the vertical DHT. - * - (3) the splitted RWIs are enqueued as Entry object in the entry 'cloud' of the dispatcher + * - (2) the RWI collection is split into a number of partitions according to the vertical DHT. + * - (3) the split RWIs are enqueued as Entry object in the entry 'cloud' of the dispatcher * - (4) more entries may be enqueued to the dispatcher and entries with the same primary target * are accumulated. * - (5) the largest entries are selected from the dispatcher cloud and enqueued to the 'next' array @@ -114,8 +115,6 @@ public class Dispatcher { seeds, gzipBody, timeout); - //this.selectedContainerCache = null; - //this.splittedContainerCache = null; int concurrentSender = Math.min(25, Math.max(10, WorkflowProcessor.useCPU * 2 + 1)); indexingTransmissionProcessor = new WorkflowProcessor( @@ -229,11 +228,11 @@ public class Dispatcher { * @throws RowSpaceExceededException */ @SuppressWarnings("unchecked") - private ArrayList>[] splitContainers(ArrayList> containers) throws RowSpaceExceededException { + private List>[] splitContainers(List> containers) throws RowSpaceExceededException { // init the result vector int partitionCount = this.seeds.scheme.verticalPartitions(); - ArrayList>[] partitions = (ArrayList>[]) new ArrayList[partitionCount]; + List>[] partitions = (ArrayList>[]) new ArrayList[partitionCount]; for (int i = 0; i < partitions.length; i++) partitions[i] = new ArrayList>(); // check all entries and split them to the partitions @@ -271,7 +270,7 @@ public class Dispatcher { * stored in a cache of the Entry for later transmission to the targets, which means that * then no additional IO is necessary. 
*/ - private void enqueueContainersToCloud(final ArrayList>[] containers) { + private void enqueueContainersToCloud(final List>[] containers) { if (transmissionCloud == null) return; ReferenceContainer lastContainer; byte[] primaryTarget; @@ -286,7 +285,7 @@ public class Dispatcher { // get or make a entry object entry = this.transmissionCloud.get(pTArray); // if this is not null, the entry is extended here - ArrayList targets = PeerSelection.getAcceptRemoteIndexSeedsList( + List targets = PeerSelection.getAcceptRemoteIndexSeedsList( seeds, primaryTarget, seeds.redundancy() * 3, @@ -327,7 +326,7 @@ public class Dispatcher { final int maxtime) { if (this.transmissionCloud == null) return false; - ArrayList> selectedContainerCache; + List> selectedContainerCache; try { selectedContainerCache = selectContainers(hash, limitHash, maxContainerCount, maxReferenceCount, maxtime); } catch (IOException e) { @@ -341,25 +340,25 @@ public class Dispatcher { return false; } - ArrayList>[] splittedContainerCache; + List>[] splitContainerCache; try { - splittedContainerCache = splitContainers(selectedContainerCache); + splitContainerCache = splitContainers(selectedContainerCache); } catch (RowSpaceExceededException e) { this.log.logSevere("selectContainersEnqueueToCloud: splitContainers failed because of too low RAM", e); return false; } selectedContainerCache = null; - if (splittedContainerCache == null) { - this.log.logInfo("selectContainersEnqueueToCloud: splittedContainerCache is empty, cannot do anything here."); + if (splitContainerCache == null) { + this.log.logInfo("selectContainersEnqueueToCloud: splitContainerCache is empty, cannot do anything here."); return false; } - this.log.logInfo("splitContainersFromCache: splittedContainerCache filled with " + splittedContainerCache.length + " partitions, deleting selectedContainerCache"); - if (splittedContainerCache.length != this.seeds.scheme.verticalPartitions()) { - this.log.logWarning("selectContainersEnqueueToCloud: splittedContainerCache has wrong length."); + this.log.logInfo("splitContainersFromCache: splitContainerCache filled with " + splitContainerCache.length + " partitions, deleting selectedContainerCache"); + if (splitContainerCache.length != this.seeds.scheme.verticalPartitions()) { + this.log.logWarning("selectContainersEnqueueToCloud: splitContainerCache has wrong length."); return false; } - enqueueContainersToCloud(splittedContainerCache); - splittedContainerCache = null; + enqueueContainersToCloud(splitContainerCache); + splitContainerCache = null; this.log.logInfo("selectContainersEnqueueToCloud: splittedContainerCache enqueued to cloud array which has now " + this.transmissionCloud.size() + " entries."); return true; } diff --git a/source/de/anomic/yacy/dht/PeerSelection.java b/source/de/anomic/yacy/dht/PeerSelection.java index a901da4ec..6aa08930e 100755 --- a/source/de/anomic/yacy/dht/PeerSelection.java +++ b/source/de/anomic/yacy/dht/PeerSelection.java @@ -27,6 +27,7 @@ package de.anomic.yacy.dht; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import net.yacy.cora.storage.DynamicScore; @@ -57,13 +58,13 @@ public class PeerSelection { final yacySeedDB seedDB, byte[] wordhash, int redundancy, - HashMap regularSeeds, + Map regularSeeds, DynamicScore ranking) { // this method is called from the search target computation - long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash); + final long[] dhtVerticalTargets = 
seedDB.scheme.dhtPositions(wordhash); yacySeed seed; - for (int v = 0; v < dhtVerticalTargets.length; v++) { - wordhash = FlatWordPartitionScheme.positionToHash(dhtVerticalTargets[v]); + for (long dhtVerticalTarget : dhtVerticalTargets) { + wordhash = FlatWordPartitionScheme.positionToHash(dhtVerticalTarget); Iterator dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, false); int c = Math.min(seedDB.sizeConnected(), redundancy); int cc = 3; // select a maximum of 3, this is enough redundancy @@ -81,7 +82,7 @@ public class PeerSelection { private static int guessedOwn = 0; - public static boolean shallBeOwnWord(final yacySeedDB seedDB, final byte[] wordhash, String urlhash, int redundancy) { + public static boolean shallBeOwnWord(final yacySeedDB seedDB, final byte[] wordhash, final String urlhash, final int redundancy) { // the guessIfOwnWord is a fast method that should only fail in case that a 'true' may be incorrect, but a 'false' shall always be correct if (guessIfOwnWord(seedDB, wordhash, urlhash)) { // this case must be verified, because it can be wrong. @@ -108,7 +109,7 @@ public class PeerSelection { private static boolean verifyIfOwnWord(final yacySeedDB seedDB, byte[] wordhash, String urlhash, int redundancy) { String myHash = seedDB.mySeed().hash; wordhash = FlatWordPartitionScheme.positionToHash(seedDB.scheme.dhtPosition(wordhash, urlhash)); - Iterator dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true); + final Iterator dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true); while (dhtEnum.hasNext()) { if (dhtEnum.next().hash.equals(myHash)) return true; } @@ -120,18 +121,18 @@ public class PeerSelection { } public static byte[] limitOver(final yacySeedDB seedDB, final byte[] startHash) { - Iterator seeds = getAcceptRemoteIndexSeeds(seedDB, startHash, 1, false); + final Iterator seeds = getAcceptRemoteIndexSeeds(seedDB, startHash, 1, false); if (seeds.hasNext()) return seeds.next().hash.getBytes(); return null; } - protected static ArrayList getAcceptRemoteIndexSeedsList( + protected static List getAcceptRemoteIndexSeedsList( yacySeedDB seedDB, final byte[] starthash, int max, boolean alsoMyOwn) { final Iterator seedIter = PeerSelection.getAcceptRemoteIndexSeeds(seedDB, starthash, max, alsoMyOwn); - ArrayList targets = new ArrayList(); + final ArrayList targets = new ArrayList(); while (seedIter.hasNext() && max-- > 0) targets.add(seedIter.next()); return targets; } @@ -145,7 +146,7 @@ public class PeerSelection { * @param alsoMyOwn * @return */ - public static Iterator getAcceptRemoteIndexSeeds(yacySeedDB seedDB, final byte[] starthash, int max, boolean alsoMyOwn) { + public static Iterator getAcceptRemoteIndexSeeds(final yacySeedDB seedDB, final byte[] starthash, final int max, final boolean alsoMyOwn) { return new acceptRemoteIndexSeedEnum(seedDB, starthash, Math.min(max, seedDB.sizeConnected()), alsoMyOwn); } @@ -225,19 +226,19 @@ public class PeerSelection { * @param minVersion * @return */ - protected static Iterator getDHTSeeds(yacySeedDB seedDB, final byte[] firstHash, final float minVersion) { + protected static Iterator getDHTSeeds(final yacySeedDB seedDB, final byte[] firstHash, final float minVersion) { // enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning return new seedDHTEnum(seedDB, firstHash, minVersion); } private static class seedDHTEnum implements Iterator { - Iterator e1, e2; - int steps; - float minVersion; - yacySeedDB seedDB; + private Iterator e1, 
e2; + private int steps; + private float minVersion; + private yacySeedDB seedDB; - public seedDHTEnum(yacySeedDB seedDB, final byte[] firstHash, final float minVersion) { + public seedDHTEnum(final yacySeedDB seedDB, final byte[] firstHash, final float minVersion) { this.seedDB = seedDB; this.steps = seedDB.sizeConnected(); this.minVersion = minVersion; @@ -279,17 +280,17 @@ public class PeerSelection { * @param seedDB * @return an iterator of seed objects */ - public static Iterator getProvidesRemoteCrawlURLs(yacySeedDB seedDB) { + public static Iterator getProvidesRemoteCrawlURLs(final yacySeedDB seedDB) { return new providesRemoteCrawlURLsEnum(seedDB); } private static class providesRemoteCrawlURLsEnum implements Iterator { - Iterator se; - yacySeed nextSeed; - yacySeedDB seedDB; + private Iterator se; + private yacySeed nextSeed; + private yacySeedDB seedDB; - public providesRemoteCrawlURLsEnum(yacySeedDB seedDB) { + public providesRemoteCrawlURLsEnum(final yacySeedDB seedDB) { this.seedDB = seedDB; se = getDHTSeeds(seedDB, null, yacyVersion.YACY_POVIDES_REMOTECRAWL_LISTS); nextSeed = nextInternal(); @@ -335,7 +336,7 @@ public class PeerSelection { * @param count number of wanted peers * @return a hash map of peer hashes to seed object */ - public static Map seedsByAge(yacySeedDB seedDB, final boolean up, int count) { + public static Map seedsByAge(final yacySeedDB seedDB, final boolean up, int count) { if (count > seedDB.sizeConnected()) count = seedDB.sizeConnected(); diff --git a/source/de/anomic/yacy/dht/Transmission.java b/source/de/anomic/yacy/dht/Transmission.java index 45174813a..172e5d84a 100644 --- a/source/de/anomic/yacy/dht/Transmission.java +++ b/source/de/anomic/yacy/dht/Transmission.java @@ -44,6 +44,9 @@ import de.anomic.search.Segment; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; public class Transmission { @@ -68,11 +71,11 @@ public class Transmission { public Chunk newChunk( byte[] primaryTarget, - final ArrayList targets, + final List targets, final Row payloadrow) { return new Chunk(primaryTarget, targets, payloadrow); } - + public class Chunk extends WorkflowJob implements Iterable> { /** * a dispatcher entry contains @@ -86,9 +89,9 @@ public class Transmission { */ private final byte[] primaryTarget; private final ReferenceContainerCache containers; - private final TreeMap references; + private final SortedMap references; private final HandleSet badReferences; - private final ArrayList targets; + private final List targets; private int hit, miss; /** @@ -101,7 +104,7 @@ public class Transmission { */ public Chunk( byte[] primaryTarget, - final ArrayList targets, + final List targets, final Row payloadrow) { super(); this.primaryTarget = primaryTarget; @@ -122,7 +125,7 @@ public class Transmission { public void add(ReferenceContainer container) throws RowSpaceExceededException { // iterate through the entries in the container and check if the reference is in the repository Iterator i = container.entries(); - ArrayList notFoundx = new ArrayList(); + List notFoundx = new ArrayList(); while (i.hasNext()) { WordReference e = i.next(); if (references.containsKey(e.metadataHash())) continue; @@ -139,7 +142,7 @@ public class Transmission { } } // now delete all references that were not found - for (byte[] b : notFoundx) container.removeReference(b); + for (final byte[] b : notFoundx) container.removeReference(b); // finally add the remaining 
container to the cache containers.add(container); } diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index ee8689788..7cea44824 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -53,6 +53,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.SortedMap; import java.util.TreeMap; import java.util.regex.Pattern; @@ -97,10 +98,10 @@ import de.anomic.tools.crypt; public final class yacyClient { - private static byte[] postToFile(final yacySeed target, final String filename, final LinkedHashMap parts, final int timeout) throws IOException { + private static byte[] postToFile(final yacySeed target, final String filename, final Map parts, final int timeout) throws IOException { return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts); } - private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final LinkedHashMap parts, final int timeout) throws IOException { + private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final Map parts, final int timeout) throws IOException { return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts); } @@ -132,7 +133,7 @@ public final class yacyClient { final String salt = crypt.randomSalt(); try { // generate request - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); parts.put("count", new StringBody("20")); parts.put("seed", new StringBody(mySeed.genSeedStr(salt))); // send request @@ -256,7 +257,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); parts.put("object", new StringBody("seed")); parts.put("env", new StringBody(seedHash)); final byte[] content = postToFile(target, "query.html", parts, 10000); @@ -277,7 +278,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); parts.put("object", new StringBody("rwicount")); parts.put("ttl", new StringBody("0")); parts.put("env", new StringBody(wordHash)); @@ -300,7 +301,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); parts.put("object", new StringBody("lurlcount")); parts.put("ttl", new StringBody("0")); parts.put("env", new StringBody("")); @@ -337,7 +338,7 @@ public final class yacyClient { // send request try { /* a long time-out is needed */ - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); + final Map 
parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); parts.put("call", new StringBody("remotecrawl")); parts.put("count", new StringBody(Integer.toString(maxCount))); parts.put("time", new StringBody(Long.toString(maxTime))); @@ -585,7 +586,7 @@ public final class yacyClient { public Map indexabstract; // index abstracts, a collection of url-hashes per word public SearchResult( - LinkedHashMap parts, + Map parts, final yacySeed mySeed, final String wordhashes, final String excludehashes, @@ -695,7 +696,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt); parts.put("process", new StringBody("permission")); final byte[] content = postToFile(seedDB, targetHash, "message.html", parts, 5000); final Map result = FileUtils.table(content); @@ -715,7 +716,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt); parts.put("process", new StringBody("post")); parts.put("myseed", new StringBody(seedDB.mySeed().genSeedStr(salt))); parts.put("subject", new StringBody(subject)); @@ -754,7 +755,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); parts.put("process", new StringBody("permission")); parts.put("purpose", new StringBody("crcon")); parts.put("filename", new StringBody(filename)); @@ -777,7 +778,7 @@ public final class yacyClient { // send request try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); parts.put("process", new StringBody("store")); parts.put("purpose", new StringBody("crcon")); parts.put("filesize", new StringBody(Long.toString(file.length))); @@ -853,7 +854,7 @@ public final class yacyClient { // send request try { // prepare request - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt); parts.put("process", new StringBody(process)); parts.put("urlhash", new StringBody(((entry == null) ? 
"" : new String(entry.hash())))); parts.put("result", new StringBody(result)); @@ -883,7 +884,7 @@ public final class yacyClient { public static String transferIndex( final yacySeed targetSeed, final ReferenceContainerCache indexes, - final TreeMap urlCache, + final SortedMap urlCache, final boolean gzipBody, final int timeout) { @@ -1006,13 +1007,13 @@ public final class yacyClient { if (indexcount == 0) { // nothing to do but everything ok - final HashMap result = new HashMap(2); + final Map result = new HashMap(2); result.put("result", "ok"); result.put("unknownURL", ""); return result; } try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); parts.put("wordc", new StringBody(Integer.toString(indexes.size()))); parts.put("entryc", new StringBody(Integer.toString(indexcount))); parts.put("indexes", new StringBody(entrypost.toString())); @@ -1037,7 +1038,7 @@ public final class yacyClient { // prepare post values final String salt = crypt.randomSalt(); - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); // enabling gzip compression for post request body if (gzipBody && (targetSeed.getVersion() < yacyVersion.YACY_SUPPORTS_GZIP_POST_REQUESTS_CHUNKED)) { @@ -1081,7 +1082,7 @@ public final class yacyClient { String address = targetSeed.getClusterAddress(); if (address == null) { address = "localhost:8080"; } try { - final LinkedHashMap parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); + final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts); return FileUtils.table(content); } catch (final Exception e) { @@ -1112,7 +1113,7 @@ public final class yacyClient { } else { searchlines.add(args[2]); } - for (String line: searchlines) { + for (final String line: searchlines) { final byte[] wordhashe = QueryParams.hashSet2hashString(Word.words2hashesHandles(QueryParams.cleanQuery(line)[0])).getBytes(); long time = System.currentTimeMillis(); SearchResult result; @@ -1164,7 +1165,7 @@ public final class yacyClient { final String vhost = url.getHost(); final int timeout = 10000; // new data - final LinkedHashMap newpost = new LinkedHashMap(); + final Map newpost = new LinkedHashMap(); try { newpost.put("process", new StringBody("permission")); newpost.put("purpose", new StringBody("crcon")); diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index 8d23a51b4..557882f2b 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -27,8 +27,9 @@ package de.anomic.yacy; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; -import java.util.TreeMap; +import java.util.SortedMap; import java.util.regex.Pattern; import net.yacy.cora.storage.DynamicScore; @@ -112,6 +113,7 @@ public class yacySearch extends Thread { this.constraint = constraint; } + @Override public void run() { try { this.urls = yacyClient.search( @@ -137,10 +139,10 @@ public class 
yacySearch extends Thread { } public static String set2string(final HandleSet hashes) { - String wh = ""; + StringBuilder wh = new StringBuilder(); final Iterator iter = hashes.iterator(); - while (iter.hasNext()) { wh = wh + new String(iter.next()); } - return wh; + while (iter.hasNext()) { wh.append(new String(iter.next())); } + return wh.toString(); } public int links() { @@ -155,25 +157,25 @@ public class yacySearch extends Thread { return targetPeer; } - private static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final TreeMap peerhashes) { + private static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final SortedMap peerhashes) { final Iterator> i = peerhashes.entrySet().iterator(); - final ArrayList l = new ArrayList(); + final List l = new ArrayList(); Map.Entry entry; yacySeed s; while (i.hasNext()) { - entry = i.next(); - s = seedDB.get(new String(entry.getKey())); // should be getConnected; get only during testing time - if (s != null) { - s.setAlternativeAddress(entry.getValue()); - l.add(s); - } - } - final yacySeed[] result = new yacySeed[l.size()]; - for (int j = 0; j < l.size(); j++) { - result[j] = l.get(j); + entry = i.next(); + s = seedDB.get(new String(entry.getKey())); // should be getConnected; get only during testing time + if (s != null) { + s.setAlternativeAddress(entry.getValue()); + l.add(s); + } } - return result; - //return (yacySeed[]) l.toArray(); +// final yacySeed[] result = new yacySeed[l.size()]; +// for (int j = 0; j < l.size(); j++) { +// result[j] = l.get(j); +// } +// return result; + return l.toArray(new yacySeed[0]); } private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final HandleSet wordhashes, int seedcount, int redundancy) { @@ -187,8 +189,8 @@ public class yacySearch extends Thread { // put in seeds according to dht final DynamicScore ranking = new ScoreCluster(); - final HashMap regularSeeds = new HashMap(); - final HashMap matchingSeeds = new HashMap(); + final Map regularSeeds = new HashMap(); + final Map matchingSeeds = new HashMap(); yacySeed seed; Iterator dhtEnum; Iterator iter = wordhashes.iterator(); @@ -235,7 +237,7 @@ public class yacySearch extends Thread { seedcount = Math.min(ranking.size(), seedcount); final yacySeed[] result = new yacySeed[seedcount + matchingSeeds.size()]; c = 0; - Iterator iters = ranking.keys(false); // higher are better + final Iterator iters = ranking.keys(false); // higher are better while (iters.hasNext() && c < seedcount) { seed = regularSeeds.get(iters.next()); seed.selectscore = c; @@ -267,7 +269,7 @@ public class yacySearch extends Thread { final Blacklist blacklist, final RankingProfile rankingProfile, final Bitfield constraint, - final TreeMap clusterselection) { + final SortedMap clusterselection) { // check own peer status //if (wordIndex.seedDB.mySeed() == null || wordIndex.seedDB.mySeed().getPublicAddress() == null) { return null; } @@ -310,7 +312,7 @@ public class yacySearch extends Thread { final RankingProcess containerCache, final String targethash, final Blacklist blacklist, final RankingProfile rankingProfile, - final Bitfield constraint, final TreeMap clusterselection) { + final Bitfield constraint, final SortedMap clusterselection) { assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes; // check own peer status @@ -332,23 +334,25 @@ public class yacySearch extends Thread { public static int remainingWaiting(final yacySearch[] searchThreads) { if (searchThreads == null) return 0; int alive = 0; - for (int i = 0; i < searchThreads.length; 
i++) { - if (searchThreads[i].isAlive()) alive++; + for (final yacySearch searchThread : searchThreads) { + if (searchThread.isAlive()) alive++; } return alive; } public static int collectedLinks(final yacySearch[] searchThreads) { int links = 0; - for (int i = 0; i < searchThreads.length; i++) { - if (!(searchThreads[i].isAlive()) && searchThreads[i].urls > 0) links += searchThreads[i].urls; + for (final yacySearch searchThread : searchThreads) { + if (!(searchThread.isAlive()) && searchThread.urls > 0) { + links += searchThread.urls; + } } return links; } public static void interruptAlive(final yacySearch[] searchThreads) { - for (int i = 0; i < searchThreads.length; i++) { - if (searchThreads[i].isAlive()) searchThreads[i].interrupt(); + for (final yacySearch searchThread : searchThreads) { + if (searchThread.isAlive()) searchThread.interrupt(); } } diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java index c12faa34f..7d3864cd6 100644 --- a/source/net/yacy/cora/protocol/http/HTTPClient.java +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -34,6 +34,7 @@ import java.security.NoSuchAlgorithmException; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.util.LinkedHashMap; +import java.util.Map; import java.util.Set; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; @@ -337,11 +338,11 @@ public class HTTPClient { * @return content bytes * @throws IOException */ - public byte[] POSTbytes(final String uri, final LinkedHashMap parts, final boolean usegzip) throws IOException { + public byte[] POSTbytes(final String uri, final Map parts, final boolean usegzip) throws IOException { final HttpPost httpPost = new HttpPost(uri); final MultipartEntity multipartEntity = new MultipartEntity(); - for (Entry part : parts.entrySet()) + for (final Entry part : parts.entrySet()) multipartEntity.addPart(part.getKey(), part.getValue()); // statistics upbytes = multipartEntity.getContentLength(); @@ -371,100 +372,100 @@ public class HTTPClient { return httpResponse.getStatusLine().getStatusCode(); } - /** - * This method gets direct access to the content-stream - * Since this way is uncontrolled by the Client think of using 'writeTo' instead! + /** + * This method gets direct access to the content-stream + * Since this way is uncontrolled by the Client think of using 'writeTo' instead! * Please take care to call finish()! 
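Note: with POSTbytes now taking a Map rather than a LinkedHashMap, callers may hand over any map of parts; a LinkedHashMap remains the natural choice when the order of form fields on the wire matters. A hypothetical caller sketch (the URI and part name are made up; only the POSTbytes signature above and the StringBody parts used throughout this patch are taken as given):

import java.util.LinkedHashMap;
import java.util.Map;
import net.yacy.cora.protocol.http.HTTPClient;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.entity.mime.content.StringBody;

public class PostPartsSketch {
    public static void main(final String[] args) throws Exception {
        // any Map implementation is accepted now; LinkedHashMap keeps field order stable
        final Map<String, ContentBody> parts = new LinkedHashMap<String, ContentBody>();
        parts.put("process", new StringBody("permission"));
        final HTTPClient client = new HTTPClient();
        final byte[] response = client.POSTbytes("http://localhost:8080/yacy/message.html", parts, false);
        System.out.println(response == null ? "no response" : response.length + " bytes");
    }
}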
- * - * @return the content as InputStream - * @throws IOException - */ - public InputStream getContentstream() throws IOException { - if (httpResponse != null && currentRequest != null) { - final HttpEntity httpEntity = httpResponse.getEntity(); - if (httpEntity != null) try { - return httpEntity.getContent(); - } catch (final IOException e) { - ConnectionInfo.removeConnection(currentRequest.hashCode()); - currentRequest.abort(); - currentRequest = null; - throw e; - } - } - return null; - } + * + * @return the content as InputStream + * @throws IOException + */ + public InputStream getContentstream() throws IOException { + if (httpResponse != null && currentRequest != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) try { + return httpEntity.getContent(); + } catch (final IOException e) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + throw e; + } + } + return null; + } - /** - * This method streams the content to the outputStream + /** + * This method streams the content to the outputStream * Please take care to call finish()! - * - * @param outputStream - * @throws IOException - */ - public void writeTo(final OutputStream outputStream) throws IOException { - if (httpResponse != null && currentRequest != null) { - final HttpEntity httpEntity = httpResponse.getEntity(); - if (httpEntity != null) try { - httpEntity.writeTo(outputStream); - outputStream.flush(); - // TODO: The name of this method is misnomer. - // It will be renamed to #finish() in the next major release of httpcore - httpEntity.consumeContent(); - ConnectionInfo.removeConnection(currentRequest.hashCode()); - currentRequest = null; - } catch (final IOException e) { - ConnectionInfo.removeConnection(currentRequest.hashCode()); - currentRequest.abort(); - currentRequest = null; - throw e; - } - } - } + * + * @param outputStream + * @throws IOException + */ + public void writeTo(final OutputStream outputStream) throws IOException { + if (httpResponse != null && currentRequest != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) try { + httpEntity.writeTo(outputStream); + outputStream.flush(); + // TODO: The name of this method is misnomer. + // It will be renamed to #finish() in the next major release of httpcore + httpEntity.consumeContent(); + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest = null; + } catch (final IOException e) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + throw e; + } + } + } - /** - * This method ensures correct finish of client-connections - * This method should be used after every use of GET or POST and writeTo or getContentstream! - * - * @throws IOException - */ - public void finish() throws IOException { - if (httpResponse != null) { - final HttpEntity httpEntity = httpResponse.getEntity(); - if (httpEntity != null && httpEntity.isStreaming()) { - // TODO: The name of this method is misnomer. - // It will be renamed to #finish() in the next major release of httpcore - httpEntity.consumeContent(); - } - } - if (currentRequest != null) { - ConnectionInfo.removeConnection(currentRequest.hashCode()); - currentRequest.abort(); - currentRequest = null; - } - } + /** + * This method ensures correct finish of client-connections + * This method should be used after every use of GET or POST and writeTo or getContentstream! 
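Note: the contract spelled out in this javadoc (call finish() after every GET or POST that used writeTo or getContentstream) is easiest to honor with try/finally. A hypothetical caller sketch using only methods shown in this file; after GETbytes, which consumes the body itself, finish() is a harmless safety net rather than a requirement:

import java.io.IOException;
import net.yacy.cora.protocol.http.HTTPClient;

public class FetchSketch {
    static byte[] fetch(final String url) throws IOException {
        final HTTPClient client = new HTTPClient();
        try {
            return client.GETbytes(url); // reads the whole body into memory
        } finally {
            client.finish(); // ensures the connection is released even on failure
        }
    }
}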
+ * + * @throws IOException + */ + public void finish() throws IOException { + if (httpResponse != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null && httpEntity.isStreaming()) { + // TODO: The name of this method is misnomer. + // It will be renamed to #finish() in the next major release of httpcore + httpEntity.consumeContent(); + } + } + if (currentRequest != null) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + } + } private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final long maxBytes) throws IOException { byte[] content = null; try { - execute(httpUriRequest); - if (httpResponse == null) return null; - // get the response body - final HttpEntity httpEntity = httpResponse.getEntity(); - if (httpEntity != null) { - if (getStatusCode() == 200 && httpEntity.getContentLength() < maxBytes) { - content = EntityUtils.toByteArray(httpEntity); - } - // TODO: The name of this method is misnomer. - // It will be renamed to #finish() in the next major release of httpcore - httpEntity.consumeContent(); - } - } catch (final IOException e) { - ConnectionInfo.removeConnection(httpUriRequest.hashCode()); - httpUriRequest.abort(); - throw e; - } - ConnectionInfo.removeConnection(httpUriRequest.hashCode()); - return content; + execute(httpUriRequest); + if (httpResponse == null) return null; + // get the response body + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) { + if (getStatusCode() == 200 && httpEntity.getContentLength() < maxBytes) { + content = EntityUtils.toByteArray(httpEntity); + } + // TODO: The name of this method is misnomer. + // It will be renamed to #finish() in the next major release of httpcore + httpEntity.consumeContent(); + } + } catch (final IOException e) { + ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + httpUriRequest.abort(); + throw e; + } + ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + return content; } private void execute(final HttpUriRequest httpUriRequest) throws IOException { @@ -485,19 +486,19 @@ public class HTTPClient { assert !hrequest.expectContinue(); } httpResponse = httpClient.execute(httpUriRequest, httpContext); - } catch (Exception e) { - //e.printStackTrace(); - ConnectionInfo.removeConnection(httpUriRequest.hashCode()); - httpUriRequest.abort(); - throw new IOException("Client can't execute: " + e.getMessage()); - } + } catch (Exception e) { + //e.printStackTrace(); + ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + httpUriRequest.abort(); + throw new IOException("Client can't execute: " + e.getMessage()); + } } private void setHeaders(final HttpUriRequest httpUriRequest) { if (headers != null) { - for (Header header : headers) { - httpUriRequest.addHeader(header); - } + for (final Header header : headers) { + httpUriRequest.addHeader(header); + } } if (realm != null) httpUriRequest.setHeader("Authorization", "realm=" + realm); @@ -535,92 +536,92 @@ public class HTTPClient { private static SSLSocketFactory getSSLSocketFactory() { final TrustManager trustManager = new X509TrustManager() { - public void checkClientTrusted(X509Certificate[] chain, String authType) - throws CertificateException { - } + public void checkClientTrusted(X509Certificate[] chain, String authType) + throws CertificateException { + } - public void checkServerTrusted(X509Certificate[] chain, String authType) - throws CertificateException { - } + public void 
checkServerTrusted(X509Certificate[] chain, String authType) + throws CertificateException { + } - public X509Certificate[] getAcceptedIssuers() { - return null; - } + public X509Certificate[] getAcceptedIssuers() { + return null; + } }; SSLContext sslContext = null; try { - sslContext = SSLContext.getInstance("TLS"); - sslContext.init(null, new TrustManager[] { trustManager }, null); - } catch (NoSuchAlgorithmException e) { - // should not happen - // e.printStackTrace(); - } catch (KeyManagementException e) { - // should not happen - // e.printStackTrace(); - } + sslContext = SSLContext.getInstance("TLS"); + sslContext.init(null, new TrustManager[] { trustManager }, null); + } catch (NoSuchAlgorithmException e) { + // should not happen + // e.printStackTrace(); + } catch (KeyManagementException e) { + // should not happen + // e.printStackTrace(); + } - final SSLSocketFactory sslSF = new SSLSocketFactory(sslContext); - sslSF.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); + final SSLSocketFactory sslSF = new SSLSocketFactory(sslContext); + sslSF.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); return sslSF; } - /** - * testing - * - * @param args urls to test - */ - public static void main(final String[] args) { - String url = null; - // prepare Parts - final LinkedHashMap newparts = new LinkedHashMap(); - try { - newparts.put("foo", new StringBody("FooBar")); - newparts.put("bar", new StringBody("BarFoo")); - } catch (UnsupportedEncodingException e) { - System.out.println(e.getStackTrace()); - } - HTTPClient client = new HTTPClient(); - client.setUserAgent("foobar"); - client.setRedirecting(false); - // Get some - for (int i = 0; i < args.length; i++) { - url = args[i]; - if (!url.toUpperCase().startsWith("HTTP://")) { - url = "http://" + url; - } - try { - System.out.println(new String(client.GETbytes(url))); - } catch (IOException e) { - e.printStackTrace(); - } - } - // Head some + /** + * testing + * + * @param args urls to test + */ + public static void main(final String[] args) { + String url = null; + // prepare Parts + final Map newparts = new LinkedHashMap(); + try { + newparts.put("foo", new StringBody("FooBar")); + newparts.put("bar", new StringBody("BarFoo")); + } catch (UnsupportedEncodingException e) { + System.out.println(e.getStackTrace()); + } + HTTPClient client = new HTTPClient(); + client.setUserAgent("foobar"); + client.setRedirecting(false); + // Get some + for (final String arg : args) { + url = arg; + if (!url.toUpperCase().startsWith("HTTP://")) { + url = "http://" + url; + } + try { + System.out.println(new String(client.GETbytes(url))); + } catch (IOException e) { + e.printStackTrace(); + } + } + // Head some // try { // client.HEADResponse(url); // } catch (IOException e) { // e.printStackTrace(); // } - for (Header header: client.getHttpResponse().getAllHeaders()) { - System.out.println("Header " + header.getName() + " : " + header.getValue()); + for (final Header header: client.getHttpResponse().getAllHeaders()) { + System.out.println("Header " + header.getName() + " : " + header.getValue()); // for (HeaderElement element: header.getElements()) // System.out.println("Element " + element.getName() + " : " + element.getValue()); - } - System.out.println(client.getHttpResponse().getLocale()); - System.out.println(client.getHttpResponse().getProtocolVersion()); - System.out.println(client.getHttpResponse().getStatusLine()); - // Post some + } + System.out.println(client.getHttpResponse().getLocale()); + 
System.out.println(client.getHttpResponse().getProtocolVersion()); + System.out.println(client.getHttpResponse().getStatusLine()); + // Post some // try { // System.out.println(new String(client.POSTbytes(url, newparts))); // } catch (IOException e1) { // e1.printStackTrace(); // } - // Close out connection manager - try { - HTTPClient.closeConnectionManager(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } + // Close out connection manager + try { + HTTPClient.closeConnectionManager(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } /** diff --git a/source/net/yacy/cora/protocol/http/HTTPConnector.java b/source/net/yacy/cora/protocol/http/HTTPConnector.java index bff6439de..5f1e2f36a 100644 --- a/source/net/yacy/cora/protocol/http/HTTPConnector.java +++ b/source/net/yacy/cora/protocol/http/HTTPConnector.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.LinkedHashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import net.yacy.cora.document.MultiProtocolURI; @@ -35,7 +36,7 @@ import org.apache.http.entity.mime.content.ContentBody; */ public class HTTPConnector { - private static final Map cons = new ConcurrentHashMap(); + private static final ConcurrentMap cons = new ConcurrentHashMap(); private String userAgent; private HTTPConnector(String userAgent) { @@ -59,7 +60,7 @@ public class HTTPConnector { * @return response body * @throws IOException */ - public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap post) throws IOException { + public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, final Map post) throws IOException { return post(url, timeout, vhost, post, false); } @@ -74,7 +75,7 @@ public class HTTPConnector { * @return response body * @throws IOException */ - public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap post, final boolean usegzip) throws IOException { + public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, final Map post, final boolean usegzip) throws IOException { final HTTPClient client = new HTTPClient(); client.setTimout(timeout); client.setUserAgent(this.userAgent); diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java index ef786c133..67c25c4c6 100644 --- a/source/net/yacy/document/Condenser.java +++ b/source/net/yacy/document/Condenser.java @@ -36,11 +36,14 @@ import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Properties; +import java.util.Set; +import java.util.SortedSet; +import java.util.SortedMap; import java.util.TreeMap; -import java.util.TreeSet; import de.anomic.data.DidYouMeanLibrary; @@ -233,8 +236,8 @@ public final class Condenser { final int phrase, final int flagpos, final Bitfield flagstemplate, - boolean useForLanguageIdentification, - DidYouMeanLibrary meaningLib) { + final boolean useForLanguageIdentification, + final DidYouMeanLibrary meaningLib) { String word; Word wprop; sievedWordsEnum wordenum; @@ -259,14 +262,14 @@ public final class Condenser { } } - public Condenser(final InputStream text, DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException { + public Condenser(final InputStream text, final DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException { 
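Note: excludeWords, shown just below, now accepts any SortedSet instead of requiring a TreeSet. A hypothetical usage sketch; the sample text and stopword are made up, String stopwords are assumed from this patch's call sites, and the null meaningLib mirrors how other call sites here pass null:

import java.io.ByteArrayInputStream;
import java.util.SortedSet;
import java.util.TreeSet;
import net.yacy.document.Condenser;

public class CondenserSketch {
    static int demo() throws Exception {
        final Condenser c = new Condenser(
                new ByteArrayInputStream("the quick brown fox".getBytes("UTF-8")), null);
        final SortedSet<String> stopwords = new TreeSet<String>();
        stopwords.add("the");
        return c.excludeWords(stopwords); // number of words removed from the condensement
    }
}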
this.languageIdentificator = null; // we don't need that here // analysis = new Properties(); words = new TreeMap(); createCondensement(text, meaningLib); } - public int excludeWords(final TreeSet stopwords) { + public int excludeWords(final SortedSet stopwords) { // subtracts the given stopwords from the word list // the word list shrinkes. This returns the number of shrinked words final int oldsize = words.size(); @@ -283,8 +286,8 @@ public final class Condenser { return this.languageIdentificator.getLanguage(); } - private void createCondensement(final InputStream is, DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException { - final HashSet currsentwords = new HashSet(); + private void createCondensement(final InputStream is, final DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException { + final Set currsentwords = new HashSet(); StringBuilder sentence = new StringBuilder(100); String word = ""; String k; @@ -299,7 +302,7 @@ public final class Condenser { int idx; int wordInSentenceCounter = 1; boolean comb_indexof = false, last_last = false, last_index = false; - final HashMap sentences = new HashMap(100); + final Map sentences = new HashMap(100); // read source final sievedWordsEnum wordenum = new sievedWordsEnum(is, meaningLib); @@ -458,17 +461,13 @@ public final class Condenser { public final static boolean invisible(final char c) { final int type = Character.getType(c); - if ( - type == Character.LOWERCASE_LETTER - || type == Character.DECIMAL_DIGIT_NUMBER - || type == Character.UPPERCASE_LETTER - || type == Character.MODIFIER_LETTER - || type == Character.OTHER_LETTER - || type == Character.TITLECASE_LETTER - || ContentScraper.punctuation(c)) { - return false; - } - return true; + return !(type == Character.LOWERCASE_LETTER + || type == Character.DECIMAL_DIGIT_NUMBER + || type == Character.UPPERCASE_LETTER + || type == Character.MODIFIER_LETTER + || type == Character.OTHER_LETTER + || type == Character.TITLECASE_LETTER + || ContentScraper.punctuation(c)); } /** @@ -476,8 +475,8 @@ public final class Condenser { * @param sentence the sentence to be tokenized * @return a ordered map containing word hashes as key and positions as value. 
The map is orderd by the hash ordering */ - public static TreeMap hashSentence(final String sentence, DidYouMeanLibrary meaningLib) { - final TreeMap map = new TreeMap(Base64Order.enhancedCoder); + public static SortedMap hashSentence(final String sentence, final DidYouMeanLibrary meaningLib) { + final SortedMap map = new TreeMap(Base64Order.enhancedCoder); final Enumeration words = wordTokenizer(sentence, "UTF-8", meaningLib); int pos = 0; String word; @@ -489,14 +488,16 @@ public final class Condenser { // don't overwrite old values, that leads to too far word distances oldpos = map.put(hash, LargeNumberCache.valueOf(pos)); - if (oldpos != null) map.put(hash, oldpos); + if (oldpos != null) { + map.put(hash, oldpos); + } pos += word.length() + 1; } return map; } - public static Enumeration wordTokenizer(final String s, final String charset, DidYouMeanLibrary meaningLib) { + public static Enumeration wordTokenizer(final String s, final String charset, final DidYouMeanLibrary meaningLib) { try { return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes(charset)), meaningLib); } catch (final Exception e) { @@ -507,11 +508,11 @@ public final class Condenser { public static class sievedWordsEnum implements Enumeration { // this enumeration removes all words that contain either wrong characters or are too short - StringBuilder buffer = null; - unsievedWordsEnum e; - DidYouMeanLibrary meaningLib; + private StringBuilder buffer = null; + private unsievedWordsEnum e; + private DidYouMeanLibrary meaningLib; - public sievedWordsEnum(final InputStream is, DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException { + public sievedWordsEnum(final InputStream is, final DidYouMeanLibrary meaningLib) throws UnsupportedEncodingException { this.e = new unsievedWordsEnum(is); this.buffer = nextElement0(); this.meaningLib = meaningLib; @@ -550,10 +551,10 @@ public final class Condenser { private static class unsievedWordsEnum implements Enumeration { // returns an enumeration of StringBuilder Objects - StringBuilder buffer = null; - sentencesFromInputStreamEnum e; - ArrayList s; - int sIndex; + private StringBuilder buffer = null; + private sentencesFromInputStreamEnum e; + private List s; + private int sIndex; public unsievedWordsEnum(final InputStream is) throws UnsupportedEncodingException { e = new sentencesFromInputStreamEnum(is); @@ -616,11 +617,19 @@ public final class Condenser { static StringBuilder trim(StringBuilder sb) { int i = 0; - while (i < sb.length() && sb.charAt(i) <= ' ') i++; - if (i > 0) sb.delete(0, i); + while (i < sb.length() && sb.charAt(i) <= ' ') { + i++; + } + if (i > 0) { + sb.delete(0, i); + } i = sb.length() - 1; - while (i >= 0 && i < sb.length() && sb.charAt(i) <= ' ') i--; - if (i > 0) sb.delete(i + 1, sb.length()); + while (i >= 0 && i < sb.length() && sb.charAt(i) <= ' ') { + i--; + } + if (i > 0) { + sb.delete(i + 1, sb.length()); + } return sb; } @@ -636,10 +645,10 @@ public final class Condenser { // read sentences from a given input stream // this enumerates StringBuilder objects - StringBuilder buffer = null; - BufferedReader raf; - int counter = 0; - boolean pre = false; + private StringBuilder buffer; + private BufferedReader raf; + private int counter = 0; + private boolean pre = false; public sentencesFromInputStreamEnum(final InputStream is) throws UnsupportedEncodingException { raf = new BufferedReader(new InputStreamReader(is, "UTF-8")); @@ -723,7 +732,7 @@ public final class Condenser { return s; } - public static Map getWords(final String text, 
DidYouMeanLibrary meaningLib) { + public static Map getWords(final String text, final DidYouMeanLibrary meaningLib) { // returns a word/indexWord relation map if (text == null) return null; ByteArrayInputStream buffer; diff --git a/source/net/yacy/document/SnippetExtractor.java b/source/net/yacy/document/SnippetExtractor.java index ada609e58..385cb340a 100644 --- a/source/net/yacy/document/SnippetExtractor.java +++ b/source/net/yacy/document/SnippetExtractor.java @@ -22,6 +22,8 @@ package net.yacy.document; import java.util.Collection; import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; @@ -37,7 +39,7 @@ public class SnippetExtractor { public SnippetExtractor(final Collection sentences, final HandleSet queryhashes, int maxLength) throws UnsupportedOperationException { if (sentences == null) throw new UnsupportedOperationException("sentence == null"); if (queryhashes == null || queryhashes.isEmpty()) throw new UnsupportedOperationException("queryhashes == null"); - TreeMap hs; + SortedMap hs; final TreeMap order = new TreeMap(); long uniqCounter = 999L; Integer pos; @@ -124,7 +126,7 @@ public class SnippetExtractor { byte[] hash; // find all hashes that appear in the sentence - final TreeMap hs = Condenser.hashSentence(sentence, null); + final Map hs = Condenser.hashSentence(sentence, null); final Iterator j = queryhashes.iterator(); Integer pos; int p, minpos = sentence.length(), maxpos = -1; diff --git a/source/net/yacy/kelondro/blob/Heap.java b/source/net/yacy/kelondro/blob/Heap.java index 97696e90e..c8c878db0 100755 --- a/source/net/yacy/kelondro/blob/Heap.java +++ b/source/net/yacy/kelondro/blob/Heap.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.SortedMap; import java.util.TreeMap; import net.yacy.kelondro.index.RowSpaceExceededException; @@ -43,7 +44,7 @@ import net.yacy.kelondro.order.NaturalOrder; public final class Heap extends HeapModifier implements BLOB { - private TreeMap buffer; // a write buffer to limit IO to the file + private SortedMap buffer; // a write buffer to limit IO to the file private int buffersize; // bytes that are buffered in buffer private final int buffermax; // maximum size of the buffer @@ -65,7 +66,7 @@ public final class Heap extends HeapModifier implements BLOB { * * If a record is removed, it becomes a free record. * New records are either appended to the end of the file or filled into a free record. - * A free record must either fit exactly to the size of the new record, or an old record is splitted + * A free record must either fit exactly to the size of the new record, or an old record is split * into a filled and a new, smaller empty record. 
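Note: declaring the write buffer as SortedMap keeps one property that matters here: byte[] does not implement Comparable, so a TreeMap over byte[] keys must always be built with an explicit ordering, as the flush code below does with new TreeMap(ordering). A standalone sketch with a hand-rolled unsigned lexicographic comparator standing in for the BLOB's real ordering:

import java.util.Comparator;
import java.util.SortedMap;
import java.util.TreeMap;

public class ByteKeySketch {
    static final Comparator<byte[]> ORDER = new Comparator<byte[]>() {
        public int compare(final byte[] a, final byte[] b) {
            final int n = Math.min(a.length, b.length);
            for (int i = 0; i < n; i++) {
                final int d = (a[i] & 0xff) - (b[i] & 0xff); // unsigned byte compare
                if (d != 0) return d;
            }
            return a.length - b.length; // on a common prefix, the shorter key sorts first
        }
    };
    static final SortedMap<byte[], byte[]> buffer = new TreeMap<byte[], byte[]>(ORDER);
}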
*/ @@ -191,7 +192,7 @@ public final class Heap extends HeapModifier implements BLOB { posBuffer = 0; byte[] ba = new byte[l + (4 + this.keylength) * this.buffer.size()]; byte[] b; - TreeMap nextBuffer = new TreeMap(ordering); + SortedMap nextBuffer = new TreeMap(ordering); flush: while (i.hasNext()) { entry = i.next(); key = normalizeKey(entry.getKey()); diff --git a/source/net/yacy/kelondro/data/word/Word.java b/source/net/yacy/kelondro/data/word/Word.java index e1ae2643a..79abbe8b8 100644 --- a/source/net/yacy/kelondro/data/word/Word.java +++ b/source/net/yacy/kelondro/data/word/Word.java @@ -63,7 +63,7 @@ public class Word { public int posInText; // unique handle, is initialized with word position (excluding double occurring words) public int posInPhrase; // position of word in phrase public int numOfPhrase; // number of phrase. 'normal' phrases begin with number 100 - HashSet phrases; // a set of handles to all phrases where this word appears + Set phrases; // a set of handles to all phrases where this word appears public Bitfield flags; // the flag bits for each word public Word(final int handle, final int pip, final int nop) { @@ -92,6 +92,7 @@ public class Word { return phrases.iterator(); } + @Override public String toString() { // this is here for debugging return "{count=" + count + ", posInText=" + posInText + ", posInPhrase=" + posInPhrase + ", numOfPhrase=" + numOfPhrase + "}"; @@ -99,6 +100,9 @@ public class Word { // static methods + public static byte[] word2hash(final StringBuilder word) { + return word2hash(word.toString()); + } // create a word hash public static final byte[] word2hash(final String word) { @@ -114,7 +118,7 @@ public class Word { public static final HandleSet words2hashesHandles(final Set words) { final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size()); - for (String word: words) + for (final String word: words) try { hashes.put(word2hash(word)); } catch (RowSpaceExceededException e) { @@ -126,7 +130,7 @@ public class Word { public static final HandleSet words2hashesHandles(final String[] words) { final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.length); - for (String word: words) + for (final String word: words) try { hashes.put(word2hash(word)); } catch (RowSpaceExceededException e) { diff --git a/source/net/yacy/kelondro/util/SetTools.java b/source/net/yacy/kelondro/util/SetTools.java index 7c5806e83..385260a2c 100644 --- a/source/net/yacy/kelondro/util/SetTools.java +++ b/source/net/yacy/kelondro/util/SetTools.java @@ -35,15 +35,18 @@ import java.util.Collection; import java.util.Comparator; import java.util.ConcurrentModificationException; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.logging.Log; -public class SetTools { +public final class SetTools { //public static Comparator fastStringComparator = fastStringComparator(true); @@ -51,7 +54,7 @@ public class SetTools { // ------------------------------------------------------------------------------------------------ // helper methods - public final static int log2a(int x) { + public static int log2a(int x) { // this computes 1 + log2 // it is the number of bits in x, not the logarithm by 2 int l = 0; @@ 
-69,13 +72,13 @@ public class SetTools { // - join by iterative tests (where we distinguish left-right and right-left tests) - public final static TreeMap joinConstructive(final Collection> maps, final boolean concatStrings) { + public static SortedMap joinConstructive(final Collection> maps, final boolean concatStrings) { // this joins all TreeMap(s) contained in maps // first order entities by their size - final TreeMap> orderMap = new TreeMap>(); - TreeMap singleMap; - final Iterator> i = maps.iterator(); + final SortedMap> orderMap = new TreeMap>(); + SortedMap singleMap; + final Iterator> i = maps.iterator(); int count = 0; while (i.hasNext()) { // get next entity: @@ -94,7 +97,7 @@ public class SetTools { // we now must pairwise build up a conjunction of these maps Long k = orderMap.firstKey(); // the smallest, which means, the one with the least entries - TreeMap mapA, mapB, joinResult = orderMap.remove(k); + SortedMap mapA, mapB, joinResult = orderMap.remove(k); while (!orderMap.isEmpty() && !joinResult.isEmpty()) { // take the first element of map which is a result and combine it with result k = orderMap.firstKey(); // the next smallest... @@ -111,7 +114,7 @@ public class SetTools { return joinResult; } - public final static TreeMap joinConstructive(final TreeMap map1, final TreeMap map2, final boolean concatStrings) { + public static SortedMap joinConstructive(final SortedMap map1, final SortedMap map2, final boolean concatStrings) { // comparators must be equal if ((map1 == null) || (map2 == null)) return null; if (map1.comparator() != map2.comparator()) return null; @@ -132,9 +135,9 @@ public class SetTools { } @SuppressWarnings("unchecked") - private final static TreeMap joinConstructiveByTest(final TreeMap small, final TreeMap large, final boolean concatStrings) { + private static SortedMap joinConstructiveByTest(final SortedMap small, final SortedMap large, final boolean concatStrings) { final Iterator> mi = small.entrySet().iterator(); - final TreeMap result = new TreeMap(large.comparator()); + final SortedMap result = new TreeMap(large.comparator()); synchronized (mi) { Map.Entry mentry1; B mobj2; @@ -159,12 +162,12 @@ public class SetTools { } @SuppressWarnings("unchecked") - private final static TreeMap joinConstructiveByEnumeration(final TreeMap map1, final TreeMap map2, final boolean concatStrings) { + private static SortedMap joinConstructiveByEnumeration(final SortedMap map1, final SortedMap map2, final boolean concatStrings) { // implement pairwise enumeration final Comparator comp = map1.comparator(); final Iterator> mi1 = map1.entrySet().iterator(); final Iterator> mi2 = map2.entrySet().iterator(); - final TreeMap result = new TreeMap(map1.comparator()); + final SortedMap result = new TreeMap(map1.comparator()); int c; if ((mi1.hasNext()) && (mi2.hasNext())) { Map.Entry mentry1 = mi1.next(); @@ -190,7 +193,7 @@ public class SetTools { } // now the same for set-set - public final static TreeSet joinConstructive(final TreeSet set1, final TreeSet set2) { + public static SortedSet joinConstructive(final SortedSet set1, final SortedSet set2) { // comparators must be equal if ((set1 == null) || (set2 == null)) return null; if (set1.comparator() != set2.comparator()) return null; @@ -210,9 +213,9 @@ public class SetTools { return joinConstructiveByEnumeration(set1, set2); } - private final static TreeSet joinConstructiveByTest(final TreeSet small, final TreeSet large) { + private static SortedSet joinConstructiveByTest(final SortedSet small, final SortedSet large) { 
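Note: joinConstructive chooses between the two strategies below based on the relative set sizes: the by-test variant pays one contains() lookup, O(log |large|), per element of the small set, while by-enumeration walks both sets once with the shared comparator. A generics-restored sketch of the by-test idea, assuming String elements (the real methods are generic over the element type):

import java.util.SortedSet;
import java.util.TreeSet;

public class JoinSketch {
    static SortedSet<String> joinByTest(final SortedSet<String> small, final SortedSet<String> large) {
        // both sets must share one comparator, as the joinConstructive callers verify
        final SortedSet<String> result = new TreeSet<String>(small.comparator());
        for (final String o : small) {
            if (large.contains(o)) result.add(o); // one O(log |large|) probe per element
        }
        return result;
    }
}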
final Iterator mi = small.iterator(); - final TreeSet result = new TreeSet(small.comparator()); + final SortedSet result = new TreeSet(small.comparator()); A o; while (mi.hasNext()) { o = mi.next(); @@ -221,12 +224,12 @@ public class SetTools { return result; } - private final static TreeSet joinConstructiveByEnumeration(final TreeSet set1, final TreeSet set2) { + private static SortedSet joinConstructiveByEnumeration(final SortedSet set1, final SortedSet set2) { // implement pairwise enumeration final Comparator comp = set1.comparator(); final Iterator mi = set1.iterator(); final Iterator si = set2.iterator(); - final TreeSet result = new TreeSet(set1.comparator()); + final SortedSet result = new TreeSet(set1.comparator()); int c; if ((mi.hasNext()) && (si.hasNext())) { A mobj = mi.next(); @@ -254,7 +257,7 @@ public class SetTools { * @param large * @return true if the small set is completely included in the large set */ - public final static boolean totalInclusion(final Set small, final Set large) { + public static boolean totalInclusion(final Set small, final Set large) { for (A o: small) { if (!large.contains(o)) return false; } @@ -267,7 +270,7 @@ public class SetTools { * @param large * @return true if the small set is completely included in the large set */ - public final static boolean totalInclusion(final HandleSet small, final HandleSet large) { + public static boolean totalInclusion(final HandleSet small, final HandleSet large) { for (byte[] handle: small) { if (!large.has(handle)) return false; } @@ -281,7 +284,7 @@ public class SetTools { * @param set2 * @return true if any element of the first set is part of the second set or vice-versa */ - public final static boolean anymatch(final TreeSet set1, final TreeSet set2) { + public static boolean anymatch(final SortedSet set1, final SortedSet set2) { // comparators must be equal if ((set1 == null) || (set2 == null)) return false; if (set1.comparator() != set2.comparator()) return false; @@ -307,7 +310,7 @@ public class SetTools { * @param set2 * @return true if any element of the first set is part of the second set or vice-versa */ - public final static boolean anymatch(final HandleSet set1, final HandleSet set2) { + public static boolean anymatch(final HandleSet set1, final HandleSet set2) { // comparators must be equal if ((set1 == null) || (set2 == null)) return false; if (set1.comparator() != set2.comparator()) return false; @@ -327,7 +330,7 @@ public class SetTools { return anymatchByEnumeration(set1, set2); } - private final static boolean anymatchByTest(final TreeSet small, final TreeSet large) { + private static boolean anymatchByTest(final SortedSet small, final SortedSet large) { final Iterator mi = small.iterator(); A o; while (mi.hasNext()) { @@ -337,7 +340,7 @@ public class SetTools { return false; } - private final static boolean anymatchByTest(final HandleSet small, final HandleSet large) { + private static boolean anymatchByTest(final HandleSet small, final HandleSet large) { final Iterator mi = small.iterator(); byte[] o; while (mi.hasNext()) { @@ -347,7 +350,7 @@ public class SetTools { return false; } - private final static boolean anymatchByEnumeration(final TreeSet set1, final TreeSet set2) { + private static boolean anymatchByEnumeration(final SortedSet set1, final SortedSet set2) { // implement pairwise enumeration final Comparator comp = set1.comparator(); final Iterator mi = set1.iterator(); @@ -370,7 +373,7 @@ public class SetTools { return false; } - private final static boolean 
anymatchByEnumeration(final HandleSet set1, final HandleSet set2) { + private static boolean anymatchByEnumeration(final HandleSet set1, final HandleSet set2) { // implement pairwise enumeration final Comparator comp = set1.comparator(); final Iterator mi = set1.iterator(); @@ -418,11 +421,11 @@ public class SetTools { } */ - public final static void excludeDestructive(final Map map, final Set set) { + public static void excludeDestructive(final Map map, final Set set) { // comparators must be equal if (map == null) return; if (set == null) return; - assert !(map instanceof TreeMap && set instanceof TreeSet) || ((TreeMap) map).comparator() == ((TreeSet) set).comparator(); + assert !(map instanceof SortedMap && set instanceof SortedSet) || ((SortedMap) map).comparator() == ((SortedSet) set).comparator(); if (map.isEmpty() || set.isEmpty()) return; if (map.size() < set.size()) @@ -431,21 +434,21 @@ public class SetTools { excludeDestructiveByTestSetInMap(map, set); } - private final static void excludeDestructiveByTestMapInSet(final Map map, final Set set) { + private static void excludeDestructiveByTestMapInSet(final Map map, final Set set) { final Iterator mi = map.keySet().iterator(); while (mi.hasNext()) if (set.contains(mi.next())) mi.remove(); } - private final static void excludeDestructiveByTestSetInMap(final Map map, final Set set) { + private static void excludeDestructiveByTestSetInMap(final Map map, final Set set) { final Iterator si = set.iterator(); while (si.hasNext()) map.remove(si.next()); } // and the same again with set-set - public final static void excludeDestructive(final Set set1, final Set set2) { + public static void excludeDestructive(final Set set1, final Set set2) { if (set1 == null) return; if (set2 == null) return; - assert !(set1 instanceof TreeSet && set2 instanceof TreeSet) || ((TreeSet) set1).comparator() == ((TreeSet) set2).comparator(); + assert !(set1 instanceof SortedSet && set2 instanceof SortedSet) || ((SortedSet) set1).comparator() == ((SortedSet) set2).comparator(); if (set1.isEmpty() || set2.isEmpty()) return; if (set1.size() < set2.size()) @@ -454,20 +457,20 @@ public class SetTools { excludeDestructiveByTestLargeInSmall(set1, set2); } - private final static void excludeDestructiveByTestSmallInLarge(final Set small, final Set large) { + private static void excludeDestructiveByTestSmallInLarge(final Set small, final Set large) { final Iterator mi = small.iterator(); while (mi.hasNext()) if (large.contains(mi.next())) mi.remove(); } - private final static void excludeDestructiveByTestLargeInSmall(final Set large, final Set small) { + private static void excludeDestructiveByTestLargeInSmall(final Set large, final Set small) { final Iterator si = small.iterator(); while (si.hasNext()) large.remove(si.next()); } // ------------------------------------------------------------------------------------------------ - public final static TreeMap loadMap(final String filename, final String sep) { - final TreeMap map = new TreeMap(); + public static SortedMap loadMap(final String filename, final String sep) { + final SortedMap map = new TreeMap(); BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(new FileInputStream(filename))); @@ -485,8 +488,8 @@ public class SetTools { return map; } - public final static TreeMap> loadMapMultiValsPerKey(final String filename, final String sep) { - final TreeMap> map = new TreeMap>(); + public static SortedMap> loadMapMultiValsPerKey(final String filename, final String sep) { + final SortedMap> map 
@@ -508,8 +511,8 @@ public class SetTools {
         return map;
     }
 
-    public final static TreeSet<String> loadList(final File file, final Comparator<String> c) {
-        final TreeSet<String> list = new TreeSet<String>(c);
+    public static SortedSet<String> loadList(final File file, final Comparator<String> c) {
+        final SortedSet<String> list = new TreeSet<String>(c);
         if (!(file.exists())) return list;
 
         BufferedReader br = null;
@@ -528,7 +531,7 @@ public class SetTools {
         return list;
     }
 
-    public final static String setToString(final HandleSet set, final char separator) {
+    public static String setToString(final HandleSet set, final char separator) {
         final Iterator<byte[]> i = set.iterator();
         final StringBuilder sb = new StringBuilder(set.size() * 7);
         if (i.hasNext()) sb.append(new String(i.next()));
@@ -538,7 +541,7 @@ public class SetTools {
         return sb.toString();
     }
 
-    public final static String setToString(final Set<String> set, final char separator) {
+    public static String setToString(final Set<String> set, final char separator) {
         final Iterator<String> i = set.iterator();
         final StringBuilder sb = new StringBuilder(set.size() * 7);
         if (i.hasNext()) sb.append(i.next());
@@ -552,8 +555,8 @@ public static void main(final String[] args) {
-        final TreeMap<String, String> m = new TreeMap<String, String>();
-        final TreeMap<String, String> s = new TreeMap<String, String>();
+        final SortedMap<String, String> m = new TreeMap<String, String>();
+        final SortedMap<String, String> s = new TreeMap<String, String>();
         m.put("a", "a");
         m.put("x", "x");
         m.put("f", "f");
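For illustration only, not part of the patch: loadMapMultiValsPerKey builds a map in which a single key collects several values. The idiom in isolation (hypothetical names; the real method reads key/value lines from a file and splits them on sep):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.SortedMap;
    import java.util.TreeMap;

    public class MultiValueMapDemo {
        private static void addValue(final SortedMap<String, List<String>> map,
                                     final String key, final String value) {
            List<String> values = map.get(key);
            if (values == null) {
                values = new ArrayList<String>();
                map.put(key, values); // first value for this key
            }
            values.add(value);        // further values are appended
        }

        public static void main(final String[] args) {
            final SortedMap<String, List<String>> map = new TreeMap<String, List<String>>();
            addValue(map, "example.com", "forum/.*");
            addValue(map, "example.com", "ads/.*");
            System.out.println(map); // {example.com=[forum/.*, ads/.*]}
        }
    }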
diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java
index 38f3d0fbb..7de638b2f 100644
--- a/source/net/yacy/repository/Blacklist.java
+++ b/source/net/yacy/repository/Blacklist.java
@@ -90,32 +90,28 @@ public class Blacklist {
             Blacklist.BLACKLIST_NEWS }));
     public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
 
-    protected File blacklistRootPath = null;
-    protected HashMap<String, HandleSet> cachedUrlHashs = null;
-    //protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
-    protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths_matchable = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
-    protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths_notmatchable = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
+    private File blacklistRootPath = null;
+    private final Map<String, HandleSet> cachedUrlHashs;
+    private final Map<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
+    private final Map<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
 
     public Blacklist(final File rootPath) {
-        this.setRootPath(rootPath);
-        this.blacklistRootPath = rootPath;
+        this.setRootPath(rootPath);
 
         // prepare the data structure
-        //this.hostpaths = new HashMap<String, HashMap<String, ArrayList<String>>>();
-        this.hostpaths_matchable = new HashMap<String, HashMap<String, ArrayList<String>>>();
-        this.hostpaths_notmatchable = new HashMap<String, HashMap<String, ArrayList<String>>>();
+        this.hostpaths_matchable = new HashMap<String, Map<String, List<String>>>();
+        this.hostpaths_notmatchable = new HashMap<String, Map<String, List<String>>>();
         this.cachedUrlHashs = new HashMap<String, HandleSet>();
         for (final String blacklistType : BLACKLIST_TYPES) {
-            //this.hostpaths.put(blacklistType, new HashMap<String, ArrayList<String>>());
-            this.hostpaths_matchable.put(blacklistType, new HashMap<String, ArrayList<String>>());
-            this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, ArrayList<String>>());
+            this.hostpaths_matchable.put(blacklistType, new HashMap<String, List<String>>());
+            this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, List<String>>());
             this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
         }
     }
 
-    public void setRootPath(final File rootPath) {
+    public final void setRootPath(final File rootPath) {
         if (rootPath == null) {
             throw new NullPointerException("The blacklist root path must not be null.");
         }
@@ -129,9 +125,9 @@ public class Blacklist {
         this.blacklistRootPath = rootPath;
     }
 
-    protected Map<String, ArrayList<String>> getBlacklistMap(final String blacklistType, final boolean matchable) {
+    protected Map<String, List<String>> getBlacklistMap(final String blacklistType, final boolean matchable) {
         if (blacklistType == null) {
-            throw new IllegalArgumentException();
+            throw new IllegalArgumentException("Blacklist type not set.");
         }
         if (!BLACKLIST_TYPES.contains(blacklistType)) {
             throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
@@ -142,7 +138,7 @@ public class Blacklist {
 
     protected HandleSet getCacheUrlHashsSet(final String blacklistType) {
         if (blacklistType == null) {
-            throw new IllegalArgumentException();
+            throw new IllegalArgumentException("Blacklist type not set.");
         }
         if (!BLACKLIST_TYPES.contains(blacklistType)) {
             throw new IllegalArgumentException("Unknown backlist type.");
@@ -152,10 +148,10 @@ public class Blacklist {
     }
 
     public void clear() {
-        for (final Map<String, ArrayList<String>> entry : this.hostpaths_matchable.values()) {
+        for (final Map<String, List<String>> entry : this.hostpaths_matchable.values()) {
             entry.clear();
         }
-        for (final Map<String, ArrayList<String>> entry : this.hostpaths_notmatchable.values()) {
+        for (final Map<String, List<String>> entry : this.hostpaths_notmatchable.values()) {
             entry.clear();
         }
         for (final HandleSet entry : this.cachedUrlHashs.values()) {
@@ -166,12 +162,12 @@ public class Blacklist {
     public int size() {
         int size = 0;
         for (final String entry : this.hostpaths_matchable.keySet()) {
-            for (final ArrayList<String> ientry : this.hostpaths_matchable.get(entry).values()) {
+            for (final List<String> ientry : this.hostpaths_matchable.get(entry).values()) {
                 size += ientry.size();
             }
         }
         for (final String entry : this.hostpaths_notmatchable.keySet()) {
-            for (final ArrayList<String> ientry : this.hostpaths_notmatchable.get(entry).values()) {
+            for (final List<String> ientry : this.hostpaths_notmatchable.get(entry).values()) {
                 size += ientry.size();
             }
         }
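A sketch, not part of the patch: the fields above nest three levels, blacklist type to host mask to a list of path patterns. The layout and the aggregation that size() performs over it (hypothetical names and data):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class BlacklistLayoutDemo {
        public static void main(final String[] args) {
            final Map<String, Map<String, List<String>>> byType =
                    new HashMap<String, Map<String, List<String>>>();
            final Map<String, List<String>> hosts = new HashMap<String, List<String>>();
            hosts.put("ads.example.com", new ArrayList<String>(Arrays.asList("tracker/.*")));
            byType.put("proxy", hosts);

            int size = 0; // total number of path patterns, as in Blacklist.size()
            for (final Map<String, List<String>> hostMap : byType.values()) {
                for (final List<String> paths : hostMap.values()) {
                    size += paths.size();
                }
            }
            System.out.println(size); // 1
        }
    }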
@@ -179,48 +175,45 @@
     }
 
     public void loadList(final BlacklistFile[] blFiles, final String sep) {
-        for (int j = 0; j < blFiles.length; j++) {
-            final BlacklistFile blf = blFiles[j];
+        for (final BlacklistFile blf : blFiles) {
             loadList(blf.getType(), blf.getFileName(), sep);
         }
     }
 
     private void loadList(final BlacklistFile blFile, final String sep) {
-        final Map<String, ArrayList<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
-        final Map<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
-        Set<Map.Entry<String, ArrayList<String>>> loadedBlacklist;
-        Map.Entry<String, ArrayList<String>> loadedEntry;
-        ArrayList<String> paths;
-        ArrayList<String> loadedPaths;
+        final Map<String, List<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
+        final Map<String, List<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
+        Set<Map.Entry<String, List<String>>> loadedBlacklist;
+        Map.Entry<String, List<String>> loadedEntry;
+        List<String> paths;
+        List<String> loadedPaths;
 
         final String[] fileNames = blFile.getFileNamesUnified();
-        if (fileNames.length > 0) {
-            for (int i = 0; i < fileNames.length; i++) {
-                // make sure all requested blacklist files exist
-                final File file = new File(this.blacklistRootPath, fileNames[i]);
-                try {
-                    file.createNewFile();
-                } catch (final IOException e) { /* */ }
-
-                // join all blacklists from files into one internal blacklist map
-                loadedBlacklist = SetTools.loadMapMultiValsPerKey(file.toString(), sep).entrySet();
-                for (final Iterator<Map.Entry<String, ArrayList<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) {
-                    loadedEntry = mi.next();
-                    loadedPaths = loadedEntry.getValue();
-
-                    // create new entry if host mask unknown, otherwise merge
-                    // existing one with path patterns from blacklist file
-                    paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey());
-                    if (paths == null) {
-                        if (isMatchable(loadedEntry.getKey())) {
-                            blacklistMapMatch.put(loadedEntry.getKey(), loadedPaths);
-                        } else {
-                            blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPaths);
-                        }
+        for (final String fileName : fileNames) {
+            // make sure all requested blacklist files exist
+            final File file = new File(this.blacklistRootPath, fileName);
+            try {
+                file.createNewFile();
+            } catch (final IOException e) { /* */ }
+
+            // join all blacklists from files into one internal blacklist map
+            loadedBlacklist = SetTools.loadMapMultiValsPerKey(file.toString(), sep).entrySet();
+            for (final Iterator<Map.Entry<String, List<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) {
+                loadedEntry = mi.next();
+                loadedPaths = loadedEntry.getValue();
+
+                // create new entry if host mask unknown, otherwise merge
+                // existing one with path patterns from blacklist file
+                paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey());
+                if (paths == null) {
+                    if (isMatchable(loadedEntry.getKey())) {
+                        blacklistMapMatch.put(loadedEntry.getKey(), loadedPaths);
                     } else {
-                        // TODO check for duplicates? (refactor List -> Set)
-                        paths.addAll(loadedPaths);
+                        blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPaths);
                     }
+                } else {
+                    // TODO check for duplicates? (refactor List -> Set)
+                    paths.addAll(loadedPaths);
                 }
             }
         }
@@ -240,15 +233,16 @@ public void remove(final String blacklistType, final String host, final String path) {
-        final Map<String, ArrayList<String>> blacklistMap = getBlacklistMap(blacklistType, true);
-        ArrayList<String> hostList = blacklistMap.get(host);
+        final Map<String, List<String>> blacklistMap = getBlacklistMap(blacklistType, true);
+        List<String> hostList = blacklistMap.get(host);
         if (hostList != null) {
             hostList.remove(path);
             if (hostList.isEmpty()) {
                 blacklistMap.remove(host);
             }
         }
-        final Map<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
+
+        final Map<String, List<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
         hostList = blacklistMapNotMatch.get(host);
         if (hostList != null) {
             hostList.remove(path);
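For illustration only, not part of the patch: the loadList hunk above merges freshly loaded path patterns into an already known host mask instead of replacing it. The merge step in isolation (hypothetical names):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class MergeOnLoadDemo {
        private static void merge(final Map<String, List<String>> map,
                                  final String host, final List<String> loadedPaths) {
            final List<String> paths = map.get(host);
            if (paths == null) {
                map.put(host, loadedPaths); // host mask not known yet
            } else {
                paths.addAll(loadedPaths);  // merge with the existing entry
            }
        }

        public static void main(final String[] args) {
            final Map<String, List<String>> map = new HashMap<String, List<String>>();
            merge(map, "example.com", new ArrayList<String>(Arrays.asList("forum/.*")));
            merge(map, "example.com", new ArrayList<String>(Arrays.asList("ads/.*")));
            System.out.println(map); // {example.com=[forum/.*, ads/.*]}
        }
    }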
@@ -258,61 +252,55 @@
         }
     }
 
-    public void add(final String blacklistType, String host, String path) {
+    public void add(final String blacklistType, final String host, final String path) {
         if (host == null) {
-            throw new NullPointerException();
+            throw new IllegalArgumentException("host may not be null");
         }
         if (path == null) {
-            throw new NullPointerException();
+            throw new IllegalArgumentException("path may not be null");
         }
 
-        if (path.length() > 0 && path.charAt(0) == '/') {
-            path = path.substring(1);
-        }
+        final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path;
 
-        Map<String, ArrayList<String>> blacklistMap;
-        blacklistMap = (isMatchable(host)) ? getBlacklistMap(blacklistType, true) : getBlacklistMap(blacklistType, false);
+        final Map<String, List<String>> blacklistMap = getBlacklistMap(blacklistType, (isMatchable(host)) ? true : false);
 
         // avoid PatternSyntaxException e
-        if (!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') {
-            host = "." + host;
-        }
+        final String h =
+                ((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
 
-        ArrayList<String> hostList = blacklistMap.get(host.toLowerCase());
-        if (hostList == null) {
-            blacklistMap.put(host.toLowerCase(), (hostList = new ArrayList<String>()));
+        List<String> hostList;
+        if (!(blacklistMap.containsKey(h) && ((hostList = blacklistMap.get(h)) != null))) {
            blacklistMap.put(h, (hostList = new ArrayList<String>()));
         }
-        hostList.add(path);
+
+        hostList.add(p);
     }
 
     public int blacklistCacheSize() {
         int size = 0;
         final Iterator<String> iter = this.cachedUrlHashs.keySet().iterator();
         while (iter.hasNext()) {
-            final HandleSet blacklistMap = this.cachedUrlHashs.get(iter.next());
-            size += blacklistMap.size();
+            size += this.cachedUrlHashs.get(iter.next()).size();
         }
         return size;
     }
 
     public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) {
-        final HandleSet urlHashCache = getCacheUrlHashsSet(blacklistType);
-        return urlHashCache.has(urlHash);
+        return getCacheUrlHashsSet(blacklistType).has(urlHash);
     }
 
-    public boolean contains(final String blacklistType, String host, String path) {
+    public boolean contains(final String blacklistType, final String host, final String path) {
         boolean ret = false;
 
         if (blacklistType != null && host != null && path != null) {
-            Map<String, ArrayList<String>> blacklistMap;
-            blacklistMap = (isMatchable(host)) ? getBlacklistMap(blacklistType, true) : getBlacklistMap(blacklistType, false);
+            final Map<String, List<String>> blacklistMap =
                    getBlacklistMap(blacklistType, (isMatchable(host)) ? true : false);
 
             // avoid PatternSyntaxException e
-            if (!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') {
-                host = "." + host;
-            }
+            final String h =
                    ((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
 
-            ArrayList<String> hostList = blacklistMap.get(host.toLowerCase());
+            List<String> hostList = blacklistMap.get(h);
             if (hostList != null) {
                 ret = hostList.contains(path);
             }
@@ -321,6 +309,10 @@
     }
 
     public boolean isListed(final String blacklistType, final DigestURI url) {
+        if (url == null) {
+            throw new IllegalArgumentException("url may not be null");
+        }
+
         if (url.getHost() == null) {
             return false;
         }
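A sketch, not part of the patch: add() and contains() normalize a host mask before it can reach Pattern.compile(), since a leading '*' is a dangling meta character; a '.' is prepended and hosts are stored lowercase. With a hypothetical entry:

    import java.util.regex.Pattern;

    public class HostNormalizeDemo {
        public static void main(final String[] args) {
            final String host = "*Example.COM"; // hypothetical non-matchable mask
            final String h = ((host.length() > 0 && host.charAt(0) == '*') ? "." + host : host)
                    .toLowerCase();             // -> ".*example.com"
            System.out.println(Pattern.matches(h, "www.example.com")); // true
            // Pattern.compile(host) would throw a PatternSyntaxException instead.
        }
    }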
@@ -340,44 +332,31 @@
     }
 
     public static boolean isMatchable(final String host) {
-        try {
-            if (Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
-            {
-                return true;
-            }
-            if (Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
-            {
-                return true;
-            }
-            if (Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and befor * must be a dot)
-            {
-                return true;
-            }
-        } catch (final PatternSyntaxException e) {
-            //System.out.println(e.toString());
-            return false;
-        }
-        return false;
+
+        return (
+                (Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
+                || (Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
+                || (Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and before * must be a dot)
+                );
     }
 
     public String getEngineInfo() {
         return "Default YaCy Blacklist Engine";
     }
 
-    public boolean isListed(final String blacklistType, final String hostlow, String path) {
+    public boolean isListed(final String blacklistType, final String hostlow, final String path) {
         if (hostlow == null) {
-            throw new NullPointerException();
+            throw new IllegalArgumentException("hostlow may not be null");
         }
         if (path == null) {
-            throw new NullPointerException();
+            throw new IllegalArgumentException("path may not be null");
         }
 
         // getting the proper blacklist
-        final Map<String, ArrayList<String>> blacklistMapMatched = getBlacklistMap(blacklistType, true);
+        final Map<String, List<String>> blacklistMapMatched = getBlacklistMap(blacklistType, true);
+
+        final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path;
 
-        if (path.length() > 0 && path.charAt(0) == '/') {
-            path = path.substring(1);
-        }
         List<String> app;
         boolean matched = false;
         String pp = ""; // path-pattern
@@ -391,7 +370,7 @@
                     Log.logWarning("Blacklist", "ignored blacklist path to prevent 'Dangling meta character' exception: " + pp);
                     continue;
                 }
-                matched |= (("*".equals(pp)) || (path.matches(pp)));
+                matched |= (("*".equals(pp)) || (p.matches(pp)));
             }
         }
 
         // first try to match the domain with wildcard '*'
@@ -430,9 +409,9 @@
 
         // loop over all Regexentrys
         if (!matched) {
-            final Map<String, ArrayList<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false);
+            final Map<String, List<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false);
             String key;
-            for (final Entry<String, ArrayList<String>> entry : blacklistMapNotMatched.entrySet()) {
+            for (final Entry<String, List<String>> entry : blacklistMapNotMatched.entrySet()) {
                 key = entry.getKey();
                 try {
                     if (Pattern.matches(key, hostlow)) {
@@ -451,11 +430,11 @@
         return matched;
     }
 
-    public BlacklistError checkError(String element, Map<String, String> properties) {
+    public BlacklistError checkError(final String element, final Map<String, String> properties) {
         boolean allowRegex = true;
         int slashPos;
-        String host, path;
+        final String host, path;
 
         if (properties != null) {
             allowRegex = properties.get("allowRegex").equalsIgnoreCase("true") ? true : false;
@@ -500,7 +479,7 @@
         }
 
         // check for errors on regex-compiling path
-        if (!isValidRegex(path) && !path.equals("*")) {
+        if (!isValidRegex(path) && !"*".equals(path)) {
             return BlacklistError.PATH_REGEX;
         }
 
@@ -512,19 +491,18 @@
      * @param expression The expression to be checked.
      * @return True if the expression is a valid regular expression, else false.
     */
-    private static boolean isValidRegex(String expression) {
+    private static boolean isValidRegex(final String expression) {
         boolean ret = true;
         try {
             Pattern.compile(expression);
         } catch (final PatternSyntaxException e) {
-
             ret = false;
         }
         return ret;
     }
 
     public static String defaultBlacklist(final File listsPath) {
-        List<String> dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
+        final List<String> dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
         if (dirlist.isEmpty()) {
             return null;
         }
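The three patterns in the rewritten isMatchable() are string constants, so the old try/catch for PatternSyntaxException could never fire; the method reduces to three matches() calls. A quick check of the accepted shapes (self-contained, not part of the patch):

    import java.util.regex.Pattern;

    public class IsMatchableDemo {
        static boolean isMatchable(final String host) {
            return Pattern.matches("^[a-z0-9.-]*$", host)       // plain domain
                || Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)  // leading "*."
                || Pattern.matches("^[a-z0-9-.]*\\.\\*$", host); // trailing ".*"
        }

        public static void main(final String[] args) {
            System.out.println(isMatchable("www.yacy.net")); // true
            System.out.println(isMatchable("*.yacy.net"));   // true
            System.out.println(isMatchable("yacy.*"));       // true
            System.out.println(isMatchable("*yacy.net"));    // false: '*' not followed by '.'
        }
    }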
@@ -537,12 +515,8 @@
      * @param newEntry The Entry.
      * @return True if file contains entry, else false.
      */
-    public static boolean blacklistFileContains(final File listsPath, final String blacklistToUse, String newEntry) {
-        boolean ret = false;
-        final HashSet<String> Blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
-        if (Blacklist != null) {
-            ret = Blacklist.contains(newEntry);
-        }
-        return ret;
+    public static boolean blacklistFileContains(final File listsPath, final String blacklistToUse, final String newEntry) {
+        final Set<String> blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
+        return blacklist != null && blacklist.contains(newEntry);
     }
 }
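For illustration only, not part of the patch: the rewritten blacklistFileContains() boils down to a set membership test. A sketch with a literal list standing in for FileUtils.getListArray(), YaCy's own file reader; note that new HashSet(...) never yields null, so the null guard kept in the patch is redundant and could be dropped as well:

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class ContainsDemo {
        public static void main(final String[] args) {
            final Set<String> blacklist = new HashSet<String>(
                    Arrays.asList("ads.example.com/.*", "tracker.example.com/.*"));
            System.out.println(blacklist.contains("ads.example.com/.*")); // true
            System.out.println(blacklist.contains("www.yacy.net/.*"));    // false
        }
    }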