diff --git a/htroot/ConfigAppearance_p.java b/htroot/ConfigAppearance_p.java index d1d759d38..fa06723ae 100644 --- a/htroot/ConfigAppearance_p.java +++ b/htroot/ConfigAppearance_p.java @@ -34,7 +34,6 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; -import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -91,19 +90,18 @@ public class ConfigAppearance_p { if (post.containsKey("install_button")) { // load skin from URL final String url = post.get("url"); - ArrayList skinVector; + Iterator it; try { final yacyURL u = new yacyURL(url, null); final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); - skinVector = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000), "UTF-8"); + it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); } catch (final IOException e) { prop.put("status", "1");// unable to get URL prop.put("status_url", url); return prop; } try { - final Iterator it = skinVector.iterator(); final File skinFile = new File(skinPath, url.substring(url.lastIndexOf("/"), url.length())); final BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(skinFile))); diff --git a/htroot/ConfigLanguage_p.java b/htroot/ConfigLanguage_p.java index c8386c093..772500319 100644 --- a/htroot/ConfigLanguage_p.java +++ b/htroot/ConfigLanguage_p.java @@ -34,7 +34,6 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; -import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -80,19 +79,18 @@ public class ConfigLanguage_p { //load language file from URL } else if (post.containsKey("url")){ final String url = post.get("url"); - ArrayList langVector; + Iterator it; try{ final yacyURL u = new yacyURL(url, null); final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); - langVector = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000), "UTF-8"); + it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); }catch(final IOException e){ prop.put("status", "1");//unable to get url prop.put("status_url", url); return prop; } try{ - final Iterator it = langVector.iterator(); final File langFile = new File(langPath, url.substring(url.lastIndexOf("/"), url.length())); final BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(langFile))); diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java index a1d512462..629e8f65b 100644 --- a/htroot/CrawlProfileEditor_p.java +++ b/htroot/CrawlProfileEditor_p.java @@ -114,11 +114,20 @@ public class CrawlProfileEditor_p { entry selentry; while (it.hasNext()) { selentry = it.next(); - if (selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE) || - selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) || - selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE) /*|| - selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) || - selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA)*/) + if (selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE) || + selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE) || + selentry.name().equals(CrawlSwitchboard.DBFILE_ACTIVE_CRAWL_PROFILES) || + selentry.name().equals(CrawlSwitchboard.DBFILE_PASSIVE_CRAWL_PROFILES)) continue; prop.put("profiles_" + count + "_name", selentry.name()); prop.put("profiles_" + count + "_handle", selentry.handle()); diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index 14d7f9205..c62f11899 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -36,6 +36,7 @@ import java.io.PrintWriter; import java.io.StringReader; import java.util.Arrays; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import de.anomic.crawler.retrieval.HTTPLoader; @@ -104,7 +105,7 @@ public class sharedBlacklist_p { } prop.put("page_blackLists", blacklistCount); - List otherBlacklist = null; + Iterator otherBlacklist = null; ListAccumulator otherBlacklists = null; if (post.containsKey("hash")) { @@ -147,7 +148,7 @@ public class sharedBlacklist_p { // get List yacyURL u = new yacyURL(downloadURLOld, null); - otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000), "UTF-8"); + otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000)); } catch (final Exception e) { prop.put("status", STATUS_PEER_UNKNOWN); prop.putHTML("status_name", Hash); @@ -166,7 +167,7 @@ public class sharedBlacklist_p { final yacyURL u = new yacyURL(downloadURL, null); final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); - otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000), "UTF-8"); //get List + otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); //get List } catch (final Exception e) { prop.put("status", STATUS_URL_PROBLEM); prop.putHTML("status_address",downloadURL); @@ -203,7 +204,7 @@ public class sharedBlacklist_p { if (fileString != null) { try { - otherBlacklist = FileUtils.strings(fileString.getBytes("UTF-8"), "UTF-8"); + otherBlacklist = FileUtils.strings(fileString.getBytes("UTF-8")); } catch (IOException ex) { prop.put("status", STATUS_FILE_ERROR); } @@ -278,13 +279,9 @@ public class sharedBlacklist_p { // loading the current blacklist content final HashSet Blacklist = new HashSet(listManager.getListArray(new File(listManager.listsPath, selectedBlacklistName))); - // sort the loaded blacklist - final String[] sortedlist = otherBlacklist.toArray(new String[otherBlacklist.size()]); - Arrays.sort(sortedlist); - int count = 0; - for(int i = 0; i < sortedlist.length; i++){ - final String tmp = sortedlist[i]; + while (otherBlacklist.hasNext()) { + final String tmp = otherBlacklist.next(); if( !Blacklist.contains(tmp) && (!tmp.equals("")) ){ //newBlacklist.add(tmp); prop.put("page_urllist_" + count + "_dark", count % 2 == 0 ? "0" : "1"); diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 2d9b8e988..531108685 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -28,9 +28,9 @@ import java.io.IOException; +import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; -import java.util.List; import de.anomic.content.RSSMessage; import de.anomic.data.Blacklist; @@ -129,13 +129,11 @@ public final class transferRWI { final long startProcess = System.currentTimeMillis(); // decode request - final List v = FileUtils.strings(indexes, null); + System.out.println("STRINGS " + new String(indexes)); + Iterator it = FileUtils.strings(indexes); // free memory indexes = null; - - // the value-vector should now have the same length as entryc - if (v.size() != entryc) sb.getLog().logSevere("ERROR WITH ENTRY COUNTER: v=" + v.size() + ", entryc=" + entryc); // now parse the Strings in the value-vector and write index entries String estring; @@ -145,14 +143,13 @@ public final class transferRWI { WordReferenceRow iEntry; final HashSet unknownURL = new HashSet(); final HashSet knownURL = new HashSet(); - final String[] wordhashes = new String[v.size()]; + final ArrayList wordhashes = new ArrayList(); int received = 0; int blocked = 0; int receivedURL = 0; - final Iterator i = v.iterator(); - while (i.hasNext()) { + while (it.hasNext()) { serverCore.checkInterruption(); - estring = i.next(); + estring = it.next(); // check if RWI entry is well-formed p = estring.indexOf("{"); @@ -161,7 +158,7 @@ public final class transferRWI { continue; } wordHash = estring.substring(0, p); - wordhashes[received] = wordHash; + wordhashes.add(wordHash); iEntry = new WordReferenceRow(estring.substring(p)); urlHash = iEntry.metadataHash(); @@ -207,18 +204,20 @@ public final class transferRWI { sb.peers.mySeed().incRI(received); // finally compose the unknownURL hash list - final Iterator it = unknownURL.iterator(); + it = unknownURL.iterator(); unknownURLs.ensureCapacity(unknownURL.size() * 25); while (it.hasNext()) { unknownURLs.append(",").append(it.next()); } if (unknownURLs.length() > 0) { unknownURLs.delete(0, 1); } - if ((wordhashes.length == 0) || (received == 0)) { + if ((wordhashes.size() == 0) || (received == 0)) { sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs"); } else { - final long avdist = (FlatWordPartitionScheme.std.dhtDistance(wordhashes[0].getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(wordhashes[received - 1].getBytes(), null, sb.peers.mySeed())) / 2; - sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs"); - RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", requesting " + unknownURL.size() + " URLs, blocked " + blocked, "", "")); + String firstHash = wordhashes.get(0); + String lastHash = wordhashes.get(wordhashes.size() - 1); + final long avdist = (FlatWordPartitionScheme.std.dhtDistance(firstHash.getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(lastHash.getBytes(), null, sb.peers.mySeed())) / 2; + sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs"); + RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs [" + firstHash + " .. " + lastHash + "]/" + avdist + " from " + otherPeerName + ", requesting " + unknownURL.size() + " URLs, blocked " + blocked, "", "")); } result = "ok"; diff --git a/source/de/anomic/content/oai/PMHReader.java b/source/de/anomic/content/oai/PMHReader.java index 5fae73b19..95e76b16a 100644 --- a/source/de/anomic/content/oai/PMHReader.java +++ b/source/de/anomic/content/oai/PMHReader.java @@ -135,7 +135,7 @@ ListSets http://an.oa.org/OAI-script?verb=ListSets http://purl.org/alcme/etdcat/servlet/OAIHandler?verb=ListSets -urn identifier kšnnen źber den resolver der d-nb aufgelšst werden: +urn identifier koennen ueber den resolver der d-nb aufgeloest werden: http://nbn-resolving.de/urn:nbn:de:bsz:960-opus-1860 @@ -188,42 +188,6 @@ http://nbn-resolving.de/urn:nbn:de:bsz:960-opus-1860 - -
- oai:opus.bsz-bw.de-fhhv:52 - 2008-10-30T11:24:12Z - ddc:360 - pub-type:20 - has-source-swb:false -
- - - Akademisierung von Pflege - Oelke, Uta-Karola - Pflege - Pflegewissenschaft - Akademisierung - Ausbildung - "Social services; association" - Die Arbeit bietet einen †berblick źber pflegebezogene StudiengŠnge und diskutiert die Akademisierung von Pflege. - Fachhochschule Hannover - FakultŠt V - Diakonie, Gesundheit und Soziales. FakultŠt V - Diakonie, Gesundheit und Soziales - Werner-Schule vom DRK - 1994 - TechReport - application/pdf - urn:nbn:de:bsz:960-opus-520 - http://opus.bsz-bw.de/fhhv/volltexte/2008/52/ - ger - /fhhv/doku/urheberrecht.php?la=de - - -
0)) { + if (!sourceFile.exists() || !sourceFile.canRead() || sourceFile.length() == 0) { final String errorMsg = sourceFile.exists() ? "Empty resource file." : "No resource content available (2)."; log.logInfo("Unable to parse '" + location + "'. " + errorMsg); throw new ParserException(errorMsg, location); diff --git a/source/de/anomic/document/language/LanguageStatisticsHolder.java b/source/de/anomic/document/language/LanguageStatisticsHolder.java index 40d54b4a3..85ef3e2ea 100644 --- a/source/de/anomic/document/language/LanguageStatisticsHolder.java +++ b/source/de/anomic/document/language/LanguageStatisticsHolder.java @@ -67,7 +67,7 @@ public class LanguageStatisticsHolder extends Vector { final File folder = new File(directory); if (!folder.exists()) { - Log.logSevere("LanguageStatistics", "the language statistics folder " + directory + " cannot be found"); + Log.logWarning("LanguageStatistics", "the language statistics folder " + directory + " cannot be found"); return; } final FilenameFilter filter = new LanguageFilenameFilter(); diff --git a/source/de/anomic/document/parser/html/ContentScraper.java b/source/de/anomic/document/parser/html/ContentScraper.java index e1b4f5174..006c670cf 100644 --- a/source/de/anomic/document/parser/html/ContentScraper.java +++ b/source/de/anomic/document/parser/html/ContentScraper.java @@ -113,7 +113,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { } public final static boolean punctuation(final char c) { - return (c == '.') || (c == '!') || (c == '?'); + return c == '.' || c == '!' || c == '?'; } public void scrapeText(final char[] newtext, final String insideTag) { diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index 1a4e757b6..0abd5a37b 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -89,6 +89,7 @@ import de.anomic.http.io.ChunkedOutputStream; import de.anomic.http.metadata.HeaderFramework; import de.anomic.http.metadata.RequestHeader; import de.anomic.http.metadata.ResponseHeader; +import de.anomic.http.server.servlets.transferURL; import de.anomic.kelondro.util.ByteBuffer; import de.anomic.kelondro.util.DateFormatter; import de.anomic.kelondro.util.FileUtils; @@ -1173,6 +1174,17 @@ public final class HTTPDFileHandler { } public static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { + // debug functions: for special servlets call them without reflection to get better stack trace results + if (targetClass.getName().equals("transferURL.class")) { + try { + return transferURL.respond(request, args, switchboard); + } catch (Exception e) { + e.printStackTrace(); + Log.logSevere("HTTPFileHandler", "fail of transferURL", e); + throw new InvocationTargetException(e); + } + } + Object result; if (safeServletsMode) synchronized (switchboard) { result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard}); diff --git a/source/de/anomic/http/server/servlets/transferURL.java b/source/de/anomic/http/server/servlets/transferURL.java new file mode 100644 index 000000000..ad6efce4f --- /dev/null +++ b/source/de/anomic/http/server/servlets/transferURL.java @@ -0,0 +1,166 @@ +// this is a temporary 1-to-1 copy of the transferURL servlet + + +package de.anomic.http.server.servlets; + +import java.io.IOException; +import java.text.ParseException; + +import de.anomic.content.RSSMessage; +import de.anomic.data.Blacklist; +import de.anomic.document.parser.xml.RSSFeed; +import de.anomic.http.metadata.RequestHeader; +import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow; +import de.anomic.kelondro.util.DateFormatter; +import de.anomic.search.Switchboard; +import de.anomic.server.serverCore; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyNetwork; +import de.anomic.yacy.yacySeed; + +public final class transferURL { + + + + + + + + + + + + + + + + + + + + + + + + + + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException { + final long start = System.currentTimeMillis(); + long freshdate = 0; + try {freshdate = DateFormatter.parseShortDay("20061101").getTime();} catch (final ParseException e1) {} + + // return variable that accumulates replacements + final Switchboard sb = (Switchboard) env; + final serverObjects prop = new serverObjects(); + if ((post == null) || (env == null)) return prop; + if (!yacyNetwork.authentifyRequest(post, env)) return prop; + + // request values + final String iam = post.get("iam", ""); // seed hash of requester + final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability +// final String key = post.get("key", ""); // transmission key + final int urlc = post.getInt("urlc", 0); // number of transported urls + final boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true"); + final boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true"); + + // response values + String result = ""; + String doublevalues = "0"; + + final yacySeed otherPeer = sb.peers.get(iam); + final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion())); + + if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) { + yacyCore.log.logInfo("Rejecting URLs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash); + result = "wrong_target"; + } else if ((!granted) || (sb.isRobinsonMode())) { + yacyCore.log.logInfo("Rejecting URLs from peer " + otherPeerName + ". Not granted."); + result = "error_not_granted"; + } else { + int received = 0; + int blocked = 0; + final int sizeBefore = sb.indexSegment.urlMetadata().size(); + // read the urls from the other properties and store + String urls; + URLMetadataRow lEntry; + for (int i = 0; i < urlc; i++) { + serverCore.checkInterruption(); + + // read new lurl-entry + urls = post.get("url" + i); + if (urls == null) { + if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: got null URL-string from peer " + otherPeerName); + blocked++; + continue; + } + + // parse new lurl-entry + lEntry = URLMetadataRow.importEntry(urls); + if (lEntry == null) { + yacyCore.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls); + blocked++; + continue; + } + + // check if entry is well-formed + final URLMetadataRow.Components metadata = lEntry.metadata(); + if (metadata.url() == null) { + yacyCore.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls); + blocked++; + continue; + } + + // check whether entry is too old + if (lEntry.freshdate().getTime() <= freshdate) { + if (yacyCore.log.isFine()) yacyCore.log.logFine("transerURL: received too old URL from peer " + otherPeerName + ": " + lEntry.freshdate()); + blocked++; + continue; + } + + // check if the entry is blacklisted + if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()))) { + if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName); + lEntry = null; + blocked++; + continue; + } + + // check if the entry is in our network domain + final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url()); + if (urlRejectReason != null) { + if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: blocked URL '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName); + lEntry = null; + blocked++; + continue; + } + + // write entry to database + yacyCore.log.logInfo("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.metadata().url().toNormalform(true, false)); + try { + sb.indexSegment.urlMetadata().store(lEntry); + sb.crawlResults.stack(lEntry, iam, iam, 3); + if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: received URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName); + received++; + } catch (final IOException e) { + e.printStackTrace(); + } + } + + sb.peers.mySeed().incRU(received); + + // return rewrite properties + final int more = sb.indexSegment.urlMetadata().size() - sizeBefore; + doublevalues = Integer.toString(received - more); + yacyCore.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs"); + RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " URLs from peer " + otherPeerName + ", blocked " + blocked, "", "")); + if ((received - more) > 0) yacyCore.log.logSevere("Received " + doublevalues + " double URLs from peer " + otherPeerName); + result = "ok"; + } + + prop.put("double", doublevalues); + prop.put("result", result); + return prop; + } +} diff --git a/source/de/anomic/kelondro/table/SplitTable.java b/source/de/anomic/kelondro/table/SplitTable.java index 4ab5095f1..f34054cae 100644 --- a/source/de/anomic/kelondro/table/SplitTable.java +++ b/source/de/anomic/kelondro/table/SplitTable.java @@ -353,6 +353,7 @@ public class SplitTable implements ObjectIndex { this.readyCheck.release(); } public ObjectIndex discover(long timeout) { + if (this.discovery != null) return this.discovery; try { this.readyCheck.tryAcquire(1, timeout, TimeUnit.MILLISECONDS); } catch (InterruptedException e) {} @@ -441,7 +442,7 @@ public class SplitTable implements ObjectIndex { } } // wait for a result - ObjectIndex result = challenge.discover(10000); + ObjectIndex result = challenge.discover(1000); //System.out.println("result of discovery: file = " + ((result == null) ? "null" : result.filename())); return result; } diff --git a/source/de/anomic/kelondro/text/DocumentIndex.java b/source/de/anomic/kelondro/text/DocumentIndex.java index d8ede7b45..7b1c58666 100644 --- a/source/de/anomic/kelondro/text/DocumentIndex.java +++ b/source/de/anomic/kelondro/text/DocumentIndex.java @@ -135,7 +135,7 @@ public class DocumentIndex extends Segment { File w; for (String t: s) { w = new File(start, t); - if (w.canRead() && ! w.isHidden()) { + if (w.canRead() && !w.isHidden()) { if (w.isDirectory()) { addAll(w); } else { diff --git a/source/de/anomic/kelondro/text/Segment.java b/source/de/anomic/kelondro/text/Segment.java index 5354da2eb..063245d76 100644 --- a/source/de/anomic/kelondro/text/Segment.java +++ b/source/de/anomic/kelondro/text/Segment.java @@ -133,7 +133,9 @@ public class Segment { } catch (final IOException e) { e.printStackTrace(); } - Switchboard.getSwitchboard().peers.mySeed().resetCounters(); + if (Switchboard.getSwitchboard() != null && + Switchboard.getSwitchboard().peers != null && + Switchboard.getSwitchboard().peers.mySeed() != null) Switchboard.getSwitchboard().peers.mySeed().resetCounters(); } public File getLocation() { diff --git a/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java b/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java index 1f72fcc93..fadb168b1 100644 --- a/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java +++ b/source/de/anomic/kelondro/text/metadataPrototype/URLMetadataRow.java @@ -29,8 +29,8 @@ package de.anomic.kelondro.text.metadataPrototype; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.text.ParseException; -import java.util.ArrayList; import java.util.Date; +import java.util.Iterator; import java.util.Properties; import de.anomic.crawler.retrieval.Request; @@ -361,14 +361,14 @@ public class URLMetadataRow implements Metadata { // avoid double computation of metadata elements if (this.comp != null) return this.comp; // parse elements from comp string; - final ArrayList cl = FileUtils.strings(this.entry.getCol("comp", null), "UTF-8"); + final Iterator cl = FileUtils.strings(this.entry.getCol("comp", null)); this.comp = new Components( - (cl.size() > 0) ? (cl.get(0)).trim() : "", + (cl.hasNext()) ? cl.next() : "", hash(), - (cl.size() > 1) ? (cl.get(1)).trim() : "", - (cl.size() > 2) ? (cl.get(2)).trim() : "", - (cl.size() > 3) ? (cl.get(3)).trim() : "", - (cl.size() > 4) ? (cl.get(4)).trim() : ""); + (cl.hasNext()) ? cl.next() : "", + (cl.hasNext()) ? cl.next() : "", + (cl.hasNext()) ? cl.next() : "", + (cl.hasNext()) ? cl.next() : ""); return this.comp; } diff --git a/source/de/anomic/kelondro/util/FileUtils.java b/source/de/anomic/kelondro/util/FileUtils.java index 406d39d6e..fa648ec55 100644 --- a/source/de/anomic/kelondro/util/FileUtils.java +++ b/source/de/anomic/kelondro/util/FileUtils.java @@ -42,7 +42,6 @@ import java.io.Writer; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Comparator; -import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -50,7 +49,6 @@ import java.util.Map; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeSet; -import java.util.Vector; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import java.util.zip.ZipEntry; @@ -388,7 +386,7 @@ public final class FileUtils { // load props try { final byte[] b = read(f); - return table(strings(b, "UTF-8")); + return table(strings(b)); } catch (final IOException e2) { System.err.println("ERROR: " + f.toString() + " not found in settings path"); return null; @@ -490,21 +488,20 @@ public final class FileUtils { } - public static HashMap table(final Vector list) { - final Enumeration i = list.elements(); + public static HashMap table(Iterator li) { int pos; String line; - final HashMap props = new HashMap(list.size()); - while (i.hasMoreElements()) { - line = (i.nextElement()).trim(); + final HashMap props = new HashMap(); + while (li.hasNext()) { + line = li.next(); pos = line.indexOf("="); if (pos > 0) props.put(line.substring(0, pos).trim(), line.substring(pos + 1).trim()); } return props; } - public static HashMap table(final byte[] a, final String encoding) { - return table(strings(a, encoding)); + public static HashMap table(final byte[] a) { + return table(strings(a)); } /** @@ -535,60 +532,69 @@ public final class FileUtils { return props; } - public static ArrayList strings(final byte[] a) { - return strings(a, null); + public static Iterator strings(byte[] a) { + try { + return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), "UTF-8"))); + } catch (UnsupportedEncodingException e) { + return null; + } } - public static ArrayList strings(final byte[] a, final String encoding) { - if (a == null) return new ArrayList(); - int s = 0; - int e; - final ArrayList v = new ArrayList(); - byte b; - while (s < a.length) { - // find eol - e = s; - while (e < a.length) { - b = a[e]; - if ((b == 10) || (b == 13) || (b == 0)) break; - e++; - } - - // read line - if (encoding == null) { - v.add(new String(a, s, e - s)); - } else try { - v.add(new String(a, s, e - s, encoding)); - } catch (final UnsupportedEncodingException xcptn) { - return v; - } - - // eat up additional eol bytes - s = e + 1; - while (s < a.length) { - b = a[s]; - if ((b != 10) && (b != 13)) break; - s++; - } - } - return v; + /* + public static ArrayList strings(byte[] a) { + final ArrayList list = new ArrayList(); + Iterator i = new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a)))); + while (i.hasNext()) list.add(i.next()); + return list; } - public static ArrayList strings(final Reader reader) { - if (reader == null) return new ArrayList(); - BufferedReader bufreader = new BufferedReader(reader); + + public static ArrayList strings(byte[] a) { final ArrayList list = new ArrayList(); + BufferedReader bufreader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a))); String line = null; try { - while ((line = bufreader.readLine()) != null) { - list.add(line); - } - } catch (IOException e) { - e.printStackTrace(); - return null; - } + while ((line = bufreader.readLine()) != null) { + line = line.trim(); + if (line.length() > 0) list.add(line); + } + } catch (IOException e) { + Log.logWarning("FileUtils", "failed strings: " + e.getMessage(), e); + return list; + } return list; } - + */ + + public static class StringsIterator implements Iterator { + private BufferedReader reader; + private String nextLine; + public StringsIterator(final BufferedReader reader) { + this.reader = reader; + this.nextLine = null; + next(); + } + public boolean hasNext() { + return nextLine != null; + } + + public String next() { + String line = nextLine; + try { + while ((nextLine = reader.readLine()) != null) { + nextLine = nextLine.trim(); + if (nextLine.length() > 0) break; + } + } catch (IOException e) { + nextLine = null; + } + return line; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + } /** * @param from diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java index 81f278399..f3f0b8e46 100644 --- a/source/de/anomic/net/natLib.java +++ b/source/de/anomic/net/natLib.java @@ -27,71 +27,11 @@ package de.anomic.net; import java.net.InetAddress; import java.net.UnknownHostException; -import java.util.ArrayList; -import de.anomic.http.client.Client; -import de.anomic.kelondro.util.FileUtils; import de.anomic.search.Switchboard; -import de.anomic.server.serverDomains; -import de.anomic.tools.disorderHeap; -import de.anomic.tools.nxTools; public class natLib { - public static String getDI604(final String password) { - // this pulls off the ip number from the DI-604 router/nat - /* - wget --quiet --ignore-length http://admin:@192.168.0.1:80/status.htm > /dev/null - grep -A 1 "IP Address" status.htm | tail -1 | awk '{print $1}' | awk 'BEGIN{FS=">"} {print $2}' - rm status.htm - */ - try { - ArrayList x = FileUtils.strings(Client.wget("http://admin:"+password+"@192.168.0.1:80/status.htm", null, 10000), "UTF-8"); - x = nxTools.grep(x, 1, "IP Address"); - if ((x == null) || (x.size() == 0)) return null; - final String line = nxTools.tail1(x); - return nxTools.awk(nxTools.awk(line, " ", 1), ">", 2); - } catch (final Exception e) { - return null; - } - } - - private static String getWhatIsMyIP() { - try { - ArrayList x = FileUtils.strings( - Client.wget("http://www.whatismyip.com/", null, 10000), "UTF-8"); - x = nxTools.grep(x, 0, "Your IP is"); - final String line = nxTools.tail1(x); - return nxTools.awk(line, " ", 4); - } catch (final Exception e) { - return null; - } - } - - private static String getStanford() { - try { - ArrayList x = FileUtils.strings( - Client.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, 10000), - "UTF-8"); - x = nxTools.grep(x, 0, "firewall protecting your browser"); - final String line = nxTools.tail1(x); - return nxTools.awk(line, " ", 7); - } catch (final Exception e) { - return null; - } - } - - private static String getIPID() { - try { - ArrayList x = FileUtils.strings(Client.wget("http://ipid.shat.net/", null, 10000), "UTF-8"); - x = nxTools.grep(x, 2, "Your IP address"); - final String line = nxTools.tail1(x); - return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1); - } catch (final Exception e) { - return null; - } - } - private static boolean isNotLocal(final String ip) { if ((ip.equals("localhost")) || (ip.startsWith("127")) || @@ -152,47 +92,6 @@ public class natLib { } } - private static int retrieveOptions() { - return 3; - } - - private static String retrieveFrom(final int option) { - if ((option < 0) || (option >= retrieveOptions())) return null; - if (option == 0) return getWhatIsMyIP(); - if (option == 1) return getStanford(); - if (option == 2) return getIPID(); - return null; - } - - public static String retrieveIP(final boolean DI604, final String password) { - String ip; - if (DI604) { - // first try the simple way... - ip = getDI604(password); - if (isProper(ip)) { - //System.out.print("{DI604}"); - return ip; - } - } - - // maybe this is a dial-up connection (or LAN and DebugMode) and we can get it from java variables - /*InetAddress ia = serverCore.publicIP(); - if (ia != null) { - ip = ia.getHostAddress(); - if (isProper(ip)) return ip; - }*/ - ip = serverDomains.myPublicIP(); - if (isProper(ip)) return ip; - - // now go the uneasy way and ask some web responder - final disorderHeap random = new disorderHeap(retrieveOptions()); - for (int i = 0; i < retrieveOptions(); i++) { - ip = retrieveFrom(random.number()); - if (isProper(ip)) return ip; - } - return null; - } - // rDNS services: // http://www.xdr2.net/reverse_DNS_lookup.asp // http://remote.12dt.com/rns/ @@ -202,16 +101,5 @@ public class natLib { // listlist: http://www.aspnetimap.com/help/welcome/dnsbl.html - - public static void main(final String[] args) { - //System.out.println("PROBE DI604 : " + getDI604("")); - //System.out.println("PROBE whatismyip: " + getWhatIsMyIP()); - //System.out.println("PROBE stanford : " + getStanford()); - //System.out.println("PROBE ipid : " + getIPID()); - //System.out.println("retrieveIP-NAT : " + retrieveIP(true,"")); - //System.out.println("retrieveIP : " + retrieveIP(false,"12345")); - - System.out.println(isProper(args[0]) ? "yes" : "no"); - } } diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index c39400a49..cf5e2383e 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -1954,7 +1954,7 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi return "no DHT distribution: disabled by network.unit.dht"; } if (getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false")) { - return "no DHT distribution: not enabled (ser setting)"; + return "no DHT distribution: not enabled (per setting)"; } if (indexSegment.urlMetadata().size() < 10) { return "no DHT distribution: loadedURL.size() = " + indexSegment.urlMetadata().size(); @@ -2090,7 +2090,6 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi yacySeed ys; String seedListFileURL; yacyURL url; - ArrayList seedList; Iterator enu; int lc; final int sc = peers.sizeConnected(); @@ -2133,8 +2132,7 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi } else { ssc++; final byte[] content = Client.wget(url.toString(), reqHeader, (int) getConfigLong("bootstrapLoadTimeout", 20000)); - seedList = FileUtils.strings(content, "UTF-8"); - enu = seedList.iterator(); + enu = FileUtils.strings(content); lc = 0; while (enu.hasNext()) { ys = yacySeed.genRemoteSeed(enu.next(), null, false); @@ -2199,7 +2197,7 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi // sending request final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); - final HashMap result = FileUtils.table(Client.wget(url.toString(), reqHeader, 10000), "UTF-8"); + final HashMap result = FileUtils.table(Client.wget(url.toString(), reqHeader, 10000)); if (result == null) return new HashMap(); return result; } catch (final Exception e) { diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index b00c2a417..dbacd46cc 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -21,7 +21,6 @@ package de.anomic.tools; -import java.util.ArrayList; import java.util.Hashtable; import de.anomic.crawler.retrieval.HTTPLoader; @@ -29,7 +28,6 @@ import de.anomic.http.client.Client; import de.anomic.http.client.RemoteProxyConfig; import de.anomic.http.metadata.HeaderFramework; import de.anomic.http.metadata.RequestHeader; -import de.anomic.kelondro.util.FileUtils; import de.anomic.yacy.yacyURL; public class loaderThreads { @@ -61,10 +59,6 @@ public class loaderThreads { this.failed = 0; } - public void newPropLoaderThread(final String name, final yacyURL url) { - newThread(name, url, new propLoader()); - } - public void newThread(final String name, final yacyURL url, final loaderProcess process) { final Thread t = new loaderThread(url, process); threads.put(name, t); @@ -166,56 +160,5 @@ public class loaderThreads { } } - - public static class propLoader extends loaderCore implements loaderProcess { - - public propLoader() { - this.status = STATUS_READY; - } - - public synchronized void feed(final byte[] v) { - this.status = STATUS_RUNNING; - this.completion = 1; - int line = 0; - String s, key, value; - int p; - final ArrayList lines = FileUtils.strings(v, "UTF-8"); - try { - while ((this.run) && (line < lines.size())) { - // parse line and construct a property - s = lines.get(line); - if ((s != null) && ((p = s.indexOf('=')) > 0)) { - key = s.substring(0, p).trim(); - value = s.substring(p + 1).trim(); - if (key.length() > 0) result.put(key, value); - } - // update thread information - line++; - this.completion = 100 * line / lines.size(); - } - if (line == lines.size()) { - this.status = STATUS_COMPLETED; - } else { - this.status = STATUS_ABORTED; - } - return; - } catch (final Exception e) { - this.status = STATUS_FAILED; - this.error = e; - return; - } - } - } - - /* - public static void main(String[] args) { - httpdProxyHandler.setRemoteProxyConfig(httpRemoteProxyConfig.init("192.168.1.122", 3128)); - loaderThreads loader = new loaderThreads(); - try { - loader.newPropLoaderThread("load1", new yacyURL("http://www.anomic.de/superseed.txt", null)); - } catch (MalformedURLException e) { - - } - } - */ + } diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 939ce94ce..854b37409 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -128,7 +128,7 @@ public final class yacyClient { final long start = System.currentTimeMillis(); final byte[] content = wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", post, 30000, false); yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds"); - result = FileUtils.table(content, "UTF-8"); + result = FileUtils.table(content); break; } catch (final Exception e) { if (Thread.currentThread().isInterrupted()) { @@ -315,7 +315,7 @@ public final class yacyClient { // send request try { final byte[] content = postToFile(target, "query.html", post, 10000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); if (result == null || result.size() == 0) { return null; } //final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time @@ -337,7 +337,7 @@ public final class yacyClient { // send request try { final byte[] content = postToFile(target, "query.html", post, 5000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); if (result == null || result.size() == 0) { return -1; } return Integer.parseInt(result.get("response")); @@ -360,7 +360,7 @@ public final class yacyClient { // send request try { final byte[] content = postToFile(target, "query.html", post, 5000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); if ((result == null) || (result.size() == 0)) return -1; final String resp = result.get("response"); @@ -484,7 +484,7 @@ public final class yacyClient { // send request HashMap result = null; try { - result = FileUtils.table(wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000), "UTF-8"); + result = FileUtils.table(wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000)); } catch (final IOException e) { yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore); //yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage()); @@ -682,7 +682,7 @@ public final class yacyClient { // send request try { final byte[] content = postToFile(seedDB, targetHash, "message.html", post, 5000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); return result; } catch (final Exception e) { // most probably a network time-out exception @@ -709,7 +709,7 @@ public final class yacyClient { // send request try { final byte[] content = postToFile(seedDB, targetHash, "message.html", post, 20000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); return result; } catch (final Exception e) { yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage()); @@ -745,7 +745,7 @@ public final class yacyClient { // send request try { final byte[] content = wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 10000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); return result; } catch (final Exception e) { // most probably a network time-out exception @@ -769,7 +769,7 @@ public final class yacyClient { // send request try { final byte[] content = wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 20000); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); return result; } catch (final Exception e) { yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage()); @@ -844,7 +844,7 @@ public final class yacyClient { // send request try { final byte[] content = wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", post, 10000); - return FileUtils.table(content, "UTF-8"); + return FileUtils.table(content); } catch (final Exception e) { // most probably a network time-out exception yacyCore.log.logSevere("yacyClient.crawlReceipt error:" + e.getMessage()); @@ -994,7 +994,7 @@ public final class yacyClient { post.add(new DefaultCharsetStringPart("indexes", entrypost.toString())); try { final byte[] content = wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody); - final ArrayList v = FileUtils.strings(content, "UTF-8"); + final Iterator v = FileUtils.strings(content); // this should return a list of urlhashes that are unknown final HashMap result = FileUtils.table(v); @@ -1037,7 +1037,7 @@ public final class yacyClient { post.add(new DefaultCharsetStringPart("urlc", Integer.toString(urlc))); try { final byte[] content = wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody); - final ArrayList v = FileUtils.strings(content, "UTF-8"); + final Iterator v = FileUtils.strings(content); final HashMap result = FileUtils.table(v); // return the transfered url data in bytes (for debugging only) @@ -1059,7 +1059,7 @@ public final class yacyClient { if (address == null) { address = "localhost:8080"; } try { final byte[] content = wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", post, 5000); - return FileUtils.table(content, "UTF-8"); + return FileUtils.table(content); } catch (final Exception e) { yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage()); return null; @@ -1098,7 +1098,7 @@ public final class yacyClient { "&query=" + new String(wordhashe) + "&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT), reqHeader, 10000, target.getHexHash() + ".yacyh"); - final HashMap result = FileUtils.table(content, "UTF-8"); + final HashMap result = FileUtils.table(content); System.out.println("Result=" + result.toString()); } catch (final Exception e) { e.printStackTrace(); diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 299be7447..1863e5651 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -550,8 +550,9 @@ public final class yacySeedDB implements AlternativeDomainNames { if (s != null) try { seedPotentialDB.put(hash, seed.getMap()); return;} catch (final IOException e) {} } - public yacySeed lookupByName(final String peerName) { + public yacySeed lookupByName(String peerName) { // reads a seed by searching by name + if (peerName.endsWith(".yacy")) peerName = peerName.substring(0, peerName.length() - 5); // local peer? if (peerName.equals("localpeer")) { @@ -772,7 +773,7 @@ public final class yacySeedDB implements AlternativeDomainNames { // test download if (Log.isFine("YACY")) Log.logFine("YACY", "SaveSeedList: Trying to download seed-file '" + seedURL + "'."); - final ArrayList check = downloadSeedFile(seedURL); + final Iterator check = downloadSeedFile(seedURL); // Comparing if local copy and uploaded copy are equal final String errorMsg = checkCache(uv, check); @@ -793,7 +794,7 @@ public final class yacySeedDB implements AlternativeDomainNames { return log; } - private ArrayList downloadSeedFile(final yacyURL seedURL) throws IOException { + private Iterator downloadSeedFile(final yacyURL seedURL) throws IOException { // Configure http headers final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.PRAGMA, "no-cache"); @@ -826,25 +827,23 @@ public final class yacySeedDB implements AlternativeDomainNames { content = FileUtils.uncompressGZipArray(content); // convert it into an array - return FileUtils.strings(content,"UTF-8"); + return FileUtils.strings(content); } catch (final Exception e) { throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage()); } } - private String checkCache(final ArrayList uv, final ArrayList check) { - if ((check == null) || (uv == null) || (uv.size() != check.size())) { - if (Log.isFine("YACY")) Log.logFine("YACY", "SaveSeedList: Local and uploades seed-list " + - "contains varying numbers of entries." + - "\n\tLocal seed-list: " + ((uv == null) ? "null" : Integer.toString(uv.size())) + " entries" + - "\n\tRemote seed-list: " + ((check == null) ? "null" : Integer.toString(check.size())) + " enties"); - return "Entry count is different: uv.size() = " + ((uv == null) ? "null" : Integer.toString(uv.size())) + ", check = " + ((check == null) ? "null" : Integer.toString(check.size())); - } + private String checkCache(final ArrayList uv, final Iterator check) { + if ((check == null) || (uv == null)) { + if (Log.isFine("YACY")) Log.logFine("YACY", "SaveSeedList: Local and uploades seed-list are different"); + return "Entry count is different: uv.size() = " + ((uv == null) ? "null" : Integer.toString(uv.size())); + } if (Log.isFine("YACY")) Log.logFine("YACY", "SaveSeedList: Comparing local and uploades seed-list entries ..."); - int i; - for (i = 0; i < uv.size(); i++) { - if (!((uv.get(i)).equals(check.get(i)))) return "Element at position " + i + " is different."; + int i = 0; + while (check.hasNext() && i < uv.size()) { + if (!((uv.get(i)).equals(check.next()))) return "Element at position " + i + " is different."; + i++; } // no difference found