From 9ca46a8c694f130b9ea98582669764e5f26b80da Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 24 Jul 2007 00:46:17 +0000 Subject: [PATCH] indexing of local (intranet) urls enabled To do this, one must create a separate YaCy network that has a local URL domain A description how to do this is here: http://www.yacy-websuche.de/wiki/index.php/De:Netzdefinition git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4001 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/ConfigBasic.java | 3 +- htroot/PerformanceMemory_p.java | 6 +- htroot/PerformanceQueues_p.html | 10 +- htroot/PerformanceQueues_p.java | 19 +- htroot/Statistics.java | 4 +- htroot/Status.java | 3 +- htroot/htdocsdefault/dir.java | 5 +- htroot/index.java | 4 +- htroot/www/welcome.java | 5 +- htroot/xml/status_p.java | 2 +- htroot/yacy/crawlOrder.java | 5 + htroot/yacy/crawlReceipt.java | 7 + htroot/yacy/hello.java | 12 +- htroot/yacy/transferRWI.java | 2 +- htroot/yacy/transferURL.java | 12 + htroot/yacysearch.java | 3 +- source/de/anomic/http/httpc.java | 167 +-------- source/de/anomic/http/httpd.java | 19 +- source/de/anomic/http/httpdProxyHandler.java | 9 +- source/de/anomic/net/ftpc.java | 4 +- source/de/anomic/net/natLib.java | 4 +- source/de/anomic/plasma/plasmaCrawlEURL.java | 2 +- .../de/anomic/plasma/plasmaCrawlStacker.java | 16 +- source/de/anomic/plasma/plasmaHTCache.java | 4 +- .../de/anomic/plasma/plasmaSwitchboard.java | 53 ++- source/de/anomic/plasma/plasmaURL.java | 4 +- source/de/anomic/plasma/plasmaWordIndex.java | 3 - source/de/anomic/server/serverCore.java | 153 +------- source/de/anomic/server/serverDomains.java | 347 ++++++++++++++++++ source/de/anomic/yacy/yacyClient.java | 8 +- source/de/anomic/yacy/yacySearch.java | 4 +- source/de/anomic/yacy/yacySeed.java | 5 +- source/de/anomic/yacy/yacySeedDB.java | 9 +- yacy.network.unit | 2 +- 35 files changed, 502 insertions(+), 415 deletions(-) create mode 100644 source/de/anomic/server/serverDomains.java diff 
--git a/build.properties b/build.properties index dc2d479fc..db6c8287a 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.537 +releaseVersion=0.538 releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFileParentDir=yacy diff --git a/htroot/ConfigBasic.java b/htroot/ConfigBasic.java index 3ceae6c73..a2f46dd95 100644 --- a/htroot/ConfigBasic.java +++ b/htroot/ConfigBasic.java @@ -59,6 +59,7 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverInstantThread; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -181,7 +182,7 @@ public class ConfigBasic { int idx = host.indexOf(":"); if (idx != -1) host = host.substring(0,idx); } else { - host = serverCore.publicLocalIP().getHostAddress(); + host = serverDomains.myPublicLocalIP().getHostAddress(); } prop.put("reconnect", 1); diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index bc852ada9..062b94523 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -49,11 +49,11 @@ import java.util.Iterator; import java.util.Map; import de.anomic.http.httpHeader; -import de.anomic.http.httpc; import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroRecords; import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverMemory; import de.anomic.server.serverObjects; @@ -211,9 +211,9 @@ public class PerformanceMemory_p { prop.put("Xms", Xms.substring(0, Xms.length() - 1)); // other caching structures - long amount = 
httpc.nameCacheHitSize(); + long amount = serverDomains.nameCacheHitSize(); prop.put("namecache.hit",Long.toString(amount)); - amount = httpc.nameCacheNoCachingListSize(); + amount = serverDomains.nameCacheNoCachingListSize(); prop.put("namecache.noCache",Long.toString(amount)); amount = plasmaSwitchboard.urlBlacklist.blacklistCacheSize(); prop.put("blacklistcache.size",Long.toString(amount)); diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html index 0f5ae4ba5..78a549798 100644 --- a/htroot/PerformanceQueues_p.html +++ b/htroot/PerformanceQueues_p.html @@ -120,11 +120,8 @@ Maximum number of words in cache: - - - - - + + This is is the number of word indexes that shall be held in the @@ -134,10 +131,9 @@ Initial space of words in cache: - + - - This is is the init size of space for words in cache. diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index 37fc51b43..0943eff06 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -182,16 +182,12 @@ public class PerformanceQueues_p { prop.put("table", c); if ((post != null) && (post.containsKey("cacheSizeSubmit"))) { - int wordOutCacheMaxCount = post.getInt("wordOutCacheMaxCount", 20000); - switchboard.setConfig("wordCacheMaxCount", Integer.toString(wordOutCacheMaxCount)); - switchboard.wordIndex.setMaxWordCount(wordOutCacheMaxCount); - - int wordInCacheMaxCount = post.getInt("wordInCacheMaxCount", 1000); - switchboard.setConfig("indexDistribution.dhtReceiptLimit", Integer.toString(wordInCacheMaxCount)); - switchboard.wordIndex.setInMaxWordCount(wordInCacheMaxCount); + int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000); + switchboard.setConfig(plasmaSwitchboard.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount)); + switchboard.wordIndex.setMaxWordCount(wordCacheMaxCount); - int wordCacheInitCount = post.getInt("wordCacheInitCount", 30000); - switchboard.setConfig("wordCacheInitCount", 
Integer.toString(wordCacheInitCount)); + int wordCacheInitCount = post.getInt(plasmaSwitchboard.WORDCACHE_INIT_COUNT, 30000); + switchboard.setConfig(plasmaSwitchboard.WORDCACHE_INIT_COUNT, Integer.toString(wordCacheInitCount)); int flushsize = post.getInt("wordFlushSize", 2000); switchboard.setConfig("wordFlushSize", Integer.toString(flushsize)); @@ -282,9 +278,8 @@ public class PerformanceQueues_p { prop.put("minAgeOfWCache", "" + (switchboard.wordIndex.minAgeOfDHTOutCache() / 1000 / 60)); // minutes prop.put("minAgeOfKCache", "" + (switchboard.wordIndex.minAgeOfDHTInCache() / 1000 / 60)); // minutes prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180")); - prop.put("wordOutCacheMaxCount", switchboard.getConfigLong("wordCacheMaxCount", 20000)); - prop.put("wordInCacheMaxCount", switchboard.getConfigLong("indexDistribution.dhtReceiptLimit", 1000)); - prop.put("wordCacheInitCount", switchboard.getConfigLong("wordCacheInitCount", 30000)); + prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboard.WORDCACHE_MAX_COUNT, 20000)); + prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboard.WORDCACHE_INIT_COUNT, 30000)); prop.put("wordFlushSize", switchboard.getConfigLong("wordFlushSize", 2000)); prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000")); prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess); diff --git a/htroot/Statistics.java b/htroot/Statistics.java index 912f74909..d5db8967a 100644 --- a/htroot/Statistics.java +++ b/htroot/Statistics.java @@ -50,7 +50,7 @@ import java.util.Map; import de.anomic.http.httpHeader; import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -81,7 +81,7 @@ public class Statistics { if (count >= maxCount) break; urlString = 
(String) map.get("key"); try { url = new URL(urlString); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (serverCore.isNotLocal(url))) { + if ((url != null) && (!serverDomains.isLocal(url))) { prop.put("page_backlinks_list_" + count + "_dark", ((dark) ? 1 : 0)); dark =! dark; prop.put("page_backlinks_list_" + count + "_url", urlString); prop.put("page_backlinks_list_" + count + "_date", map.get("date")); diff --git a/htroot/Status.java b/htroot/Status.java index 90b8fc7c7..14b3568c2 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -57,6 +57,7 @@ import de.anomic.http.httpdByteCountOutputStream; import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverMemory; import de.anomic.server.serverObjects; @@ -180,7 +181,7 @@ public class Status { } else { prop.put("extPortFormat",0); } - prop.put("host", serverCore.publicLocalIP().getHostAddress()); + prop.put("host", serverDomains.myPublicLocalIP().getHostAddress()); // ssl support prop.put("sslSupport",sb.getConfig("keyStore", "").length() == 0 ? 
0:1); diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java index 01acb523c..f63c2942c 100644 --- a/htroot/htdocsdefault/dir.java +++ b/htroot/htdocsdefault/dir.java @@ -69,6 +69,7 @@ import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverMemory; import de.anomic.server.serverObjects; @@ -106,9 +107,9 @@ public class dir { prop.put("peername", env.getConfig("peerName", "")); prop.put("peerdomain", env.getConfig("peerName", "").toLowerCase()); prop.put("peeraddress", yacyCore.seedDB.mySeed.getPublicAddress()); - prop.put("hostname", serverCore.publicIP()); + prop.put("hostname", serverDomains.myPublicIP()); try{ - prop.put("hostip", InetAddress.getByName(serverCore.publicIP()).getHostAddress()); + prop.put("hostip", InetAddress.getByName(serverDomains.myPublicIP()).getHostAddress()); }catch(UnknownHostException e){ prop.put("hostip", "Unknown Host Exception"); } diff --git a/htroot/index.java b/htroot/index.java index 577319b6c..fc0324bcd 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -36,7 +36,7 @@ import de.anomic.http.httpHeader; import de.anomic.net.URL; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -86,7 +86,7 @@ public class index { } catch (MalformedURLException e) { url = null; } - if ((url != null) && (serverCore.isNotLocal(url))) { + if ((url != null) && (!serverDomains.isLocal(url))) { final HashMap referrerprop = new HashMap(); referrerprop.put("count", "1"); referrerprop.put("clientip", header.get(httpHeader.CONNECTION_PROP_CLIENTIP)); diff --git a/htroot/www/welcome.java b/htroot/www/welcome.java 
index 5706ac2ee..c51e560a3 100644 --- a/htroot/www/welcome.java +++ b/htroot/www/welcome.java @@ -52,6 +52,7 @@ import java.net.UnknownHostException; import de.anomic.http.httpHeader; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; @@ -69,9 +70,9 @@ public class welcome { prop.put("peername", env.getConfig("peerName", "")); prop.put("peerdomain", env.getConfig("peerName", "").toLowerCase()); prop.put("peeraddress", yacyCore.seedDB.mySeed.getPublicAddress()); - prop.put("hostname", serverCore.publicIP()); + prop.put("hostname", serverDomains.myPublicIP()); try{ - prop.put("hostip", InetAddress.getByName(serverCore.publicIP()).getHostAddress()); + prop.put("hostip", InetAddress.getByName(serverDomains.myPublicIP()).getHostAddress()); }catch(UnknownHostException e){ prop.put("hostip", "Unknown Host Exception"); } diff --git a/htroot/xml/status_p.java b/htroot/xml/status_p.java index 5e2311e1d..12f74db78 100644 --- a/htroot/xml/status_p.java +++ b/htroot/xml/status_p.java @@ -63,7 +63,7 @@ public class status_p { prop.put("wordCacheSize", switchboard.wordIndex.dhtOutCacheSize() + switchboard.wordIndex.dhtInCacheSize()); prop.put("wordCacheWSize", switchboard.wordIndex.dhtOutCacheSize()); prop.put("wordCacheKSize", switchboard.wordIndex.dhtInCacheSize()); - prop.put("wordCacheMaxCount", switchboard.getConfig("wordCacheMaxCount", "10000")); + prop.put("wordCacheMaxCount", switchboard.getConfig(plasmaSwitchboard.WORDCACHE_MAX_COUNT, "10000")); // // memory usage and system attributes diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java index 5f65145f0..76c265e45 100644 --- a/htroot/yacy/crawlOrder.java +++ b/htroot/yacy/crawlOrder.java @@ -191,6 +191,11 @@ public final class crawlOrder { env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0)); } + if 
(!switchboard.acceptURL(new URL(newURL))) { + env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + newURL); + return null; + } + // adding URL to noticeURL Queue env.getLog().logFinest("crawlOrder: a: url='" + newURL + "'"); diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index 63fa48b2a..71371fa06 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -146,6 +146,13 @@ public final class crawlReceipt { return prop; } + // check if the entry is in our network domain + if (!switchboard.acceptURL(comp.url())) { + log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url outside of our domain) for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr); + prop.putASIS("delay", "9999"); + return prop; + } + if (result.equals("fill")) try { // put new entry into database switchboard.wordIndex.loadedURL.store(entry); diff --git a/htroot/yacy/hello.java b/htroot/yacy/hello.java index 0ba2f8b99..fd7f14253 100644 --- a/htroot/yacy/hello.java +++ b/htroot/yacy/hello.java @@ -46,6 +46,7 @@ // javac -classpath .:../../classes hello.java // if the shell's current path is HTROOT +import java.net.InetAddress; import java.util.Iterator; import java.util.Map; @@ -53,6 +54,7 @@ import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverDate; +import de.anomic.server.serverDomains; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyClient; @@ -95,6 +97,8 @@ public final class hello { // we easily know the caller's IP: final String clientip = (String) header.get("CLIENTIP", ""); // read an artificial header addendum + InetAddress ias = serverDomains.dnsResolve(clientip); + if (ias == null) return null; final String userAgent = (String) header.get(httpHeader.USER_AGENT, ""); final String reportedip = remoteSeed.get(yacySeed.IP, ""); final String 
reportedPeerType = remoteSeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR); @@ -126,10 +130,10 @@ public final class hello { boolean isNotLocal = true; // we are only allowed to connect to the client IP address if it's not our own address - if(serverCore.portForwardingEnabled || serverCore.useStaticIP) - isNotLocal = serverCore.isNotLocal(clientip); - - if(isNotLocal) { + if (serverCore.portForwardingEnabled || serverCore.useStaticIP) { + isNotLocal = !ias.isSiteLocalAddress(); + } + if (isNotLocal) { serverCore.checkInterruption(); prop.putASIS("yourip", clientip); diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 1139c618b..367c3ac6b 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -107,7 +107,7 @@ public final class transferRWI { granted = false; // don't accept more words if there are too many words to flush result = "busy"; pause = 60000; - } /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) { + } /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong(plasmaSwitchboard.WORDCACHE_MAX_COUNT, 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) { // we are too busy flushing the ramCache to receive indexes sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". 
We are too busy (wordcachesize=" + sb.wordIndex.dhtOutCacheSize() + ")."); granted = false; // don't accept more words if there are too many words to flush diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index cae6bbab9..f15e95016 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -109,6 +109,7 @@ public final class transferURL { urls = (String) post.get("url" + i); if (urls == null) { yacyCore.log.logFine("transferURL: got null URL-string from peer " + otherPeerName); + blocked++; continue; } @@ -116,6 +117,7 @@ public final class transferURL { lEntry = sb.wordIndex.loadedURL.newEntry(urls); if (lEntry == null) { yacyCore.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls); + blocked++; continue; } @@ -123,12 +125,14 @@ public final class transferURL { indexURLEntry.Components comp = lEntry.comp(); if (comp.url() == null) { yacyCore.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls); + blocked++; continue; } // check whether entry is too old if (lEntry.freshdate().getTime() <= freshdate) { yacyCore.log.logFine("transerURL: received too old URL from peer " + otherPeerName + ": " + lEntry.freshdate()); + blocked++; continue; } @@ -141,6 +145,14 @@ public final class transferURL { continue; } + // check if the entry is in our network domain + if (!sb.acceptURL(comp.url())) { + yacyCore.log.logFine("transferURL: blocked URL outside of our domain '" + comp.url().toNormalform(false, true) + "' from peer " + otherPeerName); + lEntry = null; + blocked++; + continue; + } + // write entry to database try { sb.wordIndex.loadedURL.store(lEntry); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 8fa2231bd..a46665c14 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -74,6 +74,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaURL; import 
de.anomic.plasma.plasmaSearchResults; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -111,7 +112,7 @@ public class yacysearch { if (referer != null) { URL url; try { url = new URL(referer); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (serverCore.isNotLocal(url))) { + if ((url != null) && (!serverDomains.isLocal(url))) { final HashMap referrerprop = new HashMap(); referrerprop.put("count", "1"); referrerprop.put("clientip", header.get("CLIENTIP")); diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index 920681014..07455eaf3 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -55,18 +55,13 @@ import java.net.SocketException; import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Collections; import java.util.Date; import java.util.Enumeration; import java.util.GregorianCalendar; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; import java.util.TimeZone; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -81,10 +76,10 @@ import javax.net.ssl.X509TrustManager; import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.net.URL; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; @@ -112,6 +107,7 @@ public final class httpc { public static String userAgent; private static final int 
terminalMaxLength = 30000; private static final TimeZone GMTTimeZone = TimeZone.getTimeZone("GMT"); + /** * This string is initialized on loading of this class and contains * information about the current OS. @@ -123,20 +119,6 @@ public final class httpc { static { HTTPGMTFormatter.setTimeZone(GMTTimeZone); } - static final HashMap reverseMappingCache = new HashMap(); - - // the dns cache - private static final Map nameCacheHit = Collections.synchronizedMap(new HashMap()); // a not-synchronized map resulted in deadlocks - private static final Set nameCacheMiss = Collections.synchronizedSet(new HashSet()); - private static final kelondroMScoreCluster nameCacheHitAges = new kelondroMScoreCluster(); - private static final kelondroMScoreCluster nameCacheMissAges = new kelondroMScoreCluster(); - private static final long startTime = System.currentTimeMillis(); - private static final int maxNameCacheHitAge = 24 * 60 * 60; // 24 hours in minutes - private static final int maxNameCacheMissAge = 24 * 60 * 60; // 24 hours in minutes - private static final int maxNameCacheHitSize = 3000; - private static final int maxNameCacheMissSize = 3000; - public static final List nameCacheNoCachingPatterns = Collections.synchronizedList(new LinkedList()); - private static final Set nameCacheNoCachingList = Collections.synchronizedSet(new HashSet()); /** * A Object Pool containing all pooled httpc-objects. @@ -164,6 +146,8 @@ public final class httpc { String requestPath = null; private boolean allowContentEncoding = true; public static boolean yacyDebugMode = false; + + static final HashMap reverseMappingCache = new HashMap(); /** * Indicates if the current object was removed from pool because the maximum limit @@ -417,147 +401,6 @@ public final class httpc { return (!this.socket.isConnected()) || (this.socket.isClosed()); } - /** - * Does an DNS-Check to resolve a hostname to an IP. - * - * @param host Hostname of the host in demand. - * @return String with the ip. 
null, if the host could not be resolved. - */ - public static InetAddress dnsResolve(String host) { - if ((host == null)||(host.length() == 0)) return null; - host = host.toLowerCase().trim(); - - // trying to resolve host by doing a name cache lookup - InetAddress ip = (InetAddress) nameCacheHit.get(host); - if (ip != null) return ip; - - if (nameCacheMiss.contains(host)) return null; - try { - boolean doCaching = true; - ip = InetAddress.getByName(host); - if ( - (ip == null) || - (ip.isLoopbackAddress()) || - (nameCacheNoCachingList.contains(ip.getHostName())) - ) { - doCaching = false; - } else { - Iterator noCachingPatternIter = nameCacheNoCachingPatterns.iterator(); - while (noCachingPatternIter.hasNext()) { - String nextPattern = (String) noCachingPatternIter.next(); - if (ip.getHostName().matches(nextPattern)) { - // disallow dns caching for this host - nameCacheNoCachingList.add(ip.getHostName()); - doCaching = false; - break; - } - } - } - - if (doCaching) { - // remove old entries - flushHitNameCache(); - - // add new entries - synchronized (nameCacheHit) { - nameCacheHit.put(ip.getHostName(), ip); - nameCacheHitAges.setScore(ip.getHostName(), intTime(System.currentTimeMillis())); - } - } - return ip; - } catch (UnknownHostException e) { - // remove old entries - flushMissNameCache(); - - // add new entries - nameCacheMiss.add(host); - nameCacheMissAges.setScore(host, intTime(System.currentTimeMillis())); - } - return null; - } - -// /** -// * Checks wether an hostname already is in the DNS-cache. -// * FIXME: This method should use dnsResolve, as the code is 90% identical? -// * -// * @param host Searched for hostname. -// * @return true, if the hostname already is in the cache. 
-// */ -// public static boolean dnsFetch(String host) { -// if ((nameCacheHit.get(host) != null) /*|| (nameCacheMiss.contains(host)) */) return false; -// try { -// String ip = InetAddress.getByName(host).getHostAddress(); -// if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { -// nameCacheHit.put(host, ip); -// return true; -// } -// return false; -// } catch (UnknownHostException e) { -// //nameCacheMiss.add(host); -// return false; -// } -// } - - /** - * Returns the number of entries in the nameCacheHit map - * - * @return int The number of entries in the nameCacheHit map - */ - public static int nameCacheHitSize() { - return nameCacheHit.size(); - } - - public static int nameCacheMissSize() { - return nameCacheMiss.size(); - } - - /** - * Returns the number of entries in the nameCacheNoCachingList list - * - * @return int The number of entries in the nameCacheNoCachingList list - */ - public static int nameCacheNoCachingListSize() { - return nameCacheNoCachingList.size(); - } - - /** - * Converts the time to a non negative int - * - * @param longTime Time in miliseconds since 01/01/1970 00:00 GMT - * @return int seconds since startTime - */ - private static int intTime(long longTime) { - return (int) Math.max(0, ((longTime - startTime) / 1000)); - } - - /** - * Removes old entries from the dns hit cache - */ - public static void flushHitNameCache() { - int cutofftime = intTime(System.currentTimeMillis()) - maxNameCacheHitAge; - String k; - while ((nameCacheHitAges.size() > maxNameCacheHitSize) || (nameCacheHitAges.getMinScore() < cutofftime)) { - k = (String) nameCacheHitAges.getMinObject(); - if (nameCacheHit.remove(k) == null) break; // ensure termination - nameCacheHitAges.deleteScore(k); - } - - } - - /** - * Removes old entries from the dns miss cache - */ - public static void flushMissNameCache() { - int cutofftime = intTime(System.currentTimeMillis()) - maxNameCacheMissAge; - String k; - while ((nameCacheMissAges.size() > 
maxNameCacheMissSize) || (nameCacheMissAges.getMinScore() < cutofftime)) { - k = (String) nameCacheMissAges.getMinObject(); - if (!nameCacheMiss.remove(k)) break; // ensure termination - nameCacheMissAges.deleteScore(k); - } - - } - /** * Returns the given date in an HTTP-usable format. * (according to RFC822) @@ -664,7 +507,7 @@ public final class httpc { InetSocketAddress address = null; if (!this.remoteProxyUse) { // only try to resolve the address if we are not using a proxy - InetAddress hostip = dnsResolve(server); + InetAddress hostip = serverDomains.dnsResolve(server); if (hostip == null) throw new UnknownHostException(server); address = new InetSocketAddress(hostip, port); } else { diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index 0281f10f4..d1fdd41d0 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -72,6 +72,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverHandler; import de.anomic.server.serverObjects; @@ -1218,15 +1219,15 @@ public final class httpd implements serverHandler { String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP,"127.0.0.1"); // check if ip is local ip address - InetAddress hostAddress = httpc.dnsResolve(clientIP); + InetAddress hostAddress = serverDomains.dnsResolve(clientIP); if (hostAddress == null) { - tp.put("host", serverCore.publicLocalIP().getHostAddress()); + tp.put("host", serverDomains.myPublicLocalIP().getHostAddress()); tp.put("port", serverCore.getPortNr(switchboard.getConfig("port", "8080"))); } else if (hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress()) { - tp.put("host", serverCore.publicLocalIP().getHostAddress()); + tp.put("host", serverDomains.myPublicLocalIP().getHostAddress()); 
tp.put("port", serverCore.getPortNr(switchboard.getConfig("port", "8080"))); } else { - tp.put("host", serverCore.publicIP()); + tp.put("host", serverDomains.myPublicIP()); tp.put("port", (serverCore.portForwardingEnabled && (serverCore.portForwarding != null)) ? Integer.toString(serverCore.portForwarding.getPort()) : Integer.toString(serverCore.getPortNr(switchboard.getConfig("port", "8080")))); @@ -1539,9 +1540,9 @@ public final class httpd implements serverHandler { boolean isThisHostIP = false; try { //InetAddress hostAddress = InetAddress.getByName(hostName); - InetAddress hostAddress = httpc.dnsResolve(hostName); + InetAddress hostAddress = serverDomains.dnsResolve(hostName); //InetAddress forwardingAddress = InetAddress.getByName(serverCore.portForwarding.getHost()); - InetAddress forwardingAddress = httpc.dnsResolve(serverCore.portForwarding.getHost()); + InetAddress forwardingAddress = serverDomains.dnsResolve(serverCore.portForwarding.getHost()); if ((hostAddress==null)||(forwardingAddress==null)) return false; if (hostAddress.equals(forwardingAddress)) return true; @@ -1559,8 +1560,8 @@ public final class httpd implements serverHandler { // resolve ip addresses if (thisSeedIP == null || thisSeedPort == null) return false; - InetAddress seedInetAddress = httpc.dnsResolve(thisSeedIP); - InetAddress hostInetAddress = httpc.dnsResolve(hostName); + InetAddress seedInetAddress = serverDomains.dnsResolve(thisSeedIP); + InetAddress hostInetAddress = serverDomains.dnsResolve(hostName); if (seedInetAddress == null || hostInetAddress == null) return false; // if it's equal, the hostname points to this seed @@ -1573,7 +1574,7 @@ public final class httpd implements serverHandler { boolean isThisHostIP = false; try { // final InetAddress clientAddress = InetAddress.getByName(hostName); - final InetAddress clientAddress = httpc.dnsResolve(hostName); + final InetAddress clientAddress = serverDomains.dnsResolve(hostName); if (clientAddress == null) return false; if 
(clientAddress.isAnyLocalAddress() || clientAddress.isLoopbackAddress()) return true; diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 768822225..ded62cdda 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -101,6 +101,7 @@ import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.http.ResourceInfo; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -1491,16 +1492,16 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt String testHostName = null; if (!orgHostName.startsWith("www.")) { testHostName = "www." + orgHostName; - InetAddress addr = httpc.dnsResolve(testHostName); + InetAddress addr = serverDomains.dnsResolve(testHostName); if (addr != null) testHostNames.add(testHostName); } else if (orgHostName.startsWith("www.")) { testHostName = orgHostName.substring(4); - InetAddress addr = httpc.dnsResolve(testHostName); + InetAddress addr = serverDomains.dnsResolve(testHostName); if (addr != null) if (addr != null) testHostNames.add(testHostName); } if (orgHostName.length()>4 && orgHostName.startsWith("www") && (orgHostName.charAt(3) != '.')) { testHostName = orgHostName.substring(0,3) + "." + orgHostName.substring(3); - InetAddress addr = httpc.dnsResolve(testHostName); + InetAddress addr = serverDomains.dnsResolve(testHostName); if (addr != null) if (addr != null) testHostNames.add(testHostName); } @@ -1510,7 +1511,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt while (iter.hasNext()) { String topLevelDomain = (String) iter.next(); testHostName = orgHostName.substring(0,pos) + "." 
+ topLevelDomain; - InetAddress addr = httpc.dnsResolve(testHostName); + InetAddress addr = serverDomains.dnsResolve(testHostName); if (addr != null) if (addr != null) testHostNames.add(testHostName); } } diff --git a/source/de/anomic/net/ftpc.java b/source/de/anomic/net/ftpc.java index 0ce27f2bd..7d150ff83 100644 --- a/source/de/anomic/net/ftpc.java +++ b/source/de/anomic/net/ftpc.java @@ -72,7 +72,7 @@ import java.util.StringTokenizer; import java.util.TimeZone; import java.util.Vector; -import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; public class ftpc { @@ -1609,7 +1609,7 @@ cd .. // save ip address in high byte order //byte[] Bytes = LocalIp.getAddress(); - byte[] Bytes = serverCore.publicIP().getBytes(); + byte[] Bytes = serverDomains.myPublicIP().getBytes(); // bytes greater than 127 should not be printed as negative short[] Shorts = new short[4]; diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java index 806f5862a..9cd2633e1 100644 --- a/source/de/anomic/net/natLib.java +++ b/source/de/anomic/net/natLib.java @@ -50,7 +50,7 @@ import java.util.ArrayList; import de.anomic.http.httpc; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.tools.disorderHeap; import de.anomic.tools.nxTools; @@ -192,7 +192,7 @@ public class natLib { ip = ia.getHostAddress(); if (isProper(ip)) return ip; }*/ - ip = serverCore.publicIP(); + ip = serverDomains.myPublicIP(); if (isProper(ip)) return ip; // now go the uneasy way and ask some web responder diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index a801f013f..34518df4e 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -36,7 +36,7 @@ public class plasmaCrawlEURL { public static final String DENIED_URL_NULL = "denied_(url_null)"; public static final String 
DENIED_MALFORMED_URL = "denied_(malformed_url)"; public static final String DENIED_UNSUPPORTED_PROTOCOL = "denied_(unsupported_protocol)"; - public static final String DENIED_PRIVATE_IP_ADDRESS = "denied_(private_ip_address)"; + public static final String DENIED_IP_ADDRESS_NOT_IN_DECLARED_DOMAIN = "denied_(address_not_in_declared_domain)"; public static final String DENIED_LOOPBACK_IP_ADDRESS = "denied_(loopback_ip_address)"; public static final String DENIED_CACHEFILE_PATH_TOO_LONG = "denied_(cachefile_path_too_long)"; public static final String DENIED_INVALID_CACHEFILE_PATH = "denied_(invalid_cachefile_path)"; diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 957ee31ff..4af393197 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -70,6 +70,7 @@ import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroTree; import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; +import de.anomic.server.serverDomains; import de.anomic.server.serverSemaphore; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; @@ -288,8 +289,8 @@ public final class plasmaCrawlStacker { } // check if ip is local ip address - checkInterruption(); // TODO: this is protocol specific - InetAddress hostAddress = httpc.dnsResolve(nexturl.getHost()); + checkInterruption(); // TODO: this is protocol specific + InetAddress hostAddress = serverDomains.dnsResolve(nexturl.getHost()); if(this.sb.getConfig("yacyDebugMode", "true").equals("true")){ //just ignore the check in debugmode (useful for tor(.eff.org) }else if (hostAddress == null) { @@ -300,16 +301,11 @@ public final class plasmaCrawlStacker { "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); return reason; } - } else if (hostAddress.isSiteLocalAddress()) { - reason = plasmaCrawlEURL.DENIED_PRIVATE_IP_ADDRESS; - 
this.log.logFine("Host in URL '" + nexturlString + "' has private IP address. " + + } else if (!sb.acceptURL(hostAddress)) { + reason = plasmaCrawlEURL.DENIED_IP_ADDRESS_NOT_IN_DECLARED_DOMAIN + "[" + sb.getConfig("network.unit.domain", "unknown") + "]"; + this.log.logFine("Host in URL '" + nexturlString + "' has IP address outside of declared range (" + sb.getConfig("network.unit.domain", "unknown") + "). " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); return reason; - } else if (hostAddress.isLoopbackAddress()) { - reason = plasmaCrawlEURL.DENIED_LOOPBACK_IP_ADDRESS; - this.log.logFine("Host in URL '" + nexturlString + "' has loopback IP address. " + - "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); - return reason; } // check blacklist diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 7977b7021..4476e6c0a 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -72,7 +72,6 @@ import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; -import de.anomic.http.httpc; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroBase64Order; @@ -84,6 +83,7 @@ import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; import de.anomic.plasma.cache.UnsupportedProtocolException; import de.anomic.server.serverCodings; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverInstantThread; import de.anomic.server.serverSystem; @@ -456,7 +456,7 @@ public final class plasmaHTCache { while ((doms.size() > 0) && (fileCount < 50) && ((System.currentTimeMillis() - start) < 60000)) { if (Thread.currentThread().isInterrupted()) return; dom = (String) doms.getMaxObject(); - InetAddress ip = httpc.dnsResolve(dom); + InetAddress ip = 
serverDomains.dnsResolve(dom); if (ip == null) continue; result += ", " + dom + "=" + ip.getHostAddress(); this.log.logConfig("PRE-FILLED " + dom + "=" + ip.getHostAddress()); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index e3a0e3add..157fccec3 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -155,6 +155,7 @@ import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.urlPattern.defaultURLPattern; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverAbstractSwitch; +import de.anomic.server.serverDomains; import de.anomic.server.serverFileUtils; import de.anomic.server.serverInstantThread; import de.anomic.server.serverObjects; @@ -256,7 +257,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public int totalPPM = 0; public double totalQPM = 0d; public TreeMap clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used - + public boolean acceptLocalURLs, acceptGlobalURLs; /* * Remote Proxy configuration */ @@ -947,6 +948,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser i++; } + // set URL domain acceptance + this.acceptGlobalURLs = "global.any".indexOf(getConfig("network.unit.domain", "global")) >= 0; + this.acceptLocalURLs = "local.any".indexOf(getConfig("network.unit.domain", "global")) >= 0; + // load values from configs this.plasmaPath = new File(rootPath, getConfig(DBPATH, DBPATH_DEFAULT)); this.log.logConfig("Plasma DB Path: " + this.plasmaPath.toString()); @@ -1136,11 +1141,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser int wordCacheMaxCount = Math.max((int) getConfigLong(WORDCACHE_INIT_COUNT, 30000), (int) getConfigLong(WORDCACHE_MAX_COUNT, 20000)); setConfig(WORDCACHE_MAX_COUNT, 
Integer.toString(wordCacheMaxCount)); - wordIndex.setMaxWordCount(wordCacheMaxCount); - - int wordInCacheMaxCount = (int) getConfigLong(INDEX_DIST_DHT_RECEIPT_LIMIT, 1000); - wordIndex.setInMaxWordCount(wordInCacheMaxCount); - wordIndex.setWordFlushSize((int) getConfigLong("wordFlushSize", 1000)); + wordIndex.setMaxWordCount(wordCacheMaxCount); + wordIndex.setWordFlushSize((int) getConfigLong("wordFlushSize", 10000)); // set a maximum amount of memory for the caches long memprereq = Math.max(getConfigLong(INDEXER_MEMPREREQ, 0), wordIndex.minMem()); @@ -1279,7 +1281,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String[] noCachingEntries = noCachingList.split(","); for (i = 0; i < noCachingEntries.length; i++) { String entry = noCachingEntries[i].trim(); - httpc.nameCacheNoCachingPatterns.add(entry); + serverDomains.nameCacheNoCachingPatterns.add(entry); } // generate snippets cache @@ -1338,7 +1340,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser Long.parseLong(getConfig(INDEXER_MEMPREREQ , "1000000"))); } - deployThread(PROXY_CACHE_ENQUEUE, "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack", null, + deployThread(PROXY_CACHE_ENQUEUE, "Proxy Cache Enqueue", "job takes new input files from RAM stack, stores them, and hands over to the Indexing Stack", null, new serverInstantThread(this, PROXY_CACHE_ENQUEUE_METHOD_START, PROXY_CACHE_ENQUEUE_METHOD_JOBCOUNT, PROXY_CACHE_ENQUEUE_METHOD_FREEMEM), 10000); deployThread(CRAWLJOB_REMOTE_TRIGGERED_CRAWL, "Remote Crawl Job", "thread that performes a single crawl/indexing step triggered by a remote peer", null, new serverInstantThread(this, CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_START, CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_JOBCOUNT, CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_FREEMEM), 30000); @@ -1371,8 +1373,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements 
ser log.logConfig("Finished Switchboard Initialization"); } - - + public void initMessages(long ramMessage_time) { this.log.logConfig("Starting Message Board"); File messageDbFile = new File(workPath, DBFILE_MESSAGE); @@ -1381,8 +1382,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ", " + this.messageDB.size() + " entries" + ", " + ppRamString(messageDbFile.length()/1024)); } - - + public void initWiki(long ramWiki_time) { this.log.logConfig("Starting Wiki Board"); File wikiDbFile = new File(workPath, DBFILE_WIKI); @@ -1391,6 +1391,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ", " + this.wikiDB.size() + " entries" + ", " + ppRamString(wikiDbFile.length()/1024)); } + public void initBlog(long ramBlog_time) { this.log.logConfig("Starting Blog"); File blogDbFile = new File(workPath, DBFILE_BLOG); @@ -1405,6 +1406,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ", " + this.blogCommentDB.size() + " entries" + ", " + ppRamString(blogCommentDbFile.length()/1024)); } + public void initBookmarks(){ this.log.logConfig("Loading Bookmarks DB"); File bookmarksFile = new File(workPath, DBFILE_BOOKMARKS); @@ -1415,7 +1417,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser this.log.logConfig(this.bookmarksDB.tagsSize()+" Tag, "+this.bookmarksDB.bookmarksSize()+" Bookmarks"); } - public static plasmaSwitchboard getSwitchboard(){ return sb; } @@ -1439,6 +1440,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // depending on the robinson cluster type, the peer String may be a peerhash (b64-hash) // or a ip:port String or simply a ip String // if this robinson mode does not define a cluster membership, false is returned + if (peer == null) return false; if (!isRobinsonMode()) return false; String clustermode = getConfig(CLUSTER_MODE, CLUSTER_MODE_PUBLIC_PEER); if 
(clustermode.equals(CLUSTER_MODE_PRIVATE_CLUSTER)) { @@ -1470,6 +1472,18 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return false; } } + + public boolean acceptURL(URL url) { + // returns true if the url can be accepted accoring to network.unit.domain + return acceptURL(serverDomains.dnsResolve(url.getHost())); + } + + public boolean acceptURL(InetAddress hostAddress) { + // returns true if the url can be accepted accoring to network.unit.domain + if (this.acceptGlobalURLs && this.acceptLocalURLs) return true; // fast shortcut + boolean local = hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); + return (this.acceptGlobalURLs && !local) || (this.acceptLocalURLs && local); + } public String urlExists(String hash) { // tests if hash occurrs in any database @@ -1668,18 +1682,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * * check if ip is local ip address // TODO: remove this procotol specific code here * ========================================================================= */ - InetAddress hostAddress = httpc.dnsResolve(entry.url().getHost()); + InetAddress hostAddress = serverDomains.dnsResolve(entry.url().getHost()); if (hostAddress == null) { if (this.remoteProxyConfig == null || !this.remoteProxyConfig.useProxy()) { this.log.logFine("Unknown host in URL '" + entry.url() + "'. Will not be indexed."); doIndexing = false; } - } else if (hostAddress.isSiteLocalAddress()) { + } else if (!acceptURL(hostAddress)) { this.log.logFine("Host in URL '" + entry.url() + "' has private ip address. Will not be indexed."); - doIndexing = false; - } else if (hostAddress.isLoopbackAddress()) { - this.log.logFine("Host in URL '" + entry.url() + "' has loopback ip address. 
Will not be indexed."); - doIndexing = false; + doIndexing = false; } /* ========================================================================= @@ -3270,7 +3281,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // do the transfer - int peerCount = (yacyCore.seedDB.mySeed.isJunior()) ? 1 : 3; + int peerCount = Math.max(1, (yacyCore.seedDB.mySeed.isJunior()) ? + (int) getConfigLong("network.unit.dhtredundancy.junior", 1) : + (int) getConfigLong("network.unit.dhtredundancy.senior", 1)); // set redundancy factor long starttime = System.currentTimeMillis(); boolean ok = dhtTransferProcess(dhtTransferChunk, peerCount); diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java index 1be87f00d..1a6cea2c1 100644 --- a/source/de/anomic/plasma/plasmaURL.java +++ b/source/de/anomic/plasma/plasmaURL.java @@ -39,7 +39,7 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.net.URL; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCodings; -import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.yacy.yacySeedDB; public class plasmaURL { @@ -421,7 +421,7 @@ public class plasmaURL { tld = host.substring(p + 1); dom = host.substring(0, p); } - Integer ID = (serverCore.isNotLocal(tld)) ? (Integer) TLDID.get(tld) : null; // identify local addresses + Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses int id = (ID == null) ? 
7 : ID.intValue(); // local addresses are flagged with id=7 boolean isHTTP = url.getProtocol().equals("http"); p = dom.lastIndexOf('.'); // locate subdomain diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 2859454b7..d52b4ec7a 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -128,9 +128,6 @@ public final class plasmaWordIndex implements indexRI { public void setMaxWordCount(int maxWords) { dhtOutCache.setMaxWordCount(maxWords); - } - - public void setInMaxWordCount(int maxWords) { dhtInCache.setMaxWordCount(maxWords); } diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 54338a492..31e61cfa2 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -61,7 +61,6 @@ import java.net.NetworkInterface; import java.net.ServerSocket; import java.net.Socket; import java.net.SocketException; -import java.net.UnknownHostException; import java.nio.channels.ClosedByInterruptException; import java.security.KeyStore; import java.util.Enumeration; @@ -79,8 +78,6 @@ import org.apache.commons.pool.impl.GenericObjectPool.Config; import de.anomic.http.httpc; import de.anomic.icap.icapd; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.logging.serverLog; import de.anomic.server.portForwarding.serverPortForwarding; import de.anomic.tools.PKCS12Tool; @@ -126,15 +123,7 @@ public final class serverCore extends serverAbstractThread implements serverThre private serverSwitch switchboard; // the command class switchboard Hashtable denyHost; int commandMaxLength; - - private static InetAddress[] localAddresses = null; - static { - try { - localAddresses = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()); - } catch (UnknownHostException e) { - localAddresses = new InetAddress[0]; - } - } + /** * The session-object 
pool @@ -389,7 +378,7 @@ public final class serverCore extends serverAbstractThread implements serverThre serverCore.portForwarding.connect(); serverCore.portForwardingEnabled = true; - yacyCore.seedDB.mySeed.put(yacySeed.IP,publicIP()); + yacyCore.seedDB.mySeed.put(yacySeed.IP, serverDomains.myPublicIP()); yacyCore.seedDB.mySeed.put(yacySeed.PORT,Integer.toString(serverCore.portForwarding.getPort())); } catch (Exception e) { serverCore.portForwardingEnabled = false; @@ -404,7 +393,7 @@ public final class serverCore extends serverAbstractThread implements serverThre } else { serverCore.portForwardingEnabled = false; serverCore.portForwarding = null; - yacyCore.seedDB.mySeed.put(yacySeed.IP,publicIP()); + yacyCore.seedDB.mySeed.put(yacySeed.IP, serverDomains.myPublicIP()); yacyCore.seedDB.mySeed.put(yacySeed.PORT,Integer.toString(serverCore.getPortNr(this.switchboard.getConfig("port", "8080")))); } if(! this.switchboard.getConfig("staticIP", "").equals("")) @@ -418,142 +407,10 @@ public final class serverCore extends serverAbstractThread implements serverThre public void setPoolConfig(GenericObjectPool.Config newConfig) { this.theSessionPool.setConfig(newConfig); - } - - public static boolean isNotLocal(URL url) { - return isNotLocal(url.getHost()); - } - - /** - * Checks if a given address (hostname or IP) is *not* a local address - * - * @param address Address to check - * @return boolean, true if address is public, false if address is private - */ - public static boolean isNotLocal(String address) { - - assert (address != null); - - // check local ip addresses - if (address.equals("localhost") || - address.startsWith("127") || - address.startsWith("192.168") || - address.startsWith("10.") || - address.startsWith("169.254") || - // 172.16.0.0-172.31.255.255 (I think this is faster than a regex) - (address.startsWith("172.") && ( - address.startsWith("172.16.") || address.startsWith("172.17.") || - address.startsWith("172.18.") || address.startsWith("172.19.") || - 
address.startsWith("172.20.") || address.startsWith("172.21.") || - address.startsWith("172.22.") || address.startsWith("172.23.") || - address.startsWith("172.24.") || address.startsWith("172.25.") || - address.startsWith("172.26.") || address.startsWith("172.27.") || - address.startsWith("172.28.") || address.startsWith("172.29.") || - address.startsWith("172.30.") || address.startsWith("172.31.") - )) - ) return false; - - // make a dns resolve if a hostname is given and check again - final InetAddress clientAddress = httpc.dnsResolve(address); - if (clientAddress != null) { - if ((clientAddress.isAnyLocalAddress()) || (clientAddress.isLoopbackAddress())) return false; - if (address.charAt(0) > '9') address = clientAddress.getHostAddress(); - } - - // finally check if there are other local IP adresses that are not in the standard IP range - for (int i = 0; i < localAddresses.length; i++) { - if (localAddresses[i].equals(clientAddress)) return false; - } - - // the address must be a global address - return true; - } - - public static String publicIP() { - try { - - // if a static IP was configured, we have to return it here ... 
- plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard(); - if(sb != null){ - String staticIP=sb.getConfig("staticIP", ""); - if( (!staticIP.equals("")) ){ - return staticIP; - } - } - - // If port forwarding was enabled we need to return the remote IP Address - if ((serverCore.portForwardingEnabled)&&(serverCore.portForwarding != null)) { - //does not return serverCore.portForwarding.getHost(), because hostnames are not valid, except in DebugMode - return InetAddress.getByName(serverCore.portForwarding.getHost()).getHostAddress(); - } - - // otherwise we return the real IP address of this host - InetAddress pLIP = publicLocalIP(); - if (pLIP != null) return pLIP.getHostAddress(); - return null; - } catch (java.net.UnknownHostException e) { - System.err.println("ERROR: (internal) " + e.getMessage()); - return null; - } - } - - public static InetAddress publicLocalIP() { - try { - String hostName; - try { - hostName = InetAddress.getLocalHost().getHostName(); - } catch (java.net.UnknownHostException e) { - hostName = "localhost"; // hopin' nothing serious happened only the hostname changed while running yacy - System.err.println("ERROR: (internal) " + e.getMessage()); - } - // list all addresses - InetAddress[] ia = InetAddress.getAllByName(hostName); - //for (int i = 0; i < ia.length; i++) System.out.println("IP: " + ia[i].getHostAddress()); // DEBUG - if (ia.length == 0) { - try { - return InetAddress.getLocalHost(); - } catch (UnknownHostException e) { - try { - return InetAddress.getByName("127.0.0.1"); - } catch (UnknownHostException ee) { - return null; - } - } - } - if (ia.length == 1) { - // only one network connection available - return ia[0]; - } - // we have more addresses, find an address that is not local - int b0, b1; - for (int i = 0; i < ia.length; i++) { - b0 = 0Xff & ia[i].getAddress()[0]; - b1 = 0Xff & ia[i].getAddress()[1]; - if ((b0 != 10) && // class A reserved - (b0 != 127) && // loopback - ((b0 != 172) || (b1 < 16) || (b1 > 31)) && // 
class B reserved - ((b0 != 192) || (b1 != 168)) && // class C reserved - (ia[i].getHostAddress().indexOf(":") < 0) - ) return ia[i]; - } - // there is only a local address, we filter out the possibly returned loopback address 127.0.0.1 - for (int i = 0; i < ia.length; i++) { - if (((0Xff & ia[i].getAddress()[0]) != 127) && - (ia[i].getHostAddress().indexOf(":") < 0)) return ia[i]; - } - // if all fails, give back whatever we have - for (int i = 0; i < ia.length; i++) { - if (ia[i].getHostAddress().indexOf(":") < 0) return ia[i]; - } - return ia[0]; - } catch (java.net.UnknownHostException e) { - System.err.println("ERROR: (internal) " + e.getMessage()); - return null; - } } public void open() { - this.log.logConfig("* server started on " + publicLocalIP() + ":" + this.extendedPort); + this.log.logConfig("* server started on " + serverDomains.myPublicLocalIP() + ":" + this.extendedPort); } public void freemem() { @@ -1275,7 +1132,7 @@ public final class serverCore extends serverAbstractThread implements serverThre writeLine(this.commandObj.error(ite.getTargetException())); } catch (NoSuchMethodException nsme) { System.out.println("ERROR B " + this.userAddress.getHostAddress()); - if (isNotLocal(this.userAddress.getHostAddress().toString())) { + if (!this.userAddress.isSiteLocalAddress()) { if (serverCore.this.denyHost != null) { serverCore.this.denyHost.put((""+this.userAddress.getHostAddress()), "deny"); // block client: hacker attempt } diff --git a/source/de/anomic/server/serverDomains.java b/source/de/anomic/server/serverDomains.java new file mode 100644 index 000000000..9291e7702 --- /dev/null +++ b/source/de/anomic/server/serverDomains.java @@ -0,0 +1,347 @@ +// serverDNSCache.java +// ----------------------------- +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 23.07.2007 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.server; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import de.anomic.kelondro.kelondroMScoreCluster; +import de.anomic.net.URL; +import de.anomic.plasma.plasmaSwitchboard; + +public class serverDomains { + + // a dns cache + private static final Map nameCacheHit = Collections.synchronizedMap(new HashMap()); // a not-synchronized map resulted in deadlocks + private static final Set nameCacheMiss = Collections.synchronizedSet(new HashSet()); + private static final kelondroMScoreCluster nameCacheHitAges = new kelondroMScoreCluster(); + private static final kelondroMScoreCluster nameCacheMissAges = new kelondroMScoreCluster(); + private static final int maxNameCacheHitAge = 24 * 60 * 60; // 24 hours in minutes + private static final int maxNameCacheMissAge = 24 * 60 * 60; // 24 hours in 
minutes + private static final int maxNameCacheHitSize = 3000; + private static final int maxNameCacheMissSize = 3000; + public static final List nameCacheNoCachingPatterns = Collections.synchronizedList(new LinkedList()); + private static final Set nameCacheNoCachingList = Collections.synchronizedSet(new HashSet()); + private static final long startTime = System.currentTimeMillis(); + + /** + * Converts the time to a non negative int + * + * @param longTime Time in miliseconds since 01/01/1970 00:00 GMT + * @return int seconds since startTime + */ + private static int intTime(long longTime) { + return (int) Math.max(0, ((longTime - startTime) / 1000)); + } + + /** + * Does an DNS-Check to resolve a hostname to an IP. + * + * @param host Hostname of the host in demand. + * @return String with the ip. null, if the host could not be resolved. + */ + public static InetAddress dnsResolve(String host) { + if ((host == null) || (host.length() == 0)) return null; + host = host.toLowerCase().trim(); + + // trying to resolve host by doing a name cache lookup + InetAddress ip = (InetAddress) nameCacheHit.get(host); + if (ip != null) return ip; + + if (nameCacheMiss.contains(host)) return null; + try { + boolean doCaching = true; + ip = InetAddress.getByName(host); + if ((ip == null) || + (ip.isLoopbackAddress()) || + (nameCacheNoCachingList.contains(ip.getHostName())) + ) { + doCaching = false; + } else { + Iterator noCachingPatternIter = nameCacheNoCachingPatterns.iterator(); + while (noCachingPatternIter.hasNext()) { + String nextPattern = (String) noCachingPatternIter.next(); + if (ip.getHostName().matches(nextPattern)) { + // disallow dns caching for this host + nameCacheNoCachingList.add(ip.getHostName()); + doCaching = false; + break; + } + } + } + + if (doCaching) { + // remove old entries + flushHitNameCache(); + + // add new entries + synchronized (nameCacheHit) { + nameCacheHit.put(ip.getHostName(), ip); + nameCacheHitAges.setScore(ip.getHostName(), 
intTime(System.currentTimeMillis())); + } + } + return ip; + } catch (UnknownHostException e) { + // remove old entries + flushMissNameCache(); + + // add new entries + nameCacheMiss.add(host); + nameCacheMissAges.setScore(host, intTime(System.currentTimeMillis())); + } + return null; + } + +// /** +// * Checks wether an hostname already is in the DNS-cache. +// * FIXME: This method should use dnsResolve, as the code is 90% identical? +// * +// * @param host Searched for hostname. +// * @return true, if the hostname already is in the cache. +// */ +// public static boolean dnsFetch(String host) { +// if ((nameCacheHit.get(host) != null) /*|| (nameCacheMiss.contains(host)) */) return false; +// try { +// String ip = InetAddress.getByName(host).getHostAddress(); +// if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { +// nameCacheHit.put(host, ip); +// return true; +// } +// return false; +// } catch (UnknownHostException e) { +// //nameCacheMiss.add(host); +// return false; +// } +// } + + /** + * Returns the number of entries in the nameCacheHit map + * + * @return int The number of entries in the nameCacheHit map + */ + public static int nameCacheHitSize() { + return nameCacheHit.size(); + } + + public static int nameCacheMissSize() { + return nameCacheMiss.size(); + } + + /** + * Returns the number of entries in the nameCacheNoCachingList list + * + * @return int The number of entries in the nameCacheNoCachingList list + */ + public static int nameCacheNoCachingListSize() { + return nameCacheNoCachingList.size(); + } + + + /** + * Removes old entries from the dns hit cache + */ + public static void flushHitNameCache() { + int cutofftime = intTime(System.currentTimeMillis()) - maxNameCacheHitAge; + String k; + while ((nameCacheHitAges.size() > maxNameCacheHitSize) || (nameCacheHitAges.getMinScore() < cutofftime)) { + k = (String) nameCacheHitAges.getMinObject(); + if (nameCacheHit.remove(k) == null) break; // ensure termination + 
nameCacheHitAges.deleteScore(k); + } + + } + + /** + * Removes old entries from the dns miss cache + */ + public static void flushMissNameCache() { + int cutofftime = intTime(System.currentTimeMillis()) - maxNameCacheMissAge; + String k; + while ((nameCacheMissAges.size() > maxNameCacheMissSize) || (nameCacheMissAges.getMinScore() < cutofftime)) { + k = (String) nameCacheMissAges.getMinObject(); + if (!nameCacheMiss.remove(k)) break; // ensure termination + nameCacheMissAges.deleteScore(k); + } + + } + + // checks for local/global IP range and local IP + + public static boolean isLocal(URL url) { + return dnsResolve(url.getHost()).isSiteLocalAddress(); + } + + private static InetAddress[] localAddresses = null; + static { + try { + localAddresses = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()); + } catch (UnknownHostException e) { + localAddresses = new InetAddress[0]; + } + } + + public static boolean isLocal(String address) { + + assert (address != null); + + // check local ip addresses + if (address.equals("localhost") || address.startsWith("127") + || address.startsWith("192.168") + || address.startsWith("10.") + || address.startsWith("169.254") + || + // 172.16.0.0-172.31.255.255 (I think this is faster than a regex) + (address.startsWith("172.") && (address.startsWith("172.16.") + || address.startsWith("172.17.") + || address.startsWith("172.18.") + || address.startsWith("172.19.") + || address.startsWith("172.20.") + || address.startsWith("172.21.") + || address.startsWith("172.22.") + || address.startsWith("172.23.") + || address.startsWith("172.24.") + || address.startsWith("172.25.") + || address.startsWith("172.26.") + || address.startsWith("172.27.") + || address.startsWith("172.28.") + || address.startsWith("172.29.") + || address.startsWith("172.30.") + || address.startsWith("172.31.")))) + return true; + + // make a dns resolve if a hostname is given and check again + final InetAddress clientAddress = dnsResolve(address); + if 
(clientAddress != null) { + if ((clientAddress.isAnyLocalAddress()) || (clientAddress.isLoopbackAddress())) return true; + if (address.charAt(0) > '9') address = clientAddress.getHostAddress(); + } + + // finally check if there are other local IP adresses that are not in + // the standard IP range + for (int i = 0; i < localAddresses.length; i++) { + if (localAddresses[i].equals(clientAddress)) return true; + } + + // the address must be a global address + return false; + } + + public static String myPublicIP() { + try { + + // if a static IP was configured, we have to return it here ... + plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard(); + if (sb != null) { + String staticIP = sb.getConfig("staticIP", ""); + if ((!staticIP.equals(""))) { + return staticIP; + } + } + + // If port forwarding was enabled we need to return the remote IP + // Address + if ((serverCore.portForwardingEnabled) && (serverCore.portForwarding != null)) { + // does not return serverCore.portForwarding.getHost(), because + // hostnames are not valid, except in DebugMode + return InetAddress.getByName( + serverCore.portForwarding.getHost()).getHostAddress(); + } + + // otherwise we return the real IP address of this host + InetAddress pLIP = myPublicLocalIP(); + if (pLIP != null) return pLIP.getHostAddress(); + return null; + } catch (java.net.UnknownHostException e) { + System.err.println("ERROR: (internal) " + e.getMessage()); + return null; + } + } + + public static InetAddress myPublicLocalIP() { + try { + String hostName; + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (java.net.UnknownHostException e) { + hostName = "localhost"; // hopin' nothing serious happened only the hostname changed while running yacy + System.err.println("ERROR: (internal) " + e.getMessage()); + } + // list all addresses + InetAddress[] ia = InetAddress.getAllByName(hostName); + // for (int i = 0; i < ia.length; i++) System.out.println("IP: " + + // ia[i].getHostAddress()); // 
DEBUG + if (ia.length == 0) { + try { + return InetAddress.getLocalHost(); + } catch (UnknownHostException e) { + try { + return InetAddress.getByName("127.0.0.1"); + } catch (UnknownHostException ee) { + return null; + } + } + } + if (ia.length == 1) { + // only one network connection available + return ia[0]; + } + // we have more addresses, find an address that is not local + int b0, b1; + for (int i = 0; i < ia.length; i++) { + b0 = 0Xff & ia[i].getAddress()[0]; + b1 = 0Xff & ia[i].getAddress()[1]; + if ((b0 != 10) && // class A reserved + (b0 != 127) && // loopback + ((b0 != 172) || (b1 < 16) || (b1 > 31)) && // class B reserved + ((b0 != 192) || (b1 != 168)) && // class C reserved + (ia[i].getHostAddress().indexOf(":") < 0)) + return ia[i]; + } + // there is only a local address, we filter out the possibly + // returned loopback address 127.0.0.1 + for (int i = 0; i < ia.length; i++) { + if (((0Xff & ia[i].getAddress()[0]) != 127) && (ia[i].getHostAddress().indexOf(":") < 0)) return ia[i]; + } + // if all fails, give back whatever we have + for (int i = 0; i < ia.length; i++) { + if (ia[i].getHostAddress().indexOf(":") < 0) return ia[i]; + } + return ia[0]; + } catch (java.net.UnknownHostException e) { + System.err.println("ERROR: (internal) " + e.getMessage()); + return null; + } + } + +} diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index b276144d0..2f970cdfc 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -71,6 +71,7 @@ import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverObjects; import de.anomic.tools.crypt; import de.anomic.tools.nxTools; @@ -155,7 +156,7 @@ public final class yacyClient { // we overwrite our own IP number only, if we do not portForwarding if 
(serverCore.portForwardingEnabled || serverCore.useStaticIP) { - yacyCore.seedDB.mySeed.put(yacySeed.IP, serverCore.publicIP()); + yacyCore.seedDB.mySeed.put(yacySeed.IP, serverDomains.myPublicIP()); } else { yacyCore.seedDB.mySeed.put(yacySeed.IP, (String) result.get("yourip")); } @@ -468,6 +469,11 @@ public final class yacyClient { yacyCore.log.logInfo("remote search (client): filtered blacklisted url " + comp.url() + " from peer " + target.getName()); continue; // block with backlist } + + if (!plasmaSwitchboard.getSwitchboard().acceptURL(comp.url())) { + yacyCore.log.logInfo("remote search (client): rejected url outside of our domain " + comp.url() + " from peer " + target.getName()); + continue; // reject url outside of our domain + } // save the url entry indexRWIEntry entry; diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index c2c20e949..0f4ba60c6 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -260,9 +260,9 @@ public class yacySearch extends Thread { // prepare seed targets and threads final yacySeed[] targetPeers = (clusterselection == null) ? 
selectDHTPeers(plasmaSearchQuery.hashes2Set(wordhashes), targets) : selectClusterPeers(clusterselection); - if (targetPeers == null) return null; + if (targetPeers == null) return new yacySearch[0]; targets = targetPeers.length; - if (targets == 0) return null; + if (targets == 0) return new yacySearch[0]; yacySearch[] searchThreads = new yacySearch[targets]; for (int i = 0; i < targets; i++) { searchThreads[i]= new yacySearch(wordhashes, excludehashes, urlhashes, prefer, filter, maxDist, true, targets, targetPeers[i], diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index e30c5baef..6d81cb54c 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -78,6 +78,7 @@ import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverSystem; import de.anomic.tools.bitfield; @@ -242,7 +243,7 @@ public class yacySeed { /** * Generate a default peer name assembled of the following fragments in order: *
    - *
  • the public IP (may be an IPv4- or IPv6-IP) obtained by {@link serverCore#publicIP()} followed by a minus sign (-)
  • + *
  • the public IP (may be an IPv4- or IPv6-IP) obtained by {@link serverDomains#myPublicIP()} followed by a minus sign (-)
  • *
  • a pseudo-random value, the {@link yacyCore#speedKey}
  • *
  • the string 'dpn' which assumingly stands for Default Peer Name
  • *
  • shortened OS information, the {@link serverSystem#infoKey()}
  • @@ -251,7 +252,7 @@ public class yacySeed { * @return a default peer name following the above pattern whereas dots, underscores and colons are replaced by minus signs */ public static String makeDefaultPeerName() { - String name = serverCore.publicIP() + "-" + yacyCore.speedKey + "dpn" + serverSystem.infoKey() + (System.currentTimeMillis() & 99); + String name = serverDomains.myPublicIP() + "-" + yacyCore.speedKey + "dpn" + serverSystem.infoKey() + (System.currentTimeMillis() & 99); name = name.replace('.', '-'); name = name.replace('_', '-'); name = name.replace(':', '-'); diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 896db34b5..c2d907dfb 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -71,6 +71,7 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; +import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.serverSwitch; @@ -838,9 +839,9 @@ public final class yacySeedDB { if ((check == null) || (uv == null) || (uv.size() != check.size())) { serverLog.logFine("YACY","SaveSeedList: Local and uploades seed-list " + "contains varying numbers of entries." + - "\n\tLocal seed-list: " + uv.size() + " entries" + - "\n\tRemote seed-list: " + check.size() + " enties"); - return "Entry count is different"; + "\n\tLocal seed-list: " + ((uv == null) ? "null" : Integer.toString(uv.size())) + " entries" + + "\n\tRemote seed-list: " + ((check == null) ? "null" : Integer.toString(check.size())) + " enties"); + return "Entry count is different: uv.size() = " + ((uv == null) ? "null" : Integer.toString(uv.size())) + ", check = " + ((check == null) ? 
"null" : Integer.toString(check.size())); } serverLog.logFine("YACY","SaveSeedList: Comparing local and uploades seed-list entries ..."); @@ -893,7 +894,7 @@ public final class yacySeedDB { if (seed == null) return null; if ((seed == mySeed) && (!(seed.isOnline()))) { // take local ip instead of external - return serverCore.publicIP() + ":" + serverCore.getPortNr(sb.getConfig("port", "8080")) + ((subdom == null) ? "" : ("/" + subdom)); + return serverDomains.myPublicIP() + ":" + serverCore.getPortNr(sb.getConfig("port", "8080")) + ((subdom == null) ? "" : ("/" + subdom)); } return seed.getPublicAddress() + ((subdom == null) ? "" : ("/" + subdom)); } else { diff --git a/yacy.network.unit b/yacy.network.unit index 0790fd1ed..267969bc2 100644 --- a/yacy.network.unit +++ b/yacy.network.unit @@ -30,7 +30,7 @@ # Defintion of property domains: # network.unit.name = # network.unit.description = -# network.unit.domain = 'global'|'local' +# network.unit.domain = 'global'|'local'|'any' # network.unit.dhtredundancy = # network.unit.bootstrap.seedlist = # network.unit.protocol.control = 'uncontrolled'|'moderated'|'controlled'