From 7fe878423175ff3cb3c421c09a496472f0733011 Mon Sep 17 00:00:00 2001 From: theli Date: Wed, 7 Sep 2005 21:38:03 +0000 Subject: [PATCH] *) URLs pointing to a server having a private ip address will not be indexed anymore See: http://www.yacy-forum.de/viewtopic.php?p=9408 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@682 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/plasma/plasmaSwitchboard.java | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index b4c60c6c3..6fef44133 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -105,8 +105,10 @@ package de.anomic.plasma; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; +import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URL; +import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -544,13 +546,18 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (plasmaParser.supportedMimeTypesContains(entry.responseHeader.mime()) || plasmaParser.supportedFileExt(entry.url)) { + // registering the cachefile as in use + if (entry.cacheFile.exists()) { + cacheManager.filesInUse.add(entry.cacheFile); + } + // enqueue for further crawling enQueue(sbQueue.newEntry(entry.url, plasmaURL.urlHash(entry.referrerURL()), entry.requestHeader.ifModifiedSince(), entry.requestHeader.containsKey(httpHeader.COOKIE), entry.initiator(), entry.depth, entry.profile.handle(), entry.name() )); - } + } return true; } @@ -1092,7 +1099,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // explicit delete/free resources - if ((entry != null) && (entry.profile() != null) && (!(entry.profile().storeHTCache()))) 
cacheManager.deleteFile(entry.url()); + if ((entry != null) && (entry.profile() != null) && (!(entry.profile().storeHTCache()))) { + cacheManager.filesInUse.remove(entry.cacheFile()); + cacheManager.deleteFile(entry.url()); + } entry = null; } } @@ -1127,10 +1137,29 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return reason; } + // check if ip is local ip address + try { + InetAddress hostAddress = InetAddress.getByName(nexturl.getHost()); + if (hostAddress.isSiteLocalAddress()) { + reason = "denied_(private_ip_address)"; + log.logFine("Host in URL '" + nexturlString + "' has private ip address."); + return reason; + } else if (hostAddress.isLoopbackAddress()) { + reason = "denied_(loopback_ip_address)"; + log.logFine("Host in URL '" + nexturlString + "' has loopback ip address."); + return reason; + } + } catch (UnknownHostException e) { + reason = "denied_(unknown_host)"; + log.logFine("Unknown host in URL '" + nexturlString + "'."); + return reason; + } + // check blacklist String hostlow = nexturl.getHost().toLowerCase(); if (urlBlacklist.isListed(hostlow, nexturl.getPath())) { reason = "denied_(url_in_blacklist)"; + log.logFine("URL '" + nexturlString + "' is in blacklist."); return reason; }