* Make .yacy domains crawlable (.yacy domains are local domains, so this only applies in custom networks/peers)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7334 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 14 years ago
parent fd74bc388c
commit 741a87a3e9

@ -40,6 +40,8 @@ import net.yacy.repository.Blacklist;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.Latency;
import de.anomic.http.server.AlternativeDomainNames;
import de.anomic.http.server.HTTPDemon;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
@ -80,11 +82,13 @@ public final class HTTPLoader {
throw new IOException("Redirection counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
}
final String host = request.url().getHost();
DigestURI url = request.url();
final String host = url.getHost();
if (host == null || host.length() < 2) throw new IOException("host is not well-formed: '" + host + "'");
final String path = request.url().getFile();
int port = request.url().getPort();
final boolean ssl = request.url().getProtocol().equals("https");
final String path = url.getFile();
int port = url.getPort();
final boolean ssl = url.getProtocol().equals("https");
if (port < 0) port = (ssl) ? 443 : 80;
// check if url is in blacklist
@ -94,6 +98,15 @@ public final class HTTPLoader {
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}
// resolve yacy and yacyh domains
AlternativeDomainNames yacyResolver = HTTPDemon.getAlternativeResolver();
if(yacyResolver != null) {
String yAddress = yacyResolver.resolve(host);
if(yAddress != null) {
url = new DigestURI(url.getProtocol() + "://" + yAddress + path);
}
}
// take a file from the net
Response response = null;
@ -113,7 +126,7 @@ public final class HTTPLoader {
client.setTimout(socketTimeout);
client.setHeader(requestHeader.entrySet());
// send request
final byte[] responseBody = client.GETbytes(request.url().toString(), maxFileSize);
final byte[] responseBody = client.GETbytes(url.toString(), maxFileSize);
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode();

@ -1427,7 +1427,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
/**
* @return the alternativeResolver
*/
static AlternativeDomainNames getAlternativeResolver() {
public static AlternativeDomainNames getAlternativeResolver() {
return alternativeResolver;
}

Loading…
Cancel
Save