From 0f769215b5f80387d03b44018b983b3ecdbd5cfa Mon Sep 17 00:00:00 2001 From: theli Date: Wed, 30 Nov 2005 04:52:22 +0000 Subject: [PATCH] *) urlRedirector now uses HTTP HEAD requests to determine the MIME type of a resource before it checks whether a URL has to be rejected. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1146 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/urlRedirector/urlRedirectord.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/source/de/anomic/urlRedirector/urlRedirectord.java b/source/de/anomic/urlRedirector/urlRedirectord.java index d793ee4f4..4def8aaa3 100644 --- a/source/de/anomic/urlRedirector/urlRedirectord.java +++ b/source/de/anomic/urlRedirector/urlRedirectord.java @@ -9,6 +9,8 @@ import java.net.URL; import java.util.Date; import de.anomic.data.userDB; +import de.anomic.http.httpHeader; +import de.anomic.http.httpc; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -144,7 +146,7 @@ public class urlRedirectord implements serverHandler { } int pos = line.indexOf(" "); - nextURL = (pos != -1) ? line.substring(0,pos):line; + this.nextURL = (pos != -1) ? line.substring(0,pos):line; this.theLogger.logFine("Receiving request " + line); outputWriter.print("\r\n"); @@ -152,10 +154,16 @@ public class urlRedirectord implements serverHandler { String reasonString = null; try { - if (plasmaParser.supportedFileExt(new URL(nextURL))) { + // generating URL Object + URL reqURL = new URL(this.nextURL); + + // getting URL mimeType + httpHeader header = httpc.whead(reqURL, 10000, null, null, switchboard.remoteProxyConfig); + + if (plasmaParser.supportedContent(reqURL,header.mime())) { // enqueuing URL for crawling reasonString = switchboard.sbStackCrawlThread.stackCrawl( - nextURL, + this.nextURL, null, yacyCore.seedDB.mySeed.hash, "URL Redirector",