*) Bugfix for urlRedirector

- media extension was not parsed correctly

*) Displaying urlRedirector connections in Connections_p.html

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1145 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 8f4259f1e3
commit b62c36d2ab

@ -61,6 +61,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverThread;
import de.anomic.server.serverCore.Session;
import de.anomic.urlRedirector.urlRedirectord;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -72,9 +73,9 @@ public final class Connections_p {
serverObjects prop = new serverObjects();
// determines if name lookup should be done or not
boolean doNameLookup = true;
if ((post != null) && post.containsKey("nameLookup") && post.get("nameLookup","true").equals("false")) {
doNameLookup = false;
boolean doNameLookup = false;
if ((post != null) && post.containsKey("nameLookup") && post.get("nameLookup","true").equals("true")) {
doNameLookup = true;
}
// getting the virtualHost string
@ -116,8 +117,12 @@ public final class Connections_p {
int userPort = currentSession.getUserPort();
if (userAddress == null) continue;
String dest = null;
String prot = null;
serverHandler cmdObj = currentSession.getCommandObj();
if (cmdObj instanceof httpd) {
prot = "http";
// getting the http command object
httpd currentHttpd = (httpd)cmdObj;
@ -126,49 +131,52 @@ public final class Connections_p {
Properties conProp = (Properties) currentHttpd.getConProp().clone();
// getting the destination host
String dest = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
dest = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
if (dest==null)continue;
if (dest.equals(virtualHost)) dest = yacyCore.seedDB.mySeed.getName() + ".yacy";
} else if (cmdObj instanceof urlRedirectord) {
prot = "urlRedirector";
// determining if the source is a yacy host
yacySeed seed = null;
if (doNameLookup) {
seed = yacyCore.seedDB.lookupByIP(userAddress,true,false,false);
if (seed != null) {
if ((seed.hash.equals(yacyCore.seedDB.mySeed.hash)) &&
(!seed.get(yacySeed.PORT,"").equals(Integer.toString(userPort)))) {
seed = null;
}
urlRedirectord urlRedir = (urlRedirectord)cmdObj;
commandLine = urlRedir.getURL();
}
if ((dest != null) && (dest.equals(virtualHost))) dest = yacyCore.seedDB.mySeed.getName() + ".yacy";
// determining if the source is a yacy host
yacySeed seed = null;
if (doNameLookup) {
seed = yacyCore.seedDB.lookupByIP(userAddress,true,false,false);
if (seed != null) {
if ((seed.hash.equals(yacyCore.seedDB.mySeed.hash)) &&
(!seed.get(yacySeed.PORT,"").equals(Integer.toString(userPort)))) {
seed = null;
}
}
prop.put("list_" + idx + "_dark", ((dark) ? 1 : 0) ); dark=!dark;
prop.put("list_" + idx + "_sessionName",currentSession.getName());
prop.put("list_" + idx + "_proto","http");
if (sessionTime > 1000*60) {
prop.put("list_" + idx + "_ms",0);
prop.put("list_" + idx + "_ms_duration",serverDate.intervalToString(sessionTime));
} else {
prop.put("list_" + idx + "_ms",1);
prop.put("list_" + idx + "_ms_duration",Long.toString(sessionTime));
}
prop.put("list_" + idx + "_source",(seed!=null)?seed.getName()+".yacy":userAddress.getHostAddress()+":"+userPort);
prop.put("list_" + idx + "_dest",dest);
if (blockingRequest) {
prop.put("list_" + idx + "_running",0);
prop.put("list_" + idx + "_running_reqNr",Integer.toString(commandCount+1));
numActivePending++;
} else {
prop.put("list_" + idx + "_running",1);
prop.put("list_" + idx + "_running_command",commandLine);
numActiveRunning++;
}
prop.put("list_" + idx + "_used",Integer.toString(commandCount));
idx++;
}
prop.put("list_" + idx + "_dark", ((dark) ? 1 : 0) ); dark=!dark;
prop.put("list_" + idx + "_sessionName",currentSession.getName());
prop.put("list_" + idx + "_proto",prot);
if (sessionTime > 1000*60) {
prop.put("list_" + idx + "_ms",0);
prop.put("list_" + idx + "_ms_duration",serverDate.intervalToString(sessionTime));
} else {
prop.put("list_" + idx + "_ms",1);
prop.put("list_" + idx + "_ms_duration",Long.toString(sessionTime));
}
prop.put("list_" + idx + "_source",(seed!=null)?seed.getName()+".yacy":userAddress.getHostAddress()+":"+userPort);
prop.put("list_" + idx + "_dest",(dest==null)?"-":dest);
if (blockingRequest) {
prop.put("list_" + idx + "_running",0);
prop.put("list_" + idx + "_running_reqNr",Integer.toString(commandCount+1));
numActivePending++;
} else {
prop.put("list_" + idx + "_running",1);
prop.put("list_" + idx + "_running_command",(commandLine==null)?"":commandLine);
numActiveRunning++;
}
prop.put("list_" + idx + "_used",Integer.toString(commandCount));
idx++;
}
}
prop.put("list",idx);

@ -7,20 +7,20 @@
#
# This script forwards URLs from squid to YaCy where the
# URLs are used to download and index the content of the URLs.
use strict;
use Socket qw(:DEFAULT :crlf);
use IO::Handle;
use Digest::MD5;
# setting administrator username + pwd, hostname + port
my $user = "admin";
my $user = "user";
my $pwd = "";
my $host = "localhost";
my $port = "8080";
my %mediaExt;
my @mediaExt;
my @requestData;
$|=1;
sub isCGI {
@ -39,11 +39,14 @@ sub isPOST {
}
# Tests whether a URL ends in one of the given media file extensions.
#
# Parameters:
#   $_[0] - the URL to test
#   $_[1] - reference to an array of file extensions (without leading dot)
#
# Returns 1 if the text following the last "." of the URL is equal to one
# of the listed extensions (compared case-insensitively), 0 otherwise.
# A missing URL, a missing list, or a URL without extension yields 0.
sub isMediaExt {
    my ($url, $extListRef) = @_;
    return 0 unless defined $url;
    my @extList = (ref $extListRef eq 'ARRAY') ? @{$extListRef} : ();

    # compare in lower case so that "FOO.MP3" is recognised as well
    $url = lc $url;

    my $pos = rindex $url, ".";
    return 0 if $pos == -1;

    my $ext = substr($url, $pos + 1);
    return 0 if $ext eq "";

    # Exact string comparison on purpose: using $ext as a regex would accept
    # substrings ("p" matching "mp3") and would fail on regex metacharacters
    # that may occur in a URL.
    foreach my $candidate (@extList) {
        next unless defined $candidate;
        # strip surrounding whitespace; a trailing "\r" can be left on the
        # last entry if the list was read from a CRLF terminated line
        (my $known = lc $candidate) =~ s/^\s+|\s+$//g;
        return 1 if $known eq $ext;
    }
    return 0;
}
@ -56,7 +59,7 @@ $host = inet_aton($host) or die "$host: unknown host";
socket(SOCK, AF_INET, SOCK_STREAM, $protocol) or die "socket() failed: $!";
my $dest_addr = sockaddr_in($port,$host);
connect(SOCK,$dest_addr) or die "connect() failed: $!";
connect(SOCK,$dest_addr) or die("connect() failed: $!");
# enabling autoflush
SOCK->autoflush(1);
@ -76,7 +79,8 @@ print SOCK "PWD ".$md5Pwd.CRLF;
# Getting a list of file extensions that should be ignored
print SOCK "MEDIAEXT".CRLF;
$msg_in = lc <SOCK>;
%mediaExt = split(/,\s*/, $msg_in);
chomp $msg_in;
@mediaExt = split(/,\s*/, $msg_in);
# 1) Reading URLs from stdIn
# 2) Send it to Yacy
@ -93,19 +97,22 @@ while (defined($msg_out = <>)) {
# testing if the URL is CGI
if (isCGI($requestData[0])) {
print STDOUT "URL is cgi: ".$msg_out.CRLF;
print STDOUT CRLF;
print STDERR "URL is cgi: ".$msg_out.CRLF;
next;
}
# testing if the URL is a POST request
if (isPOST($requestData[0])){
print STDOUT "URL is post: ".$msg_out.CRLF;
print STDOUT CRLF;
print STDERR "URL is post: ".$msg_out.CRLF;
next;
}
# testing if the requested content is a media content
if (isMediaExt($requestData[0])) {
print STDOUT "URL has media extension: ".$msg_out.CRLF;
if (isMediaExt($requestData[0],\@mediaExt)) {
print STDOUT CRLF;
print STDERR "URL has media extension: ".$msg_out.CRLF;
next;
}
@ -117,6 +124,7 @@ while (defined($msg_out = <>)) {
if (defined($msg_in = <SOCK>)) {
print STDOUT $msg_in;
} else {
print STDERR "Socket closed".CRLF;
close SOCK;
exit(1);
}

@ -24,6 +24,7 @@ public class urlRedirectord implements serverHandler {
private static plasmaSwitchboard switchboard = null;
private serverLog theLogger = new serverLog("URL-REDIRECTOR");
private static plasmaCrawlProfile.entry profile = null;
private String nextURL;
public urlRedirectord() {
if (switchboard == null) {
@ -66,6 +67,10 @@ public class urlRedirectord implements serverHandler {
}
}
/**
 * Returns the current value of the {@code nextURL} field.
 * NOTE(review): the field appears to be set when a request line is received
 * and reset to {@code null} afterwards, so callers should expect
 * {@code null} here - confirm against the full request loop.
 *
 * @return the value of {@code nextURL}, possibly {@code null}
 */
public String getURL() {
return this.nextURL;
}
public void initSession(Session theSession){
// getting current session
this.session = theSession;
@ -139,7 +144,7 @@ public class urlRedirectord implements serverHandler {
}
int pos = line.indexOf(" ");
String nextURL = (pos != -1) ? line.substring(0,pos):line;
nextURL = (pos != -1) ? line.substring(0,pos):line;
this.theLogger.logFine("Receiving request " + line);
outputWriter.print("\r\n");
@ -168,6 +173,7 @@ public class urlRedirectord implements serverHandler {
if (reasonString != null) {
this.theLogger.logFine("URL " + nextURL + " rejected. Reason: " + reasonString);
}
nextURL = null;
}
}

Loading…
Cancel
Save