- enhanced intranet search speed

- enhanced intranet portscan speed (better time-out)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7227 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent b9f405d1e8
commit 091dd3f6ec

@ -106,7 +106,7 @@ public class yacysearchitem {
final int port=result.url().getPort();
DigestURI faviconURL = null;
if (!result.url().isLocal()) try {
if (isHtml && !sb.isIntranetMode() && !result.url().isLocal()) try {
faviconURL = new DigestURI(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + port) : "") + "/favicon.ico", null);
} catch (final MalformedURLException e1) {
Log.logException(e1);

@ -64,14 +64,17 @@ public class FileLoader {
}
// process directories: transform them to html with meta robots=noindex (using the ftpc lib)
if (url.isDirectory()) {
String[] l = url.list();
String[] l = null;
try {l = url.list();} catch (IOException e) {}
if (l != null) {
/*
if (l == null) {
// this can only happen if there is no connection or the directory does not exist
//log.logInfo("directory listing not available. URL = " + request.url().toString());
sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, "directory listing not available. URL = " + request.url().toString());
throw new IOException("directory listing not available. URL = " + request.url().toString());
}
*/
String u = url.toNormalform(true, true);
List<String> list = new ArrayList<String>();
for (String s: l) {

@ -78,14 +78,17 @@ public class SMBLoader {
}
// process directories: transform them to html with meta robots=noindex (using the ftpc lib)
if (url.isDirectory()) {
String[] l = url.list();
String[] l = null;
try {l = url.list();} catch (IOException e) {}
if (l != null) {
/*
if (l == null) {
// this can only happen if there is no connection or the directory does not exist
//log.logInfo("directory listing not available. URL = " + request.url().toString());
sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, "directory listing not available. URL = " + request.url().toString());
throw new IOException("directory listing not available. URL = " + request.url().toString());
}
*/
String u = url.toNormalform(true, true);
List<String> list = new ArrayList<String>();
for (String s: l) {

@ -203,7 +203,7 @@ public final class RankingProcess extends Thread {
}
// check tld domain
if (!DigestURI.matchesAnyDomDomain(iEntry.metadataHash(), this.query.zonecode)) {
if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) {
// filter out all tld that do not match with wanted tld domain
continue;
}

@ -42,6 +42,7 @@ import jcifs.smb.SmbFileInputStream;
import net.yacy.cora.document.Punycode.PunycodeException;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.ftp.FTPClient;
import net.yacy.cora.protocol.http.HTTPClient;
@ -53,6 +54,8 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
private static final long serialVersionUID = -1173233022912141884L;
private static final long SMB_TIMEOUT = 500;
public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter
private static final Pattern backPathPattern = Pattern.compile("(/[^/]+(?<!/\\.{1,2})/)[.]{2}(?=/|$)|/\\.(?=/)|/(?=/)");
private static final Pattern patternDot = Pattern.compile("\\.");
@ -881,7 +884,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public boolean exists() throws IOException {
if (isFile()) return getFSFile().exists();
if (isSMB()) try {
return getSmbFile().exists();
return TimeoutRequest.exists(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.exists SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -893,7 +896,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public boolean canRead() throws IOException {
if (isFile()) return getFSFile().canRead();
if (isSMB()) try {
return getSmbFile().canRead();
return TimeoutRequest.canRead(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.canRead SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -905,7 +908,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public boolean canWrite() throws IOException {
if (isFile()) return getFSFile().canWrite();
if (isSMB()) try {
return getSmbFile().canWrite();
return TimeoutRequest.canWrite(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.canWrite SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -917,7 +920,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public boolean isHidden() throws IOException {
if (isFile()) return getFSFile().isHidden();
if (isSMB()) try {
return getSmbFile().isHidden();
return TimeoutRequest.isHidden(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.isHidden SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -929,7 +932,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public boolean isDirectory() throws IOException {
if (isFile()) return getFSFile().isDirectory();
if (isSMB()) try {
return getSmbFile().isDirectory();
return TimeoutRequest.isDirectory(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.isDirectory SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -941,7 +944,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public long length() throws IOException {
if (isFile()) return getFSFile().length();
if (isSMB()) try {
return getSmbFile().length();
return TimeoutRequest.length(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.length SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -953,7 +956,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public long lastModified() throws IOException {
if (isFile()) return getFSFile().lastModified();
if (isSMB()) try {
return getSmbFile().lastModified();
return TimeoutRequest.lastModified(getSmbFile(), SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.lastModified SmbException (" + e.getMessage() + ") for " + this.toString());
} catch (MalformedURLException e) {
@ -977,7 +980,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
if (isSMB()) try {
SmbFile sf = getSmbFile();
try {
return sf.list();
return TimeoutRequest.list(sf, SMB_TIMEOUT);
} catch (SmbException e) {
throw new IOException("SMB.list SmbException for " + sf.toString() + ": " + e.getMessage());
}

@ -28,15 +28,8 @@ import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Pattern;
import net.yacy.cora.storage.ARC;
@ -468,36 +461,16 @@ public class Domains {
if (hosts.size() > 0) return hosts.iterator().next();
// call i.getHostName() using concurrency to interrupt execution in case of a time-out
final Callable<String> callable = new Callable<String>() {
public String call() { return i.getHostName(); }
};
ExecutorService service = Executors.newSingleThreadExecutor();
final Future<String> taskFuture = service.submit(callable);
Runnable t = new Runnable() {
public void run() { taskFuture.cancel(true); }
};
service.execute(t);
service.shutdown();
try {
return taskFuture.get(500, TimeUnit.MILLISECONDS);
} catch (CancellationException e) {
// callable was interrupted
return i.getHostAddress();
} catch (InterruptedException e) {
// service was shutdown
return i.getHostAddress();
} catch(ExecutionException e) {
// callable failed unexpectedly
return i.getHostAddress();
} catch (TimeoutException e) {
// time-out
return TimeoutRequest.getHostName(i, 500);
} catch (ExecutionException e) {
return i.getHostAddress();
}
}
public static InetAddress dnsResolve(final String hostx) {
if ((hostx == null) || (hostx.length() == 0)) return null;
final String host = hostx.toLowerCase().trim();
public static InetAddress dnsResolve(String host) {
if ((host == null) || (host.length() == 0)) return null;
host = host.toLowerCase().trim();
// try to simply parse the address
InetAddress ip = parseInetAddress(host);
if (ip != null) return ip;
@ -509,59 +482,9 @@ public class Domains {
if (nameCacheMiss.containsKey(host)) return null;
// call dnsResolveNetBased(host) using concurrency to interrupt execution in case of a time-out
final Callable<InetAddress> callable = new Callable<InetAddress>() {
public InetAddress call() { return dnsResolveNetBased(host); }
};
ExecutorService service = Executors.newSingleThreadExecutor();
final Future<InetAddress> taskFuture = service.submit(callable);
Runnable t = new Runnable() {
public void run() { taskFuture.cancel(true); }
};
service.execute(t);
service.shutdown();
try {
return taskFuture.get(500, TimeUnit.MILLISECONDS);
} catch (CancellationException e) {
// callable was interrupted
return null;
} catch (InterruptedException e) {
// service was shutdown
return null;
} catch(ExecutionException e) {
// callable failed unexpectedly
return null;
} catch (TimeoutException e) {
// time-out
return null;
}
}
private static final InetAddress parseInetAddress(final String ip) {
if (ip == null) return null;
if (ip.length() < 8) return null;
final String[] ips = ip.split("\\.");
if (ips.length != 4) return null;
final byte[] ipb = new byte[4];
try {
ipb[0] = (byte) Integer.parseInt(ips[0]);
ipb[1] = (byte) Integer.parseInt(ips[1]);
ipb[2] = (byte) Integer.parseInt(ips[2]);
ipb[3] = (byte) Integer.parseInt(ips[3]);
} catch (final NumberFormatException e) {
return null;
}
try {
return InetAddress.getByAddress(ipb);
} catch (final UnknownHostException e) {
return null;
}
}
private static InetAddress dnsResolveNetBased(String host) {
try {
boolean doCaching = true;
InetAddress ip = InetAddress.getByName(host); // this makes the DNS request to backbone
ip = TimeoutRequest.getByName(host, 500); // this makes the DNS request to backbone
if ((ip == null) ||
(ip.isLoopbackAddress()) ||
(nameCacheNoCachingList.containsKey(host))
@ -580,7 +503,7 @@ public class Domains {
nameCacheHit.put(host, ip);
}
return ip;
} catch (final UnknownHostException e) {
} catch (final ExecutionException e) {
// remove old entries
flushMissNameCache();
@ -589,7 +512,27 @@ public class Domains {
}
return null;
}
private static final InetAddress parseInetAddress(final String ip) {
if (ip == null) return null;
if (ip.length() < 8) return null;
final String[] ips = ip.split("\\.");
if (ips.length != 4) return null;
final byte[] ipb = new byte[4];
try {
ipb[0] = (byte) Integer.parseInt(ips[0]);
ipb[1] = (byte) Integer.parseInt(ips[1]);
ipb[2] = (byte) Integer.parseInt(ips[2]);
ipb[3] = (byte) Integer.parseInt(ips[3]);
} catch (final NumberFormatException e) {
return null;
}
try {
return InetAddress.getByAddress(ipb);
} catch (final UnknownHostException e) {
return null;
}
}
/**
* Returns the number of entries in the nameCacheHit map

@ -20,11 +20,8 @@
package net.yacy.cora.protocol;
import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
@ -34,6 +31,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.MultiProtocolURI;
@ -122,7 +120,7 @@ public class Scanner extends Thread {
for (InetAddress i: genlist(bigrange)) {
try {
this.scanqueue.put(new MultiProtocolURI(protocol + "://" + Domains.getHostName(i) + "/"));
this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/"));
} catch (MalformedURLException e) {
Log.logException(e);
} catch (InterruptedException e) {
@ -154,22 +152,17 @@ public class Scanner extends Thread {
this.starttime = System.currentTimeMillis();
}
public void run() {
if (ping(this.uri, timeout)) {
services.put(this.uri, "");
/*
try {
byte[] b = this.uri.get(MultiProtocolURI.yacybotUserAgent, timeout);
if (b != null) services.put(this.uri, "");
} catch (Exception e) {
// try a list
try {
if (TimeoutRequest.ping(this.uri, timeout)) {
try {
String[] l = this.uri.list();
if (l != null) services.put(this.uri, "");
} catch (Exception e1) {
// this just failed. do nothing
services.put(new MultiProtocolURI(this.uri.getProtocol() + "://" + Domains.getHostName(InetAddress.getByName(this.uri.getHost())) + "/"), "");
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
*/
} catch (ExecutionException e) {
}
Object r = runner.remove(this);
assert r != null;
@ -185,21 +178,6 @@ public class Scanner extends Thread {
}
}
private static boolean ping(MultiProtocolURI uri, int timeout) {
try {
Socket socket = new Socket();
socket.connect(new InetSocketAddress(Domains.dnsResolve(uri.getHost()), uri.getPort()), timeout);
if (socket.isConnected()) {
socket.close();
return true;
}
return false;
} catch (UnknownHostException e) {
return false;
} catch (IOException e) {
return false;
}
}
public Collection<MultiProtocolURI> services() {
return this.services.keySet();

@ -0,0 +1,325 @@
/**
* TimeoutRequest
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
* First released 08.10.2007 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.protocol;
import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import jcifs.smb.SmbException;
import jcifs.smb.SmbFile;
import net.yacy.cora.document.MultiProtocolURI;
/**
 * TimeoutRequest applies a time-out to method calls that may block for an undefined time.
 * Some network operations offer no time-out parameter of their own; wrapping them in a
 * {@link Callable} and running them through this class bounds the time the caller waits.
 * The class also provides static convenience methods for typical operations that should be
 * timed out: a socket ping, DNS lookup, reverse DNS lookup and several SMB file queries.
 */
public class TimeoutRequest<E> {

    /** the operation that is executed under a time-out */
    private final Callable<E> call;

    /**
     * initialize the TimeoutRequest with a callable method
     * @param call the operation to execute when {@link #call(long)} is invoked
     */
    public TimeoutRequest(Callable<E> call) {
        this.call = call;
    }

    /**
     * call the method using a time-out
     * @param timeout maximum time to wait for the result, in milliseconds
     * @return the value produced by the callable
     * @throws ExecutionException if the callable failed, was cancelled, did not finish within
     *         the time-out, or if the calling thread was interrupted while waiting; in the last
     *         three cases the original exception is attached as cause
     */
    public E call(long timeout) throws ExecutionException {
        final ExecutorService service = Executors.newSingleThreadExecutor();
        try {
            final Future<E> taskFuture = service.submit(this.call);
            try {
                return taskFuture.get(timeout, TimeUnit.MILLISECONDS);
            } catch (CancellationException e) {
                // callable was cancelled
                throw new ExecutionException(e);
            } catch (InterruptedException e) {
                // the waiting thread was interrupted: stop the task and keep the interrupt
                // flag set so that the caller can still observe the interruption
                taskFuture.cancel(true);
                Thread.currentThread().interrupt();
                throw new ExecutionException(e);
            } catch (TimeoutException e) {
                // time-out: actively cancel the task; a cancel request queued on the same
                // single-thread executor would only run after the task has already finished
                taskFuture.cancel(true);
                throw new ExecutionException(e);
            }
            // an ExecutionException thrown by get() (callable failed) passes through unchanged
        } finally {
            // always release the worker thread; this also interrupts a task that is still running
            service.shutdownNow();
        }
    }

    /**
     * ping a remote server using a given uri and a time-out
     * @param uri the host and port of this uri are used as connection target
     * @param timeout used both as socket connect time-out and as overall time-out, in milliseconds
     * @return true if a socket connection could be established within the given time-out
     * @throws ExecutionException if the attempt did not finish within the time-out or failed unexpectedly
     */
    public static boolean ping(final MultiProtocolURI uri, final int timeout) throws ExecutionException {
        return new TimeoutRequest<Boolean>(new Callable<Boolean>() {
            public Boolean call() {
                final Socket socket = new Socket();
                try {
                    socket.connect(new InetSocketAddress(uri.getHost(), uri.getPort()), timeout);
                    return Boolean.valueOf(socket.isConnected());
                } catch (IOException e) {
                    // includes UnknownHostException: the server is not reachable
                    return Boolean.FALSE;
                } finally {
                    // close the socket on every path, also when connect() failed
                    try {
                        socket.close();
                    } catch (IOException ignored) {
                        // nothing useful can be done if closing fails
                    }
                }
            }
        }).call(timeout).booleanValue();
    }

    /**
     * do a DNS lookup within a given time
     * @param host the host name to resolve
     * @param timeout maximum time to wait, in milliseconds
     * @return the InetAddress for a given domain name or null if the host is unknown
     * @throws ExecutionException if the lookup did not finish within the time-out or failed unexpectedly
     */
    public static InetAddress getByName(final String host, final long timeout) throws ExecutionException {
        return new TimeoutRequest<InetAddress>(new Callable<InetAddress>() {
            public InetAddress call() {
                try {
                    return InetAddress.getByName(host);
                } catch (UnknownHostException e) {
                    return null;
                }
            }
        }).call(timeout);
    }

    /**
     * perform a reverse domain name lookup for a given InetAddress within a given timeout
     * @param i the address to look up
     * @param timeout maximum time to wait, in milliseconds
     * @return the host name of a given InetAddress
     * @throws ExecutionException if the lookup did not finish within the time-out or failed unexpectedly
     */
    public static String getHostName(final InetAddress i, final long timeout) throws ExecutionException {
        return new TimeoutRequest<String>(new Callable<String>() {
            public String call() { return i.getHostName(); }
        }).call(timeout);
    }

    /**
     * run a callable under a time-out and report every failure as IOException
     * @param task the operation to execute
     * @param timeout maximum time to wait, in milliseconds
     * @return the value produced by the task
     * @throws IOException carrying the message of the ExecutionException, which is attached as cause
     */
    private static <T> T callIO(final Callable<T> task, final long timeout) throws IOException {
        try {
            return new TimeoutRequest<T>(task).call(timeout);
        } catch (ExecutionException e) {
            final IOException ioe = new IOException(e.getMessage());
            ioe.initCause(e); // keep the original failure visible in stack traces
            throw ioe;
        }
    }

    /**
     * check if a smb file exists
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.exists(); false if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static boolean exists(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Boolean>() {
            public Boolean call() {
                try {
                    return Boolean.valueOf(file.exists());
                } catch (SmbException e) {
                    return Boolean.FALSE;
                }
            }
        }, timeout).booleanValue();
    }

    /**
     * check if a smb file can be read
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.canRead(); false if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static boolean canRead(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Boolean>() {
            public Boolean call() {
                try {
                    return Boolean.valueOf(file.canRead());
                } catch (SmbException e) {
                    return Boolean.FALSE;
                }
            }
        }, timeout).booleanValue();
    }

    /**
     * check if a smb file can be written
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.canWrite(); false if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static boolean canWrite(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Boolean>() {
            public Boolean call() {
                try {
                    return Boolean.valueOf(file.canWrite());
                } catch (SmbException e) {
                    return Boolean.FALSE;
                }
            }
        }, timeout).booleanValue();
    }

    /**
     * check if a smb file is hidden
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.isHidden(); false if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static boolean isHidden(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Boolean>() {
            public Boolean call() {
                try {
                    return Boolean.valueOf(file.isHidden());
                } catch (SmbException e) {
                    return Boolean.FALSE;
                }
            }
        }, timeout).booleanValue();
    }

    /**
     * check if a smb file is a directory
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.isDirectory(); false if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static boolean isDirectory(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Boolean>() {
            public Boolean call() {
                try {
                    return Boolean.valueOf(file.isDirectory());
                } catch (SmbException e) {
                    return Boolean.FALSE;
                }
            }
        }, timeout).booleanValue();
    }

    /**
     * get the size of a smb file
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.length(); 0 if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static long length(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Long>() {
            public Long call() {
                try {
                    return Long.valueOf(file.length());
                } catch (SmbException e) {
                    return Long.valueOf(0);
                }
            }
        }, timeout).longValue();
    }

    /**
     * get last-modified time of a smb file
     * @param file the smb file to query
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.lastModified(); 0 if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static long lastModified(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<Long>() {
            public Long call() {
                try {
                    return Long.valueOf(file.lastModified());
                } catch (SmbException e) {
                    return Long.valueOf(0);
                }
            }
        }, timeout).longValue();
    }

    /**
     * get list of a smb directory
     * @param file the smb directory to list
     * @param timeout maximum time to wait, in milliseconds
     * @return the result of file.list(); null if that call raised an SmbException
     * @throws IOException if the query did not finish within the time-out or failed unexpectedly
     */
    public static String[] list(final SmbFile file, final long timeout) throws IOException {
        return callIO(new Callable<String[]>() {
            public String[] call() {
                try {
                    return file.list();
                } catch (SmbException e) {
                    return null;
                }
            }
        }, timeout);
    }

    /**
     * simple manual test: resolve yacy.net with a time-out of 100 milliseconds
     * @param args not used
     */
    public static void main(String[] args) {
        try {
            System.out.println(getByName("yacy.net", 100));
        } catch (ExecutionException e) {
            e.printStackTrace();
        }
    }
}

@ -265,22 +265,15 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
public static boolean isDomDomain(final byte[] urlHash, final int id) {
return domDomain(urlHash) == id;
}
public static boolean matchesAnyDomDomain(final byte[] urlHash, final int idset) {
// this is a boolean matching on a set of domDomains
return (domDomain(urlHash) | idset) != 0;
}
// checks for local/global IP range and local IP
public final boolean isLocal() {
if (this.isSMB() || this.isFile()) return true;
if (this.hash == null) {
if (super.isLocal()) return true;
synchronized (this) {
if (this.hash == null) this.hash = urlHashComputation();
}
if (this.hash == null) synchronized (this) {
// this is synchronized because another thread may also call the same method in between
// that is the reason that this.hash is checked again
if (this.hash == null) this.hash = urlHashComputation();
}
//if (domDomain(this.hash) != 7) System.out.println("*** DEBUG - not local: " + this.toNormalform(true, false));
return domDomain(this.hash) == 7;
}

Loading…
Cancel
Save