added a new feature to MultiProtocolURIs to get the locale for each url:

This is done using a new library, InetAddressLocator.jar, which is NOT added to YaCy by default: the library is very old, and bundling it would prevent us from ever getting a Debian package. However, some people want this functionality, so it can be enabled by taking the library from http://javainetlocator.sourceforge.net/ and placing it into the /lib directory, where it will be found using reflection.
The new feature will be used to extend the crawler steering.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7975 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 13 years ago
parent 2c3161b4ac
commit 47a8c69745

@ -35,6 +35,7 @@ import java.io.Serializable;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@ -1119,6 +1120,10 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
}
/**
 * Look up the locale that belongs to the host of this URI.
 * The lookup itself is delegated to {@link Domains#getLocale(String)}.
 *
 * @return the locale reported by {@code Domains.getLocale} for this URI's host;
 *         may be {@code null} if no locale can be determined
 */
public Locale getLocale() {
    final Locale hostLocale = Domains.getLocale(this.host);
    return hostLocale;
}
//---------------------
// Regular expression: an alternation of single separator characters, namely
// space, '/', '(', ')', '-', ':', '_', '.', ',', '?', '!', apostrophe and double quote.
// The double quote is appended as a char literal to avoid escaping it inside the string.
// NOTE(review): presumably used to split text into words; the usage is not visible here.
private static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"';

@ -0,0 +1,45 @@
package net.yacy.cora.plugin;
import java.io.File;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
/**
 * Helper for optional plug-in libraries: resolves classes and static methods
 * by name using reflection, so that the calling code compiles and runs even
 * when the library is not installed.
 * All methods are best-effort and report failure by returning {@code null}.
 */
public class ClassProvider {

    /**
     * Load a class by name. The class is first looked up with the regular
     * class loader; if it is not found there, the given jar file is used as
     * an additional class path entry.
     *
     * @param classname the fully qualified name of the class
     * @param jarfile the jar file that may contain the class; may be {@code null},
     *        in which case only the regular class path is searched
     * @return the class, or {@code null} if it cannot be found or linked
     */
    public static Class<?> load(final String classname, final File jarfile) {
        if (classname == null) return null;

        // first attempt: the class may already be on the regular class path
        try {
            return Class.forName(classname);
        } catch (final ClassNotFoundException e) {
            // expected if the optional library is not on the class path; try the jar file next
        } catch (final LinkageError e) {
            // the class exists but cannot be linked or initialized; try the jar file next
        }

        // second attempt: load from the jar file
        if (jarfile == null) return null;
        try {
            // File.toURI() creates an absolute, correctly escaped file: URL on every platform
            final URL[] urls = new URL[]{jarfile.toURI().toURL()};
            // the loader is intentionally never closed: it must stay usable
            // for as long as the returned class is in use
            final ClassLoader cl = new URLClassLoader(urls);
            return cl.loadClass(classname);
        } catch (final MalformedURLException e) {
            // the jar location cannot be expressed as URL: plug-in is not available
            return null;
        } catch (final ClassNotFoundException e) {
            // jar file missing or class not contained: plug-in is not available
            return null;
        } catch (final LinkageError e) {
            // jar is broken or incompatible with this runtime: plug-in is not available
            return null;
        }
    }

    /**
     * Find a public static method of a class.
     *
     * @param c the class which declares or inherits the method; may be {@code null}
     * @param methodName the name of the method
     * @param args the parameter types of the method
     * @return the method, or {@code null} if {@code c} or {@code methodName} is
     *         {@code null}, if no such public method exists, if access is denied,
     *         or if the method is not static
     */
    public static Method getStaticMethod(final Class<?> c, final String methodName, final Class<?>[] args) {
        if (c == null || methodName == null) return null;
        try {
            final Method m = c.getMethod(methodName, args);
            // callers invoke the result without a receiver object;
            // an instance method would fail there with a NullPointerException
            if (!java.lang.reflect.Modifier.isStatic(m.getModifiers())) return null;
            return m;
        } catch (final SecurityException e) {
            return null;
        } catch (final NoSuchMethodException e) {
            return null;
        }
    }
}

@ -26,6 +26,8 @@ package net.yacy.cora.protocol;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
@ -38,11 +40,13 @@ import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.plugin.ClassProvider;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.storage.KeyList;
@ -50,6 +54,17 @@ import net.yacy.kelondro.util.MemoryControl;
public class Domains {
// handles to the optional InetAddressLocator plug-in; both stay null if the library is absent
private static Class<?> InetAddressLocatorClass;
private static Method InetAddressLocatorGetLocaleInetAddressMethod;

static {
    // The library from http://javainetlocator.sourceforge.net/ is used only if it is present.
    // It is accessed by reflection so that it can be left out of the distribution:
    // the library is old and no longer maintained.
    final File locatorJar = new File("lib/InetAddressLocator.jar");
    final Class<?>[] getLocaleSignature = new Class<?>[]{InetAddress.class};
    InetAddressLocatorClass = ClassProvider.load("net.sf.javainetlocator.InetAddressLocator", locatorJar);
    InetAddressLocatorGetLocaleInetAddressMethod = ClassProvider.getStaticMethod(InetAddressLocatorClass, "getLocale", getLocaleSignature);
}
// Empty-string constant.
// NOTE(review): presumably a placeholder value for maps that are used as sets; usage is not visible here.
private static final String PRESENT = "";
// Comma-separated list of regular expressions that match the address prefixes
// 10.*, 127.*, 172.16.* to 172.31.*, 169.254.* and 192.168.*, plus the name "localhost".
private static final String LOCAL_PATTERNS = "10\\..*,127\\..*,172\\.(1[6-9]|2[0-9]|3[0-1])\\..*,169\\.254\\..*,192\\.168\\..*,localhost";
// NOTE(review): judging by the name, an upper bound for the number of entries in the name-hit cache; confirm at the usage site.
private static final int MAX_NAME_CACHE_HIT_SIZE = 20000;
@ -896,6 +911,45 @@ public class Domains {
return localp;
}
/**
 * Find the locale for a given host. This feature is only available in full quality
 * if the file InetAddressLocator.jar is placed in the /lib directory (as a plug-in)
 * from http://javainetlocator.sourceforge.net/
 * Without a result from the address-based lookup, the locale is guessed from the
 * top-level domain of the host name: a two-letter top-level domain is taken as the
 * country code, every longer one is mapped to "US". The language is always "en".
 * @param host the host name; may be null
 * @return the locale for the host, or null if the host is null, has no
 *         top-level domain, or ends with a label that cannot be a top-level
 *         domain (such as the last octet of an IPv4 address)
 */
public static Locale getLocale(final String host) {
    if (host == null) return null;

    // preferred source: lookup by the resolved address
    final Locale locale = getLocale(dnsResolve(host));
    if (locale != null) return locale;

    // fallback: guess the country from the top-level domain
    final int p = host.lastIndexOf('.');
    if (p < 0) return null;
    final String tld = host.substring(p + 1);
    final int len = tld.length();
    if (len < 2) return null;

    // an all-numeric last label belongs to an IPv4 literal, not to a domain name
    boolean numeric = true;
    for (int i = 0; i < len; i++) {
        final char c = tld.charAt(i);
        if (c < '0' || c > '9') {
            numeric = false;
            break;
        }
    }
    if (numeric) return null;

    // generic top-level domains (com, org, net, ...) carry no country information
    if (len > 2) return new Locale("en", "US");

    // a country code consists of exactly two ASCII letters
    for (int i = 0; i < len; i++) {
        final char c = tld.charAt(i);
        final boolean letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        if (!letter) return null;
    }
    // use a fixed locale for the case mapping: with the default locale "it"
    // would become "\u0130T" on a system running with a Turkish locale
    return new Locale("en", tld.toUpperCase(Locale.ENGLISH));
}
/**
 * Find the locale for a given address using the optional InetAddressLocator plug-in.
 * @param address the address to look up; may be null
 * @return the locale reported by the plug-in, or null if the plug-in is not
 *         available, the address is null or local, or the reflective call fails
 */
public static Locale getLocale(final InetAddress address) {
    final Method locator = InetAddressLocatorGetLocaleInetAddressMethod;
    if (locator == null || address == null || isLocal(address)) return null;

    try {
        // static method of the plug-in: no receiver object is needed
        final Object result = locator.invoke(null, new Object[]{address});
        return (Locale) result;
    } catch (final IllegalArgumentException e) {
        // fall through: no locale available
    } catch (final IllegalAccessException e) {
        // fall through: no locale available
    } catch (final InvocationTargetException e) {
        // the plug-in itself threw an exception; fall through: no locale available
    }
    return null;
}
public static void main(final String[] args) {
/*
try {

Loading…
Cancel
Save