// yacySeed.java // ------------------------------------- // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2004 // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // // YACY stands for Yet Another CYberspace // // the yacySeed Object is the object that bundles and carries all information about // a single peer in the yacy space. // The yacySeed object is carried along peers using a string representation, that can // be compressed and/or scrambled, depending on the purpose of the process. // // the yacy status // any value that is defined here will be overwritten each time the proxy is started // to prevent that the system gets confused, it should be set to "" which means // undefined. 
Other status' that can be reached at run-time are // junior - a peer that has no public socket, thus cannot be reached on demand // senior - a peer that has a public socked and serves search queries // principal - a peer like a senior socket and serves as gateway for network definition package net.yacy.peers; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.net.Inet6Address; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URL; import java.text.ParseException; import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Random; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.regex.Pattern; import net.yacy.cora.date.AbstractFormatter; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.federate.yacy.Distribution; import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Digest; import net.yacy.cora.protocol.Domains; import net.yacy.cora.sorting.ClusteredScoreMap; import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.storage.HandleSet; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.util.MapTools; import net.yacy.kelondro.util.OS; import net.yacy.peers.operation.yacyVersion; import net.yacy.search.Switchboard; import net.yacy.utils.Bitfield; import net.yacy.utils.crypt; public class Seed implements Cloneable, Comparable, Comparator { public static String ANON_PREFIX = "_anon"; public static final int maxsize = 16000; /** * substance "sI" (send index/words) */ public static final String INDEX_OUT = "sI"; /** * substance "rI" (received index/words) */ public static final 
String INDEX_IN = "rI";

    /** substance "sU" (send URLs) */
    public static final String URL_OUT = "sU";

    /** substance "rU" (received URLs) */
    public static final String URL_IN = "rU";

    /** substance "virgin", a peer which cannot reach any other peers */
    public static final String PEERTYPE_VIRGIN = "virgin";

    /** substance "junior", this is a peer which cannot be reached from the outside */
    public static final String PEERTYPE_JUNIOR = "junior";

    /** substance "mentee", this is a junior peer with a mentor peer attached as 'remote' server port */
    public static final String PEERTYPE_MENTEE = "mentee";

    /** substance "senior", this is a peer with an open port to the public */
    public static final String PEERTYPE_SENIOR = "senior";

    /** substance "mentor", this is a senior peer which hosts server ports for mentee peers */
    public static final String PEERTYPE_MENTOR = "mentor";

    /** substance "principal", a senior peer which distributes the seed list to an outside hoster (i.e. using ftp upload to a web server) */
    public static final String PEERTYPE_PRINCIPAL = "principal";

    /** substance "PeerType" */
    public static final String PEERTYPE = "PeerType";

    /** static/dynamic (if the IP changes often for any reason) */
    private static final String IPTYPE = "IPType";
    private static final String FLAGS = "Flags";

    /**
     * the empty flags value: four blanks.
     * It must have length 4 because setFlag() replaces every flags string whose length is not 4
     * with this constant, and the public constructor pads shorter flag strings to 4 characters.
     */
    public static final String FLAGSZERO = "    ";

    /** the applications version */
    public static final String VERSION = "Version";
    public static final String YOURTYPE = "yourtype";
    public static final String LASTSEEN = "LastSeen";
    private static final String USPEED = "USpeed";

    /** the name of the peer (user-set) */
    public static final String NAME = "Name";
    public static final String HASH = "Hash";

    /** Birthday - first startup */
    private static final String BDATE = "BDate";

    /** UTC-Offset */
    public static final String UTC = "UTC";
    private static final String PEERTAGS = "Tags";

    /** the speed of indexing (pages/minute) of the peer */
    public static final String ISPEED = "ISpeed";

    /** the speed of retrieval (queries/minute) of the peer */
    public static final String RSPEED = "RSpeed";

    /** the number of minutes that the peer is up in minutes/day (moving average MA30) */
    public static final String UPTIME = "Uptime";

    /** the number of links that the peer has stored (LURL's) */
    public static final String LCOUNT = "LCount";

    /** the number of links that the peer has noticed, but not loaded (NURL's) */
    public static final String NCOUNT = "NCount";

    /** the number of links that the peer provides for remote crawls (ZURL's) */
    public static final String RCOUNT = "RCount";

    /** the number of different words the peer has indexed */
    public static final String ICOUNT = "ICount";

    /** the number of seeds that the peer has stored */
    public static final String SCOUNT = "SCount";

    /** the number of clients that the peer connects (connects/hour as double) */
    public static final String CCOUNT = "CCount";

    /** the public IP of this peer (old field, will be used to carry the IPv4) */
    public static final String IP = "IP";

    /**
     * more public IPs of this peer, containing only IPv6 addresses. This list of IPv6 addresses is separated
     * with a vertical bar/pipe '|'.
     * This list may have zero entries if the host does not have an IPv6 address. It may have more than one IPv6
     * address if the host detects more than one public IPv6 locally but did not get a feedback from other peers
     * if any of these addresses are reachable. The list may contain only one address, if a 'hello' with backping
     * to another peer was successful and the other peer confirmed one of the IPv6 IPs which is then the only
     * entry in this field.
*/ public static final String IP6 = "IP6"; public static final String PORT = "Port"; public static final String SEEDLISTURL = "seedURL"; public static final String NEWS = "news"; // news attachment public static final String DCT = "dct"; // disconnect time public static final String SOLRAVAILABLE ="SorlAvail"; // field to remember if remotePeer solr interface is avail. /** zero-value */ private static final String ZERO = "0"; private static final int FLAG_DIRECT_CONNECT = 0; private static final int FLAG_ACCEPT_REMOTE_CRAWL = 1; private static final int FLAG_ACCEPT_REMOTE_INDEX = 2; private static final int FLAG_ROOT_NODE = 3; private static final int FLAG_SSL_AVAILABLE = 4; public static final String DFLT_NETWORK_UNIT = "freeworld"; public static final String DFLT_NETWORK_GROUP = ""; private static final Random random = new Random(System.currentTimeMillis()); // class variables /** the peer-hash */ public final String hash; /** a set of identity founding values, eg. IP, name of the peer, YaCy-version, ... */ private final ConcurrentMap dna; private long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations. Bitfield bitfield = null; public Seed(final String theHash, final ConcurrentMap theDna) { // create a seed with a pre-defined hash map assert theHash != null; this.hash = theHash; this.dna = theDna; String flags = this.dna.get(Seed.FLAGS); if (flags == null) flags = Seed.FLAGSZERO; while (flags.length() < 4) flags += " "; this.dna.put(Seed.FLAGS, flags); this.dna.put(Seed.NAME, checkPeerName(get(Seed.NAME, "∅"))); this.birthdate = -1; // this means 'not yet parsed', parse that later when it is used } private Seed(final String theHash) { this.dna = new ConcurrentHashMap(); // settings that can only be computed by originating peer: // at first startup - this.hash = theHash; // the hash key of the peer - very important. 
should be static somehow, even after restart this.dna.put(Seed.NAME, defaultPeerName()); // later during operation - this.dna.put(Seed.ISPEED, Seed.ZERO); this.dna.put(Seed.RSPEED, Seed.ZERO); this.dna.put(Seed.UPTIME, Seed.ZERO); this.dna.put(Seed.LCOUNT, Seed.ZERO); this.dna.put(Seed.NCOUNT, Seed.ZERO); this.dna.put(Seed.RCOUNT, Seed.ZERO); this.dna.put(Seed.ICOUNT, Seed.ZERO); this.dna.put(Seed.SCOUNT, Seed.ZERO); this.dna.put(Seed.CCOUNT, Seed.ZERO); this.dna.put(Seed.VERSION, Seed.ZERO); // settings that is created during the 'hello' phase - in first contact this.dna.put(Seed.IP, ""); // 123.234.345.456 this.dna.put(Seed.PORT, "∅"); this.dna.put(Seed.IPTYPE, "∅"); // settings that can only be computed by visiting peer this.dna.put(Seed.USPEED, Seed.ZERO); // the computated uplink speed of the peer // settings that are needed to organize the seed round-trip this.dna.put(Seed.FLAGS, Seed.FLAGSZERO); setFlagDirectConnect(false); setFlagAcceptRemoteCrawl(true); setFlagAcceptRemoteIndex(true); setUnusedFlags(); // index transfer this.dna.put(Seed.INDEX_OUT, Seed.ZERO); // send index this.dna.put(Seed.INDEX_IN, Seed.ZERO); // received index this.dna.put(Seed.URL_OUT, Seed.ZERO); // send URLs this.dna.put(Seed.URL_IN, Seed.ZERO); // received URLs // default first filling this.dna.put(Seed.BDATE, GenericFormatter.SHORT_SECOND_FORMATTER.format()); this.dna.put(Seed.LASTSEEN, this.dna.get(Seed.BDATE)); // just as initial setting this.dna.put(Seed.UTC, GenericFormatter.UTCDiffString()); this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN); // virgin/junior/senior/principal this.birthdate = System.currentTimeMillis(); } /** * check the peer name: protect against usage as XSS hack * * @param id * @return a checked name without "<" and ">" */ private final static Pattern tp = Pattern.compile("<|>"); public static String checkPeerName(String name) { name = tp.matcher(name).replaceAll("_"); return name; } /** * generate a default peer name * * @return */ private static String 
defaultPeerName() { return ANON_PREFIX + OS.infoKey() + "-" + (System.currentTimeMillis() % 77777777L) + "-" + Network.speedKey; } /** * Checks for the static fragments of a generated default peer name, such as the string 'dpn' * * @see #makeDefaultPeerName() * @param name the peer name to check for default peer name compliance * @return whether the given peer name may be a default generated peer name */ public static boolean isDefaultPeerName(final String name) { return name.startsWith("_anon"); } private Set getIPv6Entries() { String ip6s = this.dna.get(Seed.IP6); final Set set = Collections.synchronizedSet(new HashSet()); if (ip6s == null) return set; final StringTokenizer st = new StringTokenizer(ip6s, "|"); while (st.hasMoreTokens()) { set.add(Domains.chopZoneID(st.nextToken().trim())); } return set; } /** * try to get the public IP
* * @return the IP or localhost IP (127.0.0.1) */ @Deprecated public final String getIP() { final String ipx = this.dna.get(Seed.IP); // may contain both, IPv4 or IPv6 if (ipx != null && !ipx.isEmpty()) return Domains.chopZoneID(ipx); Set ip6s = getIPv6Entries(); if (ip6s != null && ip6s.size() > 0) return ip6s.iterator().next(); return null; } /** * Get all my public IPs. If there was a static IP assignment, only one, that IP is returned. * If no feedback from other peers exist, then all locally determined IPs are returned. * If a feedback from other peers exist, then return at most two IPs: * the latest IPv4 and the latest IPv6 which was returned during a hello process from a remote peer * @return a set of IPs which are supposed to be my own public IPs */ public final Set getIPs() { Set h = new LinkedHashSet<>(); final String ipx = this.dna.get(Seed.IP); // may contain both, IPv4 or IPv6 Set ip6s = getIPv6Entries(); // put IPs in h in such a way that we believe the mostfront have more chances to get connected if (ipx != null && !ipx.isEmpty()) h.add(Domains.chopZoneID(ipx)); h.addAll(ip6s); return h; } /** * count the number of IPs assgined to that peer * @return the number of peers in field IP (should be 1 all the time) plus the number of IPs in the IP6 field. */ public final int countIPs() { final String ipx = this.dna.get(Seed.IP); // may contain both, IPv4 or IPv6 Set ip6s = getIPv6Entries(); if (ip6s == null || ip6s.size() == 0) { return (ipx == null || ipx.isEmpty()) ? 0 : 1; } return (ipx == null || ipx.isEmpty()) ? ip6s.size() : ip6s.size() + 1; } /** * remove the given IP from the seed. Be careful not to remove the last IP; maybe call countIPs before calling the method. * @param ip * @return true if the IP was in the seed and had been removed. If the peer did not change, this returns false. 
*/ public final boolean removeIP(String ip) { String ipx = Domains.chopZoneID(this.dna.get(Seed.IP)); // may contain both, IPv4 or IPv6 Set ip6s = getIPv6Entries(); if (ip6s == null || ip6s.size() == 0) { if (ipx != null && !ipx.isEmpty() && ipx.equals(ip)) { this.dna.put(Seed.IP, ""); // DON'T DO THAT! (the line is correct but you should not remove the last IP return true; } return false; } if (ip6s != null && ip6s.contains(ip)) { ip6s.remove(ip); this.dna.put(Seed.IP6, MapTools.set2string(ip6s, "|", false)); return true; } if (ipx != null && !ipx.isEmpty() && ipx.equals(ip)) { ipx = ip6s.iterator().next(); this.dna.put(Seed.IP, Domains.chopZoneID(ipx)); ip6s.remove(ipx); this.dna.put(Seed.IP6, MapTools.set2string(ip6s, "|", false)); return true; } return false; } /** * clash tests if any of the given ips are also contained in the Seeds ip set * @param ips * @return true if any of the given IPs are identical to the Seeds IP set */ public boolean clash(Set ips) { Set myIPs = getIPs(); for (String s: ips) { if (myIPs.contains(s) && isProperIP(s)) return true; } return false; } /** * try to get the peertype
*
     * @return the value stored under the PeerType key, or an empty string if there is no such value
     */
    public final String getPeerType() {
        final String fallback = "";
        return get(Seed.PEERTYPE, fallback);
    }

    /**
     * Read the peer type, falling back to "virgin".
     *
     * @return the value stored under the PeerType key, or "virgin" if there is no such value
     */
    public final String orVirgin() {
        final String fallback = Seed.PEERTYPE_VIRGIN;
        return get(Seed.PEERTYPE, fallback);
    }

    /**
     * Read the peer type, falling back to "junior".
     *
     * @return the value stored under the PeerType key, or "junior" if there is no such value
     */
    public final String orJunior() {
        final String fallback = Seed.PEERTYPE_JUNIOR;
        return get(Seed.PEERTYPE, fallback);
    }

    /**
     * Read the peer type, falling back to "senior".
     *
     * @return the value stored under the PeerType key, or "senior" if there is no such value
     */
    public final String orSenior() {
        final String fallback = Seed.PEERTYPE_SENIOR;
        return get(Seed.PEERTYPE, fallback);
    }

    /**
     * try to get the peertype
* * @return the peertype or "principal" */ public final String orPrincipal() { return get(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); } /** * Get a value from the peer's DNA (its set of peer defining values, e.g. IP, name, version, ...) * * @param key the key for the value to fetch * @param dflt the default value */ public final String get(final String key, final String dflt) { final Object o = this.dna.get(key); if ( o == null ) { return dflt; } return (String) o; } public final float getFloat(final String key, final float dflt) { final Object o = this.dna.get(key); if ( o == null ) { return dflt; } if ( o instanceof String ) { try { return Float.parseFloat((String) o); } catch (final NumberFormatException e ) { return dflt; } } else if ( o instanceof Float ) { return ((Float) o).floatValue(); } else { return dflt; } } public final long getLong(final String key, final long dflt) { final Object o = this.dna.get(key); if ( o == null ) { return dflt; } if ( o instanceof String ) { try { return Long.parseLong((String) o); } catch (final NumberFormatException e ) { return dflt; } } else if ( o instanceof Long ) { return ((Long) o).longValue(); } else if ( o instanceof Integer ) { return ((Integer) o).intValue(); } else { return dflt; } } /** * set the Peer ip. * This sets the IP and IP6 field according to the current fill state of that fields: * - if no field has a content, then IP is filled with the given ip, even if that ip is of type IPv6 * - if IP is already set then check if this is equivalent with the given ip. If both are equal, nothing is done. * If they are not equal, the IP and IPv6 field is set according to the type if the given ip: if the given ip * is of type IPv4, then IP is set with ip, otherwise IP6 is set with the ip. * ATTENTION: if the given IP is IPv6, then after the call that IP is the only one assigned to the peer! 
* @param ip */ public final void setIP(String ip) { ip = Domains.chopZoneID(ip); if (!isProperIP(ip)) return; String oldIP = this.dna.get(Seed.IP); String oldIP6 = this.dna.get(Seed.IP6); if ((oldIP == null || oldIP.length() == 0) && (oldIP6 == null || oldIP6.length() == 0)) { this.dna.put(Seed.IP, ip); } else { if (oldIP == null || !oldIP.equals(ip)) { if (oldIP == null || oldIP.length() == 0 || ip.indexOf(':') == 0) this.dna.put(Seed.IP, ip); else this.dna.put(Seed.IP6, ip); } } } /** * Set several local IPs which are good to access this peer. * This OVERWRITES ALL ips stored before! * @param ips list of IPs */ public final void setIPs(final Set ips) { // we must sort IPv4 and IPv6 here Set ipv6 = new HashSet<>(); Set ipv4 = new HashSet<>(); for (String ip: ips) if (isProperIP(ip)) { if (ip.indexOf(':') >= 0) ipv6.add(ip); else ipv4.add(ip); } if (ipv4.size() == 0) { if (ipv6.size() == 1) { // if the only IP we have is IPv6, then put this into IP field this.dna.put(Seed.IP, ipv6.iterator().next()); this.dna.put(Seed.IP6, ""); } else if (ipv6.size() > 1) { this.dna.put(Seed.IP, ""); this.dna.put(Seed.IP6, MapTools.set2string(ipv6, "|", false)); } } else { this.dna.put(Seed.IP, Domains.chopZoneID(ipv4.iterator().next())); this.dna.put(Seed.IP6, MapTools.set2string(ipv6, "|", false)); } } public final void setPort(final String port) { this.dna.put(Seed.PORT, port); } public final void setType(final String type) { this.dna.put(Seed.PEERTYPE, type); } public final void setJunior() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR); } public final void setSenior() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); } public final void setPrincipal() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); } public final void put(final String key, final String value) { synchronized ( this.dna ) { this.dna.put(key, value); } } /** @return the DNA-map of this peer */ public final ConcurrentMap getMap() { return this.dna; } public final void setName(final String name) { 
synchronized ( this.dna ) { this.dna.put(Seed.NAME, checkPeerName(name)); } } public final String getName() { return checkPeerName(get(Seed.NAME, "∅")); } public final String getHexHash() { return b64Hash2hexHash(this.hash); } public final void incSI(final int count) { String v = this.dna.get(Seed.INDEX_OUT); if ( v == null ) { v = Seed.ZERO; } this.dna.put(Seed.INDEX_OUT, Long.toString(Long.parseLong(v) + count)); } public final void incRI(final int count) { String v = this.dna.get(Seed.INDEX_IN); if ( v == null ) { v = Seed.ZERO; } this.dna.put(Seed.INDEX_IN, Long.toString(Long.parseLong(v) + count)); } public final void incSU(final int count) { String v = this.dna.get(Seed.URL_OUT); if ( v == null ) { v = Seed.ZERO; } this.dna.put(Seed.URL_OUT, Long.toString(Long.parseLong(v) + count)); } public final void incRU(final int count) { String v = this.dna.get(Seed.URL_IN); if ( v == null ) { v = Seed.ZERO; } this.dna.put(Seed.URL_IN, Long.toString(Long.parseLong(v) + count)); } public final void resetCounters() { this.dna.put(Seed.INDEX_OUT, Seed.ZERO); this.dna.put(Seed.INDEX_IN, Seed.ZERO); this.dna.put(Seed.URL_OUT, Seed.ZERO); this.dna.put(Seed.URL_IN, Seed.ZERO); } /** * 12 * 6 bit = 72 bit = 24 characters octal-hash *

* Octal hashes are used for cache-dumps that are DHT-ready *

*

* Cause: the natural order of octal hashes are the same as the b64-order of b64Hashes. a hexhash cannot * be used in such cases, and b64Hashes are not appropriate for file names *

* * @param b64Hash a base64 hash * @return the octal representation of the given base64 hash */ public static String b64Hash2octalHash(final String b64Hash) { return Digest.encodeOctal(Base64Order.enhancedCoder.decode(b64Hash)); } /** * 12 * 6 bit = 72 bit = 18 characters hex-hash * * @param b64Hash a base64 hash * @return the hexadecimal representation of the given base64 hash */ public static String b64Hash2hexHash(final String b64Hash) { if ( b64Hash.length() > 12 ) { return ""; } // the hash string represents 12 * 6 bit = 72 bits. This is too much for a long integer. return Digest.encodeHex(Base64Order.enhancedCoder.decode(b64Hash)); } /** * @param hexHash a hexadecimal hash * @return the base64 representation of the given hex hash */ public static String hexHash2b64Hash(final String hexHash) { return Base64Order.enhancedCoder.encode(Digest.decodeHex(hexHash)); } /** * The returned version follows this pattern: MAJORVERSION . MINORVERSION 0 SVN REVISION * * @return the YaCy version of this peer as a float or 0 if no valid value could be retrieved * from this yacySeed object */ public final Double getVersion() { try { return Double.parseDouble(get(Seed.VERSION, Seed.ZERO)); } catch (final NumberFormatException e ) { return 0.0d; } } /** * get the SVN version of the peer * * @return */ public final int getRevision() { return yacyVersion.revision(get(Seed.VERSION, Seed.ZERO)); } /** * generate a public address using a given ip. 
This combines the ip with the port and encloses the ip * with square brackets if the ip is of typeIPv6 * @param ip * @return an address string which can be used as host:port part of an url */ public final String getPublicAddress(final InetAddress ip) { // we do not use getPublicAddress(String ip) here to be able to check IPv6 with instanceof Inet6Address which is faster than indexOf(':') if (ip == null) throw new RuntimeException("ip == NULL"); // that should not happen final String port = this.dna.get(Seed.PORT); // we do not use getPort() here to avoid String->Integer->toString() conversion if ( port == null || port.length() < 2 || port.length() > 5 ) { throw new RuntimeException("port not wellformed: " + port); // that should not happen } final StringBuilder sb = new StringBuilder(); if (ip instanceof Inet6Address) { sb.append('[').append(ip.getHostAddress()).append(']'); } else { sb.append(ip.getHostAddress()); } sb.append(':'); sb.append(port); return sb.toString(); } /** * generate a public address using a given ip. 
This combines the ip with the port and encloses the ip
     * with square brackets if the ip is of type IPv6
     * @param ip the address of the peer, must not be null; an IPv6 address may already carry square brackets
     * @return an address string which can be used as host:port part of an url
     * @throws RuntimeException if ip is null or if the stored port is missing or not 2 to 5 characters long
     */
    public final String getPublicAddress(final String ip) {
        if (ip == null) throw new RuntimeException("ip == NULL"); // that should not happen
        final String port = this.dna.get(Seed.PORT); // we do not use getPort() here to avoid String->Integer->toString() conversion
        if ( port == null || port.length() < 2 || port.length() > 5 ) {
            throw new RuntimeException("port not wellformed: " + port); // that should not happen
        }
        final StringBuilder sb = new StringBuilder(ip.length() + port.length() + 3);
        if (ip.indexOf(':') >= 0) {
            // IPv6: add only those brackets which are not already there
            if (!ip.startsWith("[")) sb.append('[');
            sb.append(ip);
            if (!ip.endsWith("]")) sb.append(']');
        } else {
            sb.append(ip);
        }
        sb.append(':');
        sb.append(port);
        return sb.toString();
    }

    /** @return the port number of this seed or -1 if not present or not a number */
    public final int getPort() {
        final String port = this.dna.get(Seed.PORT);
        if ( port == null ) {
            return -1;
        }
        // no length check here: it is possible to use port 0-9
        try {
            return Integer.parseInt(port);
        } catch (final NumberFormatException e ) {
            // e.g. the "∅" placeholder that the private constructor stores as initial port
            return -1;
        }
    }

    /** puts the current time into the lastseen field */
    public final void setLastSeenUTC() {
        // because java thinks it must apply the UTC offset to the current time,
        // to create a string that looks like our current time, it adds the local UTC offset to the
        // time. To create a corrected UTC Date string, one would first subtract the local UTC offset.
        // NOTE: that subtraction is not applied here, the unmodified current time is formatted.
        final GenericFormatter my_SHORT_SECOND_FORMATTER =
                new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
        final String ls = my_SHORT_SECOND_FORMATTER.format(new Date(System.currentTimeMillis()));
        this.dna.put(Seed.LASTSEEN, ls);
    }

    /**
     * @return the last seen time in milliseconds as parsed from the LastSeen field ("20040101000000" if the
     *         field is missing); if the field cannot be parsed, the current time minus one day
     */
    public final long getLastSeenUTC() {
        try {
            final GenericFormatter my_SHORT_SECOND_FORMATTER =
                    new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
            // getTime creates a UTC time number. But in this case java thinks, that the given
            // time string is a local time, which has a local UTC offset applied.
            // Therefore java subtracts the local UTC offset, to get a UTC number.
            // NOTE: the correction of that subtraction (adding the local UTC offset again) is not applied here.
            return my_SHORT_SECOND_FORMATTER.parse(get(Seed.LASTSEEN, "20040101000000")).getTime();
        } catch (final java.text.ParseException | java.lang.NumberFormatException e ) {
            // in case of an error make seed look old!!!
            return System.currentTimeMillis() - AbstractFormatter.dayMillis;
        }
    }

    /**
     * test if the lastSeen time of the seed has a time-out
     *
     * @param milliseconds the maximum age of the last-seen value
     * @return true, if the absolute time difference between the last-seen time and now is greater than the given time-out
     */
    public final boolean isLastSeenTimeout(final long milliseconds) {
        final long d = Math.abs(System.currentTimeMillis() - getLastSeenUTC());
        return d > milliseconds;
    }

    /**
     * @return the birth date of the peer in milliseconds. The value is parsed from the BDate field on first
     *         use and then cached; if the field cannot be parsed, the current time is used.
     */
    public final long getBirthdate() {
        if ( this.birthdate > 0 ) {
            return this.birthdate;
        }
        long b;
        try {
            final GenericFormatter my_SHORT_SECOND_FORMATTER =
                    new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
            b = my_SHORT_SECOND_FORMATTER.parse(get(Seed.BDATE, "20040101000000")).getTime();
        } catch (final ParseException e ) {
            b = System.currentTimeMillis();
        }
        this.birthdate = b;
        return this.birthdate;
    }

    /** @return the age of the seed in number of days */
    public final int getAge() {
        return (int) Math.abs((System.currentTimeMillis() - getBirthdate()) / 1000 / 60 / 60 / 24);
    }

    /** @param keys the peer tags; they are stored as one string, separated by '|' */
    public void setPeerTags(final Set<String> keys) {
        this.dna.put(PEERTAGS, MapTools.set2string(keys, "|", false));
    }

    /** @return the peer tags; the Tags value "*" is used if no tags are stored */
    public Set<String> getPeerTags() {
        return MapTools.string2set(get(PEERTAGS, "*"), "|");
    }

    /**
     * @param searchHashes a set of word hashes
     * @return true if the Tags value is "*" or if the word hash of one of the peer tags is in searchHashes
     */
    public boolean matchPeerTags(final HandleSet searchHashes) {
        final String peertags = get(PEERTAGS, "");
        if ( peertags.equals("*") ) {
            return true;
        }
        final Set<String> tags = MapTools.string2set(peertags, "|");
        for ( final String tag : tags ) {
            if ( searchHashes.has(Word.word2hash(tag)) ) {
                return true;
            }
        }
        return false;
    }

    /** @return the ISpeed value (pages/minute) as int, or 0 if it is not an integer number */
    public int getPPM() {
        try {
            return Integer.parseInt(get(Seed.ISPEED, Seed.ZERO));
        } catch (final NumberFormatException e ) {
            return 0;
        }
    }

    /** @return the RSpeed value (queries/minute) as float, or 0 if it is not a number */
    public float getQPM() {
        try {
            return Float.parseFloat(get(Seed.RSPEED, Seed.ZERO));
        } catch (final NumberFormatException e ) {
            return 0f;
        }
    }

    /** @return the LCount value, or 0 if it is missing or not a number */
    public final long getLinkCount() {
        // getLong already falls back to the default if the value cannot be parsed
        return getLong(Seed.LCOUNT, 0);
    }

    /** @return the ICount value, or 0 if it is missing or not a number */
    public final long getWordCount() {
        // getLong already falls back to the default if the value cannot be parsed
        return getLong(Seed.ICOUNT, 0);
    }

    /** Read one bit of the Flags value; the decoded bitfield is cached until the flags are written again. */
    private boolean getFlag(final int flag) {
        if (this.bitfield == null) {
            final String flags = get(Seed.FLAGS, Seed.FLAGSZERO);
            this.bitfield = new Bitfield(ASCII.getBytes(flags));
        }
        return this.bitfield.get(flag);
    }

    /** Write one bit of the Flags value; a Flags value whose length is not 4 is replaced by FLAGSZERO first. */
    private void setFlag(final int flag, final boolean value) {
        String flags = get(Seed.FLAGS, Seed.FLAGSZERO);
        if ( flags.length() != 4 ) {
            flags = Seed.FLAGSZERO;
        }
        final Bitfield f = new Bitfield(ASCII.getBytes(flags));
        f.set(flag, value);
        this.dna.put(Seed.FLAGS, UTF8.String(f.getBytes()));
        this.bitfield = null; // drop the cached bitfield, it is outdated now
    }

    public final void setFlagDirectConnect(final boolean value) {
        setFlag(FLAG_DIRECT_CONNECT, value);
    }

    public final boolean getFlagDirectConnect() {
        return getFlag(FLAG_DIRECT_CONNECT);
    }

    public final void setFlagAcceptRemoteCrawl(final boolean value) {
        setFlag(FLAG_ACCEPT_REMOTE_CRAWL, value);
    }

    public final boolean getFlagAcceptRemoteCrawl() {
        return getFlag(FLAG_ACCEPT_REMOTE_CRAWL);
    }

    public final void setFlagAcceptRemoteIndex(final boolean value) {
        setFlag(FLAG_ACCEPT_REMOTE_INDEX, value);
    }

    public final boolean getFlagAcceptRemoteIndex() {
        return getFlag(FLAG_ACCEPT_REMOTE_INDEX);
    }

    public final void setFlagRootNode(final boolean value) {
        setFlag(FLAG_ROOT_NODE, value);
    }

    /** @return the root node flag; always false if the version of the peer is below 1.02009142 */
    public final boolean getFlagRootNode() {
        final double v = getVersion();
        if (v < 1.02009142d) return false;
        return getFlag(FLAG_ROOT_NODE);
    }

    public final void setFlagSSLAvailable(final boolean value) {
        setFlag(FLAG_SSL_AVAILABLE, value);
    }

    /** @return the SSL flag; always false if the version of the peer is below 1.5 */
    public final boolean getFlagSSLAvailable() {
        if (getVersion() < 1.5) return false;
        return getFlag(FLAG_SSL_AVAILABLE);
    }

    /**
     * remembers status of remote Solr interface dynamically
     * should not be used for the local peer
     * @param value true stores "OK", false stores "NA"
     */
    public final void setFlagSolrAvailable(final boolean value) {
        if (value) this.dna.put(Seed.SOLRAVAILABLE, "OK"); else this.dna.put(Seed.SOLRAVAILABLE, "NA");
        this.bitfield = null;
    }

    /**
     * gets the last set result for remote solr status
     *
     * @return false only if the status "NA" was stored; if the status is unknown it returns true
     */
    public final boolean getFlagSolrAvailable() {
        // field is intended to deal with 3 states
        // null = never checked, "OK" and "NA" for not available
        final String solravail = this.dna.get(Seed.SOLRAVAILABLE);
        return !"NA".equals(solravail);
    }

    /** clears the flag bits 4 to 19; note that bit 4 is also the position of FLAG_SSL_AVAILABLE */
    public final void setUnusedFlags() {
        for ( int i = 4; i < 20; i++ ) {
            setFlag(i, false);
        }
    }

    public final boolean isType(final String type) {
        return get(Seed.PEERTYPE, "").equals(type);
    }

    public final boolean isVirgin() {
        return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_VIRGIN);
    }

    public final boolean isJunior() {
        return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_JUNIOR);
    }

    public final boolean isSenior() {
        return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_SENIOR);
    }

    public final boolean isPrincipal() {
        return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_PRINCIPAL);
    }

    /** @return true if the peer type is virgin or junior */
    public final boolean isPotential() {
        return isVirgin() || isJunior();
    }

    /** @return true if the peer type is senior or principal */
    public final boolean isActive() {
        return isSenior() || isPrincipal();
    }

    /** @return true if the peer type is senior or principal */
    public final boolean isOnline() {
        return isSenior() || isPrincipal();
    }

    /**
     * @param type a peer type, must not be null
     * @return true if the given type is senior or principal
     */
    public final boolean isOnline(final String type) {
        return type.equals(Seed.PEERTYPE_SENIOR) || type.equals(Seed.PEERTYPE_PRINCIPAL);
    }

    /**
     * @param random the source of random numbers
     * @param n the exclusive upper bound of the result, must not be 0
     * @return for positive n a number in the range 0 (inclusive) to n (exclusive), derived from random.nextLong()
     */
    public long nextLong(final Random random, final long n) {
        final long r = random.nextLong();
        // Math.abs(Long.MIN_VALUE) is still negative and would produce a negative result
        final long nonNegative = (r == Long.MIN_VALUE) ? 0L : Math.abs(r);
        return nonNegative % n;
    }

    /**
     * Compute a peer hash which is placed into one of the gaps between the hashes of the connected peers.
     *
     * @param seedDB the seed database; may be null
     * @return a random hash if seedDB is null, has at most two connected peers or no gap was found;
     *         otherwise a hash whose first two bytes are derived from a position inside a gap
     */
    private static byte[] bestGap(final SeedDB seedDB) {
        final byte[] randomHash = randomHash();
        if ( seedDB == null || seedDB.sizeConnected() <= 2 ) {
            // use random hash
            return randomHash;
        }
        // find gaps
        final TreeMap<Long, String> gaps = hashGaps(seedDB);

        // take one gap; prefer biggest but take also another smaller by chance
        String interval = null;
        while ( !gaps.isEmpty() ) {
            interval = gaps.remove(gaps.lastKey());
            if ( random.nextBoolean() ) {
                break;
            }
        }
        if ( interval == null ) {
            return randomHash();
        }

        // find dht position and size of gap; the interval is the concatenation of two 12-character hashes
        final long left = Distribution.horizontalDHTPosition(ASCII.getBytes(interval.substring(0, 12)));
        final long right = Distribution.horizontalDHTPosition(ASCII.getBytes(interval.substring(12)));
        final long gap8 = Distribution.horizontalDHTDistance(left, right) >> 3; // 1/8 of a gap
        final long span = 6 * gap8;
        final long r = random.nextLong();
        // Math.abs(Long.MIN_VALUE) is negative, and a gap smaller than 8 makes the span 0 (division by zero)
        final long nonNegative = (r == Long.MIN_VALUE) ? 0L : Math.abs(r);
        final long gapx = gap8 + ((span == 0) ? 0L : nonNegative % span);
        final long gappos = (Long.MAX_VALUE - left >= gapx) ? left + gapx : (left - Long.MAX_VALUE) + gapx;
        final byte[] computedHash = Distribution.positionToHash(gappos);
        // the computed hash is the perfect position (modulo gap4 population and gap alternatives)
        // this is too tight. The hash must be more randomized. We take only (!) the first two bytes
        // of the computed hash and add random bytes at the remaining positions. The first two bytes
        // of the hash may have 64*64 = 2^12 positions.
        byte[] combined = new byte[12];
        System.arraycopy(computedHash, 0, combined, 0, 2);
        System.arraycopy(randomHash, 2, combined, 2, 10);
        // patch for the 'first sector' problem
        if ( combined[0] == 'A' || combined[1] == 'D' ) { // for some strange reason there are too many of them
            combined[1] = randomHash[1];
        }
        // finally check if the hash is already known
        while ( seedDB.hasConnected(combined) || seedDB.hasDisconnected(combined) || seedDB.hasPotential(combined) ) {
            // if we are lucky then this loop will never run
            combined = randomHash();
        }
        return combined;
    }

    /**
     * Compute the DHT distances between neighbouring connected peers.
     *
     * @param seedDB the seed database; may be null
     * @return a map from the distance between two peers to the concatenation of their two hashes;
     *         gaps with the same distance overwrite each other; empty if seedDB is null
     */
    private static TreeMap<Long, String> hashGaps(final SeedDB seedDB) {
        final TreeMap<Long, String> gaps = new TreeMap<Long, String>();
        if ( seedDB == null ) {
            return gaps;
        }

        final Iterator<Seed> i = seedDB.seedsConnected(true, false, null, (float) 0.0);
        long l;
        Seed s0 = null, s1, first = null;
        while ( i.hasNext() ) {
            s1 = i.next();
            if ( s0 == null ) {
                s0 = s1;
                first = s0;
                continue;
            }
            l = Distribution.horizontalDHTDistance(
                    Distribution.horizontalDHTPosition(ASCII.getBytes(s0.hash)),
                    Distribution.horizontalDHTPosition(ASCII.getBytes(s1.hash)));
            gaps.put(l, s0.hash + s1.hash);
            s0 = s1;
        }
        // compute also the last gap, from the last peer back to the first one
        if ( (first != null) && (s0 != null) ) {
            l = Distribution.horizontalDHTDistance(
                    Distribution.horizontalDHTPosition(ASCII.getBytes(s0.hash)),
                    Distribution.horizontalDHTPosition(ASCII.getBytes(first.hash)));
            gaps.put(l, s0.hash + first.hash);
        }
        return gaps;
    }

    /**
     * generate a seed for the local peer (as anonymous peer)
     * this is the birthplace of a seed, that then will start to travel to other peers
     *
     * @param db the seed database which is used to find a hash for the new seed
     * @return a new seed with a default peer name and the public port from the configuration
     */
    public static Seed genLocalSeed(final SeedDB db) {
        final String hashs = ASCII.String(bestGap(db));
        Network.log.info("init: OWN SEED = " + hashs);

        final Seed newSeed = new Seed(hashs);

        // now calculate other information about the host
        final int port = Switchboard.getSwitchboard().getPublicPort("port", 8090); //get port from config
        newSeed.dna.put(Seed.NAME, defaultPeerName() );
        newSeed.dna.put(Seed.PORT, Integer.toString(port));
        return newSeed;
    }

    //public static
String randomHash() { return "zLXFf5lTteUv"; } // only for debugging public static byte[] randomHash() { final String hash = Base64Order.enhancedCoder .encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))) .substring(0, 6) + Base64Order.enhancedCoder .encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))) .substring(0, 6); return ASCII.getBytes(hash); } public static Seed genRemoteSeed( final String seedStr, final boolean ownSeed, final String patchIP) throws IOException { // this method is used to convert the external representation of a seed into a seed object // yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key); // check protocol and syntax of seed if ( seedStr == null ) { throw new IOException("seedStr == null"); } if ( seedStr.isEmpty() ) { throw new IOException("seedStr.isEmpty()"); } final String seed = crypt.simpleDecode(seedStr); if ( seed == null ) { throw new IOException("seed == null"); } if ( seed.isEmpty() ) { throw new IOException("seed.isEmpty()"); } // extract hash final ConcurrentHashMap dna = MapTools.string2map(seed, ","); final String hash = dna.remove(Seed.HASH); if ( hash == null ) { throw new IOException("hash == null"); } final Seed resultSeed = new Seed(hash, dna); // check semantics of content String testResult = resultSeed.isProper(ownSeed); if ( testResult != null && patchIP != null ) { // in case that this proper-Test fails and a patchIP is given // then replace the given IP in the resultSeed with given patchIP // this is done if a remote peer reports its IP in a wrong way (maybe fraud attempt) resultSeed.setIP(patchIP); testResult = resultSeed.isProper(ownSeed); } if ( testResult != null ) { throw new IOException("seed is not proper (" + testResult + "): " + resultSeed); } //assert resultSeed.toString().equals(seed) : "\nresultSeed.toString() = " + resultSeed.toString() + ",\n seed = " + seed; // debug // seed ok return resultSeed; } // TODO: add here IP ranges to accept also intranet networks 
public final String isProper(final boolean checkOwnIP) { // checks if everything is ok with that seed // check hash if ( this.hash == null ) { return "hash is null"; } if ( this.hash.length() != Word.commonHashLength ) { return "wrong hash length (" + this.hash.length() + ")"; } // name final String peerName = this.dna.get(Seed.NAME); if ( peerName == null ) { return "no peer name given"; } this.dna.put(Seed.NAME, checkPeerName(peerName)); // type final String peerType = getPeerType(); if ( (peerType == null) || !(peerType.equals(Seed.PEERTYPE_VIRGIN) || peerType.equals(Seed.PEERTYPE_JUNIOR) || peerType.equals(Seed.PEERTYPE_SENIOR) || peerType.equals(Seed.PEERTYPE_PRINCIPAL)) ) { return "invalid peerType '" + peerType + "'"; } // check IP if ( !checkOwnIP ) { // checking of IP is omitted if we read the own seed file final String ip = getIP(); if (!isProperIP(ip)) return "not a proper IP " + ip; } // seedURL final String seedURL = this.dna.get(SEEDLISTURL); if ( seedURL != null && !seedURL.isEmpty() ) { if ( !seedURL.startsWith("http://") && !seedURL.startsWith("https://") ) { return "wrong protocol for seedURL"; } try { final URL url = new URL(seedURL); final String host = url.getHost(); if (Domains.isIntranet(host)) { // network.unit.domain = any returns isIntranet() always true (because noLocalCheck is set true) // but seedlist on intranet host must be allowed -> check for intranet mode and deny "localhost" or loopback IP if (Switchboard.getSwitchboard().isIntranetMode() ) { if (Domains.isLocalhost(host)) { return "seedURL on local host rejected ("+host+")"; } } else { return "seedURL in local network rejected ("+host+")"; } } } catch (final MalformedURLException e ) { return "seedURL malformed"; } } return null; } /** * check if the given string containing an IP is proper. 
This checks also if the IP is within the given * range of the network definition * @param ipString * @return true iff the IP is proper */ public static final boolean isProperIP(final String ipString) { if (ipString == null) return false; if (ipString.length() < 3) return false; if (Switchboard.getSwitchboard().isAllIPMode()) return true; // accept everyting final boolean islocal = Domains.isLocal(ipString, null); //if (islocal && Switchboard.getSwitchboard().isGlobalMode()) return ipString + " - local IP for global mode rejected"; return islocal == Switchboard.getSwitchboard().isIntranetMode(); } @Override public final String toString() { final ConcurrentMap copymap = new ConcurrentHashMap(); copymap.putAll(this.dna); copymap.put(Seed.HASH, this.hash); // set hash into seed code structure String s = MapTools.map2string(copymap, ",", true); // generate string representation return s; } public final String genSeedStr(final String key) { // use a default encoding final String r = toString(); final String z = crypt.simpleEncode(r, key, 'z'); final String b = crypt.simpleEncode(r, key, 'b'); // the compressed string may be longer than the uncompressed if there is too much overhead for compression meta-info // take simply that string which is shorter return ( b.length() < z.length() ) ? 
b : z; } public final void save(final File f) throws IOException { final String out = crypt.simpleEncode(toString(), null, 'p'); final FileWriter fw = new FileWriter(f); fw.write(out, 0, out.length()); fw.close(); } public static Seed load(final File f) throws IOException { final FileReader fr = new FileReader(f); final char[] b = new char[(int) f.length()]; fr.read(b, 0, b.length); fr.close(); final Seed mySeed = genRemoteSeed(new String(b), true, null); assert mySeed != null; // in case of an error, an IOException is thrown mySeed.dna.put(Seed.IP, ""); // set own IP as unknown return mySeed; } @Override public final Seed clone() { final ConcurrentHashMap ndna = new ConcurrentHashMap(); ndna.putAll(this.dna); return new Seed(this.hash, ndna); } @Override public int compareTo(final Seed arg0) { final int o1 = hashCode(); final int o2 = arg0.hashCode(); if ( o1 > o2 ) { return 1; } if ( o2 > o1 ) { return -1; } return 0; } @Override public int hashCode() { return (int) ((Base64Order.enhancedCoder.cardinal(this.hash) & (Integer.MAX_VALUE))); } @Override public int compare(final Seed o1, final Seed o2) { return o1.compareTo(o2); } @Override public boolean equals(Object other) { return this.hash.equals(((Seed) other).hash); } public static void main(final String[] args) { final ScoreMap s = new ClusteredScoreMap(); for ( int i = 0; i < 10000; i++ ) { final byte[] b = randomHash(); s.inc(0xff & Base64Order.enhancedCoder.decodeByte(b[0])); //System.out.println(ASCII.String(b)); } final Iterator i = s.keys(false); while ( i.hasNext() ) { System.out.println(i.next()); } } }