diff --git a/htroot/CrawlStart_p.html b/htroot/CrawlStart_p.html index 8aeb5cc8d..2531e6e29 100644 --- a/htroot/CrawlStart_p.html +++ b/htroot/CrawlStart_p.html @@ -16,7 +16,7 @@ You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth".

-
+ diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index d3196b299..1f8396422 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -306,7 +306,7 @@ public class WatchCrawler_p { // enqueuing the url for crawling sb.crawlStacker.enqueueEntry( nexturl, - null, + "", sb.webIndex.seedDB.mySeed().hash, (String) e.getValue(), new Date(), diff --git a/htroot/yacy/hello.java b/htroot/yacy/hello.java index 4f6e7b30b..e5c7269b2 100644 --- a/htroot/yacy/hello.java +++ b/htroot/yacy/hello.java @@ -86,12 +86,19 @@ public final class hello { int count = 0; try {count = (countStr == null) ? 0 : Integer.parseInt(countStr);} catch (NumberFormatException e) {count = 0;} // final Date remoteTime = yacyCore.parseUniversalDate((String) post.get(MYTIME)); // read remote time + final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, ""); // read an artificial header addendum + InetAddress ias = serverDomains.dnsResolve(clientip); + if (ias == null) { + prop.put("message", "cannot resolve your IP from your reported location " + clientip); + return prop; + } if (seed.length() > yacySeed.maxsize) { yacyCore.log.logInfo("hello/server: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")"); prop.put("message", "your seed is too long (" + seed.length() + ")"); return prop; } - final yacySeed remoteSeed = yacySeed.genRemoteSeed(seed, key); + final yacySeed remoteSeed = yacySeed.genRemoteSeed(seed, key, true); + remoteSeed.setIP(ias.toString()); // System.out.println("YACYHELLO: REMOTESEED=" + ((remoteSeed == null) ? "NULL" : remoteSeed.toString())); if ((remoteSeed == null) || (remoteSeed.hash == null)) { @@ -104,12 +111,6 @@ public final class hello { // if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; } // we easily know the caller's IP: - final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, ""); // read an artificial header addendum - InetAddress ias = serverDomains.dnsResolve(clientip); - if (ias == null) { - prop.put("message", "cannot resolve your IP from your reported location " + clientip); - return prop; - } final String userAgent = (String) header.get(httpHeader.USER_AGENT, ""); final String reportedip = remoteSeed.get(yacySeed.IP, ""); final String reportedPeerType = remoteSeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR); @@ -178,7 +179,7 @@ public final class hello { remoteSeed.put(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR); yacyCore.log.logInfo("hello: responded remote junior peer '" + remoteSeed.getName() + "' from " + reportedip); // no connection here, instead store junior in connection cache - if ((remoteSeed.hash != null) && (remoteSeed.isProper() == null)) { + if ((remoteSeed.hash != null) && (remoteSeed.isProper(false) == null)) { sb.webIndex.peerActions.peerPing(remoteSeed); } } @@ -212,7 +213,7 @@ public final class hello { String seedString; while (si.hasNext()) { s = si.next(); - if ((s != null) && (s.isProper() == null)) try { + if ((s != null) && (s.isProper(false) == null)) try { seedString = s.genSeedStr(key); if (seedString != null) { seeds.append("seed").append(count).append('=').append(seedString).append(serverCore.CRLF_STRING); diff --git a/htroot/yacy/message.java b/htroot/yacy/message.java index 4d6ca3eb3..ce37d4ef4 100644 --- a/htroot/yacy/message.java +++ b/htroot/yacy/message.java @@ -124,7 +124,7 @@ public final class message { return prop; } //Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time - yacySeed otherSeed = yacySeed.genRemoteSeed(otherSeedString, key); + yacySeed otherSeed = yacySeed.genRemoteSeed(otherSeedString, key, false); String subject = crypt.simpleDecode(post.get("subject", ""), key); // message's subject String message = crypt.simpleDecode(post.get("message", ""), key); // message body diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index d4f0dbbd3..fd0bbea91 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -150,7 +150,7 @@ public final class search { TreeSet abstractSet = ((abstracts.length() == 0) || (abstracts.equals("auto"))) ? null : plasmaSearchQuery.hashes2Set(abstracts); // store accessing peer - yacySeed remoteSeed = yacySeed.genRemoteSeed(oseed, key); + yacySeed remoteSeed = yacySeed.genRemoteSeed(oseed, key, false); if (sb.webIndex.seedDB == null) { yacyCore.log.logSevere("yacy.search: seed cache not initialized"); } else { diff --git a/source/de/anomic/plasma/plasmaDHTTransfer.java b/source/de/anomic/plasma/plasmaDHTTransfer.java index 629770baa..d584d3ce0 100644 --- a/source/de/anomic/plasma/plasmaDHTTransfer.java +++ b/source/de/anomic/plasma/plasmaDHTTransfer.java @@ -92,6 +92,7 @@ public class plasmaDHTTransfer extends Thread { super(new ThreadGroup("TransferIndexThreadGroup"), "TransferIndexWorker_" + destSeed.getName()); this.log = log; this.seedDB = seedDB; + this.peerActions = peerActions; this.gzipBody4Transfer = gzipBody; this.timeout4Transfer = timeout; this.dhtChunk = dhtChunk; diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index a09cca68f..24a7cda70 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -306,11 +306,10 @@ public class plasmaSnippetCache { // download resource using the crawler and keep resource in memory if possible plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true, reindexing); - // place entry on crawl queue - plasmaHTCache.push(entry); - // getting resource metadata (e.g. the http headers for http resources) if (entry != null) { + // place entry on crawl queue + plasmaHTCache.push(entry); resInfo = entry.getDocumentInfo(); // read resource body (if it is there) diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 6a00837f1..9120cbc96 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2711,7 +2711,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch yacySeed.maxsize) { yacyCore.log.logInfo("hello/client 0: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")"); } else { - otherPeer = yacySeed.genRemoteSeed(seed, salt); + otherPeer = yacySeed.genRemoteSeed(seed, salt, false); if (otherPeer == null || !otherPeer.hash.equals(otherHash)) { yacyCore.log.logFine("yacyClient.publishMySeed: consistency error: other peer '" + ((otherPeer==null)?"unknown":otherPeer.getName()) + "' wrong"); return -1; // no success @@ -196,7 +196,7 @@ public final class yacyClient { if (mySeed.orVirgin().equals(yacySeed.PEERTYPE_VIRGIN)) mySeed.put(yacySeed.PEERTYPE, mytype); - final String error = mySeed.isProper(); + final String error = mySeed.isProper(true); if (error != null) { yacyCore.log.logSevere("yacyClient.publishMySeed mySeed error - not proper: " + error); return -1; @@ -214,7 +214,7 @@ public final class yacyClient { if (seedStr.length() > yacySeed.maxsize) { yacyCore.log.logInfo("hello/client: rejected contacting seed; too large (" + seedStr.length() + " > " + yacySeed.maxsize + ")"); } else { - if (peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, salt), (i == 1))) count++; + if (peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, salt, false), (i == 1))) count++; } } return count; @@ -323,7 +323,7 @@ public final class yacyClient { if (result == null || result.size() == 0) { return null; } //final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time - return yacySeed.genRemoteSeed((String) result.get("response"), salt); + return yacySeed.genRemoteSeed((String) result.get("response"), salt, false); } catch (Exception e) { yacyCore.log.logSevere("yacyClient.querySeed error:" + e.getMessage()); return null; diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index f599759f2..c673bb639 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -411,7 +411,7 @@ public class yacyCore { final String address = seed.getClusterAddress(); log.logFine("HELLO #" + i + " to peer '" + seed.get(yacySeed.NAME, "") + "' at " + address); // debug - String seederror = seed.isProper(); + String seederror = seed.isProper(false); if ((address == null) || (seederror != null)) { // we don't like that address, delete it sb.webIndex.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = " + address + "; seederror = " + seederror); @@ -504,7 +504,7 @@ public class yacyCore { sb.webIndex.seedDB.saveMySeed(); // if we have an address, we do nothing - if (sb.webIndex.seedDB.mySeed().isProper() == null && !force) { return 0; } + if (sb.webIndex.seedDB.mySeed().isProper(true) == null && !force) { return 0; } if (newSeeds > 0) return newSeeds; // still no success: ask own NAT or internet responder diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index 887b37811..d8f683cba 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -82,7 +82,7 @@ public class yacyPeerActions { yacyCore.log.logSevere("connect: WRONG seed (NULL)"); return false; } - final String error = seed.isProper(); + final String error = seed.isProper(false); if (error != null) { yacyCore.log.logSevere("connect: WRONG seed (" + seed.getName() + "/" + seed.hash + "): " + error); return false; diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 2f1677d6f..c3ba47f9b 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -818,7 +818,7 @@ public class yacySeed { return hash; } - public static yacySeed genRemoteSeed(String seedStr, String key) { + public static yacySeed genRemoteSeed(String seedStr, String key, boolean ownSeed) { // this method is used to convert the external representation of a seed into a seed object // yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key); @@ -833,7 +833,7 @@ public class yacySeed { final yacySeed resultSeed = new yacySeed(hash, dna); // check semantics of content - final String testResult = resultSeed.isProper(); + final String testResult = resultSeed.isProper(ownSeed); if (testResult != null) { yacyCore.log.logFinest("seed is not proper (" + testResult + "): " + resultSeed); return null; @@ -843,7 +843,7 @@ public class yacySeed { return resultSeed; } - public final String isProper() { + public final String isProper(boolean checkOwnIP) { // checks if everything is ok with that seed // check hash @@ -856,11 +856,14 @@ public class yacySeed { dna.put(yacySeed.NAME, checkPeerName(peerName)); // check IP - final String ip = (String) this.dna.get(yacySeed.IP); - if (ip == null) return "IP is null"; - if (ip.length() > 0 && ip.length() < 8) return "IP is too short: " + ip; - if (!natLib.isProper(ip)) return "IP is not proper: " + ip; //this does not work with staticIP - if (ip.equals("localhost") || ip.startsWith("127.") || (ip.startsWith("0:0:0:0:0:0:0:1"))) return "IP for localhost rejected"; + if (!checkOwnIP) { + // checking of IP is omitted if we read the own seed file + final String ip = (String) this.dna.get(yacySeed.IP); + if (ip == null) return "IP is null"; + if (ip.length() > 0 && ip.length() < 8) return "IP is too short: " + ip; + if (!natLib.isProper(ip)) return "IP is not proper: " + ip; //this does not work with staticIP + if (ip.equals("localhost") || ip.startsWith("127.") || (ip.startsWith("0:0:0:0:0:0:0:1"))) return "IP for localhost rejected"; + } // seedURL final String seedURL = this.dna.get(SEEDLIST); @@ -911,7 +914,10 @@ public class yacySeed { final char[] b = new char[(int) f.length()]; fr.read(b, 0, b.length); fr.close(); - return genRemoteSeed(new String(b), null); + yacySeed mySeed = genRemoteSeed(new String(b), null, true); + if (mySeed == null) return null; + mySeed.dna.put(yacySeed.IP, ""); // set own IP as unknown + return mySeed; } @SuppressWarnings("unchecked") diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 1020b3504..91d9d8d0a 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -452,7 +452,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { public long countPotentialRWI() { return seedPotentialDB.getLongAcc(yacySeed.ICOUNT); } public synchronized void addConnected(yacySeed seed) { - if ((seed == null) || (seed.isProper() != null)) return; + if ((seed == null) || (seed.isProper(false) != null)) return; //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); try { nameLookupCache.put(seed.getName(), seed); @@ -506,7 +506,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { seedActiveDB.remove(seed.hash); seedPassiveDB.remove(seed.hash); } catch (Exception e) {} - if (seed.isProper() != null) return; + if (seed.isProper(false) != null) return; //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); try { HashMap seedPropMap = seed.getMap(); @@ -628,7 +628,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { seed = (yacySeed) e.next(); if (seed != null) { name = seed.getName().toLowerCase(); - if (seed.isProper() == null) nameLookupCache.put(name, seed); + if (seed.isProper(false) == null) nameLookupCache.put(name, seed); if (name.equals(peerName)) return seed; } } @@ -636,7 +636,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { // check local seed if (this.mySeed == null) initMySeed(); name = mySeed.getName().toLowerCase(); - if (mySeed.isProper() == null) nameLookupCache.put(name, mySeed); + if (mySeed.isProper(false) == null) nameLookupCache.put(name, mySeed); if (name.equals(peerName)) return mySeed; // nothing found return null; @@ -687,7 +687,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { addressStr = addressStr.substring(0,pos); } seedIPAddress = InetAddress.getByName(addressStr); - if (seed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference(seed)); + if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference(seed)); if (seedIPAddress.equals(peerIP)) return seed; } } catch (UnknownHostException ex) {} @@ -716,7 +716,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { addressStr = addressStr.substring(0,pos); } seedIPAddress = InetAddress.getByName(addressStr); - if (seed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference(seed)); + if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference(seed)); if (seedIPAddress.equals(peerIP)) return seed; } } catch (UnknownHostException ex) {} @@ -739,7 +739,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { addressStr = addressStr.substring(0,pos); } seedIPAddress = InetAddress.getByName(addressStr); - if (seed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference(seed)); + if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference(seed)); if (seedIPAddress.equals(peerIP)) return seed; } } catch (UnknownHostException ex) {} @@ -755,7 +755,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames { addressStr = addressStr.substring(0,pos); } seedIPAddress = InetAddress.getByName(addressStr); - if (mySeed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference(mySeed)); + if (mySeed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference(mySeed)); if (seedIPAddress.equals(peerIP)) return mySeed; // nothing found return null;
Attribut