- fixed several NPE bugs

- fixed losing of own seed hash (hopefully)
- fixed a bug with crawl starts beginning with (bookmark) files
- added better IP recognition during hello process


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4882 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 2f381b8d7a
commit 40d7f485f3

@ -16,7 +16,7 @@
You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth". You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth".
</p> </p>
<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data"> <form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
<table border="0" cellpadding="5" cellspacing="1"> <table border="0" cellpadding="5" cellspacing="1">
<tr class="TableHeader"> <tr class="TableHeader">
<td><strong>Attribut</strong></td> <td><strong>Attribut</strong></td>

@ -306,7 +306,7 @@ public class WatchCrawler_p {
// enqueuing the url for crawling // enqueuing the url for crawling
sb.crawlStacker.enqueueEntry( sb.crawlStacker.enqueueEntry(
nexturl, nexturl,
null, "",
sb.webIndex.seedDB.mySeed().hash, sb.webIndex.seedDB.mySeed().hash,
(String) e.getValue(), (String) e.getValue(),
new Date(), new Date(),

@ -86,12 +86,19 @@ public final class hello {
int count = 0; int count = 0;
try {count = (countStr == null) ? 0 : Integer.parseInt(countStr);} catch (NumberFormatException e) {count = 0;} try {count = (countStr == null) ? 0 : Integer.parseInt(countStr);} catch (NumberFormatException e) {count = 0;}
// final Date remoteTime = yacyCore.parseUniversalDate((String) post.get(MYTIME)); // read remote time // final Date remoteTime = yacyCore.parseUniversalDate((String) post.get(MYTIME)); // read remote time
final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum
InetAddress ias = serverDomains.dnsResolve(clientip);
if (ias == null) {
prop.put("message", "cannot resolve your IP from your reported location " + clientip);
return prop;
}
if (seed.length() > yacySeed.maxsize) { if (seed.length() > yacySeed.maxsize) {
yacyCore.log.logInfo("hello/server: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")"); yacyCore.log.logInfo("hello/server: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")");
prop.put("message", "your seed is too long (" + seed.length() + ")"); prop.put("message", "your seed is too long (" + seed.length() + ")");
return prop; return prop;
} }
final yacySeed remoteSeed = yacySeed.genRemoteSeed(seed, key); final yacySeed remoteSeed = yacySeed.genRemoteSeed(seed, key, true);
remoteSeed.setIP(ias.toString());
// System.out.println("YACYHELLO: REMOTESEED=" + ((remoteSeed == null) ? "NULL" : remoteSeed.toString())); // System.out.println("YACYHELLO: REMOTESEED=" + ((remoteSeed == null) ? "NULL" : remoteSeed.toString()));
if ((remoteSeed == null) || (remoteSeed.hash == null)) { if ((remoteSeed == null) || (remoteSeed.hash == null)) {
@ -104,12 +111,6 @@ public final class hello {
// if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; } // if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; }
// we easily know the caller's IP: // we easily know the caller's IP:
final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum
InetAddress ias = serverDomains.dnsResolve(clientip);
if (ias == null) {
prop.put("message", "cannot resolve your IP from your reported location " + clientip);
return prop;
}
final String userAgent = (String) header.get(httpHeader.USER_AGENT, "<unknown>"); final String userAgent = (String) header.get(httpHeader.USER_AGENT, "<unknown>");
final String reportedip = remoteSeed.get(yacySeed.IP, ""); final String reportedip = remoteSeed.get(yacySeed.IP, "");
final String reportedPeerType = remoteSeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR); final String reportedPeerType = remoteSeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR);
@ -178,7 +179,7 @@ public final class hello {
remoteSeed.put(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR); remoteSeed.put(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR);
yacyCore.log.logInfo("hello: responded remote junior peer '" + remoteSeed.getName() + "' from " + reportedip); yacyCore.log.logInfo("hello: responded remote junior peer '" + remoteSeed.getName() + "' from " + reportedip);
// no connection here, instead store junior in connection cache // no connection here, instead store junior in connection cache
if ((remoteSeed.hash != null) && (remoteSeed.isProper() == null)) { if ((remoteSeed.hash != null) && (remoteSeed.isProper(false) == null)) {
sb.webIndex.peerActions.peerPing(remoteSeed); sb.webIndex.peerActions.peerPing(remoteSeed);
} }
} }
@ -212,7 +213,7 @@ public final class hello {
String seedString; String seedString;
while (si.hasNext()) { while (si.hasNext()) {
s = si.next(); s = si.next();
if ((s != null) && (s.isProper() == null)) try { if ((s != null) && (s.isProper(false) == null)) try {
seedString = s.genSeedStr(key); seedString = s.genSeedStr(key);
if (seedString != null) { if (seedString != null) {
seeds.append("seed").append(count).append('=').append(seedString).append(serverCore.CRLF_STRING); seeds.append("seed").append(count).append('=').append(seedString).append(serverCore.CRLF_STRING);

@ -124,7 +124,7 @@ public final class message {
return prop; return prop;
} }
//Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time //Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
yacySeed otherSeed = yacySeed.genRemoteSeed(otherSeedString, key); yacySeed otherSeed = yacySeed.genRemoteSeed(otherSeedString, key, false);
String subject = crypt.simpleDecode(post.get("subject", ""), key); // message's subject String subject = crypt.simpleDecode(post.get("subject", ""), key); // message's subject
String message = crypt.simpleDecode(post.get("message", ""), key); // message body String message = crypt.simpleDecode(post.get("message", ""), key); // message body

@ -150,7 +150,7 @@ public final class search {
TreeSet<String> abstractSet = ((abstracts.length() == 0) || (abstracts.equals("auto"))) ? null : plasmaSearchQuery.hashes2Set(abstracts); TreeSet<String> abstractSet = ((abstracts.length() == 0) || (abstracts.equals("auto"))) ? null : plasmaSearchQuery.hashes2Set(abstracts);
// store accessing peer // store accessing peer
yacySeed remoteSeed = yacySeed.genRemoteSeed(oseed, key); yacySeed remoteSeed = yacySeed.genRemoteSeed(oseed, key, false);
if (sb.webIndex.seedDB == null) { if (sb.webIndex.seedDB == null) {
yacyCore.log.logSevere("yacy.search: seed cache not initialized"); yacyCore.log.logSevere("yacy.search: seed cache not initialized");
} else { } else {

@ -92,6 +92,7 @@ public class plasmaDHTTransfer extends Thread {
super(new ThreadGroup("TransferIndexThreadGroup"), "TransferIndexWorker_" + destSeed.getName()); super(new ThreadGroup("TransferIndexThreadGroup"), "TransferIndexWorker_" + destSeed.getName());
this.log = log; this.log = log;
this.seedDB = seedDB; this.seedDB = seedDB;
this.peerActions = peerActions;
this.gzipBody4Transfer = gzipBody; this.gzipBody4Transfer = gzipBody;
this.timeout4Transfer = timeout; this.timeout4Transfer = timeout;
this.dhtChunk = dhtChunk; this.dhtChunk = dhtChunk;

@ -306,11 +306,10 @@ public class plasmaSnippetCache {
// download resource using the crawler and keep resource in memory if possible // download resource using the crawler and keep resource in memory if possible
plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true, reindexing); plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true, reindexing);
// place entry on crawl queue
plasmaHTCache.push(entry);
// getting resource metadata (e.g. the http headers for http resources) // getting resource metadata (e.g. the http headers for http resources)
if (entry != null) { if (entry != null) {
// place entry on crawl queue
plasmaHTCache.push(entry);
resInfo = entry.getDocumentInfo(); resInfo = entry.getDocumentInfo();
// read resource body (if it is there) // read resource body (if it is there)

@ -2711,7 +2711,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
enu = seedList.iterator(); enu = seedList.iterator();
lc = 0; lc = 0;
while (enu.hasNext()) { while (enu.hasNext()) {
ys = yacySeed.genRemoteSeed((String) enu.next(), null); ys = yacySeed.genRemoteSeed((String) enu.next(), null, false);
if ((ys != null) && if ((ys != null) &&
((!webIndex.seedDB.mySeedIsDefined()) || (webIndex.seedDB.mySeed().hash != ys.hash))) { ((!webIndex.seedDB.mySeedIsDefined()) || (webIndex.seedDB.mySeed().hash != ys.hash))) {
if (webIndex.peerActions.connectPeer(ys, false)) lc++; if (webIndex.peerActions.connectPeer(ys, false)) lc++;

@ -148,7 +148,7 @@ public final class yacyClient {
if (seed.length() > yacySeed.maxsize) { if (seed.length() > yacySeed.maxsize) {
yacyCore.log.logInfo("hello/client 0: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")"); yacyCore.log.logInfo("hello/client 0: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")");
} else { } else {
otherPeer = yacySeed.genRemoteSeed(seed, salt); otherPeer = yacySeed.genRemoteSeed(seed, salt, false);
if (otherPeer == null || !otherPeer.hash.equals(otherHash)) { if (otherPeer == null || !otherPeer.hash.equals(otherHash)) {
yacyCore.log.logFine("yacyClient.publishMySeed: consistency error: other peer '" + ((otherPeer==null)?"unknown":otherPeer.getName()) + "' wrong"); yacyCore.log.logFine("yacyClient.publishMySeed: consistency error: other peer '" + ((otherPeer==null)?"unknown":otherPeer.getName()) + "' wrong");
return -1; // no success return -1; // no success
@ -196,7 +196,7 @@ public final class yacyClient {
if (mySeed.orVirgin().equals(yacySeed.PEERTYPE_VIRGIN)) if (mySeed.orVirgin().equals(yacySeed.PEERTYPE_VIRGIN))
mySeed.put(yacySeed.PEERTYPE, mytype); mySeed.put(yacySeed.PEERTYPE, mytype);
final String error = mySeed.isProper(); final String error = mySeed.isProper(true);
if (error != null) { if (error != null) {
yacyCore.log.logSevere("yacyClient.publishMySeed mySeed error - not proper: " + error); yacyCore.log.logSevere("yacyClient.publishMySeed mySeed error - not proper: " + error);
return -1; return -1;
@ -214,7 +214,7 @@ public final class yacyClient {
if (seedStr.length() > yacySeed.maxsize) { if (seedStr.length() > yacySeed.maxsize) {
yacyCore.log.logInfo("hello/client: rejected contacting seed; too large (" + seedStr.length() + " > " + yacySeed.maxsize + ")"); yacyCore.log.logInfo("hello/client: rejected contacting seed; too large (" + seedStr.length() + " > " + yacySeed.maxsize + ")");
} else { } else {
if (peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, salt), (i == 1))) count++; if (peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, salt, false), (i == 1))) count++;
} }
} }
return count; return count;
@ -323,7 +323,7 @@ public final class yacyClient {
if (result == null || result.size() == 0) { return null; } if (result == null || result.size() == 0) { return null; }
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time //final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
return yacySeed.genRemoteSeed((String) result.get("response"), salt); return yacySeed.genRemoteSeed((String) result.get("response"), salt, false);
} catch (Exception e) { } catch (Exception e) {
yacyCore.log.logSevere("yacyClient.querySeed error:" + e.getMessage()); yacyCore.log.logSevere("yacyClient.querySeed error:" + e.getMessage());
return null; return null;

@ -411,7 +411,7 @@ public class yacyCore {
final String address = seed.getClusterAddress(); final String address = seed.getClusterAddress();
log.logFine("HELLO #" + i + " to peer '" + seed.get(yacySeed.NAME, "") + "' at " + address); // debug log.logFine("HELLO #" + i + " to peer '" + seed.get(yacySeed.NAME, "") + "' at " + address); // debug
String seederror = seed.isProper(); String seederror = seed.isProper(false);
if ((address == null) || (seederror != null)) { if ((address == null) || (seederror != null)) {
// we don't like that address, delete it // we don't like that address, delete it
sb.webIndex.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = " + address + "; seederror = " + seederror); sb.webIndex.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = " + address + "; seederror = " + seederror);
@ -504,7 +504,7 @@ public class yacyCore {
sb.webIndex.seedDB.saveMySeed(); sb.webIndex.seedDB.saveMySeed();
// if we have an address, we do nothing // if we have an address, we do nothing
if (sb.webIndex.seedDB.mySeed().isProper() == null && !force) { return 0; } if (sb.webIndex.seedDB.mySeed().isProper(true) == null && !force) { return 0; }
if (newSeeds > 0) return newSeeds; if (newSeeds > 0) return newSeeds;
// still no success: ask own NAT or internet responder // still no success: ask own NAT or internet responder

@ -82,7 +82,7 @@ public class yacyPeerActions {
yacyCore.log.logSevere("connect: WRONG seed (NULL)"); yacyCore.log.logSevere("connect: WRONG seed (NULL)");
return false; return false;
} }
final String error = seed.isProper(); final String error = seed.isProper(false);
if (error != null) { if (error != null) {
yacyCore.log.logSevere("connect: WRONG seed (" + seed.getName() + "/" + seed.hash + "): " + error); yacyCore.log.logSevere("connect: WRONG seed (" + seed.getName() + "/" + seed.hash + "): " + error);
return false; return false;

@ -818,7 +818,7 @@ public class yacySeed {
return hash; return hash;
} }
public static yacySeed genRemoteSeed(String seedStr, String key) { public static yacySeed genRemoteSeed(String seedStr, String key, boolean ownSeed) {
// this method is used to convert the external representation of a seed into a seed object // this method is used to convert the external representation of a seed into a seed object
// yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key); // yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key);
@ -833,7 +833,7 @@ public class yacySeed {
final yacySeed resultSeed = new yacySeed(hash, dna); final yacySeed resultSeed = new yacySeed(hash, dna);
// check semantics of content // check semantics of content
final String testResult = resultSeed.isProper(); final String testResult = resultSeed.isProper(ownSeed);
if (testResult != null) { if (testResult != null) {
yacyCore.log.logFinest("seed is not proper (" + testResult + "): " + resultSeed); yacyCore.log.logFinest("seed is not proper (" + testResult + "): " + resultSeed);
return null; return null;
@ -843,7 +843,7 @@ public class yacySeed {
return resultSeed; return resultSeed;
} }
public final String isProper() { public final String isProper(boolean checkOwnIP) {
// checks if everything is ok with that seed // checks if everything is ok with that seed
// check hash // check hash
@ -856,11 +856,14 @@ public class yacySeed {
dna.put(yacySeed.NAME, checkPeerName(peerName)); dna.put(yacySeed.NAME, checkPeerName(peerName));
// check IP // check IP
final String ip = (String) this.dna.get(yacySeed.IP); if (!checkOwnIP) {
if (ip == null) return "IP is null"; // checking of IP is omitted if we read the own seed file
if (ip.length() > 0 && ip.length() < 8) return "IP is too short: " + ip; final String ip = (String) this.dna.get(yacySeed.IP);
if (!natLib.isProper(ip)) return "IP is not proper: " + ip; //this does not work with staticIP if (ip == null) return "IP is null";
if (ip.equals("localhost") || ip.startsWith("127.") || (ip.startsWith("0:0:0:0:0:0:0:1"))) return "IP for localhost rejected"; if (ip.length() > 0 && ip.length() < 8) return "IP is too short: " + ip;
if (!natLib.isProper(ip)) return "IP is not proper: " + ip; //this does not work with staticIP
if (ip.equals("localhost") || ip.startsWith("127.") || (ip.startsWith("0:0:0:0:0:0:0:1"))) return "IP for localhost rejected";
}
// seedURL // seedURL
final String seedURL = this.dna.get(SEEDLIST); final String seedURL = this.dna.get(SEEDLIST);
@ -911,7 +914,10 @@ public class yacySeed {
final char[] b = new char[(int) f.length()]; final char[] b = new char[(int) f.length()];
fr.read(b, 0, b.length); fr.read(b, 0, b.length);
fr.close(); fr.close();
return genRemoteSeed(new String(b), null); yacySeed mySeed = genRemoteSeed(new String(b), null, true);
if (mySeed == null) return null;
mySeed.dna.put(yacySeed.IP, ""); // set own IP as unknown
return mySeed;
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")

@ -452,7 +452,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
public long countPotentialRWI() { return seedPotentialDB.getLongAcc(yacySeed.ICOUNT); } public long countPotentialRWI() { return seedPotentialDB.getLongAcc(yacySeed.ICOUNT); }
public synchronized void addConnected(yacySeed seed) { public synchronized void addConnected(yacySeed seed) {
if ((seed == null) || (seed.isProper() != null)) return; if ((seed == null) || (seed.isProper(false) != null)) return;
//seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
try { try {
nameLookupCache.put(seed.getName(), seed); nameLookupCache.put(seed.getName(), seed);
@ -506,7 +506,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
seedActiveDB.remove(seed.hash); seedActiveDB.remove(seed.hash);
seedPassiveDB.remove(seed.hash); seedPassiveDB.remove(seed.hash);
} catch (Exception e) {} } catch (Exception e) {}
if (seed.isProper() != null) return; if (seed.isProper(false) != null) return;
//seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); //seed.put(yacySeed.LASTSEEN, yacyCore.shortFormatter.format(new Date(yacyCore.universalTime())));
try { try {
HashMap<String, String> seedPropMap = seed.getMap(); HashMap<String, String> seedPropMap = seed.getMap();
@ -628,7 +628,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
seed = (yacySeed) e.next(); seed = (yacySeed) e.next();
if (seed != null) { if (seed != null) {
name = seed.getName().toLowerCase(); name = seed.getName().toLowerCase();
if (seed.isProper() == null) nameLookupCache.put(name, seed); if (seed.isProper(false) == null) nameLookupCache.put(name, seed);
if (name.equals(peerName)) return seed; if (name.equals(peerName)) return seed;
} }
} }
@ -636,7 +636,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
// check local seed // check local seed
if (this.mySeed == null) initMySeed(); if (this.mySeed == null) initMySeed();
name = mySeed.getName().toLowerCase(); name = mySeed.getName().toLowerCase();
if (mySeed.isProper() == null) nameLookupCache.put(name, mySeed); if (mySeed.isProper(false) == null) nameLookupCache.put(name, mySeed);
if (name.equals(peerName)) return mySeed; if (name.equals(peerName)) return mySeed;
// nothing found // nothing found
return null; return null;
@ -687,7 +687,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
addressStr = addressStr.substring(0,pos); addressStr = addressStr.substring(0,pos);
} }
seedIPAddress = InetAddress.getByName(addressStr); seedIPAddress = InetAddress.getByName(addressStr);
if (seed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed)); if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed));
if (seedIPAddress.equals(peerIP)) return seed; if (seedIPAddress.equals(peerIP)) return seed;
} }
} catch (UnknownHostException ex) {} } catch (UnknownHostException ex) {}
@ -716,7 +716,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
addressStr = addressStr.substring(0,pos); addressStr = addressStr.substring(0,pos);
} }
seedIPAddress = InetAddress.getByName(addressStr); seedIPAddress = InetAddress.getByName(addressStr);
if (seed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed)); if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed));
if (seedIPAddress.equals(peerIP)) return seed; if (seedIPAddress.equals(peerIP)) return seed;
} }
} catch (UnknownHostException ex) {} } catch (UnknownHostException ex) {}
@ -739,7 +739,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
addressStr = addressStr.substring(0,pos); addressStr = addressStr.substring(0,pos);
} }
seedIPAddress = InetAddress.getByName(addressStr); seedIPAddress = InetAddress.getByName(addressStr);
if (seed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed)); if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed));
if (seedIPAddress.equals(peerIP)) return seed; if (seedIPAddress.equals(peerIP)) return seed;
} }
} catch (UnknownHostException ex) {} } catch (UnknownHostException ex) {}
@ -755,7 +755,7 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
addressStr = addressStr.substring(0,pos); addressStr = addressStr.substring(0,pos);
} }
seedIPAddress = InetAddress.getByName(addressStr); seedIPAddress = InetAddress.getByName(addressStr);
if (mySeed.isProper() == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(mySeed)); if (mySeed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(mySeed));
if (seedIPAddress.equals(peerIP)) return mySeed; if (seedIPAddress.equals(peerIP)) return mySeed;
// nothing found // nothing found
return null; return null;

Loading…
Cancel
Save