diff --git a/build.properties b/build.properties index 851ad22db..21a9f29b9 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.5 javacTarget=1.5 # Release Configuration -releaseVersion=0.561 +releaseVersion=0.562 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz diff --git a/build.xml b/build.xml index 0c625bdbf..04faf3308 100644 --- a/build.xml +++ b/build.xml @@ -537,7 +537,7 @@ - + @@ -561,31 +561,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - @@ -767,7 +742,7 @@ This needs nsis-ant-1.2.jar in the yacyfolder. --> - + diff --git a/htroot/env/grafics/YaCyLogo_120ppi.png b/htroot/env/grafics/YaCyLogo_120ppi.png index 9dc8ae5e8..5672fcfb7 100644 Binary files a/htroot/env/grafics/YaCyLogo_120ppi.png and b/htroot/env/grafics/YaCyLogo_120ppi.png differ diff --git a/htroot/env/grafics/YaCyLogo_60ppi.png b/htroot/env/grafics/YaCyLogo_60ppi.png index 9afddaa95..c4f6b5461 100644 Binary files a/htroot/env/grafics/YaCyLogo_60ppi.png and b/htroot/env/grafics/YaCyLogo_60ppi.png differ diff --git a/htroot/env/grafics/yacy.gif b/htroot/env/grafics/yacy.gif index b8dbdbc6c..ded916509 100644 Binary files a/htroot/env/grafics/yacy.gif and b/htroot/env/grafics/yacy.gif differ diff --git a/htroot/yacy/crawlOrder.html b/htroot/yacy/crawlOrder.html deleted file mode 100644 index 54229c167..000000000 --- a/htroot/yacy/crawlOrder.html +++ /dev/null @@ -1,13 +0,0 @@ -version=#[version]# -uptime=#[uptime]# -response=#[response]# -reason=#[reason]# -delay=#[delay]# -depth=#[depth]# -forward=#[forward]# -key=#[key]# -lurl=#[lurl]# -#{list}# -job#[count]#=#[job]# -lurl#[count]#=#[lurl]# -#{/list}# \ No newline at end of file diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java deleted file mode 100644 index 33b20be67..000000000 --- a/htroot/yacy/crawlOrder.java +++ /dev/null @@ -1,281 +0,0 @@ -// crawlOrder.java -// ----------------------- -// part of the AnomicHTTPD caching proxy -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. - -// You must compile this file with -// javac -classpath .:../classes crawlOrder.java - -import java.net.MalformedURLException; -import java.util.ArrayList; -import java.util.Date; - -import de.anomic.http.httpHeader; -import de.anomic.index.indexURLEntry; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.tools.crypt; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyNetwork; -import de.anomic.yacy.yacySeed; -import de.anomic.yacy.yacyURL; - -public final class crawlOrder { - - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { - // return variable that accumulates replacements - plasmaSwitchboard switchboard = (plasmaSwitchboard) env; - serverObjects prop = new serverObjects(); - if ((post == null) || (env == null)) return prop; - if (!yacyNetwork.authentifyRequest(post, env)) return prop; - - //int proxyPrefetchDepth = Integer.parseInt(env.getConfig("proxyPrefetchDepth", "0")); - //int crawlingdepth = Integer.parseInt(env.getConfig("crawlingDepth", "0")); - - // request values - String iam = post.get("iam", ""); // seed hash of requester - String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability - String process = post.get("process", ""); // process type - String key = post.get("key", ""); // transmission key - int orderDepth = post.getInt("depth", 0); // crawl depth - - // response values - /* - the result can have one of the following values: - negative cases, no retry - denied - the peer does not want to crawl that - exception - an exception occurred - - negative case, retry possible - rejected - the peer has rejected to process, but a re-try should be possible - - positive case with crawling - stacked - the resource is processed asap - - positive case without crawling - double - the resource is already in database, believed to be fresh and not reloaded - the resource is also returned in lurl - */ - String response = "denied"; - String reason = "false-input"; - String delay = "5"; - String lurl = ""; - boolean granted = switchboard.getConfigBool("crawlResponse", false); - int acceptDepth = Integer.parseInt(switchboard.getConfig("crawlResponseDepth", "0")); - int ppm = yacyCore.seedDB.mySeed().getPPM(); - int acceptDelay = (ppm == 0) ? 10 : (2 + 60 / yacyCore.seedDB.mySeed().getPPM()); - - if (orderDepth > acceptDepth) orderDepth = acceptDepth; - - // check if requester is authorized - if ((yacyCore.seedDB.mySeed() == null) || (!(yacyCore.seedDB.mySeed().hash.equals(youare)))) { - // this request has a wrong target - response = "denied"; - reason = "authentify-problem"; - delay = "3600"; // may request one hour later again - } else if ((switchboard.isRobinsonMode()) && (!switchboard.isInMyCluster(iam))) { - // check network environment, if we are a robinson peer or in a robinson cluster - // then the request must come from a peer that is in the same cluster as we are - reason = "not in my cluster"; - response = "denied"; - delay = "9999"; - } else if (orderDepth > 0) { - response = "denied"; - reason = "order depth must be 0"; - delay = "3600"; // may request one hour later again - } else if (!(granted)) { - response = "denied"; - reason = "not granted to remote crawl"; - delay = "3600"; // may request one hour later again - } else try { - yacySeed requester = yacyCore.seedDB.getConnected(iam); - int queuesize = switchboard.crawlQueues.coreCrawlJobSize() + switchboard.crawlQueues.limitCrawlJobSize() + switchboard.crawlQueues.remoteTriggeredCrawlJobSize() + switchboard.queueSize(); - if (requester == null) { - response = "denied"; - reason = "unknown-client"; - delay = "240"; - } else if (!((requester.isSenior()) || (requester.isPrincipal()))) { - response = "denied"; - reason = "not-qualified"; - delay = "240"; - } else if (queuesize > 100) { - response = "rejected"; - reason = "busy"; - delay = Integer.toString(30 + queuesize * acceptDelay); - } else if (!(process.equals("crawl"))) { - response = "denied"; - reason = "unknown-order"; - delay = "9999"; - } else { - // read the urls/referrer-vector - ArrayList urlv = new ArrayList(); - ArrayList refv = new ArrayList(); - String refencoded = post.get("referrer", null); - String urlencoded = post.get("url", null); - if (urlencoded != null) { - // old method: only one url - urlv.add(crypt.simpleDecode(urlencoded, key)); // the url string to crawl - } else { - // new method: read a vector of urls - while ((urlencoded = post.get("url" + urlv.size(), null)) != null) { - urlv.add(crypt.simpleDecode(urlencoded, key)); - } - } - if (refencoded != null) { - // old method: only one url - env.getLog().logFinest("crawlOrder: refencoded=" + refencoded + " key=" + key); - refv.add(crypt.simpleDecode(refencoded, key)); // the referrer url - } else { - // new method: read a vector of urls - while ((refencoded = post.get("ref" + refv.size(), null)) != null) { - env.getLog().logFinest("crawlOrder: refencoded=" + refencoded + " key=" + key); - refv.add(crypt.simpleDecode(refencoded, key)); - } - } - - // stack the urls - Object[] stackresult; - int count = Math.min(urlv.size(), refv.size()); - if (count == 1) { - // old method: only one url - - // normalizing URL - yacyURL url = new yacyURL((String) urlv.get(0), null); - String newURL = url.toNormalform(true, true); - if (!newURL.equals(urlv.get(0))) { - env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0)); - } - yacyURL refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null); - if ((refURL != null) && (!refURL.equals(refv.get(0)))) { - env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0)); - } - - if (!switchboard.acceptURL(url)) { - env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + newURL); - return null; - } - - // adding URL to noticeURL Queue - env.getLog().logFinest("crawlOrder: a: url='" + newURL + "'"); - - stackresult = stack(switchboard, url, refURL, iam, youare); - response = (String) stackresult[0]; - reason = (String) stackresult[1]; - lurl = (String) stackresult[2]; - delay = (response.equals("stacked")) ? Integer.toString(5 + acceptDelay) : "1"; // this value needs to be calculated individually - } else { - // new method: several urls - int stackCount = 0; - //int doubleCount = 0; - //int rejectedCount = 0; - for (int i = 0; i < count; i++) { - env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'"); - try { - stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), ((refv.get(i) == null) || (((String) refv.get(i)).length() == 0)) ? null : new yacyURL((String) refv.get(i), null), iam, youare); - response = (String) stackresult[0]; - prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]); - prop.put("list_" + i + "_lurl", (String) stackresult[2]); - prop.put("list_" + i + "_count", i); - } catch (MalformedURLException e) {} - } - prop.put("list", count); - response = "enqueued"; - reason = "ok"; - lurl = ""; - delay = Integer.toString(stackCount * acceptDelay + 1); - } - } - } catch (Exception e) { - // mist - reason = "ERROR: " + e.getMessage(); - env.getLog().logSevere("crawlOrder: " + reason, e); - delay = "600"; - } - - prop.put("response", response); - prop.put("reason", reason); - prop.put("delay", delay); - prop.put("depth", acceptDepth); - prop.put("lurl", lurl); - prop.put("forward", ""); - prop.put("key", key); - - // return rewrite properties - return prop; - } - - private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, yacyURL referrer, String iam, String youare) { - String response, reason, lurl; - // stack url - switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'"); - String reasonString = null; - reasonString = switchboard.crawlStacker.stackCrawl(url, referrer, iam, "REMOTE-CRAWLING", new Date(), 0, switchboard.defaultRemoteProfile); - - if (reasonString == null) { - // liftoff! - response = "stacked"; - reason = "ok"; - lurl = ""; - } else if (reasonString.startsWith("double")) { - // case where we have already the url loaded; - reason = reasonString; - // send lurl-Entry as response - indexURLEntry entry; - entry = switchboard.wordIndex.loadedURL.load(url.hash(), null, 0); - if (entry == null) { - response = "rejected"; - lurl = ""; - } else { - response = "double"; - switchboard.wordIndex.loadedURL.notifyGCrawl(entry.hash(), iam, youare); - lurl = crypt.simpleEncode(entry.toString()); - } - } else { - response = "rejected"; - reason = reasonString; - lurl = ""; - } - return new Object[]{response, reason, lurl}; - } - -} diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index c8d6adf64..5070a263b 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -71,48 +71,27 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr System.out.println("*** Last Startup time: " + stt + " milliseconds"); long start = System.currentTimeMillis(); - - if (serverMemory.request(neededRAM, true)) { - // we can use a RAM index + if (!serverMemory.request(neededRAM, true)) { + System.out.println("WARNING: NOT ENOUGH MEMORY FOR RAM INDEX " + new File(path, tablename).toString()); + } - if (indexfile.exists()) { - // delete existing index file - System.out.println("*** Delete File index " + indexfile); - indexfile.delete(); - } + if (indexfile.exists()) { + // delete existing index file + System.out.println("*** Delete File index " + indexfile); + indexfile.delete(); + } - // fill the index - System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath); - index = initializeRamIndex(minimumSpace); - - System.out.println(" -done-"); - System.out.println(index.size() - + " index entries initialized and sorted from " - + super.col[0].size() + " keys."); - RAMIndex = true; - tableTracker.put(this.filename(), this); - } else { - // too less ram for a ram index - kelondroIndex ki; - if (indexfile.exists()) { - // use existing index file - System.out.println("*** Using File index " + indexfile); - ki = new kelondroCache(kelondroTree.open(indexfile, true, preloadTime, treeIndexRow(rowdef.width(0), rowdef.objectOrder), 2, 80), true, false); - RAMIndex = false; - } else { - // generate new index file - System.out.println("*** Generating File index for " + size() + " entries from " + indexfile); - System.out.println("*** Cause: too less RAM (" + serverMemory.available() + " Bytes) configured. Assign at least " + (neededRAM / 1024 / 1024) + " MB more RAM to enable a RAM index."); - ki = initializeTreeIndex(indexfile, preloadTime, rowdef.objectOrder); - - System.out.println(" -done-"); - System.out.println(ki.size() + " entries indexed from " + super.col[0].size() + " keys."); - RAMIndex = false; - } - index = new kelondroBytesIntMap(ki); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); + // fill the index + System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath); + index = initializeRamIndex(minimumSpace); - } + System.out.println(" -done-"); + System.out.println(index.size() + + " index entries initialized and sorted from " + + super.col[0].size() + " keys."); + RAMIndex = true; + tableTracker.put(this.filename(), this); + // assign index to wrapper description = "stt=" + Long.toString(System.currentTimeMillis() - start) + ";"; super.col[0].setDescription(description.getBytes()); diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java index b8ed2c86b..f5f374574 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java @@ -55,9 +55,9 @@ public class plasmaCrawlQueues { private plasmaSwitchboard sb; private serverLog log; - private HashMap workers; // mapping from url hash to Worker thread object + private HashMap workers; // mapping from url hash to Worker thread object private plasmaProtocolLoader loader; - private ArrayList remoteCrawlProviderHashes; + private ArrayList remoteCrawlProviderHashes; public plasmaCrawlNURL noticeURL; public plasmaCrawlZURL errorURL, delegatedURL; @@ -65,9 +65,9 @@ public class plasmaCrawlQueues { public plasmaCrawlQueues(plasmaSwitchboard sb, File plasmaPath) { this.sb = sb; this.log = new serverLog("CRAWLER"); - this.workers = new HashMap(); + this.workers = new HashMap(); this.loader = new plasmaProtocolLoader(sb, log); - this.remoteCrawlProviderHashes = new ArrayList(); + this.remoteCrawlProviderHashes = new ArrayList(); // start crawling management log.logConfig("Starting Crawling Management"); @@ -85,7 +85,7 @@ public class plasmaCrawlQueues { if (noticeURL.existsInStack(hash)) return "crawler"; if (delegatedURL.exists(hash)) return "delegated"; if (errorURL.exists(hash)) return "errors"; - if (workers.containsKey(new Integer(hash.hashCode()))) return "workers"; + if (workers.containsKey(hash)) return "workers"; return null; } @@ -97,9 +97,9 @@ public class plasmaCrawlQueues { public yacyURL getURL(String urlhash) { if (urlhash.equals(yacyURL.dummyHash)) return null; - plasmaCrawlEntry ne = (plasmaCrawlEntry) workers.get(new Integer(urlhash.hashCode())); - if (ne != null) return ne.url(); - ne = noticeURL.get(urlhash); + crawlWorker w = workers.get(urlhash); + if (w != null) return w.entry.url(); + plasmaCrawlEntry ne = noticeURL.get(urlhash); if (ne != null) return ne.url(); plasmaCrawlZURL.Entry ee = delegatedURL.getEntry(urlhash); if (ee != null) return ee.url(); @@ -110,7 +110,7 @@ public class plasmaCrawlQueues { public void close() { // wait for all workers to finish - Iterator i = workers.values().iterator(); + Iterator i = workers.values().iterator(); while (i.hasNext()) ((Thread) i.next()).interrupt(); // TODO: wait some more time until all threads are finished noticeURL.close(); @@ -122,9 +122,9 @@ public class plasmaCrawlQueues { synchronized (workers) { plasmaCrawlEntry[] w = new plasmaCrawlEntry[workers.size()]; int i = 0; - Iterator j = workers.values().iterator(); + Iterator j = workers.values().iterator(); while (j.hasNext()) { - w[i++] = ((crawlWorker) j.next()).entry; + w[i++] = j.next().entry; } return w; } @@ -260,9 +260,9 @@ public class plasmaCrawlQueues { (remoteTriggeredCrawlJobSize() == 0) && (sb.queueSize() < 10)) { if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) { - Iterator e = yacyCore.dhtAgent.getProvidesRemoteCrawlURLs(); + Iterator e = yacyCore.dhtAgent.getProvidesRemoteCrawlURLs(); while (e.hasNext()) { - seed = (yacySeed) e.next(); + seed = e.next(); if (seed != null) { remoteCrawlProviderHashes.add(seed.hash); @@ -277,7 +277,14 @@ public class plasmaCrawlQueues { String hash = null; while ((seed == null) && (remoteCrawlProviderHashes.size() > 0)) { hash = (String) remoteCrawlProviderHashes.remove(remoteCrawlProviderHashes.size() - 1); + if (hash == null) continue; seed = yacyCore.seedDB.get(hash); + if (seed == null) continue; + // check if the peer is inside our cluster + if ((sb.isRobinsonMode()) && (!sb.isInMyCluster(seed))) { + seed = null; + continue; + } } if (seed == null) return false; @@ -416,7 +423,7 @@ public class plasmaCrawlQueues { synchronized (this.workers) { crawlWorker w = new crawlWorker(entry); synchronized (workers) { - workers.put(new Integer(entry.hashCode()), w); + workers.put(entry.url().hash(), w); } } diff --git a/source/de/anomic/yacy/yacyDHTAction.java b/source/de/anomic/yacy/yacyDHTAction.java index d5f1274f6..6703b1984 100644 --- a/source/de/anomic/yacy/yacyDHTAction.java +++ b/source/de/anomic/yacy/yacyDHTAction.java @@ -124,11 +124,11 @@ public class yacyDHTAction implements yacyPeerAction { } } - public Iterator getProvidesRemoteCrawlURLs() { + public Iterator getProvidesRemoteCrawlURLs() { return new providesRemoteCrawlURLsEnum(); } - class providesRemoteCrawlURLsEnum implements Iterator { + class providesRemoteCrawlURLsEnum implements Iterator { Iterator se; yacySeed nextSeed; @@ -159,7 +159,7 @@ public class yacyDHTAction implements yacyPeerAction { return null; } - public Object next() { + public yacySeed next() { yacySeed next = nextSeed; nextSeed = nextInternal(); return next;