diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index 5fc8248a8..9a49d0d20 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -1,6 +1,6 @@ -// AccessStatistics_p.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany -// first published 14.01.2007 on http://www.anomic.de +// AccessTracker_p.java +// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 14.01.2007 on http://www.yacy.net // // This is a part of YaCy, a peer-to-peer based web search engine // diff --git a/htroot/CrawlURLFetchStack_p.java b/htroot/CrawlURLFetchStack_p.java index 5355c46c1..f0180b354 100644 --- a/htroot/CrawlURLFetchStack_p.java +++ b/htroot/CrawlURLFetchStack_p.java @@ -275,7 +275,7 @@ public class CrawlURLFetchStack_p { plasmaCrawlEntry entry; int failed = 0; for (int i=0; i 0) && (sb.noticeURL.stackSize(stackType) > 0)) { + try { + entry = sb.noticeURL.pop(stackType, false); + } catch (IOException e) { + break; + } + if (entry == null) break; + prop.put("item_" + c + "_title", ""); + prop.put("item_" + c + "_link", entry.url().toNormalform(true, false)); + prop.put("item_" + c + "_description", entry.name()); + prop.put("item_" + c + "_author", ""); + prop.put("item_" + c + "_pubDate", serverDate.shortSecondTime(entry.appdate())); + prop.put("item_" + c + "_guid", entry.urlhash()); + c++; + count--; + } + prop.put("item", c); + prop.put("response", "ok"); + } + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/yacy/urls.xml b/htroot/yacy/urls.xml new file mode 100644 index 000000000..c94fb4958 --- /dev/null +++ b/htroot/yacy/urls.xml @@ -0,0 +1,31 @@ + + + + + + + +#[iam]# +#[uptime]# +#[mytime]# +#[response]# + + + + +#[channel_title]# +#[channel_description]# +#[channel_pubDate]# + +#{item}# + +#[title]# +#[link]# +#[description]# +#[author]# +#[pubDate]# +#[guid]# + +#{/item}# + + diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index f56a719a5..7f30abd69 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -106,6 +106,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverClassLoader; import de.anomic.server.serverCore; +import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -631,8 +632,9 @@ public final class httpdFileHandler { } // add the application version, the uptime and the client name to every rewrite table tp.put(servletProperties.PEER_STAT_VERSION, switchboard.getConfig("version", "")); - tp.put(servletProperties.PEER_STAT_UPTIME, ((System.currentTimeMillis() - Long.parseLong(switchboard.getConfig("startupTime","0"))) / 1000) / 60); // uptime in minutes + tp.put(servletProperties.PEER_STAT_UPTIME, ((System.currentTimeMillis() - serverCore.startupTime) / 1000) / 60); // uptime in minutes tp.put(servletProperties.PEER_STAT_CLIENTNAME, switchboard.getConfig("peerName", "anomic")); + tp.put(servletProperties.PEER_STAT_MYTIME, serverDate.shortSecondTime()); //System.out.println("respond props: " + ((tp == null) ? "null" : tp.toString())); // debug } catch (InvocationTargetException e) { if (e.getCause() instanceof InterruptedException) { diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index 6c93df145..88a46c0cc 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -49,6 +49,7 @@ package de.anomic.plasma.crawler; import java.io.File; import java.io.IOException; +import java.util.Date; import de.anomic.plasma.plasmaURL; import de.anomic.net.URL; @@ -288,7 +289,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW this.url, referrerHash, this.name, - null, + new Date(), this.profile.handle(), this.depth, 0, diff --git a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java index a825e63c1..be2b0e6d4 100644 --- a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java @@ -141,7 +141,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor if (this.importNurlDB.stackSize(stackTypes[stackType]) == 0) break; this.urlCount++; - nextEntry = this.importNurlDB.pop(stackTypes[stackType]); + nextEntry = this.importNurlDB.pop(stackTypes[stackType], false); nextHash = nextEntry.urlhash(); } else { if (!entryIter.hasNext()) break; diff --git a/source/de/anomic/plasma/plasmaCrawlEntry.java b/source/de/anomic/plasma/plasmaCrawlEntry.java index 129f5c414..0d416d4e2 100644 --- a/source/de/anomic/plasma/plasmaCrawlEntry.java +++ b/source/de/anomic/plasma/plasmaCrawlEntry.java @@ -56,7 +56,7 @@ public class plasmaCrawlEntry { "String handle-4, " + // extra handle "Cardinal loaddate-8 {b256}," + // time when the file was loaded "Cardinal serverdate-8 {b256}," + // time when that the server returned as document date - "Cardinal modifiedSince-8 {b256}", // time that was given to server as ifModifiedSince + "Cardinal modifiedSince-8 {b256}", // time that was given to server as ifModifiedSince kelondroBase64Order.enhancedCoder, 0 ); @@ -105,6 +105,7 @@ public class plasmaCrawlEntry { int forkfactor ) { // create new entry and store it into database + assert appdate != null; this.urlhash = plasmaURL.urlHash(url); this.initiator = initiator; this.url = url; @@ -214,36 +215,36 @@ public class plasmaCrawlEntry { public Date appdate() { // the date when the url appeared first - return new Date(appdate); + return new Date(this.appdate); } public Date loaddate() { // the date when the url was loaded - return new Date(loaddate); + return new Date(this.loaddate); } public Date serverdate() { // the date that the server returned as document date - return new Date(serverdate); + return new Date(this.serverdate); } public Date imsdate() { // the date that the client (browser) send as ifModifiedSince in proxy mode - return new Date(imsdate); + return new Date(this.imsdate); } public String name() { // return the anchor name (text inside tag) - return name; + return this.name; } public int depth() { // crawl depth where the url appeared - return depth; + return this.depth; } public String profileHandle() { // the handle of the crawl profile - return profileHandle; + return this.profileHandle; } } \ No newline at end of file diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index ab554ffac..9f2710507 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -156,18 +156,18 @@ public class plasmaCrawlNURL { } } - public plasmaCrawlEntry pop(int stackType) throws IOException { + public plasmaCrawlEntry pop(int stackType, boolean delay) throws IOException { switch (stackType) { - case STACK_TYPE_CORE: return pop(coreStack); - case STACK_TYPE_LIMIT: return pop(limitStack); - case STACK_TYPE_REMOTE: return pop(remoteStack); + case STACK_TYPE_CORE: return pop(coreStack, delay); + case STACK_TYPE_LIMIT: return pop(limitStack, delay); + case STACK_TYPE_REMOTE: return pop(remoteStack, delay); default: return null; } } public void shift(int fromStack, int toStack) { try { - plasmaCrawlEntry entry = pop(fromStack); + plasmaCrawlEntry entry = pop(fromStack, false); if (entry != null) push(toStack, entry); } catch (IOException e) { return; @@ -183,13 +183,13 @@ public class plasmaCrawlNURL { } } - private plasmaCrawlEntry pop(plasmaCrawlBalancer balancer) throws IOException { + private plasmaCrawlEntry pop(plasmaCrawlBalancer balancer, boolean delay) throws IOException { // this is a filo - pop int s; plasmaCrawlEntry entry; synchronized (balancer) { while ((s = balancer.size()) > 0) { - entry = balancer.pop(minimumLocalDelta, minimumGlobalDelta, maximumDomAge); + entry = balancer.pop((delay) ? minimumLocalDelta : 0, (delay) ? minimumGlobalDelta : 0, maximumDomAge); if (entry == null) { if (s > balancer.size()) continue; int aftersize = balancer.size(); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index c6279151e..6d14dd997 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -247,7 +247,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser private plasmaDHTChunk dhtTransferChunk = null; public ArrayList localSearches, remoteSearches; // array of search result properties as HashMaps public HashMap localSearchTracker, remoteSearchTracker; // mappings from requesting host to a TreeSet of Long(access time) - public long startupTime = 0; public long lastseedcheckuptime = -1; public long indexedPages = 0; public long lastindexedPages = 0; @@ -2172,7 +2171,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser while (urlEntry == null && noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) > 0) { String stats = "LOCALCRAWL[" + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; try { - urlEntry = noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE); + urlEntry = noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE, true); String profileHandle = urlEntry.profileHandle(); // System.out.println("DEBUG plasmaSwitchboard.processCrawling: // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url()); @@ -2241,7 +2240,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String stats = "REMOTECRAWLTRIGGER[" + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; try { - plasmaCrawlEntry urlEntry = noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT); + plasmaCrawlEntry urlEntry = noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT, true); String profileHandle = urlEntry.profileHandle(); // System.out.println("DEBUG plasmaSwitchboard.processCrawling: // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url()); @@ -2328,7 +2327,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String stats = "REMOTETRIGGEREDCRAWL[" + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; try { - plasmaCrawlEntry urlEntry = noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE); + plasmaCrawlEntry urlEntry = noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE, true); String profileHandle = urlEntry.profileHandle(); // System.out.println("DEBUG plasmaSwitchboard.processCrawling: // profileHandle = " + profileHandle + ", urlEntry.url = " + diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 31e61cfa2..21b435a75 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -94,6 +94,7 @@ public final class serverCore extends serverAbstractThread implements serverThre public static final String crlfString = new String(crlf); public static final String lfstring = new String(new byte[]{lf}); public static final Class[] stringType = {"".getClass()}; // set up some reflection + public static final long startupTime = System.currentTimeMillis(); //Class[] exceptionType = {Class.forName("java.lang.Throwable")}; diff --git a/source/de/anomic/server/servletProperties.java b/source/de/anomic/server/servletProperties.java index fb2cc4dbf..5a1bb32cc 100644 --- a/source/de/anomic/server/servletProperties.java +++ b/source/de/anomic/server/servletProperties.java @@ -32,6 +32,7 @@ public class servletProperties extends serverObjects { public static final String PEER_STAT_VERSION = "version"; public static final String PEER_STAT_UPTIME = "uptime"; + public static final String PEER_STAT_MYTIME = "mytime"; public static final String PEER_STAT_CLIENTNAME = "clientname"; private String prefix=""; diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index a6d8a509a..d5f77dea2 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -81,7 +81,6 @@ public class yacyCore { // statics public static ThreadGroup publishThreadGroup = new ThreadGroup("publishThreadGroup"); - public static long startupTime = System.currentTimeMillis(); public static yacySeedDB seedDB = null; public static yacyNewsPool newsPool = null; public static final HashMap seedUploadMethods = new HashMap(); @@ -115,7 +114,7 @@ public class yacyCore { public static int yacyTime() { // the time since startup of yacy in seconds - return (int) ((System.currentTimeMillis() - startupTime) / 1000); + return (int) ((System.currentTimeMillis() - serverCore.startupTime) / 1000); } public yacyCore(plasmaSwitchboard sb) { diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index 60ab16f9a..fc1e32143 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -52,6 +52,7 @@ import java.util.Iterator; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.net.URL; +import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverDate; @@ -97,7 +98,7 @@ public class yacyPeerActions { seedDB.mySeed.put(yacySeed.PORT, Integer.toString(serverCore.getPortNr(sb.getConfig("port", "8080")))); } - long uptime = (System.currentTimeMillis() - sb.startupTime) / 1000; + long uptime = (System.currentTimeMillis() - serverCore.startupTime) / 1000; long uptimediff = uptime - sb.lastseedcheckuptime; long indexedcdiff = sb.indexedPages - sb.lastindexedPages; //double requestcdiff = sb.requestedQueries - sb.lastrequestedQueries; @@ -116,6 +117,7 @@ public class yacyPeerActions { seedDB.mySeed.put(yacySeed.UPTIME, Long.toString(uptime/60)); // the number of minutes that the peer is up in minutes/day (moving average MA30) seedDB.mySeed.put(yacySeed.LCOUNT, Integer.toString(sb.wordIndex.loadedURL.size())); // the number of links that the peer has stored (LURL's) seedDB.mySeed.put(yacySeed.NCOUNT, Integer.toString(sb.noticeURL.size())); // the number of links that the peer has noticed, but not loaded (NURL's) + seedDB.mySeed.put(yacySeed.RCOUNT, Integer.toString(sb.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT))); // the number of links that the peer provides for remote crawling (ZURL's) seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.wordIndex.size())); // the minimum number of words that the peer has indexed (as it says) seedDB.mySeed.put(yacySeed.SCOUNT, Integer.toString(seedDB.sizeConnected())); // the number of seeds that the peer has stored seedDB.mySeed.put(yacySeed.CCOUNT, Double.toString(((int) ((seedDB.sizeConnected() + seedDB.sizeDisconnected() + seedDB.sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0)); // the number of clients that the peer connects (as connects/hour) diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 6d81cb54c..d903041ab 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -154,6 +154,8 @@ public class yacySeed { public static final String LCOUNT = "LCount"; /** the number of links that the peer has noticed, but not loaded (NURL's) */ public static final String NCOUNT = "NCount"; + /** the number of links that the peer provides for remote crawls (ZURL's) */ + public static final String RCOUNT = "RCount"; /** the number of words the peer has indexed (as it says) */ public static final String ICOUNT = "ICount"; /** the number of seeds that the peer has stored */ @@ -205,6 +207,7 @@ public class yacySeed { this.dna.put(yacySeed.UPTIME, yacySeed.ZERO); this.dna.put(yacySeed.LCOUNT, yacySeed.ZERO); this.dna.put(yacySeed.NCOUNT, yacySeed.ZERO); + this.dna.put(yacySeed.RCOUNT, yacySeed.ZERO); this.dna.put(yacySeed.ICOUNT, yacySeed.ZERO); this.dna.put(yacySeed.SCOUNT, yacySeed.ZERO); this.dna.put(yacySeed.CCOUNT, yacySeed.ZERO); diff --git a/source/yacy.java b/source/yacy.java index 82fcfbca8..d97e5c314 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -167,8 +167,6 @@ public final class yacy { * @param startupFree free memory at startup time, to be used later for statistics */ private static void startup(String homePath, long startupMemFree, long startupMemTotal) { - long startup = System.currentTimeMillis(); - int oldRev=0; int newRev=0; @@ -268,7 +266,6 @@ public final class yacy { sb.setConfig("vString", yacyVersion.combined2prettyVersion(Double.toString(version))); sb.setConfig("vdate", (vDATE.startsWith("@")) ? serverDate.shortDayTime() : vDATE); sb.setConfig("applicationRoot", homePath); - sb.startupTime = startup; serverLog.logConfig("STARTUP", "YACY Version: " + version + ", Built " + sb.getConfig("vdate", "00000000")); yacyVersion.latestRelease = version;