From 871ee1ce0f40f80f086aebe297ebe9797ad53134 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 27 Apr 2007 09:23:44 +0000 Subject: [PATCH] one step closer to automatic updates: automatically acquire release information from download archives web pages from latest.yacy-forum.net and yacy.net are retrieved, parsed, links within are analysed, sorted and the most recent developer and main releases are provided as direct download link on the status page, if it was discovered that a more recent version than the current version is available. This process is done only once during run-time of a peer, to protect our download archives from DoS by YaCy peers. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3606 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Status.html | 7 +- htroot/Status.java | 11 +- .../htmlFilter/htmlFilterContentScraper.java | 29 ++- source/de/anomic/net/URL.java | 11 + source/de/anomic/plasma/plasmaParser.java | 6 +- source/de/anomic/yacy/yacyCore.java | 1 - source/de/anomic/yacy/yacyPeerActions.java | 2 +- source/de/anomic/yacy/yacyVersion.java | 209 ++++++++++++++++-- source/yacy.java | 4 +- 9 files changed, 244 insertions(+), 36 deletions(-) diff --git a/htroot/Status.html b/htroot/Status.html index 89abdffc0..24acc605a 100644 --- a/htroot/Status.html +++ b/htroot/Status.html @@ -28,7 +28,12 @@
Public System Properties
System version - #[versionpp]# #(versioncomment)#:: - the latest public version is #[latestVersion]#. Click here to download it.#(/versioncomment)# + #[versionpp]# + #(versioncomment)#:: - the latest public version is #[latestVersion]#. + You can download the latest releases here:
+ #[versionResMain]#
+ #[versionResDev]# + #(/versioncomment)#
This peer's address #(peerAddress)# diff --git a/htroot/Status.java b/htroot/Status.java index c188c2c61..50f0c4723 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -61,6 +61,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyVersion; public class Status { @@ -150,16 +151,20 @@ public class Status { // version information prop.put("versionpp", yacy.combined2prettyVersion(env.getConfig("version","0.1"))); + + double thisVersion = Double.parseDouble(env.getConfig("version","0.1")); // cut off the SVN Rev in the Version try {thisVersion = Math.round(thisVersion*1000.0)/1000.0;} catch (NumberFormatException e) {} -// System.out.println("TEST: "+thisVersion); - if (yacyCore.latestVersion >= (thisVersion+0.01)) { // only new Versions(not new SVN) + if (yacyVersion.latestRelease >= (thisVersion+0.01)) { // only new Versions(not new SVN) prop.put("versioncomment", 1); // new version } else { prop.put("versioncomment", 0); // no comment } - prop.put("versioncomment_latestVersion", Double.toString(yacyCore.latestVersion)); + yacyVersion.aquireLatestReleaseInfo(); + prop.putASIS("versioncomment_versionResMain", (yacyVersion.latestMainRelease == null) ? "-" : yacyVersion.latestMainRelease.toAnchor()); + prop.putASIS("versioncomment_versionResDev", (yacyVersion.latestDevRelease == null) ? 
"-" : yacyVersion.latestDevRelease.toAnchor()); + prop.put("versioncomment_latestVersion", Double.toString(yacyVersion.latestRelease)); // hostname and port String extendedPortString = env.getConfig("port", "8080"); diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index e6a6610f4..6017af1ef 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -45,7 +45,10 @@ package de.anomic.htmlFilter; +import java.io.ByteArrayInputStream; +import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.io.Writer; import java.net.MalformedURLException; import java.text.Collator; import java.util.ArrayList; @@ -58,8 +61,11 @@ import java.util.TreeSet; import javax.swing.event.EventListenerList; +import de.anomic.http.httpc; import de.anomic.net.URL; +import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCharBuffer; +import de.anomic.server.serverFileUtils; public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper { @@ -328,6 +334,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen } public Map getAnchors() { + // returns a url (String) / name (String) relation return anchors; } @@ -449,5 +456,25 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen ((htmlFilterEventListener)listeners[i+1]).scrapeTag1(tagname, tagopts, text); } } - } + } + + public static htmlFilterContentScraper parseResource(URL location) throws IOException { + // load page + byte[] page = httpc.wget( + location, + location.getHost(), + 10000, + null, + null, + plasmaSwitchboard.getSwitchboard().remoteProxyConfig + ); + if (page == null) throw new IOException("no response from url " + location.toString()); + + // scrape content + htmlFilterContentScraper scraper = new htmlFilterContentScraper(location); 
+ Writer writer = new htmlFilterWriter(null, null, scraper, null, false); + serverFileUtils.copy(new ByteArrayInputStream(page), writer, "UTF-8"); + + return scraper; + } } \ No newline at end of file diff --git a/source/de/anomic/net/URL.java b/source/de/anomic/net/URL.java index 275ef04c8..0cdac47c9 100644 --- a/source/de/anomic/net/URL.java +++ b/source/de/anomic/net/URL.java @@ -415,9 +415,20 @@ public class URL { public String getFile(boolean includeReference) { // this is the path plus quest plus ref + // if there is no quest and no ref the result is identical to getPath + // this is defined according to http://java.sun.com/j2se/1.4.2/docs/api/java/net/URL.html#getFile() if (quest != null) return ((includeReference) && (ref != null)) ? path + "?" + quest + "#" + ref : path + "?" + quest; return ((includeReference) && (ref != null)) ? path + "#" + ref : path; } + + public String getFileName() { + // this is a method not defined in any sun api + // it returns the last portion of a path without any reference + int p = path.lastIndexOf('/'); + if (p < 0) return path; + if (p == path.length() - 1) return ""; // no file name, this is a path to a directory + return path.substring(p + 1); // the 'real' file name + } public String getPath() { return path; diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 3cf319fed..49d5e37f0 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -680,7 +680,7 @@ public final class plasmaParser { theParser.setContentLength(contentLength); // parse the resource doc = theParser.parse(location, mimeType,documentCharset,sourceStream); - } else if (realtimeParsableMimeTypesContains(mimeType)) { + } else if (realtimeParsableMimeTypesContains(mimeType)) { doc = parseHtml(location, mimeType, documentCharset, sourceStream); } else { String errorMsg = "No parser available to parse mimetype '" + mimeType + "'"; @@ -719,7 +719,7 @@ public 
final class plasmaParser { private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, InputStream sourceStream) throws IOException, ParserException { - // ...otherwise we make a scraper and transformer + // make a scraper and transformer htmlFilterInputStream htmlFilter = new htmlFilterInputStream(sourceStream,documentCharset,location,null,false); String charset = htmlFilter.detectCharset(); if (charset == null) { @@ -745,7 +745,7 @@ public final class plasmaParser { this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg); throw new ParserException(errorMsg,location); } - return transformScraper(location, mimeType, documentCharset, scraper); + return transformScraper(location, mimeType, documentCharset, scraper); } public plasmaParserDocument transformScraper(URL location, String mimeType, String charSet, htmlFilterContentScraper scraper) { diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index d7df30185..ff9ac87d5 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -95,7 +95,6 @@ public class yacyCore { public static yacyDHTAction dhtAgent = null; public static serverLog log; public static long lastOnlineTime = 0; - public static double latestVersion = 0.1; /** pseudo-random key derived from a time-interval while YaCy startup*/ public static long speedKey = 0; public static File yacyDBPath; diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index 12938285e..4ccbca7a1 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -338,7 +338,7 @@ public class yacyPeerActions { } // update latest version number - if (seed.getVersion() > yacyCore.latestVersion) yacyCore.latestVersion = seed.getVersion(); + if (seed.getVersion() > yacyVersion.latestRelease) yacyVersion.latestRelease = seed.getVersion(); // prepare to update if 
(disconnectedSeed != null) { diff --git a/source/de/anomic/yacy/yacyVersion.java b/source/de/anomic/yacy/yacyVersion.java index 9c952ed96..137a65bf3 100644 --- a/source/de/anomic/yacy/yacyVersion.java +++ b/source/de/anomic/yacy/yacyVersion.java @@ -1,13 +1,16 @@ // yacyVersion.java -// ------------------------------------- -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004, 2005 +// ---------------- +// (C) 2007 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany +// first published 27.04.2007 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // +// LICENSE +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -21,32 +24,190 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. 
-// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. package de.anomic.yacy; -public final class yacyVersion { +import java.io.IOException; +import java.net.MalformedURLException; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeSet; + +import de.anomic.htmlFilter.htmlFilterContentScraper; +import de.anomic.net.URL; + +public final class yacyVersion implements Comparator, Comparable { + + // general release info public static final float YACY_SUPPORTS_PORT_FORWARDING = (float) 0.383; public static final float YACY_SUPPORTS_GZIP_POST_REQUESTS = (float) 0.40300772; public static final float YACY_ACCEPTS_RANKING_TRANSMISSION = (float) 0.414; public static final float YACY_HANDLES_COLLECTION_INDEX = (float) 0.486; public static final float YACY_PROVIDES_CRAWLS_VIA_LIST_HTML = (float) 0.50403367; + + // information about latest release, retrieved by other peers release version + public static double latestRelease = 0.1; // this value is overwritten when a peer with later version appears + + // information about latest release, retrieved from download pages + public static yacyVersion latestDevRelease = null; + public static yacyVersion latestMainRelease = null; + + + // class variables + public float releaseNr; + public String dateStamp; + public int svn; + public boolean mainRelease; + public URL url; + + public yacyVersion(URL url) { + this(url.getFileName()); + this.url = url; + } + + public yacyVersion(String release) { + // parse a release file name + // the have the following form: + // yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz 
+ // yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz + // i.e. yacy_v0.51_20070321_3501.tar.gz + this.url = null; + if ((release == null) || (!release.endsWith(".tar.gz"))) { + throw new RuntimeException("release file name '" + release + "' is not valid, no tar.gz"); + } + // cut off tail + release = release.substring(0, release.length() - 7); + if (release.startsWith("yacy_dev_v")) { + mainRelease = false; + release = release.substring(10); + } else if (release.startsWith("yacy_v")) { + mainRelease = true; + release = release.substring(6); + } else { + throw new RuntimeException("release file name '" + release + "' is not valid, wrong prefix"); + } + // now all release names have the form + // ${releaseVersion}_${DSTAMP}_${releaseNr} + String[] comp = release.split("_"); // should be 3 parts + if (comp.length != 3) { + throw new RuntimeException("release file name '" + release + "' is not valid, 3 information parts expected"); + } + try { + this.releaseNr = Float.parseFloat(comp[0]); + } catch (NumberFormatException e) { + throw new RuntimeException("release file name '" + release + "' is not valid, '" + comp[0] + "' should be a float number"); + } + this.dateStamp = comp[1]; + if (this.dateStamp.length() != 8) { + throw new RuntimeException("release file name '" + release + "' is not valid, '" + comp[1] + "' should be a 8-digit date string"); + } + try { + this.svn = Integer.parseInt(comp[2]); + } catch (NumberFormatException e) { + throw new RuntimeException("release file name '" + release + "' is not valid, '" + comp[2] + "' should be a integer number"); + } + // finished! 
we parsed a relase string + } + + /* + public yacyVersion(URL url, float releaseNr, String dateStamp, int svn, boolean mainRelease) { + this.url = url; + this.releaseNr = releaseNr; + this.dateStamp = dateStamp; + this.svn = svn; + this.mainRelease = mainRelease; + } + */ + + public int compareTo(Object obj) { + yacyVersion v = (yacyVersion) obj; + return compare(this, v); + } + + public int compare(Object arg0, Object arg1) { + // compare-operator for two yacyVersion objects + // must be implemented to make it possible to put this object into + // a ordered structure, like TreeSet or TreeMap + yacyVersion a0 = (yacyVersion) arg0, a1 = (yacyVersion) arg1; + return (new Integer(a0.svn)).compareTo(new Integer(a1.svn)); + } + + public boolean equals(Object obj) { + yacyVersion v = (yacyVersion) obj; + return (this.svn == v.svn) && (this.url.toNormalform().equals(v.url.toNormalform())); + } + + public int hashCode() { + return this.url.toNormalform().hashCode(); + } + + public String toAnchor() { + // generates an anchor string that can be used to embed in an html for direct download + return "YaCy " + ((this.mainRelease) ? 
"main release" : "developer release") + " v" + this.releaseNr + ", SVN " + this.svn + ""; + } + + public static void aquireLatestReleaseInfo() { + if ((latestDevRelease == null) && (latestMainRelease == null)) { + if (latestDevRelease == null) latestDevRelease = aquireLatestDevRelease(); + if (latestMainRelease == null) latestMainRelease = aquireLatestMainRelease(); + } + } + + public static yacyVersion aquireLatestDevRelease() { + // get the latest release info from a internet resource + try { + return latestReleaseFrom(new URL("http://latest.yacy-forum.net")); + } catch (MalformedURLException e) { + return null; + } + } + + public static yacyVersion aquireLatestMainRelease() { + // get the latest release info from a internet resource + try { + return latestReleaseFrom(new URL("http://yacy.net/yacy/Download.html")); + } catch (MalformedURLException e) { + return null; + } + } + + public static yacyVersion latestReleaseFrom(URL url) { + // retrieves the latest info about releases + // this is done by contacting a release location, + // parsing the content and filtering+parsing links + // returns the version info if successful, null otherwise + htmlFilterContentScraper scraper; + try { + scraper = htmlFilterContentScraper.parseResource(url); + } catch (IOException e) { + return null; + } + + // analyse links in scraper resource, and find link to latest release in it + Map anchors = scraper.getAnchors(); // a url (String) / name (String) relation + Iterator i = anchors.keySet().iterator(); + TreeSet releases = new TreeSet(); // will contain a release (Float) / url (String) relation + yacyVersion release; + while (i.hasNext()) { + try { + url = new URL((String) i.next()); + } catch (MalformedURLException e1) { + continue; // just ignore invalid urls + } + try { + release = new yacyVersion(url); + //System.out.println("r " + release.toAnchor()); + releases.add(release); + } catch (RuntimeException e) { + // the release string was not well-formed. 
+ // that might have been another link + // just dont care + continue; + } + } + if (releases.size() == 0) return null; + //i = releases.iterator(); while (i.hasNext()) {System.out.println("v " + ((yacyVersion) i.next()).toAnchor());} + return (yacyVersion) releases.last(); + } } diff --git a/source/yacy.java b/source/yacy.java index c0561d7b1..57ffa2dca 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -91,8 +91,8 @@ import de.anomic.server.serverSystem; import de.anomic.server.logging.serverLog; import de.anomic.tools.enumerateFiles; import de.anomic.yacy.yacyClient; -import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyVersion; /** * This is the main class of YaCy. Several threads are started from here: @@ -300,7 +300,7 @@ public final class yacy { sb.setConfig("applicationRoot", homePath); sb.startupTime = startup; serverLog.logConfig("STARTUP", "YACY Version: " + version + ", Built " + vDATE); - yacyCore.latestVersion = version; + yacyVersion.latestRelease = version; // read environment int timeout = Integer.parseInt(sb.getConfig("httpdTimeout", "60000"));