diff --git a/build.xml b/build.xml index 8fd6508bb..d9d15970d 100644 --- a/build.xml +++ b/build.xml @@ -221,7 +221,7 @@ @@ -241,7 +241,6 @@ @@ -297,33 +296,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -359,7 +331,7 @@ - + - - - + @@ -527,8 +497,6 @@ - - @@ -545,7 +513,6 @@ - diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java index 4ab355c9d..2411caf8c 100644 --- a/htroot/Blacklist_p.java +++ b/htroot/Blacklist_p.java @@ -60,7 +60,6 @@ import java.util.TreeMap; import de.anomic.data.listManager; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.abstractURLPattern; import de.anomic.plasma.urlPattern.plasmaURLPattern; @@ -68,6 +67,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class Blacklist_p { private final static String DISABLED = "disabled_"; @@ -95,9 +95,9 @@ public class Blacklist_p { prop.put("testlist",1); String urlstring = post.get("testurl", ""); if(!urlstring.startsWith("http://")) urlstring = "http://"+urlstring; - URL testurl = null; + yacyURL testurl = null; try { - testurl = new URL(urlstring); + testurl = new yacyURL(urlstring, null); } catch (MalformedURLException e) { } if(testurl != null) { prop.put("testlist_url",testurl.toString()); diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index 95451fe69..8f0eb3a87 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -59,7 +59,6 @@ import de.anomic.data.bookmarksDB.Tag; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; @@ -69,6 +68,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; +import de.anomic.yacy.yacyURL; public class Bookmarks { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -214,7 +214,7 @@ public class Bookmarks { } try { File file=new File((String)post.get("bookmarksfile")); - switchboard.bookmarksDB.importFromBookmarks(new URL(file) , new String((byte[])post.get("bookmarksfile$file")), tags, isPublic); + switchboard.bookmarksDB.importFromBookmarks(new yacyURL(file) , new String((byte[])post.get("bookmarksfile$file")), tags, isPublic); } catch (MalformedURLException e) {} }else if(post.containsKey("xmlfile")){ diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java index 279c8231d..27e6aec23 100644 --- a/htroot/CacheAdmin_p.java +++ b/htroot/CacheAdmin_p.java @@ -62,7 +62,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSwitchboard; @@ -71,6 +70,7 @@ import de.anomic.plasma.cache.UnsupportedProtocolException; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class CacheAdmin_p { @@ -118,7 +118,7 @@ public class CacheAdmin_p { final StringBuffer tree = new StringBuffer(); final StringBuffer info = 
new StringBuffer(); - final URL url = plasmaHTCache.getURL(file); + final yacyURL url = plasmaHTCache.getURL(file); String urlstr = ""; diff --git a/htroot/ConfigLanguage_p.java b/htroot/ConfigLanguage_p.java index 5e5a90853..668ac3b7a 100644 --- a/htroot/ConfigLanguage_p.java +++ b/htroot/ConfigLanguage_p.java @@ -58,11 +58,11 @@ import de.anomic.data.listManager; import de.anomic.data.translator; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; public class ConfigLanguage_p { @@ -97,7 +97,7 @@ public class ConfigLanguage_p { String url = (String)post.get("url"); ArrayList langVector; try{ - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); langVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); }catch(IOException e){ prop.put("status", 1);//unable to get url diff --git a/htroot/ConfigSkins_p.java b/htroot/ConfigSkins_p.java index 61405b36c..9a80f2b1e 100644 --- a/htroot/ConfigSkins_p.java +++ b/htroot/ConfigSkins_p.java @@ -56,12 +56,12 @@ import java.util.Iterator; import de.anomic.data.listManager; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; public class ConfigSkins_p { @@ -126,7 +126,7 @@ public class ConfigSkins_p { String url = (String)post.get("url"); ArrayList skinVector; try{ - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); skinVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); }catch(IOException e){ prop.put("status", 1);//unable to get URL diff --git a/htroot/ConfigUpdate_p.java b/htroot/ConfigUpdate_p.java index 459d4ab4e..381e2835f 100644 --- a/htroot/ConfigUpdate_p.java +++ b/htroot/ConfigUpdate_p.java @@ -31,11 +31,11 @@ import java.util.Iterator; import java.util.TreeSet; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.serverSystem; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; public class ConfigUpdate_p { @@ -54,7 +54,7 @@ public class ConfigUpdate_p { String release = post.get("releasedownload", ""); if (release.length() > 0) { try { - yacyVersion.downloadRelease(new yacyVersion(new URL(release))); + yacyVersion.downloadRelease(new yacyVersion(new yacyURL(release, null))); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 29e4e676d..7b992180e 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -30,7 +30,6 @@ import java.util.Locale; import de.anomic.http.httpHeader; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; @@ -39,6 +38,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import 
de.anomic.yacy.yacyURL; public class CrawlResults { @@ -170,7 +170,7 @@ public class CrawlResults { urlstr = comp.url().toNormalform(false, true); urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL - cachepath = plasmaHTCache.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1); + cachepath = plasmaHTCache.getCachePath(new yacyURL(urlstr, null)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1); prop.put("table_indexed_" + cnt + "_dark", (dark) ? 1 : 0); if (showControl) { diff --git a/htroot/CrawlStartSimple_p.html b/htroot/CrawlStartSimple_p.html index 9708320d5..69e9f701e 100644 --- a/htroot/CrawlStartSimple_p.html +++ b/htroot/CrawlStartSimple_p.html @@ -53,7 +53,7 @@ : Wide: depth   |   - Complete Single Domain + Complete Domain The range defines if the crawl shall consider a complete domain, or a wide crawl up to a specific depth. diff --git a/htroot/CrawlStartSimple_p.java b/htroot/CrawlStartSimple_p.java index 7926e7d27..8cd1693b2 100644 --- a/htroot/CrawlStartSimple_p.java +++ b/htroot/CrawlStartSimple_p.java @@ -28,7 +28,6 @@ import java.util.Enumeration; import java.util.Iterator; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -36,6 +35,7 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class CrawlStartSimple_p { @@ -152,8 +152,8 @@ public class CrawlStartSimple_p { if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed.isVirgin()) || (yacyCore.seedDB.mySeed.isJunior())) { prop.put("remoteCrawlPeers", 0); } else { - Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, true); - Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, false); + Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(yacyURL.dummyHash, true); + Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(yacyURL.dummyHash, false); if ((!(crawlavail.hasMoreElements())) && (!(crawlpendi.hasMoreElements()))) { prop.put("remoteCrawlPeers", 0); //no peers availible } else { diff --git a/htroot/CrawlURLFetchStack_p.java b/htroot/CrawlURLFetchStack_p.java index f0180b354..974e8e77a 100644 --- a/htroot/CrawlURLFetchStack_p.java +++ b/htroot/CrawlURLFetchStack_p.java @@ -55,7 +55,6 @@ import de.anomic.data.URLFetcherStack; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaSwitchboard; @@ -64,6 +63,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class CrawlURLFetchStack_p { @@ -200,16 +200,16 @@ public class CrawlURLFetchStack_p { prop.put("upload", 1); } else if (type.equals("html")) { try { - final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL(file)); + final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL(file)); final Writer writer = new htmlFilterWriter(null, null, scraper, 
null, false); serverFileUtils.write(content, writer); writer.close(); final Iterator it = ((HashMap)scraper.getAnchors()).keySet().iterator(); int added = 0, failed = 0; - URL url; + yacyURL url; while (it.hasNext()) try { - url = new URL((String)it.next()); + url = new yacyURL((String) it.next(), null); if (blCheck && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url)) { failed++; continue; @@ -264,7 +264,7 @@ public class CrawlURLFetchStack_p { private static boolean addURL(String url, boolean blCheck, URLFetcherStack stack) { try { if (url == null || url.length() == 0) return false; - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); if (blCheck && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, u)) return false; stack.push(u); return true; @@ -288,7 +288,7 @@ public class CrawlURLFetchStack_p { url = post.get("url" + i, null); if (url == null || url.length() == 0) continue; try { - stack.push(new URL(url)); + stack.push(new yacyURL(url, null)); count++; } catch (MalformedURLException e) { serverLog.logInfo("URLFETCHER", "retrieved invalid url for adding to the stack: " + url); diff --git a/htroot/CrawlURLFetch_p.java b/htroot/CrawlURLFetch_p.java index caa7bc752..1488f4592 100644 --- a/htroot/CrawlURLFetch_p.java +++ b/htroot/CrawlURLFetch_p.java @@ -49,7 +49,6 @@ import java.util.Iterator; import java.util.Random; import java.util.TreeMap; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaSwitchboard; @@ -62,6 +61,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; public class CrawlURLFetch_p { @@ -139,10 +139,10 @@ public class CrawlURLFetch_p { count, frequency); } else { - URL url = null; + yacyURL url = null; if (post.get("source", "").equals("url")) { try { - url = new URL(post.get("host", null)); + url = new yacyURL(post.get("host", null), null); if (!savedURLs.contains(url.toNormalform(true, true))) savedURLs.add(url.toNormalform(true, true)); prop.put("host", post.get("host", url.toString())); @@ -152,7 +152,7 @@ public class CrawlURLFetch_p { } } else if (post.get("source", "").equals("savedURL")) { try { - url = new URL(post.get("saved", "")); + url = new yacyURL(post.get("saved", ""), null); } catch (MalformedURLException e) { /* should never appear, except for invalid input, see above */ } @@ -355,7 +355,7 @@ public class CrawlURLFetch_p { public String lastServerResponse = null; public int lastFailed = 0; - public final URL url; + public final yacyURL url; public final int count; public long delay; public final plasmaSwitchboard sb; @@ -363,7 +363,7 @@ public class CrawlURLFetch_p { public boolean paused = false; - public static URL getListServletURL(String host, int mode, int count, String peerHash) { + public static yacyURL getListServletURL(String host, int mode, int count, String peerHash) { String r = "http://" + host + "/yacy/list.html?list=queueUrls&display="; switch (mode) { @@ -380,7 +380,7 @@ public class CrawlURLFetch_p { } try { - return new URL(r); + return new yacyURL(r, null); } catch (MalformedURLException e) { return null; } @@ -389,7 +389,7 @@ public class CrawlURLFetch_p { public URLFetcher( serverSwitch env, plasmaCrawlProfile.entry profile, - URL url, + yacyURL url, int count, long delayMs) { if (env == null || profile == null || 
url == null) @@ -420,7 +420,7 @@ public class CrawlURLFetch_p { public void run() { this.paused = false; long start; - URL url; + yacyURL url; while (!isInterrupted()) { try { start = System.currentTimeMillis(); @@ -449,7 +449,7 @@ public class CrawlURLFetch_p { } } - private URL getDLURL() { + private yacyURL getDLURL() { if (this.url != null) return this.url; // choose random seed @@ -493,7 +493,7 @@ public class CrawlURLFetch_p { this.failed.put(urls[i], reason); try { plasmaCrawlZURL.Entry ee = this.sb.errorURL.newEntry( - new URL(urls[i]), + new yacyURL(urls[i], null), reason); ee.store(); this.sb.errorURL.stackPushEntry(ee); @@ -503,7 +503,7 @@ public class CrawlURLFetch_p { return this.lastFetchedURLs; } - private String[] getURLs(URL url) { + private String[] getURLs(yacyURL url) { if (url == null) return null; String[] r = null; try { diff --git a/htroot/FeedReader_p.java b/htroot/FeedReader_p.java index 128d28ccb..94634f004 100644 --- a/htroot/FeedReader_p.java +++ b/htroot/FeedReader_p.java @@ -24,11 +24,11 @@ import java.net.MalformedURLException; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.servletProperties; import de.anomic.xml.rssReader; +import de.anomic.yacy.yacyURL; // test url: // http://localhost:8080/FeedReader_p.html?url=http://www.tagesthemen.de/xml/rss2 @@ -40,9 +40,9 @@ public class FeedReader_p { prop.put("page", 0); if (post != null) { - URL url; + yacyURL url; try { - url = new URL((String) post.get("url")); + url = new yacyURL((String) post.get("url"), null); } catch (MalformedURLException e) { prop.put("page", 2); return prop; diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index c8543a8aa..70fb63705 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -62,11 +62,9 @@ import de.anomic.data.listManager; import de.anomic.http.httpHeader; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroRotateIterator; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.abstractURLPattern; @@ -76,6 +74,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class IndexControl_p { @@ -196,7 +195,11 @@ public class IndexControl_p { } if (post.containsKey("urldelete")) { - urlhash = plasmaURL.urlHash(urlstring); + try { + urlhash = (new yacyURL(urlstring, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } if ((urlhash == null) || (urlstring == null)) { prop.put("result", "No input given; nothing deleted."); } else { @@ -307,8 +310,8 @@ public class IndexControl_p { if (post.containsKey("urlstringsearch")) { try { - URL url = new URL(urlstring); - urlhash = plasmaURL.urlHash(url); + yacyURL url = new yacyURL(urlstring, null); + urlhash = url.hash(); prop.put("urlhash", urlhash); indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null); if (entry == null) { @@ -369,7 +372,7 @@ public class IndexControl_p { try { String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(","); pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true)); - URL url; 
+ yacyURL url; for (int i=0; i= maxCount) break; urlString = (String) map.get("key"); - try { url = new URL(urlString); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (!serverDomains.isLocal(url))) { + try { url = new yacyURL(urlString, null); } catch (MalformedURLException e) { url = null; } + if ((url != null) && (!url.isLocal())) { prop.put("page_backlinks_list_" + count + "_dark", ((dark) ? 1 : 0)); dark =! dark; prop.put("page_backlinks_list_" + count + "_url", urlString); prop.put("page_backlinks_list_" + count + "_date", map.get("date")); diff --git a/htroot/Status.java b/htroot/Status.java index 14b3568c2..59517d9d8 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -54,7 +54,6 @@ import de.anomic.http.httpHeader; import de.anomic.http.httpd; import de.anomic.http.httpdByteCountInputStream; import de.anomic.http.httpdByteCountOutputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverDomains; @@ -64,6 +63,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; public class Status { @@ -120,7 +120,7 @@ public class Status { String release = post.get("releasedownload", ""); if (release.length() > 0) { try { - yacyVersion.downloadRelease(new yacyVersion(new URL(release))); + yacyVersion.downloadRelease(new yacyVersion(new yacyURL(release, null))); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); diff --git a/htroot/Supporter.java b/htroot/Supporter.java index 65823abba..a02c34753 100644 --- a/htroot/Supporter.java +++ b/htroot/Supporter.java @@ -32,8 +32,6 @@ import java.util.HashMap; import java.util.Iterator; import de.anomic.http.httpHeader; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroNaturalOrder; @@ -48,6 +46,7 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class Supporter { @@ -125,10 +124,9 @@ public class Supporter { if (row == null) continue; url = row.getColString(0, null); - try{ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url))) - continue; - }catch(MalformedURLException e){continue;}; + try { + if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new yacyURL(url, urlhash))) continue; + } catch(MalformedURLException e) {continue;} title = row.getColString(1,"UTF-8"); description = row.getColString(2,"UTF-8"); if ((url == null) || (title == null) || (description == null)) continue; @@ -241,10 +239,18 @@ public class Supporter { // add/subtract votes and write record if (entry != null) { - urlhash = plasmaURL.urlHash(url); + try { + urlhash = (new yacyURL(url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } if (urlhash == null) - urlhash=plasmaURL.urlHash("http://"+url); - if(urlhash==null){ + try { + urlhash = (new yacyURL("http://" + url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } + if (urlhash==null) { System.out.println("Supporter: bad url '" + url + "' from news record " + record.toString()); continue; } diff --git a/htroot/Surftips.java b/htroot/Surftips.java 
index 8db16bfc7..00c47aeea 100644 --- a/htroot/Surftips.java +++ b/htroot/Surftips.java @@ -32,8 +32,6 @@ import java.util.HashMap; import java.util.Iterator; import de.anomic.http.httpHeader; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroNaturalOrder; @@ -48,6 +46,7 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class Surftips { @@ -134,7 +133,7 @@ public class Surftips { url = row.getColString(0, null); try{ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url))) + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new yacyURL(url, null))) continue; }catch(MalformedURLException e){continue;}; title = row.getColString(1,"UTF-8"); @@ -302,10 +301,18 @@ public class Surftips { // add/subtract votes and write record if (entry != null) { - urlhash = plasmaURL.urlHash(url); + try { + urlhash = (new yacyURL(url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } if (urlhash == null) - urlhash=plasmaURL.urlHash("http://"+url); - if(urlhash==null){ + try { + urlhash = (new yacyURL("http://"+url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } + if (urlhash == null) { System.out.println("Surftips: bad url '" + url + "' from news record " + record.toString()); continue; } diff --git a/htroot/Thumbnail.html b/htroot/Thumbnail.html deleted file mode 100644 index 20f06a069..000000000 --- a/htroot/Thumbnail.html +++ /dev/null @@ -1 +0,0 @@ -#[image]# \ No newline at end of file diff --git a/htroot/Thumbnail.java b/htroot/Thumbnail.java deleted file mode 100644 index cfc3ea659..000000000 --- a/htroot/Thumbnail.java +++ /dev/null @@ -1,69 +0,0 @@ -//Thumbnail.java -//------------ -// part of YACY -// -// (C) 2007 Alexander Schier -// -// last change: $LastChangedDate: $ by $LastChangedBy: $ -// $LastChangedRevision: $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; - -import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.server.servletProperties; - -public class Thumbnail{ - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { - servletProperties prop = new servletProperties(); - String command=env.getConfig("thumbnailProgram", ""); - if(command.equals("")||post==null||!post.containsKey("url")){ - prop.put("image", "thumbnail cannot be generated"); //TODO: put a "thumbnail not possible" image. - return prop; - } - String[] cmdline=new String[3]; - cmdline[0]=env.getConfig("thumbnailProgram", ""); - cmdline[1]=post.get("url", ""); - plasmaSwitchboard sb=plasmaSwitchboard.getSwitchboard(); - File path=new File(sb.workPath, plasmaURL.urlHash(cmdline[1])+".png"); - cmdline[2]=path.getAbsolutePath();//does not contain an extension! - try { - Runtime.getRuntime().exec(cmdline); - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path))); - String line; - StringBuffer image=new StringBuffer(); - while((line=br.readLine())!=null){ - image.append(line); - } - //path.delete(); //we do not cache, yet. - prop.put("image", image.toString()); - } catch (IOException e) { - prop.put("image", "error creating thumbnail");//TODO: put a "thumbnail error" image. 
- } - httpHeader out_header=new httpHeader(); - out_header.put(httpHeader.CONTENT_TYPE, "image/png"); - prop.setOutgoingHeader(out_header); - return prop; - } -} diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 94c5c1557..806c0a0d9 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -58,7 +58,6 @@ import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParserDocument; @@ -70,6 +69,7 @@ import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class ViewFile { @@ -99,7 +99,7 @@ public class ViewFile { String viewMode = post.get("viewMode","sentences"); prop.put("error_vMode-" + viewMode, 1); - URL url = null; + yacyURL url = null; String descr = ""; int wordCount = 0; int size = 0; @@ -144,7 +144,7 @@ public class ViewFile { } // define an url by post parameter - url = new URL(urlString); + url = new yacyURL(urlString, null); pre = post.get("pre", "false").equals("true"); } catch (MalformedURLException e) {} diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java index b0fee1001..b8abf6b22 100644 --- a/htroot/ViewImage.java +++ b/htroot/ViewImage.java @@ -48,12 +48,12 @@ import java.io.InputStream; import java.net.MalformedURLException; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; import de.anomic.ymage.ymageImageParser; public class ViewImage { @@ -70,9 +70,9 @@ public class ViewImage { String urlLicense = post.get("code", ""); boolean auth = ((String) header.get("CLIENTIP", "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights - URL url = null; + yacyURL url = null; if ((urlString.length() > 0) && (auth)) try { - url = new URL(urlString); + url = new yacyURL(urlString, null); } catch (MalformedURLException e1) { url = null; } diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 66f5f334f..3358af088 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -38,11 +38,9 @@ import java.util.regex.PatternSyntaxException; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.dbImport.dbImporter; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; @@ -50,6 +48,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; +import de.anomic.yacy.yacyURL; public class WatchCrawler_p { public static final String CRAWLING_MODE_URL = "url"; @@ -101,12 +100,12 @@ public class WatchCrawler_p { String newcrawlingfilter = post.get("crawlingFilter", ".*"); if (fullDomain) try { - newcrawlingfilter = ".*" + (new URL(post.get("crawlingURL",""))).getHost() + ".*"; + 
newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*"; } catch (MalformedURLException e) {} env.setConfig("crawlingFilter", newcrawlingfilter); - int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0")); - if (fullDomain) newcrawlingdepth = 99; + int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8")); + if (fullDomain) newcrawlingdepth = 8; env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth)); boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on"); @@ -158,12 +157,12 @@ public class WatchCrawler_p { if (pos == -1) crawlingStart = "http://" + crawlingStart; // normalizing URL - try {crawlingStart = new URL(crawlingStart).toNormalform(true, true);} catch (MalformedURLException e1) {} + try {crawlingStart = new yacyURL(crawlingStart, null).toNormalform(true, true);} catch (MalformedURLException e1) {} // check if url is proper - URL crawlingStartURL = null; + yacyURL crawlingStartURL = null; try { - crawlingStartURL = new URL(crawlingStart); + crawlingStartURL = new yacyURL(crawlingStart, null); } catch (MalformedURLException e) { crawlingStartURL = null; } @@ -181,7 +180,7 @@ public class WatchCrawler_p { // stack request // first delete old entry, if exists - String urlhash = plasmaURL.urlHash(crawlingStart); + String urlhash = (new yacyURL(crawlingStart, null)).hash(); switchboard.wordIndex.loadedURL.remove(urlhash); switchboard.noticeURL.remove(urlhash); switchboard.errorURL.remove(urlhash); @@ -258,7 +257,7 @@ public class WatchCrawler_p { String fileString = new String(fileContent,"UTF-8"); // parsing the bookmark file and fetching the headline and contained links - htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL(file)); + htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL(file)); //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); Writer writer = new htmlFilterWriter(null,null,scraper,null,false); serverFileUtils.write(fileString,writer); @@ -282,12 +281,12 @@ public class WatchCrawler_p { nexturlstring = nexturlstring.trim(); // normalizing URL - nexturlstring = new URL(nexturlstring).toNormalform(true, true); + nexturlstring = new yacyURL(nexturlstring, null).toNormalform(true, true); // generating an url object - URL nexturlURL = null; + yacyURL nexturlURL = null; try { - nexturlURL = new URL(nexturlstring); + nexturlURL = new yacyURL(nexturlstring, null); } catch (MalformedURLException ex) { nexturlURL = null; c++; diff --git a/htroot/WebStructurePicture_p.java b/htroot/WebStructurePicture_p.java index 18195e2e8..df533f12d 100644 --- a/htroot/WebStructurePicture_p.java +++ b/htroot/WebStructurePicture_p.java @@ -32,12 +32,11 @@ import java.util.Map; import de.anomic.http.httpHeader; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaWebStructure; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; import de.anomic.ymage.ymageGraph; import de.anomic.ymage.ymageMatrix; import de.anomic.ymage.ymageToolPrint; @@ -92,7 +91,7 @@ public class WebStructurePicture_p { // find start hash String hash = null; try { - hash = plasmaURL.urlHash(new URL("http://" + host)).substring(6); + hash = (new yacyURL("http://" + host, null)).hash().substring(6); } catch (MalformedURLException e) {e.printStackTrace();} assert 
(sb.webStructure.references(hash) != null); diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java index 9ad104c97..c2c097216 100644 --- a/htroot/htdocsdefault/dir.java +++ b/htroot/htdocsdefault/dir.java @@ -60,12 +60,11 @@ import de.anomic.data.userDB; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.http.httpd; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBitfield; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; +import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; @@ -79,6 +78,7 @@ import de.anomic.tools.dirlistComparator; import de.anomic.tools.md5DirFileFilter; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class dir { @@ -364,7 +364,7 @@ public class dir { public static void indexPhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr, byte[] md5) { try { - final URL url = new URL(urlstring); + final yacyURL url = new yacyURL(urlstring, null); final plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(("yacyshare. " + phrase + ". " + descr).getBytes()), "UTF-8"); final indexURLEntry newEntry = new indexURLEntry( url, @@ -379,7 +379,7 @@ public class dir { md5, // md5 (long) phrase.length(), // size condenser.RESULT_NUMB_WORDS, // word count - plasmaURL.DT_SHARE, // doctype + plasmaHTCache.DT_SHARE, // doctype new kelondroBitfield(4), "**", // language 0,0,0,0,0,0 @@ -392,14 +392,13 @@ public class dir { 5 /*process case*/ ); - final String urlHash = newEntry.hash(); - /*final int words =*/ switchboard.wordIndex.addPageIndex(url, urlHash, new Date(), phrase.length() + descr.length() + 13, null, condenser, "**", plasmaURL.DT_SHARE, 0, 0); + /*final int words =*/ switchboard.wordIndex.addPageIndex(url, new Date(), phrase.length() + descr.length() + 13, null, condenser, "**", plasmaHTCache.DT_SHARE, 0, 0); } catch (IOException e) {} } public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) { try { - final String urlhash = plasmaURL.urlHash(new URL(urlstring)); + final String urlhash = (new yacyURL(urlstring, null)).hash(); final Iterator words = plasmaCondenser.getWords(("yacyshare " + phrase + " " + descr).getBytes("UTF-8"), "UTF-8").keySet().iterator(); String word; while (words.hasNext()) { diff --git a/htroot/index.java b/htroot/index.java index fc0324bcd..9eb764a78 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -33,15 +33,14 @@ import java.net.MalformedURLException; import java.util.HashMap; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class index { @@ -80,13 +79,13 @@ public class index { final String referer = (String) header.get(httpHeader.REFERER); if (referer != null) { - URL url; + yacyURL url; try { - url = new URL(referer); + url = new yacyURL(referer, null); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (!serverDomains.isLocal(url))) { + if ((url != 
null) && (!url.isLocal())) { final HashMap referrerprop = new HashMap(); referrerprop.put("count", "1"); referrerprop.put("clientip", header.get(httpHeader.CONNECTION_PROP_CLIENTIP)); diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index 1a62fd05f..bee3a1023 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -58,7 +58,6 @@ import java.util.HashSet; import de.anomic.data.listManager; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.abstractURLPattern; import de.anomic.server.serverObjects; @@ -66,6 +65,7 @@ import de.anomic.server.serverSwitch; import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class sharedBlacklist_p { @@ -131,7 +131,7 @@ public class sharedBlacklist_p { reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache"); // get List - URL u = new URL(downloadURL); + yacyURL u = new yacyURL(downloadURL, null); otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader, null), "UTF-8"); } catch (Exception e) { prop.put("status", STATUS_PEER_UNKNOWN); @@ -147,7 +147,7 @@ public class sharedBlacklist_p { prop.put("page_source", downloadURL); try { - URL u = new URL(downloadURL); + yacyURL u = new yacyURL(downloadURL, null); otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); //get List } catch (Exception e) { prop.put("status", STATUS_URL_PROBLEM); diff --git a/htroot/soap/ServiceList.html b/htroot/soap/ServiceList.html deleted file mode 100644 index 79e1d4f44..000000000 --- a/htroot/soap/ServiceList.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - SOAP Service List - #%env/templates/metas.template%# - - - #%env/templates/header.template%# -

- Deployed SOAP Services
- Currently #[services]# services are deployed.
- - - - - - - - #{services}# - - - - #{/services}# - - #%env/templates/footer.template%# - - \ No newline at end of file diff --git a/htroot/soap/ServiceList.java b/htroot/soap/ServiceList.java deleted file mode 100644 index bac6808e8..000000000 --- a/htroot/soap/ServiceList.java +++ /dev/null @@ -1,105 +0,0 @@ -// ServiceList.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// -// This File is contributed by Martin Thelian -// -// $LastChangedDate: 2007-02-24 13:56:32 +0000 (Sa, 24 Feb 2007) $ -// $LastChangedRevision: 3391 $ -// $LastChangedBy: karlchenofhell $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. 
- -// You must compile this file with -// javac -classpath .:../classes Blacklist_p.java -// if the shell's current path is HTROOT - - -package soap; - -import java.util.ArrayList; -import java.util.Iterator; - -import org.apache.axis.AxisEngine; -import org.apache.axis.ConfigurationException; -import org.apache.axis.description.OperationDesc; -import org.apache.axis.description.ServiceDesc; - -import de.anomic.http.httpHeader; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; - -public class ServiceList { - - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws ConfigurationException { - - serverObjects prop = new serverObjects(); - - // getting the SOAP engine - AxisEngine engine = (AxisEngine) post.get("SOAP.engine"); - - // loop through the deployed services - int i = 0; - boolean dark = true; - Iterator serviceIter = engine.getConfig().getDeployedServices(); - while (serviceIter.hasNext()) { - // getting the service description - ServiceDesc serviceDescription = (ServiceDesc)serviceIter.next(); - prop.put("services_" + i + "_name",serviceDescription.getName()); - prop.put("services_" + i + "_style",serviceDescription.getStyle()); - prop.put("services_" + i + "_dark", ((dark) ? 1 : 0) ); dark =! dark; - - // loop through the methods of this service - int j = 0; - ArrayList operations = serviceDescription.getOperations(); - while (j < operations.size()) { - OperationDesc op = (OperationDesc)operations.get(j); - - prop.put("services_" + i + "_methods_" + j + "_name",op.getName()); - prop.put("services_" + i + "_methods_" + j + "_method",op.getMethod()); - j++; - } - prop.put("services_" + i + "_methods",j); - - i++; - } - prop.put("services",i); - - return prop; - } - -} diff --git a/htroot/xml/bookmarks/posts/delete_p.java b/htroot/xml/bookmarks/posts/delete_p.java index dd73a4d3e..ce214aed4 100644 --- a/htroot/xml/bookmarks/posts/delete_p.java +++ b/htroot/xml/bookmarks/posts/delete_p.java @@ -42,11 +42,13 @@ // Contributions and changes to the program code must be marked as such. 
package xml.bookmarks.posts; +import java.net.MalformedURLException; + import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class delete_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -54,13 +56,17 @@ public class delete_p { plasmaSwitchboard switchboard = (plasmaSwitchboard) env; serverObjects prop = new serverObjects(); if(post!= null){ - if( post.containsKey("url") && switchboard.bookmarksDB.removeBookmark(plasmaURL.urlHash(post.get("url", "nourl"))) ){ - prop.put("result", 1); - }else if(post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))){ - prop.put("result", 1); - }else{ - prop.put("result",0); - } + try { + if( post.containsKey("url") && switchboard.bookmarksDB.removeBookmark((new yacyURL(post.get("url", "nourl"), null)).hash())) { + prop.put("result", 1); + }else if(post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))){ + prop.put("result", 1); + }else{ + prop.put("result",0); + } + } catch (MalformedURLException e) { + prop.put("result",0); + } }else{ prop.put("result",0); } diff --git a/htroot/xml/queues_p.java b/htroot/xml/queues_p.java index d6713f05a..78cc327c4 100644 --- a/htroot/xml/queues_p.java +++ b/htroot/xml/queues_p.java @@ -200,7 +200,7 @@ public class queues_p { prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate())); prop.put(tableName + "_" + showNum + "_anchor", urle.name()); prop.put(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true)); - prop.put(tableName + "_" + showNum + "_hash", urle.urlhash()); + prop.put(tableName + "_" + showNum + "_hash", urle.url().hash()); showNum++; } } diff --git a/htroot/xml/util/getpageinfo_p.java b/htroot/xml/util/getpageinfo_p.java index 444c9d8e7..fd53919b3 100644 --- a/htroot/xml/util/getpageinfo_p.java +++ b/htroot/xml/util/getpageinfo_p.java @@ -55,11 +55,11 @@ import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class getpageinfo_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -82,7 +82,7 @@ public class getpageinfo_p { } if (actions.indexOf("title")>=0) { try { - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); String contentString=new String(httpc.wget(u, u.getHost(), 6000, null, null, ((plasmaSwitchboard) env).remoteProxyConfig, null, null)) ; htmlFilterContentScraper scraper = new htmlFilterContentScraper(u); @@ -110,13 +110,13 @@ public class getpageinfo_p { } if(actions.indexOf("robots")>=0){ try { - URL theURL = new URL(url); + yacyURL theURL = new yacyURL(url, null); // determine if crawling of the current URL is allowed prop.put("robots-allowed", robotsParser.isDisallowed(theURL) ? 
0:1); // get the sitemap URL of the domain - URL sitemapURL = robotsParser.getSitemapURL(theURL); + yacyURL sitemapURL = robotsParser.getSitemapURL(theURL); prop.put("sitemap", (sitemapURL==null)?"":sitemapURL.toString()); } catch (MalformedURLException e) {} } diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java index 76c265e45..c79d210b4 100644 --- a/htroot/yacy/crawlOrder.java +++ b/htroot/yacy/crawlOrder.java @@ -45,13 +45,12 @@ // You must compile this file with // javac -classpath .:../classes crawlOrder.java +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Date; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -59,6 +58,7 @@ import de.anomic.tools.crypt; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNetwork; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class crawlOrder { @@ -182,16 +182,16 @@ public final class crawlOrder { // old method: only one url // normalizing URL - String newURL = new URL((String) urlv.get(0)).toNormalform(true, true); + String newURL = new yacyURL((String) urlv.get(0), null).toNormalform(true, true); if (!newURL.equals(urlv.get(0))) { env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0)); } - String refURL = (refv.get(0) == null) ? null : new URL((String) refv.get(0)).toNormalform(true, true); + String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true); if ((refURL != null) && (!refURL.equals(refv.get(0)))) { env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0)); } - if (!switchboard.acceptURL(new URL(newURL))) { + if (!switchboard.acceptURL(new yacyURL(newURL, null))) { env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + newURL); return null; } @@ -263,7 +263,12 @@ public final class crawlOrder { // case where we have already the url loaded; reason = reasonString; // send lurl-Entry as response - indexURLEntry entry = switchboard.wordIndex.loadedURL.load(plasmaURL.urlHash(url), null); + indexURLEntry entry; + try { + entry = switchboard.wordIndex.loadedURL.load((new yacyURL(url, null)).hash(), null); + } catch (MalformedURLException e) { + entry = null; + } if (entry == null) { response = "rejected"; lurl = ""; diff --git a/htroot/yacy/list.java b/htroot/yacy/list.java index 3c8afcb44..581bbf927 100644 --- a/htroot/yacy/list.java +++ b/htroot/yacy/list.java @@ -56,7 +56,6 @@ import de.anomic.data.URLFetcherStack; import de.anomic.data.htmlTools; import de.anomic.data.listManager; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; @@ -65,6 +64,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNetwork; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class list { @@ -121,7 +121,7 @@ public final class list { if (count > 0 && db.size() > 0) { final StringBuffer b = new StringBuffer(); - URL url; + yacyURL url; int cnt = 0; for (int i=0; i 0) sb.requestedQueries = sb.requestedQueries + 1d / (double) partitions; // increase query counter - + // prepare reference hints + 
localProcess.startTimer(); Object[] ws = theSearch.references(); StringBuffer refstr = new StringBuffer(); for (int j = 0; j < ws.length; j++) refstr.append(",").append((String) ws[j]); prop.putASIS("references", (refstr.length() > 0) ? refstr.substring(1) : new String(refstr)); + localProcess.yield("reference collection", ws.length); } prop.putASIS("indexabstract", new String(indexabstract)); @@ -241,6 +242,7 @@ public final class search { } else { // result is a List of urlEntry elements + localProcess.startTimer(); StringBuffer links = new StringBuffer(); String resource = null; plasmaSearchEvent.ResultEntry entry; @@ -253,6 +255,7 @@ public final class search { } prop.putASIS("links", new String(links)); prop.put("linkcount", accu.size()); + localProcess.yield("result list preparation", accu.size()); } // add information about forward peers @@ -278,7 +281,7 @@ public final class search { yacyCore.log.logInfo("EXIT HASH SEARCH: " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + joincount + " links found, " + prop.get("linkcount", "?") + " links selected, " + - indexabstractContainercount + " index abstract references attached, " + + indexabstractContainercount + " index abstracts, " + (System.currentTimeMillis() - timestamp) + " milliseconds"); prop.putASIS("searchtime", Long.toString(System.currentTimeMillis() - timestamp)); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index f15e95016..c229fca3d 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -137,7 +137,7 @@ public final class transferURL { } // check if the entry is blacklisted - if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, lEntry.hash(), comp.url()))) { + if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, comp.url()))) { int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash()); yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + comp.url().toNormalform(false, true) + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs"); lEntry = null; diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java index 58a525a90..f802b2643 100644 --- a/htroot/yacy/urls.java +++ b/htroot/yacy/urls.java @@ -72,7 +72,7 @@ public class urls { prop.put("item_" + c + "_description", entry.name()); prop.put("item_" + c + "_author", ""); prop.put("item_" + c + "_pubDate", serverDate.shortSecondTime(entry.appdate())); - prop.put("item_" + c + "_guid", entry.urlhash()); + prop.put("item_" + c + "_guid", entry.url().hash()); c++; count--; } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 8e654847f..af4b8858f 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -58,7 +58,6 @@ import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroNaturalOrder; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSearchEvent; @@ -69,7 +68,6 @@ import de.anomic.plasma.plasmaSearchProcessing; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; -import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -78,6 +76,7 @@ import de.anomic.tools.crypt; import 
de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; +import de.anomic.yacy.yacyURL; public class yacysearch { @@ -104,9 +103,9 @@ public class yacysearch { // save referrer // System.out.println("HEADER=" + header.toString()); if (referer != null) { - URL url; - try { url = new URL(referer); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (!serverDomains.isLocal(url))) { + yacyURL url; + try { url = new yacyURL(referer, null); } catch (MalformedURLException e) { url = null; } + if ((url != null) && (!url.isLocal())) { final HashMap referrerprop = new HashMap(); referrerprop.put("count", "1"); referrerprop.put("clientip", header.get("CLIENTIP")); @@ -454,8 +453,8 @@ public class yacysearch { int depth = post.getInt("depth", 0); int columns = post.getInt("columns", 6); - URL url = null; - try {url = new URL(post.get("url", ""));} catch (MalformedURLException e) {} + yacyURL url = null; + try {url = new yacyURL(post.get("url", ""), null);} catch (MalformedURLException e) {} plasmaSearchImages si = new plasmaSearchImages(6000, url, depth); Iterator i = si.entries(); htmlFilterImageEntry ie; diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 1dcb6240e..ac9447dc8 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -30,13 +30,11 @@ import java.net.URLEncoder; import java.util.TreeSet; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchPreOrder; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.crypt; @@ -44,6 +42,7 @@ import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class yacysearchitem { @@ -86,9 +85,9 @@ public class yacysearchitem { prop.put("content_url", result.urlstring()); int port=result.url().getPort(); - URL faviconURL; + yacyURL faviconURL; try { - faviconURL = new URL(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + String.valueOf(port)) : "") + "/favicon.ico"); + faviconURL = new yacyURL(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + String.valueOf(port)) : "") + "/favicon.ico", null); } catch (MalformedURLException e1) { faviconURL = null; } @@ -102,14 +101,14 @@ public class yacysearchitem { prop.put("content_size", Long.toString(result.filesize())); TreeSet[] query = theQuery.queryWords(); - URL wordURL = null; + yacyURL wordURL = null; try { prop.put("content_words", URLEncoder.encode(query[0].toString(),"UTF-8")); } catch (UnsupportedEncodingException e) {} prop.put("content_former", theQuery.queryString); - prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.hash()) + - ((plasmaURL.probablyRootURL(result.hash())) ? ", probablyRootURL" : "") + - (((wordURL = plasmaURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : "")); + prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + yacyURL.domLengthEstimation(result.hash()) + + ((yacyURL.probablyRootURL(result.hash())) ? 
", probablyRootURL" : "") + + (((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : "")); prop.putASIS("content_snippet", result.textSnippet().getLineMarked(theQuery.queryHashes)); diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java index 5e7e731f9..45f421dea 100644 --- a/source/de/anomic/data/SitemapParser.java +++ b/source/de/anomic/data/SitemapParser.java @@ -60,14 +60,13 @@ import org.xml.sax.helpers.DefaultHandler; import de.anomic.http.httpc; import de.anomic.http.httpdByteCountInputStream; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverDate; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; /** * Class to parse a sitemap file.
@@ -140,7 +139,7 @@ public class SitemapParser extends DefaultHandler { /** * The location of the sitemap file */ - private URL siteMapURL = null; + private yacyURL siteMapURL = null; /** * The next URL to enqueue @@ -153,7 +152,7 @@ public class SitemapParser extends DefaultHandler { private Date lastMod = null; - public SitemapParser(plasmaSwitchboard sb, URL sitemap, plasmaCrawlProfile.entry theCrawlingProfile) { + public SitemapParser(plasmaSwitchboard sb, yacyURL sitemap, plasmaCrawlProfile.entry theCrawlingProfile) { if (sb == null) throw new NullPointerException("The switchboard must not be null"); if (sitemap == null) throw new NullPointerException("The sitemap URL must not be null"); this.switchboard = sb; @@ -276,7 +275,12 @@ public class SitemapParser extends DefaultHandler { if (this.nextURL == null) return; // get the url hash - String nexturlhash = plasmaURL.urlHash(this.nextURL); + String nexturlhash; + try { + nexturlhash = (new yacyURL(this.nextURL, null)).hash(); + } catch (MalformedURLException e1) { + nexturlhash = null; + } // check if the url is known and needs to be recrawled if (this.lastMod != null) { @@ -314,7 +318,7 @@ public class SitemapParser extends DefaultHandler { this.logger.logInfo("The URL '" + this.nextURL + "' can not be crawled. Reason: " + error); // insert URL into the error DB - plasmaCrawlZURL.Entry ee = this.switchboard.errorURL.newEntry(new URL(this.nextURL), error); + plasmaCrawlZURL.Entry ee = this.switchboard.errorURL.newEntry(new yacyURL(this.nextURL, null), error); ee.store(); this.switchboard.errorURL.stackPushEntry(ee); } catch (MalformedURLException e) {/* ignore this */ } diff --git a/source/de/anomic/data/URLFetcherStack.java b/source/de/anomic/data/URLFetcherStack.java index aab43392d..42703ad0c 100644 --- a/source/de/anomic/data/URLFetcherStack.java +++ b/source/de/anomic/data/URLFetcherStack.java @@ -51,8 +51,8 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; -import de.anomic.net.URL; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class URLFetcherStack { @@ -84,7 +84,7 @@ public class URLFetcherStack { this.db.close(); } - public boolean push(URL url) { + public boolean push(yacyURL url) { try { this.db.push(this.db.row().newEntry( new byte[][] { url.toNormalform(true, true).getBytes() } @@ -97,14 +97,14 @@ public class URLFetcherStack { } } - public URL pop() { + public yacyURL pop() { try { kelondroRow.Entry r = this.db.pop(); if (r == null) return null; final String url = r.getColString(0, null); try { this.popped++; - return new URL(url); + return new yacyURL(url, null); } catch (MalformedURLException e) { this.log.logSevere("found invalid URL-entry: " + url); return null; diff --git a/source/de/anomic/data/URLLicense.java b/source/de/anomic/data/URLLicense.java index b7ed0e132..f4e437456 100644 --- a/source/de/anomic/data/URLLicense.java +++ b/source/de/anomic/data/URLLicense.java @@ -29,7 +29,7 @@ package de.anomic.data; import java.util.HashMap; import java.util.Random; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public class URLLicense { @@ -46,7 +46,7 @@ public class URLLicense { this.keylen = keylen; } - public String aquireLicense(URL url) { + public String aquireLicense(yacyURL url) { // generate license key String license = ""; while (license.length() < keylen) license += Integer.toHexString(random.nextInt()); @@ -59,10 +59,10 @@ public class 
URLLicense { return license; } - public URL releaseLicense(String license) { - URL url = null; + public yacyURL releaseLicense(String license) { + yacyURL url = null; synchronized (permissions) { - url = (URL) permissions.remove(license); + url = (yacyURL) permissions.remove(license); } /* if (url == null) { diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index b9295717f..842ed5c15 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -48,6 +48,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.io.Writer; +import java.net.MalformedURLException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -72,7 +73,6 @@ import org.xml.sax.SAXException; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.plasma.plasmaCondenser; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroException; @@ -80,10 +80,10 @@ import de.anomic.kelondro.kelondroMapObjects; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroObjects; import de.anomic.kelondro.kelondroObjectsMapEntry; -import de.anomic.net.URL; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class bookmarksDB { kelondroMapObjects tagsTable; @@ -390,7 +390,7 @@ public class bookmarksDB { } - public int importFromBookmarks(URL baseURL, String input, String tag, boolean importPublic){ + public int importFromBookmarks(yacyURL baseURL, String input, String tag, boolean importPublic){ try { // convert string to inputstream ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8")); @@ -402,7 +402,7 @@ public class bookmarksDB { return 0; } } - public int importFromBookmarks(URL baseURL, InputStreamReader input, String tag, boolean importPublic){ + public int importFromBookmarks(yacyURL baseURL, InputStreamReader input, String tag, boolean importPublic){ int importCount = 0; HashMap links=new HashMap(); @@ -712,7 +712,11 @@ public class bookmarksDB { if(!url.toLowerCase().startsWith("http://") && !url.toLowerCase().startsWith("https://")){ url="http://"+url; } - this.urlHash=plasmaURL.urlHash(url); + try { + this.urlHash=(new yacyURL(url, null)).hash(); + } catch (MalformedURLException e) { + this.urlHash = null; + } entry.put(BOOKMARK_URL, url); this.timestamp=System.currentTimeMillis(); tags=new HashSet(); @@ -728,7 +732,7 @@ public class bookmarksDB { removeBookmark(this.urlHash); //prevent empty tags } - public Bookmark(String urlHash, URL url){ + public Bookmark(String urlHash, yacyURL url){ super(); this.urlHash=urlHash; entry.put(BOOKMARK_URL, url.toNormalform(false, true)); @@ -742,9 +746,9 @@ public class bookmarksDB { tags=new HashSet(); timestamp=System.currentTimeMillis(); } - - public Bookmark(kelondroObjectsMapEntry map) { - this(plasmaURL.urlHash((String)map.map().get(BOOKMARK_URL)), map.map()); + + public Bookmark(kelondroObjectsMapEntry map) throws MalformedURLException { + this((new yacyURL((String)map.map().get(BOOKMARK_URL), null)).hash(), map.map()); } private Map toMap(){ diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java index 16351fd17..e33b006ea 100644 --- 
a/source/de/anomic/data/robotsParser.java +++ b/source/de/anomic/data/robotsParser.java @@ -56,11 +56,11 @@ import java.util.Date; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlRobotsTxt; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; /* * A class for Parsing robots.txt files. @@ -227,7 +227,7 @@ public final class robotsParser{ return new Object[]{denyList,sitemap,crawlDelay}; } - private static final int getPort(URL theURL) { + private static final int getPort(yacyURL theURL) { int port = theURL.getPort(); if (port == -1) { if (theURL.getProtocol().equalsIgnoreCase("http")) { @@ -240,7 +240,7 @@ public final class robotsParser{ return port; } - private static final String getHostPort(URL theURL) { + private static final String getHostPort(yacyURL theURL) { String urlHostPort = null; int port = getPort(theURL); urlHostPort = theURL.getHost() + ":" + port; @@ -249,9 +249,9 @@ public final class robotsParser{ return urlHostPort; } - public static URL getSitemapURL(URL theURL) { + public static yacyURL getSitemapURL(yacyURL theURL) { if (theURL == null) throw new IllegalArgumentException(); - URL sitemapURL = null; + yacyURL sitemapURL = null; // generating the hostname:poart string needed to do a DB lookup String urlHostPort = getHostPort(theURL); @@ -265,13 +265,13 @@ public final class robotsParser{ try { String sitemapUrlStr = robotsTxt4Host.getSitemap(); - if (sitemapUrlStr != null) sitemapURL = new URL(sitemapUrlStr); + if (sitemapUrlStr != null) sitemapURL = new yacyURL(sitemapUrlStr, null); } catch (MalformedURLException e) {/* ignore this */} return sitemapURL; } - public static Integer getCrawlDelay(URL theURL) { + public static Integer getCrawlDelay(yacyURL theURL) { if (theURL == null) throw new IllegalArgumentException(); Integer crawlDelay = null; @@ -292,7 +292,7 @@ public final class robotsParser{ return crawlDelay; } - public static boolean isDisallowed(URL nexturl) { + public static boolean isDisallowed(yacyURL nexturl) { if (nexturl == null) throw new IllegalArgumentException(); // generating the hostname:poart string needed to do a DB lookup @@ -309,10 +309,10 @@ public final class robotsParser{ (robotsTxt4Host.getLoadedDate() == null) || (System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() > 7*24*60*60*1000) ) { - URL robotsURL = null; + yacyURL robotsURL = null; // generating the proper url to download the robots txt try { - robotsURL = new URL(nexturl.getProtocol(),nexturl.getHost(),getPort(nexturl),"/robots.txt"); + robotsURL = new yacyURL(nexturl.getProtocol(),nexturl.getHost(),getPort(nexturl),"/robots.txt"); } catch (MalformedURLException e) { serverLog.logSevere("ROBOTS","Unable to generate robots.txt URL for URL '" + nexturl.toString() + "'."); return false; @@ -371,7 +371,7 @@ public final class robotsParser{ return false; } - static Object[] downloadRobotsTxt(URL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception { + static Object[] downloadRobotsTxt(yacyURL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception { if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null}; redirectionCount--; @@ -392,7 +392,7 @@ public final class robotsParser{ httpHeader reqHeaders = new httpHeader(); // adding referer - reqHeaders.put(httpHeader.REFERER, 
(URL.newURL(robotsURL,"/")).toNormalform(true, true)); + reqHeaders.put(httpHeader.REFERER, (yacyURL.newURL(robotsURL,"/")).toNormalform(true, true)); if (entry != null) { oldEtag = entry.getETag(); @@ -447,7 +447,7 @@ public final class robotsParser{ redirectionUrlString = redirectionUrlString.trim(); // generating the new URL object - URL redirectionUrl = URL.newURL(robotsURL, redirectionUrlString); + yacyURL redirectionUrl = yacyURL.newURL(robotsURL, redirectionUrlString); // returning the used httpc httpc.returnInstance(con); diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index 509cc8aba..1d6e70a3f 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -314,12 +314,11 @@ public final class userDB { public static final String BLOG_RIGHT = "blogRight"; public static final String WIKIADMIN_RIGHT = "wikiAdminRight"; public static final String BOOKMARK_RIGHT = "bookmarkRight"; - public static final String SOAP_RIGHT = "soapRight"; //to create new rights, you just need to edit this strings public static final String RIGHT_TYPES= ADMIN_RIGHT+","+DOWNLOAD_RIGHT+","+UPLOAD_RIGHT+","+PROXY_RIGHT+","+ - BLOG_RIGHT+","+BOOKMARK_RIGHT+","+WIKIADMIN_RIGHT+","+SOAP_RIGHT; + BLOG_RIGHT+","+BOOKMARK_RIGHT+","+WIKIADMIN_RIGHT; public static final String RIGHT_NAMES="Admin,Download,Upload,Proxy usage,Blog,Bookmark,Wiki Admin,SOAP"; public static final int PROXY_ALLOK = 0; //can Surf diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 847c2a0ca..dfd8603eb 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -62,10 +62,10 @@ import java.util.TreeSet; import javax.swing.event.EventListenerList; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper { @@ -112,14 +112,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen /** * {@link URL} to the favicon that belongs to the document */ - private URL favicon; + private yacyURL favicon; /** * The document root {@link URL} */ - private URL root; + private yacyURL root; - public htmlFilterContentScraper(URL root) { + public htmlFilterContentScraper(yacyURL root) { // the root value here will not be used to load the resource. 
// it is only the reference for relative links super(linkTags0, linkTags1); @@ -161,7 +161,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen private String absolutePath(String relativePath) { try { - return URL.newURL(root, relativePath).toNormalform(false, true); + return yacyURL.newURL(root, relativePath).toNormalform(false, true); } catch (Exception e) { return ""; } @@ -175,13 +175,13 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen height = Integer.parseInt(tagopts.getProperty("height", "-1")); } catch (NumberFormatException e) {} try { - URL url = new URL(absolutePath(tagopts.getProperty("src", ""))); + yacyURL url = new yacyURL(absolutePath(tagopts.getProperty("src", "")), null); htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt",""), width, height); images.add(ie); } catch (MalformedURLException e) {} } if (tagname.equalsIgnoreCase("base")) try { - root = new URL(tagopts.getProperty("href", "")); + root = new yacyURL(tagopts.getProperty("href", ""), null); } catch (MalformedURLException e) {} if (tagname.equalsIgnoreCase("frame")) { anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name","")); @@ -204,9 +204,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen if (href.length() > 0) anchors.put(absolutePath(href), areatitle); } if (tagname.equalsIgnoreCase("link")) { - URL newLink = null; + yacyURL newLink = null; try { - newLink = new URL(absolutePath(tagopts.getProperty("href", ""))); + newLink = new yacyURL(absolutePath(tagopts.getProperty("href", "")), null); } catch (MalformedURLException e) {} if (newLink != null) { @@ -363,7 +363,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen /** * @return the {@link URL} to the favicon that belongs to the document */ - public URL getFavicon() { + public yacyURL getFavicon() { return this.favicon; } @@ -478,7 +478,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen } } - public static htmlFilterContentScraper parseResource(URL location) throws IOException { + public static htmlFilterContentScraper parseResource(yacyURL location) throws IOException { // load page byte[] page = httpc.wget( location, diff --git a/source/de/anomic/htmlFilter/htmlFilterImageEntry.java b/source/de/anomic/htmlFilter/htmlFilterImageEntry.java index 4064ef1e5..abbb697e5 100644 --- a/source/de/anomic/htmlFilter/htmlFilterImageEntry.java +++ b/source/de/anomic/htmlFilter/htmlFilterImageEntry.java @@ -40,22 +40,22 @@ package de.anomic.htmlFilter; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public class htmlFilterImageEntry implements Comparable { - private URL url; + private yacyURL url; private String alt; private int width, height; - public htmlFilterImageEntry(URL url, String alt, int width, int height) { + public htmlFilterImageEntry(yacyURL url, String alt, int width, int height) { this.url = url; this.alt = alt; this.width = width; this.height = height; } - public URL url() { + public yacyURL url() { return this.url; } diff --git a/source/de/anomic/htmlFilter/htmlFilterInputStream.java b/source/de/anomic/htmlFilter/htmlFilterInputStream.java index 8e9885542..f7df006e5 100644 --- a/source/de/anomic/htmlFilter/htmlFilterInputStream.java +++ b/source/de/anomic/htmlFilter/htmlFilterInputStream.java @@ -36,8 +36,7 @@ import java.io.Writer; import java.util.Properties; import de.anomic.http.httpHeader; -import 
de.anomic.net.URL; - +import de.anomic.yacy.yacyURL; public class htmlFilterInputStream extends InputStream implements htmlFilterEventListener { @@ -59,7 +58,7 @@ public class htmlFilterInputStream extends InputStream implements htmlFilterEven public htmlFilterInputStream( InputStream inStream, String inputStreamCharset, - URL rooturl, + yacyURL rooturl, htmlFilterTransformer transformer, boolean passbyIfBinarySuspect ) throws UnsupportedEncodingException { diff --git a/source/de/anomic/htmlFilter/htmlFilterWriter.java b/source/de/anomic/htmlFilter/htmlFilterWriter.java index 8d7175e4d..d00dc7673 100644 --- a/source/de/anomic/htmlFilter/htmlFilterWriter.java +++ b/source/de/anomic/htmlFilter/htmlFilterWriter.java @@ -63,8 +63,8 @@ import java.net.MalformedURLException; import java.util.Enumeration; import java.util.Properties; -import de.anomic.net.URL; import de.anomic.server.serverCharBuffer; +import de.anomic.yacy.yacyURL; public final class htmlFilterWriter extends Writer { @@ -508,7 +508,7 @@ public final class htmlFilterWriter extends Writer { if (args.length != 1) return; char[] buffer = new char[512]; try { - htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL("http://localhost:8080")); + htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost:8080", null)); htmlFilterTransformer transformer = new htmlFilterContentTransformer(); // TODO: this does not work at the moment System.exit(0); diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java index 73669fe24..78e7255f9 100644 --- a/source/de/anomic/http/httpHeader.java +++ b/source/de/anomic/http/httpHeader.java @@ -73,9 +73,9 @@ import java.util.TimeZone; import java.util.TreeMap; import java.util.Vector; -import de.anomic.net.URL; import de.anomic.server.serverCore; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class httpHeader extends TreeMap implements Map { @@ -675,7 +675,7 @@ public final class httpHeader extends TreeMap implements Map { } public static boolean supportChunkedEncoding(Properties conProp) { - // getting the http version of the soap client + // getting the http version of the client String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); // only clients with http version 1.1 supports chunk @@ -793,7 +793,7 @@ public final class httpHeader extends TreeMap implements Map { theHeader.append("\r\n"); } - public static URL getRequestURL(Properties conProp) throws MalformedURLException { + public static yacyURL getRequestURL(Properties conProp) throws MalformedURLException { String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST); String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/' String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given @@ -807,7 +807,7 @@ public final class httpHeader extends TreeMap implements Map { host = host.substring(0, pos); } - URL url = new URL("http", host, port, (args == null) ? path : path + "?" + args); + yacyURL url = new yacyURL("http", host, port, (args == null) ? path : path + "?" 
+ args); return url; } diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index 1bcf1a189..bad06b831 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -76,7 +76,6 @@ import javax.net.ssl.X509TrustManager; import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCore; import de.anomic.server.serverDomains; @@ -84,6 +83,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; /** * This class implements an http client. While http access is built-in in java @@ -959,7 +959,7 @@ public final class httpc { } public static byte[] singleGET( - URL u, + yacyURL u, String vhost, int timeout, String user, @@ -1017,7 +1017,7 @@ public final class httpc { } public static byte[] singlePOST( - URL u, + yacyURL u, String vhost, int timeout, String user, @@ -1049,7 +1049,7 @@ public final class httpc { } public static byte[] wget( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1090,7 +1090,7 @@ public final class httpc { return a; } - public static Map loadHashMap(URL url, httpRemoteProxyConfig proxy) { + public static Map loadHashMap(yacyURL url, httpRemoteProxyConfig proxy) { try { // should we use the proxy? boolean useProxy = (proxy != null) && @@ -1119,7 +1119,7 @@ public final class httpc { } public static httpHeader whead( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1130,7 +1130,7 @@ public final class httpc { } public static httpHeader whead( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1172,7 +1172,7 @@ public final class httpc { } public static byte[] wput( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1217,7 +1217,7 @@ public final class httpc { httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort); try { - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); text = nxTools.strings(wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig, null, null)); } catch (MalformedURLException e) { System.out.println("The url '" + url + "' is wrong."); diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index b7b8b421c..dc38d277b 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -51,7 +51,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; -import java.lang.reflect.Constructor; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URLDecoder; @@ -67,7 +66,6 @@ import java.util.StringTokenizer; import de.anomic.data.htmlTools; import de.anomic.data.userDB; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCodings; @@ -78,9 +76,9 @@ import de.anomic.server.serverHandler; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; -import de.anomic.soap.httpdSoapHandler; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; /** @@ -119,7 +117,6 @@ public final class httpd implements serverHandler { public 
static final String hline = "-------------------------------------------------------------------------------"; public static HashMap reverseMappingCache = new HashMap(); - private httpdSoapHandler soapHandler = null; private static plasmaSwitchboard switchboard = null; private static String virtualHost = null; @@ -488,36 +485,8 @@ public final class httpd implements serverHandler { if (this.prop.getProperty(httpHeader.CONNECTION_PROP_HOST).equals(virtualHost)) { // pass to server if (this.allowServer) { - - /* - * Handling SOAP Requests here ... - */ - if (this.prop.containsKey(httpHeader.CONNECTION_PROP_PATH) && this.prop.getProperty(httpHeader.CONNECTION_PROP_PATH).startsWith("/soap/")) { - if (this.soapHandler == null) { - try { - Class soapHandlerClass = Class.forName("de.anomic.soap.httpdSoapHandler"); - Constructor classConstructor = soapHandlerClass.getConstructor( new Class[] { serverSwitch.class } ); - this.soapHandler = (httpdSoapHandler) classConstructor.newInstance(new Object[] { switchboard }); - } catch (Exception e) { - sendRespondError(this.prop,this.session.out,4,501,null,"Error while initializing SOAP Excension",e); - return serverCore.TERMINATE_CONNECTION; - } catch (NoClassDefFoundError e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } catch (Error e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } - } - this.soapHandler.doGet(this.prop, header, this.session.out); - - /* - * Handling HTTP requests here ... - */ - } else { - if (this.handleServerAuthentication(header)) { - httpdFileHandler.doGet(this.prop, header, this.session.out); - } + if (this.handleServerAuthentication(header)) { + httpdFileHandler.doGet(this.prop, header, this.session.out); } } else { // not authorized through firewall blocking (ip does not match filter) @@ -637,40 +606,8 @@ public final class httpd implements serverHandler { if (prop.getProperty(httpHeader.CONNECTION_PROP_HOST).equals(virtualHost)) { // pass to server if (allowServer) { - - /* - * Handling SOAP Requests here ... - */ - if (this.prop.containsKey(httpHeader.CONNECTION_PROP_PATH) && this.prop.getProperty(httpHeader.CONNECTION_PROP_PATH).startsWith("/soap/")) { - if (this.soapHandler == null) { - try { - // creating the soap handler class by name - Class soapHandlerClass = Class.forName("de.anomic.soap.httpdSoapHandler"); - - // Look for the proper constructor - Constructor soapHandlerConstructor = soapHandlerClass.getConstructor( new Class[] { serverSwitch.class } ); - - // creating the new object - this.soapHandler = (httpdSoapHandler)soapHandlerConstructor.newInstance( new Object[] { switchboard } ); - } catch (Exception e) { - sendRespondError(this.prop,this.session.out,4,501,null,"Error while initializing SOAP Excension",e); - return serverCore.TERMINATE_CONNECTION; - } catch (NoClassDefFoundError e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } catch (Error e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } - } - this.soapHandler.doPost(this.prop, header, this.session.out, this.session.in); - /* - * Handling normal HTTP requests here ... 
- */ - } else { - if (handleServerAuthentication(header)) { - httpdFileHandler.doPost(prop, header, this.session.out, this.session.in); - } + if (handleServerAuthentication(header)) { + httpdFileHandler.doPost(prop, header, this.session.out, this.session.in); } } else { // not authorized through firewall blocking (ip does not match filter) @@ -1199,7 +1136,7 @@ public final class httpd implements serverHandler { String urlString; try { - urlString = (new URL((method.equals(httpHeader.METHOD_CONNECT)?"https":"http"), host, port, (args == null) ? path : path + "?" + args)).toString(); + urlString = (new yacyURL((method.equals(httpHeader.METHOD_CONNECT)?"https":"http"), host, port, (args == null) ? path : path + "?" + args)).toString(); } catch (MalformedURLException e) { urlString = "invalid URL"; } diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index 051662f6e..65f4f6435 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -962,7 +962,6 @@ public final class httpdFileHandler { } } - //System.out.println("**DEBUG** loading class file " + classFile); Class c = provider.loadClass(classFile); Class[] params = new Class[] { httpHeader.class, diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 64ef59f70..5faedd672 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -93,7 +93,6 @@ import java.util.zip.GZIPOutputStream; import de.anomic.htmlFilter.htmlFilterContentTransformer; import de.anomic.htmlFilter.htmlFilterTransformer; import de.anomic.htmlFilter.htmlFilterWriter; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -107,6 +106,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverMiniLogFormatter; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public final class httpdProxyHandler { @@ -319,7 +319,7 @@ public final class httpdProxyHandler { int pos=0; int port=0; - URL url = null; + yacyURL url = null; try { url = httpHeader.getRequestURL(conProp); @@ -329,11 +329,11 @@ public final class httpdProxyHandler { redirectorWriter.println(url.toNormalform(false, true)); redirectorWriter.flush(); } - String newUrl=redirectorReader.readLine(); - if(!newUrl.equals("")){ - try{ - url=new URL(newUrl); - }catch(MalformedURLException e){}//just keep the old one + String newUrl = redirectorReader.readLine(); + if (!newUrl.equals("")) { + try { + url = new yacyURL(newUrl, null); + } catch(MalformedURLException e){}//just keep the old one } conProp.setProperty(httpHeader.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort()); conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, url.getPath()); @@ -474,7 +474,7 @@ public final class httpdProxyHandler { } } - private static void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, File cacheFile, OutputStream respond) { + private static void fulfillRequestFromWeb(Properties conProp, yacyURL url,String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, File cacheFile, OutputStream respond) { GZIPOutputStream gzippedOut = null; httpChunkedOutputStream chunkedOut = null; @@ -727,7 +727,7 @@ public final class httpdProxyHandler { private static void fulfillRequestFromCache( 
Properties conProp, - URL url, + yacyURL url, String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, @@ -865,7 +865,7 @@ public final class httpdProxyHandler { httpc remote = null; httpc.response res = null; - URL url = null; + yacyURL url = null; try { // remembering the starting time of the request Date requestDate = new Date(); // remember the time... @@ -892,7 +892,7 @@ public final class httpdProxyHandler { } try { - url = new URL("http", host, port, (args == null) ? path : path + "?" + args); + url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args); } catch (MalformedURLException e) { String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; @@ -968,7 +968,7 @@ public final class httpdProxyHandler { public static void doPost(Properties conProp, httpHeader requestHeader, OutputStream respond, PushbackInputStream body) throws IOException { httpc remote = null; - URL url = null; + yacyURL url = null; try { // remembering the starting time of the request Date requestDate = new Date(); // remember the time... @@ -993,7 +993,7 @@ public final class httpdProxyHandler { } try { - url = new URL("http", host, port, (args == null) ? path : path + "?" + args); + url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args); } catch (MalformedURLException e) { String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; @@ -1308,7 +1308,7 @@ public final class httpdProxyHandler { out.flush(); } */ - private static void handleProxyException(Exception e, httpc remote, Properties conProp, OutputStream respond, URL url) { + private static void handleProxyException(Exception e, httpc remote, Properties conProp, OutputStream respond, yacyURL url) { // this may happen if // - the targeted host does not exist // - anything with the remote server was wrong. 
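(For illustration: a minimal sketch of the URL-handling pattern this changeset converges on. The class and method names below are hypothetical and only assume calls visible in the patch itself, namely the yacyURL(String, String hash) constructor, hash(), newURL() and toNormalform(), compiled against the YaCy source tree.)

import java.net.MalformedURLException;

import de.anomic.yacy.yacyURL;

// hypothetical helper, not part of the patch
public class yacyURLUsageSketch {

    // before this changeset: URL u = new URL(s); String h = plasmaURL.urlHash(u);
    // after it: the hash is carried by the URL object itself
    public static String hashOf(String urlString) {
        try {
            yacyURL url = new yacyURL(urlString, null); // null = hash not known yet
            return url.hash();                          // hash computed by yacyURL
        } catch (MalformedURLException e) {
            return null;                                // fallback used throughout the patch
        }
    }

    // relative references are still resolved against a base URL, now via yacyURL.newURL
    public static String resolve(yacyURL base, String relativePath) throws MalformedURLException {
        return yacyURL.newURL(base, relativePath).toNormalform(false, true);
    }
}

(Where a hash is already known, it can be passed as the second constructor argument, as indexURLEntry.Components now does, which suggests the stored hash is reused rather than recomputed.)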
diff --git a/source/de/anomic/icap/icapd.java b/source/de/anomic/icap/icapd.java index dc5e172b9..de7ab0813 100644 --- a/source/de/anomic/icap/icapd.java +++ b/source/de/anomic/icap/icapd.java @@ -60,7 +60,6 @@ import java.util.Properties; import de.anomic.http.httpChunkedInputStream; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -71,6 +70,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverHandler; import de.anomic.server.logging.serverLog; import de.anomic.server.serverCore.Session; +import de.anomic.yacy.yacyURL; /** * @author theli @@ -346,7 +346,7 @@ public class icapd implements serverHandler { httpHeader.handleTransparentProxySupport(httpReqHeader,httpReqProps,virtualHost,true); // getting the request URL - URL httpRequestURL = httpHeader.getRequestURL(httpReqProps); + yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps); /* ========================================================================= * Parsing response data diff --git a/source/de/anomic/index/indexURLEntry.java b/source/de/anomic/index/indexURLEntry.java index 5f89d4bac..bbc0cefcb 100644 --- a/source/de/anomic/index/indexURLEntry.java +++ b/source/de/anomic/index/indexURLEntry.java @@ -38,15 +38,14 @@ import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaSearchQuery; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverCodings; import de.anomic.server.serverDate; import de.anomic.tools.crypt; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; import de.anomic.index.indexRWIEntry; public class indexURLEntry { @@ -120,7 +119,7 @@ public class indexURLEntry { private indexRWIEntry word; // this is only used if the url is transported via remote search requests public indexURLEntry( - URL url, + yacyURL url, String descr, String author, String tags, @@ -143,12 +142,12 @@ public class indexURLEntry { int lapp) { // create new entry and store it into database this.entry = rowdef.newEntry(); - this.entry.setCol(col_hash, plasmaURL.urlHash(url), null); + this.entry.setCol(col_hash, url.hash(), null); this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag)); this.entry.setCol(col_mod, encodeDate(mod)); this.entry.setCol(col_load, encodeDate(load)); this.entry.setCol(col_fresh, encodeDate(fresh)); - this.entry.setCol(col_referrer, referrer.getBytes()); + this.entry.setCol(col_referrer, (referrer == null) ? 
null : referrer.getBytes()); this.entry.setCol(col_md5, md5); this.entry.setCol(col_size, size); this.entry.setCol(col_wc, wc); @@ -170,7 +169,7 @@ public class indexURLEntry { return kelondroNaturalOrder.encodeLong(d.getTime() / 86400000, 4); } - public static byte[] encodeComp(URL url, String descr, String author, String tags, String ETag) { + public static byte[] encodeComp(yacyURL url, String descr, String author, String tags, String ETag) { serverCharBuffer s = new serverCharBuffer(200); s.append(url.toNormalform(false, true)).append(10); s.append(descr).append(10); @@ -190,9 +189,9 @@ public class indexURLEntry { // generates an plasmaLURLEntry using the properties from the argument // the property names must correspond to the one from toString //System.out.println("DEBUG-ENTRY: prop=" + prop.toString()); - URL url; + yacyURL url; try { - url = new URL(crypt.simpleDecode(prop.getProperty("url", ""), null)); + url = new yacyURL(crypt.simpleDecode(prop.getProperty("url", ""), null), prop.getProperty("hash")); } catch (MalformedURLException e) { url = null; } @@ -202,7 +201,7 @@ public class indexURLEntry { String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null); if (ETag == null) ETag = ""; this.entry = rowdef.newEntry(); - this.entry.setCol(col_hash, plasmaURL.urlHash(url), null); + this.entry.setCol(col_hash, url.hash(), null); this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag)); try { this.entry.setCol(col_mod, encodeDate(serverDate.shortDayFormatter.parse(prop.getProperty("mod", "20000101")))); @@ -219,7 +218,7 @@ public class indexURLEntry { } catch (ParseException e) { this.entry.setCol(col_fresh, encodeDate(new Date())); } - this.entry.setCol(col_referrer, prop.getProperty("referrer", plasmaURL.dummyHash).getBytes()); + this.entry.setCol(col_referrer, prop.getProperty("referrer", yacyURL.dummyHash).getBytes()); this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", ""))); this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0"))); this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0"))); @@ -301,6 +300,7 @@ public class indexURLEntry { ArrayList cl = nxTools.strings(this.entry.getCol("comp", null), "UTF-8"); return new indexURLEntry.Components( (cl.size() > 0) ? ((String) cl.get(0)).trim() : "", + hash(), (cl.size() > 1) ? ((String) cl.get(1)).trim() : "", (cl.size() > 2) ? ((String) cl.get(2)).trim() : "", (cl.size() > 3) ? 
((String) cl.get(3)).trim() : "", @@ -442,12 +442,12 @@ public class indexURLEntry { } public class Components { - private URL url; + private yacyURL url; private String title, author, tags, ETag; - public Components(String url, String title, String author, String tags, String ETag) { + public Components(String url, String urlhash, String title, String author, String tags, String ETag) { try { - this.url = new URL(url); + this.url = new yacyURL(url, urlhash); } catch (MalformedURLException e) { this.url = null; } @@ -456,18 +456,18 @@ public class indexURLEntry { this.tags = tags; this.ETag = ETag; } - public Components(URL url, String descr, String author, String tags, String ETag) { + public Components(yacyURL url, String descr, String author, String tags, String ETag) { this.url = url; this.title = descr; this.author = author; this.tags = tags; this.ETag = ETag; } - public URL url() { return this.url; } - public String title() { return this.title; } - public String author() { return this.author; } - public String tags() { return this.tags; } - public String ETag() { return this.ETag; } + public yacyURL url() { return this.url; } + public String title() { return this.title; } + public String author() { return this.author; } + public String tags() { return this.tags; } + public String ETag() { return this.ETag; } } } \ No newline at end of file diff --git a/source/de/anomic/kelondro/kelondroAbstractRecords.java b/source/de/anomic/kelondro/kelondroAbstractRecords.java index 6bfefc616..002340989 100644 --- a/source/de/anomic/kelondro/kelondroAbstractRecords.java +++ b/source/de/anomic/kelondro/kelondroAbstractRecords.java @@ -307,7 +307,7 @@ public abstract class kelondroAbstractRecords implements kelondroRecords { } private synchronized void checkConsistency() { - if (debugmode) try { // in debug mode + if ((debugmode) && (entryFile != null)) try { // in debug mode long efl = entryFile.length(); assert ((efl - POS_NODES) % ((long) recordsize)) == 0 : "rest = " + ((entryFile.length() - POS_NODES) % ((long) recordsize)) + ", USEDC = " + this.USEDC + ", FREEC = " + this.FREEC + ", recordsize = " + recordsize + ", file = " + filename; long calculated_used = (efl - POS_NODES) / ((long) recordsize); diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 06d18b4ec..6406d1178 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -41,11 +41,11 @@ import java.util.TimeZone; import java.util.TreeMap; import de.anomic.index.indexContainer; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; import de.anomic.server.serverMemory; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class kelondroCollectionIndex { @@ -762,7 +762,7 @@ public class kelondroCollectionIndex { while (i.hasNext()) { entry = (kelondroRow.Entry) i.next(); ref = entry.getColBytes(0); - if ((ref.length == 12) && (plasmaURL.probablyRootURL(new String(ref)))) { + if ((ref.length == 12) && (yacyURL.probablyRootURL(new String(ref)))) { survival.addUnique(entry); i.remove(); } diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java index 9cd2633e1..bca4e7108 100644 --- a/source/de/anomic/net/natLib.java +++ b/source/de/anomic/net/natLib.java @@ -53,6 +53,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverDomains; import 
de.anomic.tools.disorderHeap; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; public class natLib { @@ -64,7 +65,7 @@ public class natLib { rm status.htm */ try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null, null, null)); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://192.168.0.1:80/status.htm", null), "192.168.0.1", 5000, "admin", password, null, null, null)); x = nxTools.grep(x, 1, "IP Address"); if ((x == null) || (x.size() == 0)) return null; String line = nxTools.tail1(x); @@ -76,7 +77,7 @@ public class natLib { private static String getWhatIsMyIP() { try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null, null, null)); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://www.whatismyip.com/", null), "www.whatsmyip.com", 5000, null, null, null, null, null)); x = nxTools.grep(x, 0, "Your IP is"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 4); @@ -87,7 +88,7 @@ public class natLib { private static String getStanford() { try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null, null, null)); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null), "www.slac.stanford.edu", 5000, null, null, null, null, null)); x = nxTools.grep(x, 0, "firewall protecting your browser"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 7); @@ -98,7 +99,7 @@ public class natLib { private static String getIPID() { try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null, null, null), "UTF-8"); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://ipid.shat.net/", null), "ipid.shat.net", 5000, null, null, null, null, null), "UTF-8"); x = nxTools.grep(x, 2, "Your IP address"); String line = nxTools.tail1(x); return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1); diff --git a/source/de/anomic/plasma/cache/IResourceInfo.java b/source/de/anomic/plasma/cache/IResourceInfo.java index 3edfd5440..8b58eecb0 100644 --- a/source/de/anomic/plasma/cache/IResourceInfo.java +++ b/source/de/anomic/plasma/cache/IResourceInfo.java @@ -51,7 +51,7 @@ package de.anomic.plasma.cache; import java.util.Date; import java.util.Map; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; /** * A class containing metadata about a downloaded resource @@ -66,13 +66,13 @@ public interface IResourceInfo { /** * @return the URL of this content */ - public URL getUrl(); + public yacyURL getUrl(); /** * Returns the referer URL of this URL * @return referer URL */ - public URL getRefererUrl(); + public yacyURL getRefererUrl(); /** * Returns the mimetype of the cached object @@ -92,11 +92,6 @@ public interface IResourceInfo { */ public Date getModificationDate(); - /** - * @return the url hash of the content URL - */ - public String getUrlHash(); - /** * Specifies if the resource was requested with a * if modified since date diff --git a/source/de/anomic/plasma/cache/ResourceInfoFactory.java b/source/de/anomic/plasma/cache/ResourceInfoFactory.java index 54f281dc4..1af06c977 100644 --- a/source/de/anomic/plasma/cache/ResourceInfoFactory.java +++ b/source/de/anomic/plasma/cache/ResourceInfoFactory.java @@ -51,11 +51,12 @@ package 
de.anomic.plasma.cache; import java.lang.reflect.Constructor; import java.util.Map; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; + public class ResourceInfoFactory { public IResourceInfo buildResourceInfoObj( - URL resourceURL, + yacyURL resourceURL, Map resourceMetadata ) throws UnsupportedProtocolException, IllegalAccessException { @@ -73,7 +74,7 @@ public class ResourceInfoFactory { // getting the constructor final Constructor classConstructor = moduleClass.getConstructor( new Class[] { - URL.class, + yacyURL.class, Map.class } ); diff --git a/source/de/anomic/plasma/cache/ftp/ResourceInfo.java b/source/de/anomic/plasma/cache/ftp/ResourceInfo.java index bf98cc639..76c7fb63e 100644 --- a/source/de/anomic/plasma/cache/ftp/ResourceInfo.java +++ b/source/de/anomic/plasma/cache/ftp/ResourceInfo.java @@ -51,10 +51,9 @@ import java.util.Date; import java.util.HashMap; import java.util.Map; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; +import de.anomic.yacy.yacyURL; public class ResourceInfo implements IResourceInfo { @@ -62,8 +61,7 @@ public class ResourceInfo implements IResourceInfo { public static final String MODIFICATION_DATE = "modificationDate"; public static final String REFERER = "referer"; - private URL url; - private String urlHash; + private yacyURL url; private HashMap propertyMap; /** @@ -71,24 +69,22 @@ public class ResourceInfo implements IResourceInfo { * @param objectURL * @param objectInfo */ - public ResourceInfo(URL objectURL, Map objectInfo) { + public ResourceInfo(yacyURL objectURL, Map objectInfo) { if (objectURL == null) throw new NullPointerException(); if (objectInfo == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); // create the http header object this.propertyMap = new HashMap(objectInfo); } - public ResourceInfo(URL objectURL, String refererUrl, String mimeType, Date fileDate) { + public ResourceInfo(yacyURL objectURL, String refererUrl, String mimeType, Date fileDate) { if (objectURL == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); // create the http header object this.propertyMap = new HashMap(); @@ -113,22 +109,18 @@ public class ResourceInfo implements IResourceInfo { return new Date(Long.valueOf((String) this.propertyMap.get(MODIFICATION_DATE)).longValue()); } - public URL getRefererUrl() { + public yacyURL getRefererUrl() { try { - return (this.propertyMap == null) ? null : new URL((String)this.propertyMap.get(REFERER)); + return (this.propertyMap == null) ? 
null : new yacyURL((String)this.propertyMap.get(REFERER), null); } catch (MalformedURLException e) { return null; } } - public URL getUrl() { + public yacyURL getUrl() { return this.url; } - - public String getUrlHash() { - return this.urlHash; - } - + public Date ifModifiedSince() { return null; } diff --git a/source/de/anomic/plasma/cache/http/ResourceInfo.java b/source/de/anomic/plasma/cache/http/ResourceInfo.java index d3cc7a657..c942cd2a5 100644 --- a/source/de/anomic/plasma/cache/http/ResourceInfo.java +++ b/source/de/anomic/plasma/cache/http/ResourceInfo.java @@ -52,16 +52,14 @@ import java.util.Date; import java.util.Map; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; import de.anomic.server.serverDate; +import de.anomic.yacy.yacyURL; public class ResourceInfo implements IResourceInfo { - private URL url; - private String urlHash; + private yacyURL url; private httpHeader responseHeader; private httpHeader requestHeader; @@ -70,25 +68,23 @@ public class ResourceInfo implements IResourceInfo { * @param objectURL * @param objectInfo */ - public ResourceInfo(URL objectURL, Map objectInfo) { + public ResourceInfo(yacyURL objectURL, Map objectInfo) { if (objectURL == null) throw new NullPointerException(); if (objectInfo == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); // create the http header object this.responseHeader = new httpHeader(null, objectInfo); } - public ResourceInfo(URL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) { + public ResourceInfo(yacyURL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) { if (objectURL == null) throw new NullPointerException(); if (responseHeaders == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); this.requestHeader = requestHeaders; this.responseHeader = responseHeaders; @@ -131,10 +127,10 @@ public class ResourceInfo implements IResourceInfo { return docDate; } - public URL getRefererUrl() { + public yacyURL getRefererUrl() { if (this.requestHeader == null) return null; try { - return new URL((String) this.requestHeader.get(httpHeader.REFERER, "")); + return new yacyURL((String) this.requestHeader.get(httpHeader.REFERER, ""), null); } catch (Exception e) { return null; } @@ -143,7 +139,7 @@ public class ResourceInfo implements IResourceInfo { /** * @see de.anomic.plasma.cache.IResourceInfo#getUrl() */ - public URL getUrl() { + public yacyURL getUrl() { return this.url; } @@ -151,7 +147,7 @@ public class ResourceInfo implements IResourceInfo { * @see de.anomic.plasma.cache.IResourceInfo#getUrlHash() */ public String getUrlHash() { - return this.urlHash; + return this.url.hash(); } public void setRequestHeader(httpHeader reqestHeader) { diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index 88a46c0cc..d64b9a088 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -49,10 +49,9 @@ package de.anomic.plasma.crawler; import java.io.File; import java.io.IOException; +import java.net.MalformedURLException; import java.util.Date; -import 
de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlLoaderMessage; import de.anomic.plasma.plasmaCrawlProfile; @@ -61,6 +60,7 @@ import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlWorker { @@ -86,7 +86,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW * Crawl job specific variables * ============================================================ */ public plasmaCrawlLoaderMessage theMsg; - protected URL url; + protected yacyURL url; protected String name; protected String refererURLString; protected String initiator; @@ -281,7 +281,12 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW this.errorMessage = failreason; // convert the referrer URL into a hash value - String referrerHash = (this.refererURLString==null)?null:plasmaURL.urlHash(this.refererURLString); + String referrerHash; + try { + referrerHash = (this.refererURLString == null) ? null : (new yacyURL(this.refererURLString, null)).hash(); + } catch (MalformedURLException e) { + referrerHash = null; + } // create a new errorURL DB entry plasmaCrawlEntry bentry = new plasmaCrawlEntry( diff --git a/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java b/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java index ba5b8a60e..fbaaefcbc 100644 --- a/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java @@ -55,7 +55,6 @@ import java.io.PrintStream; import java.io.PrintWriter; import java.util.Date; -import de.anomic.net.URL; import de.anomic.net.ftpc; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaHTCache; @@ -68,6 +67,7 @@ import de.anomic.plasma.crawler.plasmaCrawlWorker; import de.anomic.plasma.crawler.plasmaCrawlerPool; import de.anomic.plasma.plasmaHTCache.Entry; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorker { @@ -188,7 +188,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke if (isFolder) { fullPath = fullPath + "/"; file = ""; - this.url = URL.newURL(this.url,fullPath); + this.url = yacyURL.newURL(this.url,fullPath); } } diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java index d977452ca..329d06da1 100644 --- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java @@ -60,8 +60,6 @@ import de.anomic.http.httpc; import de.anomic.http.httpdBoundedSizeOutputStream; import de.anomic.http.httpdLimitExceededException; import de.anomic.http.httpdProxyHandler; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaCrawlLoader; import de.anomic.plasma.plasmaHTCache; @@ -74,6 +72,7 @@ import de.anomic.plasma.crawler.plasmaCrawlerPool; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverSystem; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class CrawlWorker extends AbstractCrawlWorker { @@ -140,7 +139,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { return 
load(DEFAULT_CRAWLING_RETRY_COUNT); } - protected plasmaHTCache.Entry createCacheEntry(URL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) { + protected plasmaHTCache.Entry createCacheEntry(yacyURL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) { IResourceInfo resourceInfo = new ResourceInfo(requestUrl,requestHeader,response.responseHeader); return plasmaHTCache.newEntry( requestDate, @@ -314,7 +313,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { } // normalizing URL - URL redirectionUrl = URL.newURL(this.url, redirectionUrlString); + yacyURL redirectionUrl = yacyURL.newURL(this.url, redirectionUrlString); // returning the used httpc httpc.returnInstance(remote); @@ -332,7 +331,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { } // generating url hash - String urlhash = plasmaURL.urlHash(redirectionUrl); + String urlhash = redirectionUrl.hash(); // removing url from loader queue plasmaCrawlLoader.switchboard.noticeURL.remove(urlhash); diff --git a/source/de/anomic/plasma/dbImport/SitemapImporter.java b/source/de/anomic/plasma/dbImport/SitemapImporter.java index 57f2ff0d1..8109f12e0 100644 --- a/source/de/anomic/plasma/dbImport/SitemapImporter.java +++ b/source/de/anomic/plasma/dbImport/SitemapImporter.java @@ -47,14 +47,14 @@ package de.anomic.plasma.dbImport; import java.util.HashMap; import de.anomic.data.SitemapParser; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.yacy.yacyURL; public class SitemapImporter extends AbstractImporter implements dbImporter { private SitemapParser parser = null; - private URL sitemapURL = null; + private yacyURL sitemapURL = null; public SitemapImporter(plasmaSwitchboard switchboard) { super("sitemap",switchboard); @@ -110,7 +110,7 @@ public class SitemapImporter extends AbstractImporter implements dbImporter { try { // getting the sitemap URL - this.sitemapURL = new URL((String)initParams.get("sitemapURL")); + this.sitemapURL = new yacyURL((String)initParams.get("sitemapURL"), null); // getting the crawling profile to use plasmaCrawlProfile.entry profileEntry = this.sb.profiles.getEntry((String)initParams.get("crawlingProfile")); diff --git a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java index be2b0e6d4..55e5ca384 100644 --- a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java @@ -142,13 +142,13 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor this.urlCount++; nextEntry = this.importNurlDB.pop(stackTypes[stackType], false); - nextHash = nextEntry.urlhash(); + nextHash = nextEntry.url().hash(); } else { if (!entryIter.hasNext()) break; this.urlCount++; nextEntry = (plasmaCrawlEntry) entryIter.next(); - nextHash = nextEntry.urlhash(); + nextHash = nextEntry.url().hash(); } } catch (IOException e) { this.log.logWarning("Unable to import entry: " + e.toString()); diff --git a/source/de/anomic/plasma/parser/AbstractParser.java b/source/de/anomic/plasma/parser/AbstractParser.java index 4de015560..caa1c5001 100644 --- a/source/de/anomic/plasma/parser/AbstractParser.java +++ b/source/de/anomic/plasma/parser/AbstractParser.java @@ -52,11 +52,11 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import de.anomic.net.URL; import 
de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.server.serverThread; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; /** * New classes implementing the {@link de.anomic.plasma.parser.Parser} interface @@ -139,7 +139,7 @@ public abstract class AbstractParser implements Parser{ return tempFile; } - public int parseDir(URL location, String prefix, File dir, plasmaParserDocument doc) + public int parseDir(yacyURL location, String prefix, File dir, plasmaParserDocument doc) throws ParserException, InterruptedException, IOException { if (!dir.isDirectory()) throw new ParserException("tried to parse ordinary file " + dir + " as directory", location); @@ -153,7 +153,7 @@ public abstract class AbstractParser implements Parser{ if (file.isDirectory()) { result += parseDir(location, prefix, file, doc); } else try { - URL url = URL.newURL(location, "/" + prefix + "/" + yacyURL url = yacyURL.newURL(location, "/" + prefix + "/" // XXX: workaround for relative paths within document + file.getPath().substring(file.getPath().indexOf(File.separatorChar) + 1) + "/" + file.getName()); @@ -185,7 +185,7 @@ public abstract class AbstractParser implements Parser{ * @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, byte[]) */ public plasmaParserDocument parse( - URL location, + yacyURL location, String mimeType, String charset, byte[] source @@ -220,7 +220,7 @@ public abstract class AbstractParser implements Parser{ * @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.File) */ public plasmaParserDocument parse( - URL location, + yacyURL location, String mimeType, String charset, File sourceFile @@ -254,7 +254,7 @@ public abstract class AbstractParser implements Parser{ * * @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream) */ - public abstract plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException; + public abstract plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException; /** * @return Returns a list of library names that are needed by this parser diff --git a/source/de/anomic/plasma/parser/Parser.java b/source/de/anomic/plasma/parser/Parser.java index a1adeae06..5b0ec0aad 100644 --- a/source/de/anomic/plasma/parser/Parser.java +++ b/source/de/anomic/plasma/parser/Parser.java @@ -48,9 +48,9 @@ import java.io.File; import java.io.InputStream; import java.util.Hashtable; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; /** * This interface defines a list of methods that needs to be implemented @@ -74,7 +74,7 @@ public interface Parser { * * @throws ParserException if the content could not be parsed properly */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, byte[] source) + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, byte[] source) throws ParserException, InterruptedException; /** @@ -88,7 +88,7 @@ public interface Parser { * * @throws ParserException if the content could not be parsed properly */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File 
sourceFile) throws ParserException, InterruptedException; /** @@ -102,7 +102,7 @@ public interface Parser { * * @throws ParserException if the content could not be parsed properly */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException; /** diff --git a/source/de/anomic/plasma/parser/ParserException.java b/source/de/anomic/plasma/parser/ParserException.java index c05d9a484..4d94b7f9a 100644 --- a/source/de/anomic/plasma/parser/ParserException.java +++ b/source/de/anomic/plasma/parser/ParserException.java @@ -44,13 +44,13 @@ package de.anomic.plasma.parser; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEURL; +import de.anomic.yacy.yacyURL; public class ParserException extends Exception { private String errorCode = null; - private URL url = null; + private yacyURL url = null; private static final long serialVersionUID = 1L; @@ -58,21 +58,21 @@ public class ParserException extends Exception super(); } - public ParserException(String message, URL url) { + public ParserException(String message, yacyURL url) { this(message,url,plasmaCrawlEURL.DENIED_PARSER_ERROR); } - public ParserException(String message, URL url, String errorCode) { + public ParserException(String message, yacyURL url, String errorCode) { super(message); this.errorCode = errorCode; this.url = url; } - public ParserException(String message, URL url, Throwable cause) { + public ParserException(String message, yacyURL url, Throwable cause) { this(message,url,cause,plasmaCrawlEURL.DENIED_PARSER_ERROR); } - public ParserException(String message, URL url, Throwable cause, String errorCode) { + public ParserException(String message, yacyURL url, Throwable cause, String errorCode) { super(message, cause); this.errorCode = errorCode; this.url = url; @@ -82,7 +82,7 @@ public class ParserException extends Exception return this.errorCode; } - public URL getURL() { + public yacyURL getURL() { return this.url; } } diff --git a/source/de/anomic/plasma/parser/bzip/bzipParser.java b/source/de/anomic/plasma/parser/bzip/bzipParser.java index 53b2630dd..77bed5a05 100644 --- a/source/de/anomic/plasma/parser/bzip/bzipParser.java +++ b/source/de/anomic/plasma/parser/bzip/bzipParser.java @@ -50,12 +50,13 @@ import java.util.Hashtable; import org.apache.tools.bzip2.CBZip2InputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; + public class bzipParser extends AbstractParser implements Parser { /** @@ -87,7 +88,7 @@ public class bzipParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File tempFile = null; try { diff --git a/source/de/anomic/plasma/parser/doc/docParser.java b/source/de/anomic/plasma/parser/doc/docParser.java index 10dfdaa01..d25e4abcb 100644 --- a/source/de/anomic/plasma/parser/doc/docParser.java +++ b/source/de/anomic/plasma/parser/doc/docParser.java @@ -48,15 
+48,13 @@ import java.util.Hashtable; import org.textmining.text.extraction.WordExtractor; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; -public class docParser -extends AbstractParser -implements Parser { +public class docParser extends AbstractParser implements Parser { /** * a list of mime types that are supported by this parser class @@ -78,7 +76,7 @@ implements Parser { this.parserName = "Word Document Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { diff --git a/source/de/anomic/plasma/parser/gzip/gzipParser.java b/source/de/anomic/plasma/parser/gzip/gzipParser.java index a289eb361..c057377f8 100644 --- a/source/de/anomic/plasma/parser/gzip/gzipParser.java +++ b/source/de/anomic/plasma/parser/gzip/gzipParser.java @@ -49,12 +49,12 @@ import java.io.InputStream; import java.util.Hashtable; import java.util.zip.GZIPInputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; public class gzipParser extends AbstractParser implements Parser { @@ -83,7 +83,7 @@ public class gzipParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File tempFile = null; try { diff --git a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java index 0d4f90b7e..02fda681b 100644 --- a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java +++ b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java @@ -56,17 +56,15 @@ import net.sf.jmimemagic.MagicMatchNotFoundException; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; -public class mimeTypeParser -extends AbstractParser -implements Parser { +public class mimeTypeParser extends AbstractParser implements Parser { /** * a list of mime types that are supported by this parser class @@ -127,7 +125,7 @@ implements Parser { return null; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { String orgMimeType = mimeType; @@ -188,7 +186,7 @@ implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType,String charset, InputStream source) throws ParserException, 
InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType,String charset, InputStream source) throws ParserException, InterruptedException { File dstFile = null; try { dstFile = File.createTempFile("mimeTypeParser",".tmp"); diff --git a/source/de/anomic/plasma/parser/odt/odtParser.java b/source/de/anomic/plasma/parser/odt/odtParser.java index c0c7681fa..262e29760 100644 --- a/source/de/anomic/plasma/parser/odt/odtParser.java +++ b/source/de/anomic/plasma/parser/odt/odtParser.java @@ -59,7 +59,6 @@ import com.catcode.odf.OpenDocumentMetadata; import com.catcode.odf.OpenDocumentTextInputStream; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; @@ -67,6 +66,7 @@ import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class odtParser extends AbstractParser implements Parser { @@ -95,7 +95,7 @@ public class odtParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File dest) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File dest) throws ParserException, InterruptedException { Writer writer = null; File writerFile = null; @@ -209,7 +209,7 @@ public class odtParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File dest = null; try { // creating a tempfile @@ -241,7 +241,7 @@ public class odtParser extends AbstractParser implements Parser { if (args.length != 1) return; // getting the content URL - URL contentUrl = new URL(args[0]); + yacyURL contentUrl = new yacyURL(args[0], null); // creating a new parser odtParser testParser = new odtParser(); diff --git a/source/de/anomic/plasma/parser/pdf/pdfParser.java b/source/de/anomic/plasma/parser/pdf/pdfParser.java index 0e491ef35..2a2eab041 100644 --- a/source/de/anomic/plasma/parser/pdf/pdfParser.java +++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java @@ -55,13 +55,13 @@ import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDDocumentInformation; import org.pdfbox.util.PDFTextStripper; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCharBuffer; +import de.anomic.yacy.yacyURL; public class pdfParser extends AbstractParser implements Parser { @@ -89,7 +89,7 @@ public class pdfParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { PDDocument theDocument = null; Writer 
writer = null; diff --git a/source/de/anomic/plasma/parser/ppt/pptParser.java b/source/de/anomic/plasma/parser/ppt/pptParser.java index 6d6fa20d3..bd276028f 100644 --- a/source/de/anomic/plasma/parser/ppt/pptParser.java +++ b/source/de/anomic/plasma/parser/ppt/pptParser.java @@ -50,11 +50,11 @@ import java.util.Hashtable; import org.apache.poi.hslf.extractor.PowerPointExtractor; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; public class pptParser extends AbstractParser implements Parser { @@ -88,7 +88,7 @@ public class pptParser extends AbstractParser implements Parser { * parses the source documents and returns a plasmaParserDocument containing * all extracted information about the parsed document */ - public plasmaParserDocument parse(URL location, String mimeType, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { diff --git a/source/de/anomic/plasma/parser/ps/psParser.java b/source/de/anomic/plasma/parser/ps/psParser.java index 06abdb15f..3d7900647 100644 --- a/source/de/anomic/plasma/parser/ps/psParser.java +++ b/source/de/anomic/plasma/parser/ps/psParser.java @@ -52,12 +52,12 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.util.Hashtable; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class psParser extends AbstractParser implements Parser { @@ -114,7 +114,7 @@ public class psParser extends AbstractParser implements Parser { } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { File outputFile = null; try { @@ -281,7 +281,7 @@ public class psParser extends AbstractParser implements Parser { super.reset(); } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File tempFile = null; try { diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java index 1ac0be0dd..1296eb689 100644 --- a/source/de/anomic/plasma/parser/rpm/rpmParser.java +++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java @@ -53,12 +53,12 @@ import com.jguild.jrpm.io.RPMFile; import com.jguild.jrpm.io.datatype.DataTypeIf; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; /** * @author theli @@ -92,7 +92,7 @@ public class rpmParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String 
mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException { File dstFile = null; try { @@ -106,7 +106,7 @@ public class rpmParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { RPMFile rpmFile = null; try { String summary = null, description = null, packager = null, name = sourceFile.getName(); @@ -177,7 +177,7 @@ public class rpmParser extends AbstractParser implements Parser { public static void main(String[] args) { try { - URL contentUrl = new URL(args[0]); + yacyURL contentUrl = new yacyURL(args[0], null); rpmParser testParser = new rpmParser(); byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null, null); diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index 97fb61610..4ee94ea4f 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -56,7 +56,6 @@ import de.anomic.htmlFilter.htmlFilterAbstractScraper; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterWriter; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; @@ -66,6 +65,7 @@ import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; import de.anomic.xml.rssReader; import de.anomic.xml.rssReader.Item; +import de.anomic.yacy.yacyURL; public class rssParser extends AbstractParser implements Parser { @@ -92,7 +92,7 @@ public class rssParser extends AbstractParser implements Parser { this.parserName = "Rich Site Summary/Atom Feed Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { LinkedList feedSections = new LinkedList(); @@ -114,7 +114,7 @@ public class rssParser extends AbstractParser implements Parser { String feedDescription = reader.getChannel().getDescription(); if (reader.getImage() != null) { - images.add(new htmlFilterImageEntry(new URL(reader.getImage()), feedTitle, -1, -1)); + images.add(new htmlFilterImageEntry(new yacyURL(reader.getImage(), null), feedTitle, -1, -1)); } // loop through the feed items @@ -126,7 +126,7 @@ public class rssParser extends AbstractParser implements Parser { Item item = reader.getItem(i); String itemTitle = item.getTitle(); - URL itemURL = new URL(item.getLink()); + yacyURL itemURL = new yacyURL(item.getLink(), null); String itemDescr = item.getDescription(); String itemCreator = item.getCreator(); if (itemCreator != null && itemCreator.length() > 0) authors.append(",").append(itemCreator); diff --git a/source/de/anomic/plasma/parser/rtf/rtfParser.java b/source/de/anomic/plasma/parser/rtf/rtfParser.java index c3ef24b32..133bbe20b 100644 --- a/source/de/anomic/plasma/parser/rtf/rtfParser.java +++ b/source/de/anomic/plasma/parser/rtf/rtfParser.java @@ -49,15 
+49,13 @@ import java.util.Hashtable; import javax.swing.text.DefaultStyledDocument; import javax.swing.text.rtf.RTFEditorKit; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; -public class rtfParser -extends AbstractParser -implements Parser { +public class rtfParser extends AbstractParser implements Parser { /** * a list of mime types that are supported by this parser class @@ -80,7 +78,7 @@ implements Parser { this.parserName = "Rich Text Format Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { diff --git a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java index 64ace4f37..cea2a6066 100644 --- a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java +++ b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java @@ -48,13 +48,13 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCachedFileOutputStream; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; import SevenZip.ArchiveExtractCallback; import SevenZip.Archive.IInArchive; @@ -117,7 +117,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback { plasmaParserDocument theDoc; // workaround for relative links in file, normally '#' shall be used behind the location, see // below for reversion of the effects - URL url = URL.newURL(doc.getLocation(), this.prefix + "/" + super.filePath); + yacyURL url = yacyURL.newURL(doc.getLocation(), this.prefix + "/" + super.filePath); String mime = plasmaParser.getMimeTypeByFileExt(super.filePath.substring(super.filePath.lastIndexOf('.') + 1)); if (this.cfos.isFallback()) { theDoc = this.parser.parseSource(url, mime, null, this.cfos.getContentFile()); diff --git a/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java b/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java index 9032a9603..0c15fa30f 100644 --- a/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java +++ b/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java @@ -51,13 +51,13 @@ import SevenZip.IInStream; import SevenZip.MyRandomAccessFile; import SevenZip.Archive.SevenZip.Handler; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCachedFileOutputStream; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class sevenzipParser extends AbstractParser implements Parser { @@ -81,7 +81,7 @@ public class sevenzipParser extends AbstractParser implements Parser { super.parserName = "7zip Archive Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String 
mimeType, String charset, IInStream source, long maxRamSize) throws ParserException, InterruptedException { plasmaParserDocument doc = new plasmaParserDocument(location, mimeType, charset); Handler archive; @@ -111,12 +111,12 @@ public class sevenzipParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, byte[] source) throws ParserException, InterruptedException { return parse(location, mimeType, charset, new ByteArrayIInStream(source), Parser.MAX_KEEP_IN_MEMORY_SIZE - source.length); } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { try { return parse(location, mimeType, charset, new MyRandomAccessFile(sourceFile, "r"), Parser.MAX_KEEP_IN_MEMORY_SIZE); @@ -125,7 +125,7 @@ public class sevenzipParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { serverCachedFileOutputStream cfos = new serverCachedFileOutputStream(Parser.MAX_KEEP_IN_MEMORY_SIZE); diff --git a/source/de/anomic/plasma/parser/swf/swfParser.java b/source/de/anomic/plasma/parser/swf/swfParser.java index e5e92e4ce..9e47ff7db 100644 --- a/source/de/anomic/plasma/parser/swf/swfParser.java +++ b/source/de/anomic/plasma/parser/swf/swfParser.java @@ -44,7 +44,6 @@ package de.anomic.plasma.parser.swf; import java.io.InputStream; -import de.anomic.net.URL; import java.util.Hashtable; import java.util.HashMap; @@ -54,6 +53,7 @@ import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; public class swfParser extends AbstractParser implements Parser { @@ -90,7 +90,7 @@ public class swfParser extends AbstractParser implements Parser { * parses the source documents and returns a plasmaParserDocument containing * all extracted information about the parsed document */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { SWF2HTML swf2html = new SWF2HTML(); diff --git a/source/de/anomic/plasma/parser/tar/tarParser.java b/source/de/anomic/plasma/parser/tar/tarParser.java index f547e1af5..35ef62e4e 100644 --- a/source/de/anomic/plasma/parser/tar/tarParser.java +++ b/source/de/anomic/plasma/parser/tar/tarParser.java @@ -59,7 +59,6 @@ import java.util.zip.GZIPInputStream; import com.ice.tar.TarEntry; import com.ice.tar.TarInputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; @@ -67,6 +66,7 @@ import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class tarParser extends AbstractParser 
implements Parser { @@ -97,7 +97,7 @@ public class tarParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { long docTextLength = 0; OutputStream docText = null; @@ -166,7 +166,7 @@ public class tarParser extends AbstractParser implements Parser { checkInterruption(); // parsing the content - subDoc = theParser.parseSource(URL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile); + subDoc = theParser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile); } catch (ParserException e) { this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getMessage()); } finally { diff --git a/source/de/anomic/plasma/parser/vcf/vcfParser.java b/source/de/anomic/plasma/parser/vcf/vcfParser.java index 4a6f2f108..53f8ba869 100644 --- a/source/de/anomic/plasma/parser/vcf/vcfParser.java +++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java @@ -57,11 +57,11 @@ import org.apache.commons.codec.net.QuotedPrintableCodec; import de.anomic.http.httpc; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; /** * Vcard specification: http://www.imc.org/pdi/vcard-21.txt @@ -97,7 +97,7 @@ public class vcfParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { StringBuffer parsedTitle = new StringBuffer(); @@ -212,7 +212,7 @@ public class vcfParser extends AbstractParser implements Parser { parsedData.clear(); } else if (key.toUpperCase().startsWith("URL")) { try { - URL newURL = new URL(value); + yacyURL newURL = new yacyURL(value, null); anchors.put(newURL.toString(),newURL.toString()); //parsedData.put(key,value); } catch (MalformedURLException ex) {/* ignore this */} @@ -268,7 +268,7 @@ public class vcfParser extends AbstractParser implements Parser { public static void main(String[] args) { try { - URL contentUrl = new URL(args[0]); + yacyURL contentUrl = new yacyURL(args[0], null); vcfParser testParser = new vcfParser(); byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null, null); diff --git a/source/de/anomic/plasma/parser/xls/xlsParser.java b/source/de/anomic/plasma/parser/xls/xlsParser.java index ea5de3899..17780429f 100644 --- a/source/de/anomic/plasma/parser/xls/xlsParser.java +++ b/source/de/anomic/plasma/parser/xls/xlsParser.java @@ -56,11 +56,11 @@ import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; 
public class xlsParser extends AbstractParser implements Parser, HSSFListener { @@ -102,7 +102,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener { * parses the source documents and returns a plasmaParserDocument containing * all extracted information about the parsed document */ - public plasmaParserDocument parse(URL location, String mimeType, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { diff --git a/source/de/anomic/plasma/parser/zip/zipParser.java b/source/de/anomic/plasma/parser/zip/zipParser.java index 1c6baff16..c4d0d39c7 100644 --- a/source/de/anomic/plasma/parser/zip/zipParser.java +++ b/source/de/anomic/plasma/parser/zip/zipParser.java @@ -57,7 +57,6 @@ import java.util.TreeSet; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; @@ -65,6 +64,7 @@ import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class zipParser extends AbstractParser implements Parser { @@ -95,7 +95,7 @@ public class zipParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { long docTextLength = 0; OutputStream docText = null; @@ -149,7 +149,7 @@ public class zipParser extends AbstractParser implements Parser { serverFileUtils.copy(zippedContent,subDocTempFile,entry.getSize()); // parsing the zip file entry - subDoc = theParser.parseSource(URL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile); + subDoc = theParser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile); } catch (ParserException e) { this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. 
" + e.getMessage()); } finally { diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index 177631ee3..fe2ed57e8 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -58,7 +58,6 @@ import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; import de.anomic.kelondro.kelondroAbstractRecords; -import de.anomic.server.serverDomains; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -269,24 +268,24 @@ public class plasmaCrawlBalancer { public synchronized void push(plasmaCrawlEntry entry) throws IOException { assert entry != null; - if (urlFileIndex.has(entry.urlhash().getBytes())) { - serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " in " + stackname + " - fixed"); + if (urlFileIndex.has(entry.url().hash().getBytes())) { + serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed"); return; } // extend domain stack - String dom = entry.urlhash().substring(6); + String dom = entry.url().hash().substring(6); LinkedList domainList = (LinkedList) domainStacks.get(dom); if (domainList == null) { // create new list domainList = new LinkedList(); synchronized (domainStacks) { - domainList.add(entry.urlhash()); + domainList.add(entry.url().hash()); domainStacks.put(dom, domainList); } } else { // extend existent domain list - domainList.addLast(entry.urlhash()); + domainList.addLast(entry.url().hash()); } // add to index @@ -430,7 +429,7 @@ public class plasmaCrawlBalancer { return null; } plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry); - long minimumDelta = (serverDomains.isLocal(crawlEntry.url())) ? minimumLocalDelta : minimumGlobalDelta; + long minimumDelta = (crawlEntry.url().isLocal()) ? minimumLocalDelta : minimumGlobalDelta; plasmaCrawlRobotsTxt.Entry robotsEntry = plasmaSwitchboard.robots.getEntry(crawlEntry.url().getHost()); Integer hostDelay = (robotsEntry == null) ? null : robotsEntry.getCrawlDelay(); long genericDelta = ((robotsEntry == null) || (hostDelay == null)) ? 
minimumDelta : Math.max(minimumDelta, hostDelay.intValue() * 1000); diff --git a/source/de/anomic/plasma/plasmaCrawlEntry.java b/source/de/anomic/plasma/plasmaCrawlEntry.java index 0d416d4e2..c3ba841a7 100644 --- a/source/de/anomic/plasma/plasmaCrawlEntry.java +++ b/source/de/anomic/plasma/plasmaCrawlEntry.java @@ -34,9 +34,9 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public class plasmaCrawlEntry { @@ -63,9 +63,8 @@ public class plasmaCrawlEntry { private String initiator; // the initiator hash, is NULL or "" if it is the own proxy; // if this is generated by a crawl, the own peer hash in entered - private String urlhash; // the url's hash private String referrer; // the url's referrer hash - private URL url; // the url as string + private yacyURL url; // the url as string private String name; // the name of the url, from anchor tag name private long appdate; // the time when the url was first time appeared private long loaddate; // the time when the url was loaded @@ -78,7 +77,7 @@ public class plasmaCrawlEntry { private kelondroBitfield flags; private int handle; - public plasmaCrawlEntry(URL url) { + public plasmaCrawlEntry(yacyURL url) { this(yacyCore.seedDB.mySeed.hash, url, null, null, new Date(), null, 0, 0, 0); } @@ -95,7 +94,7 @@ public class plasmaCrawlEntry { */ public plasmaCrawlEntry( String initiator, - URL url, + yacyURL url, String referrer, String name, Date appdate, @@ -106,10 +105,9 @@ public class plasmaCrawlEntry { ) { // create new entry and store it into database assert appdate != null; - this.urlhash = plasmaURL.urlHash(url); this.initiator = initiator; this.url = url; - this.referrer = (referrer == null) ? plasmaURL.dummyHash : referrer; + this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer; this.name = (name == null) ? "" : name; this.appdate = (appdate == null) ? 0 : appdate.getTime(); this.profileHandle = profileHandle; // must not be null @@ -131,10 +129,9 @@ public class plasmaCrawlEntry { private void insertEntry(kelondroRow.Entry entry) throws IOException { String urlstring = entry.getColString(2, null); if (urlstring == null) throw new IOException ("url string is null"); - this.urlhash = entry.getColString(0, null); this.initiator = entry.getColString(1, null); - this.url = new URL(urlstring); - this.referrer = (entry.empty(3)) ? plasmaURL.dummyHash : entry.getColString(3, null); + this.url = new yacyURL(urlstring, entry.getColString(0, null)); + this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null); this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); this.appdate = entry.getColLong(5); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); @@ -168,7 +165,7 @@ public class plasmaCrawlEntry { namebytes = this.name.getBytes(); } byte[][] entry = new byte[][] { - this.urlhash.getBytes(), + this.url.hash().getBytes(), (initiator == null) ? 
"".getBytes() : this.initiator.getBytes(), this.url.toString().getBytes(), this.referrer.getBytes(), @@ -186,16 +183,11 @@ public class plasmaCrawlEntry { return rowdef.newEntry(entry); } - public URL url() { + public yacyURL url() { // the url return url; } - public String urlhash() { - // the hash of this url - return this.urlhash; - } - public String referrerhash() { // the urlhash of a referer url return this.referrer; diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 4492b8b59..055f6fe4c 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -69,11 +69,11 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverCodings; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlLURL { @@ -118,8 +118,8 @@ public final class plasmaCrawlLURL { public synchronized void stack(indexURLEntry e, String initiatorHash, String executorHash, int stackType) { if (e == null) { return; } try { - if (initiatorHash == null) { initiatorHash = plasmaURL.dummyHash; } - if (executorHash == null) { executorHash = plasmaURL.dummyHash; } + if (initiatorHash == null) { initiatorHash = yacyURL.dummyHash; } + if (executorHash == null) { executorHash = yacyURL.dummyHash; } switch (stackType) { case 0: break; case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break; @@ -161,6 +161,7 @@ public final class plasmaCrawlLURL { // - look into the hash cache // - look into the filed properties // if the url cannot be found, this returns null + if (urlHash == null) return null; try { kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes()); if (entry == null) return null; @@ -394,7 +395,7 @@ public final class plasmaCrawlLURL { if ((pos = oldUrlStr.indexOf("://")) != -1) { // trying to correct the url String newUrlStr = "http://" + oldUrlStr.substring(pos + 3); - URL newUrl = new URL(newUrlStr); + yacyURL newUrl = new yacyURL(newUrlStr, null); // doing a http head request to test if the url is correct theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig); @@ -534,7 +535,7 @@ public final class plasmaCrawlLURL { // returns url-hash if (args[0].equals("-h")) try { // arg 1 is url - System.out.println("HASH: " + plasmaURL.urlHash(new URL(args[1]))); + System.out.println("HASH: " + (new yacyURL(args[1], null)).hash()); } catch (MalformedURLException e) {} if (args[0].equals("-l")) try { // arg 1 is path to URLCache diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index 86545d1b2..328d6ec94 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -50,13 +50,13 @@ import java.util.HashSet; import org.apache.commons.pool.impl.GenericKeyedObjectPool; import org.apache.commons.pool.impl.GenericObjectPool; -import de.anomic.net.URL; import de.anomic.plasma.crawler.plasmaCrawlWorker; import de.anomic.plasma.crawler.plasmaCrawlerException; import de.anomic.plasma.crawler.plasmaCrawlerFactory; import de.anomic.plasma.crawler.plasmaCrawlerMsgQueue; import 
de.anomic.plasma.crawler.plasmaCrawlerPool; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlLoader extends Thread { @@ -215,7 +215,7 @@ public final class plasmaCrawlLoader extends Thread { } public plasmaHTCache.Entry loadSync( - URL url, + yacyURL url, String urlName, String referer, String initiator, @@ -267,7 +267,7 @@ public final class plasmaCrawlLoader extends Thread { } public void loadAsync( - URL url, + yacyURL url, String urlName, String referer, String initiator, diff --git a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java index cd6eb1cd8..382f0ff06 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java +++ b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java @@ -42,13 +42,13 @@ package de.anomic.plasma; -import de.anomic.net.URL; import de.anomic.server.serverSemaphore; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlLoaderMessage { public final int crawlingPriority; - public final URL url; + public final yacyURL url; public final String name; public final String referer; public final String initiator; @@ -64,7 +64,7 @@ public final class plasmaCrawlLoaderMessage { // loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) { public plasmaCrawlLoaderMessage( - URL url, + yacyURL url, String name, // the name of the url, from anchor tag name String referer, String initiator, diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 4af393197..5d2fcfc79 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -58,7 +58,6 @@ import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.data.robotsParser; import de.anomic.http.httpc; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroException; @@ -68,12 +67,12 @@ import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroTree; -import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverDomains; import de.anomic.server.serverSemaphore; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlStacker { @@ -190,7 +189,7 @@ public final class plasmaCrawlStacker { } public void enqueue( - URL nexturl, + yacyURL nexturl, String referrerhash, String initiatorHash, String name, @@ -247,7 +246,7 @@ public final class plasmaCrawlStacker { String reason = null; // failure reason // getting the initiator peer hash - if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash; + if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = yacyURL.dummyHash; // strange errors if (nexturlString == null) { @@ -257,21 +256,20 @@ public final class plasmaCrawlStacker { } // getting the referer url and url hash - URL referrerURL = null; + yacyURL referrerURL = null; if (referrerString != null) { try { - referrerURL = new URL(referrerString); + referrerURL = new yacyURL(referrerString, null); } catch (MalformedURLException e) { referrerURL = null; referrerString = null; } } - String referrerHash = (referrerString==null)?null:plasmaURL.urlHash(referrerString); 
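// Illustrative sketch only (not part of this patch): the replacement pattern for the
// removed plasmaURL.urlHash(referrerString) call seen just above. The hash now comes
// from a parsed yacyURL, and a missing or malformed referrer simply yields a null hash.
import java.net.MalformedURLException;
import de.anomic.yacy.yacyURL;

final class ReferrerHashSketch {
    static String referrerHash(String referrerString) {
        if (referrerString == null) return null;
        try {
            // second constructor argument is a known hash, or null to have it computed
            return (new yacyURL(referrerString, null)).hash();
        } catch (MalformedURLException e) {
            return null; // same fallback the patched workers use
        }
    }
}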
// check for malformed urls - URL nexturl = null; + yacyURL nexturl = null; try { - nexturl = new URL(nexturlString); + nexturl = new yacyURL(nexturlString, null); } catch (MalformedURLException e) { reason = plasmaCrawlEURL.DENIED_MALFORMED_URL; this.log.logSevere("Wrong URL in stackCrawl: " + nexturlString + @@ -367,10 +365,9 @@ public final class plasmaCrawlStacker { // check if the url is double registered checkInterruption(); - String nexturlhash = plasmaURL.urlHash(nexturl); - String dbocc = this.sb.urlExists(nexturlhash); + String dbocc = this.sb.urlExists(nexturl.hash()); indexURLEntry oldEntry = null; - oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null); + oldEntry = this.sb.wordIndex.loadedURL.load(nexturl.hash(), null); boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder()); // apply recrawl rule if ((dbocc != null) && (!(recrawl))) { @@ -396,7 +393,7 @@ public final class plasmaCrawlStacker { } // store information - boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); + boolean local = ((initiatorHash.equals(yacyURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); boolean global = (profile != null) && (profile.remoteIndexing()) /* granted */ && @@ -415,7 +412,7 @@ public final class plasmaCrawlStacker { checkInterruption(); plasmaCrawlEntry ne = new plasmaCrawlEntry(initiatorHash, /* initiator, needed for p2p-feedback */ nexturl, /* url clear text string */ - referrerHash, /* last url in crawling queue */ + (referrerURL == null) ? null : referrerURL.hash(), /* last url in crawling queue */ name, /* load date */ loadDate, /* the anchor name */ (profile == null) ? null : profile.handle(), // profile must not be null! 
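For orientation, a hedged sketch of how a balancer entry is assembled after this change: the URL hash travels inside the yacyURL instead of being passed around separately, and the referrer argument is reduced to referrerURL.hash(). The helper name and its parameter list are ours, not from the sources; the trailing 0, 0, 0 mirror the defaults of the one-argument plasmaCrawlEntry constructor shown earlier in this patch.

import java.util.Date;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.yacy.yacyURL;

final class StackEntrySketch {
    // builds the crawl entry the way the patched stacker does
    static plasmaCrawlEntry newEntry(String initiatorHash, yacyURL nexturl, yacyURL referrerURL,
                                     String name, Date loadDate, plasmaCrawlProfile.entry profile) {
        return new plasmaCrawlEntry(
                initiatorHash,                                      // initiator, needed for p2p-feedback
                nexturl,                                            // the URL object now carries its own hash
                (referrerURL == null) ? null : referrerURL.hash(),  // last url in the crawling queue
                name,                                               // anchor name
                loadDate,
                (profile == null) ? null : profile.handle(),        // profile must not be null
                0, 0, 0);                                           // assumed depth/anchor/forkfactor defaults
    }
}

The design point of the patch is visible here: once the hash lives in yacyURL, callers such as the stacker, the balancer and the error DB can all use url().hash() and no longer risk recomputing or desynchronizing a separately stored hash field.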
@@ -551,7 +548,7 @@ public final class plasmaCrawlStacker { synchronized(this.urlEntryHashCache) { kelondroRow.Entry oldValue = this.urlEntryCache.put(newMessage.toRow()); if (oldValue == null) { - insertionDoneSuccessfully = this.urlEntryHashCache.add(newMessage.urlhash()); + insertionDoneSuccessfully = this.urlEntryHashCache.add(newMessage.url().hash()); } } diff --git a/source/de/anomic/plasma/plasmaCrawlZURL.java b/source/de/anomic/plasma/plasmaCrawlZURL.java index 3c9ae85a8..c64513543 100644 --- a/source/de/anomic/plasma/plasmaCrawlZURL.java +++ b/source/de/anomic/plasma/plasmaCrawlZURL.java @@ -37,9 +37,9 @@ import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; -import de.anomic.net.URL; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public class plasmaCrawlZURL { @@ -83,12 +83,12 @@ public class plasmaCrawlZURL { public synchronized Entry newEntry( plasmaCrawlEntry bentry, String executor, Date workdate, int workcount, String anycause) { - if ((executor == null) || (executor.length() < yacySeedDB.commonHashLength)) executor = plasmaURL.dummyHash; + if ((executor == null) || (executor.length() < yacySeedDB.commonHashLength)) executor = yacyURL.dummyHash; if (anycause == null) anycause = "unknown"; return new Entry(bentry, executor, workdate, workcount, anycause); } - public synchronized Entry newEntry(URL url, String anycause) { + public synchronized Entry newEntry(yacyURL url, String anycause) { return new Entry(url, anycause); } @@ -139,13 +139,13 @@ public class plasmaCrawlZURL { public class Entry { plasmaCrawlEntry bentry; // the balancer entry - private String executor; // the crawling initiator - private Date workdate; // the time when the url was last time tried to load - private int workcount; // number of tryings - private String anycause; // string describing reason for load fail - private boolean stored; + private String executor; // the crawling initiator + private Date workdate; // the time when the url was last time tried to load + private int workcount; // number of tryings + private String anycause; // string describing reason for load fail + private boolean stored; - public Entry(URL url, String reason) { + public Entry(yacyURL url, String reason) { this(new plasmaCrawlEntry(url), null, new Date(), 0, reason); } @@ -181,7 +181,7 @@ public class plasmaCrawlZURL { this.workcount = (int) entry.getColLong(3); this.anycause = entry.getColString(4, "UTF-8"); this.bentry = new plasmaCrawlEntry(plasmaCrawlEntry.rowdef.newEntry(entry.getColBytes(5))); - assert ((new String(entry.getColBytes(0))).equals(bentry.urlhash())); + assert ((new String(entry.getColBytes(0))).equals(bentry.url().hash())); return; } @@ -190,7 +190,7 @@ public class plasmaCrawlZURL { if (this.stored) return; if (this.bentry == null) return; kelondroRow.Entry newrow = rowdef.newEntry(); - newrow.setCol(0, this.bentry.urlhash().getBytes()); + newrow.setCol(0, this.bentry.url().hash().getBytes()); newrow.setCol(1, this.executor.getBytes()); newrow.setCol(2, this.workdate.getTime()); newrow.setCol(3, this.workcount); @@ -204,7 +204,7 @@ public class plasmaCrawlZURL { } } - public URL url() { + public yacyURL url() { return this.bentry.url(); } @@ -217,7 +217,7 @@ public class plasmaCrawlZURL { // the result is a String of 12 bytes within a 72-bit space // (each byte has an 6-bit range) // that should be enough for all web pages on the world - 
return this.bentry.urlhash(); + return this.bentry.url().hash(); } public Date workdate() { diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index b79680b83..4b62afd8d 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -61,7 +61,6 @@ import java.io.IOException; import java.io.InputStream; import java.lang.StringBuffer; import java.net.InetAddress; -import java.net.MalformedURLException; import java.util.Collections; import java.util.Date; import java.util.HashSet; @@ -73,12 +72,10 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; import de.anomic.plasma.cache.UnsupportedProtocolException; @@ -92,6 +89,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.enumerateFiles; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaHTCache { @@ -114,6 +112,122 @@ public final class plasmaHTCache { private static ResourceInfoFactory objFactory = new ResourceInfoFactory(); private static serverThread cacheScanThread; + // doctypes: + public static final char DT_PDFPS = 'p'; + public static final char DT_TEXT = 't'; + public static final char DT_HTML = 'h'; + public static final char DT_DOC = 'd'; + public static final char DT_IMAGE = 'i'; + public static final char DT_MOVIE = 'm'; + public static final char DT_FLASH = 'f'; + public static final char DT_SHARE = 's'; + public static final char DT_AUDIO = 'a'; + public static final char DT_BINARY = 'b'; + public static final char DT_UNKNOWN = 'u'; + + // appearance locations: (used for flags) + public static final int AP_TITLE = 0; // title tag from html header + public static final int AP_H1 = 1; // headline - top level + public static final int AP_H2 = 2; // headline, second level + public static final int AP_H3 = 3; // headline, 3rd level + public static final int AP_H4 = 4; // headline, 4th level + public static final int AP_H5 = 5; // headline, 5th level + public static final int AP_H6 = 6; // headline, 6th level + public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam) + public static final int AP_DOM = 8; // word inside an url: in Domain + public static final int AP_PATH = 9; // word inside an url: in path + public static final int AP_IMG = 10; // tag inside image references + public static final int AP_ANCHOR = 11; // anchor description + public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance) + public static final int AP_BOLD = 13; // may be interpreted as emphasized + public static final int AP_ITALICS = 14; // may be interpreted as emphasized + public static final int AP_WEAK = 15; // for Text that is small or bareley visible + public static final int AP_INVISIBLE = 16; // good for spam detection + public static final int AP_TAG = 17; // for tagged indexeing (i.e. 
using mp3 tags) + public static final int AP_AUTHOR = 18; // word appears in author name + public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags) + public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags) + + // URL attributes + public static final int UA_LOCAL = 0; // URL was crawled locally + public static final int UA_TILDE = 1; // tilde appears in URL + public static final int UA_REDIRECT = 2; // The URL is a redirection + + // local flag attributes + public static final char LT_LOCAL = 'L'; + public static final char LT_GLOBAL = 'G'; + + // doctype calculation + public static char docType(yacyURL url) { + String path = url.getPath().toLowerCase(); + // serverLog.logFinest("PLASMA", "docType URL=" + path); + char doctype = DT_UNKNOWN; + if (path.endsWith(".gif")) { doctype = DT_IMAGE; } + else if (path.endsWith(".ico")) { doctype = DT_IMAGE; } + else if (path.endsWith(".bmp")) { doctype = DT_IMAGE; } + else if (path.endsWith(".jpg")) { doctype = DT_IMAGE; } + else if (path.endsWith(".jpeg")) { doctype = DT_IMAGE; } + else if (path.endsWith(".png")) { doctype = DT_IMAGE; } + else if (path.endsWith(".html")) { doctype = DT_HTML; } + else if (path.endsWith(".txt")) { doctype = DT_TEXT; } + else if (path.endsWith(".doc")) { doctype = DT_DOC; } + else if (path.endsWith(".rtf")) { doctype = DT_DOC; } + else if (path.endsWith(".pdf")) { doctype = DT_PDFPS; } + else if (path.endsWith(".ps")) { doctype = DT_PDFPS; } + else if (path.endsWith(".avi")) { doctype = DT_MOVIE; } + else if (path.endsWith(".mov")) { doctype = DT_MOVIE; } + else if (path.endsWith(".qt")) { doctype = DT_MOVIE; } + else if (path.endsWith(".mpg")) { doctype = DT_MOVIE; } + else if (path.endsWith(".md5")) { doctype = DT_SHARE; } + else if (path.endsWith(".mpeg")) { doctype = DT_MOVIE; } + else if (path.endsWith(".asf")) { doctype = DT_FLASH; } + return doctype; + } + + public static char docType(String mime) { + // serverLog.logFinest("PLASMA", "docType mime=" + mime); + char doctype = DT_UNKNOWN; + if (mime == null) doctype = DT_UNKNOWN; + else if (mime.startsWith("image/")) doctype = DT_IMAGE; + else if (mime.endsWith("/gif")) doctype = DT_IMAGE; + else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE; + else if (mime.endsWith("/png")) doctype = DT_IMAGE; + else if (mime.endsWith("/html")) doctype = DT_HTML; + else if (mime.endsWith("/rtf")) doctype = DT_DOC; + else if (mime.endsWith("/pdf")) doctype = DT_PDFPS; + else if (mime.endsWith("/octet-stream")) doctype = DT_BINARY; + else if (mime.endsWith("/x-shockwave-flash")) doctype = DT_FLASH; + else if (mime.endsWith("/msword")) doctype = DT_DOC; + else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC; + else if (mime.endsWith("/postscript")) doctype = DT_PDFPS; + else if (mime.startsWith("text/")) doctype = DT_TEXT; + else if (mime.startsWith("image/")) doctype = DT_IMAGE; + else if (mime.startsWith("audio/")) doctype = DT_AUDIO; + else if (mime.startsWith("video/")) doctype = DT_MOVIE; + //bz2 = application/x-bzip2 + //dvi = application/x-dvi + //gz = application/gzip + //hqx = application/mac-binhex40 + //lha = application/x-lzh + //lzh = application/x-lzh + //pac = application/x-ns-proxy-autoconfig + //php = application/x-httpd-php + //phtml = application/x-httpd-php + //rss = application/xml + //tar = application/tar + //tex = application/x-tex + //tgz = application/tar + //torrent = application/x-bittorrent + //xhtml = application/xhtml+xml + //xla = application/msexcel + //xls 
= application/msexcel + //xsl = application/xml + //xml = application/xml + //Z = application/x-compress + //zip = application/zip + return doctype; + } + public static void init(File htCachePath, long CacheSizeMax, long preloadTime, String layout, boolean migration) { cachePath = htCachePath; @@ -252,7 +366,7 @@ public final class plasmaHTCache { return (curCacheSize >= maxCacheSize) ? 0 : maxCacheSize - curCacheSize; } - public static boolean writeResourceContent(URL url, byte[] array) { + public static boolean writeResourceContent(yacyURL url, byte[] array) { if (array == null) return false; File file = getCachePath(url); try { @@ -288,16 +402,16 @@ public final class plasmaHTCache { } } - public static boolean deleteFile(URL url) { + public static boolean deleteFile(yacyURL url) { return deleteURLfromCache("", url, "FROM"); } - private static boolean deleteURLfromCache(String key, URL url, String msg) { + private static boolean deleteURLfromCache(String key, yacyURL url, String msg) { if (deleteFileandDirs(key, getCachePath(url), msg)) { try { // As the file is gone, the entry in responseHeader.db is not needed anymore log.logFinest("Trying to remove responseHeader from URL: " + url.toNormalform(false, true)); - responseHeaderDB.remove(plasmaURL.urlHash(url)); + responseHeaderDB.remove(url.hash()); } catch (IOException e) { resetResponseHeaderDB(); log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); @@ -356,10 +470,10 @@ public final class plasmaHTCache { log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash); responseHeaderDB.remove(urlHash); } else { - URL url = getURL(file); + yacyURL url = getURL(file); if (url != null) { log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true)); - responseHeaderDB.remove(plasmaURL.urlHash(url)); + responseHeaderDB.remove(url.hash()); } } } catch (IOException e) { @@ -497,13 +611,10 @@ public final class plasmaHTCache { * @throws UnsupportedProtocolException if the protocol is not supported and therefore the * info object couldn't be created */ - public static IResourceInfo loadResourceInfo(URL url) throws UnsupportedProtocolException, IllegalAccessException { - - // getting the URL hash - String urlHash = plasmaURL.urlHash(url.toNormalform(true, true)); + public static IResourceInfo loadResourceInfo(yacyURL url) throws UnsupportedProtocolException, IllegalAccessException { // loading data from database - Map hdb = responseHeaderDB.getMap(urlHash); + Map hdb = responseHeaderDB.getMap(url.hash()); if (hdb == null) return null; // generate the cached object @@ -601,7 +712,7 @@ public final class plasmaHTCache { * that path will be generated * @return new File */ - public static File getCachePath(final URL url) { + public static File getCachePath(final yacyURL url) { // this.log.logFinest("plasmaHTCache: getCachePath: IN=" + url.toString()); // peer.yacy || www.peer.yacy = http/yacy/peer @@ -662,18 +773,18 @@ public final class plasmaHTCache { if (cacheLayout.equals("tree")) { File FileTree = treeFile(fileName, "tree", path); if (cacheMigration) { - moveCachedObject(hashFile(fileName, "hash", extention, url), FileTree); - moveCachedObject(hashFile(fileName, null, extention, url), FileTree); // temporary migration + moveCachedObject(hashFile(fileName, "hash", extention, url.hash()), FileTree); + moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileTree); // temporary migration moveCachedObject(treeFile(fileName, null, path), FileTree); // temporary 
migration } return FileTree; } if (cacheLayout.equals("hash")) { - File FileFlat = hashFile(fileName, "hash", extention, url); + File FileFlat = hashFile(fileName, "hash", extention, url.hash()); if (cacheMigration) { moveCachedObject(treeFile(fileName, "tree", path), FileFlat); moveCachedObject(treeFile(fileName, null, path), FileFlat); // temporary migration - moveCachedObject(hashFile(fileName, null, extention, url), FileFlat); // temporary migration + moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileFlat); // temporary migration } return FileFlat; } @@ -688,8 +799,8 @@ public final class plasmaHTCache { return new File(cachePath, f.toString()); } - private static File hashFile(StringBuffer fileName, String prefix, String extention, URL url) { - String hexHash = yacySeed.b64Hash2hexHash(plasmaURL.urlHash(url)); + private static File hashFile(StringBuffer fileName, String prefix, String extention, String urlhash) { + String hexHash = yacySeed.b64Hash2hexHash(urlhash); StringBuffer f = new StringBuffer(fileName.length() + 30); f.append(fileName); if (prefix != null) f.append('/').append(prefix); @@ -720,11 +831,11 @@ public final class plasmaHTCache { * this is the reverse function to getCachePath: it constructs the url as string * from a given storage path */ - public static URL getURL(final File f) { + public static yacyURL getURL(final File f) { // this.log.logFinest("plasmaHTCache: getURL: IN: Path=[" + cachePath + "] File=[" + f + "]"); final String urlHash = getHash(f); if (urlHash != null) { - URL url = null; + yacyURL url = null; // try the urlPool try { url = plasmaSwitchboard.getSwitchboard().getURL(urlHash); @@ -744,7 +855,7 @@ public final class plasmaHTCache { String s = ((String)origRequestLine).substring(i).trim(); i = s.indexOf(" "); try { - url = new URL((i<0) ? s : s.substring(0,i)); + url = new yacyURL((i<0) ? s : s.substring(0,i), urlHash); } catch (final Exception e) { url = null; } @@ -831,7 +942,7 @@ public final class plasmaHTCache { // this.log.logFinest("plasmaHTCache: getURL: OUT=" + s); try { - return new URL(protocol + host + path); + return new yacyURL(protocol + host + path, null); } catch (final Exception e) { return null; } @@ -846,7 +957,7 @@ public final class plasmaHTCache { * is available or the cached file is not readable, null * is returned. 
*/ - public static InputStream getResourceContentStream(URL url) { + public static InputStream getResourceContentStream(yacyURL url) { // load the url as resource from the cache File f = getCachePath(url); if (f.exists() && f.canRead()) try { @@ -858,7 +969,7 @@ public final class plasmaHTCache { return null; } - public static long getResourceContentLength(URL url) { + public static long getResourceContentLength(yacyURL url) { // load the url as resource from the cache File f = getCachePath(url); if (f.exists() && f.canRead()) { @@ -886,7 +997,7 @@ public final class plasmaHTCache { public static Entry newEntry( Date initDate, int depth, - URL url, + yacyURL url, String name, //httpHeader requestHeader, String responseStatus, @@ -898,7 +1009,7 @@ public final class plasmaHTCache { return new Entry( initDate, depth, - url, + url, name, //requestHeader, responseStatus, @@ -919,10 +1030,8 @@ public final class plasmaHTCache { private String responseStatus; private File cacheFile; // the cache file private byte[] cacheArray; // or the cache as byte-array - private URL url; + private yacyURL url; private String name; // the name of the link, read as anchor from an -tag - private String nomalizedURLHash; - private String nomalizedURLString; //private int status; // cache load/hit/stale etc status private Date lastModified; private char doctype; @@ -933,7 +1042,7 @@ public final class plasmaHTCache { /** * protocolspecific information about the resource */ - private IResourceInfo resInfo; + private IResourceInfo resInfo; protected Object clone() throws CloneNotSupportedException { return new Entry( @@ -952,7 +1061,7 @@ public final class plasmaHTCache { public Entry(Date initDate, int depth, - URL url, + yacyURL url, String name, //httpHeader requestHeader, String responseStatus, @@ -966,22 +1075,11 @@ public final class plasmaHTCache { System.exit(0); } this.resInfo = resourceInfo; - - - // normalize url - this.nomalizedURLString = url.toNormalform(true, true); - - try { - this.url = new URL(this.nomalizedURLString); - } catch (MalformedURLException e) { - System.out.println("internal error at httpdProxyCache.Entry: " + e); - System.exit(-1); - } + this.url = url; this.name = name; this.cacheFile = getCachePath(this.url); - this.nomalizedURLHash = plasmaURL.urlHash(this.nomalizedURLString); - - // assigned: + + // assigned: this.initDate = initDate; this.depth = depth; //this.requestHeader = requestHeader; @@ -994,9 +1092,9 @@ public final class plasmaHTCache { this.lastModified = resourceInfo.getModificationDate(); // getting the doctype - this.doctype = plasmaURL.docType(resourceInfo.getMimeType()); - if (this.doctype == plasmaURL.DT_UNKNOWN) this.doctype = plasmaURL.docType(url); - this.language = plasmaURL.language(url); + this.doctype = docType(resourceInfo.getMimeType()); + if (this.doctype == DT_UNKNOWN) this.doctype = docType(url); + this.language = yacyURL.language(url); // to be defined later: this.cacheArray = null; @@ -1006,12 +1104,12 @@ public final class plasmaHTCache { return this.name; } - public URL url() { + public yacyURL url() { return this.url; } public String urlHash() { - return this.nomalizedURLHash; + return this.url.hash(); } public Date lastModified() { @@ -1041,8 +1139,8 @@ public final class plasmaHTCache { return this.depth; } - public URL referrerURL() { - return (this.resInfo==null)?null:this.resInfo.getRefererUrl(); + public yacyURL referrerURL() { + return (this.resInfo == null) ? 
null : this.resInfo.getRefererUrl(); } public File cacheFile() { @@ -1070,10 +1168,9 @@ public final class plasmaHTCache { } public boolean writeResourceInfo() { - assert(this.nomalizedURLHash != null) : "URL Hash is null"; if (this.resInfo == null) return false; try { - responseHeaderDB.set(this.nomalizedURLHash, this.resInfo.getMap()); + responseHeaderDB.set(this.url.hash(), this.resInfo.getMap()); } catch (Exception e) { resetResponseHeaderDB(); return false; @@ -1134,8 +1231,8 @@ public final class plasmaHTCache { // -CGI access in request // CGI access makes the page very individual, and therefore not usable in caches - if (isPOST(this.nomalizedURLString) && !this.profile.crawlingQ()) { return "dynamic_post"; } - if (isCGI(this.nomalizedURLString)) { return "dynamic_cgi"; } + if (isPOST(this.url.toNormalform(true, true)) && !this.profile.crawlingQ()) { return "dynamic_post"; } + if (isCGI(this.url.toNormalform(true, true))) { return "dynamic_cgi"; } if (this.resInfo != null) { return this.resInfo.shallStoreCacheForProxy(); @@ -1153,8 +1250,8 @@ public final class plasmaHTCache { // -CGI access in request // CGI access makes the page very individual, and therefore not usable in caches - if (isPOST(this.nomalizedURLString)) { return false; } - if (isCGI(this.nomalizedURLString)) { return false; } + if (isPOST(this.url.toNormalform(true, true))) { return false; } + if (isCGI(this.url.toNormalform(true, true))) { return false; } if (this.resInfo != null) { return this.resInfo.shallUseCacheForProxy(); diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 940d203c6..2d7e165e1 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -75,12 +75,12 @@ import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterInputStream; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.parser.ParserInfo; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class plasmaParser { public static final String PARSER_MODE_PROXY = "PROXY"; @@ -322,11 +322,11 @@ public final class plasmaParser { } } - public static boolean supportedRealTimeContent(URL url, String mimeType) { + public static boolean supportedRealTimeContent(yacyURL url, String mimeType) { return realtimeParsableMimeTypesContains(mimeType) && supportedRealtimeFileExtContains(url); } - public static boolean supportedRealtimeFileExtContains(URL url) { + public static boolean supportedRealtimeFileExtContains(yacyURL url) { String fileExt = getFileExt(url); synchronized (supportedRealtimeFileExt) { return supportedRealtimeFileExt.contains(fileExt); @@ -334,7 +334,7 @@ public final class plasmaParser { } - public static String getFileExt(URL url) { + public static String getFileExt(yacyURL url) { // getting the file path String name = url.getPath(); @@ -566,7 +566,7 @@ public final class plasmaParser { } catch (Exception e) {/* ignore this */} } - public plasmaParserDocument parseSource(URL location, String mimeType, String charset, byte[] sourceArray) + public plasmaParserDocument parseSource(yacyURL location, String mimeType, String charset, byte[] sourceArray) throws InterruptedException, ParserException { ByteArrayInputStream byteIn = null; try { @@ -600,7 +600,7 @@ public final class 
plasmaParser { } - public plasmaParserDocument parseSource(URL location, String theMimeType, String theDocumentCharset, File sourceFile) throws InterruptedException, ParserException { + public plasmaParserDocument parseSource(yacyURL location, String theMimeType, String theDocumentCharset, File sourceFile) throws InterruptedException, ParserException { BufferedInputStream sourceStream = null; try { @@ -644,7 +644,7 @@ public final class plasmaParser { * @throws InterruptedException * @throws ParserException */ - public plasmaParserDocument parseSource(URL location, String theMimeType, String theDocumentCharset, long contentLength, InputStream sourceStream) throws InterruptedException, ParserException { + public plasmaParserDocument parseSource(yacyURL location, String theMimeType, String theDocumentCharset, long contentLength, InputStream sourceStream) throws InterruptedException, ParserException { Parser theParser = null; String mimeType = null; try { @@ -719,7 +719,7 @@ public final class plasmaParser { } } - private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, InputStream sourceStream) throws IOException, ParserException { + private plasmaParserDocument parseHtml(yacyURL location, String mimeType, String documentCharset, InputStream sourceStream) throws IOException, ParserException { // make a scraper and transformer htmlFilterInputStream htmlFilter = new htmlFilterInputStream(sourceStream,documentCharset,location,null,false); @@ -750,13 +750,13 @@ public final class plasmaParser { return transformScraper(location, mimeType, documentCharset, scraper); } - public plasmaParserDocument transformScraper(URL location, String mimeType, String charSet, htmlFilterContentScraper scraper) { + public plasmaParserDocument transformScraper(yacyURL location, String mimeType, String charSet, htmlFilterContentScraper scraper) { try { String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length]; int p = 0; for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j]; plasmaParserDocument ppd = new plasmaParserDocument( - new URL(location.toNormalform(true, true)), + new yacyURL(location.toNormalform(true, true), null), mimeType, charSet, scraper.getKeywords(), @@ -897,7 +897,7 @@ public final class plasmaParser { httpc remote = null; try { Object content = null; - URL contentURL = null; + yacyURL contentURL = null; long contentLength = -1; String contentMimeType = "application/octet-stream"; String charSet = "UTF-8"; @@ -909,9 +909,9 @@ public final class plasmaParser { String mode = args[0]; if (mode.equalsIgnoreCase("-f")) { content = new File(args[1]); - contentURL = new URL((File)content); + contentURL = new yacyURL((File)content); } else if (mode.equalsIgnoreCase("-u")) { - contentURL = new URL(args[1]); + contentURL = new yacyURL(args[1], null); // downloading the document content remote = httpc.getInstance( @@ -1003,7 +1003,7 @@ public final class plasmaParser { config.enableAllParsers(); } - public static boolean supportedContent(URL url, String mimeType) { + public static boolean supportedContent(yacyURL url, String mimeType) { if (url == null) throw new NullPointerException(); Iterator configs = parserConfigList.values().iterator(); @@ -1017,7 +1017,7 @@ public final class plasmaParser { return false; } - public static boolean supportedContent(String parserMode, URL url, String mimeType) 
{ + public static boolean supportedContent(String parserMode, yacyURL url, String mimeType) { if (!PARSER_MODE.contains(parserMode)) throw new IllegalArgumentException(); if (url == null) throw new NullPointerException(); diff --git a/source/de/anomic/plasma/plasmaParserConfig.java b/source/de/anomic/plasma/plasmaParserConfig.java index 883d8315a..4daf3c483 100644 --- a/source/de/anomic/plasma/plasmaParserConfig.java +++ b/source/de/anomic/plasma/plasmaParserConfig.java @@ -54,10 +54,10 @@ import java.util.Hashtable; import java.util.Iterator; import java.util.Set; -import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserInfo; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class plasmaParserConfig { /** @@ -85,7 +85,7 @@ public class plasmaParserConfig { this.parserMode = theParserMode; } - public boolean supportedContent(URL url, String mimeType) { + public boolean supportedContent(yacyURL url, String mimeType) { // TODO: we need some exceptions here to index URLs like this // http://www.musicabona.com/respighi/12668/cd/index.html.fr mimeType = plasmaParser.getRealMimeType(mimeType); @@ -112,7 +112,7 @@ public class plasmaParserConfig { } - public boolean supportedFileExt(URL url) { + public boolean supportedFileExt(yacyURL url) { if (url == null) throw new NullPointerException(); // getting the file path diff --git a/source/de/anomic/plasma/plasmaParserDocument.java b/source/de/anomic/plasma/plasmaParserDocument.java index 1ce0c1076..e709cb768 100644 --- a/source/de/anomic/plasma/plasmaParserDocument.java +++ b/source/de/anomic/plasma/plasmaParserDocument.java @@ -52,6 +52,7 @@ import java.net.MalformedURLException; import de.anomic.server.serverCachedFileOutputStream; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; import java.util.Arrays; import java.util.HashMap; @@ -62,12 +63,11 @@ import java.util.Map; import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterImageEntry; -import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; public class plasmaParserDocument { - private URL location; // the source url + private yacyURL location; // the source url private String mimeType; // mimeType as taken from http header private String charset; // the charset of the document private List keywords; // most resources provide a keyword field @@ -83,11 +83,11 @@ public class plasmaParserDocument { // text in image tags. 
private Map hyperlinks, audiolinks, videolinks, applinks; private Map emaillinks; - private URL favicon; + private yacyURL favicon; private boolean resorted; private InputStream textStream; - protected plasmaParserDocument(URL location, String mimeType, String charset, + protected plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, Object text, Map anchors, TreeSet images) { @@ -118,32 +118,32 @@ public class plasmaParserDocument { } } - public plasmaParserDocument(URL location, String mimeType, String charset) { + public plasmaParserDocument(yacyURL location, String mimeType, String charset) { this(location, mimeType, charset, null, null, null, null, null, (Object)null, null, null); } - public plasmaParserDocument(URL location, String mimeType, String charset, + public plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, byte[] text, Map anchors, TreeSet images) { this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); } - public plasmaParserDocument(URL location, String mimeType, String charset, + public plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, File text, Map anchors, TreeSet images) { this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); } - public plasmaParserDocument(URL location, String mimeType, String charset, + public plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, serverCachedFileOutputStream text, Map anchors, TreeSet images) { this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); } - public URL getLocation() { + public yacyURL getLocation() { return this.location; } @@ -304,7 +304,7 @@ public class plasmaParserDocument { // extract hyperlinks, medialinks and emaillinks from anchorlinks Iterator i; - URL url; + yacyURL url; String u; int extpos, qpos; String ext = null; @@ -330,7 +330,7 @@ public class plasmaParserDocument { ext = u.substring(extpos + 1).toLowerCase(); } try { - url = new URL(u); + url = new yacyURL(u, null); u = url.toNormalform(true, true); if (plasmaParser.mediaExtContains(ext)) { // this is not a normal anchor, its a media link @@ -399,14 +399,14 @@ public class plasmaParserDocument { /** * @return the {@link URL} to the favicon that belongs to the document */ - public URL getFavicon() { + public yacyURL getFavicon() { return this.favicon; } /** * @param faviconURL the {@link URL} to the favicon that belongs to the document */ - public void setFavicon(URL faviconURL) { + public void setFavicon(yacyURL faviconURL) { this.favicon = faviconURL; } diff --git a/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java b/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java index 050feb12e..2ee68a7e1 100644 --- a/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java +++ b/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java @@ -53,12 +53,11 @@ import java.util.HashSet; import java.util.Iterator; import java.util.TreeSet; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroAttrSeq; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import 
de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class plasmaRankingRCIEvaluation { @@ -177,8 +176,8 @@ public class plasmaRankingRCIEvaluation { dom = (String) i.next(); if (dom.startsWith("www.")) dom = dom.substring(4); try { - dommap.put(plasmaURL.urlHash(new URL("http://" + dom)).substring(6), dom); - dommap.put(plasmaURL.urlHash(new URL("http://www." + dom)).substring(6), "www." + dom); + dommap.put((new yacyURL("http://" + dom, null)).hash().substring(6), dom); + dommap.put((new yacyURL("http://www." + dom, null)).hash().substring(6), "www." + dom); } catch (MalformedURLException e) {} } return dommap; diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index b43b9c005..4064f43c6 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -41,12 +41,12 @@ import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class plasmaSearchEvent { @@ -213,7 +213,7 @@ public final class plasmaSearchEvent { IAneardhthash = wordhash; } IACount.put(wordhash, new Integer(container.size())); - IAResults.put(wordhash, plasmaURL.compressIndex(container, null, 1000).toString()); + IAResults.put(wordhash, plasmaSearchProcessing.compressIndex(container, null, 1000).toString()); } process.yield("abstract generation", searchContainerMaps[0].size()); } @@ -234,7 +234,7 @@ public final class plasmaSearchEvent { this.rankedCache = new plasmaSearchContainer(query, ranking, plasmaSearchQuery.cleanQuery(query.queryString)[0], rcLocal); } - + if (query.onlineSnippetFetch) { // start worker threads to fetch urls and snippets this.workerThreads = new resultWorker[workerThreadCount]; @@ -245,6 +245,7 @@ public final class plasmaSearchEvent { } else { // prepare result vector directly without worker threads int rankedIndex = 0; + process.startTimer(); while ((rankedIndex < rankedCache.container().size()) && (resultList.size() < (query.neededResults()))) { // fetch next entry to work on indexContainer c = rankedCache.container(); @@ -263,6 +264,7 @@ public final class plasmaSearchEvent { rankedCache.addReferences(resultEntry); } } + process.yield("offline snippet fetch", resultList.size()); } // remove old events in the event cache @@ -285,6 +287,7 @@ public final class plasmaSearchEvent { // load only urls if there was not yet a root url of that hash // find the url entry + indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry); if (page == null) { @@ -773,7 +776,7 @@ public final class plasmaSearchEvent { public String hash() { return urlentry.hash(); } - public URL url() { + public yacyURL url() { return urlcomps.url(); } public kelondroBitfield flags() { diff --git a/source/de/anomic/plasma/plasmaSearchImages.java b/source/de/anomic/plasma/plasmaSearchImages.java index 398c99625..bc4fedaca 100644 --- a/source/de/anomic/plasma/plasmaSearchImages.java +++ b/source/de/anomic/plasma/plasmaSearchImages.java @@ -48,15 +48,15 @@ import java.util.Map; import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterImageEntry; -import de.anomic.net.URL; import 
de.anomic.plasma.parser.ParserException; import de.anomic.server.serverDate; +import de.anomic.yacy.yacyURL; public final class plasmaSearchImages { private TreeSet images; - public plasmaSearchImages(long maxTime, URL url, int depth) { + public plasmaSearchImages(long maxTime, yacyURL url, int depth) { long start = System.currentTimeMillis(); this.images = new TreeSet(); if (maxTime > 10) { @@ -86,8 +86,8 @@ public final class plasmaSearchImages { Map.Entry e = (Map.Entry) i.next(); String nexturlstring; try { - nexturlstring = new URL((String) e.getKey()).toNormalform(true, true); - addAll(new plasmaSearchImages(serverDate.remainingTime(start, maxTime, 10), new URL(nexturlstring), depth - 1)); + nexturlstring = new yacyURL((String) e.getKey(), null).toNormalform(true, true); + addAll(new plasmaSearchImages(serverDate.remainingTime(start, maxTime, 10), new yacyURL(nexturlstring, null), depth - 1)); } catch (MalformedURLException e1) { e1.printStackTrace(); } diff --git a/source/de/anomic/plasma/plasmaSearchPreOrder.java b/source/de/anomic/plasma/plasmaSearchPreOrder.java index 5abdb1f61..1ccb91eeb 100644 --- a/source/de/anomic/plasma/plasmaSearchPreOrder.java +++ b/source/de/anomic/plasma/plasmaSearchPreOrder.java @@ -52,10 +52,10 @@ import java.util.TreeSet; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroBinSearch; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public final class plasmaSearchPreOrder { @@ -143,7 +143,7 @@ public final class plasmaSearchPreOrder { entry = (Map.Entry) i.next(); iEntry = (indexRWIEntry) entry.getValue(); hashpart = iEntry.urlHash().substring(6); - isWordRootURL = plasmaURL.isWordRootURL(iEntry.urlHash(), querywords); + isWordRootURL = yacyURL.isWordRootURL(iEntry.urlHash(), querywords); if (isWordRootURL) { rootDoms.add(hashpart); } else { diff --git a/source/de/anomic/plasma/plasmaSearchProcessing.java b/source/de/anomic/plasma/plasmaSearchProcessing.java index 5a4496970..38e753744 100644 --- a/source/de/anomic/plasma/plasmaSearchProcessing.java +++ b/source/de/anomic/plasma/plasmaSearchProcessing.java @@ -32,8 +32,11 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import de.anomic.index.indexContainer; +import de.anomic.index.indexRWIEntry; +import de.anomic.server.serverByteBuffer; /** * @@ -179,4 +182,74 @@ public class plasmaSearchProcessing implements Cloneable { return rcLocal; } + + + public static final serverByteBuffer compressIndex(indexContainer inputContainer, indexContainer excludeContainer, long maxtime) { + // collect references according to domains + long timeout = (maxtime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxtime; + TreeMap doms = new TreeMap(); + synchronized (inputContainer) { + Iterator i = inputContainer.entries(); + indexRWIEntry iEntry; + String dom, paths; + while (i.hasNext()) { + iEntry = (indexRWIEntry) i.next(); + if ((excludeContainer != null) && (excludeContainer.get(iEntry.urlHash()) != null)) continue; // do not include urls that are in excludeContainer + dom = iEntry.urlHash().substring(6); + if ((paths = (String) doms.get(dom)) == null) { + doms.put(dom, iEntry.urlHash().substring(0, 6)); + } else { + doms.put(dom, paths + iEntry.urlHash().substring(0, 6)); + } + if (System.currentTimeMillis() > timeout) + break; + } + } + // construct a result string + serverByteBuffer bb = new serverByteBuffer(inputContainer.size() * 6); + bb.append('{'); + Iterator i = doms.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + bb.append((String) entry.getKey()); + bb.append(':'); + bb.append((String) entry.getValue()); + if (System.currentTimeMillis() > timeout) + break; + if (i.hasNext()) + bb.append(','); + } + bb.append('}'); + return bb; + } + + public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) { + // target is a mapping from url-hashes to a string of peer-hashes + if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) { + //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); + ci = ci.trim(1, ci.length() - 2); + String dom, url, peers; + while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { + assert ci.length() >= 6 : "ci.length() = " + ci.length(); + dom = ci.toString(0, 6); + ci.trim(7); + while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { + assert ci.length() >= 6 : "ci.length() = " + ci.length(); + url = ci.toString(0, 6) + dom; + ci.trim(6); + peers = (String) target.get(url); + if (peers == null) { + target.put(url, peerhash); + } else { + target.put(url, peers + peerhash); + } + //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); + } + if (ci.byteAt(0) == ',') ci.trim(1); + } + } + } + + } diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java index 57684572c..98fda8f1f 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java @@ -48,7 +48,7 @@ import java.util.Set; import java.util.TreeSet; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; +import de.anomic.yacy.yacyURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; @@ -252,7 +252,7 @@ public class plasmaSearchRankingProfile { public long preRanking(indexRWIEntry t, indexRWIEntry min, indexRWIEntry max, TreeSet searchedWords) { // the normalizedEntry must be a normalized indexEntry long ranking = 0; - ranking += (256 - plasmaURL.domLengthNormalized(t.urlHash())) << coeff_domlength; + ranking += (256 - yacyURL.domLengthNormalized(t.urlHash())) << coeff_domlength; ranking += plasmaSearchPreOrder.ybr_p(t.urlHash()) << coeff_ybr; ranking += (255 - (255 * (t.virtualAge() - min.virtualAge() ) / (1 + max.virtualAge() - min.virtualAge())) ) << coeff_date; ranking += (255 * (t.wordsintitle() - min.wordsintitle() ) / (1 + max.wordsintitle() - min.wordsintitle())) << coeff_wordsintitle; @@ -281,8 +281,8 @@ public class plasmaSearchRankingProfile { ranking += (flags.get(plasmaCondenser.flag_cat_hasvideo)) ? 
256 << coeff_cathasvideo : 0; ranking += (flags.get(plasmaCondenser.flag_cat_hasapp)) ? 256 << coeff_cathasapp : 0; - ranking += (plasmaURL.probablyRootURL(t.urlHash())) ? 16 << coeff_urllength : 0; - if (searchedWords != null) ranking += (plasmaURL.probablyWordURL(t.urlHash(), searchedWords) != null) ? 256 << coeff_appurl : 0; + ranking += (yacyURL.probablyRootURL(t.urlHash())) ? 16 << coeff_urllength : 0; + if (searchedWords != null) ranking += (yacyURL.probablyWordURL(t.urlHash(), searchedWords) != null) ? 256 << coeff_appurl : 0; return ranking; } diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index bda27351a..8664f8025 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -60,16 +60,15 @@ import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMSetTools; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.crawler.plasmaCrawlerException; import de.anomic.plasma.parser.ParserException; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public class plasmaSnippetCache { @@ -113,18 +112,18 @@ public class plasmaSnippetCache { } public static class TextSnippet { - private URL url; + private yacyURL url; private String line; private String error; private int errorCode; private Set remaingHashes; - private URL favicon; + private yacyURL favicon; - public TextSnippet(URL url, String line, int errorCode, Set remaingHashes, String errortext) { + public TextSnippet(yacyURL url, String line, int errorCode, Set remaingHashes, String errortext) { this(url,line,errorCode,remaingHashes,errortext,null); } - public TextSnippet(URL url, String line, int errorCode, Set remaingHashes, String errortext, URL favicon) { + public TextSnippet(yacyURL url, String line, int errorCode, Set remaingHashes, String errortext, yacyURL favicon) { this.url = url; this.line = line; this.errorCode = errorCode; @@ -132,7 +131,7 @@ public class plasmaSnippetCache { this.remaingHashes = remaingHashes; this.favicon = favicon; } - public URL getUrl() { + public yacyURL getUrl() { return this.url; } public boolean exists() { @@ -224,7 +223,7 @@ public class plasmaSnippetCache { return l.toString().trim(); } - public URL getFavicon() { + public yacyURL getFavicon() { return this.favicon; } } @@ -242,26 +241,26 @@ public class plasmaSnippetCache { } } - public static boolean existsInCache(URL url, Set queryhashes) { + public static boolean existsInCache(yacyURL url, Set queryhashes) { String hashes = yacySearch.set2string(queryhashes); - return retrieveFromCache(hashes, plasmaURL.urlHash(url)) != null; + return retrieveFromCache(hashes, url.hash()) != null; } - public static TextSnippet retrieveTextSnippet(URL url, Set queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int timeout) { + public static TextSnippet retrieveTextSnippet(yacyURL url, Set queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int timeout) { // heise = "0OQUNU3JSs05" + if (queryhashes.size() == 0) { //System.out.println("found no queryhashes for URL retrieve " + url); return new TextSnippet(url, null, ERROR_NO_HASH_GIVEN, queryhashes, "no query hashes given"); } - 
String urlhash = plasmaURL.urlHash(url); // try to get snippet from snippetCache int source = SOURCE_CACHE; String wordhashes = yacySearch.set2string(queryhashes); - String line = retrieveFromCache(wordhashes, urlhash); + String line = retrieveFromCache(wordhashes, url.hash()); if (line != null) { //System.out.println("found snippet for URL " + url + " in cache: " + line); - return new TextSnippet(url, line, source, null, null,(URL) faviconCache.get(urlhash)); + return new TextSnippet(url, line, source, null, null,(yacyURL) faviconCache.get(url.hash())); } /* =========================================================================== @@ -277,7 +276,7 @@ public class plasmaSnippetCache { if (resContent != null) { // if the content was found resContentLength = plasmaHTCache.getResourceContentLength(url); - } else if (fetchOnline) { + } else if (fetchOnline) { // if not found try to download it // download resource using the crawler and keep resource in memory if possible @@ -286,7 +285,7 @@ public class plasmaSnippetCache { // getting resource metadata (e.g. the http headers for http resources) if (entry != null) { resInfo = entry.getDocumentInfo(); - + // read resource body (if it is there) byte []resourceArray = entry.cacheArray(); if (resourceArray != null) { @@ -309,7 +308,7 @@ public class plasmaSnippetCache { if (!(e instanceof plasmaCrawlerException)) e.printStackTrace(); return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "error loading resource: " + e.getMessage()); } - + /* =========================================================================== * PARSING RESOURCE * =========================================================================== */ @@ -327,8 +326,8 @@ public class plasmaSnippetCache { /* =========================================================================== * COMPUTE SNIPPET * =========================================================================== */ - URL resFavicon = document.getFavicon(); - if (resFavicon != null) faviconCache.put(urlhash,resFavicon); + yacyURL resFavicon = document.getFavicon(); + if (resFavicon != null) faviconCache.put(url.hash(), resFavicon); // we have found a parseable non-empty file: use the lines // compute snippet from text @@ -356,7 +355,8 @@ public class plasmaSnippetCache { if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength); // finally store this snippet in our own cache - storeToCache(wordhashes, urlhash, line); + storeToCache(wordhashes, url.hash(), line); + document.close(); return new TextSnippet(url, line, source, null, null, resFavicon); } @@ -370,7 +370,7 @@ public class plasmaSnippetCache { * @param fetchOnline specifies if the resource should be loaded from web if it'as not available in the cache * @return the parsed document as {@link plasmaParserDocument} */ - public static plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline, int timeout, boolean forText) { + public static plasmaParserDocument retrieveDocument(yacyURL url, boolean fetchOnline, int timeout, boolean forText) { // load resource long resContentLength = 0; @@ -615,7 +615,7 @@ public class plasmaSnippetCache { } } - public static ArrayList retrieveMediaSnippets(URL url, Set queryhashes, int mediatype, boolean fetchOnline, int timeout) { + public static ArrayList retrieveMediaSnippets(yacyURL url, Set queryhashes, int mediatype, boolean fetchOnline, int timeout) { if (queryhashes.size() == 0) { serverLog.logFine("snippet fetch", "no query hashes given for url " + url); return new ArrayList(); @@ 
-723,7 +723,7 @@ public class plasmaSnippetCache { return map; } - public static plasmaParserDocument parseDocument(URL url, long contentLength, InputStream resourceStream) throws ParserException { + public static plasmaParserDocument parseDocument(yacyURL url, long contentLength, InputStream resourceStream) throws ParserException { return parseDocument(url, contentLength, resourceStream, null); } @@ -736,7 +736,7 @@ public class plasmaSnippetCache { * @return the extracted data * @throws ParserException */ - public static plasmaParserDocument parseDocument(URL url, long contentLength, InputStream resourceStream, IResourceInfo docInfo) throws ParserException { + public static plasmaParserDocument parseDocument(yacyURL url, long contentLength, InputStream resourceStream, IResourceInfo docInfo) throws ParserException { try { if (resourceStream == null) return null; @@ -809,7 +809,7 @@ public class plasmaSnippetCache { *
*
* [1] the content-length as {@link Integer}
*/ - public static Object[] getResource(URL url, boolean fetchOnline, int socketTimeout, boolean forText) { + public static Object[] getResource(yacyURL url, boolean fetchOnline, int socketTimeout, boolean forText) { // load the url as resource from the web try { long contentLength = -1; @@ -845,7 +845,7 @@ public class plasmaSnippetCache { } public static plasmaHTCache.Entry loadResourceFromWeb( - URL url, + yacyURL url, int socketTimeout, boolean keepInMemory, boolean forText @@ -868,7 +868,7 @@ public class plasmaSnippetCache { public static String failConsequences(TextSnippet snippet, String eventID) { // problems with snippet fetch if (yacyCore.seedDB.mySeed.isVirgin()) return snippet.getError() + " (no consequences, no network connection)"; // no consequences if we do not have a network connection - String urlHash = plasmaURL.urlHash(snippet.getUrl()); + String urlHash = snippet.getUrl().hash(); String querystring = kelondroMSetTools.setToString(snippet.getRemainingHashes(), ' '); if ((snippet.getErrorCode() == ERROR_SOURCE_LOADING) || (snippet.getErrorCode() == ERROR_RESOURCE_LOADING) || diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 0518dc95b..40b04d178 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -141,7 +141,6 @@ import de.anomic.http.httpd; import de.anomic.http.httpdRobotsTxtConfig; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroCache; @@ -150,7 +149,6 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMapTable; import de.anomic.kelondro.kelondroNaturalOrder; -import de.anomic.net.URL; import de.anomic.plasma.dbImport.dbImportManager; import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.urlPattern.defaultURLPattern; @@ -165,6 +163,7 @@ import de.anomic.server.serverSwitch; import de.anomic.server.serverThread; import de.anomic.server.logging.serverLog; import de.anomic.tools.crypt; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; @@ -911,7 +910,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser Map initProps; if (networkUnitDefinition.startsWith("http://")) { try { - this.setConfig(httpc.loadHashMap(new URL(networkUnitDefinition), remoteProxyConfig)); + this.setConfig(httpc.loadHashMap(new yacyURL(networkUnitDefinition, null), remoteProxyConfig)); } catch (MalformedURLException e) { } } else { @@ -923,7 +922,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } if (networkGroupDefinition.startsWith("http://")) { try { - this.setConfig(httpc.loadHashMap(new URL(networkGroupDefinition), remoteProxyConfig)); + this.setConfig(httpc.loadHashMap(new yacyURL(networkGroupDefinition, null), remoteProxyConfig)); } catch (MalformedURLException e) { } } else { @@ -941,7 +940,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser location = getConfig("network.unit.update.location" + i, ""); if (location.length() == 0) break; try { - yacyVersion.latestReleaseLocations.add(new URL(location)); + yacyVersion.latestReleaseLocations.add(new yacyURL(location, null)); } catch (MalformedURLException e) { break; 
} @@ -1476,7 +1475,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } - public boolean acceptURL(URL url) { + public boolean acceptURL(yacyURL url) { // returns true if the url can be accepted accoring to network.unit.domain if (url == null) return false; String host = url.getHost(); @@ -1510,8 +1509,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser errorURL.remove(hash); } - public URL getURL(String urlhash) throws IOException { - if (urlhash.equals(plasmaURL.dummyHash)) return null; + public yacyURL getURL(String urlhash) throws IOException { + if (urlhash.equals(yacyURL.dummyHash)) return null; plasmaCrawlEntry ne = noticeURL.get(urlhash); if (ne != null) return ne.url(); indexURLEntry le = wordIndex.loadedURL.load(urlhash, null); @@ -1739,7 +1738,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // enqueue for further crawling enQueue(this.sbQueue.newEntry( entry.url(), - plasmaURL.urlHash(entry.referrerURL()), + (entry.referrerURL() == null) ? null : entry.referrerURL().hash(), entry.ifModifiedSince(), entry.requestWithCookie(), entry.initiator(), @@ -2363,7 +2362,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return parseResource(entry.url(), mimeType, charset, entry.cacheFile()); } - public plasmaParserDocument parseResource(URL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException, ParserException { + public plasmaParserDocument parseResource(yacyURL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException, ParserException { plasmaParserDocument doc = parser.parseSource(location, mimeType, documentCharset, sourceFile); assert(doc != null) : "Unexpected error. Parser returned null."; return doc; @@ -2387,8 +2386,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // 6) local fetching for global crawling (other known or unknwon initiator) int processCase = PROCESSCASE_0_UNKNOWN; yacySeed initiatorPeer = null; - String initiatorPeerHash = (entry.proxy()) ? plasmaURL.dummyHash : entry.initiator(); - if (initiatorPeerHash.equals(plasmaURL.dummyHash)) { + String initiatorPeerHash = (entry.proxy()) ? 
yacyURL.dummyHash : entry.initiator(); + if (initiatorPeerHash.equals(yacyURL.dummyHash)) { // proxy-load processCase = PROCESSCASE_4_PROXY_LOAD; } else if (initiatorPeerHash.equals(yacyCore.seedDB.mySeed.hash)) { @@ -2442,7 +2441,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser Map hl = document.getHyperlinks(); Iterator i = hl.entrySet().iterator(); String nextUrlString; - URL nextUrl; + yacyURL nextUrl; Map.Entry nextEntry; while (i.hasNext()) { // check for interruption @@ -2452,7 +2451,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser nextEntry = (Map.Entry) i.next(); nextUrlString = (String) nextEntry.getKey(); try { - nextUrl = new URL(nextUrlString); + nextUrl = new yacyURL(nextUrlString, null); // enqueue the hyperlink into the pre-notice-url db sbStackCrawlThread.enqueue(nextUrl, entry.urlHash(), initiatorPeerHash, (String) nextEntry.getValue(), docDate, entry.depth() + 1, entry.profile()); @@ -2467,9 +2466,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * CREATE INDEX * ========================================================================= */ String docDescription = document.getTitle(); - URL referrerURL = entry.referrerURL(); - String referrerUrlHash = plasmaURL.urlHash(referrerURL); - if (referrerUrlHash == null) referrerUrlHash = plasmaURL.dummyHash; + yacyURL referrerURL = entry.referrerURL(); String noIndexReason = plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR; if (processCase == PROCESSCASE_4_PROXY_LOAD) { @@ -2506,13 +2503,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser docDate, // modification date new Date(), // loaded date new Date(ldate + Math.max(0, ldate - docDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula - referrerUrlHash, // referer hash + (referrerURL == null) ? 
null : referrerURL.hash(), // referer hash new byte[0], // md5 (int) entry.size(), // size condenser.RESULT_NUMB_WORDS, // word count - plasmaURL.docType(document.getMimeType()), // doctype + plasmaHTCache.docType(document.getMimeType()), // doctype condenser.RESULT_FLAGS, // flags - plasmaURL.language(entry.url()), // language + yacyURL.language(entry.url()), // language ioLinks[0].intValue(), // llocal ioLinks[1].intValue(), // lother document.getAudiolinks().size(), // laudio @@ -2567,13 +2564,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * ======================================================================== */ words = wordIndex.addPageIndex( entry.url(), // document url - urlHash, // document url hash docDate, // document mod date (int) entry.size(), // document size document, // document content condenser, // document condenser - plasmaURL.language(entry.url()), // document language - plasmaURL.docType(document.getMimeType()), // document type + yacyURL.language(entry.url()), // document language + plasmaHTCache.docType(document.getMimeType()),// document type ioLinks[0].intValue(), // outlinkSame ioLinks[1].intValue() // outlinkOthers ); @@ -2586,8 +2582,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ArrayList tmpContainers = new ArrayList(condenser.words().size()); - String language = plasmaURL.language(entry.url()); - char doctype = plasmaURL.docType(document.getMimeType()); + String language = yacyURL.language(entry.url()); + char doctype = plasmaHTCache.docType(document.getMimeType()); indexURLEntry.Components comp = newEntry.comp(); int urlLength = comp.url().toNormalform(true, true).length(); int urlComps = htmlFilterContentScraper.urlComps(comp.url().toNormalform(true, true)).length; @@ -2645,14 +2641,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String error = (String) resultObj.get("result"); if (error != null) { words = wordIndex.addPageIndex( - entry.url(), - urlHash, + entry.url(), docDate, (int) entry.size(), document, condenser, - plasmaURL.language(entry.url()), - plasmaURL.docType(document.getMimeType()), + yacyURL.language(entry.url()), + plasmaHTCache.docType(document.getMimeType()), ioLinks[0].intValue(), ioLinks[1].intValue() ); @@ -2692,7 +2687,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } else { log.logFine("Not Indexed Resource '" + entry.url().toNormalform(false, true) + "': process case=" + processCase); - addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield()); } } catch (Exception ee) { if (ee instanceof InterruptedException) throw (InterruptedException)ee; @@ -2705,7 +2700,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash)); yacyClient.crawlReceipt(initiatorPeer, "crawl", "exception", ee.getMessage(), null, ""); } - addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), (referrerURL == null) ? 
null : referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield()); } } else { @@ -2713,7 +2708,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser checkInterruption(); log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason); - addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, noIndexReason, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, docDescription, noIndexReason, new kelondroBitfield()); if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) { if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash)); yacyClient.crawlReceipt(initiatorPeer, "crawl", "rejected", noIndexReason, null, ""); @@ -2764,15 +2759,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // convert the referrer hash into the corresponding URL - URL refererURL = null; + yacyURL refererURL = null; String refererHash = urlEntry.referrerhash(); - if ((refererHash != null) && (!refererHash.equals(plasmaURL.dummyHash))) try { + if ((refererHash != null) && (!refererHash.equals(yacyURL.dummyHash))) try { refererURL = this.getURL(refererHash); } catch (IOException e) { refererURL = null; } cacheLoader.loadAsync(urlEntry.url(), urlEntry.name(), (refererURL!=null)?refererURL.toString():null, urlEntry.initiator(), urlEntry.depth(), profile, -1, false); - log.logInfo(stats + ": enqueued for load " + urlEntry.url() + " [" + urlEntry.urlhash() + "]"); + log.logInfo(stats + ": enqueued for load " + urlEntry.url() + " [" + urlEntry.url().hash() + "]"); return; } @@ -2800,8 +2795,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // check if peer for remote crawl is available yacySeed remoteSeed = ((this.isPublicRobinson()) && (getConfig("cluster.mode", "").equals("publiccluster"))) ? 
- yacyCore.dhtAgent.getPublicClusterCrawlSeed(urlEntry.urlhash(), this.clusterhashes) : - yacyCore.dhtAgent.getGlobalCrawlSeed(urlEntry.urlhash()); + yacyCore.dhtAgent.getPublicClusterCrawlSeed(urlEntry.url().hash(), this.clusterhashes) : + yacyCore.dhtAgent.getGlobalCrawlSeed(urlEntry.url().hash()); if (remoteSeed == null) { log.logFine("plasmaSwitchboard.processRemoteCrawlTrigger: no remote crawl seed available"); return false; @@ -2897,8 +2892,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // method for index deletion - public int removeAllUrlReferences(URL url, boolean fetchOnline) { - return removeAllUrlReferences(plasmaURL.urlHash(url), fetchOnline); + public int removeAllUrlReferences(yacyURL url, boolean fetchOnline) { + return removeAllUrlReferences(url.hash(), fetchOnline); } public int removeAllUrlReferences(String urlhash, boolean fetchOnline) { @@ -3231,7 +3226,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } private void addURLtoErrorDB( - URL url, + yacyURL url, String referrerHash, String initiator, String name, diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index 01f4c6542..53537107d 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -55,10 +55,10 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public class plasmaSwitchboardQueue { @@ -105,12 +105,12 @@ public class plasmaSwitchboardQueue { public synchronized void push(Entry entry) throws IOException { sbQueueStack.push(sbQueueStack.row().newEntry(new byte[][]{ entry.url.toString().getBytes(), - (entry.referrerHash == null) ? plasmaURL.dummyHash.getBytes() : entry.referrerHash.getBytes(), + (entry.referrerHash == null) ? yacyURL.dummyHash.getBytes() : entry.referrerHash.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong((entry.ifModifiedSince == null) ? 0 : entry.ifModifiedSince.getTime(), 11).getBytes(), new byte[]{entry.flags}, - (entry.initiator == null) ? plasmaURL.dummyHash.getBytes() : entry.initiator.getBytes(), + (entry.initiator == null) ? yacyURL.dummyHash.getBytes() : entry.initiator.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, rowdef.width(5)).getBytes(), - (entry.profileHandle == null) ? plasmaURL.dummyHash.getBytes() : entry.profileHandle.getBytes(), + (entry.profileHandle == null) ? yacyURL.dummyHash.getBytes() : entry.profileHandle.getBytes(), (entry.anchorName == null) ? 
"-".getBytes("UTF-8") : entry.anchorName.getBytes("UTF-8") })); } @@ -184,13 +184,13 @@ public class plasmaSwitchboardQueue { } } - public Entry newEntry(URL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, + public Entry newEntry(yacyURL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, String initiator, int depth, String profilehandle, String anchorName) { return new Entry(url, referrer, ifModifiedSince, requestWithCookie, initiator, depth, profilehandle, anchorName); } public class Entry { - private URL url; // plasmaURL.urlStringLength + private yacyURL url; // plasmaURL.urlStringLength private String referrerHash; // plasmaURL.urlHashLength private Date ifModifiedSince; // 6 private byte flags; // 1 @@ -202,9 +202,9 @@ public class plasmaSwitchboardQueue { // computed values private plasmaCrawlProfile.entry profileEntry; private IResourceInfo contentInfo; - private URL referrerURL; + private yacyURL referrerURL; - public Entry(URL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, + public Entry(yacyURL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, String initiator, int depth, String profileHandle, String anchorName) { this.url = url; this.referrerHash = referrer; @@ -224,7 +224,7 @@ public class plasmaSwitchboardQueue { long ims = row.getColLong(2); byte flags = row.getColByte(3); try { - this.url = new URL(row.getColString(0, "UTF-8")); + this.url = new yacyURL(row.getColString(0, "UTF-8"), null); } catch (MalformedURLException e) { this.url = null; } @@ -245,7 +245,7 @@ public class plasmaSwitchboardQueue { long ims = (row[2] == null) ? 0 : kelondroBase64Order.enhancedCoder.decodeLong(new String(row[2], "UTF-8")); byte flags = (row[3] == null) ? 0 : row[3][0]; try { - this.url = new URL(new String(row[0], "UTF-8")); + this.url = new yacyURL(new String(row[0], "UTF-8"), null); } catch (MalformedURLException e) { this.url = null; } @@ -262,12 +262,12 @@ public class plasmaSwitchboardQueue { this.referrerURL = null; } - public URL url() { + public yacyURL url() { return url; } public String urlHash() { - return plasmaURL.urlHash(url); + return url.hash(); } public boolean requestedWithCookie() { @@ -279,7 +279,7 @@ public class plasmaSwitchboardQueue { } public boolean proxy() { - return (initiator == null) || (initiator.equals(plasmaURL.dummyHash)); + return (initiator == null) || (initiator.equals(yacyURL.dummyHash)); } public String initiator() { @@ -324,9 +324,9 @@ public class plasmaSwitchboardQueue { return (info == null) ? new Date() : info.getModificationDate(); } - public URL referrerURL() { + public yacyURL referrerURL() { if (referrerURL == null) { - if ((referrerHash == null) || (referrerHash.equals(plasmaURL.dummyHash))) return null; + if ((referrerHash == null) || (referrerHash.equals(yacyURL.dummyHash))) return null; indexURLEntry entry = lurls.load(referrerHash, null); if (entry == null) referrerURL = null; else referrerURL = entry.comp().url(); } diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java deleted file mode 100644 index 1a6cea2c1..000000000 --- a/source/de/anomic/plasma/plasmaURL.java +++ /dev/null @@ -1,744 +0,0 @@ -// indexURL.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany -// first published 20.05.2006 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.plasma; - -import java.net.MalformedURLException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; -import java.util.TreeSet; - -import de.anomic.index.indexContainer; -import de.anomic.index.indexRWIEntry; -import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; -import de.anomic.server.serverByteBuffer; -import de.anomic.server.serverCodings; -import de.anomic.server.serverDomains; -import de.anomic.yacy.yacySeedDB; - -public class plasmaURL { - - // TLD separation in political and cultural parts - // https://www.cia.gov/cia/publications/factbook/index.html - // http://en.wikipedia.org/wiki/List_of_countries_by_continent - - private static final String[] TLD_NorthAmericaOceania={ - // primary english-speaking countries - // english-speaking countries from central america are also included - // includes also dutch and french colonies in the caribbean sea - // and US/English/Australian military bases in asia - "EDU=US Educational", - "GOV=US Government", - "MIL=US Military", - "NET=Network", - "ORG=Non-Profit Organization", - "AN=Netherlands Antilles", - "AS=American Samoa", - "AG=Antigua and Barbuda", - "AI=Anguilla", - "AU=Australia", - "BB=Barbados", - "BZ=Belize", - "BM=Bermuda", - "BS=Bahamas", - "CA=Canada", - "CC=Cocos (Keeling) Islands", - "CK=Cook Islands", - "CX=Christmas Island", // located in the Indian Ocean, but belongs to Australia - "DM=Dominica", - "FM=Micronesia", - "FJ=Fiji", - "GD=Grenada", - "GP=Guadeloupe", - "GS=South Georgia and the South Sandwich Islands", // south of south america, but administrated by british, has only a scientific base - "GU=Guam", // strategical US basis close to Japan - "HM=Heard and McDonald Islands", // uninhabited, sub-Antarctic island, owned by Australia - "HT=Haiti", - "IO=British Indian Ocean Territory", // UK-US naval support facility in the Indian Ocean - "KI=Kiribati", // 33 coral atolls in the pacific, formerly owned by UK - "KN=Saint Kitts and Nevis", // islands in the carribean see - "KY=Cayman Islands", - "LC=Saint Lucia", - "MH=Marshall Islands", // formerly US atomic bomb test site, now a key installation in the US missile defense network - "MP=Northern Mariana Islands", // US strategic location in the western Pacific Ocean - "NC=New Caledonia", - "NF=Norfolk Island", - "NR=Nauru", // independent UN island - "NU=Niue", // one of world's largest coral islands - "NZ=New Zealand (Aotearoa)", - "PG=Papua New Guinea", - "PN=Pitcairn", // 
overseas territory of the UK - "PR=Puerto Rico", // territory of the US with commonwealth status - "PW=Palau", // was once governed by Micronesia - "Sb=Solomon Islands", - "TC=Turks and Caicos Islands", // overseas territory of the UK - "TK=Tokelau", // group of three atolls in the South Pacific Ocean, british protectorat - "TO=Tonga", - "TT=Trinidad and Tobago", - "TV=Tuvalu", // nine coral atolls in the South Pacific Ocean; in 2000, Tuvalu leased its TLD ".tv" for $50 million over a 12-year period - "UM=US Minor Outlying Islands", // nine insular United States possessions in the Pacific Ocean and the Caribbean Sea - "US=United States", - "VC=Saint Vincent and the Grenadines", - "VG=Virgin Islands (British)", - "VI=Virgin Islands (U.S.)", - "VU=Vanuatu", - "WF=Wallis and Futuna Islands", - "WS=Samoa" - }; - private static final String[] TLD_MiddleSouthAmerica = { - // primary spanish and portugese-speaking - "AR=Argentina", - "AW=Aruba", - "BR=Brazil", - "BO=Bolivia", - "CL=Chile", - "CO=Colombia", - "CR=Costa Rica", - "CU=Cuba", - "DO=Dominican Republic", - "EC=Ecuador", - "FK=Falkland Islands (Malvinas)", - "GF=French Guiana", - "GT=Guatemala", - "GY=Guyana", - "HN=Honduras", - "JM=Jamaica", - "MX=Mexico", - "NI=Nicaragua", - "PA=Panama", - "PE=Peru", - "PY=Paraguay", - "SR=Suriname", - "SV=El Salvador", - "UY=Uruguay", - "VE=Venezuela" - }; - private static final String[] TLD_EuropaRussia = { - // includes also countries that are mainly french- dutch- speaking - // and culturally close to europe - "AD=Andorra", - "AL=Albania", - "AQ=Antarctica", - "AT=Austria", - "BA=Bosnia and Herzegovina", - "BE=Belgium", - "BG=Bulgaria", - "BV=Bouvet Island", // this island is uninhabited and covered by ice, south of africa but governed by Norway - "BY=Belarus", - "CH=Switzerland", - "CS=Czechoslovakia (former)", - "CZ=Czech Republic", - "CY=Cyprus", - "DE=Germany", - "DK=Denmark", - "ES=Spain", - "EE=Estonia", - "FI=Finland", - "FO=Faroe Islands", // Viking Settlers - "FR=France", - "FX=France, Metropolitan", - "GB=Great Britain (UK)", - "GI=Gibraltar", - "GL=Greenland", - "GR=Greece", - "HR=Croatia (Hrvatska)", - "HU=Hungary", - "IE=Ireland", - "IS=Iceland", - "IT=Italy", - "LI=Liechtenstein", - "LT=Lithuania", - "LU=Luxembourg", - "LV=Latvia", - "MD=Moldova", - "MC=Monaco", - "MK=Macedonia", - "MN=Mongolia", - "MS=Montserrat", // British island in the Caribbean Sea, almost not populated because of strong vulcanic activity - "MT=Malta", - "MQ=Martinique", // island in the eastern Caribbean Sea, overseas department of France - "NATO=Nato field", - "NL=Netherlands", - "NO=Norway", - "PF=French Polynesia", // French annexed Polynesian island in the South Pacific, French atomic bomb test site - "PL=Poland", - "PM=St. 
Pierre and Miquelon", // french-administrated colony close to canada, belongs to France - "PT=Portugal", - "RO=Romania", - "RU=Russia", - "SE=Sweden", - "SI=Slovenia", - "SJ=Svalbard and Jan Mayen Islands", // part of Norway - "SM=San Marino", - "SK=Slovak Republic", - "SU=USSR (former)", - "TF=French Southern Territories", // islands in the arctic see, no inhabitants - "UK=United Kingdom", - "UA=Ukraine", - "VA=Vatican City State (Holy See)", - "YU=Yugoslavia" - }; - - private static final String[] TLD_MiddleEastWestAsia = { - // states that are influenced by islamic culture and arabic language - // includes also eurasia states and those that had been part of the former USSR and close to southwest asia - "AE=United Arab Emirates", - "AF=Afghanistan", - "AM=Armenia", - "AZ=Azerbaijan", - "BH=Bahrain", - "GE=Georgia", - "IL=Israel", - "IQ=Iraq", - "IR=Iran", - "JO=Jordan", - "KG=Kyrgyzstan", - "KZ=Kazakhstan", - "KW=Kuwait", - "LB=Lebanon", - "OM=Oman", - "QA=Qatar", - "SA=Saudi Arabia", - "SY=Syria", - "TJ=Tajikistan", - "TM=Turkmenistan", - "PK=Pakistan", - "TR=Turkey", - "UZ=Uzbekistan", - "YE=Yemen" - }; - private static final String[] TLD_SouthEastAsia = { - "BD=Bangladesh", - "BN=Brunei Darussalam", - "BT=Bhutan", - "CN=China", - "HK=Hong Kong", - "ID=Indonesia", - "IN=India", - "LA=Laos", - "NP=Nepal", - "JP=Japan", - "KH=Cambodia", - "KP=Korea (North)", - "KR=Korea (South)", - "LK=Sri Lanka", - "MY=Malaysia", - "MM=Myanmar", // formerly known as Burma - "MO=Macau", // Portuguese settlement, part of China, but has some autonomy - "MV=Maldives", // group of atolls in the Indian Ocean - "PH=Philippines", - "SG=Singapore", - "TP=East Timor", - "TH=Thailand", - "TW=Taiwan", - "VN=Viet Nam" - }; - private static final String[] TLD_Africa = { - "AO=Angola", - "BF=Burkina Faso", - "BI=Burundi", - "BJ=Benin", - "BW=Botswana", - "CF=Central African Republic", - "CG=Congo", - "CI=Cote D'Ivoire (Ivory Coast)", - "CM=Cameroon", - "CV=Cape Verde", - "DJ=Djibouti", - "DZ=Algeria", - "EG=Egypt", - "EH=Western Sahara", - "ER=Eritrea", - "ET=Ethiopia", - "GA=Gabon", - "GH=Ghana", - "GM=Gambia", - "GN=Guinea", - "GQ=Equatorial Guinea", - "GW=Guinea-Bissau", - "KE=Kenya", - "KM=Comoros", - "LR=Liberia", - "LS=Lesotho", - "LY=Libya", - "MA=Morocco", - "MG=Madagascar", - "ML=Mali", - "MR=Mauritania", - "MU=Mauritius", - "MW=Malawi", - "MZ=Mozambique", - "NA=Namibia", - "NE=Niger", - "NG=Nigeria", - "RE=Reunion", - "RW=Rwanda", - "SC=Seychelles", - "SD=Sudan", - "SH=St. 
Helena", - "SL=Sierra Leone", - "SN=Senegal", - "SO=Somalia", - "ST=Sao Tome and Principe", - "SZ=Swaziland", - "TD=Chad", - "TG=Togo", - "TN=Tunisia", - "TZ=Tanzania", - "UG=Uganda", - "ZA=South Africa", - "ZM=Zambia", - "ZR=Zaire", - "ZW=Zimbabwe", - "YT=Mayotte" - }; - private static final String[] TLD_Generic = { - "COM=US Commercial", - "AERO=", - "BIZ=", - "COOP=", - "INFO=", - "MUSEUM=", - "NAME=", - "PRO=", - "ARPA=", - "INT=International", - "ARPA=Arpanet", - "NT=Neutral Zone" - }; - - - /* - * TLDs: aero, biz, com, coop, edu, gov, info, int, mil, museum, name, net, - * org, pro, arpa AC, AD, AE, AERO, AF, AG, AI, AL, AM, AN, AO, AQ, AR, - * ARPA, AS, AT, AU, AW, AZ, BA, BB, BD, BE, BF, BG, BH, BI, BIZ, BJ, BM, - * BN, BO, BR, BS, BT, BV, BW, BY, BZ, CA, CC, CD, CF, CG, CH, CI, CK, CL, - * CM, CN, CO, COM, COOP, CR, CU, CV, CX, CY, CZ, DE, DJ, DK, DM, DO, DZ, - * EC, EDU, EE, EG, ER, ES, ET, EU, FI, FJ, FK, FM, FO, FR, GA, GB, GD, GE, - * GF, GG, GH, GI, GL, GM, GN, GOV, GP, GQ, GR, GS, GT, GU, GW, GY, HK, HM, - * HN, HR, HT, HU, ID, IE, IL, IM, IN, INFO, INT, IO, IQ, IR, IS, IT, JE, - * JM, JO, JOBS, JP, KE, KG, KH, KI, KM, KN, KR, KW, KY, KZ, LA, LB, LC, LI, - * LK, LR, LS, LT, LU, LV, LY, MA, MC, MD, MG, MH, MIL, MK, ML, MM, MN, MO, - * MOBI, MP, MQ, MR, MS, MT, MU, MUSEUM, MV, MW, MX, MY, MZ, NA, NAME, NC, - * NE, NET, NF, NG, NI, NL, NO, NP, NR, NU, NZ, OM, ORG, PA, PE, PF, PG, PH, - * PK, PL, PM, PN, PR, PRO, PS, PT, PW, PY, QA, RE, RO, RU, RW, SA, SB, SC, - * SD, SE, SG, SH, SI, SJ, SK, SL, SM, SN, SO, SR, ST, SU, SV, SY, SZ, TC, - * TD, TF, TG, TH, TJ, TK, TL, TM, TN, TO, TP, TR, TRAVEL, TT, TV, TW, TZ, - * UA, UG, UK, UM, US, UY, UZ, VA, VC, VE, VG, VI, VN, VU, WF, WS, YE, YT, - * YU, ZA, ZM, ZW - */ - - public static String dummyHash; - - private static HashMap TLDID = new HashMap(); - private static HashMap TLDName = new HashMap(); - - private static void insertTLDProps(String[] TLDList, int id) { - int p; - String tld, name; - Integer ID = new Integer(id); - for (int i = 0; i < TLDList.length; i++) { - p = TLDList[i].indexOf('='); - if (p > 0) { - tld = TLDList[i].substring(0, p).toLowerCase(); - name = TLDList[i].substring(p + 1); - TLDID.put(tld, ID); - TLDName.put(tld, name); - } - } - } - - static { - // create a dummy hash - dummyHash = ""; - for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-"; - - // assign TLD-ids and names - insertTLDProps(TLD_EuropaRussia, 0); - insertTLDProps(TLD_MiddleSouthAmerica, 1); - insertTLDProps(TLD_SouthEastAsia, 2); - insertTLDProps(TLD_MiddleEastWestAsia, 3); - insertTLDProps(TLD_NorthAmericaOceania, 4); - insertTLDProps(TLD_Africa, 5); - insertTLDProps(TLD_Generic, 6); - // the id=7 is used to flag local addresses - } - - public static final int flagTypeID(String hash) { - return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5; - } - - public static final int flagTLDID(String hash) { - return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 28) >> 2; - } - - public static final int flagLengthID(String hash) { - return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 3); - } - - public static final String urlHash(String url) { - if ((url == null) || (url.length() == 0)) - return null; - try { - return urlHash(new URL(url)); - } catch (MalformedURLException e) { - return null; - } - } - - public static final String urlHash(URL url) { - if (url == null) return null; - String host = url.getHost().toLowerCase(); - int p = host.lastIndexOf('.'); - String tld = 
"", dom = tld; - if (p > 0) { - tld = host.substring(p + 1); - dom = host.substring(0, p); - } - Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses - int id = (ID == null) ? 7 : ID.intValue(); // local addresses are flagged with id=7 - boolean isHTTP = url.getProtocol().equals("http"); - p = dom.lastIndexOf('.'); // locate subdomain - String subdom = ""; - if (p > 0) { - subdom = dom.substring(0, p); - dom = dom.substring(p + 1); - } - int port = url.getPort(); - if (port <= 0) { - if (isHTTP) { - port = 80; - } else if (url.getProtocol().equalsIgnoreCase("https")) { - port = 443; - } else { - port = 21; - } - } - String path = url.getPath(); - if (path.startsWith("/")) - path = path.substring(1); - if (path.endsWith("/")) - path = path.substring(0, path.length() - 1); - p = path.indexOf('/'); - String rootpath = ""; - if (p > 0) { - rootpath = path.substring(0, p); - } - - // we collected enough information to compute the fragments that are - // basis for hashes - int l = dom.length(); - int domlengthKey = (l <= 8) ? 0 : (l <= 12) ? 1 : (l <= 16) ? 2 : 3; - byte flagbyte = (byte) (((isHTTP) ? 0 : 32) | (id << 2) | domlengthKey); - - // combine the attributes - StringBuffer hash = new StringBuffer(12); - // form the 'local' part of the hash - hash.append(kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform(true, true))).substring(0, 5)); // 5 chars - hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char - // form the 'global' part of the hash - hash.append(protocolHostPort(url.getProtocol(), host, port)); // 5 chars - hash.append(kelondroBase64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char - - // return result hash - return new String(hash); - } - - private static char subdomPortPath(String subdom, int port, String rootpath) { - return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(subdom + ":" + port + ":" + rootpath)).charAt(0); - } - - private static final char rootURLFlag = subdomPortPath("www", 80, ""); - - public static final boolean probablyRootURL(String urlHash) { - return (urlHash.charAt(5) == rootURLFlag); - } - - private static String protocolHostPort(String protocol, String host, int port) { - return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(protocol + ":" + host + ":" + port)).substring(0, 5); - } - - private static String[] testTLDs = new String[] { "com", "net", "org", "uk", "fr", "de", "es", "it" }; - - public static final URL probablyWordURL(String urlHash, TreeSet words) { - Iterator wi = words.iterator(); - String word; - while (wi.hasNext()) { - word = (String) wi.next(); - if ((word == null) || (word.length() == 0)) continue; - String pattern = urlHash.substring(6, 11); - for (int i = 0; i < testTLDs.length; i++) { - if (pattern.equals(protocolHostPort("http", "www." + word.toLowerCase() + "." + testTLDs[i], 80))) - try { - return new URL("http://www." + word.toLowerCase() + "." 
+ testTLDs[i]); - } catch (MalformedURLException e) { - return null; - } - } - } - return null; - } - - public static final boolean isWordRootURL(String givenURLHash, TreeSet words) { - if (!(probablyRootURL(givenURLHash))) return false; - URL wordURL = probablyWordURL(givenURLHash, words); - if (wordURL == null) return false; - if (urlHash(wordURL).equals(givenURLHash)) return true; - return false; - } - - public static final int domLengthEstimation(String urlHash) { - // generates an estimation of the original domain length - assert (urlHash != null); - assert (urlHash.length() == 12) : "urlhash = " + urlHash; - int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); - int domLengthKey = flagbyte & 3; - switch (domLengthKey) { - case 0: - return 4; - case 1: - return 10; - case 2: - return 14; - case 3: - return 20; - } - return 20; - } - - public static int domLengthNormalized(String urlHash) { - return 255 * domLengthEstimation(urlHash) / 30; - } - - public static final int domDomain(String urlHash) { - // returns the ID of the domain of the domain - assert (urlHash != null); - assert (urlHash.length() == 12) : "urlhash = " + urlHash; - int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); - return (flagbyte & 12) >> 2; - } - - public static boolean isGlobalDomain(String urlhash) { - return domDomain(urlhash) != 7; - } - - public static final serverByteBuffer compressIndex(indexContainer inputContainer, indexContainer excludeContainer, long maxtime) { - // collect references according to domains - long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; - TreeMap doms = new TreeMap(); - synchronized (inputContainer) { - Iterator i = inputContainer.entries(); - indexRWIEntry iEntry; - String dom, paths; - while (i.hasNext()) { - iEntry = (indexRWIEntry) i.next(); - if ((excludeContainer != null) && (excludeContainer.get(iEntry.urlHash()) != null)) continue; // do not include urls that are in excludeContainer - dom = iEntry.urlHash().substring(6); - if ((paths = (String) doms.get(dom)) == null) { - doms.put(dom, iEntry.urlHash().substring(0, 6)); - } else { - doms.put(dom, paths + iEntry.urlHash().substring(0, 6)); - } - if (System.currentTimeMillis() > timeout) - break; - } - } - // construct a result string - serverByteBuffer bb = new serverByteBuffer(inputContainer.size() * 6); - bb.append('{'); - Iterator i = doms.entrySet().iterator(); - Map.Entry entry; - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - bb.append((String) entry.getKey()); - bb.append(':'); - bb.append((String) entry.getValue()); - if (System.currentTimeMillis() > timeout) - break; - if (i.hasNext()) - bb.append(','); - } - bb.append('}'); - return bb; - } - - public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) { - // target is a mapping from url-hashes to a string of peer-hashes - if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) { - //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); - ci = ci.trim(1, ci.length() - 2); - String dom, url, peers; - while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { - assert ci.length() >= 6 : "ci.length() = " + ci.length(); - dom = ci.toString(0, 6); - ci.trim(7); - while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { - assert ci.length() >= 6 : "ci.length() = " + ci.length(); - url = ci.toString(0, 6) + dom; - ci.trim(6); - peers = (String) target.get(url); - if (peers == null) { - target.put(url, peerhash); - } else 
{ - target.put(url, peers + peerhash); - } - //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); - } - if (ci.byteAt(0) == ',') ci.trim(1); - } - } - } - - - // doctypes: - public static final char DT_PDFPS = 'p'; - public static final char DT_TEXT = 't'; - public static final char DT_HTML = 'h'; - public static final char DT_DOC = 'd'; - public static final char DT_IMAGE = 'i'; - public static final char DT_MOVIE = 'm'; - public static final char DT_FLASH = 'f'; - public static final char DT_SHARE = 's'; - public static final char DT_AUDIO = 'a'; - public static final char DT_BINARY = 'b'; - public static final char DT_UNKNOWN = 'u'; - - // appearance locations: (used for flags) - public static final int AP_TITLE = 0; // title tag from html header - public static final int AP_H1 = 1; // headline - top level - public static final int AP_H2 = 2; // headline, second level - public static final int AP_H3 = 3; // headline, 3rd level - public static final int AP_H4 = 4; // headline, 4th level - public static final int AP_H5 = 5; // headline, 5th level - public static final int AP_H6 = 6; // headline, 6th level - public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam) - public static final int AP_DOM = 8; // word inside an url: in Domain - public static final int AP_PATH = 9; // word inside an url: in path - public static final int AP_IMG = 10; // tag inside image references - public static final int AP_ANCHOR = 11; // anchor description - public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance) - public static final int AP_BOLD = 13; // may be interpreted as emphasized - public static final int AP_ITALICS = 14; // may be interpreted as emphasized - public static final int AP_WEAK = 15; // for Text that is small or bareley visible - public static final int AP_INVISIBLE = 16; // good for spam detection - public static final int AP_TAG = 17; // for tagged indexeing (i.e. using mp3 tags) - public static final int AP_AUTHOR = 18; // word appears in author name - public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags) - public static final int AP_TRACK = 20; // word appears in track name (i.e. 
in mp3 tags) - - // URL attributes - public static final int UA_LOCAL = 0; // URL was crawled locally - public static final int UA_TILDE = 1; // tilde appears in URL - public static final int UA_REDIRECT = 2; // The URL is a redirection - - // local flag attributes - public static final char LT_LOCAL = 'L'; - public static final char LT_GLOBAL = 'G'; - - // doctype calculation - public static char docType(URL url) { - String path = url.getPath().toLowerCase(); - // serverLog.logFinest("PLASMA", "docType URL=" + path); - char doctype = DT_UNKNOWN; - if (path.endsWith(".gif")) { doctype = DT_IMAGE; } - else if (path.endsWith(".ico")) { doctype = DT_IMAGE; } - else if (path.endsWith(".bmp")) { doctype = DT_IMAGE; } - else if (path.endsWith(".jpg")) { doctype = DT_IMAGE; } - else if (path.endsWith(".jpeg")) { doctype = DT_IMAGE; } - else if (path.endsWith(".png")) { doctype = DT_IMAGE; } - else if (path.endsWith(".html")) { doctype = DT_HTML; } - else if (path.endsWith(".txt")) { doctype = DT_TEXT; } - else if (path.endsWith(".doc")) { doctype = DT_DOC; } - else if (path.endsWith(".rtf")) { doctype = DT_DOC; } - else if (path.endsWith(".pdf")) { doctype = DT_PDFPS; } - else if (path.endsWith(".ps")) { doctype = DT_PDFPS; } - else if (path.endsWith(".avi")) { doctype = DT_MOVIE; } - else if (path.endsWith(".mov")) { doctype = DT_MOVIE; } - else if (path.endsWith(".qt")) { doctype = DT_MOVIE; } - else if (path.endsWith(".mpg")) { doctype = DT_MOVIE; } - else if (path.endsWith(".md5")) { doctype = DT_SHARE; } - else if (path.endsWith(".mpeg")) { doctype = DT_MOVIE; } - else if (path.endsWith(".asf")) { doctype = DT_FLASH; } - return doctype; - } - - public static char docType(String mime) { - // serverLog.logFinest("PLASMA", "docType mime=" + mime); - char doctype = DT_UNKNOWN; - if (mime == null) doctype = DT_UNKNOWN; - else if (mime.startsWith("image/")) doctype = DT_IMAGE; - else if (mime.endsWith("/gif")) doctype = DT_IMAGE; - else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE; - else if (mime.endsWith("/png")) doctype = DT_IMAGE; - else if (mime.endsWith("/html")) doctype = DT_HTML; - else if (mime.endsWith("/rtf")) doctype = DT_DOC; - else if (mime.endsWith("/pdf")) doctype = DT_PDFPS; - else if (mime.endsWith("/octet-stream")) doctype = DT_BINARY; - else if (mime.endsWith("/x-shockwave-flash")) doctype = DT_FLASH; - else if (mime.endsWith("/msword")) doctype = DT_DOC; - else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC; - else if (mime.endsWith("/postscript")) doctype = DT_PDFPS; - else if (mime.startsWith("text/")) doctype = DT_TEXT; - else if (mime.startsWith("image/")) doctype = DT_IMAGE; - else if (mime.startsWith("audio/")) doctype = DT_AUDIO; - else if (mime.startsWith("video/")) doctype = DT_MOVIE; - //bz2 = application/x-bzip2 - //dvi = application/x-dvi - //gz = application/gzip - //hqx = application/mac-binhex40 - //lha = application/x-lzh - //lzh = application/x-lzh - //pac = application/x-ns-proxy-autoconfig - //php = application/x-httpd-php - //phtml = application/x-httpd-php - //rss = application/xml - //tar = application/tar - //tex = application/x-tex - //tgz = application/tar - //torrent = application/x-bittorrent - //xhtml = application/xhtml+xml - //xla = application/msexcel - //xls = application/msexcel - //xsl = application/xml - //xml = application/xml - //Z = application/x-compress - //zip = application/zip - return doctype; - } - - // language calculation - public static String language(URL url) { - String language = "uk"; - String host = url.getHost(); - 
int pos = host.lastIndexOf("."); - if ((pos > 0) && (host.length() - pos == 3)) language = host.substring(pos + 1).toLowerCase(); - return language; - } - -} diff --git a/source/de/anomic/plasma/plasmaWebStructure.java b/source/de/anomic/plasma/plasmaWebStructure.java index 10be39008..78c640917 100644 --- a/source/de/anomic/plasma/plasmaWebStructure.java +++ b/source/de/anomic/plasma/plasmaWebStructure.java @@ -29,6 +29,7 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; +import java.net.MalformedURLException; import java.util.ConcurrentModificationException; import java.util.Date; import java.util.Iterator; @@ -39,10 +40,10 @@ import java.util.SortedMap; import java.util.TreeSet; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class plasmaWebStructure { @@ -92,8 +93,8 @@ public class plasmaWebStructure { } } - public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(URL url, String baseurlhash, Date docDate, plasmaParserDocument document, plasmaCondenser condenser) { - assert plasmaURL.urlHash(url).equals(baseurlhash); + public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(yacyURL url, String baseurlhash, Date docDate, plasmaParserDocument document, plasmaCondenser condenser) { + assert url.hash().equals(baseurlhash); // generate citation reference Map hl = document.getHyperlinks(); @@ -105,18 +106,20 @@ public class plasmaWebStructure { int GCount = 0; int LCount = 0; while (it.hasNext()) { - nexturlhash = plasmaURL.urlHash((String) ((Map.Entry) it.next()).getKey()); - if (nexturlhash != null) { - if (nexturlhash.substring(6).equals(lhp)) { - // this is a inbound link - cpl.append(nexturlhash.substring(0, 6)); // store only local part - LCount++; - } else { - // this is a outbound link - cpg.append(nexturlhash); // store complete hash - GCount++; + try { + nexturlhash = (new yacyURL((String) ((Map.Entry) it.next()).getKey(), null)).hash(); + if (nexturlhash != null) { + if (nexturlhash.substring(6).equals(lhp)) { + // this is a inbound link + cpl.append(nexturlhash.substring(0, 6)); // store only local part + LCount++; + } else { + // this is a outbound link + cpg.append(nexturlhash); // store complete hash + GCount++; + } } - } + } catch (MalformedURLException e) {} } // append this reference to buffer @@ -270,8 +273,8 @@ public class plasmaWebStructure { } } - private void learn(URL url, StringBuffer reference /*string of b64(12digits)-hashes*/) { - String domhash = plasmaURL.urlHash(url).substring(6); + private void learn(yacyURL url, StringBuffer reference /*string of b64(12digits)-hashes*/) { + String domhash = url.hash().substring(6); // parse the new reference string and join it with the stored references Map refs = references(domhash); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index eba6fce1d..f3ce46eb6 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -49,12 +49,12 @@ import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroRotateIterator; -import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.logging.serverLog; import 
de.anomic.server.serverMemory; import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaWordIndex implements indexRI { @@ -261,7 +261,7 @@ public final class plasmaWordIndex implements indexRI { return ((long) microDateDays) * ((long) day); } - public int addPageIndex(URL url, String urlHash, Date urlModified, int size, plasmaParserDocument document, plasmaCondenser condenser, String language, char doctype, int outlinksSame, int outlinksOther) { + public int addPageIndex(yacyURL url, Date urlModified, int size, plasmaParserDocument document, plasmaCondenser condenser, String language, char doctype, int outlinksSame, int outlinksOther) { // this is called by the switchboard to put in a new page into the index // use all the words in one condenser object to simultanous create index entries @@ -280,7 +280,7 @@ public final class plasmaWordIndex implements indexRI { word = (String) wentry.getKey(); wprop = (plasmaCondenser.wordStatProp) wentry.getValue(); assert (wprop.flags != null); - ientry = new indexRWIEntry(urlHash, + ientry = new indexRWIEntry(url.hash(), urlLength, urlComps, (document == null) ? urlLength : document.getTitle().length(), wprop.count, condenser.words().size(), @@ -560,7 +560,7 @@ public final class plasmaWordIndex implements indexRI { serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread started"); indexContainer container = null; indexRWIEntry entry = null; - URL url = null; + yacyURL url = null; HashSet urlHashs = new HashSet(); Iterator indexContainerIterator = indexContainerSet(startHash, false, false, 100).iterator(); while (indexContainerIterator.hasNext() && run) { diff --git a/source/de/anomic/plasma/urlPattern/abstractURLPattern.java b/source/de/anomic/plasma/urlPattern/abstractURLPattern.java index 73207cdcb..bf480f84d 100644 --- a/source/de/anomic/plasma/urlPattern/abstractURLPattern.java +++ b/source/de/anomic/plasma/urlPattern/abstractURLPattern.java @@ -55,7 +55,7 @@ import java.util.Iterator; import java.util.Set; import de.anomic.kelondro.kelondroMSetTools; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public abstract class abstractURLPattern implements plasmaURLPattern { @@ -193,22 +193,17 @@ public abstract class abstractURLPattern implements plasmaURLPattern { return urlHashCache.contains(urlHash); } - public boolean isListed(String blacklistType, String urlHash, URL url) { + public boolean isListed(String blacklistType, yacyURL url) { Set urlHashCache = getCacheUrlHashsSet(blacklistType); - if (!urlHashCache.contains(urlHash)) { + if (!urlHashCache.contains(url.hash())) { boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile()); if (temp) { - urlHashCache.add(urlHash); + urlHashCache.add(url.hash()); } return temp; } return true; } - public final boolean isListed(String blacklistType, URL url) { - if (url == null) { return true; } - return isListed(blacklistType, url.getHost().toLowerCase(), url.getFile()); - } - } diff --git a/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java b/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java index 4655f028a..59c826e28 100644 --- a/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java +++ b/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java @@ -2,7 +2,7 @@ package de.anomic.plasma.urlPattern; import java.io.File; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public interface plasmaURLPattern { @@ -46,10 +46,8 @@ public interface plasmaURLPattern { public boolean 
hashInBlacklistedCache(String blacklistType, String urlHash); - - public boolean isListed(String blacklistType, String urlHash, URL url); - public boolean isListed(String blacklistType, URL url); + public boolean isListed(String blacklistType, yacyURL url); public boolean isListed(String blacklistType, String hostlow, String path); diff --git a/source/de/anomic/server/serverDomains.java b/source/de/anomic/server/serverDomains.java index b4e85ccd5..f00beceff 100644 --- a/source/de/anomic/server/serverDomains.java +++ b/source/de/anomic/server/serverDomains.java @@ -37,7 +37,6 @@ import java.util.Map; import java.util.Set; import de.anomic.kelondro.kelondroMScoreCluster; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; public class serverDomains { @@ -196,13 +195,6 @@ public class serverDomains { } - // checks for local/global IP range and local IP - public static boolean isLocal(URL url) { - InetAddress hostAddress = dnsResolve(url.getHost()); - if (hostAddress == null) /* we are offline */ return false; // it is rare to be offline in intranets - return hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); - } - private static InetAddress[] localAddresses = null; static { try { @@ -214,6 +206,9 @@ public class serverDomains { public static boolean isLocal(String address) { + // attention! because this method does a dns resolve to look up an IP address, + // the result may be very slow. Consider 100 milliseconds per access + assert (address != null); // check local ip addresses diff --git a/source/de/anomic/soap/AbstractService.java b/source/de/anomic/soap/AbstractService.java deleted file mode 100644 index a9bb37393..000000000 --- a/source/de/anomic/soap/AbstractService.java +++ /dev/null @@ -1,189 +0,0 @@ -//AbstractService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. 
The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap; - -import java.io.ByteArrayInputStream; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.message.SOAPEnvelope; -import org.apache.axis.message.SOAPHeaderElement; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.data.userDB; -import de.anomic.http.httpHeader; -import de.anomic.http.httpd; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverSwitch; - -public abstract class AbstractService { - protected serverSwitch switchboard; - protected httpHeader requestHeader; - protected MessageContext messageContext; - protected ServerContext serverContext; - - protected static final boolean NO_AUTHENTICATION = false; - protected static final boolean AUTHENTICATION_NEEDED = true; - - - /** - * This function is called by the available service functions to - * extract all needed informations from the SOAP message context. - * @throws AxisFault - */ - protected void extractMessageContext(boolean authenticate) throws AxisFault { - this.messageContext = MessageContext.getCurrentContext(); - - this.switchboard = (serverSwitch) this.messageContext.getProperty(httpdSoapHandler.MESSAGE_CONTEXT_SERVER_SWITCH); - this.requestHeader = (httpHeader) this.messageContext.getProperty(httpdSoapHandler.MESSAGE_CONTEXT_HTTP_HEADER); - this.serverContext = (ServerContext) this.messageContext.getProperty(httpdSoapHandler.MESSAGE_CONTEXT_SERVER_CONTEXT); - - if (authenticate) { - String authInfo = this.doAuthentication(); - - // modify headers - // This is needed for plasmaSwitchboard.adminAuthenticated to work - this.requestHeader.put(httpHeader.AUTHORIZATION,"Basic " + authInfo); - this.requestHeader.put("CLIENTIP","localhost"); - - } - } - - /** - * Doing the user authentication. To improve security, this client - * accepts the base64 encoded and md5 hashed password directly. 
- * - * @throws AxisFault if the authentication could not be done successfully - */ - protected String doAuthentication() throws AxisFault { - // accessing the SOAP request message - Message message = this.messageContext.getRequestMessage(); - - // getting the contained soap envelope - SOAPEnvelope envelope = message.getSOAPEnvelope(); - - // getting the proper soap header containing the authorization field - SOAPHeaderElement authElement = envelope.getHeaderByName(httpdSoapHandler.serviceHeaderNamespace, "Authorization"); - if (authElement != null) { - String adminAccountBase64MD5 = this.switchboard.getConfig(httpd.ADMIN_ACCOUNT_B64MD5,""); - - // the base64 encoded and md5 hashed authentication string - String authString = authElement.getValue(); - if (authString.length() == 0) throw new AxisFault("log-in required"); - - // validate MD5 hash against the user-DB - SOAPHeaderElement userElement = envelope.getHeaderByName(httpdSoapHandler.serviceHeaderNamespace, "Username"); - if (userElement != null) { - String userName = userElement.getValue(); - userDB.Entry userEntry = ((plasmaSwitchboard)this.switchboard).userDB.md5Auth(userName,authString); - if (userEntry.hasRight(userDB.Entry.SOAP_RIGHT)) - // we need to return the ADMIN_ACCOUNT_B64MD5 here because some servlets also do - // user/admin authentication - return adminAccountBase64MD5; - } - - // validate MD5 hash against the static-admin account - if (!(adminAccountBase64MD5.equals(authString))) { - throw new AxisFault("log-in required"); - } - return adminAccountBase64MD5; - } - throw new AxisFault("log-in required"); - } - - protected Document convertContentToXML(String contentString) throws Exception { - return convertContentToXML(contentString.getBytes("UTF-8")); - } - - protected Document convertContentToXML(byte[] content) throws Exception { - Document doc = null; - try { - DocumentBuilderFactory newDocBuilderFactory = DocumentBuilderFactory.newInstance(); - -// // disable dtd validation -// newDocBuilderFactory.setValidating(false); -// newDocBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); -// newDocBuilderFactory.setFeature("http://xml.org/sax/features/validation", false); -// - DocumentBuilder newDocBuilder = newDocBuilderFactory.newDocumentBuilder(); - - ByteArrayInputStream byteIn = new ByteArrayInputStream(content); - doc = newDocBuilder.parse(byteIn); - } catch (Exception e) { - String errorMessage = "Unable to parse the search result XML data. " + e.getClass().getName() + ". 
" + e.getMessage(); - throw new Exception(errorMessage); - } - - return doc; - } - - public Document createNewXMLDocument(String rootElementName) throws ParserConfigurationException { - // creating a new document builder factory - DocumentBuilderFactory newDocBuilderFactory = DocumentBuilderFactory.newInstance(); - - // creating a new document builder - DocumentBuilder newDocBuilder = newDocBuilderFactory.newDocumentBuilder(); - - // creating a new xml document - Document newXMLDocument = newDocBuilder.newDocument(); - - if (rootElementName != null) { - // creating the xml root document - Element rootElement = newXMLDocument.createElement(rootElementName); - newXMLDocument.appendChild(rootElement); - } - - return newXMLDocument; - } - - -} diff --git a/source/de/anomic/soap/ServerContext.java b/source/de/anomic/soap/ServerContext.java deleted file mode 100644 index 1ef84e3bf..000000000 --- a/source/de/anomic/soap/ServerContext.java +++ /dev/null @@ -1,234 +0,0 @@ -package de.anomic.soap; - -//ServerContext.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. 
- -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.HashMap; - -import org.apache.axis.AxisFault; - -import de.anomic.http.httpHeader; -import de.anomic.http.httpTemplate; -import de.anomic.server.serverClassLoader; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; - -public class ServerContext { - - protected String rootPath; - protected serverClassLoader provider; - protected HashMap templates; - protected serverSwitch switchboard; - - - public ServerContext(String root, serverClassLoader cLoader, HashMap templateMap, serverSwitch switchb) { - this.rootPath = root; - this.provider = cLoader; - this.templates = templateMap; - this.switchboard = switchb; - } - - public serverClassLoader getProvider() { - return this.provider; - } - - /** - * This function is called by the service functions to - * invoke the desired server-internal method and to generate - * a output document using one of the available templates. - * - * @param templateName - * @param args - * @return the output document - * @throws AxisFault - */ - public byte[] writeTemplate(String templateName, serverObjects args, httpHeader requestHeader) throws AxisFault { - try { - // invoke servlet - serverObjects tp = invokeServlet(templateName,args,requestHeader); - - // generate output - byte[] result = buildServletOutput(templateName, tp); - return result; - } catch (Exception e) { - if (e instanceof AxisFault) throw (AxisFault) e; - - // create a new AxisFault Object - throw new AxisFault(e.getMessage()); - } - } - - public byte[] buildServletOutput(String templateName, serverObjects tp) throws AxisFault { - try { - File templateFile = getTemplateFile(templateName); - - // generating the output document - ByteArrayOutputStream o = new ByteArrayOutputStream(); - FileInputStream fis = new FileInputStream(templateFile); - httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8")); - o.close(); - fis.close(); - - // convert it into a byte array and send it back as result - byte[] result = o.toByteArray(); - return result; - } catch (Exception e) { - if (e instanceof AxisFault) throw (AxisFault) e; - - // create a new AxisFault Object - throw new AxisFault(e.getMessage()); - } - } - - public serverObjects invokeServlet(String templateName, serverObjects args, httpHeader requestHeader) throws AxisFault { - try { - // getting the template class file - File rc = getServletClassFile(templateName); - - // invoke the desired method - serverObjects tp = (serverObjects) rewriteMethod(rc).invoke(null, new Object[] {requestHeader, args, this.switchboard}); - - // testing if a authentication was needed by the invoked method - validateAuthentication(tp); - - // adding all available templates - tp.putAll(this.templates); - - // return result - return tp; - } catch (Exception e) { - if (e instanceof AxisFault) throw (AxisFault) e; - - e.printStackTrace(); - - // create a new AxisFault Object - throw new AxisFault(e.getMessage()); - } - } - - public File getTemplateFile(String templateName) { - // determining the proper class that should be invoked - File file = new File(this.rootPath, templateName); - return file; - } - - public File getServletClassFile(String templateName) { - File templateFile = getTemplateFile(templateName); - File templateClassFile = getServletClassFile(templateFile); - return templateClassFile; - } - - public File getServletClassFile(File templateFile) 
{ - File templateClassFile = rewriteClassFile(templateFile); - return templateClassFile; - } - - /** - * This method was copied from the {@link httpdFileHandler httpdFileHandler-class} - * @param template - * @return the .class-{@link File} belonging to the given template or null - * if no fitting .class-{@link File} does exist - */ - protected File rewriteClassFile(File template) { - try { - String f = template.getCanonicalPath(); - int p = f.lastIndexOf("."); - if (p < 0) return null; - f = f.substring(0, p) + ".class"; - //System.out.println("constructed class path " + f); - File cf = new File(f); - if (cf.exists()) return cf; - return null; - } catch (IOException e) { - return null; - } - } - - /** - * This method was copied from the {@link httpdFileHandler httpdFileHandler-class} - * @param classFile - * @return the resond({@link httpHeader}, {@link serverObjects}, {@link serverSwitch}) - * {@link Method} of the specified class file or null if this file doesn't contain - * such a method - */ - protected Method rewriteMethod(File classFile) { - Method m = null; - // now make a class out of the stream - try { - //System.out.println("**DEBUG** loading class file " + classFile); - Class c = this.provider.loadClass(classFile); - Class[] params = new Class[] { - httpHeader.class, // Class.forName("de.anomic.http.httpHeader"), - serverObjects.class, // Class.forName("de.anomic.server.serverObjects"), - serverSwitch.class }; // Class.forName("de.anomic.server.serverSwitch")}; - m = c.getMethod("respond", params); - } catch (ClassNotFoundException e) { - System.out.println("INTERNAL ERROR: class " + classFile + " is missing:" + e.getMessage()); - } catch (NoSuchMethodException e) { - System.out.println("INTERNAL ERROR: method respond not found in class " + classFile + ": " + e.getMessage()); - } - //System.out.println("found method: " + m.toString()); - return m; - } - - - /** - * This function is used to test if an invoked method requires authentication - * - * @param tp the properties returned by a previous method invocation - * - * @throws AxisFault if an authentication was required. - */ - protected void validateAuthentication(serverObjects tp) throws AxisFault { - // check if the servlets requests authentification - if (tp.containsKey("AUTHENTICATE")) { - throw new AxisFault("log-in required"); - } - } -} diff --git a/source/de/anomic/soap/SoapException.java b/source/de/anomic/soap/SoapException.java deleted file mode 100644 index de79c1f9a..000000000 --- a/source/de/anomic/soap/SoapException.java +++ /dev/null @@ -1,132 +0,0 @@ -//SoapException.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. 
-// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap; - -import javax.xml.namespace.QName; - -import org.apache.axis.AxisFault; -import org.apache.axis.Constants; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.message.SOAPEnvelope; -import org.apache.axis.message.SOAPFault; - -import de.anomic.http.httpHeader; - -public class SoapException extends Exception { - - private static final long serialVersionUID = 1L; - private int statusCode = 500; - private String statusText = (String) httpHeader.http1_1.get(Integer.toString(this.statusCode)); - private AxisFault fault = new AxisFault(this.statusText); - - public SoapException(int httpStatusCode, String httpStatusText, String errorMsg) { - this.statusCode = httpStatusCode; - this.statusText = httpStatusText; - this.fault = new AxisFault(errorMsg); - } - - public SoapException(int httpStatusCode, String httpStatusText, Exception e) { - super(httpStatusCode + " " + httpStatusText); - - this.statusCode = httpStatusCode; - this.statusText = httpStatusText; - - // convert the exception into an axisfault - this.fault = AxisFault.makeFault(e); - } - - public SoapException(AxisFault soapFault) { - - QName faultCode = soapFault.getFaultCode(); - if (Constants.FAULT_SOAP12_SENDER.equals(faultCode)) { - this.statusCode = 400; - this.statusText = "Bad request"; - } else if ("Server.Unauthorized".equals(faultCode.getLocalPart())) { - this.statusCode = 401; - this.statusText = "Unauthorized"; - } else { - this.statusCode = 500; - this.statusText = "Internal server error"; - } - - // convert the exception into an axisfault - this.fault = soapFault; - } - - public int getStatusCode() { - return this.statusCode; - } - - public String getStatusText() { - return this.statusText; - } - - public Object getFault() { - return this.fault; - } - - public Message getFaultMessage(MessageContext msgContext) { - Message responseMsg = msgContext.getResponseMessage(); - if (responseMsg == null) { - responseMsg = new Message(this.fault); - responseMsg.setMessageContext(msgContext); - } else { - try { - SOAPEnvelope env = responseMsg.getSOAPEnvelope(); - 
env.clearBody(); - env.addBodyElement(new SOAPFault(this.fault)); - } catch (AxisFault e) { - // Should never reach here! - } - } - return responseMsg; - } - - public String getMessage() { - return this.statusCode + " " + this.statusText; - } -} diff --git a/source/de/anomic/soap/build.xml b/source/de/anomic/soap/build.xml deleted file mode 100644 index 7215354f8..000000000 --- a/source/de/anomic/soap/build.xml +++ /dev/null @@ -1,154 +0,0 @@ - - - - A SOAP API for YaCy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/source/de/anomic/soap/httpdSoapHandler.java b/source/de/anomic/soap/httpdSoapHandler.java deleted file mode 100644 index 9c2860e1a..000000000 --- a/source/de/anomic/soap/httpdSoapHandler.java +++ /dev/null @@ -1,777 +0,0 @@ -//httpdSoapHandler.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. 
- - -package de.anomic.soap; - -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PushbackInputStream; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.Properties; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -import javax.xml.namespace.QName; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.EngineConfiguration; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.WSDDEngineConfiguration; -import org.apache.axis.configuration.FileProvider; -import org.apache.axis.deployment.wsdd.WSDDDeployment; -import org.apache.axis.deployment.wsdd.WSDDDocument; -import org.apache.axis.server.AxisServer; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; - -import de.anomic.http.httpChunkedInputStream; -import de.anomic.http.httpChunkedOutputStream; -import de.anomic.http.httpContentLengthInputStream; -import de.anomic.http.httpHeader; -import de.anomic.http.httpd; -import de.anomic.plasma.plasmaParser; -import de.anomic.server.serverClassLoader; -import de.anomic.server.serverCore; -import de.anomic.server.serverFileUtils; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.server.logging.serverLog; - -/** - * Class to accept SOAP Requests and invoke the desired soapService. - * An example how to do a soap call from php: - * - * 1, - * "exceptions" => 1)); - * - * try - * { - * $result = $client->__call("crawling", array("http://test.at"), NULL, - * new SoapHeader("http://http.anomic.de/header", "Authorization", md5(base64_encode("admin:xxxxxxx")))); - * } - * catch (SoapFault $fault) - * { - * $result = $fault->faultstring; - * } - * - * print($result); - * ?> - * - * - * - * - * @author Martin Thelian - */ -public final class httpdSoapHandler { - public static final String SOAP_HANDLER_VERSION = "YaCySOAP V0.1"; - - private serverLog theLogger; - - /* =============================================================== - * Constants needed to set some SOAP properties - * =============================================================== */ - /** - * SOAP Header Namespace needed to access the soap header field containing - * the user authentication - */ - public static final String serviceHeaderNamespace = "http://http.anomic.de/header"; - - /** - * define the needed deployment strings - */ - public static final String serviceDeploymentString = - "" - + "" - + "" - + "" - + "" - + "" - + "" - + ""; - - private static final String[] defaultServices = new String[] { - "search=de.anomic.soap.services.SearchService", - "crawl=de.anomic.soap.services.CrawlService", - "status=de.anomic.soap.services.StatusService", - "admin=de.anomic.soap.services.AdminService", - "blacklist=de.anomic.soap.services.BlacklistService", - "share=de.anomic.soap.services.ShareService", - "bookmarks=de.anomic.soap.services.BookmarkService", - "messages=de.anomic.soap.services.MessageService" - }; - - /* =============================================================== - * Constants needed to set the SOAP message context - * =============================================================== */ - /** - * CONSTANT: tge server switchboard - */ - public static 
final String MESSAGE_CONTEXT_SERVER_SWITCH = "serverSwitch"; - /** - * CONSTANT: received http headers - */ - public static final String MESSAGE_CONTEXT_HTTP_HEADER = "httpHeader"; - /** - * CONSTANT: soap utility class - */ - public static final String MESSAGE_CONTEXT_SERVER_CONTEXT = "serverContext"; - - - /* =============================================================== - * Other object fields - * =============================================================== */ - private static final Object initSync = new Object(); - - private serverClassLoader provider = null; - private HashMap templates; - private serverSwitch switchboard; - - private static AxisServer engine = null; - private File htRootPath; - private File htTemplatePath; - - private static Properties additionalServices = null; - - /** - * Constructor of this class - * @param theSwitchboard - * @throws Exception - */ - public httpdSoapHandler(serverSwitch theSwitchboard) throws Exception { - super(); - - this.switchboard = theSwitchboard; - this.theLogger = new serverLog("SOAP"); - - // create a htRootPath: system pages - if (this.htRootPath == null) { - this.htRootPath = new File(this.switchboard.getRootPath(), this.switchboard.getConfig("htRootPath","htroot")); - // if (!(htRootPath.exists())) htRootPath.mkdir(); - } - - if (this.htTemplatePath == null) { - this.htTemplatePath = new File(theSwitchboard.getRootPath(), theSwitchboard.getConfig("htTemplatePath","htroot/env/templates")); - // if (!(this.htTemplatePath.exists())) this.htTemplatePath.mkdir(); - } - - if (this.provider == null) { - this.provider = new serverClassLoader(/*this.getClass().getClassLoader()*/); - } - - if (this.templates == null) { - this.templates = loadTemplates(this.htTemplatePath); - } - - // deploy default soap services - if (engine == null) synchronized (initSync) { deployDefaultServices(); } - - // init additional soap services - if (additionalServices == null) synchronized (initSync) { deployAdditionalServices(); } - } - - private void deployDefaultServices() throws Exception { - try { - // testing if a security manager is active. - SecurityManager sm = System.getSecurityManager(); - this.theLogger.logInfo("Security Manager is: " + ((sm==null)?"not ":"") + "active"); - - // create an Axis server - this.theLogger.logInfo("Init soap engine ..."); - engine = new AxisServer(new FileProvider("server-config.wsdd")); - - // setting some options ... 
- engine.setShouldSaveConfig(false); - - } catch (Exception e) { - this.theLogger.logSevere("Unable to initialize soap engine",e); - throw e; - } catch (Error e) { - this.theLogger.logSevere("Unable to initialize soap engine",e); - throw e; - } - - try { - this.theLogger.logInfo("Deploying default services ..."); - for (int i=0; i < defaultServices.length; i++) { - String[] nextService = defaultServices[i].split("="); - this.theLogger.logInfo("Deploying service " + nextService[0] + ": " + nextService[1]); - String deploymentStr = serviceDeploymentString - .replaceAll("@serviceName@", nextService[0]) - .replaceAll("@className@", nextService[1]); - - // deploy the service - deployService(deploymentStr,engine); - } - } catch (Exception e) { - this.theLogger.logSevere("Unable to deploy default soap services.",e); - throw e; - } catch (Error e) { - this.theLogger.logSevere("Unable to deploy default soap services.",e); - throw e; - } - } - - private void deployAdditionalServices() { - additionalServices = new Properties(); - - // getting the property filename containing the file list - String fileName = this.switchboard.getConfig("soap.serviceDeploymentList",""); - if (fileName.length() > 0) { - BufferedInputStream fileInput = null; - try { - File deploymentFile = new File(this.switchboard.getRootPath(),fileName); - fileInput = new BufferedInputStream(new FileInputStream(deploymentFile)); - - // load property list - additionalServices.load(fileInput); - fileInput.close(); - - // loop through and deploy services - if (additionalServices.size() > 0) { - Enumeration serviceNameEnum = additionalServices.keys(); - while (serviceNameEnum.hasMoreElements()) { - String serviceName = (String) serviceNameEnum.nextElement(); - String className = additionalServices.getProperty(serviceName); - - String deploymentStr = serviceDeploymentString - .replaceAll("@serviceName@", serviceName) - .replaceAll("@className@", className); - - // deploy the service - deployService(deploymentStr,engine); - } - } - } catch (Exception e) { - this.theLogger.logSevere("Unable to deploy additional services: " + e.getMessage(), e); - } finally { - if (fileInput != null) try { fileInput.close(); } catch (Exception e){/* ignore this */} - } - } - } - - private InputStream getBodyInputStream(httpHeader requestHeader, PushbackInputStream body) throws SoapException{ - InputStream input; - - // getting the content length - long contentLength = requestHeader.contentLength(); - String transferEncoding = (String) requestHeader.get(httpHeader.TRANSFER_ENCODING); - String contentEncoding = (String) requestHeader.get(httpHeader.CONTENT_ENCODING); - - /* =========================================================================== - * Handle TRANSFER ENCODING - * =========================================================================== */ - if (transferEncoding != null && !transferEncoding.equalsIgnoreCase("identity")) { - // read using transfer encoding - if (transferEncoding.equalsIgnoreCase("chunked")) { - input = new httpChunkedInputStream(body); - } else { - String errorMsg = "Unsupported transfer-encoding: "+ transferEncoding; - this.theLogger.logSevere(errorMsg); - throw new SoapException(501,"Not Implemented",errorMsg); - } - } else if (contentLength > 0) { - // read contentLength bytes - input = new httpContentLengthInputStream(body,contentLength); - } else { - // read until EOF - input = body; - } - - /* =========================================================================== - * Handle CONTENT ENCODING - * 
=========================================================================== */ - try { - if (contentEncoding != null && !contentEncoding.equals("identity")) { - if (contentEncoding.equalsIgnoreCase(httpHeader.CONTENT_ENCODING_GZIP)) { - input = new GZIPInputStream(input); - } else { - String errorMsg = "Unsupported content encoding: " + contentEncoding; - this.theLogger.logSevere(errorMsg); - throw new SoapException(415,"Unsupported Media Type",errorMsg); - } - } - } catch (IOException e) { - throw new SoapException(400,"Bad Request",e); - } - - return input; - } - - /** - * HTTP HEAD method. Not needed for soap. - * @param conProp - * @param header - * @param response - * @throws IOException - * - * @see de.anomic.http.httpdHandler#doHead(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream) - */ - public void doHead(Properties conProp, httpHeader requestHeader, OutputStream clientOut) throws IOException { - sendMessage(conProp, requestHeader, clientOut, 501, "Not Implemented", "Connection method is not supported by this handler",null); - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - - - /** - * HTTP Connect Method. Not needed for SOAP - * @param conProp - * @param requestHeader - * @param clientIn - * @param clientOut - * @throws IOException - * - * @see de.anomic.http.httpdHandler#doConnect(java.util.Properties, de.anomic.http.httpHeader, java.io.InputStream, java.io.OutputStream) - */ - public void doConnect(Properties conProp, httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException { - sendMessage(conProp, requestHeader, clientOut, 501, "Not Implemented", "Connection method is not supported by this handler",null); - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - - /** - * Handle http-GET requests. For soap this is usually a query for the wsdl-file. - * Therefore we always return the wsdl file for a get request - * - * @param conProp - * @param requestHeader all received http headers - * @param response {@link OutputStream} to the client - * - * @throws IOException - * - * @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream) - */ - public void doGet(Properties conProp, httpHeader requestHeader, OutputStream response) { - MessageContext msgContext = null; - String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); - - try { - // generating message context - msgContext = this.generateMessageContext(path, requestHeader, conProp); - - if (path.equals("/soap/")) { - serverObjects args = new serverObjects(); - args.put("SOAP.engine",httpdSoapHandler.engine); - ServerContext sContext = (ServerContext) msgContext.getProperty(MESSAGE_CONTEXT_SERVER_CONTEXT); - byte[] result = sContext.writeTemplate("soap/ServiceList.html", args, requestHeader); - sendMessage(conProp, requestHeader, response, 200, "OK", "text/html; charset=utf-8", result); - } else if (path.equals("/soap/favicon.ico")) { - sendMessage(conProp, requestHeader, response, 404, "File not found", "text/plain",null); - } else { - // generating wsdl file - Document doc = generateWSDL(msgContext); - - if (doc != null) { - // TODO: what about doc.getInputEncoding()? - // TODO: what about getXmlEncoding? 
- // Converting the the wsdl document into a byte-array - String responseDoc = XMLUtils.DocumentToString(doc); - byte[] result = responseDoc.getBytes("UTF-8"); - - // send back the result - sendMessage(conProp, requestHeader, response, 200, "OK", "text/xml; charset=utf-8", result); - - if (!(requestHeader.get(httpHeader.CONNECTION, "close").equals("keep-alive"))) { - // wait a little time until everything closes so that clients can read from the streams/sockets - try {Thread.currentThread().join(200);} catch (InterruptedException e) {/* ignore this */} - } - } else { - // if we where unable to generate the wsdl file .... - String errorMsg = "Internal Server Error: Unable to generate the WSDL file."; - sendMessage(conProp, requestHeader, response, 500, "Internal Error", "text/plain",errorMsg.getBytes("UTF-8")); - } - } - - return; - } catch (Exception e) { - // handle error - handleException(conProp,requestHeader,msgContext,response,e); - } - - } - - /** - * HTTP Post method. Needed to call a soap service on this server from a soap client - * @param conProp the connection properties - * @param requestHeader the received http headers - * @param response {@link OutputStream} to the client - * @param body the request body containing the SOAP message - * - * @see de.anomic.http.httpdHandler#doPost(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream, java.io.PushbackInputStream) - */ - public void doPost(Properties conProp, httpHeader requestHeader, OutputStream response, PushbackInputStream body) { - - MessageContext msgContext = null; - String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); - try { - /* ======================================================================== - * GENERATE REQUEST MESSAGE - * ======================================================================== */ - // read the request message - InputStream bodyStream = getBodyInputStream(requestHeader, body); - - // generating the SOAP message context that will be passed over to the invoked service - msgContext = this.generateMessageContext(path, requestHeader, conProp); - - // Generating a SOAP Request Message Object - String mime = plasmaParser.getRealMimeType(requestHeader.mime()); // this is important !!!! 
- Message requestMsg = new Message( - bodyStream, - false, - mime, - (String)requestHeader.get(httpHeader.CONTENT_LOCATION) - ); - msgContext.setRequestMessage(requestMsg); - - - /* ======================================================================== - * SERVICE INVOCATION - * ======================================================================== */ - Message responseMsg = this.invokeService(msgContext); - - if (responseMsg != null) { - sendMessage(conProp, requestHeader, response, 200, "OK", responseMsg); - } else { - sendMessage(conProp, requestHeader, response, 202, "Accepted", "text/plain", null); - } - - return; - } catch (Exception e) { - // handle error - handleException(conProp, requestHeader, msgContext, response,e); - } - } - - private void handleException(Properties conProp, httpHeader requestHeader, MessageContext messageContext, OutputStream response, Exception e) { - try { - Message soapErrorMsg = null; - - if (!conProp.containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { - // if no header was send until now we can send back an error message - - SoapException soapEx = null; - if (!(e instanceof SoapException)) { - soapEx = new SoapException(500,"internal server error",e); - } else { - soapEx = (SoapException) e; - } - // generating a soap error message - soapErrorMsg = soapEx.getFaultMessage(messageContext); - - // send error message back to the client - sendMessage(conProp,requestHeader,response,soapEx.getStatusCode(),soapEx.getStatusText(),soapErrorMsg); - } else { - this.theLogger.logSevere("Unexpected Exception while sending data to client",e); - - } - } catch (Exception ex) { - // the http response header was already send. Just log the error - this.theLogger.logSevere("Unexpected Exception while sending error message",e); - } finally { - // force connection close - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - } - - private Document generateWSDL(MessageContext msgContext) throws SoapException { - try { - engine.generateWSDL(msgContext); - Document doc = (Document) msgContext.getProperty("WSDL"); - return doc; - } catch (Exception ex) { - if (ex instanceof AxisFault) throw new SoapException((AxisFault)ex); - throw new SoapException(500,"Unable to generate WSDL",ex); - } - } - - protected Message invokeService(MessageContext msgContext) throws SoapException { - try { - // invoke the service - engine.invoke(msgContext); - - // Retrieve the response from Axis - return msgContext.getResponseMessage(); - } catch (Exception ex) { - if (ex instanceof AxisFault) throw new SoapException((AxisFault)ex); - throw new SoapException(500,"Unable to invoke service",ex); - } - } - - - - /** - * This function deplays all java classes that should be available via SOAP call. 
- * - * @param deploymentString the deployment string containing detailed information about - * the java class that should be deployed - * @param theAxisServer the apache axis engine where the service should be deployed - * - * @return true if the deployment was done successfully or false - * otherwise - */ - private static boolean deployService(String deploymentString, AxisServer theAxisServer) - { - // convert WSDD file string into bytestream for furhter processing - InputStream deploymentStream = null; - if (deploymentString != null) { - deploymentStream = new ByteArrayInputStream(deploymentString.getBytes()); - Document root = null; - - try { - // build XML document from stream - root = XMLUtils.newDocument(deploymentStream); - - // parse WSDD file - WSDDDocument wsddDoc = new WSDDDocument(root); - - // get the configuration of this axis engine - EngineConfiguration config = theAxisServer.getConfig(); - - if (config instanceof WSDDEngineConfiguration) { - // get the current configuration of the Axis engine - WSDDDeployment deploymentWSDD = - ((WSDDEngineConfiguration) config).getDeployment(); - - // undeply unneeded standard services - deploymentWSDD.undeployService(new QName("Version")); - deploymentWSDD.undeployService(new QName("AdminService")); - - // deploy the new service - // an existing service with the same name gets deleted - wsddDoc.deploy(deploymentWSDD); - } - } catch (ParserConfigurationException e) { - System.err.println("Could not deploy service."); - return false; - } catch (SAXException e) { - System.err.println("Could not deploy service."); - return false; - } catch (IOException e) { - System.err.println("Could not deploy service."); - return false; - } - } else { - System.err.println("Service deployment string is NULL! SOAP Service not deployed."); - return false; - } - return true; - } - - /** - * This function is used to generate the SOAP Message Context that is handed over to - * the called service. - * This message context contains some fields needed by the service to fullfil the request. - * - * @param path the path of the request - * @param requestHeader the http headers of the request - * @param conProps TODO - * @return the generated {@link MessageContext} - * @throws SoapException - * - * @throws Exception if the {@link MessageContext} could not be generated successfully. 
- */ - private MessageContext generateMessageContext(String path, httpHeader requestHeader, Properties conProps) throws SoapException { - try { - // getting the requestes service name - String serviceName = path.substring("/soap/".length()); - - // create and initialize a message context - MessageContext msgContext = new MessageContext(httpdSoapHandler.engine); - msgContext.setTransportName("YaCy-SOAP"); - msgContext.setProperty(MessageContext.TRANS_URL, "http://" + requestHeader.get(httpHeader.HOST) + ((((String)requestHeader.get(httpHeader.HOST)).indexOf(":") > -1)?"":Integer.toString(serverCore.getPortNr(this.switchboard.getConfig("port","8080")))) + - "/soap/" + serviceName); - - // the used http verson - String version = conProps.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); - msgContext.setProperty(MessageContext.HTTP_TRANSPORT_VERSION,version); - - // generate the serverContext object - ServerContext serverContext = new ServerContext(this.htRootPath.toString(),this.provider,this.templates,this.switchboard); - msgContext.setProperty(MESSAGE_CONTEXT_SERVER_CONTEXT ,serverContext); - - // YaCy specific objects - msgContext.setProperty(MESSAGE_CONTEXT_SERVER_SWITCH,this.switchboard); - msgContext.setProperty(MESSAGE_CONTEXT_HTTP_HEADER ,requestHeader); - - // setting the service to execute - msgContext.setTargetService(serviceName); - - return msgContext; - } catch (Exception e) { - if (e instanceof AxisFault) throw new SoapException((AxisFault)e); - throw new SoapException(500,"Unable to generate message context",e); - } - } - - /** - * This method was copied from {@link httpdFileHandler}. Maybe it would be a good idea - * to move this function up into {@link httpdAbstractHandler} - * - * @param path the path to the template dir - * @return a hasmap containing all templates - */ - private static HashMap loadTemplates(File path) { - // reads all templates from a path - // we use only the folder from the given file path - HashMap result = new HashMap(); - if (path == null) return result; - if (!(path.isDirectory())) path = path.getParentFile(); - if ((path == null) || (!(path.isDirectory()))) return result; - String[] templates = path.list(); - for (int i = 0; i < templates.length; i++) { - if (templates[i].endsWith(".template")) try { - //System.out.println("TEMPLATE " + templates[i].substring(0, templates[i].length() - 9) + ": " + new String(buf, 0, c)); - result.put(templates[i].substring(0, templates[i].length() - 9), - new String(serverFileUtils.read(new File(path, templates[i])), "UTF-8")); - } catch (Exception e) {} - } - return result; - } - - /** - * TODO: handle accept-charset http header - * TODO: what about content-encoding, transfer-encoding here? 
- */ - protected void sendMessage(Properties conProp, httpHeader requestHeader, OutputStream out, int statusCode, String statusText, String contentType, byte[] MessageBody) throws IOException { - // write out the response header - respondHeader(conProp, out, statusCode, statusText, (MessageBody==null)?null:contentType, (MessageBody==null)?-1:MessageBody.length, null, null); - - // write the message body - if (MessageBody != null) out.write(MessageBody); - out.flush(); - } - - /** - * TODO: handle accept-charset http header - */ - protected void sendMessage(Properties conProp, httpHeader requestHeader, OutputStream out, int statusCode, String statusText, Message soapMessage) throws IOException, SOAPException { - httpChunkedOutputStream chunkedOut = null; - GZIPOutputStream gzipOut = null; - OutputStream bodyOut = out; - - // getting the content type of the response - String contentType = soapMessage.getContentType(soapMessage.getMessageContext().getSOAPConstants()); - - // getting the content length - String transferEncoding = null; - String contentEncoding = null; - long contentLength = -1; - - if (httpHeader.supportChunkedEncoding(conProp)) { - // we use chunked transfer encoding - transferEncoding = "chunked"; - } else { - contentLength = soapMessage.getContentLength(); - } - if (requestHeader.acceptGzip()) { - // send the response gzip encoded - contentEncoding = "gzip"; - - // we don't know the content length of the compressed body - contentLength = -1; - - // if chunked transfer encoding is not used we need to close the connection - if (!transferEncoding.equals("chunked")) { - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - } - - // sending the soap header - respondHeader(conProp, out, statusCode, statusText, contentType, contentLength, contentEncoding, transferEncoding); - - if (transferEncoding != null) bodyOut = chunkedOut = new httpChunkedOutputStream(bodyOut); - if (contentEncoding != null) bodyOut = gzipOut = new GZIPOutputStream(bodyOut); - - // sending the body - soapMessage.writeTo(bodyOut); - bodyOut.flush(); - - if (gzipOut != null) { - gzipOut.flush(); - gzipOut.finish(); - } - if (chunkedOut != null) { - chunkedOut.finish(); - } - } - - - - protected void respondHeader( - Properties conProp, - OutputStream respond, - int httpStatusCode, - String httpStatusText, - String conttype, - long contlength, - String contentEncoding, - String transferEncoding - ) throws IOException { - httpHeader outgoingHeader = new httpHeader(); - outgoingHeader.put(httpHeader.SERVER, SOAP_HANDLER_VERSION); - if (conttype != null) outgoingHeader.put(httpHeader.CONTENT_TYPE,conttype); - if (contlength != -1) outgoingHeader.put(httpHeader.CONTENT_LENGTH, Long.toString(contlength)); - if (contentEncoding != null) outgoingHeader.put(httpHeader.CONTENT_ENCODING, contentEncoding); - if (transferEncoding != null) outgoingHeader.put(httpHeader.TRANSFER_ENCODING, transferEncoding); - - // getting the http version of the soap client - String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); - - // sending http headers - httpd.sendRespondHeader(conProp,respond,httpVer,httpStatusCode,httpStatusText,outgoingHeader); - } -} diff --git a/source/de/anomic/soap/services/AdminService.java b/source/de/anomic/soap/services/AdminService.java deleted file mode 100644 index 675ccd800..000000000 --- a/source/de/anomic/soap/services/AdminService.java +++ /dev/null @@ -1,837 +0,0 @@ -//AdminService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter 
Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. 
- - -package de.anomic.soap.services; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Properties; -import java.util.logging.Handler; -import java.util.logging.LogRecord; -import java.util.logging.Logger; -import java.util.logging.XMLFormatter; - -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.http.httpRemoteProxyConfig; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; -import de.anomic.server.serverObjects; -import de.anomic.server.serverThread; -import de.anomic.server.logging.GuiHandler; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyNewsPool; -import de.anomic.yacy.yacyNewsRecord; -import de.anomic.yacy.yacySeed; - -public class AdminService extends AbstractService { - - /* ===================================================================== - * Used Plasmaswitchboard config properties - * ===================================================================== */ - private static final String _10_HTTPD = "10_httpd"; - private static final String RESTART = "restart"; - - // peer properties - private static final String PORT = "port"; - private static final String PEER_NAME = "peerName"; - - // remote proxy properties - private static final String REMOTE_PROXY_USE = "remoteProxyUse"; - private static final String REMOTE_PROXY_USE4SSL = "remoteProxyUse4SSL"; - private static final String REMOTE_PROXY_USE4YACY = "remoteProxyUse4Yacy"; - private static final String REMOTE_PROXY_NO_PROXY = "remoteProxyNoProxy"; - private static final String REMOTE_PROXY_PWD = "remoteProxyPwd"; - private static final String REMOTE_PROXY_USER = "remoteProxyUser"; - private static final String REMOTE_PROXY_PORT = "remoteProxyPort"; - private static final String REMOTE_PROXY_HOST = "remoteProxyHost"; - - // remote triggered crawl properties - private static final String CRAWL_RESPONSE = "crawlResponse"; - private static final String _62_REMOTETRIGGEREDCRAWL_BUSYSLEEP = plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP; - private static final String _62_REMOTETRIGGEREDCRAWL = plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL; - - // index transfer properties - private static final String INDEX_RECEIVE_BLOCK_BLACKLIST = "indexReceiveBlockBlacklist"; - private static final String ALLOW_RECEIVE_INDEX = "allowReceiveIndex"; - private static final String ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING = "allowDistributeIndexWhileCrawling"; - private static final String ALLOW_DISTRIBUTE_INDEX = "allowDistributeIndex"; - - // message forwarding properties - private static final String MSG_FORWARDING_TO = "msgForwardingTo"; - private static final String MSG_FORWARDING_CMD = "msgForwardingCmd"; - private static final String MSG_FORWARDING_ENABLED = "msgForwardingEnabled"; - private static final String MSG_FORWARDING = "msgForwarding"; - - // peer profile - private static final String PEERPROFILE_COMMENT = "comment"; - private static final String PEERPROFILE_MSN = "msn"; - private static final String PEERPROFILE_YAHOO = "yahoo"; - private static final String PEERPROFILE_JABBER = "jabber"; - private static final String PEERPROFILE_ICQ = "icq"; - private static final String PEERPROFILE_EMAIL = "email"; - private static final String PEERPROFILE_HOMEPAGE = "homepage"; - private static final String 
PEERPROFILE_NICKNAME = "nickname"; - private static final String PEERPROFILE_NAME = "name"; - private static final String PEER_PROFILE_FETCH_SUCCESS = "success"; - private static final String PEER_HASH = "hash"; - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_CONFIG_XML = "xml/config_p.xml"; - private static final String TEMPLATE_VERSION_XML = "xml/version.xml"; - private static final String TEMPLATE_PROFILE_XML = "ViewProfile.xml"; - private static final String TEMPLATE_PERFORMANCE_QUEUES = "PerformanceQueues_p.xml"; - - /** - * This function can be used to set a configuration option - * @param key the name of the option - * @param value the value of the option as String - * @throws AxisFault if authentication failed - */ - public void setConfigProperty(String key, String value) throws AxisFault { - // Check for errors - if ((key == null)||(key.length() == 0)) throw new IllegalArgumentException("Key must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // add key to switchboard - if (value == null) value = ""; - this.switchboard.setConfig(key,value); - } - - /** - * This function can be used to set multiple configuration option - * @param keys an array containing the names of all options - * @param values an array containing the values of all options - * @throws AxisFault if authentication failed - * @throws IllegalArgumentException if key.length != value.length - */ - public void setProperties(String[] keys, String values[]) throws AxisFault{ - // Check for errors - if ((keys == null)||(keys.length == 0)) throw new IllegalArgumentException("Key array must not be null or empty."); - if ((values == null)||(values.length == 0)) throw new IllegalArgumentException("Values array must not be null or empty."); - if (keys.length != values.length) throw new IllegalArgumentException("Invalid input. 
" + keys.length + " keys but " + values.length + " values received."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - for (int i=0; i < keys.length; i++) { - // get the key - String nextKey = keys[i]; - if ((nextKey == null)||(nextKey.length() == 0)) throw new IllegalArgumentException("Key at position " + i + " was null or empty."); - - // get the value - String nextValue = values[i]; - if (nextValue == null) nextValue = ""; - - // save the value - this.switchboard.setConfig(nextKey,nextValue); - } - } - - /** - * This function can be used to geht the value of a single configuration option - * @param key the name of the option - * @return the value of the option as string - * @throws AxisFault if authentication failed - */ - public String getConfigProperty(String key) throws AxisFault { - // Check for errors - if ((key == null)||(key.length() == 0)) throw new IllegalArgumentException("Key must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the config property - return this.switchboard.getConfig(key,null); - } - - /** - * This function can be used to query the value of multiple configuration options - * @param keys an array containing the names of the configuration options to query - * @return an array containing the values of the configuration options as string - * @throws AxisFault if authentication failed - */ - public String[] getConfigProperties(String[] keys) throws AxisFault { - // Check for errors - if ((keys == null)||(keys.length== 0)) throw new IllegalArgumentException("Key array must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the properties - ArrayList returnValues = new ArrayList(keys.length); - for (int i=0; i < keys.length; i++) { - String nextKey = keys[i]; - if ((nextKey == null)||(nextKey.length() == 0)) throw new IllegalArgumentException("Key at position " + i + " was null or empty."); - - returnValues.add(this.switchboard.getConfig(nextKey,null)); - } - - // return the result - return (String[]) returnValues.toArray(new String[keys.length]); - } - - - /** - * Returns the current configuration of this peer as XML Document - * @return a XML document of the following format - *
-     * <?xml version="1.0"?>
-     * <settings>
-     *   <option>
-	 *     <key>option-name</key>
-	 *     <value>option-value</value>
-	 *   </option>
-	 * </settings>
-     * 
- * - * @throws AxisFault if authentication failed - * @throws Exception - */ - public Document getConfigPropertyList() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_CONFIG_XML, new serverObjects(),this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Returns detailed version information about this peer - * @return a XML document of the following format - *
-     * <?xml version="1.0"?>
-     * <version>
-     *	  <number>0.48202791</number>
-     *	  <svnRevision>2791</svnRevision>
-     *	  <buildDate>20061017</buildDate>
-	 * </version>
-     * 
- * @throws AxisFault if authentication failed - * @throws Exception - */ - public Document getVersion() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_VERSION_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - public Document getPerformanceQueues() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_PERFORMANCE_QUEUES, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * This function can be used to configure the peer name - * @param newName the new name of the peer - * @throws AxisFault if authentication failed or peer name was not accepted - */ - public void setPeerName(String newName) throws AxisFault { - // Check for errors - if ((newName == null)||(newName.length() == 0)) throw new IllegalArgumentException("The peer name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the previous name - String prevName = this.switchboard.getConfig(PEER_NAME, ""); - if (prevName.equals("newName")) return; - - // take a look if there is already an other peer with this name - yacySeed oldSeed = yacyCore.seedDB.lookupByName(newName); - if (oldSeed != null) throw new AxisFault("Other peer '" + oldSeed.getName() + "/" + oldSeed.getHexHash() + "' with this name found"); - - // name must not be too short - if (newName.length() < 3) throw new AxisFault("Name is too short"); - - // name must not be too long - if (newName.length() > 80) throw new AxisFault("Name is too long."); - - // check for invalid chars - for (int i = 0; i < newName.length(); i++) { - if ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_".indexOf(newName.charAt(i)) < 0) - throw new AxisFault("Invalid char at position " + i); - } - - // use the new name - this.switchboard.setConfig(PEER_NAME, newName); - } - - /** - * Changes the port the server socket is bound to. 
- * - * Please not that after the request was accepted the server waits - * a timeout of 5 seconds before the server port binding is changed - * - * @param newPort the new server port - * @throws AxisFault if authentication failed - */ - public void setPeerPort(int newPort) throws AxisFault { - if (newPort <= 0) throw new IllegalArgumentException("Invalid port number"); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the old value - int oldPort = (int) this.switchboard.getConfigLong(PORT, 8080); - if (oldPort == newPort) return; - - // getting the server thread - serverCore theServerCore = (serverCore) this.switchboard.getThread(_10_HTTPD); - - // store the new value - this.switchboard.setConfig(PORT, newPort); - - // restart the port listener - // TODO: check if the port is free - theServerCore.reconnect(5000); - } - - /** - * This function can be enabled the usage of an already configured remote proxy - * @param enableProxy true to enable and false to disable remote proxy usage - * @throws AxisFault if authentication failed or remote proxy configuration is missing - */ - public void enableRemoteProxy(boolean enableProxy) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // check for errors - String proxyHost = this.switchboard.getConfig(REMOTE_PROXY_HOST, ""); - if (proxyHost.length() == 0) throw new AxisFault("Remote proxy hostname is not configured"); - - String proxyPort = this.switchboard.getConfig(REMOTE_PROXY_PORT, ""); - if (proxyPort.length() == 0) throw new AxisFault("Remote proxy port is not configured"); - - // store the new state - plasmaSwitchboard sb = (plasmaSwitchboard) this.switchboard; - sb.setConfig(REMOTE_PROXY_USE,Boolean.toString(enableProxy)); - sb.remoteProxyConfig = httpRemoteProxyConfig.init(sb); - } - - /** - * This function can be used to configured another remote proxy that should be used by - * yacy as parent proxy. - * If a parameter value is null then the current configuration value is not - * changed. 
- * - * @param enableRemoteProxy to enable or disable remote proxy usage - * @param proxyHost the remote proxy host name - * @param proxyPort the remote proxy user name - * @param proxyUserName login name for the remote proxy - * @param proxyPwd password to login to the remote proxy - * @param noProxyList a list of addresses that should not be accessed via the remote proxy - * @param useProxy4YaCy specifies if the remote proxy should be used for the yacy core protocol - * @param useProxy4SSL specifies if the remote proxy should be used for ssl - * - * @throws AxisFault if authentication failed - */ - public void setRemoteProxy( - Boolean enableRemoteProxy, - String proxyHost, - Integer proxyPort, - String proxyUserName, - String proxyPwd, - String noProxyList, - Boolean useProxy4YaCy, - Boolean useProxy4SSL - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - if (proxyHost != null) - this.switchboard.setConfig(REMOTE_PROXY_HOST, proxyHost); - - if (proxyPort != null) - this.switchboard.setConfig(REMOTE_PROXY_PORT, proxyPort.toString()); - - if (proxyUserName != null) - this.switchboard.setConfig(REMOTE_PROXY_USER, proxyUserName); - - if (proxyPwd != null) - this.switchboard.setConfig(REMOTE_PROXY_PWD, proxyPwd); - - if (noProxyList != null) - this.switchboard.setConfig(REMOTE_PROXY_NO_PROXY, noProxyList); - - if (useProxy4YaCy != null) - this.switchboard.setConfig(REMOTE_PROXY_USE4YACY, useProxy4YaCy.toString()); - - if (useProxy4SSL != null) - this.switchboard.setConfig(REMOTE_PROXY_USE4SSL, useProxy4SSL.toString()); - - // enable remote proxy usage - if (enableRemoteProxy != null) this.enableRemoteProxy(enableRemoteProxy.booleanValue()); - } - - /** - * Shutdown this peer - * @throws AxisFault if authentication failed - */ - public void shutdownPeer() throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - this.switchboard.setConfig(RESTART, "false"); - - // Terminate the peer in 3 seconds (this gives us enough time to finish the request - ((plasmaSwitchboard)this.switchboard).terminate(3000); - } - - /** - * This function can be used to configure Remote Triggered Crawling for this peer. - * - * @param enableRemoteTriggeredCrawls to enable remote triggered crawling - * @param maximumAllowedPPM to configure the maximum allowed pages per minute that should be crawled. - * Set this to 0 for unlimited crawling. 
- * - * @throws AxisFault - */ - public void setDistributedCrawling( - Boolean enableRemoteTriggeredCrawls, - Integer maximumAllowedPPM - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // if the ppm was set, change it - if (maximumAllowedPPM != null) { - long newBusySleep; - - // calculate the new sleep time for the remote triggered crawl thread - if (maximumAllowedPPM.intValue() < 1) { - // unlimited crawling - newBusySleep = 100; - } else { - // limited crawling - newBusySleep = 60000 / maximumAllowedPPM.intValue(); - if (newBusySleep < 100) newBusySleep = 100; - } - - // get the server thread - serverThread rct = this.switchboard.getThread(_62_REMOTETRIGGEREDCRAWL); - - // set the new sleep time - if (rct != null) rct.setBusySleep(newBusySleep); - - // store it - this.switchboard.setConfig(_62_REMOTETRIGGEREDCRAWL_BUSYSLEEP, Long.toString(newBusySleep)); - } - - // if set enable/disable remote triggered crawls - if (enableRemoteTriggeredCrawls != null) { - this.switchboard.setConfig(CRAWL_RESPONSE, enableRemoteTriggeredCrawls.toString()); - } - } - - public void setTransferProperties( - Boolean indexDistribution, - Boolean indexDistributeWhileCrawling, - Boolean indexReceive, - Boolean indexReceiveBlockBlacklist - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // index Distribution on/off - if (indexDistribution != null) { - this.switchboard.setConfig(ALLOW_DISTRIBUTE_INDEX, indexDistribution.toString()); - } - - // Index Distribution while crawling - if (indexDistributeWhileCrawling != null) { - this.switchboard.setConfig(ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING, indexDistributeWhileCrawling.toString()); - } - - // Index Receive - if (indexReceive != null) { - this.switchboard.setConfig(ALLOW_RECEIVE_INDEX, indexReceive.toString()); - } - - // block URLs received by DHT by blocklist - if (indexReceiveBlockBlacklist != null) { - this.switchboard.setConfig(INDEX_RECEIVE_BLOCK_BLACKLIST, indexReceiveBlockBlacklist.toString()); - } - } - - public Document getTransferProperties() throws AxisFault, ParserConfigurationException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // creating XML document - Element xmlElement = null; - Document xmlDoc = createNewXMLDocument("transferProperties"); - Element xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement(ALLOW_DISTRIBUTE_INDEX); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(ALLOW_DISTRIBUTE_INDEX,true)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING,true)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(ALLOW_RECEIVE_INDEX); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(ALLOW_RECEIVE_INDEX,true)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(INDEX_RECEIVE_BLOCK_BLACKLIST); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(INDEX_RECEIVE_BLOCK_BLACKLIST,true)))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - - /** - * Function to configure the message forwarding settings of a peer. - * @see
Peer Configuration - Message Forwarding - * - * @param enableForwarding specifies if forwarding should be enabled - * @param forwardingCommand the forwarding command to use. e.g. /usr/sbin/sendmail - * @param forwardingTo the delivery destination. e.g. root@localhost - * - * @throws AxisFault if authentication failed - */ - public void setMessageForwarding( - Boolean enableForwarding, - String forwardingCommand, - String forwardingTo - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // index Distribution on/off - if (enableForwarding != null) { - this.switchboard.setConfig(MSG_FORWARDING_ENABLED, enableForwarding.toString()); - } - - if (forwardingCommand != null) { - this.switchboard.setConfig(MSG_FORWARDING_CMD, forwardingCommand); - } - - if (forwardingTo != null) { - this.switchboard.setConfig(MSG_FORWARDING_TO, forwardingTo); - } - } - - /** - * Function to query the current message forwarding configuration of a peer. - * @see Peer Configuration - Message Forwarding - * - * @return a XML document of the following format - *
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <msgForwarding>
-     *   <msgForwardingEnabled>false</msgForwardingEnabled>
-     *   <msgForwardingCmd>/usr/sbin/sendmail</msgForwardingCmd>
-     *   <msgForwardingTo>root@localhost</msgForwardingTo>
-     * </msgForwarding>
-     * 
- * - * @throws AxisFault if authentication failed - * @throws ParserConfigurationException on XML parser errors - */ - public Document getMessageForwarding() throws AxisFault, ParserConfigurationException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // creating XML document - Element xmlElement = null; - Document xmlDoc = createNewXMLDocument(MSG_FORWARDING); - Element xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement(MSG_FORWARDING_ENABLED); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(MSG_FORWARDING_ENABLED,false)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(MSG_FORWARDING_CMD); - xmlElement.appendChild(xmlDoc.createTextNode(this.switchboard.getConfig(MSG_FORWARDING_CMD,""))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(MSG_FORWARDING_TO); - xmlElement.appendChild(xmlDoc.createTextNode(this.switchboard.getConfig(MSG_FORWARDING_TO,""))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - - /** - * Function to query the last peer logging records. Please note that the maximum amount of records - * depends on the peer GuiHandler logging configuration.
- * By default a maximum of 400 entries are kept in memory. - * - * See: DATA/LOG/yacy.logging: - *
de.anomic.server.logging.GuiHandler.size = 400
- * @param sequenceNumber all logging records with a sequence number greater than this parameter are fetched. - * - * @return an XML document of the following format - *
<?xml version="1.0" encoding="UTF-8"?>
-     * <log>
-     * <record>
-     *   <date>2006-11-03T15:35:09</date>
-     *   <millis>1162564509850</millis>
-     *   <sequence>15</sequence>
-     *   <logger>KELONDRO</logger>
-     *   <level>FINE</level>
-     *   <thread>10</thread>
-     *   <message>KELONDRO DEBUG /home/yacy/DATA/PLASMADB/ACLUSTER/indexAssortment009.db: preloaded 1 records into cache</message>
-     * </record>
-     * [...]
-     * </log>
-     * 
- * This is the default format of the java logging {@link XMLFormatter} class. - * See: Sample XML Output - * - * @throws AxisFault if authentication failed - * @throws ParserConfigurationException on XML parser errors - **/ - public Document getServerLog(Long sequenceNumber) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - Handler logHandler = null; - LogRecord[] log = null; - - // getting the root handler - Logger logger = Logger.getLogger(""); - - // take a look for the GuiHandler - Handler[] handlers = logger.getHandlers(); - for (int i=0; i", "")); - - // format the logging entries - for (int i=0; i < log.length; i++) { - buffer.append(formatter.format(log[i])); - } - - // adding tailer - buffer.append(formatter.getTail(logHandler)); - - // convert into dom - return convertContentToXML(buffer.toString()); - } - - /** - * Function to configure the profile of this peer. - * If a input parameters is null the old value will not be overwritten. - * - * @param name the name of the peer owner - * @param nickname peer owner nick name - * @param homepage - * @param email - * @param icq - * @param jabber - * @param yahoo - * @param msn - * @param comments - * - * @throws AxisFault if authentication failed - */ - public void setLocalPeerProfile( - String name, - String nickname, - String homepage, - String email, - String icq, - String jabber, - String yahoo, - String msn, - String comments - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // load peer properties - final Properties profile = new Properties(); - FileInputStream fileIn = null; - try { - fileIn = new FileInputStream(new File("DATA/SETTINGS/profile.txt")); - profile.load(fileIn); - } catch(IOException e) { - throw new AxisFault("Unable to load the peer profile"); - } finally { - if (fileIn != null) try { fileIn.close(); } catch (Exception e) {/* */} - } - - // set all properties - if (name != null) profile.setProperty(PEERPROFILE_NAME,name); - if (nickname != null) profile.setProperty(PEERPROFILE_NICKNAME,nickname); - if (homepage != null) profile.setProperty(PEERPROFILE_HOMEPAGE,homepage); - if (email != null) profile.setProperty(PEERPROFILE_EMAIL,email); - if (icq != null) profile.setProperty(PEERPROFILE_ICQ,icq); - if (jabber != null) profile.setProperty(PEERPROFILE_JABBER,jabber); - if (yahoo != null) profile.setProperty(PEERPROFILE_YAHOO,yahoo); - if (msn != null) profile.setProperty(PEERPROFILE_MSN,msn); - if (comments != null) profile.setProperty(PEERPROFILE_COMMENT,comments); - - // store it - FileOutputStream fileOut = null; - try { - fileOut = new FileOutputStream(new File("DATA/SETTINGS/profile.txt")); - profile.store(fileOut , null ); - - // generate a news message - Properties news = profile; - news.remove(PEERPROFILE_COMMENT); - yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_PROFILE_UPDATE, news)); - } catch(IOException e) { - throw new AxisFault("Unable to write profile data to file"); - } finally { - if (fileOut != null) try { fileOut.close(); } catch (Exception e) {/* */} - } - } - - /** - * Returns the peer profile of this peer - * @return a xml document in the same format as returned by function {@link #getPeerProfile(String)} - * @throws Exception - */ - public Document getLocalPeerProfile() throws Exception { - return this.getPeerProfile("localhash"); - } - - /** - * Function to query the profile of a remote peer - * @param peerhash the peer hash - * @return a xml 
document in the following format - *
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <profile>
-     * 	<status code="3">Peer profile successfully fetched</status>
-     * 	<name><![CDATA[myName]]></name>
-     * 	<nickname><![CDATA[myNickName]]></nickname>
-     * 	<homepage><![CDATA[http://myhompage.de]]></homepage>
-     * 	<email/>
-     * 	<icq/>
-     * 	<jabber/>
-     * 	<yahoo/>
-     * 	<msn/>
-     * 	<comment><![CDATA[Comments]]></comment>
-     * </profile>
-     * 
- * @throws Exception if authentication failed - */ - public Document getPeerProfile(String peerhash) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - serverObjects args = new serverObjects(); - args.put(PEER_HASH,peerhash); - - // invoke servlet - serverObjects tp = this.serverContext.invokeServlet(TEMPLATE_PROFILE_XML,args, this.requestHeader); - - // query status - if (tp.containsKey(PEER_PROFILE_FETCH_SUCCESS)) { - String success = tp.get(PEER_PROFILE_FETCH_SUCCESS,"3"); - if (success.equals("0")) throw new AxisFault("Invalid parameters passed to servlet."); - else if (success.equals("1")) throw new AxisFault("The requested peer is unknown or can not be accessed."); - else if (success.equals("2")) throw new AxisFault("The requested peer is offline"); - } else { - throw new AxisFault("Unkown error. Unable to determine profile fetching status."); - } - - - // generate output - byte[] result = this.serverContext.buildServletOutput(TEMPLATE_PROFILE_XML, tp); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - public void doGarbageCollection() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // execute garbage collection - System.gc(); - } -} diff --git a/source/de/anomic/soap/services/BlacklistService.java b/source/de/anomic/soap/services/BlacklistService.java deleted file mode 100644 index 5f570ae7e..000000000 --- a/source/de/anomic/soap/services/BlacklistService.java +++ /dev/null @@ -1,608 +0,0 @@ -//BlacklistService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -// -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. 
-// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; - -import javax.activation.DataHandler; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.Attachments; -import org.w3c.dom.Document; - -import de.anomic.data.listManager; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.urlPattern.abstractURLPattern; -import de.anomic.plasma.urlPattern.plasmaURLPattern; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; - -public class BlacklistService extends AbstractService { - - - private static final String LIST_MANAGER_LISTS_PATH = "listManager.listsPath"; - private static final String BLACKLISTS = ".BlackLists"; - //private static final String BLACKLISTS_TYPES = "BlackLists.types"; - private final static String BLACKLIST_SHARED = "BlackLists.Shared"; - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_BLACKLIST_XML = "xml/blacklists_p.xml"; - - - public boolean urlIsBlacklisted(String blacklistType, String urlString) throws AxisFault, MalformedURLException { - if (blacklistType == null || blacklistType.length() == 0) throw new IllegalArgumentException("The blacklist type must not be null or empty."); - if (urlString == null || urlString.length() == 0) throw new IllegalArgumentException("The url must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // check if we know all type passed to this function - checkForKnownBlacklistTypes(new String[]{blacklistType}); - - // check for url validity - URL url = new URL(urlString); - String hostlow = url.getHost().toLowerCase(); - String file = url.getFile(); - - // check if the specified url is listed - return (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, file)); - } - - public Document getBlacklistList() throws Exception { - try { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_BLACKLIST_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - e.printStackTrace(); - throw e; - } - } - - public void createBlacklist(String blacklistName, boolean shareBlacklist, String[] activateForBlacklistTypes) throws IOException { - // Check for errors - if ((blacklistName == 
null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - if (blacklistName.indexOf("/") != -1) - throw new IllegalArgumentException("Blacklist name must not contain '/'."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // check if we know all types passed to this function - checkForKnownBlacklistTypes(activateForBlacklistTypes); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist already exists - if (blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' already exist."); - - // creating the new file - createBlacklistFile(blacklistName); - - // share the newly created blacklist - if (shareBlacklist) doShareBlacklist(blacklistName); - - // activate blacklist - this.activateBlacklistForTypes(blacklistName,activateForBlacklistTypes); - } - - public void deleteBlacklist(String blacklistName) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // deactivate list - deativateBlacklistForAllTypes(blacklistName); - - // unshare list - doUnshareBlacklist(blacklistName); - - // delete the file - deleteBlacklistFile(blacklistName); - } - - public void shareBlacklist(String blacklistName) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // share blacklist - this.doShareBlacklist(blacklistName); - } - - public void unshareBlacklist(String blacklistName) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // share blacklist - this.doUnshareBlacklist(blacklistName); - } - - public void activateBlacklist(String blacklistName, String[] activateForBlacklistTypes) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // check if we know all types 
passed to this function - checkForKnownBlacklistTypes(activateForBlacklistTypes); - - // activate blacklist - activateBlacklistForTypes(blacklistName, activateForBlacklistTypes); - } - - public void deactivateBlacklist(String blacklistName, String[] deactivateForBlacklistTypes) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - - // check if we know all types passed to this function - checkForKnownBlacklistTypes(deactivateForBlacklistTypes); - - // activate blacklist - deactivateBlacklistForTypes(blacklistName, deactivateForBlacklistTypes); - } - - public void addBlacklistItem(String blacklistName, String blacklistItem) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - if ((blacklistItem == null)||(blacklistItem.length() == 0)) - throw new IllegalArgumentException("Blacklist item must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // prepare item - blacklistItem = prepareBlacklistItem(blacklistItem); - - // TODO: check if the entry is already in there - - // append the line to the file - addBlacklistItemToFile(blacklistItem, blacklistName); - - // pass the entry to the blacklist engine - addBlacklistItemToBlacklist(blacklistItem, blacklistName); - } - - public void removeBlacklistItem(String blacklistName, String blacklistItem) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - if ((blacklistItem == null)||(blacklistItem.length() == 0)) - throw new IllegalArgumentException("Blacklist item must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // prepare item - blacklistItem = prepareBlacklistItem(blacklistItem); - - // remove blacklist from file - removeBlacklistItemFromBlacklistFile(blacklistItem,blacklistName); - - // remove it from the blacklist engine - removeBlacklistItemFromBlacklist(blacklistItem,blacklistName); - } - - public void importBlacklist(String blacklistName) throws IOException, SOAPException { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) 
{ - // create blacklist - createBlacklistFile(blacklistName); - } - - // get attachment - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the request message - Message reqMsg = msgContext.getRequestMessage(); - - // getting the attachment implementation - Attachments messageAttachments = reqMsg.getAttachmentsImpl(); - if (messageAttachments == null) { - throw new AxisFault("Attachments not supported"); - } - - int attachmentCount= messageAttachments.getAttachmentCount(); - if (attachmentCount == 0) - throw new AxisFault("No attachment found"); - else if (attachmentCount != 1) - throw new AxisFault("Too many attachments as expected."); - - // getting the attachments - AttachmentPart[] attachments = (AttachmentPart[])messageAttachments.getAttachments().toArray(new AttachmentPart[attachmentCount]); - - // getting the content of the attachment - DataHandler dh = attachments[0].getDataHandler(); - - PrintWriter writer = null; - BufferedReader reader = null; - try { - // getting a reader - reader = new BufferedReader(new InputStreamReader(dh.getInputStream(),"UTF-8")); - - // getting blacklist file writer - writer = getBlacklistFileWriter(blacklistName); - - // read new item - String blacklistItem = null; - while ((blacklistItem = reader.readLine()) != null) { - // convert it into a proper format - blacklistItem = prepareBlacklistItem(blacklistItem); - - // TODO: check if the item already exits - - // write item to blacklist file - writer.println(blacklistItem); - writer.flush(); - - // inform blacklist engine about new item - addBlacklistItemToBlacklist(blacklistItem, blacklistName); - } - } finally { - if (reader != null) try { reader.close(); } catch (Exception e) {/* */} - if (writer != null) try { writer.close(); } catch (Exception e) {/* */} - } - } - - public String[] getBlacklistTypes() throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // return supported types - return getSupportedBlacklistTypeArray(); - } - - private void addBlacklistItemToBlacklist(String blacklistItem, String blacklistName) { - // split the item into host part and path - String[] itemParts = getBlacklistItemParts(blacklistItem); - - // getting the supported blacklist types - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - - // loop through the various types - for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { - - // if the current blacklist is activated for the type, add the item to the list - if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + BLACKLISTS,blacklistName)) { - plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],itemParts[0], itemParts[1]); - } - } - } - - private void addBlacklistItemToFile(String blacklistItem, String blacklistName) throws AxisFault { - PrintWriter pw = null; - try { - pw = getBlacklistFileWriter(blacklistName); - pw.println(blacklistItem); - pw.flush(); - pw.close(); - } catch (IOException e) { - throw new AxisFault("Unable to append blacklist entry.",e); - } finally { - if (pw != null) try { pw.close(); } catch (Exception e){ /* */} - } - } - - private PrintWriter getBlacklistFileWriter(String blacklistName) throws AxisFault { - try { - return new PrintWriter(new FileWriter(getBlacklistFile(blacklistName), true)); - } catch (IOException e) { - throw new AxisFault("Unable to open blacklist file.",e); - } - } - - private void 
removeBlacklistItemFromBlacklistFile(String blacklistItem, String blacklistName) { - // load blacklist data from file - ArrayList list = listManager.getListArray(getBlacklistFile(blacklistName)); - - // delete the old entry from file - if (list != null) { - for (int i=0; i < list.size(); i++) { - if (((String)list.get(i)).equals(blacklistItem)) { - list.remove(i); - break; - } - } - listManager.writeList(getBlacklistFile(blacklistName), (String[])list.toArray(new String[list.size()])); - } - } - - private void removeBlacklistItemFromBlacklist(String blacklistItem, String blacklistName) { - String[] itemParts = getBlacklistItemParts(blacklistItem); - - // getting the supported blacklist types - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - - // loop through the various types - for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { - - // if the current blacklist is activated for the type, remove the item from the list - if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + BLACKLISTS,blacklistName)) { - plasmaSwitchboard.urlBlacklist.remove(supportedBlacklistTypes[blTypes],itemParts[0], itemParts[1]); - } - } - } - - private String prepareBlacklistItem(String blacklistItem) { - if (blacklistItem == null) throw new NullPointerException("Item is null"); - - // cut of heading http:// - if (blacklistItem.startsWith("http://") ){ - blacklistItem = blacklistItem.substring("http://".length()); - } - - // adding missing parts - int pos = blacklistItem.indexOf("/"); - if (pos < 0) { - // add default empty path pattern - blacklistItem = blacklistItem + "/.*"; - } - return blacklistItem; - } - - private String[] getBlacklistItemParts(String blacklistItem) { - if (blacklistItem == null) throw new NullPointerException("Item is null"); - - int pos = blacklistItem.indexOf("/"); - if (pos == -1) throw new IllegalArgumentException("Item format is not correct."); - - return new String[] { - blacklistItem.substring(0, pos), - blacklistItem.substring(pos + 1) - }; - } - - /* not used - private String[] getSharedBlacklistArray() { - String sharedBlacklists = this.switchboard.getConfig(BLACKLIST_SHARED, ""); - String[] supportedBlacklistTypeArray = sharedBlacklists.split(","); - return supportedBlacklistTypeArray; - } - */ - - private File getBlacklistFile(String blacklistName) { - File blacklistFile = new File(listManager.listsPath, blacklistName); - return blacklistFile; - } - - private boolean blacklistExists(String blacklistName) { - File blacklistFile = getBlacklistFile(blacklistName); - return blacklistFile.exists(); - } - - /* not used - private HashSet getSharedBlacklistSet() { - HashSet supportedTypesSet = new HashSet(Arrays.asList(getSharedBlacklistArray())); - return supportedTypesSet; - } - */ - - private String[] getSupportedBlacklistTypeArray() { - String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING; - String[] supportedBlacklistTypeArray = supportedBlacklistTypesStr.split(","); - return supportedBlacklistTypeArray; - } - - private void createBlacklistFile(String blacklistName) throws IOException { - File newFile = getBlacklistFile(blacklistName); - newFile.createNewFile(); - } - - private void deleteBlacklistFile(String blacklistName) { - File BlackListFile = new File(listManager.listsPath, blacklistName); - BlackListFile.delete(); - } - - private void doShareBlacklist(String blacklistName) { - listManager.addListToListslist(BLACKLIST_SHARED, blacklistName); - } - - private void doUnshareBlacklist(String 
blacklistName) { - listManager.removeListFromListslist(BLACKLIST_SHARED, blacklistName); - } - - private void initBlacklistManager() { - // init Manager properties - if (listManager.switchboard == null) - listManager.switchboard = (plasmaSwitchboard) this.switchboard; - - if (listManager.listsPath == null) - listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig(LIST_MANAGER_LISTS_PATH, "DATA/LISTS")); - } - - /* not used - private void ativateBlacklistForAllTypes(String blacklistName) { - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - this.activateBlacklistForTypes(blacklistName,supportedBlacklistTypes); - } - */ - - private void activateBlacklistForTypes(String blacklistName, String[] activateForBlacklistTypes) { - if (activateForBlacklistTypes == null) return; - - for (int blTypes=0; blTypes < activateForBlacklistTypes.length; blTypes++) { - listManager.addListToListslist(activateForBlacklistTypes[blTypes] + BLACKLISTS, blacklistName); - } - } - - private void deativateBlacklistForAllTypes(String blacklistName) { - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - this.deactivateBlacklistForTypes(blacklistName,supportedBlacklistTypes); - } - - private void deactivateBlacklistForTypes(String blacklistName, String[] deactivateForBlacklistTypes) { - if (deactivateForBlacklistTypes == null) return; - - for (int blTypes=0; blTypes < deactivateForBlacklistTypes.length; blTypes++) { - listManager.removeListFromListslist(deactivateForBlacklistTypes[blTypes] + BLACKLISTS, blacklistName); - } - } - - private HashSet getSupportedBlacklistTypeSet() { - HashSet supportedTypesSet = new HashSet(Arrays.asList(getSupportedBlacklistTypeArray())); - return supportedTypesSet; - } - - private void checkForKnownBlacklistTypes(String[] types) throws AxisFault { - if (types == null) return; - - // get kown blacklist types - HashSet supportedTypesSet = getSupportedBlacklistTypeSet(); - - // check if we know all types stored in the array - for (int i=0; i < types.length; i++) { - if (!supportedTypesSet.contains(types[i])) - throw new AxisFault("Unknown blaclist type '" + types[i] + "' at position " + i); - } - } - -} diff --git a/source/de/anomic/soap/services/BookmarkService.java b/source/de/anomic/soap/services/BookmarkService.java deleted file mode 100644 index f837445a6..000000000 --- a/source/de/anomic/soap/services/BookmarkService.java +++ /dev/null @@ -1,511 +0,0 @@ -//BookmarkService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. 
-// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - - -package de.anomic.soap.services; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashMap; -import java.util.HashSet; - -import javax.activation.DataHandler; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.Attachments; -import org.w3c.dom.Document; - -import de.anomic.data.bookmarksDB; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyNewsPool; -import de.anomic.yacy.yacyNewsRecord; - -public class BookmarkService extends AbstractService { - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_BOOKMARK_LIST_GET_XML = "xml/bookmarks/posts/get.xml"; - private static final String TEMPLATE_BOOKMARK_LIST_ALL_XML = "xml/bookmarks/posts/all.xml"; - private static final String TEMPLATE_BOOKMARK_TAGS_XML = "xml/bookmarks/tags/get.xml"; - - /** - * @return a handler to the YaCy Bookmark DB - */ - private bookmarksDB getBookmarkDB() { - assert (this.switchboard != null) : "Switchboard object is null"; - assert (this.switchboard instanceof plasmaSwitchboard) : "Incorrect switchboard object"; - assert (((plasmaSwitchboard)this.switchboard).bookmarksDB != null) : "Bookmark DB is null"; - - return ((plasmaSwitchboard)this.switchboard).bookmarksDB; - } - - /** - * @return returns the input stream of a soap attachment - * @throws AxisFault if no attachment was found or attachments are not supported - * @throws SOAPException if attachment decoding didn't work - * @throws IOException on attachment read errors - */ - private InputStream getAttachmentInputstream() throws AxisFault, SOAPException, IOException { - // 
get the current message context - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the request message - Message reqMsg = msgContext.getRequestMessage(); - - // getting the attachment implementation - Attachments messageAttachments = reqMsg.getAttachmentsImpl(); - if (messageAttachments == null) { - throw new AxisFault("Attachments not supported"); - } - - int attachmentCount= messageAttachments.getAttachmentCount(); - if (attachmentCount == 0) - throw new AxisFault("No attachment found"); - else if (attachmentCount != 1) - throw new AxisFault("Too many attachments as expected."); - - // getting the attachments - AttachmentPart[] attachments = (AttachmentPart[])messageAttachments.getAttachments().toArray(new AttachmentPart[attachmentCount]); - - // getting the content of the attachment - DataHandler dh = attachments[0].getDataHandler(); - - // return the input stream - return dh.getInputStream(); - } - - /** - * Converts an array of tags into a HashSet - * @param tagArray the array of tags - * @return the HashSet - */ - private HashSet tagArrayToHashSet(String[] tagArray) { - HashSet tagSet = new HashSet(); - if (tagArray == null) return tagSet; - - for (int i=0; i < tagArray.length; i++) { - String nextTag = tagArray[i].trim(); - if (nextTag.length() > 0) tagSet.add(nextTag); - } - - return tagSet; - } - - /** - * Converts the tag array into a space separated list - * @param tagArray the tag array - * @return space separated list of tags - */ - private String tagArrayToSepString(String[] tagArray, String sep) { - StringBuffer buffer = new StringBuffer(); - - for (int i=0; i < tagArray.length; i++) { - String nextTag = tagArray[i].trim(); - if (nextTag.length() > 0) { - if (i > 0) buffer.append(sep); - buffer.append(nextTag); - } - } - - return buffer.toString(); - } - - /** - * To publish a YaCy news message that a new bookmark was added. 
- * This is only done for public bookmarks - * @param url the url of the bookmark - * @param title bookmark title - * @param description bookmark description - * @param tags array of tags - */ - private void publisNewBookmarkNews(String url, String title, String description, String[] tags) { - if (title == null) title = ""; - if (description == null) description = ""; - if (tags == null || tags.length == 0) tags = new String[]{"unsorted"}; - - // convert tag array into hashset - String tagString = tagArrayToSepString(tags," "); - - // create a news message - HashMap map = new HashMap(); - map.put("url", url.replace(',', '|')); - map.put("title", title.replace(',', ' ')); - map.put("description", description.replace(',', ' ')); - map.put("tags", tagString); - yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_BOOKMARK_ADD, map)); - } - - /** - * Sets the properties of a {@link bookmarksDB.Bookmark} object - * @param isEdit specifies if we are in edit mode or would like to create a new bookmark - * @param bookmark the {@link bookmarksDB.Bookmark} object - * - * @param isPublic specifies if the bookmark is public - * @param url the url of the bookmark - * @param title bookmark title - * @param description bookmark description - * @param tags array of tags - */ - private void setBookmarkProperties(boolean isEdit, bookmarksDB.Bookmark bookmark, String url, String title, String description, Boolean isPublic, String[] tags) { - - if (!isEdit) { - if (url == null || url.length()==0) throw new IllegalArgumentException("The url must not be null or empty"); - if (title == null) title = ""; - if (description == null) description = ""; - if (tags == null || tags.length == 0) tags = new String[]{"unsorted"}; - if (isPublic == null) isPublic = Boolean.FALSE; - } - - // convert tag array into hashset - HashSet tagSet = null; - if (tags != null) tagSet = tagArrayToHashSet(tags); - - // set properties - if (url != null) bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_URL, url); - if (title != null) bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_TITLE, title); - if (description != null)bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_DESCRIPTION, description); - if (isPublic != null) bookmark.setPublic(isPublic.booleanValue()); - if (tags != null) bookmark.setTags(tagSet, true); - } - - /** - * Function to add a new bookmark to the yacy bookmark DB. 
- * - * @param isPublic specifies if the bookmark is public - * @param url the url of the bookmark - * @param title bookmark title - * @param description bookmark description - * @param tags array of tags - * - * @return the url hash of the newly created bookmark - * - * @throws AxisFault if authentication failed - */ - public String addBookmark( - String url, - String title, - String description, - String[] tags, - Boolean isPublic - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (url == null || url.length()==0) throw new IllegalArgumentException("The url must not be null or empty"); - - // create new bookmark object - bookmarksDB.Bookmark bookmark = getBookmarkDB().createBookmark(url, "admin"); //FIXME: "admin" can be user.getUserName() for users with bookmarkrights - - // set bookmark properties - if(bookmark != null){ - this.setBookmarkProperties(false,bookmark,url,title,description,isPublic,tags); - if (isPublic != null && isPublic.booleanValue()) { - // create a news message - publisNewBookmarkNews(url,title,description,tags); - } - getBookmarkDB().saveBookmark(bookmark); - } else { - throw new AxisFault("Unable to create bookmark. Unknown reason."); - } - - return bookmark.getUrlHash(); - } - - /** - * Function to delete a bookmark from the yacy bookmark db - * - * @param urlHash the url hash to identify the bookmark - * - * @throws AxisFault if authentication failed - */ - public void deleteBookmarkByHash(String urlHash) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url hash must not be null or empty"); - - // delete bookmark - getBookmarkDB().removeBookmark(urlHash); - } - - public void deleteBookmarksByHash(String[] urlHashs) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (urlHashs == null || urlHashs.length==0) throw new IllegalArgumentException("The url hash array must not be null or empty"); - - for (int i=0; i < urlHashs.length; i++) { - String urlHash = urlHashs[i]; - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url hash at position " + i + " is null or empty."); - - // delete bookmark - getBookmarkDB().removeBookmark(urlHash); - } - } - - public void deleteBookmark(String url) throws AxisFault { - if (url == null || url.length()==0) throw new IllegalArgumentException("The url must not be null or empty"); - - // generating the url hash - String hash = plasmaURL.urlHash(url); - - // delete url - this.deleteBookmarkByHash(hash); - } - - public void deleteBookmarks(String[] urls) throws AxisFault { - if (urls == null || urls.length==0) throw new IllegalArgumentException("The url array must not be null or empty"); - - String[] hashs = new String[urls.length]; - for (int i=0; i < urls.length; i++) { - String url = urls[i]; - if (url == null || url.length()==0) throw new IllegalArgumentException("The url at position " + i + " is null or empty"); - - // generating the url hash - hashs[i] = plasmaURL.urlHash(url); - } - - // delete url - this.deleteBookmarksByHash(hashs); - } - - - public String bookmarkIsKnown(String url) throws AxisFault { - String urlHash = plasmaURL.urlHash(url); - return this.bookmarkIsKnownByHash(urlHash); - } - - public String bookmarkIsKnownByHash(String urlHash) throws AxisFault { - // extracting the message context - 
extractMessageContext(AUTHENTICATION_NEEDED); - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url-hash must not be null or empty"); - - // get the bookmark object - bookmarksDB.Bookmark bookmark = getBookmarkDB().getBookmark(urlHash); - - // set bookmark properties - if(bookmark == null) return null; - return bookmark.getTagsString(); - } - - /** - * Function to change the properties of a bookmark stored in the YaCy Bookmark DB - * - * @param urlHash the url hash to identify the bookmark - * @param isPublic specifies if the bookmark is public - * @param url the changed url of the bookmark - * @param title the changed bookmark title - * @param description the changed bookmark description - * @param tags the changed array of tags - * - * @return the url hash of the changed bookmark (this will be different to the urlHash input parameter if the bookmark url was changed - * - * @throws AxisFault if authentication failed - */ - public String editBookmark( - String urlHash, - String url, - String title, - String description, - String[] tags, - Boolean isPublic - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url hash must not be null or empty"); - - // getting the bookmark - bookmarksDB.Bookmark bookmark = getBookmarkDB().getBookmark(urlHash); - if (bookmark == null) throw new AxisFault("Bookmark with hash " + urlHash + " could not be found"); - - // edit values - setBookmarkProperties(true,bookmark,url,title,description,isPublic,tags); - - // return the url has (may have been changed) - return bookmark.getUrlHash(); - } - - /** - * To rename a bookmark tag - * @param oldTagName the old tag name - * @param newTagName the new name - * @throws AxisFault if authentication failed - */ - public void renameTag(String oldTagName, String newTagName) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - if (oldTagName == null || oldTagName.length()==0) throw new IllegalArgumentException("The old tag name not be null or empty"); - if (newTagName == null || newTagName.length()==0) throw new IllegalArgumentException("The nwe tag name not be null or empty"); - - boolean success = getBookmarkDB().renameTag(oldTagName,newTagName); - if (!success) throw new AxisFault("Unable to rename tag. Unknown reason."); - } - - /** - * Returns the list of bookmarks stored in the bookmark db - * @param tag the tag name for which the corresponding bookmarks should be fetched - * @param date the bookmark date - * - * @return a XML document of the following format: - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <posts>
-	 *   <post description="YaCy Bookmarks Test" extended="YaCy Bookmarks junit test" hash="c294613d42343009949c0369bc56f722" href="http://www.yacy.de/testurl2" tag="bookmarks testing yacy" time="2006-11-04T14:33:01Z"/>
-	 * </posts>
-	 * 
-	 * 
-	 * @throws AxisFault if authentication failed
-	 * @throws Exception if xml generation failed
-	 */
-	public Document getBookmarkList(String tag, String date) throws Exception {
-		
-        // extracting the message context
-        extractMessageContext(AUTHENTICATION_NEEDED);          	
-        
-        // generating the template containing the network status information
-        serverObjects args = new serverObjects();
-        args.put("extendedXML", "");
-        if (tag != null) args.put("tag",tag);
-        if (date != null) args.put("date",date);
-        
-        byte[] result = this.serverContext.writeTemplate((date != null)?TEMPLATE_BOOKMARK_LIST_GET_XML:TEMPLATE_BOOKMARK_LIST_ALL_XML, args, this.requestHeader);
-        
-        // sending back the result to the client
-        return this.convertContentToXML(result);    		
-	}
-	
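The <posts> format documented above is plain DOM output, so a client that has received the org.w3c.dom.Document from getBookmarkList could walk it with the standard JAXP/DOM API. The following sketch is illustrative only and is not part of this code base; the class and method names are hypothetical, and it assumes the document has already been fetched over SOAP.

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

// Hypothetical client-side helper: prints the href, tag and time attributes of
// every <post> element in the document returned by getBookmarkList.
public class BookmarkListPrinter {
    public static void printPosts(Document doc) {
        NodeList posts = doc.getElementsByTagName("post");
        for (int i = 0; i < posts.getLength(); i++) {
            Element post = (Element) posts.item(i);
            System.out.println(post.getAttribute("href")
                    + " [" + post.getAttribute("tag") + "] "
                    + post.getAttribute("time"));
        }
    }
}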
-	/**
-	 * Returns the list of bookmark tags for which bookmarks exists in the YaCy bookmark db
-	 * 
-	 * @return a XML document of the following format:
-	 * 
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <tags>
-	 *   <tag count="1" tag="bookmarks"/>
-	 *   <tag count="1" tag="testing"/>
-	 *   <tag count="1" tag="yacy"/>
-	 * </tags>
-	 * 
-	 * 
-	 * @throws AxisFault if authentication failed
-	 * @throws Exception if xml generation failed
-	 */	
-	public Document getBookmarkTagList() throws Exception {
-		
-        // extracting the message context
-        extractMessageContext(AUTHENTICATION_NEEDED);          	
-        
-        // generate the xml document
-        byte[] result = this.serverContext.writeTemplate(TEMPLATE_BOOKMARK_TAGS_XML, new serverObjects(), this.requestHeader);
-        
-        // sending back the result to the client
-        return this.convertContentToXML(result);    
-	}
-	
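Similarly, the <tags> document described above can be reduced to a tag-to-count map with plain DOM calls. Again a minimal, hypothetical client-side sketch, not part of this code base; the class name is illustrative and the sketch assumes the count attribute always parses as an integer.

import java.util.HashMap;
import java.util.Map;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

// Hypothetical client-side helper: collects the per-tag bookmark counts from
// the document returned by getBookmarkTagList.
public class BookmarkTagReader {
    public static Map<String, Integer> readTagCounts(Document doc) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        NodeList tags = doc.getElementsByTagName("tag");
        for (int i = 0; i < tags.getLength(); i++) {
            Element tag = (Element) tags.item(i);
            counts.put(tag.getAttribute("tag"), Integer.valueOf(tag.getAttribute("count")));
        }
        return counts;
    }
}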
-	/**
-	 * Function to import YaCy bookmarks from XML (transferred via SOAP Attachment).
- * This function expects a xml document in the same format as returned by - * function {@link #getBookmarkList(String, String)}. - * @param isPublic specifies if the imported bookmarks are public or local - * @return the amount of imported bookmarks - * - * @throws SOAPException if there is no data in the attachment - * @throws IOException if authentication failed or the attachment could not be read - */ - public int importBookmarkXML(boolean isPublic) throws SOAPException, IOException { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the attachment input stream - InputStream xmlIn = getAttachmentInputstream(); - - // import bookmarks - int importCount = getBookmarkDB().importFromXML(xmlIn, isPublic); - - // return amount of imported bookmarks - return importCount; - } - - /** - * Function to import YaCy from a html document (transfered via SOAP Attachment).
- * This function expects a well formed html document. - * - * @param baseURL the base url. This is needed to generate absolut URLs from relative URLs - * @param tags a list of bookmarks tags that should be assigned to the new bookmarks - * @param isPublic specifies if the imported bookmarks are public or local - * @return the amount of imported bookmarks - * - * @throws SOAPException if there is no data in the attachment - * @throws IOException if authentication failed or the attachment could not be read - */ - public int importHtmlBookmarkFile(String baseURL, String[] tags, boolean isPublic) throws SOAPException, IOException { - if (tags == null || tags.length == 0) tags = new String[]{"unsorted"}; - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the attachment input stream - InputStream htmlIn = getAttachmentInputstream(); - InputStreamReader htmlReader = new InputStreamReader(htmlIn,"UTF-8"); - - // import bookmarks - URL theBaseURL = new URL(baseURL); - String tagList = tagArrayToSepString(tags,","); - int importCount = getBookmarkDB().importFromBookmarks(theBaseURL,htmlReader, tagList,isPublic); - - // return amount of imported bookmarks - return importCount; - } -} diff --git a/source/de/anomic/soap/services/CrawlService.java b/source/de/anomic/soap/services/CrawlService.java deleted file mode 100644 index e856f92b9..000000000 --- a/source/de/anomic/soap/services/CrawlService.java +++ /dev/null @@ -1,229 +0,0 @@ -//CrawlService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. 
-// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; - -public class CrawlService extends AbstractService { - - private static final String GLOBALCRAWLTRIGGER = "globalcrawltrigger"; - private static final String REMOTETRIGGEREDCRAWL = "remotetriggeredcrawl"; - private static final String LOCAL_CRAWL = "localCrawl"; - private static final String CRAWL_STATE = "crawlState"; - - - /** - * Constant: template for crawling - */ - private static final String TEMPLATE_CRAWLING = "QuickCrawlLink_p.xml"; - - /** - * Function to crawl a single link with depth 0 - */ - public Document crawlSingleUrl(String crawlingURL) throws AxisFault { - return this.crawling(crawlingURL, "CRAWLING-ROOT", new Integer(0), ".*", Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, null, Boolean.TRUE); - } - - public Document crawling( - String crawlingURL, - String crawljobTitel, - Integer crawlingDepth, - String crawlingFilter, - Boolean indexText, - Boolean indexMedia, - Boolean crawlingQ, - Boolean storeHTCache, - Boolean crawlOrder, - String crawlOrderIntention, - Boolean xsstopw - ) throws AxisFault { - try { - // extracting the message context - extractMessageContext(true); - - // setting the crawling properties - serverObjects args = new serverObjects(); - args.put("url",crawlingURL); - if (crawljobTitel != null && crawljobTitel.length() > 0) - args.put("title",crawljobTitel); - if (crawlingFilter != null && crawlingFilter.length() > 0) - args.put("crawlingFilter",crawlingFilter); - if (crawlingDepth != null && crawlingDepth.intValue() > 0) - args.put("crawlingDepth",crawlingDepth.toString()); - if (indexText != null) - args.put("indexText",indexText.booleanValue()?"on":"off"); - if (indexMedia != null) - args.put("indexMedia",indexMedia.booleanValue()?"on":"off"); - if (crawlingQ != null) - args.put("crawlingQ",crawlingQ.booleanValue()?"on":"off"); - if (storeHTCache != null) - args.put("storeHTCache",storeHTCache.booleanValue()?"on":"off"); - if (crawlOrder != null) - args.put("crawlOrder",crawlOrder.booleanValue()?"on":"off"); - if (crawlOrderIntention != null) - args.put("intention",crawlOrderIntention); - if (xsstopw != null) - args.put("xsstopw",xsstopw.booleanValue()?"on":"off"); - - // triggering the crawling - byte[] result = this.serverContext.writeTemplate(TEMPLATE_CRAWLING, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - /** - * Function to pause crawling of local crawl jobs, remote crawl jobs and sending of remote crawl job triggers - * @throws AxisFault - */ - public void pauseCrawling() throws AxisFault { - this.pauseResumeCrawling(Boolean.TRUE, Boolean.TRUE, Boolean.TRUE); - } - - /** - * Function to resume crawling of local crawl jobs, remote crawl jobs and 
sending of remote crawl job triggers - * @throws AxisFault - */ - public void resumeCrawling() throws AxisFault { - this.pauseResumeCrawling(Boolean.FALSE, Boolean.FALSE, Boolean.FALSE); - } - - /** - * Function to pause or resume crawling of local crawl jobs, remote crawl jobs and sending of remote crawl job triggers - * @param localCrawl if null current status is not changed. pause local crawls if true or - * resumes local crawls if false - * @param remoteTriggeredCrawl if null current status is not changed. pause remote crawls if true or - * resumes remote crawls if false - * @param globalCrawlTrigger if null current status is not changed. stops sending of global crawl triggers to other peers if true or - * resumes sending of global crawl triggers if false - * @throws AxisFault - */ - public void pauseResumeCrawling(Boolean localCrawl, Boolean remoteTriggeredCrawl, Boolean globalCrawlTrigger) throws AxisFault { - // extracting the message context - extractMessageContext(true); - - if (localCrawl != null) { - if (localCrawl.booleanValue()) { - ((plasmaSwitchboard)this.switchboard).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); - } else { - ((plasmaSwitchboard)this.switchboard).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); - } - } - - if (remoteTriggeredCrawl != null) { - if (remoteTriggeredCrawl.booleanValue()) { - ((plasmaSwitchboard)this.switchboard).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - } else { - ((plasmaSwitchboard)this.switchboard).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - } - } - - if (globalCrawlTrigger != null) { - if (globalCrawlTrigger.booleanValue()) { - ((plasmaSwitchboard)this.switchboard).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); - } else { - ((plasmaSwitchboard)this.switchboard).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); - } - } - } - - /** - * Function to query the current state of the following crawling queues: - *
-     * <ul>
-     *   <li>local crawl jobs</li>
-     *   <li>remote crawl jobs</li>
-     *   <li>sending of remote crawl job triggers</li>
-     * </ul>
-     * @return returns a XML document in the following format
-     * 
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <crawlState>
-     * 	<localCrawl>true</localCrawl>
-     * 	<remotetriggeredcrawl>false</remotetriggeredcrawl>
-     * 	<globalcrawltrigger>false</globalcrawltrigger>
-     * </crawlState>
-     * 
- * @throws AxisFault if authentication failed - * @throws ParserConfigurationException if xml generation failed - */ - public Document getCrawlPauseResumeState() throws AxisFault, ParserConfigurationException { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - plasmaSwitchboard sb = (plasmaSwitchboard)this.switchboard; - - // creating XML document - Element xmlElement = null; - Document xmlDoc = createNewXMLDocument(CRAWL_STATE); - Element xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement(LOCAL_CRAWL); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(REMOTETRIGGEREDCRAWL); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(GLOBALCRAWLTRIGGER); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER)))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - -} diff --git a/source/de/anomic/soap/services/MessageService.java b/source/de/anomic/soap/services/MessageService.java deleted file mode 100644 index 0fdcadc5d..000000000 --- a/source/de/anomic/soap/services/MessageService.java +++ /dev/null @@ -1,321 +0,0 @@ -//MessageService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. 
-// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; - -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.data.messageBoard; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyClient; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeed; - -public class MessageService extends AbstractService { - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_MESSAGE_HEADER_LIST_XML = "Messages_p.xml"; - - /* ===================================================================== - * Other used constants - * ===================================================================== */ - private static final String MESSAGES_CATEGORY_REMOTE = "remote"; - - /** - * @return a handler to the YaCy Messages DB - */ - private messageBoard getMessageDB() { - assert (this.switchboard != null) : "Switchboard object is null"; - assert (this.switchboard instanceof plasmaSwitchboard) : "Incorrect switchboard object"; - assert (((plasmaSwitchboard)this.switchboard).messageDB != null) : "Messsage DB is null"; - - return ((plasmaSwitchboard)this.switchboard).messageDB; - } - - /** - * Function to read the identifiers of all messages stored in the message db - * @return an array of message identifiers currently stored in the message DB - * @throws IOException if authentication failed or a DB read error occured - */ - public String[] getMessageIDs() throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the messageDB - messageBoard db = getMessageDB(); - - // loop through the messages and receive the message ids - ArrayList idList = new ArrayList(db.size()); - Iterator i = getMessageDB().keys(MESSAGES_CATEGORY_REMOTE, true); - while (i.hasNext()) { - String messageKey = (String) i.next(); - if (messageKey != null) idList.add(messageKey); - } - - //return array - return (String[]) idList.toArray(new String[idList.size()]); - } - - /** - * Returns a list with the sender, subject and date of all messages stored in the message db - * - * @return a xml document of the following format - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <messages>
-	 * 	<message id="remote______2005060901120600">
-	 * 		<date>2005/06/09 01:12:06</date>
-	 * 		<from hash="peerhash">SourcePeerName</from>
-	 * 		<to>DestPeerName</to>
-	 * 		<subject><![CDATA[Message subject]]></subject>
-	 * 	</message>
-	 * </messages>
-	 * 
- * - * @throws Exception if authentication failed - */ - public Document getMessageHeaderList() throws Exception { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generate the xml document - serverObjects args = new serverObjects(); - args.put("action","list"); - - byte[] result = this.serverContext.writeTemplate(TEMPLATE_MESSAGE_HEADER_LIST_XML, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Function to geht detailes about a message stored in the message db - * @param messageID the identifier of the message to query - * @return a xml document of the following format - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <message id="remote______2005060901120600">
-	 * 	<date>2005/06/09 01:12:06</date>
-	 * 	<from hash="peerhash">sourcePeerName</from>
-	 * 	<to>destPeerName</to>
-	 * 	<subject><![CDATA[Test-Subject]]></subject>
-	 * 	<message><![CDATA[Message-Body]]></message>
-	 * </message>
-	 * 
- * - * @throws Exception if authentication failed - */ - public Document getMessage(String messageID) throws Exception { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (messageID == null || messageID.length() == 0) throw new IllegalArgumentException("The message id must not be null or empty."); - - // generate the xml document - serverObjects args = new serverObjects(); - args.put("action","view"); - args.put("object",messageID); - - byte[] result = this.serverContext.writeTemplate(TEMPLATE_MESSAGE_HEADER_LIST_XML, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Function to delete a message - * @param messageID the message identifier of the message that should be deleted - * @throws AxisFault if authentication failed or the message ID is unknown - */ - public void deleteMessage(String messageID) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (messageID == null || messageID.length() == 0) throw new IllegalArgumentException("The message id must not be null or empty."); - - // getting the messageDB - messageBoard db = getMessageDB(); - - // check if the message exists - if (db.read(messageID) == null) throw new AxisFault("Message with ID " + messageID + " does not exist."); - - // delete the message - db.remove(messageID); - } - - /** - * Function to delete multiple messages - * @param messageIDs an array of message ids - * @throws AxisFault if authentication failed or one of the message IDs is unknown - */ - public void deleteMessages(String[] messageIDs) throws AxisFault { - if (messageIDs == null || messageIDs.length == 0) throw new IllegalArgumentException("The message id array must not be null or empty."); - - // loop through the ids - for (int i=0; i < messageIDs.length; i++) { - String nextID = messageIDs[i]; - if (nextID == null || nextID.length() == 0) throw new IllegalArgumentException("The message id at position " + i + " is null or empty."); - - this.deleteMessage(nextID); - } - } - - /** - * A function to check if the destination peer will accept a message of this peer. - * @param destinationPeerHash the peer hash of the destination peer - * @return a XML document of the following format - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <messageSendPermission>
-	 * 	<permission>true</permission>
-	 * 	<response>Welcome to my peer!</response>
-	 * 	<messageSize>10240</messageSize>
-	 * 	<attachmentsize>0</attachmentsize>
-	 * </messageSendPermission>
-	 * 
- * The tag permission specifies if we are allowed to send a message to this peer. Response is a textual - * description why we are allowed or not allowed to send a message. messageSize specifies the maximum - * allowed message size. attachmentsize specifies the maximum attachment size accepted. - * - * @throws AxisFault if authentication failed or the destination peer is not reachable - * @throws ParserConfigurationException if xml generation failed - */ - public Document getMessageSendPermission(String destinationPeerHash) throws AxisFault, ParserConfigurationException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (destinationPeerHash == null || destinationPeerHash.length() == 0) throw new IllegalArgumentException("The destination peer hash must not be null or empty."); - - // get the peer from the db - yacySeed targetPeer = yacyCore.seedDB.getConnected(destinationPeerHash); - if (targetPeer == null) throw new AxisFault("The destination peer is not connected"); - - // check for permission to send message - HashMap result = yacyClient.permissionMessage(destinationPeerHash); - if (result == null) throw new AxisFault("No response received from peer"); - - boolean accepted = false; - String reason = "Unknown reason"; - if (result.containsKey("response")) { - String response = (String) result.get("response"); - if (response.equals("-1")) { - accepted = false; - reason = "request rejected"; - } else { - accepted = true; - reason = response; - } - } - - // return XML Document - Element xmlElement = null, xmlRoot; - Document xmlDoc = createNewXMLDocument("messageSendPermission"); - xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement("permission"); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(accepted))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement("response"); - xmlElement.appendChild(xmlDoc.createTextNode(reason)); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement("messageSize"); - xmlElement.appendChild(xmlDoc.createTextNode((String)result.get("messagesize"))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement("attachmentsize"); - xmlElement.appendChild(xmlDoc.createTextNode((String)result.get("attachmentsize"))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - - /** - * Function to send a message to a remote peer - * @param destinationPeerHash the peer hash of the remote peer - * @param subject the message subject - * @param message the message body - * - * @return the response status message of the remote peer. 
- * - * @throws AxisFault if authentication failed - */ - public String sendMessage(String destinationPeerHash, String subject, String message) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (destinationPeerHash == null || destinationPeerHash.length() == 0) throw new IllegalArgumentException("The destination peer hash must not be null or empty."); - if (subject == null || subject.length() == 0) throw new IllegalArgumentException("The subject must not be null or empty."); - if (message == null || message.length() == 0) throw new IllegalArgumentException("The message body must not be null or empty."); - - // convert the string into a byte array - byte[] mb; - try { - mb = message.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - mb = message.getBytes(); - } - - // send the message to the remote peer - HashMap result = yacyClient.postMessage(destinationPeerHash, subject, mb); - - // get the peer resonse - if (result == null) throw new AxisFault("No response received from peer"); - return (String) (result.containsKey("response") ? result.get("response") : "Unknown response"); - } -} diff --git a/source/de/anomic/soap/services/SearchService.java b/source/de/anomic/soap/services/SearchService.java deleted file mode 100644 index 8e6449c25..000000000 --- a/source/de/anomic/soap/services/SearchService.java +++ /dev/null @@ -1,346 +0,0 @@ -//httpdSoapService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. 
A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; - -import de.anomic.data.htmlTools; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSearchPreOrder; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; - -/** - * SOAP Service Class that will be invoked by the httpdSoapHandler - * - * @author Martin Thelian - */ -public class SearchService extends AbstractService -{ - /* ================================================================ - * Constants needed to set the template that should be used to - * fullfil the request - * ================================================================ */ - private static final String TEMPLATE_SEARCH = "yacysearch.soap"; - private static final String TEMPLATE_URLINFO = "ViewFile.soap"; - private static final String TEMPLATE_SNIPPET = "xml/snippet.xml"; - private static final String TEMPLATE_OPENSEARCH = "opensearchdescription.xml"; - private static final String TEMPLATE_SEARCHSTATS = "SearchStatistics_p.html"; - - /* ===================================================================== - * Constants needed to get search statistic info - * ===================================================================== */ - private static final int SEARCHSTATS_LOCAL_SEARCH_LOG = 1; - private static final int SEARCHSTATS_LOCAL_SEARCH_TRACKER = 2; - private static final int SEARCHSTATS_REMOTE_SEARCH_LOG = 3; - private static final int SEARCHSTATS_REMOTE_SEARCH_TRACKER = 4; - - /** - * Constructor of this class - */ - public SearchService() { - super(); - // nothing special todo here at the moment - } - - /** - * Service for doing a simple search with the standard settings - * - * @param searchString the search string that should be used - * @param maxSearchCount the maximum amount of search result that should be returned - * @param searchOrder can be a combination of YBR, Date and Quality, e.g. YBR-Date-Quality or Date-Quality-YBR - * @param searchMode can be global or local - * @param searchMode the total amount of seconds to use for the search - * @param urlMask if the urlMaskfilter parameter should be used - * @param urlMaskfilter e.g. .* - * @param prefermaskfilter - * @param category can be image or href - * - * @return an xml document containing the search results. - * - * @throws AxisFault if the service could not be executed propery. - */ - public Document search( - String searchString, - int maxSearchCount, - String searchOrder, - String searchMode, - int maxSearchTime, - boolean urlMask, - String urlMaskfilter, - String prefermaskfilter, - String category - ) - throws AxisFault { - try { - // extracting the message context - extractMessageContext(false); - - if ((searchMode == null) || !(searchMode.equalsIgnoreCase("global") || searchMode.equalsIgnoreCase("locale"))) { - searchMode = "global"; - } - - if (maxSearchCount < 0) { - maxSearchCount = 10; - } - - if (searchOrder == null || searchOrder.length() == 0) { - searchOrder = plasmaSearchPreOrder.canUseYBR() ? 
"YBR-Date-Quality" : "Date-Quality-YBR"; - } - - if (maxSearchTime < 0) { - maxSearchTime = 10; - } - - if (urlMaskfilter == null) { - urlMaskfilter = ".*"; - } - - if (prefermaskfilter == null) { - prefermaskfilter = ""; - } - - if (category == null || category.length() == 0) { - category = "href"; - } - - // setting the searching properties - serverObjects args = new serverObjects(); - args.put("search",searchString); - args.put("count",Integer.toString(maxSearchCount)); - args.put("order",searchOrder); - args.put("resource",searchMode); - args.put("time",Integer.toString(maxSearchTime)); - args.put("urlmask",(!urlMask)?"no":"yes"); - args.put("urlmaskfilter",urlMaskfilter); - args.put("prefermaskfilter",prefermaskfilter); - args.put("cat",category); - - args.put("Enter","Search"); - - // invoke servlet - serverObjects searchResult = this.serverContext.invokeServlet(TEMPLATE_SEARCH, args, this.requestHeader); - - // Postprocess search ... - int count = Integer.valueOf(searchResult.get("type_results","0")).intValue(); - for (int i=0; i < count; i++) { - searchResult.put("type_results_" + i + "_url",htmlTools.encodeUnicode2html(searchResult.get("type_results_" + i + "_url",""), false)); - searchResult.put("type_results_" + i + "_description",htmlTools.encodeUnicode2html(searchResult.get("type_results_" + i + "_description",""), true)); - searchResult.put("type_results_" + i + "_urlname",htmlTools.encodeUnicode2html(searchResult.get("type_results_" + i + "_urlname",""), true)); - } - - // format the result - byte[] result = this.serverContext.buildServletOutput(TEMPLATE_SEARCH, searchResult); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - - /** - * @param link the url - * @param viewMode one of (VIEW_MODE_AS_PLAIN_TEXT = 1, - * VIEW_MODE_AS_PARSED_TEXT = 2, - * VIEW_MODE_AS_PARSED_SENTENCES = 3) [Source: ViewFile.java] - * @return an xml document containing the url info. - * - * @throws AxisFault if the service could not be executed propery. - */ - public Document urlInfo(String urlStr, int viewMode) throws AxisFault { - try { - // getting the url hash for this url - URL url = new URL(urlStr); - String urlHash = plasmaURL.urlHash(url); - - // fetch urlInfo - return this.urlInfoByHash(urlHash, viewMode); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - /** - * @param urlHash the url hash - * @param viewMode one of (VIEW_MODE_AS_PLAIN_TEXT = 1, - * VIEW_MODE_AS_PARSED_TEXT = 2, - * VIEW_MODE_AS_PARSED_SENTENCES = 3) [Source: ViewFile.java] - * @return an xml document containing the url info. - * - * @throws AxisFault if the service could not be executed propery. 
- */ - public Document urlInfoByHash(String urlHash, int viewMode) throws AxisFault { - try { - // extracting the message context - extractMessageContext(true); - - if (urlHash == null || urlHash.trim().equals("")) { - throw new AxisFault("No Url-hash provided."); - } - - if (viewMode < 1 || viewMode > 3) { - viewMode = 2; - } - - String viewModeStr = "sentences"; - if (viewMode == 1) viewModeStr = "plain"; - else if (viewMode == 2) viewModeStr = "parsed"; - else if (viewMode == 3) viewModeStr = "sentences"; - - - // setting the properties - final serverObjects args = new serverObjects(); - args.put("urlHash",urlHash); - args.put("viewMode",viewModeStr); - - // generating the template containing the url info - byte[] result = this.serverContext.writeTemplate(TEMPLATE_URLINFO, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - public Document snippet(String url, String searchString) throws AxisFault { - try { - if (url == null || url.trim().equals("")) throw new AxisFault("No url provided."); - if (searchString == null || searchString.trim().equals("")) throw new AxisFault("No search string provided."); - - // extracting the message context - extractMessageContext(false); - - // setting the properties - final serverObjects args = new serverObjects(); - args.put("url",url); - args.put("search",searchString); - - // generating the template containing the url info - byte[] result = this.serverContext.writeTemplate(TEMPLATE_SNIPPET, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - /** - * Returns the OpenSearch-Description of this peer - * @return a XML document of the following format: - *
-	* <?xml version="1.0" encoding="UTF-8"?>
-	* <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
-	*   <ShortName>YaCy/peerName</ShortName>
-	*   <LongName>YaCy.net - P2P WEB SEARCH</LongName>
-	*   <Image type="image/gif">http://ip-address:port/env/grafics/yacy.gif</Image>
-	*   <Image type="image/vnd.microsoft.icon">http://ip-address:port/env/grafics/yacy.ico</Image>
-	*   <Language>en-us</Language>
-	*   <OutputEncoding>UTF-8</OutputEncoding>
-	*   <InputEncoding>UTF-8</InputEncoding>
-	*   <AdultContent>true</AdultContent>
-	*   <Description>YaCy is a open-source GPL-licensed software that can be used for stand-alone search engine installations or as a client for a multi-user P2P-based web indexing cluster. This is the access to peer 'peername'.</Description>
-	*   <Url type="application/rss+xml" template="http://ip-address:port/yacysearch.rss?search={searchTerms}&Enter=Search" />
-	*   <Developer>See http://developer.berlios.de/projects/yacy/</Developer>
-	*   <Query role="example" searchTerms="yacy" />
-	*   <Tags>YaCy P2P Web Search</Tags>
-	*   <Contact>See http://ip-address:port/ViewProfile.html?hash=localhash</Contact>
-	*   <Attribution>YaCy Software &copy; 2004-2006 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
-	*   <SyndicationRight>open</SyndicationRight>
-	* </OpenSearchDescription>
-	* 
- * @throws Exception - */ - public Document getOpenSearchDescription() throws Exception { - // extracting the message context - extractMessageContext(false); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_OPENSEARCH, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - private Document getSearchStatData(int page) throws Exception { - if (page < 1 || page > 4) throw new IllegalArgumentException("Illegal page number."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - serverObjects post = new serverObjects(); - post.put("page", Integer.toString(page)); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_SEARCHSTATS, post, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - public Document getLocalSearchLog() throws Exception { - return this.getSearchStatData(SEARCHSTATS_LOCAL_SEARCH_LOG); - } - - public Document getLocalSearchTracker() throws Exception { - return this.getSearchStatData(SEARCHSTATS_LOCAL_SEARCH_TRACKER); - } - - public Document getRemoteSearchLog() throws Exception { - return this.getSearchStatData(SEARCHSTATS_REMOTE_SEARCH_LOG); - } - - public Document getRemoteSearchTracker() throws Exception { - return this.getSearchStatData(SEARCHSTATS_REMOTE_SEARCH_TRACKER); - } - -} diff --git a/source/de/anomic/soap/services/ShareService.java b/source/de/anomic/soap/services/ShareService.java deleted file mode 100644 index ff68a63d4..000000000 --- a/source/de/anomic/soap/services/ShareService.java +++ /dev/null @@ -1,714 +0,0 @@ -//ShareService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -// -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. 
The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import java.io.File; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.lang.reflect.Method; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.activation.FileDataSource; -import javax.xml.soap.AttachmentPart; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.attachments.Attachments; -import org.w3c.dom.Document; - -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCodings; -import de.anomic.server.serverFileUtils; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSystem; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeed; - -public class ShareService extends AbstractService { - - private static final int FILEINFO_MD5_STRING = 0; - private static final int FILEINFO_COMMENT = 1; - - private static final int GENMD5_MD5_ARRAY = 0; - //private static final int GENMD5_MD5_STRING = 1; - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_SHARE_XML = "htdocsdefault/dir.xml"; - - /** - * @return the yacy HTDOCS directory, e.g. DATA/HTDOCS - * @throws AxisFault if the directory does not exist - */ - private File getHtDocsPath() throws AxisFault { - // get htroot path - File htdocs = new File(this.switchboard.getRootPath(), this.switchboard.getConfig("htDocsPath", "DATA/HTDOCS")); - if (!htdocs.exists()) throw new AxisFault("htDocsPath directory does not exists."); - return htdocs; - } - - /** - * @return the yacy fileshare directory, e.g. DATA/HTDOCS/share/ - * @throws AxisFault if the directory does not exist - */ - private File getShareDir() throws AxisFault { - File htdocs = getHtDocsPath(); - File share = new File(htdocs,"share/"); - if (!share.exists()) throw new AxisFault("Share directory does not exists."); - return share; - } - - /** - * Converts the relative path received as input parameter into an absolut - * path pointing to a location in the yacy file-share. 
- * @param path the relative path - * @return the absolut path - * - * @throws AxisFault if the directory does not exist - * @throws AxisFault if the directory is not a directory - * @throws AxisFault if the directory is not readable - * @throws AxisFault if the directory path is too long - * @throws AxisFault if the directory path is outside of the yacy share directory - * @throws IOException other io errors - */ - private File getWorkingDir(String path) throws IOException { - File share = getShareDir(); - - // cut of a tailing slash - if (path != null && path.startsWith("/")) path = path.substring(1); - - // construct directory - File workingDir = (path==null)?share:new File(share,path); - - if (!workingDir.exists()) - throw new AxisFault("Working directory does not exists"); - - if (!workingDir.isDirectory()) - throw new AxisFault("Working directory is not a directory"); - - if (!workingDir.canRead()) - throw new AxisFault("Working directory is not readable."); - - if (!workingDir.canWrite()) - throw new AxisFault("Working directory is not writeable."); - - if (workingDir.getAbsolutePath().length() > serverSystem.maxPathLength) - throw new AxisFault("Path name is too long"); - - if (!workingDir.getCanonicalPath().startsWith(share.getCanonicalPath())) - throw new AxisFault("Invalid path. Path does not start with " + share.getCanonicalPath()); - - return workingDir; - } - - /** - * Returns a file object representing a file in the yacy fileshare directory - * @param workingDir the current working directory (must be a subdirectory of the share directory) - * @param workingFileName the name of the file - * @return a file object pointing to a file or directory in the yacy fileshare directory - * - * @throws NullPointerException if the filename is null - * @throws AxisFault if the file name contains (back)slashes - * @throws AxisFault if the file path is too long - * @throws AxisFault if the file path is outside the yacy share directory - * @throws AxisFault if the file path is pointing to share itself - * - * @throws IOException on other io errors - */ - private File getWorkingFile(File workingDir, String workingFileName) throws AxisFault, IOException { - if (workingDir == null) throw new NullPointerException("Working dir is null"); - - // getting file-share directory - File share = getShareDir(); - - // check filename for illegal characters - if (workingFileName != null) { - if ((workingFileName.indexOf("/") != -1) || (workingFileName.indexOf("\\") != -1)) - throw new AxisFault("Filename contains illegal characters."); - } - - File workingFile = (workingFileName==null)?workingDir:new File(workingDir, workingFileName); - - if (workingFile.getAbsolutePath().length() > serverSystem.maxPathLength) - throw new AxisFault("Path name is too long"); - - if (!workingFile.getCanonicalPath().startsWith(workingDir.getCanonicalPath())) - throw new AxisFault("Invalid path. Path does not start with " + workingDir.getCanonicalPath()); - - if (share.getCanonicalPath().equals(workingFile.getCanonicalPath())) - throw new AxisFault("Invalid path. You can not operate on htroot."); - - return workingFile; - } - - /** - * Returns the md5 sum of a file - * @param theFile the file for which the MD5 sum should be calculated - * @return the md5 sum as byte array - */ - private byte[] generateFileMD5(File theFile) { - byte[] md5 = serverCodings.encodeMD5Raw(theFile); - return md5; - } - - /** - * Returns the hex. 
representation of a md5 sum array - * @param md5Array the md5 sum as byte array - * @return the string representation of the md5 sum - */ - private String convertMD5ArrayToString(byte[] md5Array) { - String md5s = serverCodings.encodeHex(md5Array); - return md5s; - } - - /** - * Returns a file object representing the md5-file that belongs to a regular yacy fileshare file - * @param theFile the original file - * @return the md5 file that belongs to the original file - * - * @throws IOException - */ - private File getFileMD5File(File theFile) throws IOException { - final File md5File = new File(theFile.getCanonicalPath() + ".md5"); - return md5File; - } - - private void deleteFileMD5File(File theFile) throws IOException { - File md5File = getFileMD5File(theFile); - if (md5File.exists()) { - md5File.delete(); - } - } - - /** - * Generates a md5 sum of a file and store it together with an optional comment - * in a special md5 file. - * @param theFile the original file - * @param comment description of the file - * @return an Object array containing - *
-     * <ul>
-     *   <li>[{@link GENMD5_MD5_ARRAY}] the md5 sum of the file as byte array</li>
-     *   <li>[{@link GENMD5_MD5_STRING}] the md5 sum of the file as string</li>
-     * </ul>
- * @throws UnsupportedEncodingException should never occur - * @throws IOException if the md5 file could not be written or the source file could not be read - */ - private Object[] generateMD5File(File theFile, String comment) throws UnsupportedEncodingException, IOException { - if (comment == null) comment = ""; - - // calculate md5 - byte[] md5b = generateFileMD5(theFile); - - // convert md5 sum to string - String md5s = convertMD5ArrayToString(md5b); - - // write comment + md5 to file - File md5File = getFileMD5File(theFile); - if (md5File.exists()) md5File.delete(); - serverFileUtils.write((md5s + "\n" + comment).getBytes("UTF-8"), md5File); - - return new Object[]{md5b,md5s}; - } - - /** - * Returns the content of the md5-file that belongs to a regular yacy file-share file - * @param theFile the regular file-share file - * @return an array containing - *
-     * <ul>
-     *   <li>[{@link FILEINFO_MD5_STRING}] the md5 sum of the file as string</li>
-     *   <li>[{@link FILEINFO_COMMENT}] the comment</li>
-     * </ul>
- * @throws IOException if the md5 file could not be read - */ - private String[] readFileInfo(File theFile) throws IOException { - File md5File = getFileMD5File(theFile); - - String md5s = ""; - String description = ""; - if (md5File.exists()) { - try { - md5s = new String(serverFileUtils.read(md5File),"UTF-8"); - int pos = md5s.indexOf('\n'); - if (pos >= 0) { - description = md5s.substring(pos + 1); - md5s = md5s.substring(0, pos); - } - } catch (IOException e) {/* */} - } - return new String[]{md5s,description}; - } - - private String readFileComment(File theFile) throws IOException { - String[] info = readFileInfo(theFile); - return info[FILEINFO_COMMENT]; - } - - private String readFileMD5String(File theFile) throws IOException { - String[] info = readFileInfo(theFile); - return info[FILEINFO_MD5_STRING]; - } - - private String yacyhURL(yacySeed seed, String filename, String md5) throws AxisFault { - try { - // getting the template class file - Class c = this.serverContext.getProvider().loadClass(this.serverContext.getServletClassFile(TEMPLATE_SHARE_XML)); - Method m = c.getMethod("yacyhURL", new Class[]{yacySeed.class,String.class,String.class}); - - // invoke the desired method - return (String) m.invoke(null, new Object[] {seed,filename,md5}); - } catch (Exception e) { - throw new AxisFault("Unable to generate the yacyhURL"); - } - } - - private void indexPhrase(String urlstring, String phrase, String descr, byte[] md5) throws AxisFault { - try { - // getting the template class file - Class c = this.serverContext.getProvider().loadClass(this.serverContext.getServletClassFile(TEMPLATE_SHARE_XML)); - Method m = c.getMethod("indexPhrase", new Class[]{plasmaSwitchboard.class,String.class,String.class,String.class,byte[].class}); - - // invoke the desired method - m.invoke(null, new Object[] {this.switchboard,urlstring,phrase,(descr==null)?"":descr,md5}); - } catch (Exception e) { - throw new AxisFault("Unable to index the file"); - } - } - - private void deletePhrase(String urlstring, String phrase, String descr) throws AxisFault { - try { - // getting the template class file - Class c = this.serverContext.getProvider().loadClass(this.serverContext.getServletClassFile(TEMPLATE_SHARE_XML)); - Method m = c.getMethod("deletePhrase", new Class[]{plasmaSwitchboard.class,String.class,String.class,String.class}); - - // invoke the desired method - m.invoke(null, new Object[] {this.switchboard,urlstring,phrase,(descr==null)?"":descr}); - } catch (Exception e) { - throw new AxisFault("Unable to index the file"); - } - } - - private String getPhrase(String filename) { - return filename.replace('.', ' ').replace('_', ' ').replace('-', ' '); - } - - private void indexFile(File newFile, String comment, byte[] md5b) throws IOException { - if (comment == null) comment = ""; - - // getting the file name - String newFileName = newFile.getName(); - String phrase = this.getPhrase(newFileName); - - // convert md5 sum to string - String md5s = convertMD5ArrayToString(md5b); - - // index file - String urlstring = yacyhURL(yacyCore.seedDB.mySeed, newFileName, md5s); - indexPhrase(urlstring, phrase, comment, md5b); - } - - private void unIndexFile(File file) throws IOException { - String filename = file.getName(); - String phrase = this.getPhrase(filename); - - // getting file info [0=md5s,1=comment] - String[] fileInfo = readFileInfo(file); - - // delete old indexed phrases - String urlstring = yacyhURL(yacyCore.seedDB.mySeed, filename, fileInfo[FILEINFO_MD5_STRING]); - deletePhrase(urlstring, phrase, 
fileInfo[FILEINFO_COMMENT]); - } - - - private void deleteRecursive(File file) throws IOException { - if (file == null) throw new NullPointerException("File object is null"); - if (!file.exists()) return; - if (!file.canWrite()) throw new IllegalArgumentException("File object can not be deleted. No write access."); - - if (file.isDirectory()) { - // delete all subdirectories and files - File[] subFiles = file.listFiles(); - for (int i = 0; i < subFiles.length; i++) deleteRecursive(subFiles[i]); - } else { - // unindex the file - this.unIndexFile(file); - - // delete md5 file - this.deleteFileMD5File(file); - } - - // delete file / directory - file.delete(); - } - - /** - * Returns a directory listing in xml format - * @param workingDirPath a relative path within the yacy file-share - * @return the directory listing of the specified path as XML document - * - * @throws Exception if the directory does not exist or can not be read - */ - public Document getDirList(String workingDirPath) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the htDocs and sub-directory - File htdocs = getHtDocsPath(); - File workingDir = getWorkingDir(workingDirPath); - - // generate the proper path for the servlet - workingDirPath = workingDir.getCanonicalPath().substring(htdocs.getCanonicalPath().length()+1); - if (!workingDirPath.endsWith("/")) workingDirPath = workingDirPath + "/"; - - // construct arguments - this.requestHeader.put("PATH",workingDirPath); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_SHARE_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Uploads a new file into the specified subdirectory of the yacy file-share directory. 
- * The Uploaded file must be passed via SOAP Attachment - * - * @param workingDirPath a relative path within the yacy file-share - * @param indexFile specifies if the file should be indexed by yacy - * @param comment a description of the file - * - * @throws IOException - * @throws SOAPException - */ - public void uploadFile(String workingDirPath, boolean indexFile, String comment) throws IOException, SOAPException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // get the current message context - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the request message - Message reqMsg = msgContext.getRequestMessage(); - - // getting the attachment implementation - Attachments messageAttachments = reqMsg.getAttachmentsImpl(); - if (messageAttachments == null) { - throw new AxisFault("Attachments not supported"); - } - - int attachmentCount= messageAttachments.getAttachmentCount(); - if (attachmentCount == 0) - throw new AxisFault("No attachment found"); - else if (attachmentCount != 1) - throw new AxisFault("Too many attachments as expected."); - - // getting the attachments - AttachmentPart[] attachments = (AttachmentPart[])messageAttachments.getAttachments().toArray(new AttachmentPart[attachmentCount]); - - // getting the content of the attachment - DataHandler dh = attachments[0].getDataHandler(); - String newFileName = attachments[0].getContentId(); - if (newFileName == null) newFileName = "newFile"; - - // getting directory to create - File newFile = getWorkingFile(workingDir,newFileName); - if (newFile.exists()) throw new AxisFault("File '" + newFileName + "' already exists"); - - // copy datahandler content to file - serverFileUtils.copy(dh.getInputStream(),newFile); - - // generate md5 sum and index the file - Object[] info = generateMD5File(newFile,comment); - if (indexFile) indexFile(newFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } - - /** - * Creates a new directory - * @param workingDirPath a relative path within the yacy file-share - * @param newDirName the name of the new directory - * @throws IOException - */ - public void createDirectory(String workingDirPath, String newDirName) throws IOException { - if (newDirName == null || newDirName.length() == 0) throw new AxisFault("The new directory name must not be null"); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting directory to create - File newDirFile = getWorkingFile(workingDir,newDirName); - if (newDirFile.exists()) - throw new AxisFault("Directory '" + newDirName + "' already exists"); - - // create Directory - newDirFile.mkdirs(); - } - - /** - * Deletes a file or directory located in the yacy file-share directory - * @param workingDirPath a relative path within the yacy file-share - * @param nameToDelete the name of the file or directory that should be deleted. 
- * Attention: Directories will be deleted recursively - * - * @throws IOException - */ - public void delete(String workingDirPath, String nameToDelete) throws IOException { - if (nameToDelete == null || nameToDelete.length() == 0) throw new AxisFault("The file name must not be null"); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting directory or file to delete - File fileToDelete = getWorkingFile(workingDir, nameToDelete); - - // delete file/dir - this.deleteRecursive(fileToDelete); - } - - - /** - * Reads the comment assigned to a file located in the yacy file-share - * @param workingDirPath a relative path within the yacy file-share - * @param fileName the name of the file - * @return the comment assigned to a file located in the yacy file-share or an emty string if no comment is available - * @throws AxisFault - * @throws IOException - */ - public String getFileComment(String workingDirPath, String fileName) throws AxisFault, IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the working directory - File workingDir = getWorkingDir(workingDirPath); - - // getting the working file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // get the old file comment - return this.readFileComment(workingFile); - } - - /** - * Reads the MD5 checksum of a file located in the yacy file-share - * @param workingDirPatha relative path within the yacy file-share - * @param fileName the name of the file - * @return the MD5 checksum of the file or an empty string if the checksum is not available - * @throws IOException - */ - public String getFileMD5(String workingDirPath, String fileName) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the working directory - File workingDir = getWorkingDir(workingDirPath); - - // getting the working file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // get the old file comment - return this.readFileMD5String(workingFile); - } - - /** - * To download a file located in the yacy file-share. - * This function returns the requested file as soap attachment to the caller of this function. 
- * - * @param workingDirPath a relative path within the yacy file-share - * @param fileName the name of the file that should be downloaded - * @return the md5 sum of the downloaded file - * - * @throws IOException - * @throws SOAPException - */ - public String getFile(String workingDirPath, String fileName) throws IOException, SOAPException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the working directory - File workingDir = getWorkingDir(workingDirPath); - - // getting the working file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // getting the md5 string and comment - String[] info = readFileInfo(workingFile); - - // get the current message context - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the response message - Message respMsg = msgContext.getResponseMessage(); - - // creating a datasource and data handler - DataSource data = new FileDataSource(workingFile); - DataHandler attachmentFile = new DataHandler(data); - - AttachmentPart attachmentPart = respMsg.createAttachmentPart(); - attachmentPart.setDataHandler(attachmentFile); - attachmentPart.setContentId(workingFile.getName()); - - respMsg.addAttachmentPart(attachmentPart); - respMsg.saveChanges(); - - // return the md5 hash of the file as result - return info[FILEINFO_MD5_STRING]; - } - - public void renameFile(String workingDirPath, String oldFileName, String newFileName, boolean indexFile) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting file - File sourceFile = getWorkingFile(workingDir,oldFileName); - if (!sourceFile.exists()) throw new AxisFault("Source file does not exist."); - if (!sourceFile.isFile()) throw new AxisFault("Source file is not a file."); - - File destFile = getWorkingFile(workingDir,newFileName); - if (destFile.exists()) throw new AxisFault("Destination file already exists."); - - // get the old file comment - String comment = this.readFileComment(sourceFile); - - // unindex the old file and delete the old MD5 file - this.unIndexFile(sourceFile); - this.deleteFileMD5File(sourceFile); - - // rename file - sourceFile.renameTo(destFile); - - // generate MD5 file and index file - Object[] info = generateMD5File(destFile,comment); - if (indexFile) indexFile(destFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } - - /** - * To change the comment of a file located in the yacy file-share - * @param workingDirPatha relative path within the yacy file-share - * @param fileName the name of the file - * @param comment the new comment - * @param indexFile specifies if the file should be indexed by yacy - * - * @throws IOException - */ - public void changeComment(String workingDirPath, String fileName, String comment, boolean indexFile) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting wroking file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw 
new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // unindex file and delete MD5 file - this.unIndexFile(workingFile); - this.deleteFileMD5File(workingFile); - - // generate new MD5 file and index file - Object[] info = generateMD5File(workingFile,comment); - if (indexFile) indexFile(workingFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } - - public void moveFile(String sourceDirName, String destDirName, String fileName, boolean indexFile) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting source and destination directory - File sourceDir = getWorkingDir(sourceDirName); - File destDir = getWorkingDir(destDirName); - - // getting source and destination file - File sourceFile = getWorkingFile(sourceDir,fileName); - if (!sourceFile.exists()) throw new AxisFault("Source file does not exist."); - if (!sourceFile.isFile()) throw new AxisFault("Source file is not a file."); - - File destFile = getWorkingFile(destDir,fileName); - if (destFile.exists()) throw new AxisFault("Destination file already exists."); - - // getting the old comment - String comment = this.readFileComment(sourceFile); - - // unindex old file and delete MD5 file - this.unIndexFile(sourceFile); - this.deleteFileMD5File(sourceFile); - - // rename file - sourceFile.renameTo(destFile); - - // index file and generate MD5 - Object[] info = generateMD5File(destFile,comment); - if (indexFile) indexFile(destFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } -} diff --git a/source/de/anomic/soap/services/StatusService.java b/source/de/anomic/soap/services/StatusService.java deleted file mode 100644 index 0a7f8f5fb..000000000 --- a/source/de/anomic/soap/services/StatusService.java +++ /dev/null @@ -1,261 +0,0 @@ -//StatusService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. 
The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; - -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; - -public class StatusService extends AbstractService { - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - /** - * Constant: template for the network status page - */ - private static final String TEMPLATE_NETWORK_XML = "Network.xml"; - private static final String TEMPLATE_QUEUES_XML = "xml/queues_p.xml"; - private static final String TEMPLATE_STATUS_XML = "xml/status_p.xml"; - - /* ===================================================================== - * Constants needed to query the network status - * ===================================================================== */ - private static final int NETWORK_OVERVIEW = 0; - private static final int NETWORK_ACTIVE_PEERS = 1; - private static final int NETWORK_PASSIVE_PEERS = 2; - private static final int NETWORK_POTENTIAL_PEERS = 3; - - /** - * @return - * @throws Exception - * @deprecated kept for backward compatibility - */ - public Document network() throws Exception { - return this.getNetworkOverview(); - } - - public Document getNetworkOverview() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_OVERVIEW); - } - - public Document getActivePeers() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_ACTIVE_PEERS); - } - - public Document getPassivePeers() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_PASSIVE_PEERS); - } - - public Document getPotentialPeers() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_POTENTIAL_PEERS); - } - - private Document getNetworkData(int page) throws Exception { - if (page < 0 || page > 3) page = 0; - - serverObjects post = new serverObjects(); - post.put("page", Integer.toString(page)); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_NETWORK_XML, post, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - - /** - * Returns a list of peers this peer currently knows - * @param 
peerType the peer types to query. This could be - *
-     * <ul>
-     *   <li>active</li>
-     *   <li>passive</li>
-     *   <li>potential</li>
-     * </ul>
- * @param maxCount the maximum amount of records to return - * @param details if detailed information should be returned - * - * @return an XML document of the following format - *
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <peers>
-     *   <peer>
-     *     <hash>XXXXXXX</hash>
-     *     <fullname>Peer Name</fullname>
-     *     <version>0.424/01505</version>
-     *     <ppm>0</ppm>
-     *     <uptime>2 days 14:37</uptime>
-     *     <links>-</links>
-     *     <words>-</words>
-     *     <lastseen>48</lastseen>
-     *     <sendWords>-</sendWords>
-     *     <receivedWords>-</receivedWords>
-     *     <sendURLs>-</sendURLs>
-     *     <receivedURLs>-</receivedURLs>    
-     *     <age>369</age>
-     *     <seeds>61</seeds>
-     *     <connects>2</connects>
-     *     <address>127.0.0.1:8080</address>        
-     *   </peer>
-     * </peers>
-     * 
- * @throws Exception - */ - public Document peerList(String peerType, int maxCount, boolean details) throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - if (peerType == null || peerType.length() == 0) throw new IllegalArgumentException("The peer type must not be null or empty."); - if (!(peerType.equalsIgnoreCase("active") || peerType.equalsIgnoreCase("passive") || peerType.equalsIgnoreCase("Potential"))) - throw new IllegalArgumentException("Unknown peer type. Should be (active|passive|potential)"); - - // configuring output mode - serverObjects args = new serverObjects(); - if (peerType.equalsIgnoreCase("active")) args.put("page","1"); - else if (peerType.equalsIgnoreCase("passive")) args.put("page","2"); - else if (peerType.equalsIgnoreCase("potential")) args.put("page","3"); - - // specifying if the detailed list should be returned - if (details) args.put("ip","1"); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_NETWORK_XML, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - - /** - * Returns the current status of the following queues - *
-     * <ul>
-     *   <li>Indexing Queue</li>
-     *   <li>Loader Queue</li>
-     *   <li>Local Crawling Queue</li>
-     *   <li>Remote Triggered Crawling Queue</li>
-     * </ul>
- * @param localqueueCount the amount of items that should be returned. If this is null 10 items will be returned - * @param loaderqueueCount the amount of items that should be returned. This parameter will be ignored at the moment - * @param localcrawlerqueueCount the amount of items that should be returned. This parameter will be ignored at the moment - * @param remotecrawlerqueueCount the amount of items that should be returned. This parameter will be ignored at the moment - * @return a XML document containing the status information. For the detailed format, take a look into the template file - * htroot/xml/queues_p.xml - * - * @throws AxisFault if authentication failed - * @throws Exception on other unexpected errors - * - * @since 2835 - */ - public Document getQueueStatus( - Integer localqueueCount, - Integer loaderqueueCount, - Integer localcrawlerqueueCount, - Integer remotecrawlerqueueCount - ) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // passing parameters to servlet - serverObjects input = new serverObjects(); - if (localqueueCount != null) input.put("num",localqueueCount.toString()); - //if (loaderqueueCount != null) input.put("num",loaderqueueCount.toString()); - //if (localcrawlerqueueCount != null) input.put("num",localcrawlerqueueCount.toString()); - //if (remotecrawlerqueueCount != null) input.put("num",remotecrawlerqueueCount.toString()); - - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_QUEUES_XML, input, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Query status information about this peer - * @return the status-{@link Document} - * @throws Exception - */ - public Document getStatus() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_STATUS_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - - public String getPeerHash() throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // return the peer hash - return yacyCore.seedDB.mySeed.hash; - } - -} diff --git a/source/de/anomic/soap/services/admin.wsdl b/source/de/anomic/soap/services/admin.wsdl deleted file mode 100644 index f212ac2f7..000000000 --- a/source/de/anomic/soap/services/admin.wsdl +++ /dev/null @@ -1,504 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/source/de/anomic/soap/services/blacklist.wsdl b/source/de/anomic/soap/services/blacklist.wsdl deleted file mode 100644 index 2a1e21722..000000000 --- a/source/de/anomic/soap/services/blacklist.wsdl +++ /dev/null @@ -1,285 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/source/de/anomic/soap/services/bookmarks.wsdl b/source/de/anomic/soap/services/bookmarks.wsdl deleted file mode 100644 index 38c608088..000000000 --- a/source/de/anomic/soap/services/bookmarks.wsdl +++ /dev/null @@ -1,318 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/source/de/anomic/soap/services/crawl.wsdl b/source/de/anomic/soap/services/crawl.wsdl deleted file mode 100644 index 9ff191968..000000000 --- a/source/de/anomic/soap/services/crawl.wsdl +++ /dev/null @@ -1,168 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/source/de/anomic/soap/services/messages.wsdl b/source/de/anomic/soap/services/messages.wsdl deleted file mode 100644 index 2396851c3..000000000 --- a/source/de/anomic/soap/services/messages.wsdl +++ /dev/null @@ -1,181 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/source/de/anomic/soap/services/search.wsdl b/source/de/anomic/soap/services/search.wsdl deleted file mode 100644 index 2677c9ff9..000000000 --- a/source/de/anomic/soap/services/search.wsdl +++ /dev/null @@ -1,233 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
\ No newline at end of file
diff --git a/source/de/anomic/soap/services/share.wsdl deleted file mode 100644 index fb21158ab..000000000 --- a/source/de/anomic/soap/services/share.wsdl +++ /dev/null @@ -1,258 +0,0 @@
\ No newline at end of file
diff --git a/source/de/anomic/soap/services/status.wsdl deleted file mode 100644 index 49695ec88..000000000 --- a/source/de/anomic/soap/services/status.wsdl +++ /dev/null @@ -1,224 +0,0 @@
\ No newline at end of file
diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index 50690e377..99d1e22bc 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -46,7 +46,7 @@ import java.util.Hashtable; import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpc; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public class loaderThreads { @@ -83,11 +83,11 @@ public class loaderThreads { this.failed = 0; } - public void newPropLoaderThread(String name, URL url) { + public void newPropLoaderThread(String name, yacyURL url) { newThread(name, url, new propLoader()); } - public void newThread(String name, URL url, loaderProcess process) { + public void newThread(String name, yacyURL url, loaderProcess process) { Thread t = new loaderThread(url, process); threads.put(name, t); t.start(); @@ -130,13 +130,13 @@ public class loaderThreads { } protected class loaderThread extends Thread { - private URL url; + private yacyURL url; private Exception error; private loaderProcess process; private byte[] page; private boolean loaded; - public loaderThread(URL url, loaderProcess process) { + public loaderThread(yacyURL url, loaderProcess process) { this.url = url; this.process = process; this.error = null; @@ -232,7 +232,7 @@ public class loaderThreads { httpRemoteProxyConfig proxyConfig = httpRemoteProxyConfig.init("192.168.1.122", 3128); loaderThreads loader = new loaderThreads(proxyConfig); try { - loader.newPropLoaderThread("load1", new URL("http://www.anomic.de/superseed.txt")); + loader.newPropLoaderThread("load1", new yacyURL("http://www.anomic.de/superseed.txt", null)); } catch (MalformedURLException e) { }
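The hunk above migrates loaderThreads from de.anomic.net.URL to de.anomic.yacy.yacyURL; the new constructor takes the URL string plus an optional pre-computed URL hash, where null means the 12-character hash is computed lazily on first access. The following is a minimal usage sketch, not part of the commit; the class name yacyURLExample and the printed values are illustrative only, and it uses only methods that appear later in this diff (getHost, hash, newURL, toNormalform).

import java.net.MalformedURLException;
import de.anomic.yacy.yacyURL;

public class yacyURLExample {
    public static void main(String[] args) throws MalformedURLException {
        // construct without a known hash, as the migrated call sites do;
        // the hash is computed on the first call to hash()
        yacyURL seedList = new yacyURL("http://www.anomic.de/superseed.txt", null);
        System.out.println(seedList.getHost()); // www.anomic.de
        System.out.println(seedList.hash());    // lazily computed 12-character URL hash

        // resolve a relative link against a base URL via the static helper
        yacyURL relative = yacyURL.newURL("http://www.anomic.de/home/", "superseed.txt");
        System.out.println(relative.toNormalform(false, true));
    }
}

diff --git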
a/source/de/anomic/urlRedirector/urlRedirectord.java b/source/de/anomic/urlRedirector/urlRedirectord.java index 4a36021de..4e18d3fa7 100644 --- a/source/de/anomic/urlRedirector/urlRedirectord.java +++ b/source/de/anomic/urlRedirector/urlRedirectord.java @@ -10,8 +10,6 @@ import java.util.Date; import de.anomic.data.userDB; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -20,6 +18,7 @@ import de.anomic.server.serverHandler; import de.anomic.server.logging.serverLog; import de.anomic.server.serverCore.Session; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public class urlRedirectord implements serverHandler { @@ -180,7 +179,7 @@ public class urlRedirectord implements serverHandler { String reasonString = null; try { // generating URL Object - URL reqURL = new URL(this.nextURL); + yacyURL reqURL = new yacyURL(this.nextURL, null); // getting URL mimeType httpHeader header = httpc.whead(reqURL, reqURL.getHost(), 10000, null, null, switchboard.remoteProxyConfig); @@ -191,7 +190,7 @@ public class urlRedirectord implements serverHandler { header.mime()) ) { // first delete old entry, if exists - String urlhash = plasmaURL.urlHash(this.nextURL); + String urlhash = reqURL.hash(); switchboard.wordIndex.loadedURL.remove(urlhash); switchboard.noticeURL.remove(urlhash); switchboard.errorURL.remove(urlhash); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index ad1729aae..dd1c3abee 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -55,13 +55,12 @@ import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpc; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBitfield; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSearchContainer; +import de.anomic.plasma.plasmaSearchProcessing; import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; @@ -107,7 +106,7 @@ public final class yacyClient { yacyCore.log.logFine("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacting peer at " + address); // send request result = nxTools.table( - httpc.wput(new URL("http://" + address + "/yacy/hello.html"), + httpc.wput(new yacyURL("http://" + address + "/yacy/hello.html", null), yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", 12000, null, @@ -243,7 +242,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/query.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), target.getHexHash() + ".yacyh", 8000, null, @@ -274,7 +273,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/query.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), target.getHexHash() + ".yacyh", 8000, null, @@ -307,7 +306,7 @@ public final class yacyClient { // send request try { 
final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/query.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), target.getHexHash() + ".yacyh", 6000, null, @@ -390,7 +389,7 @@ public final class yacyClient { HashMap result = null; try { result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/search.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/search.html", null), target.getHexHash() + ".yacyh", 60000, null, @@ -538,7 +537,7 @@ public final class yacyClient { if (singleAbstract == null) singleAbstract = new TreeMap(); ci = new serverByteBuffer(((String) entry.getValue()).getBytes()); //System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString()); - plasmaURL.decompressIndex(singleAbstract, ci, target.hash); + plasmaSearchProcessing.decompressIndex(singleAbstract, ci, target.hash); abstractCache.put(wordhash, singleAbstract); } } @@ -583,7 +582,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + targetAddress(targetHash) + "/yacy/message.html"), + httpc.wput(new yacyURL("http://" + targetAddress(targetHash) + "/yacy/message.html", null), yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", 8000, null, @@ -619,7 +618,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + targetAddress(targetHash) + "/yacy/message.html"), + httpc.wput(new yacyURL("http://" + targetAddress(targetHash) + "/yacy/message.html", null), yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", 20000, null, @@ -663,7 +662,7 @@ public final class yacyClient { // send request try { - final URL url = new URL("http://" + targetAddress + "/yacy/transfer.html"); + final yacyURL url = new yacyURL("http://" + targetAddress + "/yacy/transfer.html", null); final HashMap result = nxTools.table( httpc.wput(url, url.getHost(), @@ -699,7 +698,7 @@ public final class yacyClient { // send request try { - final URL url = new URL("http://" + targetAddress + "/yacy/transfer.html"); + final yacyURL url = new yacyURL("http://" + targetAddress + "/yacy/transfer.html", null); final HashMap result = nxTools.table( httpc.wput(url, url.getHost(), @@ -743,11 +742,11 @@ public final class yacyClient { return "wrong protocol: " + protocol; } - public static HashMap crawlOrder(yacySeed targetSeed, URL url, URL referrer, int timeout) { - return crawlOrder(targetSeed, new URL[]{url}, new URL[]{referrer}, timeout); + public static HashMap crawlOrder(yacySeed targetSeed, yacyURL url, yacyURL referrer, int timeout) { + return crawlOrder(targetSeed, new yacyURL[]{url}, new yacyURL[]{referrer}, timeout); } - public static HashMap crawlOrder(yacySeed target, URL[] url, URL[] referrer, int timeout) { + public static HashMap crawlOrder(yacySeed target, yacyURL[] url, yacyURL[] referrer, int timeout) { assert (target != null); assert (yacyCore.seedDB.mySeed != null); assert (yacyCore.seedDB.mySeed != target); @@ -775,7 +774,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + address + "/yacy/crawlOrder.html"), + httpc.wput(new yacyURL("http://" + address + "/yacy/crawlOrder.html", null), target.getHexHash() + ".yacyh", timeout, null, @@ -842,7 +841,7 @@ public final class yacyClient { // send request try { return nxTools.table( - 
httpc.wput(new URL("http://" + address + "/yacy/crawlReceipt.html"), + httpc.wput(new yacyURL("http://" + address + "/yacy/crawlReceipt.html", null), target.getHexHash() + ".yacyh", 60000, null, @@ -991,7 +990,7 @@ public final class yacyClient { try { final ArrayList v = nxTools.strings( httpc.wput( - new URL("http://" + address + "/yacy/transferRWI.html"), + new yacyURL("http://" + address + "/yacy/transferRWI.html", null), targetSeed.getHexHash() + ".yacyh", timeout, null, @@ -1046,7 +1045,7 @@ public final class yacyClient { try { final ArrayList v = nxTools.strings( httpc.wput( - new URL("http://" + address + "/yacy/transferURL.html"), + new yacyURL("http://" + address + "/yacy/transferURL.html", null), targetSeed.getHexHash() + ".yacyh", timeout, null, @@ -1081,7 +1080,7 @@ public final class yacyClient { try { return nxTools.table( httpc.wput( - new URL("http://" + address + "/yacy/profile.html"), + new yacyURL("http://" + address + "/yacy/profile.html", null), targetSeed.getHexHash() + ".yacyh", 10000, null, @@ -1118,14 +1117,14 @@ public final class yacyClient { final HashMap result = nxTools.table( httpc.wget( - new URL("http://" + target.getPublicAddress() + "/yacy/search.html" + + new yacyURL("http://" + target.getPublicAddress() + "/yacy/search.html" + "?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + "&youare=" + target.hash + "&key=" + "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + "&count=10" + "&resource=global" + "&query=" + wordhashe + - "&network.unit.name=" + plasmaSwitchboard.getSwitchboard().getConfig("network.unit.name", yacySeed.DFLT_NETWORK_UNIT)), + "&network.unit.name=" + plasmaSwitchboard.getSwitchboard().getConfig("network.unit.name", yacySeed.DFLT_NETWORK_UNIT), null), target.getHexHash() + ".yacyh", 5000, null, diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index d5f77dea2..43ce7edc2 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -70,7 +70,6 @@ import java.util.List; import java.util.Map; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverSemaphore; @@ -755,7 +754,7 @@ public class yacyCore { } // ensure that the seed file url is configured properly - URL seedURL; + yacyURL seedURL; try{ final String seedURLStr = sb.getConfig("seedURL", ""); if (seedURLStr.length() == 0) { throw new MalformedURLException("The seed-file url must not be empty."); } @@ -765,7 +764,7 @@ public class yacyCore { )){ throw new MalformedURLException("Unsupported protocol."); } - seedURL = new URL(seedURLStr); + seedURL = new yacyURL(seedURLStr, null); } catch(MalformedURLException e) { final String errorMsg = "Malformed seed file URL '" + sb.getConfig("seedURL", "") + "'. 
" + e.getMessage(); log.logWarning("SaveSeedList: " + errorMsg); @@ -783,7 +782,7 @@ public class yacyCore { "\n\tPrevious peerType is '" + seedDB.mySeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR) + "'."); // logt = seedDB.uploadCache(seedFTPServer, seedFTPAccount, seedFTPPassword, seedFTPPath, seedURL); - logt = seedDB.uploadCache(uploader,sb, seedDB, seedURL); + logt = seedDB.uploadCache(uploader, sb, seedDB, seedURL); if (logt != null) { if (logt.indexOf("Error") >= 0) { seedDB.mySeed.put(yacySeed.PEERTYPE, prevStatus); diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 9a5a5a44c..3785d006e 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -50,7 +50,6 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Map; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.plasmaURLPattern; @@ -326,13 +325,13 @@ public class yacyNewsPool { if (record.created().getTime() == 0) return; Map attributes = record.attributes(); if (attributes.containsKey("url")){ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new URL((String) attributes.get("url")))){ + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new yacyURL((String) attributes.get("url"), null))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url")); return; } } if (attributes.containsKey("startURL")){ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new URL((String) attributes.get("startURL")))){ + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new yacyURL((String) attributes.get("startURL"), null))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL")); return; } diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index fc1e32143..193a4ea56 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -51,7 +51,6 @@ import java.util.Iterator; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; @@ -145,7 +144,7 @@ public class yacyPeerActions { yacySeed ys; String seedListFileURL; - URL url; + yacyURL url; ArrayList seedList; Iterator enu; int lc; @@ -171,7 +170,7 @@ public class yacyPeerActions { reqHeader.put(httpHeader.PRAGMA,"no-cache"); reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache"); - url = new URL(seedListFileURL); + url = new yacyURL(seedListFileURL, null); long start = System.currentTimeMillis(); header = httpc.whead(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader); long loadtime = System.currentTimeMillis() - start; diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 35199a36a..1341d41b7 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -68,7 +68,6 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; @@ 
-730,7 +729,7 @@ public final class yacySeedDB { // String seedFTPAccount, // String seedFTPPassword, // File seedFTPPath, - URL seedURL) throws Exception { + yacyURL seedURL) throws Exception { // upload a seed file, if possible if (seedURL == null) throw new NullPointerException("UPLOAD - Error: URL not given"); @@ -766,7 +765,7 @@ public final class yacySeedDB { return log; } - private ArrayList downloadSeedFile(URL seedURL) throws IOException { + private ArrayList downloadSeedFile(yacyURL seedURL) throws IOException { httpc remote = null; try { // init httpc diff --git a/source/de/anomic/net/URL.java b/source/de/anomic/yacy/yacyURL.java similarity index 53% rename from source/de/anomic/net/URL.java rename to source/de/anomic/yacy/yacyURL.java index 76f0eaaf4..ec1e2a302 100644 --- a/source/de/anomic/net/URL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -1,6 +1,6 @@ -// URL.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany -// first published 13.07.2006 on http://www.anomic.de +// yacyURL.java +// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 13.07.2006 on http://yacy.net // // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedRevision: 1986 $ @@ -22,27 +22,384 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -package de.anomic.net; +package de.anomic.yacy; // this class exsist to provide a system-wide normal form representation of urls, // and to prevent that java.net.URL usage causes DNS queries which are used in java.net. import java.io.File; +import java.net.InetAddress; import java.net.MalformedURLException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class URL { +import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.server.serverCodings; +import de.anomic.server.serverDomains; - private String protocol, host, userInfo, path, quest, ref; +public class yacyURL { + + + // TLD separation in political and cultural parts + // https://www.cia.gov/cia/publications/factbook/index.html + // http://en.wikipedia.org/wiki/List_of_countries_by_continent + + private static final String[] TLD_NorthAmericaOceania={ + // primary english-speaking countries + // english-speaking countries from central america are also included + // includes also dutch and french colonies in the caribbean sea + // and US/English/Australian military bases in asia + "EDU=US Educational", + "GOV=US Government", + "MIL=US Military", + "NET=Network", + "ORG=Non-Profit Organization", + "AN=Netherlands Antilles", + "AS=American Samoa", + "AG=Antigua and Barbuda", + "AI=Anguilla", + "AU=Australia", + "BB=Barbados", + "BZ=Belize", + "BM=Bermuda", + "BS=Bahamas", + "CA=Canada", + "CC=Cocos (Keeling) Islands", + "CK=Cook Islands", + "CX=Christmas Island", // located in the Indian Ocean, but belongs to Australia + "DM=Dominica", + "FM=Micronesia", + "FJ=Fiji", + "GD=Grenada", + "GP=Guadeloupe", + "GS=South Georgia and the South Sandwich Islands", // south of south america, but administrated by british, has only a scientific base + "GU=Guam", // strategical US basis close to Japan + "HM=Heard and McDonald Islands", // uninhabited, sub-Antarctic island, owned by Australia + "HT=Haiti", + "IO=British Indian Ocean Territory", // UK-US naval support facility in the Indian Ocean + "KI=Kiribati", // 33 coral atolls 
in the pacific, formerly owned by UK + "KN=Saint Kitts and Nevis", // islands in the carribean see + "KY=Cayman Islands", + "LC=Saint Lucia", + "MH=Marshall Islands", // formerly US atomic bomb test site, now a key installation in the US missile defense network + "MP=Northern Mariana Islands", // US strategic location in the western Pacific Ocean + "NC=New Caledonia", + "NF=Norfolk Island", + "NR=Nauru", // independent UN island + "NU=Niue", // one of world's largest coral islands + "NZ=New Zealand (Aotearoa)", + "PG=Papua New Guinea", + "PN=Pitcairn", // overseas territory of the UK + "PR=Puerto Rico", // territory of the US with commonwealth status + "PW=Palau", // was once governed by Micronesia + "Sb=Solomon Islands", + "TC=Turks and Caicos Islands", // overseas territory of the UK + "TK=Tokelau", // group of three atolls in the South Pacific Ocean, british protectorat + "TO=Tonga", + "TT=Trinidad and Tobago", + "TV=Tuvalu", // nine coral atolls in the South Pacific Ocean; in 2000, Tuvalu leased its TLD ".tv" for $50 million over a 12-year period + "UM=US Minor Outlying Islands", // nine insular United States possessions in the Pacific Ocean and the Caribbean Sea + "US=United States", + "VC=Saint Vincent and the Grenadines", + "VG=Virgin Islands (British)", + "VI=Virgin Islands (U.S.)", + "VU=Vanuatu", + "WF=Wallis and Futuna Islands", + "WS=Samoa" + }; + private static final String[] TLD_MiddleSouthAmerica = { + // primary spanish and portugese-speaking + "AR=Argentina", + "AW=Aruba", + "BR=Brazil", + "BO=Bolivia", + "CL=Chile", + "CO=Colombia", + "CR=Costa Rica", + "CU=Cuba", + "DO=Dominican Republic", + "EC=Ecuador", + "FK=Falkland Islands (Malvinas)", + "GF=French Guiana", + "GT=Guatemala", + "GY=Guyana", + "HN=Honduras", + "JM=Jamaica", + "MX=Mexico", + "NI=Nicaragua", + "PA=Panama", + "PE=Peru", + "PY=Paraguay", + "SR=Suriname", + "SV=El Salvador", + "UY=Uruguay", + "VE=Venezuela" + }; + private static final String[] TLD_EuropaRussia = { + // includes also countries that are mainly french- dutch- speaking + // and culturally close to europe + "AD=Andorra", + "AL=Albania", + "AQ=Antarctica", + "AT=Austria", + "BA=Bosnia and Herzegovina", + "BE=Belgium", + "BG=Bulgaria", + "BV=Bouvet Island", // this island is uninhabited and covered by ice, south of africa but governed by Norway + "BY=Belarus", + "CH=Switzerland", + "CS=Czechoslovakia (former)", + "CZ=Czech Republic", + "CY=Cyprus", + "DE=Germany", + "DK=Denmark", + "ES=Spain", + "EE=Estonia", + "FI=Finland", + "FO=Faroe Islands", // Viking Settlers + "FR=France", + "FX=France, Metropolitan", + "GB=Great Britain (UK)", + "GI=Gibraltar", + "GL=Greenland", + "GR=Greece", + "HR=Croatia (Hrvatska)", + "HU=Hungary", + "IE=Ireland", + "IS=Iceland", + "IT=Italy", + "LI=Liechtenstein", + "LT=Lithuania", + "LU=Luxembourg", + "LV=Latvia", + "MD=Moldova", + "MC=Monaco", + "MK=Macedonia", + "MN=Mongolia", + "MS=Montserrat", // British island in the Caribbean Sea, almost not populated because of strong vulcanic activity + "MT=Malta", + "MQ=Martinique", // island in the eastern Caribbean Sea, overseas department of France + "NATO=Nato field", + "NL=Netherlands", + "NO=Norway", + "PF=French Polynesia", // French annexed Polynesian island in the South Pacific, French atomic bomb test site + "PL=Poland", + "PM=St. 
Pierre and Miquelon", // french-administrated colony close to canada, belongs to France + "PT=Portugal", + "RO=Romania", + "RU=Russia", + "SE=Sweden", + "SI=Slovenia", + "SJ=Svalbard and Jan Mayen Islands", // part of Norway + "SM=San Marino", + "SK=Slovak Republic", + "SU=USSR (former)", + "TF=French Southern Territories", // islands in the arctic see, no inhabitants + "UK=United Kingdom", + "UA=Ukraine", + "VA=Vatican City State (Holy See)", + "YU=Yugoslavia" + }; + + private static final String[] TLD_MiddleEastWestAsia = { + // states that are influenced by islamic culture and arabic language + // includes also eurasia states and those that had been part of the former USSR and close to southwest asia + "AE=United Arab Emirates", + "AF=Afghanistan", + "AM=Armenia", + "AZ=Azerbaijan", + "BH=Bahrain", + "GE=Georgia", + "IL=Israel", + "IQ=Iraq", + "IR=Iran", + "JO=Jordan", + "KG=Kyrgyzstan", + "KZ=Kazakhstan", + "KW=Kuwait", + "LB=Lebanon", + "OM=Oman", + "QA=Qatar", + "SA=Saudi Arabia", + "SY=Syria", + "TJ=Tajikistan", + "TM=Turkmenistan", + "PK=Pakistan", + "TR=Turkey", + "UZ=Uzbekistan", + "YE=Yemen" + }; + private static final String[] TLD_SouthEastAsia = { + "BD=Bangladesh", + "BN=Brunei Darussalam", + "BT=Bhutan", + "CN=China", + "HK=Hong Kong", + "ID=Indonesia", + "IN=India", + "LA=Laos", + "NP=Nepal", + "JP=Japan", + "KH=Cambodia", + "KP=Korea (North)", + "KR=Korea (South)", + "LK=Sri Lanka", + "MY=Malaysia", + "MM=Myanmar", // formerly known as Burma + "MO=Macau", // Portuguese settlement, part of China, but has some autonomy + "MV=Maldives", // group of atolls in the Indian Ocean + "PH=Philippines", + "SG=Singapore", + "TP=East Timor", + "TH=Thailand", + "TW=Taiwan", + "VN=Viet Nam" + }; + private static final String[] TLD_Africa = { + "AO=Angola", + "BF=Burkina Faso", + "BI=Burundi", + "BJ=Benin", + "BW=Botswana", + "CF=Central African Republic", + "CG=Congo", + "CI=Cote D'Ivoire (Ivory Coast)", + "CM=Cameroon", + "CV=Cape Verde", + "DJ=Djibouti", + "DZ=Algeria", + "EG=Egypt", + "EH=Western Sahara", + "ER=Eritrea", + "ET=Ethiopia", + "GA=Gabon", + "GH=Ghana", + "GM=Gambia", + "GN=Guinea", + "GQ=Equatorial Guinea", + "GW=Guinea-Bissau", + "KE=Kenya", + "KM=Comoros", + "LR=Liberia", + "LS=Lesotho", + "LY=Libya", + "MA=Morocco", + "MG=Madagascar", + "ML=Mali", + "MR=Mauritania", + "MU=Mauritius", + "MW=Malawi", + "MZ=Mozambique", + "NA=Namibia", + "NE=Niger", + "NG=Nigeria", + "RE=Reunion", + "RW=Rwanda", + "SC=Seychelles", + "SD=Sudan", + "SH=St. 
Helena", + "SL=Sierra Leone", + "SN=Senegal", + "SO=Somalia", + "ST=Sao Tome and Principe", + "SZ=Swaziland", + "TD=Chad", + "TG=Togo", + "TN=Tunisia", + "TZ=Tanzania", + "UG=Uganda", + "ZA=South Africa", + "ZM=Zambia", + "ZR=Zaire", + "ZW=Zimbabwe", + "YT=Mayotte" + }; + private static final String[] TLD_Generic = { + "COM=US Commercial", + "AERO=", + "BIZ=", + "COOP=", + "INFO=", + "MUSEUM=", + "NAME=", + "PRO=", + "ARPA=", + "INT=International", + "ARPA=Arpanet", + "NT=Neutral Zone" + }; + + + /* + * TLDs: aero, biz, com, coop, edu, gov, info, int, mil, museum, name, net, + * org, pro, arpa AC, AD, AE, AERO, AF, AG, AI, AL, AM, AN, AO, AQ, AR, + * ARPA, AS, AT, AU, AW, AZ, BA, BB, BD, BE, BF, BG, BH, BI, BIZ, BJ, BM, + * BN, BO, BR, BS, BT, BV, BW, BY, BZ, CA, CC, CD, CF, CG, CH, CI, CK, CL, + * CM, CN, CO, COM, COOP, CR, CU, CV, CX, CY, CZ, DE, DJ, DK, DM, DO, DZ, + * EC, EDU, EE, EG, ER, ES, ET, EU, FI, FJ, FK, FM, FO, FR, GA, GB, GD, GE, + * GF, GG, GH, GI, GL, GM, GN, GOV, GP, GQ, GR, GS, GT, GU, GW, GY, HK, HM, + * HN, HR, HT, HU, ID, IE, IL, IM, IN, INFO, INT, IO, IQ, IR, IS, IT, JE, + * JM, JO, JOBS, JP, KE, KG, KH, KI, KM, KN, KR, KW, KY, KZ, LA, LB, LC, LI, + * LK, LR, LS, LT, LU, LV, LY, MA, MC, MD, MG, MH, MIL, MK, ML, MM, MN, MO, + * MOBI, MP, MQ, MR, MS, MT, MU, MUSEUM, MV, MW, MX, MY, MZ, NA, NAME, NC, + * NE, NET, NF, NG, NI, NL, NO, NP, NR, NU, NZ, OM, ORG, PA, PE, PF, PG, PH, + * PK, PL, PM, PN, PR, PRO, PS, PT, PW, PY, QA, RE, RO, RU, RW, SA, SB, SC, + * SD, SE, SG, SH, SI, SJ, SK, SL, SM, SN, SO, SR, ST, SU, SV, SY, SZ, TC, + * TD, TF, TG, TH, TJ, TK, TL, TM, TN, TO, TP, TR, TRAVEL, TT, TV, TW, TZ, + * UA, UG, UK, UM, US, UY, UZ, VA, VC, VE, VG, VI, VN, VU, WF, WS, YE, YT, + * YU, ZA, ZM, ZW + */ + + public static String dummyHash; + + private static HashMap TLDID = new HashMap(); + private static HashMap TLDName = new HashMap(); + + private static void insertTLDProps(String[] TLDList, int id) { + int p; + String tld, name; + Integer ID = new Integer(id); + for (int i = 0; i < TLDList.length; i++) { + p = TLDList[i].indexOf('='); + if (p > 0) { + tld = TLDList[i].substring(0, p).toLowerCase(); + name = TLDList[i].substring(p + 1); + TLDID.put(tld, ID); + TLDName.put(tld, name); + } + } + } + + static { + // create a dummy hash + dummyHash = ""; + for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-"; + + // assign TLD-ids and names + insertTLDProps(TLD_EuropaRussia, 0); + insertTLDProps(TLD_MiddleSouthAmerica, 1); + insertTLDProps(TLD_SouthEastAsia, 2); + insertTLDProps(TLD_MiddleEastWestAsia, 3); + insertTLDProps(TLD_NorthAmericaOceania, 4); + insertTLDProps(TLD_Africa, 5); + insertTLDProps(TLD_Generic, 6); + // the id=7 is used to flag local addresses + } + + // class variables + private String protocol, host, userInfo, path, quest, ref, hash; private int port; - public URL(String url) throws MalformedURLException { + public yacyURL(String url, String hash) throws MalformedURLException { if (url == null) throw new MalformedURLException("url string is null"); parseURLString(url); + this.hash = hash; } - public void parseURLString(String url) throws MalformedURLException { + private void parseURLString(String url) throws MalformedURLException { // identify protocol assert (url != null); url = url.trim(); @@ -100,40 +457,41 @@ public class URL { } } - public URL(File file) throws MalformedURLException { + public yacyURL(File file) throws MalformedURLException { this("file", "", -1, file.getAbsolutePath()); } - public static URL newURL(String baseURL, 
String relPath) throws MalformedURLException { + public static yacyURL newURL(String baseURL, String relPath) throws MalformedURLException { if ((baseURL == null) || (relPath.startsWith("http://")) || (relPath.startsWith("https://")) || (relPath.startsWith("ftp://")) || (relPath.startsWith("file://")) || (relPath.startsWith("smb://"))) { - return new URL(relPath); + return new yacyURL(relPath, null); } else { - return new URL(new URL(baseURL), relPath); + return new yacyURL(new yacyURL(baseURL, null), relPath); } } - public static URL newURL(URL baseURL, String relPath) throws MalformedURLException { + public static yacyURL newURL(yacyURL baseURL, String relPath) throws MalformedURLException { if ((baseURL == null) || (relPath.startsWith("http://")) || (relPath.startsWith("https://")) || (relPath.startsWith("ftp://")) || (relPath.startsWith("file://")) || (relPath.startsWith("smb://"))) { - return new URL(relPath); + return new yacyURL(relPath, null); } else { - return new URL(baseURL, relPath); + return new yacyURL(baseURL, relPath); } } - private URL(URL baseURL, String relPath) throws MalformedURLException { + private yacyURL(yacyURL baseURL, String relPath) throws MalformedURLException { if (baseURL == null) throw new MalformedURLException("base URL is null"); if (relPath == null) throw new MalformedURLException("relPath is null"); + this.hash = null; this.protocol = baseURL.protocol; this.host = baseURL.host; this.port = baseURL.port; @@ -176,12 +534,13 @@ public class URL { escape(); } - public URL(String protocol, String host, int port, String path) throws MalformedURLException { + public yacyURL(String protocol, String host, int port, String path) throws MalformedURLException { if (protocol == null) throw new MalformedURLException("protocol is null"); this.protocol = protocol; this.host = host; this.port = port; this.path = path; + this.hash = null; identRef(); identQuest(); escape(); @@ -400,11 +759,11 @@ public class URL { int r = this.host.indexOf(':'); if (r < 0) { this.port = dflt; - } else { + } else { try { - String portStr = this.host.substring(r + 1); + String portStr = this.host.substring(r + 1); if (portStr.trim().length() > 0) this.port = Integer.parseInt(portStr); - else this.port = -1; + else this.port = -1; this.host = this.host.substring(0, r); } catch (NumberFormatException e) { throw new MalformedURLException("wrong port in host fragment '" + this.host + "' of input url '" + inputURL + "'"); @@ -522,7 +881,7 @@ public class URL { this.getHost().toLowerCase() + ((defaultPort) ? 
("") : (":" + this.port)) + path; } - public boolean equals(URL other) { + public boolean equals(yacyURL other) { return (((this.protocol == other.protocol) || (this.protocol.equals(other.protocol))) && ((this.host == other.host ) || (this.host.equals(other.host))) && ((this.userInfo == other.userInfo) || (this.userInfo.equals(other.userInfo))) && @@ -537,10 +896,178 @@ public class URL { } public int compareTo(Object h) { - assert (h instanceof URL); - return this.toString().compareTo(((URL) h).toString()); + assert (h instanceof yacyURL); + return this.toString().compareTo(((yacyURL) h).toString()); } + // static methods from plasmaURL + + public static final int flagTypeID(String hash) { + return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5; + } + + public static final int flagTLDID(String hash) { + return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 28) >> 2; + } + + public static final int flagLengthID(String hash) { + return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 3); + } + + public final String hash() { + // in case that the object was initialized without a known url hash, compute it now + if (this.hash == null) this.hash = urlHashComputation(); + return this.hash; + } + + private final String urlHashComputation() { + // the url hash computation needs a DNS lookup to check if the addresses domain is local + // that causes that this method may be very slow + + assert this.hash == null; // should only be called if the hash was not computed bevore + + int p = this.host.lastIndexOf('.'); + String tld = "", dom = tld; + if (p > 0) { + tld = host.substring(p + 1); + dom = host.substring(0, p); + } + Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses + int id = (ID == null) ? 7 : ID.intValue(); // local addresses are flagged with id=7 + boolean isHTTP = this.protocol.equals("http"); + p = dom.lastIndexOf('.'); // locate subdomain + String subdom = ""; + if (p > 0) { + subdom = dom.substring(0, p); + dom = dom.substring(p + 1); + } + + // find rootpath + String pathx = new String(this.path); + if (pathx.startsWith("/")) + pathx = pathx.substring(1); + if (pathx.endsWith("/")) + pathx = pathx.substring(0, pathx.length() - 1); + p = pathx.indexOf('/'); + String rootpath = ""; + if (p > 0) { + rootpath = pathx.substring(0, p); + } + + // we collected enough information to compute the fragments that are + // basis for hashes + int l = dom.length(); + int domlengthKey = (l <= 8) ? 0 : (l <= 12) ? 1 : (l <= 16) ? 2 : 3; + byte flagbyte = (byte) (((isHTTP) ? 
0 : 32) | (id << 2) | domlengthKey); + + // combine the attributes + StringBuffer hash = new StringBuffer(12); + // form the 'local' part of the hash + hash.append(kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(toNormalform(true, true))).substring(0, 5)); // 5 chars + hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char + // form the 'global' part of the hash + hash.append(protocolHostPort(this.protocol, host, port)); // 5 chars + hash.append(kelondroBase64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char + + // return result hash + return new String(hash); + } + + private static char subdomPortPath(String subdom, int port, String rootpath) { + return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(subdom + ":" + port + ":" + rootpath)).charAt(0); + } + + private static final char rootURLFlag = subdomPortPath("www", 80, ""); + + public static final boolean probablyRootURL(String urlHash) { + return (urlHash.charAt(5) == rootURLFlag); + } + + private static String protocolHostPort(String protocol, String host, int port) { + return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(protocol + ":" + host + ":" + port)).substring(0, 5); + } + + private static String[] testTLDs = new String[] { "com", "net", "org", "uk", "fr", "de", "es", "it" }; + + public static final yacyURL probablyWordURL(String urlHash, TreeSet words) { + Iterator wi = words.iterator(); + String word; + while (wi.hasNext()) { + word = (String) wi.next(); + if ((word == null) || (word.length() == 0)) continue; + String pattern = urlHash.substring(6, 11); + for (int i = 0; i < testTLDs.length; i++) { + if (pattern.equals(protocolHostPort("http", "www." + word.toLowerCase() + "." + testTLDs[i], 80))) + try { + return new yacyURL("http://www." + word.toLowerCase() + "." 
+ testTLDs[i], null); + } catch (MalformedURLException e) { + return null; + } + } + } + return null; + } + + public static final boolean isWordRootURL(String givenURLHash, TreeSet words) { + if (!(probablyRootURL(givenURLHash))) return false; + yacyURL wordURL = probablyWordURL(givenURLHash, words); + if (wordURL == null) return false; + if (wordURL.hash().equals(givenURLHash)) return true; + return false; + } + + public static final int domLengthEstimation(String urlHash) { + // generates an estimation of the original domain length + assert (urlHash != null); + assert (urlHash.length() == 12) : "urlhash = " + urlHash; + int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); + int domLengthKey = flagbyte & 3; + switch (domLengthKey) { + case 0: + return 4; + case 1: + return 10; + case 2: + return 14; + case 3: + return 20; + } + return 20; + } + + public static int domLengthNormalized(String urlHash) { + return 255 * domLengthEstimation(urlHash) / 30; + } + + public static final int domDomain(String urlHash) { + // returns the ID of the domain of the domain + assert (urlHash != null); + assert (urlHash.length() == 12) : "urlhash = " + urlHash; + int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); + return (flagbyte & 12) >> 2; + } + + public static boolean isGlobalDomain(String urlhash) { + return domDomain(urlhash) != 7; + } + + // checks for local/global IP range and local IP + public boolean isLocal() { + InetAddress hostAddress = serverDomains.dnsResolve(this.host); // TODO: use a check with the hash first + if (hostAddress == null) /* we are offline */ return false; // it is rare to be offline in intranets + return hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); + } + + // language calculation + public static String language(yacyURL url) { + String language = "uk"; + String host = url.getHost(); + int pos = host.lastIndexOf("."); + if ((pos > 0) && (host.length() - pos == 3)) language = host.substring(pos + 1).toLowerCase(); + return language; + } + + public static void main(String[] args) { String[][] test = new String[][]{ new String[]{null, "http://www.anomic.de/home/test?x=1#home"}, @@ -570,12 +1097,12 @@ public class URL { new String[]{null, "http://diskusjion.no/index.php?s=5bad5f431a106d9a8355429b81bb0ca5&showuser=23585"} }; String environment, url; - de.anomic.net.URL aURL, aURL1; + yacyURL aURL, aURL1; java.net.URL jURL; for (int i = 0; i < test.length; i++) { environment = test[i][0]; url = test[i][1]; - try {aURL = de.anomic.net.URL.newURL(environment, url);} catch (MalformedURLException e) {aURL = null;} + try {aURL = yacyURL.newURL(environment, url);} catch (MalformedURLException e) {aURL = null;} if (environment == null) { try {jURL = new java.net.URL(url);} catch (MalformedURLException e) {jURL = null;} } else { @@ -593,7 +1120,7 @@ public class URL { // check stability: the normalform of the normalform must be equal to the normalform if (aURL != null) try { - aURL1 = new de.anomic.net.URL(aURL.toNormalform(false, true)); + aURL1 = new yacyURL(aURL.toNormalform(false, true), null); if (!(aURL1.toNormalform(false, true).equals(aURL.toNormalform(false, true)))) { System.out.println("no stability for url:"); System.out.println("aURL0=" + aURL.toString()); diff --git a/source/de/anomic/yacy/yacyVersion.java b/source/de/anomic/yacy/yacyVersion.java index b86c0e36f..4c1f67f91 100644 --- a/source/de/anomic/yacy/yacyVersion.java +++ b/source/de/anomic/yacy/yacyVersion.java @@ -41,7 +41,6 @@ import 
java.util.regex.Pattern; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverSystem; @@ -73,10 +72,10 @@ public final class yacyVersion implements Comparator, Comparable { public String dateStamp; public int svn; public boolean proRelease, mainRelease; - public URL url; + public yacyURL url; public String name; - public yacyVersion(URL url) { + public yacyVersion(yacyURL url) { this(url.getFileName()); this.url = url; } @@ -256,7 +255,7 @@ public final class yacyVersion implements Comparator, Comparable { // {promainreleases, prodevreleases, stdmainreleases, stddevreleases} Object[] a = new Object[latestReleaseLocations.size()]; for (int j = 0; j < latestReleaseLocations.size(); j++) { - a[j] = getReleases((URL) latestReleaseLocations.get(j), force); + a[j] = getReleases((yacyURL) latestReleaseLocations.get(j), force); } TreeSet[] r = new TreeSet[4]; TreeSet s; @@ -270,7 +269,7 @@ public final class yacyVersion implements Comparator, Comparable { return r; } - private static TreeSet[] getReleases(URL location, boolean force) { + private static TreeSet[] getReleases(yacyURL location, boolean force) { // get release info from a internet resource // {promainreleases, prodevreleases, stdmainreleases, stddevreleases} TreeSet[] latestRelease = (TreeSet[]) latestReleases.get(location); @@ -286,7 +285,7 @@ public final class yacyVersion implements Comparator, Comparable { return latestRelease; } - private static TreeSet[] allReleaseFrom(URL url) { + private static TreeSet[] allReleaseFrom(yacyURL url) { // retrieves the latest info about releases // this is done by contacting a release location, // parsing the content and filtering+parsing links @@ -308,7 +307,7 @@ public final class yacyVersion implements Comparator, Comparable { yacyVersion release; while (i.hasNext()) { try { - url = new URL((String) i.next()); + url = new yacyURL((String) i.next(), null); } catch (MalformedURLException e1) { continue; // just ignore invalid urls } diff --git a/source/yacy.java b/source/yacy.java index d97e5c314..d4df71164 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -74,7 +74,6 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlLURL; @@ -92,6 +91,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.enumerateFiles; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; /** @@ -412,7 +412,7 @@ public final class yacy { server.terminate(false); server.interrupt(); if (server.isAlive()) try { - URL u = new URL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port)); + yacyURL u = new yacyURL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port), null); httpc.wget(u, u.getHost(), 1000, null, null, null, null, null); // kick server serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket"); } catch (IOException ee) { diff --git a/test/de/anomic/soap/services/AbstractServiceTest.java b/test/de/anomic/soap/services/AbstractServiceTest.java deleted file mode 100644 index 6bca3292a..000000000 --- 
a/test/de/anomic/soap/services/AbstractServiceTest.java +++ /dev/null @@ -1,77 +0,0 @@ -package de.anomic.soap.services; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.rmi.Remote; -import java.util.Hashtable; -import java.util.Properties; - -import javax.xml.rpc.ServiceException; - -import junit.framework.TestCase; - -import org.apache.axis.MessageContext; -import org.apache.axis.client.Stub; -import org.apache.axis.transport.http.HTTPConstants; - -import de.anomic.http.httpd; - -public abstract class AbstractServiceTest extends TestCase { - protected static final String SOAP_HEADER_NAMESPACE = "http://http.anomic.de/header"; - protected static final String SOAP_HEADER_AUTHORIZATION = "Authorization"; - - protected static String authString; - protected static String peerPort; - protected static Remote service; - - protected void setUp() throws Exception { - this.loadConfigProperties(); - super.setUp(); - } - - protected abstract void createServiceClass() throws ServiceException; - - protected String getBaseServiceURL() { - return "http://localhost:" + peerPort + "/soap/"; - } - - protected void loadConfigProperties() throws Exception { - BufferedInputStream fileInput = null; - try { - File configFile = new File("DATA/SETTINGS/httpProxy.conf"); - System.out.println("Reading config file: " + configFile.getAbsoluteFile().toString()); - fileInput = new BufferedInputStream(new FileInputStream(configFile)); - - // load property list - Properties peerProperties = new Properties(); - peerProperties.load(fileInput); - fileInput.close(); - - // getting admin account auth string - authString = peerProperties.getProperty(httpd.ADMIN_ACCOUNT_B64MD5); - if (authString == null) throw new Exception("Unable to find authentication information."); - - peerPort = peerProperties.getProperty("port"); - if (authString == null) throw new Exception("Unable to find peer port information."); - - // creating the service class - createServiceClass(); - - // setting the authentication header - ((Stub)service).setHeader(SOAP_HEADER_NAMESPACE,SOAP_HEADER_AUTHORIZATION,authString); - - // configure axis to use HTTP 1.1 - ((Stub)service)._setProperty(MessageContext.HTTP_TRANSPORT_VERSION,HTTPConstants.HEADER_PROTOCOL_V11); - - // configure axis to use chunked transfer encoding - Hashtable userHeaderTable = new Hashtable(); - userHeaderTable.put(HTTPConstants.HEADER_TRANSFER_ENCODING, HTTPConstants.HEADER_TRANSFER_ENCODING_CHUNKED); - ((Stub)service)._setProperty(HTTPConstants.REQUEST_HEADERS,userHeaderTable); - } catch (Exception e) { - e.printStackTrace(); - } finally { - if (fileInput != null) try { fileInput.close(); } catch (Exception e){/* ignore this */} - } - } -} diff --git a/test/de/anomic/soap/services/AdminServiceTest.java b/test/de/anomic/soap/services/AdminServiceTest.java deleted file mode 100644 index 10eb7861b..000000000 --- a/test/de/anomic/soap/services/AdminServiceTest.java +++ /dev/null @@ -1,75 +0,0 @@ -package de.anomic.soap.services; - -import java.rmi.RemoteException; -import java.util.HashMap; - -import javax.xml.rpc.ServiceException; -import javax.xml.transform.TransformerException; - -import org.apache.axis.utils.XMLUtils; -import org.apache.xpath.XPathAPI; -import org.w3c.dom.DOMException; -import org.w3c.dom.Document; - -import yacy.soap.admin.AdminService; -import yacy.soap.admin.AdminServiceServiceLocator; - -public class AdminServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { 
- // construct Soap object - AdminServiceServiceLocator locator = new AdminServiceServiceLocator(); - locator.setadminEndpointAddress(getBaseServiceURL() + "admin"); - - service = locator.getadmin(); - } - - private HashMap getMessageForwardingProperties(Document xml) throws DOMException, TransformerException { - HashMap result = new HashMap(); - - result.put("msgForwardingEnabled",Boolean.valueOf(XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingEnabled").getFirstChild().getNodeValue())); - result.put("msgForwardingCmd",XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingCmd").getFirstChild().getNodeValue()); - result.put("msgForwardingTo",XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingTo").getFirstChild().getNodeValue()); - - return result; - } - - public void testMessageForwarding() throws RemoteException, TransformerException { - // backup old values - HashMap oldValues = getMessageForwardingProperties(((AdminService)service).getMessageForwarding()); - - // set new values - Boolean msgEnabled = Boolean.TRUE; - String msgCmd = "/usr/sbin/sendmail"; - String msgTo = "yacy@localhost"; - ((AdminService)service).setMessageForwarding(msgEnabled.booleanValue(),msgCmd,msgTo); - - // query configured properties - Document xml = ((AdminService)service).getMessageForwarding(); - - // check if values are equal - assertEquals(msgEnabled,Boolean.valueOf(XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingEnabled").getFirstChild().getNodeValue())); - assertEquals(msgCmd,XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingCmd").getFirstChild().getNodeValue()); - assertEquals(msgTo,XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingTo").getFirstChild().getNodeValue()); - - // print it out - System.out.println(XMLUtils.DocumentToString(xml)); - - // set back to old values - ((AdminService)service).setMessageForwarding( - ((Boolean)oldValues.get("msgForwardingEnabled")).booleanValue(), - (String)oldValues.get("msgForwardingCmd"), - (String)oldValues.get("msgForwardingTo") - ); - } - - public void testGetServerLog() throws RemoteException { - Document xml = ((AdminService)service).getServerLog(0); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testGetPeerProfile() throws RemoteException { - Document xml = ((AdminService)service).getPeerProfile("localhash"); - System.out.println(XMLUtils.DocumentToString(xml)); - } -} diff --git a/test/de/anomic/soap/services/BlacklistServiceTest.java b/test/de/anomic/soap/services/BlacklistServiceTest.java deleted file mode 100644 index 29e4063c2..000000000 --- a/test/de/anomic/soap/services/BlacklistServiceTest.java +++ /dev/null @@ -1,103 +0,0 @@ -package de.anomic.soap.services; - -import java.io.IOException; -import java.rmi.RemoteException; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.xml.rpc.ServiceException; - -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.PlainTextDataSource; -import org.apache.axis.client.Stub; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.blacklist.BlacklistService; -import yacy.soap.blacklist.BlacklistServiceServiceLocator; - -public class BlacklistServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - BlacklistServiceServiceLocator locator = new BlacklistServiceServiceLocator(); - locator.setblacklistEndpointAddress(getBaseServiceURL() + "blacklist"); - - 
service = locator.getblacklist(); - } - - public void testGetBlacklistList() throws RemoteException { - Document xml = ((BlacklistService)service).getBlacklistList(); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testBlacklist() throws RemoteException { - BlacklistService bl = ((BlacklistService)service); - - // create new blacklist - String blacklistName = "junit_test_" + System.currentTimeMillis(); - bl.createBlacklist(blacklistName,false,null); - - // share blacklist - bl.shareBlacklist(blacklistName); - - // getting supported blacklist Types - String[] blTypes = bl.getBlacklistTypes(); - - // activate blacklist - bl.activateBlacklist(blacklistName,blTypes); - - // add blacklist item - String item = "http://www.yacy.net"; - bl.addBlacklistItem(blacklistName,item); - - // getting the blacklist list - Document xml = bl.getBlacklistList(); - System.out.println(XMLUtils.DocumentToString(xml)); - - // test is listed - boolean isListed = bl.urlIsBlacklisted("proxy","http://www.yacy.net/blacklisttest"); - assertEquals(true,isListed); - - // remove blacklist item - bl.removeBlacklistItem(blacklistName,item); - - // unshare - bl.unshareBlacklist(blacklistName); - - // deactivate for proxy and dht - bl.deactivateBlacklist(blacklistName,new String[]{"proxy","dht"}); - - // delete blacklist - bl.deleteBlacklist(blacklistName); - } - - public void testBacklistImport() throws IOException { - BlacklistService bl = ((BlacklistService)service); - - // create datasource to hold the attachment content - DataSource data = new PlainTextDataSource("import.txt","www.yacy.net/.*\r\n" + - "www.yacy-websuche.de/.*"); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - part.setDataHandler(attachmentFile); - part.setContentType("text/plain"); - - // setting the attachment format that should be used - ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME); - ((Stub)service).addAttachment(part); - - // import it - String blacklistName = "junit_test_" + System.currentTimeMillis(); - bl.importBlacklist(blacklistName); - - // clear attachment - ((Stub)service).clearAttachments(); - - // delete blacklist - bl.deleteBlacklist(blacklistName); - } - -} diff --git a/test/de/anomic/soap/services/BookmarkServiceTest.java b/test/de/anomic/soap/services/BookmarkServiceTest.java deleted file mode 100644 index 2203dab05..000000000 --- a/test/de/anomic/soap/services/BookmarkServiceTest.java +++ /dev/null @@ -1,153 +0,0 @@ -package de.anomic.soap.services; - -import java.net.MalformedURLException; -import java.rmi.RemoteException; -import java.util.Date; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.xml.rpc.ServiceException; - -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.PlainTextDataSource; -import org.apache.axis.client.Stub; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.bookmarks.BookmarkService; -import yacy.soap.bookmarks.BookmarkServiceServiceLocator; -import de.anomic.data.bookmarksDB; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaURL; - -public class BookmarkServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - BookmarkServiceServiceLocator locator = new BookmarkServiceServiceLocator(); - 
locator.setbookmarksEndpointAddress(getBaseServiceURL() + "bookmarks"); - - service = locator.getbookmarks(); - } - - public void testBookmarks() throws Exception { - BookmarkService bm = ((BookmarkService)service); - - String testURL1 = "http://www.yacy.de/testurl1"; - String testURL2 = "http://www.yacy.de/testurl2"; - - // create new bookmark - String urlHash = bm.addBookmark(testURL1,"YaCy Bookmarks Test","YaCy Bookmarks junit test",new String[]{"yacy","bookmarks","testing"},false); - - // change bookmark - urlHash = bm.editBookmark(urlHash,testURL2,null,null,null,false); - - // get bookmark listing - Document xml = bm.getBookmarkList("testing",bookmarksDB.dateToiso8601(new Date(System.currentTimeMillis()))); - System.out.println(XMLUtils.DocumentToString(xml)); - - // get tag list - xml = bm.getBookmarkTagList(); - System.out.println(XMLUtils.DocumentToString(xml)); - - // rename tag - bm.renameTag("testing","tested"); - - // delete tag - bm.deleteBookmarkByHash(urlHash); - } - - public void testImportHtmlBookmarklist() throws RemoteException { - BookmarkService bm = ((BookmarkService)service); - String[] hashs = new String[5]; - - // generate the html file - StringBuffer xmlStr = new StringBuffer(); - xmlStr.append(""); - for (int i=0; i < hashs.length; i++) { - String url = "/testxmlimport" + i; - String title = "YaCy Bookmark XML Import " + i; - String hash = plasmaURL.urlHash("http://www.yacy.de"+ url); - - xmlStr.append("\t").append(title).append("\r\n"); - - hashs[i] = hash; - } - xmlStr.append(""); - - // create datasource to hold the attachment content - DataSource data = new PlainTextDataSource("bookmarks.html",xmlStr.toString()); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - part.setDataHandler(attachmentFile); - - // setting the attachment format that should be used - ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME); - ((Stub)service).addAttachment(part); - - // import xml - int importCount = bm.importHtmlBookmarkFile("http://www.yacy.de/",new String[]{"yacy","bookmarks","htmlimport"},false); - assertEquals(hashs.length,importCount); - - // query imported documents - Document xml = bm.getBookmarkList("htmlimport",null); - System.out.println(XMLUtils.DocumentToString(xml)); - - // delete imported URLS - bm.deleteBookmarksByHash(hashs); - } - - public void testImportXML() throws MalformedURLException, RemoteException { - BookmarkService bm = ((BookmarkService)service); - - String dateString = bookmarksDB.dateToiso8601(new Date(System.currentTimeMillis())); - String[] hashs = new String[5]; - - // generate xml document to import - StringBuffer xmlStr = new StringBuffer(); - xmlStr.append("\r\n") - .append("\r\n"); - - for (int i=0; i < hashs.length; i++) { - URL url = new URL("http://www.yacy.de/testxmlimport" + i); - String title = "YaCy Bookmark XML Import " + i; - String description = "YaCy Bookmarkx XML Import junit test with url " + i; - String hash = plasmaURL.urlHash(url); - String tags = "yacy bookmarks xmlimport"; - - xmlStr.append("\t\r\n"); - - hashs[i] = hash; - } - - xmlStr.append(""); - - // create datasource to hold the attachment content - DataSource data = new PlainTextDataSource("bookmarks.xml",xmlStr.toString()); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - 
-        part.setDataHandler(attachmentFile);
-
-        // setting the attachment format that should be used
-        ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME);
-        ((Stub)service).addAttachment(part);
-
-        // import xml
-        int importCount = bm.importBookmarkXML(false);
-        assertEquals(hashs.length,importCount);
-
-        // query imported documents
-        Document xml = bm.getBookmarkList("xmlimport",dateString);
-        System.out.println(XMLUtils.DocumentToString(xml));
-
-        // delete imported URLS
-        bm.deleteBookmarksByHash(hashs);
-    }
-
-}
diff --git a/test/de/anomic/soap/services/CrawlServiceTest.java b/test/de/anomic/soap/services/CrawlServiceTest.java
deleted file mode 100644
index 3c2ec5ddb..000000000
--- a/test/de/anomic/soap/services/CrawlServiceTest.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package de.anomic.soap.services;
-
-import java.rmi.RemoteException;
-
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.rpc.ServiceException;
-
-import org.apache.axis.AxisFault;
-import org.apache.axis.utils.XMLUtils;
-import org.w3c.dom.Document;
-
-import yacy.soap.crawl.CrawlService;
-import yacy.soap.crawl.CrawlServiceServiceLocator;
-
-public class CrawlServiceTest extends AbstractServiceTest {
-
-    protected void createServiceClass() throws ServiceException {
-        // construct Soap object
-        CrawlServiceServiceLocator locator = new CrawlServiceServiceLocator();
-        locator.setcrawlEndpointAddress(getBaseServiceURL() + "crawl");
-
-        service = locator.getcrawl();
-    }
-
-    public void testGetCrawlPauseResumeState() throws RemoteException {
-        Document xml = ((CrawlService)service).getCrawlPauseResumeState();
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-}
diff --git a/test/de/anomic/soap/services/MessageServiceTest.java b/test/de/anomic/soap/services/MessageServiceTest.java
deleted file mode 100644
index 8b0a8bd37..000000000
--- a/test/de/anomic/soap/services/MessageServiceTest.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package de.anomic.soap.services;
-
-import java.rmi.RemoteException;
-
-import javax.xml.rpc.ServiceException;
-
-import org.apache.axis.utils.XMLUtils;
-import org.w3c.dom.Document;
-
-import yacy.soap.messages.MessageService;
-import yacy.soap.messages.MessageServiceServiceLocator;
-
-
-
-public class MessageServiceTest extends AbstractServiceTest {
-
-    protected void createServiceClass() throws ServiceException {
-        // construct Soap object
-        MessageServiceServiceLocator locator = new MessageServiceServiceLocator();
-        locator.setmessagesEndpointAddress(getBaseServiceURL() + "messages");
-
-        service = locator.getmessages();
-    }
-
-    public void testGetMessageIDs() throws RemoteException {
-        MessageService ms = ((MessageService)service);
-        String[] IDs = ms.getMessageIDs();
-
-        StringBuffer idList = new StringBuffer();
-        for (int i=0; i < IDs.length; i++) {
-            if (i > 0) idList.append(", ");
-            idList.append(IDs[i]);
-        }
-
-        System.out.println(idList);
-    }
-
-    public void testGetMessageHeaderList() throws RemoteException {
-        MessageService ms = ((MessageService)service);
-        Document xml = ms.getMessageHeaderList();
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-
-    public void testMessage() throws RemoteException {
-        MessageService ms = ((MessageService)service);
-
-        // get message IDs
-        String[] IDs = ms.getMessageIDs();
-
-        if (IDs != null && IDs.length > 0) {
-            Document xml = ms.getMessage(IDs[0]);
-            System.out.println(XMLUtils.DocumentToString(xml));
-        }
-    }
-
-    public void testGetMessageSendPermission() throws RemoteException {
-        MessageService ms = ((MessageService)service);
-
-        Document xml = ms.getMessageSendPermission("mseSVGrNKKnw");
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-}
diff --git a/test/de/anomic/soap/services/ServiceTests.java b/test/de/anomic/soap/services/ServiceTests.java
deleted file mode 100644
index fe51621aa..000000000
--- a/test/de/anomic/soap/services/ServiceTests.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package de.anomic.soap.services;
-
-import junit.framework.Test;
-import junit.framework.TestSuite;
-
-public class ServiceTests {
-
-    public static Test suite() {
-        TestSuite suite = new TestSuite("Test for de.anomic.soap.services");
-        //$JUnit-BEGIN$
-        suite.addTestSuite(AdminServiceTest.class);
-        suite.addTestSuite(ShareServiceTest.class);
-        suite.addTestSuite(StatusServiceTest.class);
-        suite.addTestSuite(BlacklistServiceTest.class);
-        suite.addTestSuite(BookmarkServiceTest.class);
-        suite.addTestSuite(MessageServiceTest.class);
-        //$JUnit-END$
-        return suite;
-    }
-
-}
diff --git a/test/de/anomic/soap/services/ShareServiceTest.java b/test/de/anomic/soap/services/ShareServiceTest.java
deleted file mode 100644
index 0f7dde64f..000000000
--- a/test/de/anomic/soap/services/ShareServiceTest.java
+++ /dev/null
@@ -1,109 +0,0 @@
-package de.anomic.soap.services;
-
-import java.io.IOException;
-import java.util.Date;
-
-import javax.activation.DataHandler;
-import javax.activation.DataSource;
-import javax.xml.rpc.ServiceException;
-import javax.xml.soap.SOAPException;
-
-import org.apache.axis.attachments.AttachmentPart;
-import org.apache.axis.attachments.PlainTextDataSource;
-import org.apache.axis.client.Stub;
-import org.apache.axis.utils.XMLUtils;
-import org.w3c.dom.Document;
-
-import yacy.soap.share.ShareService;
-import yacy.soap.share.ShareServiceServiceLocator;
-import de.anomic.server.serverFileUtils;
-
-public class ShareServiceTest extends AbstractServiceTest {
-
-    protected void createServiceClass() throws ServiceException {
-        // construct Soap object
-        ShareServiceServiceLocator locator = new ShareServiceServiceLocator();
-        locator.setshareEndpointAddress(getBaseServiceURL() + "share");
-        service = locator.getshare();
-    }
-
-    public void testCreateDeleteDir() throws SOAPException, IOException {
-        String newDirName = "junit_test_" + System.currentTimeMillis();
-        String newFileName = "import.txt";
-
-        /* ===================================================================
-         * Create directory
-         * =================================================================== */
-        System.out.println("Creating new directory ...");
-        ((ShareService)service).createDirectory("/",newDirName);
-
-        /* ===================================================================
-         * Upload file
-         * =================================================================== */
-        System.out.println("Uploading test file ...");
-
-        // create datasource to hold the attachment content
-        String testText = "Test text of the test file";
-        DataSource data = new PlainTextDataSource(newFileName,testText);
-        DataHandler attachmentFile = new DataHandler(data);
-
-        // creating attachment part
-        AttachmentPart part = new AttachmentPart();
-        part.setDataHandler(attachmentFile);
-        part.setContentType("text/plain");
-        part.setContentId(newFileName);
-
-        // setting the attachment format that should be used
-        ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME);
-        ((Stub)service).addAttachment(part);
-        ((ShareService)service).uploadFile(newDirName,true,"jUnit Testupload at " + new Date());
-
-        // clear attachment
-        ((Stub)service).clearAttachments();
-
-        /* ===================================================================
-         * Download file
-         * =================================================================== */
-        System.out.println("Downloading test file ...");
-
-        // execute service call
-        String md5 = ((ShareService)service).getFile(newDirName,newFileName);
-
-        // get received attachments
-        Object[] attachments = ((Stub)service).getAttachments();
-
-        assertTrue(attachments.length == 1);
-        assertTrue(attachments[0] instanceof AttachmentPart);
-
-        // get datahandler
-        DataHandler dh = ((AttachmentPart)attachments[0]).getDataHandler();
-
-        // cread content
-        byte[] content = serverFileUtils.read(dh.getInputStream());
-        assertTrue(content.length > 0);
-
-        // convert it to string
-        String contentString = new String(content,"UTF-8");
-        assertEquals(testText,contentString);
-
-        /* ===================================================================
-         * Change file comment
-         * =================================================================== */
-        System.out.println("Changing file comment ...");
-        ((ShareService)service).changeComment(newDirName,newFileName,"New comment on this file",true);
-
-        /* ===================================================================
-         * Get dirlist
-         * =================================================================== */
-        System.out.println("Get dirlist ... ");
-        Document xml =((ShareService)service).getDirList(newDirName);
-        System.out.println(XMLUtils.DocumentToString(xml));
-
-        /* ===================================================================
-         * Delete directory
-         * =================================================================== */
-        System.out.println("Deleting directory and testfile ... ");
-        ((ShareService)service).delete("/",newDirName);
-    }
-
-}
diff --git a/test/de/anomic/soap/services/StatusServiceTest.java b/test/de/anomic/soap/services/StatusServiceTest.java
deleted file mode 100644
index 3d8ffa846..000000000
--- a/test/de/anomic/soap/services/StatusServiceTest.java
+++ /dev/null
@@ -1,42 +0,0 @@
-package de.anomic.soap.services;
-
-import java.rmi.RemoteException;
-
-import javax.xml.rpc.ServiceException;
-
-import org.apache.axis.utils.XMLUtils;
-import org.w3c.dom.Document;
-
-import yacy.soap.status.StatusService;
-import yacy.soap.status.StatusServiceServiceLocator;
-
-public class StatusServiceTest extends AbstractServiceTest {
-
-    protected void createServiceClass() throws ServiceException {
-        // construct Soap object
-        StatusServiceServiceLocator locator = new StatusServiceServiceLocator();
-        locator.setstatusEndpointAddress(getBaseServiceURL() + "status");
-
-        service = locator.getstatus();
-    }
-
-    public void testNetworkOverview() throws RemoteException {
-        Document xml = ((StatusService)service).getNetworkOverview();
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-
-    public void testGetQueueStatus() throws RemoteException {
-        Document xml = ((StatusService)service).getQueueStatus(10,10,10,10);
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-
-    public void testStatus() throws RemoteException {
-        Document xml = ((StatusService)service).getStatus();
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-
-    public void testPeerList() throws RemoteException {
-        Document xml = ((StatusService)service).peerList("active",300,true);
-        System.out.println(XMLUtils.DocumentToString(xml));
-    }
-}