From daf0f74361f708ec1372a9d70171594154115fed Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 5 Sep 2007 09:01:35 +0000
Subject: [PATCH] joined anomic.net.URL, plasmaURL and url hash computation:
 search profiling showed that a large amount of time is wasted computing url
 hashes. The computation includes an intranet check, which requires a DNS
 lookup. As a result, each url hash computation took 100-200 milliseconds,
 which delayed remote searches by at least one second more than necessary.
 The solution is to attach the URL hash to the URL data structure, so that
 the hash value can be filled in after the URL is retrieved from the
 database. The redesign of the url/urlhash management required a major
 redesign of many parts of the software. Since some parts had already been
 slated to be given up, they were removed during this change to avoid
 unnecessary maintenance of unused code.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4074 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 build.xml | 39 +- htroot/Blacklist_p.java | 6 +- htroot/Bookmarks.java | 4 +- htroot/CacheAdmin_p.java | 4 +- htroot/ConfigLanguage_p.java | 4 +- htroot/ConfigSkins_p.java | 4 +- htroot/ConfigUpdate_p.java | 4 +- htroot/CrawlResults.java | 4 +- htroot/CrawlStartSimple_p.html | 2 +- htroot/CrawlStartSimple_p.java | 6 +- htroot/CrawlURLFetchStack_p.java | 12 +- htroot/CrawlURLFetch_p.java | 24 +- htroot/FeedReader_p.java | 6 +- htroot/IndexControl_p.java | 23 +- htroot/IndexCreateIndexingQueue_p.java | 4 +- htroot/IndexCreateWWWGlobalQueue_p.java | 2 +- htroot/IndexCreateWWWLocalQueue_p.java | 4 +- htroot/IndexCreateWWWRemoteQueue_p.java | 2 +- htroot/QuickCrawlLink_p.java | 11 +- htroot/Statistics.java | 9 +- htroot/Status.java | 4 +- htroot/Supporter.java | 24 +- htroot/Surftips.java | 19 +- htroot/Thumbnail.html | 1 - htroot/Thumbnail.java | 69 -- htroot/ViewFile.java | 6 +- htroot/ViewImage.java | 6 +- htroot/WatchCrawler_p.java | 25 +- htroot/WebStructurePicture_p.java | 5 +- htroot/htdocsdefault/dir.java | 13 +- htroot/index.java | 9 +- htroot/sharedBlacklist_p.java | 6 +- htroot/soap/ServiceList.html | 33 - htroot/soap/ServiceList.java | 105 --- htroot/xml/bookmarks/posts/delete_p.java | 22 +- htroot/xml/queues_p.java | 2 +- htroot/xml/util/getpageinfo_p.java | 8 +- htroot/yacy/crawlOrder.java | 17 +- htroot/yacy/list.java | 4 +- htroot/yacy/search.java | 27 +- htroot/yacy/transferURL.java | 2 +- htroot/yacy/urls.java | 2 +- htroot/yacysearch.java | 13 +- htroot/yacysearchitem.java | 15 +- source/de/anomic/data/SitemapParser.java | 16 +- source/de/anomic/data/URLFetcherStack.java | 8 +- source/de/anomic/data/URLLicense.java | 10 +- source/de/anomic/data/bookmarksDB.java | 22 +- source/de/anomic/data/robotsParser.java | 26 +- source/de/anomic/data/userDB.java | 3 +- .../htmlFilter/htmlFilterContentScraper.java | 22 +- .../htmlFilter/htmlFilterImageEntry.java | 8 +- .../htmlFilter/htmlFilterInputStream.java | 5 +- .../anomic/htmlFilter/htmlFilterWriter.java | 4 +- source/de/anomic/http/httpHeader.java | 8 +- source/de/anomic/http/httpc.java | 18 +- source/de/anomic/http/httpd.java | 75 +- source/de/anomic/http/httpdFileHandler.java | 1 - source/de/anomic/http/httpdProxyHandler.java | 28 +- source/de/anomic/icap/icapd.java | 4 +- source/de/anomic/index/indexURLEntry.java | 38 +- .../kelondro/kelondroAbstractRecords.java | 2 +- .../kelondro/kelondroCollectionIndex.java | 4 +- source/de/anomic/net/natLib.java | 9 +- .../de/anomic/plasma/cache/IResourceInfo.java | 11 +-
.../plasma/cache/ResourceInfoFactory.java | 7 +- .../anomic/plasma/cache/ftp/ResourceInfo.java | 24 +- .../plasma/cache/http/ResourceInfo.java | 20 +- .../plasma/crawler/AbstractCrawlWorker.java | 13 +- .../plasma/crawler/ftp/CrawlWorker.java | 4 +- .../plasma/crawler/http/CrawlWorker.java | 9 +- .../plasma/dbImport/SitemapImporter.java | 6 +- .../dbImport/plasmaCrawlNURLImporter.java | 4 +- .../anomic/plasma/parser/AbstractParser.java | 12 +- source/de/anomic/plasma/parser/Parser.java | 8 +- .../anomic/plasma/parser/ParserException.java | 14 +- .../anomic/plasma/parser/bzip/bzipParser.java | 5 +- .../anomic/plasma/parser/doc/docParser.java | 8 +- .../anomic/plasma/parser/gzip/gzipParser.java | 4 +- .../parser/mimeType/mimeTypeParser.java | 10 +- .../anomic/plasma/parser/odt/odtParser.java | 8 +- .../anomic/plasma/parser/pdf/pdfParser.java | 4 +- .../anomic/plasma/parser/ppt/pptParser.java | 4 +- .../de/anomic/plasma/parser/ps/psParser.java | 6 +- .../anomic/plasma/parser/rpm/rpmParser.java | 8 +- .../anomic/plasma/parser/rss/rssParser.java | 8 +- .../anomic/plasma/parser/rtf/rtfParser.java | 8 +- .../sevenzip/SZParserExtractCallback.java | 4 +- .../parser/sevenzip/sevenzipParser.java | 10 +- .../anomic/plasma/parser/swf/swfParser.java | 4 +- .../anomic/plasma/parser/tar/tarParser.java | 6 +- .../anomic/plasma/parser/vcf/vcfParser.java | 8 +- .../anomic/plasma/parser/xls/xlsParser.java | 4 +- .../anomic/plasma/parser/zip/zipParser.java | 6 +- .../de/anomic/plasma/plasmaCrawlBalancer.java | 13 +- source/de/anomic/plasma/plasmaCrawlEntry.java | 26 +- source/de/anomic/plasma/plasmaCrawlLURL.java | 11 +- .../de/anomic/plasma/plasmaCrawlLoader.java | 6 +- .../plasma/plasmaCrawlLoaderMessage.java | 6 +- .../de/anomic/plasma/plasmaCrawlStacker.java | 27 +- source/de/anomic/plasma/plasmaCrawlZURL.java | 26 +- source/de/anomic/plasma/plasmaHTCache.java | 219 +++-- source/de/anomic/plasma/plasmaParser.java | 30 +- .../de/anomic/plasma/plasmaParserConfig.java | 6 +- .../anomic/plasma/plasmaParserDocument.java | 26 +- .../plasma/plasmaRankingRCIEvaluation.java | 7 +- .../de/anomic/plasma/plasmaSearchEvent.java | 11 +- .../de/anomic/plasma/plasmaSearchImages.java | 8 +- .../anomic/plasma/plasmaSearchPreOrder.java | 4 +- .../anomic/plasma/plasmaSearchProcessing.java | 73 ++ .../plasma/plasmaSearchRankingProfile.java | 8 +- .../de/anomic/plasma/plasmaSnippetCache.java | 54 +- .../de/anomic/plasma/plasmaSwitchboard.java | 75 +- .../anomic/plasma/plasmaSwitchboardQueue.java | 30 +- source/de/anomic/plasma/plasmaURL.java | 744 ---------------- .../de/anomic/plasma/plasmaWebStructure.java | 35 +- source/de/anomic/plasma/plasmaWordIndex.java | 8 +- .../plasma/urlPattern/abstractURLPattern.java | 13 +- .../plasma/urlPattern/plasmaURLPattern.java | 6 +- source/de/anomic/server/serverDomains.java | 11 +- source/de/anomic/soap/AbstractService.java | 189 ---- source/de/anomic/soap/ServerContext.java | 234 ----- source/de/anomic/soap/SoapException.java | 132 --- source/de/anomic/soap/build.xml | 154 ---- source/de/anomic/soap/httpdSoapHandler.java | 777 ---------------- .../de/anomic/soap/services/AdminService.java | 837 ------------------ .../soap/services/BlacklistService.java | 608 ------------- .../anomic/soap/services/BookmarkService.java | 511 ----------- .../de/anomic/soap/services/CrawlService.java | 229 ----- .../anomic/soap/services/MessageService.java | 321 ------- .../anomic/soap/services/SearchService.java | 346 -------- .../de/anomic/soap/services/ShareService.java | 714 --------------- 
.../anomic/soap/services/StatusService.java | 261 ------ source/de/anomic/soap/services/admin.wsdl | 504 ----------- source/de/anomic/soap/services/blacklist.wsdl | 285 ------ source/de/anomic/soap/services/bookmarks.wsdl | 318 ------- source/de/anomic/soap/services/crawl.wsdl | 168 ---- source/de/anomic/soap/services/messages.wsdl | 181 ---- source/de/anomic/soap/services/search.wsdl | 233 ----- source/de/anomic/soap/services/share.wsdl | 258 ------ source/de/anomic/soap/services/status.wsdl | 224 ----- source/de/anomic/tools/loaderThreads.java | 12 +- .../anomic/urlRedirector/urlRedirectord.java | 7 +- source/de/anomic/yacy/yacyClient.java | 43 +- source/de/anomic/yacy/yacyCore.java | 7 +- source/de/anomic/yacy/yacyNewsPool.java | 5 +- source/de/anomic/yacy/yacyPeerActions.java | 5 +- source/de/anomic/yacy/yacySeedDB.java | 5 +- .../{net/URL.java => yacy/yacyURL.java} | 579 +++++++++++- source/de/anomic/yacy/yacyVersion.java | 13 +- source/yacy.java | 4 +- .../soap/services/AbstractServiceTest.java | 77 -- .../soap/services/AdminServiceTest.java | 75 -- .../soap/services/BlacklistServiceTest.java | 103 --- .../soap/services/BookmarkServiceTest.java | 153 ---- .../soap/services/CrawlServiceTest.java | 29 - .../soap/services/MessageServiceTest.java | 62 -- .../de/anomic/soap/services/ServiceTests.java | 21 - .../soap/services/ShareServiceTest.java | 109 --- .../soap/services/StatusServiceTest.java | 42 - 160 files changed, 1481 insertions(+), 10006 deletions(-) delete mode 100644 htroot/Thumbnail.html delete mode 100644 htroot/Thumbnail.java delete mode 100644 htroot/soap/ServiceList.html delete mode 100644 htroot/soap/ServiceList.java delete mode 100644 source/de/anomic/plasma/plasmaURL.java delete mode 100644 source/de/anomic/soap/AbstractService.java delete mode 100644 source/de/anomic/soap/ServerContext.java delete mode 100644 source/de/anomic/soap/SoapException.java delete mode 100644 source/de/anomic/soap/build.xml delete mode 100644 source/de/anomic/soap/httpdSoapHandler.java delete mode 100644 source/de/anomic/soap/services/AdminService.java delete mode 100644 source/de/anomic/soap/services/BlacklistService.java delete mode 100644 source/de/anomic/soap/services/BookmarkService.java delete mode 100644 source/de/anomic/soap/services/CrawlService.java delete mode 100644 source/de/anomic/soap/services/MessageService.java delete mode 100644 source/de/anomic/soap/services/SearchService.java delete mode 100644 source/de/anomic/soap/services/ShareService.java delete mode 100644 source/de/anomic/soap/services/StatusService.java delete mode 100644 source/de/anomic/soap/services/admin.wsdl delete mode 100644 source/de/anomic/soap/services/blacklist.wsdl delete mode 100644 source/de/anomic/soap/services/bookmarks.wsdl delete mode 100644 source/de/anomic/soap/services/crawl.wsdl delete mode 100644 source/de/anomic/soap/services/messages.wsdl delete mode 100644 source/de/anomic/soap/services/search.wsdl delete mode 100644 source/de/anomic/soap/services/share.wsdl delete mode 100644 source/de/anomic/soap/services/status.wsdl rename source/de/anomic/{net/URL.java => yacy/yacyURL.java} (53%) delete mode 100644 test/de/anomic/soap/services/AbstractServiceTest.java delete mode 100644 test/de/anomic/soap/services/AdminServiceTest.java delete mode 100644 test/de/anomic/soap/services/BlacklistServiceTest.java delete mode 100644 test/de/anomic/soap/services/BookmarkServiceTest.java delete mode 100644 test/de/anomic/soap/services/CrawlServiceTest.java delete mode 100644 
test/de/anomic/soap/services/MessageServiceTest.java delete mode 100644 test/de/anomic/soap/services/ServiceTests.java delete mode 100644 test/de/anomic/soap/services/ShareServiceTest.java delete mode 100644 test/de/anomic/soap/services/StatusServiceTest.java diff --git a/build.xml b/build.xml index 8fd6508bb..d9d15970d 100644 --- a/build.xml +++ b/build.xml @@ -221,7 +221,7 @@ @@ -241,7 +241,6 @@ @@ -297,33 +296,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -359,7 +331,7 @@ - + - - - + @@ -527,8 +497,6 @@ - - @@ -545,7 +513,6 @@ - diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java index 4ab355c9d..2411caf8c 100644 --- a/htroot/Blacklist_p.java +++ b/htroot/Blacklist_p.java @@ -60,7 +60,6 @@ import java.util.TreeMap; import de.anomic.data.listManager; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.abstractURLPattern; import de.anomic.plasma.urlPattern.plasmaURLPattern; @@ -68,6 +67,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class Blacklist_p { private final static String DISABLED = "disabled_"; @@ -95,9 +95,9 @@ public class Blacklist_p { prop.put("testlist",1); String urlstring = post.get("testurl", ""); if(!urlstring.startsWith("http://")) urlstring = "http://"+urlstring; - URL testurl = null; + yacyURL testurl = null; try { - testurl = new URL(urlstring); + testurl = new yacyURL(urlstring, null); } catch (MalformedURLException e) { } if(testurl != null) { prop.put("testlist_url",testurl.toString()); diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index 95451fe69..8f0eb3a87 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -59,7 +59,6 @@ import de.anomic.data.bookmarksDB.Tag; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; @@ -69,6 +68,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; +import de.anomic.yacy.yacyURL; public class Bookmarks { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -214,7 +214,7 @@ public class Bookmarks { } try { File file=new File((String)post.get("bookmarksfile")); - switchboard.bookmarksDB.importFromBookmarks(new URL(file) , new String((byte[])post.get("bookmarksfile$file")), tags, isPublic); + switchboard.bookmarksDB.importFromBookmarks(new yacyURL(file) , new String((byte[])post.get("bookmarksfile$file")), tags, isPublic); } catch (MalformedURLException e) {} }else if(post.containsKey("xmlfile")){ diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java index 279c8231d..27e6aec23 100644 --- a/htroot/CacheAdmin_p.java +++ b/htroot/CacheAdmin_p.java @@ -62,7 +62,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSwitchboard; @@ -71,6 +70,7 @@ import de.anomic.plasma.cache.UnsupportedProtocolException; import 
de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class CacheAdmin_p { @@ -118,7 +118,7 @@ public class CacheAdmin_p { final StringBuffer tree = new StringBuffer(); final StringBuffer info = new StringBuffer(); - final URL url = plasmaHTCache.getURL(file); + final yacyURL url = plasmaHTCache.getURL(file); String urlstr = ""; diff --git a/htroot/ConfigLanguage_p.java b/htroot/ConfigLanguage_p.java index 5e5a90853..668ac3b7a 100644 --- a/htroot/ConfigLanguage_p.java +++ b/htroot/ConfigLanguage_p.java @@ -58,11 +58,11 @@ import de.anomic.data.listManager; import de.anomic.data.translator; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; public class ConfigLanguage_p { @@ -97,7 +97,7 @@ public class ConfigLanguage_p { String url = (String)post.get("url"); ArrayList langVector; try{ - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); langVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); }catch(IOException e){ prop.put("status", 1);//unable to get url diff --git a/htroot/ConfigSkins_p.java b/htroot/ConfigSkins_p.java index 61405b36c..9a80f2b1e 100644 --- a/htroot/ConfigSkins_p.java +++ b/htroot/ConfigSkins_p.java @@ -56,12 +56,12 @@ import java.util.Iterator; import de.anomic.data.listManager; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; public class ConfigSkins_p { @@ -126,7 +126,7 @@ public class ConfigSkins_p { String url = (String)post.get("url"); ArrayList skinVector; try{ - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); skinVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); }catch(IOException e){ prop.put("status", 1);//unable to get URL diff --git a/htroot/ConfigUpdate_p.java b/htroot/ConfigUpdate_p.java index 459d4ab4e..381e2835f 100644 --- a/htroot/ConfigUpdate_p.java +++ b/htroot/ConfigUpdate_p.java @@ -31,11 +31,11 @@ import java.util.Iterator; import java.util.TreeSet; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.serverSystem; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; public class ConfigUpdate_p { @@ -54,7 +54,7 @@ public class ConfigUpdate_p { String release = post.get("releasedownload", ""); if (release.length() > 0) { try { - yacyVersion.downloadRelease(new yacyVersion(new URL(release))); + yacyVersion.downloadRelease(new yacyVersion(new yacyURL(release, null))); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 29e4e676d..7b992180e 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -30,7 +30,6 @@ import java.util.Locale; import de.anomic.http.httpHeader; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import 
de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; @@ -39,6 +38,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class CrawlResults { @@ -170,7 +170,7 @@ public class CrawlResults { urlstr = comp.url().toNormalform(false, true); urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL - cachepath = plasmaHTCache.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1); + cachepath = plasmaHTCache.getCachePath(new yacyURL(urlstr, null)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1); prop.put("table_indexed_" + cnt + "_dark", (dark) ? 1 : 0); if (showControl) { diff --git a/htroot/CrawlStartSimple_p.html b/htroot/CrawlStartSimple_p.html index 9708320d5..69e9f701e 100644 --- a/htroot/CrawlStartSimple_p.html +++ b/htroot/CrawlStartSimple_p.html @@ -53,7 +53,7 @@ : Wide: depth   |   - Complete Single Domain + Complete Domain The range defines if the crawl shall consider a complete domain, or a wide crawl up to a specific depth. diff --git a/htroot/CrawlStartSimple_p.java b/htroot/CrawlStartSimple_p.java index 7926e7d27..8cd1693b2 100644 --- a/htroot/CrawlStartSimple_p.java +++ b/htroot/CrawlStartSimple_p.java @@ -28,7 +28,6 @@ import java.util.Enumeration; import java.util.Iterator; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -36,6 +35,7 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class CrawlStartSimple_p { @@ -152,8 +152,8 @@ public class CrawlStartSimple_p { if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed.isVirgin()) || (yacyCore.seedDB.mySeed.isJunior())) { prop.put("remoteCrawlPeers", 0); } else { - Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, true); - Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, false); + Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(yacyURL.dummyHash, true); + Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(yacyURL.dummyHash, false); if ((!(crawlavail.hasMoreElements())) && (!(crawlpendi.hasMoreElements()))) { prop.put("remoteCrawlPeers", 0); //no peers availible } else { diff --git a/htroot/CrawlURLFetchStack_p.java b/htroot/CrawlURLFetchStack_p.java index f0180b354..974e8e77a 100644 --- a/htroot/CrawlURLFetchStack_p.java +++ b/htroot/CrawlURLFetchStack_p.java @@ -55,7 +55,6 @@ import de.anomic.data.URLFetcherStack; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaSwitchboard; @@ -64,6 +63,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class CrawlURLFetchStack_p { @@ -200,16 +200,16 @@ public class CrawlURLFetchStack_p { prop.put("upload", 1); } else 
if (type.equals("html")) { try { - final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL(file)); + final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL(file)); final Writer writer = new htmlFilterWriter(null, null, scraper, null, false); serverFileUtils.write(content, writer); writer.close(); final Iterator it = ((HashMap)scraper.getAnchors()).keySet().iterator(); int added = 0, failed = 0; - URL url; + yacyURL url; while (it.hasNext()) try { - url = new URL((String)it.next()); + url = new yacyURL((String) it.next(), null); if (blCheck && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url)) { failed++; continue; @@ -264,7 +264,7 @@ public class CrawlURLFetchStack_p { private static boolean addURL(String url, boolean blCheck, URLFetcherStack stack) { try { if (url == null || url.length() == 0) return false; - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); if (blCheck && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, u)) return false; stack.push(u); return true; @@ -288,7 +288,7 @@ public class CrawlURLFetchStack_p { url = post.get("url" + i, null); if (url == null || url.length() == 0) continue; try { - stack.push(new URL(url)); + stack.push(new yacyURL(url, null)); count++; } catch (MalformedURLException e) { serverLog.logInfo("URLFETCHER", "retrieved invalid url for adding to the stack: " + url); diff --git a/htroot/CrawlURLFetch_p.java b/htroot/CrawlURLFetch_p.java index caa7bc752..1488f4592 100644 --- a/htroot/CrawlURLFetch_p.java +++ b/htroot/CrawlURLFetch_p.java @@ -49,7 +49,6 @@ import java.util.Iterator; import java.util.Random; import java.util.TreeMap; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaSwitchboard; @@ -62,6 +61,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; public class CrawlURLFetch_p { @@ -139,10 +139,10 @@ public class CrawlURLFetch_p { count, frequency); } else { - URL url = null; + yacyURL url = null; if (post.get("source", "").equals("url")) { try { - url = new URL(post.get("host", null)); + url = new yacyURL(post.get("host", null), null); if (!savedURLs.contains(url.toNormalform(true, true))) savedURLs.add(url.toNormalform(true, true)); prop.put("host", post.get("host", url.toString())); @@ -152,7 +152,7 @@ public class CrawlURLFetch_p { } } else if (post.get("source", "").equals("savedURL")) { try { - url = new URL(post.get("saved", "")); + url = new yacyURL(post.get("saved", ""), null); } catch (MalformedURLException e) { /* should never appear, except for invalid input, see above */ } @@ -355,7 +355,7 @@ public class CrawlURLFetch_p { public String lastServerResponse = null; public int lastFailed = 0; - public final URL url; + public final yacyURL url; public final int count; public long delay; public final plasmaSwitchboard sb; @@ -363,7 +363,7 @@ public class CrawlURLFetch_p { public boolean paused = false; - public static URL getListServletURL(String host, int mode, int count, String peerHash) { + public static yacyURL getListServletURL(String host, int mode, int count, String peerHash) { String r = "http://" + host + "/yacy/list.html?list=queueUrls&display="; switch (mode) { @@ -380,7 +380,7 @@ public class CrawlURLFetch_p { } try { - return new URL(r); + return new 
yacyURL(r, null); } catch (MalformedURLException e) { return null; } @@ -389,7 +389,7 @@ public class CrawlURLFetch_p { public URLFetcher( serverSwitch env, plasmaCrawlProfile.entry profile, - URL url, + yacyURL url, int count, long delayMs) { if (env == null || profile == null || url == null) @@ -420,7 +420,7 @@ public class CrawlURLFetch_p { public void run() { this.paused = false; long start; - URL url; + yacyURL url; while (!isInterrupted()) { try { start = System.currentTimeMillis(); @@ -449,7 +449,7 @@ public class CrawlURLFetch_p { } } - private URL getDLURL() { + private yacyURL getDLURL() { if (this.url != null) return this.url; // choose random seed @@ -493,7 +493,7 @@ public class CrawlURLFetch_p { this.failed.put(urls[i], reason); try { plasmaCrawlZURL.Entry ee = this.sb.errorURL.newEntry( - new URL(urls[i]), + new yacyURL(urls[i], null), reason); ee.store(); this.sb.errorURL.stackPushEntry(ee); @@ -503,7 +503,7 @@ public class CrawlURLFetch_p { return this.lastFetchedURLs; } - private String[] getURLs(URL url) { + private String[] getURLs(yacyURL url) { if (url == null) return null; String[] r = null; try { diff --git a/htroot/FeedReader_p.java b/htroot/FeedReader_p.java index 128d28ccb..94634f004 100644 --- a/htroot/FeedReader_p.java +++ b/htroot/FeedReader_p.java @@ -24,11 +24,11 @@ import java.net.MalformedURLException; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.servletProperties; import de.anomic.xml.rssReader; +import de.anomic.yacy.yacyURL; // test url: // http://localhost:8080/FeedReader_p.html?url=http://www.tagesthemen.de/xml/rss2 @@ -40,9 +40,9 @@ public class FeedReader_p { prop.put("page", 0); if (post != null) { - URL url; + yacyURL url; try { - url = new URL((String) post.get("url")); + url = new yacyURL((String) post.get("url"), null); } catch (MalformedURLException e) { prop.put("page", 2); return prop; diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index c8543a8aa..70fb63705 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -62,11 +62,9 @@ import de.anomic.data.listManager; import de.anomic.http.httpHeader; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroRotateIterator; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.abstractURLPattern; @@ -76,6 +74,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class IndexControl_p { @@ -196,7 +195,11 @@ public class IndexControl_p { } if (post.containsKey("urldelete")) { - urlhash = plasmaURL.urlHash(urlstring); + try { + urlhash = (new yacyURL(urlstring, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } if ((urlhash == null) || (urlstring == null)) { prop.put("result", "No input given; nothing deleted."); } else { @@ -307,8 +310,8 @@ public class IndexControl_p { if (post.containsKey("urlstringsearch")) { try { - URL url = new URL(urlstring); - urlhash = plasmaURL.urlHash(url); + yacyURL url = new yacyURL(urlstring, null); + urlhash = url.hash(); prop.put("urlhash", urlhash); indexURLEntry entry = 
switchboard.wordIndex.loadedURL.load(urlhash, null); if (entry == null) { @@ -369,7 +372,7 @@ public class IndexControl_p { try { String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(","); pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true)); - URL url; + yacyURL url; for (int i=0; i= maxCount) break; urlString = (String) map.get("key"); - try { url = new URL(urlString); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (!serverDomains.isLocal(url))) { + try { url = new yacyURL(urlString, null); } catch (MalformedURLException e) { url = null; } + if ((url != null) && (!url.isLocal())) { prop.put("page_backlinks_list_" + count + "_dark", ((dark) ? 1 : 0)); dark =! dark; prop.put("page_backlinks_list_" + count + "_url", urlString); prop.put("page_backlinks_list_" + count + "_date", map.get("date")); diff --git a/htroot/Status.java b/htroot/Status.java index 14b3568c2..59517d9d8 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -54,7 +54,6 @@ import de.anomic.http.httpHeader; import de.anomic.http.httpd; import de.anomic.http.httpdByteCountInputStream; import de.anomic.http.httpdByteCountOutputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverDomains; @@ -64,6 +63,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; public class Status { @@ -120,7 +120,7 @@ public class Status { String release = post.get("releasedownload", ""); if (release.length() > 0) { try { - yacyVersion.downloadRelease(new yacyVersion(new URL(release))); + yacyVersion.downloadRelease(new yacyVersion(new yacyURL(release, null))); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); diff --git a/htroot/Supporter.java b/htroot/Supporter.java index 65823abba..a02c34753 100644 --- a/htroot/Supporter.java +++ b/htroot/Supporter.java @@ -32,8 +32,6 @@ import java.util.HashMap; import java.util.Iterator; import de.anomic.http.httpHeader; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroNaturalOrder; @@ -48,6 +46,7 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class Supporter { @@ -125,10 +124,9 @@ public class Supporter { if (row == null) continue; url = row.getColString(0, null); - try{ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url))) - continue; - }catch(MalformedURLException e){continue;}; + try { + if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new yacyURL(url, urlhash))) continue; + } catch(MalformedURLException e) {continue;} title = row.getColString(1,"UTF-8"); description = row.getColString(2,"UTF-8"); if ((url == null) || (title == null) || (description == null)) continue; @@ -241,10 +239,18 @@ public class Supporter { // add/subtract votes and write record if (entry != null) { - urlhash = plasmaURL.urlHash(url); + try { + urlhash = (new yacyURL(url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } if (urlhash == null) - urlhash=plasmaURL.urlHash("http://"+url); - 
if(urlhash==null){ + try { + urlhash = (new yacyURL("http://" + url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } + if (urlhash==null) { System.out.println("Supporter: bad url '" + url + "' from news record " + record.toString()); continue; } diff --git a/htroot/Surftips.java b/htroot/Surftips.java index 8db16bfc7..00c47aeea 100644 --- a/htroot/Surftips.java +++ b/htroot/Surftips.java @@ -32,8 +32,6 @@ import java.util.HashMap; import java.util.Iterator; import de.anomic.http.httpHeader; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroNaturalOrder; @@ -48,6 +46,7 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class Surftips { @@ -134,7 +133,7 @@ public class Surftips { url = row.getColString(0, null); try{ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url))) + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new yacyURL(url, null))) continue; }catch(MalformedURLException e){continue;}; title = row.getColString(1,"UTF-8"); @@ -302,10 +301,18 @@ public class Surftips { // add/subtract votes and write record if (entry != null) { - urlhash = plasmaURL.urlHash(url); + try { + urlhash = (new yacyURL(url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } if (urlhash == null) - urlhash=plasmaURL.urlHash("http://"+url); - if(urlhash==null){ + try { + urlhash = (new yacyURL("http://"+url, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } + if (urlhash == null) { System.out.println("Surftips: bad url '" + url + "' from news record " + record.toString()); continue; } diff --git a/htroot/Thumbnail.html b/htroot/Thumbnail.html deleted file mode 100644 index 20f06a069..000000000 --- a/htroot/Thumbnail.html +++ /dev/null @@ -1 +0,0 @@ -#[image]# \ No newline at end of file diff --git a/htroot/Thumbnail.java b/htroot/Thumbnail.java deleted file mode 100644 index cfc3ea659..000000000 --- a/htroot/Thumbnail.java +++ /dev/null @@ -1,69 +0,0 @@ -//Thumbnail.java -//------------ -// part of YACY -// -// (C) 2007 Alexander Schier -// -// last change: $LastChangedDate: $ by $LastChangedBy: $ -// $LastChangedRevision: $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; - -import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.server.servletProperties; - -public class Thumbnail{ - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { - servletProperties prop = new servletProperties(); - String command=env.getConfig("thumbnailProgram", ""); - if(command.equals("")||post==null||!post.containsKey("url")){ - prop.put("image", "thumbnail cannot be generated"); //TODO: put a "thumbnail not possible" image. - return prop; - } - String[] cmdline=new String[3]; - cmdline[0]=env.getConfig("thumbnailProgram", ""); - cmdline[1]=post.get("url", ""); - plasmaSwitchboard sb=plasmaSwitchboard.getSwitchboard(); - File path=new File(sb.workPath, plasmaURL.urlHash(cmdline[1])+".png"); - cmdline[2]=path.getAbsolutePath();//does not contain an extension! - try { - Runtime.getRuntime().exec(cmdline); - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path))); - String line; - StringBuffer image=new StringBuffer(); - while((line=br.readLine())!=null){ - image.append(line); - } - //path.delete(); //we do not cache, yet. - prop.put("image", image.toString()); - } catch (IOException e) { - prop.put("image", "error creating thumbnail");//TODO: put a "thumbnail error" image. 
- } - httpHeader out_header=new httpHeader(); - out_header.put(httpHeader.CONTENT_TYPE, "image/png"); - prop.setOutgoingHeader(out_header); - return prop; - } -} diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 94c5c1557..806c0a0d9 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -58,7 +58,6 @@ import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParserDocument; @@ -70,6 +69,7 @@ import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class ViewFile { @@ -99,7 +99,7 @@ public class ViewFile { String viewMode = post.get("viewMode","sentences"); prop.put("error_vMode-" + viewMode, 1); - URL url = null; + yacyURL url = null; String descr = ""; int wordCount = 0; int size = 0; @@ -144,7 +144,7 @@ public class ViewFile { } // define an url by post parameter - url = new URL(urlString); + url = new yacyURL(urlString, null); pre = post.get("pre", "false").equals("true"); } catch (MalformedURLException e) {} diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java index b0fee1001..b8abf6b22 100644 --- a/htroot/ViewImage.java +++ b/htroot/ViewImage.java @@ -48,12 +48,12 @@ import java.io.InputStream; import java.net.MalformedURLException; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; import de.anomic.ymage.ymageImageParser; public class ViewImage { @@ -70,9 +70,9 @@ public class ViewImage { String urlLicense = post.get("code", ""); boolean auth = ((String) header.get("CLIENTIP", "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights - URL url = null; + yacyURL url = null; if ((urlString.length() > 0) && (auth)) try { - url = new URL(urlString); + url = new yacyURL(urlString, null); } catch (MalformedURLException e1) { url = null; } diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 66f5f334f..3358af088 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -38,11 +38,9 @@ import java.util.regex.PatternSyntaxException; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.dbImport.dbImporter; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; @@ -50,6 +48,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; +import de.anomic.yacy.yacyURL; public class WatchCrawler_p { public static final String CRAWLING_MODE_URL = "url"; @@ -101,12 +100,12 @@ public class WatchCrawler_p { String newcrawlingfilter = post.get("crawlingFilter", ".*"); if (fullDomain) try { - newcrawlingfilter = ".*" + (new URL(post.get("crawlingURL",""))).getHost() + ".*"; + 
newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*"; } catch (MalformedURLException e) {} env.setConfig("crawlingFilter", newcrawlingfilter); - int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0")); - if (fullDomain) newcrawlingdepth = 99; + int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8")); + if (fullDomain) newcrawlingdepth = 8; env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth)); boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on"); @@ -158,12 +157,12 @@ public class WatchCrawler_p { if (pos == -1) crawlingStart = "http://" + crawlingStart; // normalizing URL - try {crawlingStart = new URL(crawlingStart).toNormalform(true, true);} catch (MalformedURLException e1) {} + try {crawlingStart = new yacyURL(crawlingStart, null).toNormalform(true, true);} catch (MalformedURLException e1) {} // check if url is proper - URL crawlingStartURL = null; + yacyURL crawlingStartURL = null; try { - crawlingStartURL = new URL(crawlingStart); + crawlingStartURL = new yacyURL(crawlingStart, null); } catch (MalformedURLException e) { crawlingStartURL = null; } @@ -181,7 +180,7 @@ public class WatchCrawler_p { // stack request // first delete old entry, if exists - String urlhash = plasmaURL.urlHash(crawlingStart); + String urlhash = (new yacyURL(crawlingStart, null)).hash(); switchboard.wordIndex.loadedURL.remove(urlhash); switchboard.noticeURL.remove(urlhash); switchboard.errorURL.remove(urlhash); @@ -258,7 +257,7 @@ public class WatchCrawler_p { String fileString = new String(fileContent,"UTF-8"); // parsing the bookmark file and fetching the headline and contained links - htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL(file)); + htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL(file)); //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); Writer writer = new htmlFilterWriter(null,null,scraper,null,false); serverFileUtils.write(fileString,writer); @@ -282,12 +281,12 @@ public class WatchCrawler_p { nexturlstring = nexturlstring.trim(); // normalizing URL - nexturlstring = new URL(nexturlstring).toNormalform(true, true); + nexturlstring = new yacyURL(nexturlstring, null).toNormalform(true, true); // generating an url object - URL nexturlURL = null; + yacyURL nexturlURL = null; try { - nexturlURL = new URL(nexturlstring); + nexturlURL = new yacyURL(nexturlstring, null); } catch (MalformedURLException ex) { nexturlURL = null; c++; diff --git a/htroot/WebStructurePicture_p.java b/htroot/WebStructurePicture_p.java index 18195e2e8..df533f12d 100644 --- a/htroot/WebStructurePicture_p.java +++ b/htroot/WebStructurePicture_p.java @@ -32,12 +32,11 @@ import java.util.Map; import de.anomic.http.httpHeader; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaWebStructure; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; import de.anomic.ymage.ymageGraph; import de.anomic.ymage.ymageMatrix; import de.anomic.ymage.ymageToolPrint; @@ -92,7 +91,7 @@ public class WebStructurePicture_p { // find start hash String hash = null; try { - hash = plasmaURL.urlHash(new URL("http://" + host)).substring(6); + hash = (new yacyURL("http://" + host, null)).hash().substring(6); } catch (MalformedURLException e) {e.printStackTrace();} assert 
(sb.webStructure.references(hash) != null); diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java index 9ad104c97..c2c097216 100644 --- a/htroot/htdocsdefault/dir.java +++ b/htroot/htdocsdefault/dir.java @@ -60,12 +60,11 @@ import de.anomic.data.userDB; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.http.httpd; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBitfield; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; +import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCodings; import de.anomic.server.serverCore; @@ -79,6 +78,7 @@ import de.anomic.tools.dirlistComparator; import de.anomic.tools.md5DirFileFilter; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class dir { @@ -364,7 +364,7 @@ public class dir { public static void indexPhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr, byte[] md5) { try { - final URL url = new URL(urlstring); + final yacyURL url = new yacyURL(urlstring, null); final plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(("yacyshare. " + phrase + ". " + descr).getBytes()), "UTF-8"); final indexURLEntry newEntry = new indexURLEntry( url, @@ -379,7 +379,7 @@ public class dir { md5, // md5 (long) phrase.length(), // size condenser.RESULT_NUMB_WORDS, // word count - plasmaURL.DT_SHARE, // doctype + plasmaHTCache.DT_SHARE, // doctype new kelondroBitfield(4), "**", // language 0,0,0,0,0,0 @@ -392,14 +392,13 @@ public class dir { 5 /*process case*/ ); - final String urlHash = newEntry.hash(); - /*final int words =*/ switchboard.wordIndex.addPageIndex(url, urlHash, new Date(), phrase.length() + descr.length() + 13, null, condenser, "**", plasmaURL.DT_SHARE, 0, 0); + /*final int words =*/ switchboard.wordIndex.addPageIndex(url, new Date(), phrase.length() + descr.length() + 13, null, condenser, "**", plasmaHTCache.DT_SHARE, 0, 0); } catch (IOException e) {} } public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) { try { - final String urlhash = plasmaURL.urlHash(new URL(urlstring)); + final String urlhash = (new yacyURL(urlstring, null)).hash(); final Iterator words = plasmaCondenser.getWords(("yacyshare " + phrase + " " + descr).getBytes("UTF-8"), "UTF-8").keySet().iterator(); String word; while (words.hasNext()) { diff --git a/htroot/index.java b/htroot/index.java index fc0324bcd..9eb764a78 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -33,15 +33,14 @@ import java.net.MalformedURLException; import java.util.HashMap; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class index { @@ -80,13 +79,13 @@ public class index { final String referer = (String) header.get(httpHeader.REFERER); if (referer != null) { - URL url; + yacyURL url; try { - url = new URL(referer); + url = new yacyURL(referer, null); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (!serverDomains.isLocal(url))) { + if ((url != 
null) && (!url.isLocal())) { final HashMap referrerprop = new HashMap(); referrerprop.put("count", "1"); referrerprop.put("clientip", header.get(httpHeader.CONNECTION_PROP_CLIENTIP)); diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index 1a62fd05f..bee3a1023 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -58,7 +58,6 @@ import java.util.HashSet; import de.anomic.data.listManager; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.abstractURLPattern; import de.anomic.server.serverObjects; @@ -66,6 +65,7 @@ import de.anomic.server.serverSwitch; import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class sharedBlacklist_p { @@ -131,7 +131,7 @@ public class sharedBlacklist_p { reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache"); // get List - URL u = new URL(downloadURL); + yacyURL u = new yacyURL(downloadURL, null); otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader, null), "UTF-8"); } catch (Exception e) { prop.put("status", STATUS_PEER_UNKNOWN); @@ -147,7 +147,7 @@ public class sharedBlacklist_p { prop.put("page_source", downloadURL); try { - URL u = new URL(downloadURL); + yacyURL u = new yacyURL(downloadURL, null); otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); //get List } catch (Exception e) { prop.put("status", STATUS_URL_PROBLEM); diff --git a/htroot/soap/ServiceList.html b/htroot/soap/ServiceList.html deleted file mode 100644 index 79e1d4f44..000000000 --- a/htroot/soap/ServiceList.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - SOAP Service List - #%env/templates/metas.template%# - - - #%env/templates/header.template%# -

Deployed SOAP Services

-

Currently #[services]# services are deployed.

- - - - - - - - #{services}# - - - - #{/services}# - - #%env/templates/footer.template%# - - \ No newline at end of file diff --git a/htroot/soap/ServiceList.java b/htroot/soap/ServiceList.java deleted file mode 100644 index bac6808e8..000000000 --- a/htroot/soap/ServiceList.java +++ /dev/null @@ -1,105 +0,0 @@ -// ServiceList.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// -// This File is contributed by Martin Thelian -// -// $LastChangedDate: 2007-02-24 13:56:32 +0000 (Sa, 24 Feb 2007) $ -// $LastChangedRevision: 3391 $ -// $LastChangedBy: karlchenofhell $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. 
- -// You must compile this file with -// javac -classpath .:../classes Blacklist_p.java -// if the shell's current path is HTROOT - - -package soap; - -import java.util.ArrayList; -import java.util.Iterator; - -import org.apache.axis.AxisEngine; -import org.apache.axis.ConfigurationException; -import org.apache.axis.description.OperationDesc; -import org.apache.axis.description.ServiceDesc; - -import de.anomic.http.httpHeader; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; - -public class ServiceList { - - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws ConfigurationException { - - serverObjects prop = new serverObjects(); - - // getting the SOAP engine - AxisEngine engine = (AxisEngine) post.get("SOAP.engine"); - - // loop through the deployed services - int i = 0; - boolean dark = true; - Iterator serviceIter = engine.getConfig().getDeployedServices(); - while (serviceIter.hasNext()) { - // getting the service description - ServiceDesc serviceDescription = (ServiceDesc)serviceIter.next(); - prop.put("services_" + i + "_name",serviceDescription.getName()); - prop.put("services_" + i + "_style",serviceDescription.getStyle()); - prop.put("services_" + i + "_dark", ((dark) ? 1 : 0) ); dark =! dark; - - // loop through the methods of this service - int j = 0; - ArrayList operations = serviceDescription.getOperations(); - while (j < operations.size()) { - OperationDesc op = (OperationDesc)operations.get(j); - - prop.put("services_" + i + "_methods_" + j + "_name",op.getName()); - prop.put("services_" + i + "_methods_" + j + "_method",op.getMethod()); - j++; - } - prop.put("services_" + i + "_methods",j); - - i++; - } - prop.put("services",i); - - return prop; - } - -} diff --git a/htroot/xml/bookmarks/posts/delete_p.java b/htroot/xml/bookmarks/posts/delete_p.java index dd73a4d3e..ce214aed4 100644 --- a/htroot/xml/bookmarks/posts/delete_p.java +++ b/htroot/xml/bookmarks/posts/delete_p.java @@ -42,11 +42,13 @@ // Contributions and changes to the program code must be marked as such. 
package xml.bookmarks.posts; +import java.net.MalformedURLException; + import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class delete_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -54,13 +56,17 @@ public class delete_p { plasmaSwitchboard switchboard = (plasmaSwitchboard) env; serverObjects prop = new serverObjects(); if(post!= null){ - if( post.containsKey("url") && switchboard.bookmarksDB.removeBookmark(plasmaURL.urlHash(post.get("url", "nourl"))) ){ - prop.put("result", 1); - }else if(post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))){ - prop.put("result", 1); - }else{ - prop.put("result",0); - } + try { + if( post.containsKey("url") && switchboard.bookmarksDB.removeBookmark((new yacyURL(post.get("url", "nourl"), null)).hash())) { + prop.put("result", 1); + }else if(post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))){ + prop.put("result", 1); + }else{ + prop.put("result",0); + } + } catch (MalformedURLException e) { + prop.put("result",0); + } }else{ prop.put("result",0); } diff --git a/htroot/xml/queues_p.java b/htroot/xml/queues_p.java index d6713f05a..78cc327c4 100644 --- a/htroot/xml/queues_p.java +++ b/htroot/xml/queues_p.java @@ -200,7 +200,7 @@ public class queues_p { prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate())); prop.put(tableName + "_" + showNum + "_anchor", urle.name()); prop.put(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true)); - prop.put(tableName + "_" + showNum + "_hash", urle.urlhash()); + prop.put(tableName + "_" + showNum + "_hash", urle.url().hash()); showNum++; } } diff --git a/htroot/xml/util/getpageinfo_p.java b/htroot/xml/util/getpageinfo_p.java index 444c9d8e7..fd53919b3 100644 --- a/htroot/xml/util/getpageinfo_p.java +++ b/htroot/xml/util/getpageinfo_p.java @@ -55,11 +55,11 @@ import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; public class getpageinfo_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -82,7 +82,7 @@ public class getpageinfo_p { } if (actions.indexOf("title")>=0) { try { - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); String contentString=new String(httpc.wget(u, u.getHost(), 6000, null, null, ((plasmaSwitchboard) env).remoteProxyConfig, null, null)) ; htmlFilterContentScraper scraper = new htmlFilterContentScraper(u); @@ -110,13 +110,13 @@ public class getpageinfo_p { } if(actions.indexOf("robots")>=0){ try { - URL theURL = new URL(url); + yacyURL theURL = new yacyURL(url, null); // determine if crawling of the current URL is allowed prop.put("robots-allowed", robotsParser.isDisallowed(theURL) ? 
0:1); // get the sitemap URL of the domain - URL sitemapURL = robotsParser.getSitemapURL(theURL); + yacyURL sitemapURL = robotsParser.getSitemapURL(theURL); prop.put("sitemap", (sitemapURL==null)?"":sitemapURL.toString()); } catch (MalformedURLException e) {} } diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java index 76c265e45..c79d210b4 100644 --- a/htroot/yacy/crawlOrder.java +++ b/htroot/yacy/crawlOrder.java @@ -45,13 +45,12 @@ // You must compile this file with // javac -classpath .:../classes crawlOrder.java +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Date; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -59,6 +58,7 @@ import de.anomic.tools.crypt; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNetwork; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class crawlOrder { @@ -182,16 +182,16 @@ public final class crawlOrder { // old method: only one url // normalizing URL - String newURL = new URL((String) urlv.get(0)).toNormalform(true, true); + String newURL = new yacyURL((String) urlv.get(0), null).toNormalform(true, true); if (!newURL.equals(urlv.get(0))) { env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0)); } - String refURL = (refv.get(0) == null) ? null : new URL((String) refv.get(0)).toNormalform(true, true); + String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true); if ((refURL != null) && (!refURL.equals(refv.get(0)))) { env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0)); } - if (!switchboard.acceptURL(new URL(newURL))) { + if (!switchboard.acceptURL(new yacyURL(newURL, null))) { env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + newURL); return null; } @@ -263,7 +263,12 @@ public final class crawlOrder { // case where we have already the url loaded; reason = reasonString; // send lurl-Entry as response - indexURLEntry entry = switchboard.wordIndex.loadedURL.load(plasmaURL.urlHash(url), null); + indexURLEntry entry; + try { + entry = switchboard.wordIndex.loadedURL.load((new yacyURL(url, null)).hash(), null); + } catch (MalformedURLException e) { + entry = null; + } if (entry == null) { response = "rejected"; lurl = ""; diff --git a/htroot/yacy/list.java b/htroot/yacy/list.java index 3c8afcb44..581bbf927 100644 --- a/htroot/yacy/list.java +++ b/htroot/yacy/list.java @@ -56,7 +56,6 @@ import de.anomic.data.URLFetcherStack; import de.anomic.data.htmlTools; import de.anomic.data.listManager; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; @@ -65,6 +64,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNetwork; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class list { @@ -121,7 +121,7 @@ public final class list { if (count > 0 && db.size() > 0) { final StringBuffer b = new StringBuffer(); - URL url; + yacyURL url; int cnt = 0; for (int i=0; i 0) sb.requestedQueries = sb.requestedQueries + 1d / (double) partitions; // increase query counter - + // prepare reference hints + 
localProcess.startTimer(); Object[] ws = theSearch.references(); StringBuffer refstr = new StringBuffer(); for (int j = 0; j < ws.length; j++) refstr.append(",").append((String) ws[j]); prop.putASIS("references", (refstr.length() > 0) ? refstr.substring(1) : new String(refstr)); + localProcess.yield("reference collection", ws.length); } prop.putASIS("indexabstract", new String(indexabstract)); @@ -241,6 +242,7 @@ public final class search { } else { // result is a List of urlEntry elements + localProcess.startTimer(); StringBuffer links = new StringBuffer(); String resource = null; plasmaSearchEvent.ResultEntry entry; @@ -253,6 +255,7 @@ public final class search { } prop.putASIS("links", new String(links)); prop.put("linkcount", accu.size()); + localProcess.yield("result list preparation", accu.size()); } // add information about forward peers @@ -278,7 +281,7 @@ public final class search { yacyCore.log.logInfo("EXIT HASH SEARCH: " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + joincount + " links found, " + prop.get("linkcount", "?") + " links selected, " + - indexabstractContainercount + " index abstract references attached, " + + indexabstractContainercount + " index abstracts, " + (System.currentTimeMillis() - timestamp) + " milliseconds"); prop.putASIS("searchtime", Long.toString(System.currentTimeMillis() - timestamp)); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index f15e95016..c229fca3d 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -137,7 +137,7 @@ public final class transferURL { } // check if the entry is blacklisted - if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, lEntry.hash(), comp.url()))) { + if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, comp.url()))) { int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash()); yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + comp.url().toNormalform(false, true) + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs"); lEntry = null; diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java index 58a525a90..f802b2643 100644 --- a/htroot/yacy/urls.java +++ b/htroot/yacy/urls.java @@ -72,7 +72,7 @@ public class urls { prop.put("item_" + c + "_description", entry.name()); prop.put("item_" + c + "_author", ""); prop.put("item_" + c + "_pubDate", serverDate.shortSecondTime(entry.appdate())); - prop.put("item_" + c + "_guid", entry.urlhash()); + prop.put("item_" + c + "_guid", entry.url().hash()); c++; count--; } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 8e654847f..af4b8858f 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -58,7 +58,6 @@ import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroNaturalOrder; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSearchEvent; @@ -69,7 +68,6 @@ import de.anomic.plasma.plasmaSearchProcessing; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; -import de.anomic.server.serverDomains; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -78,6 +76,7 @@ import de.anomic.tools.crypt; import 
de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; +import de.anomic.yacy.yacyURL; public class yacysearch { @@ -104,9 +103,9 @@ public class yacysearch { // save referrer // System.out.println("HEADER=" + header.toString()); if (referer != null) { - URL url; - try { url = new URL(referer); } catch (MalformedURLException e) { url = null; } - if ((url != null) && (!serverDomains.isLocal(url))) { + yacyURL url; + try { url = new yacyURL(referer, null); } catch (MalformedURLException e) { url = null; } + if ((url != null) && (!url.isLocal())) { final HashMap referrerprop = new HashMap(); referrerprop.put("count", "1"); referrerprop.put("clientip", header.get("CLIENTIP")); @@ -454,8 +453,8 @@ public class yacysearch { int depth = post.getInt("depth", 0); int columns = post.getInt("columns", 6); - URL url = null; - try {url = new URL(post.get("url", ""));} catch (MalformedURLException e) {} + yacyURL url = null; + try {url = new yacyURL(post.get("url", ""), null);} catch (MalformedURLException e) {} plasmaSearchImages si = new plasmaSearchImages(6000, url, depth); Iterator i = si.entries(); htmlFilterImageEntry ie; diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 1dcb6240e..ac9447dc8 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -30,13 +30,11 @@ import java.net.URLEncoder; import java.util.TreeSet; import de.anomic.http.httpHeader; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchPreOrder; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.crypt; @@ -44,6 +42,7 @@ import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public class yacysearchitem { @@ -86,9 +85,9 @@ public class yacysearchitem { prop.put("content_url", result.urlstring()); int port=result.url().getPort(); - URL faviconURL; + yacyURL faviconURL; try { - faviconURL = new URL(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + String.valueOf(port)) : "") + "/favicon.ico"); + faviconURL = new yacyURL(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + String.valueOf(port)) : "") + "/favicon.ico", null); } catch (MalformedURLException e1) { faviconURL = null; } @@ -102,14 +101,14 @@ public class yacysearchitem { prop.put("content_size", Long.toString(result.filesize())); TreeSet[] query = theQuery.queryWords(); - URL wordURL = null; + yacyURL wordURL = null; try { prop.put("content_words", URLEncoder.encode(query[0].toString(),"UTF-8")); } catch (UnsupportedEncodingException e) {} prop.put("content_former", theQuery.queryString); - prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.hash()) + - ((plasmaURL.probablyRootURL(result.hash())) ? ", probablyRootURL" : "") + - (((wordURL = plasmaURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : "")); + prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + yacyURL.domLengthEstimation(result.hash()) + + ((yacyURL.probablyRootURL(result.hash())) ? 
", probablyRootURL" : "") + + (((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : "")); prop.putASIS("content_snippet", result.textSnippet().getLineMarked(theQuery.queryHashes)); diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java index 5e7e731f9..45f421dea 100644 --- a/source/de/anomic/data/SitemapParser.java +++ b/source/de/anomic/data/SitemapParser.java @@ -60,14 +60,13 @@ import org.xml.sax.helpers.DefaultHandler; import de.anomic.http.httpc; import de.anomic.http.httpdByteCountInputStream; import de.anomic.index.indexURLEntry; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverDate; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; /** * Class to parse a sitemap file.
@@ -140,7 +139,7 @@ public class SitemapParser extends DefaultHandler { /** * The location of the sitemap file */ - private URL siteMapURL = null; + private yacyURL siteMapURL = null; /** * The next URL to enqueue @@ -153,7 +152,7 @@ public class SitemapParser extends DefaultHandler { private Date lastMod = null; - public SitemapParser(plasmaSwitchboard sb, URL sitemap, plasmaCrawlProfile.entry theCrawlingProfile) { + public SitemapParser(plasmaSwitchboard sb, yacyURL sitemap, plasmaCrawlProfile.entry theCrawlingProfile) { if (sb == null) throw new NullPointerException("The switchboard must not be null"); if (sitemap == null) throw new NullPointerException("The sitemap URL must not be null"); this.switchboard = sb; @@ -276,7 +275,12 @@ public class SitemapParser extends DefaultHandler { if (this.nextURL == null) return; // get the url hash - String nexturlhash = plasmaURL.urlHash(this.nextURL); + String nexturlhash; + try { + nexturlhash = (new yacyURL(this.nextURL, null)).hash(); + } catch (MalformedURLException e1) { + nexturlhash = null; + } // check if the url is known and needs to be recrawled if (this.lastMod != null) { @@ -314,7 +318,7 @@ public class SitemapParser extends DefaultHandler { this.logger.logInfo("The URL '" + this.nextURL + "' can not be crawled. Reason: " + error); // insert URL into the error DB - plasmaCrawlZURL.Entry ee = this.switchboard.errorURL.newEntry(new URL(this.nextURL), error); + plasmaCrawlZURL.Entry ee = this.switchboard.errorURL.newEntry(new yacyURL(this.nextURL, null), error); ee.store(); this.switchboard.errorURL.stackPushEntry(ee); } catch (MalformedURLException e) {/* ignore this */ } diff --git a/source/de/anomic/data/URLFetcherStack.java b/source/de/anomic/data/URLFetcherStack.java index aab43392d..42703ad0c 100644 --- a/source/de/anomic/data/URLFetcherStack.java +++ b/source/de/anomic/data/URLFetcherStack.java @@ -51,8 +51,8 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; -import de.anomic.net.URL; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class URLFetcherStack { @@ -84,7 +84,7 @@ public class URLFetcherStack { this.db.close(); } - public boolean push(URL url) { + public boolean push(yacyURL url) { try { this.db.push(this.db.row().newEntry( new byte[][] { url.toNormalform(true, true).getBytes() } @@ -97,14 +97,14 @@ public class URLFetcherStack { } } - public URL pop() { + public yacyURL pop() { try { kelondroRow.Entry r = this.db.pop(); if (r == null) return null; final String url = r.getColString(0, null); try { this.popped++; - return new URL(url); + return new yacyURL(url, null); } catch (MalformedURLException e) { this.log.logSevere("found invalid URL-entry: " + url); return null; diff --git a/source/de/anomic/data/URLLicense.java b/source/de/anomic/data/URLLicense.java index b7ed0e132..f4e437456 100644 --- a/source/de/anomic/data/URLLicense.java +++ b/source/de/anomic/data/URLLicense.java @@ -29,7 +29,7 @@ package de.anomic.data; import java.util.HashMap; import java.util.Random; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public class URLLicense { @@ -46,7 +46,7 @@ public class URLLicense { this.keylen = keylen; } - public String aquireLicense(URL url) { + public String aquireLicense(yacyURL url) { // generate license key String license = ""; while (license.length() < keylen) license += Integer.toHexString(random.nextInt()); @@ -59,10 +59,10 @@ public class 
URLLicense { return license; } - public URL releaseLicense(String license) { - URL url = null; + public yacyURL releaseLicense(String license) { + yacyURL url = null; synchronized (permissions) { - url = (URL) permissions.remove(license); + url = (yacyURL) permissions.remove(license); } /* if (url == null) { diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index b9295717f..842ed5c15 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -48,6 +48,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.io.Writer; +import java.net.MalformedURLException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -72,7 +73,6 @@ import org.xml.sax.SAXException; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.plasma.plasmaCondenser; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroException; @@ -80,10 +80,10 @@ import de.anomic.kelondro.kelondroMapObjects; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroObjects; import de.anomic.kelondro.kelondroObjectsMapEntry; -import de.anomic.net.URL; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class bookmarksDB { kelondroMapObjects tagsTable; @@ -390,7 +390,7 @@ public class bookmarksDB { } - public int importFromBookmarks(URL baseURL, String input, String tag, boolean importPublic){ + public int importFromBookmarks(yacyURL baseURL, String input, String tag, boolean importPublic){ try { // convert string to inputstream ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8")); @@ -402,7 +402,7 @@ public class bookmarksDB { return 0; } } - public int importFromBookmarks(URL baseURL, InputStreamReader input, String tag, boolean importPublic){ + public int importFromBookmarks(yacyURL baseURL, InputStreamReader input, String tag, boolean importPublic){ int importCount = 0; HashMap links=new HashMap(); @@ -712,7 +712,11 @@ public class bookmarksDB { if(!url.toLowerCase().startsWith("http://") && !url.toLowerCase().startsWith("https://")){ url="http://"+url; } - this.urlHash=plasmaURL.urlHash(url); + try { + this.urlHash=(new yacyURL(url, null)).hash(); + } catch (MalformedURLException e) { + this.urlHash = null; + } entry.put(BOOKMARK_URL, url); this.timestamp=System.currentTimeMillis(); tags=new HashSet(); @@ -728,7 +732,7 @@ public class bookmarksDB { removeBookmark(this.urlHash); //prevent empty tags } - public Bookmark(String urlHash, URL url){ + public Bookmark(String urlHash, yacyURL url){ super(); this.urlHash=urlHash; entry.put(BOOKMARK_URL, url.toNormalform(false, true)); @@ -742,9 +746,9 @@ public class bookmarksDB { tags=new HashSet(); timestamp=System.currentTimeMillis(); } - - public Bookmark(kelondroObjectsMapEntry map) { - this(plasmaURL.urlHash((String)map.map().get(BOOKMARK_URL)), map.map()); + + public Bookmark(kelondroObjectsMapEntry map) throws MalformedURLException { + this((new yacyURL((String)map.map().get(BOOKMARK_URL), null)).hash(), map.map()); } private Map toMap(){ diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java index 16351fd17..e33b006ea 100644 --- 
a/source/de/anomic/data/robotsParser.java +++ b/source/de/anomic/data/robotsParser.java @@ -56,11 +56,11 @@ import java.util.Date; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlRobotsTxt; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; /* * A class for Parsing robots.txt files. @@ -227,7 +227,7 @@ public final class robotsParser{ return new Object[]{denyList,sitemap,crawlDelay}; } - private static final int getPort(URL theURL) { + private static final int getPort(yacyURL theURL) { int port = theURL.getPort(); if (port == -1) { if (theURL.getProtocol().equalsIgnoreCase("http")) { @@ -240,7 +240,7 @@ public final class robotsParser{ return port; } - private static final String getHostPort(URL theURL) { + private static final String getHostPort(yacyURL theURL) { String urlHostPort = null; int port = getPort(theURL); urlHostPort = theURL.getHost() + ":" + port; @@ -249,9 +249,9 @@ public final class robotsParser{ return urlHostPort; } - public static URL getSitemapURL(URL theURL) { + public static yacyURL getSitemapURL(yacyURL theURL) { if (theURL == null) throw new IllegalArgumentException(); - URL sitemapURL = null; + yacyURL sitemapURL = null; // generating the hostname:poart string needed to do a DB lookup String urlHostPort = getHostPort(theURL); @@ -265,13 +265,13 @@ public final class robotsParser{ try { String sitemapUrlStr = robotsTxt4Host.getSitemap(); - if (sitemapUrlStr != null) sitemapURL = new URL(sitemapUrlStr); + if (sitemapUrlStr != null) sitemapURL = new yacyURL(sitemapUrlStr, null); } catch (MalformedURLException e) {/* ignore this */} return sitemapURL; } - public static Integer getCrawlDelay(URL theURL) { + public static Integer getCrawlDelay(yacyURL theURL) { if (theURL == null) throw new IllegalArgumentException(); Integer crawlDelay = null; @@ -292,7 +292,7 @@ public final class robotsParser{ return crawlDelay; } - public static boolean isDisallowed(URL nexturl) { + public static boolean isDisallowed(yacyURL nexturl) { if (nexturl == null) throw new IllegalArgumentException(); // generating the hostname:poart string needed to do a DB lookup @@ -309,10 +309,10 @@ public final class robotsParser{ (robotsTxt4Host.getLoadedDate() == null) || (System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() > 7*24*60*60*1000) ) { - URL robotsURL = null; + yacyURL robotsURL = null; // generating the proper url to download the robots txt try { - robotsURL = new URL(nexturl.getProtocol(),nexturl.getHost(),getPort(nexturl),"/robots.txt"); + robotsURL = new yacyURL(nexturl.getProtocol(),nexturl.getHost(),getPort(nexturl),"/robots.txt"); } catch (MalformedURLException e) { serverLog.logSevere("ROBOTS","Unable to generate robots.txt URL for URL '" + nexturl.toString() + "'."); return false; @@ -371,7 +371,7 @@ public final class robotsParser{ return false; } - static Object[] downloadRobotsTxt(URL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception { + static Object[] downloadRobotsTxt(yacyURL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception { if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null}; redirectionCount--; @@ -392,7 +392,7 @@ public final class robotsParser{ httpHeader reqHeaders = new httpHeader(); // adding referer - reqHeaders.put(httpHeader.REFERER, 
(URL.newURL(robotsURL,"/")).toNormalform(true, true)); + reqHeaders.put(httpHeader.REFERER, (yacyURL.newURL(robotsURL,"/")).toNormalform(true, true)); if (entry != null) { oldEtag = entry.getETag(); @@ -447,7 +447,7 @@ public final class robotsParser{ redirectionUrlString = redirectionUrlString.trim(); // generating the new URL object - URL redirectionUrl = URL.newURL(robotsURL, redirectionUrlString); + yacyURL redirectionUrl = yacyURL.newURL(robotsURL, redirectionUrlString); // returning the used httpc httpc.returnInstance(con); diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index 509cc8aba..1d6e70a3f 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -314,12 +314,11 @@ public final class userDB { public static final String BLOG_RIGHT = "blogRight"; public static final String WIKIADMIN_RIGHT = "wikiAdminRight"; public static final String BOOKMARK_RIGHT = "bookmarkRight"; - public static final String SOAP_RIGHT = "soapRight"; //to create new rights, you just need to edit this strings public static final String RIGHT_TYPES= ADMIN_RIGHT+","+DOWNLOAD_RIGHT+","+UPLOAD_RIGHT+","+PROXY_RIGHT+","+ - BLOG_RIGHT+","+BOOKMARK_RIGHT+","+WIKIADMIN_RIGHT+","+SOAP_RIGHT; + BLOG_RIGHT+","+BOOKMARK_RIGHT+","+WIKIADMIN_RIGHT; public static final String RIGHT_NAMES="Admin,Download,Upload,Proxy usage,Blog,Bookmark,Wiki Admin,SOAP"; public static final int PROXY_ALLOK = 0; //can Surf diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 847c2a0ca..dfd8603eb 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -62,10 +62,10 @@ import java.util.TreeSet; import javax.swing.event.EventListenerList; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper { @@ -112,14 +112,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen /** * {@link URL} to the favicon that belongs to the document */ - private URL favicon; + private yacyURL favicon; /** * The document root {@link URL} */ - private URL root; + private yacyURL root; - public htmlFilterContentScraper(URL root) { + public htmlFilterContentScraper(yacyURL root) { // the root value here will not be used to load the resource. 
// it is only the reference for relative links super(linkTags0, linkTags1); @@ -161,7 +161,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen private String absolutePath(String relativePath) { try { - return URL.newURL(root, relativePath).toNormalform(false, true); + return yacyURL.newURL(root, relativePath).toNormalform(false, true); } catch (Exception e) { return ""; } @@ -175,13 +175,13 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen height = Integer.parseInt(tagopts.getProperty("height", "-1")); } catch (NumberFormatException e) {} try { - URL url = new URL(absolutePath(tagopts.getProperty("src", ""))); + yacyURL url = new yacyURL(absolutePath(tagopts.getProperty("src", "")), null); htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt",""), width, height); images.add(ie); } catch (MalformedURLException e) {} } if (tagname.equalsIgnoreCase("base")) try { - root = new URL(tagopts.getProperty("href", "")); + root = new yacyURL(tagopts.getProperty("href", ""), null); } catch (MalformedURLException e) {} if (tagname.equalsIgnoreCase("frame")) { anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name","")); @@ -204,9 +204,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen if (href.length() > 0) anchors.put(absolutePath(href), areatitle); } if (tagname.equalsIgnoreCase("link")) { - URL newLink = null; + yacyURL newLink = null; try { - newLink = new URL(absolutePath(tagopts.getProperty("href", ""))); + newLink = new yacyURL(absolutePath(tagopts.getProperty("href", "")), null); } catch (MalformedURLException e) {} if (newLink != null) { @@ -363,7 +363,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen /** * @return the {@link URL} to the favicon that belongs to the document */ - public URL getFavicon() { + public yacyURL getFavicon() { return this.favicon; } @@ -478,7 +478,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen } } - public static htmlFilterContentScraper parseResource(URL location) throws IOException { + public static htmlFilterContentScraper parseResource(yacyURL location) throws IOException { // load page byte[] page = httpc.wget( location, diff --git a/source/de/anomic/htmlFilter/htmlFilterImageEntry.java b/source/de/anomic/htmlFilter/htmlFilterImageEntry.java index 4064ef1e5..abbb697e5 100644 --- a/source/de/anomic/htmlFilter/htmlFilterImageEntry.java +++ b/source/de/anomic/htmlFilter/htmlFilterImageEntry.java @@ -40,22 +40,22 @@ package de.anomic.htmlFilter; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public class htmlFilterImageEntry implements Comparable { - private URL url; + private yacyURL url; private String alt; private int width, height; - public htmlFilterImageEntry(URL url, String alt, int width, int height) { + public htmlFilterImageEntry(yacyURL url, String alt, int width, int height) { this.url = url; this.alt = alt; this.width = width; this.height = height; } - public URL url() { + public yacyURL url() { return this.url; } diff --git a/source/de/anomic/htmlFilter/htmlFilterInputStream.java b/source/de/anomic/htmlFilter/htmlFilterInputStream.java index 8e9885542..f7df006e5 100644 --- a/source/de/anomic/htmlFilter/htmlFilterInputStream.java +++ b/source/de/anomic/htmlFilter/htmlFilterInputStream.java @@ -36,8 +36,7 @@ import java.io.Writer; import java.util.Properties; import de.anomic.http.httpHeader; -import 
de.anomic.net.URL; - +import de.anomic.yacy.yacyURL; public class htmlFilterInputStream extends InputStream implements htmlFilterEventListener { @@ -59,7 +58,7 @@ public class htmlFilterInputStream extends InputStream implements htmlFilterEven public htmlFilterInputStream( InputStream inStream, String inputStreamCharset, - URL rooturl, + yacyURL rooturl, htmlFilterTransformer transformer, boolean passbyIfBinarySuspect ) throws UnsupportedEncodingException { diff --git a/source/de/anomic/htmlFilter/htmlFilterWriter.java b/source/de/anomic/htmlFilter/htmlFilterWriter.java index 8d7175e4d..d00dc7673 100644 --- a/source/de/anomic/htmlFilter/htmlFilterWriter.java +++ b/source/de/anomic/htmlFilter/htmlFilterWriter.java @@ -63,8 +63,8 @@ import java.net.MalformedURLException; import java.util.Enumeration; import java.util.Properties; -import de.anomic.net.URL; import de.anomic.server.serverCharBuffer; +import de.anomic.yacy.yacyURL; public final class htmlFilterWriter extends Writer { @@ -508,7 +508,7 @@ public final class htmlFilterWriter extends Writer { if (args.length != 1) return; char[] buffer = new char[512]; try { - htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL("http://localhost:8080")); + htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost:8080", null)); htmlFilterTransformer transformer = new htmlFilterContentTransformer(); // TODO: this does not work at the moment System.exit(0); diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java index 73669fe24..78e7255f9 100644 --- a/source/de/anomic/http/httpHeader.java +++ b/source/de/anomic/http/httpHeader.java @@ -73,9 +73,9 @@ import java.util.TimeZone; import java.util.TreeMap; import java.util.Vector; -import de.anomic.net.URL; import de.anomic.server.serverCore; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class httpHeader extends TreeMap implements Map { @@ -675,7 +675,7 @@ public final class httpHeader extends TreeMap implements Map { } public static boolean supportChunkedEncoding(Properties conProp) { - // getting the http version of the soap client + // getting the http version of the client String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); // only clients with http version 1.1 supports chunk @@ -793,7 +793,7 @@ public final class httpHeader extends TreeMap implements Map { theHeader.append("\r\n"); } - public static URL getRequestURL(Properties conProp) throws MalformedURLException { + public static yacyURL getRequestURL(Properties conProp) throws MalformedURLException { String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST); String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/' String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given @@ -807,7 +807,7 @@ public final class httpHeader extends TreeMap implements Map { host = host.substring(0, pos); } - URL url = new URL("http", host, port, (args == null) ? path : path + "?" + args); + yacyURL url = new yacyURL("http", host, port, (args == null) ? path : path + "?" 
+ args); return url; } diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index 1bcf1a189..bad06b831 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -76,7 +76,6 @@ import javax.net.ssl.X509TrustManager; import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCore; import de.anomic.server.serverDomains; @@ -84,6 +83,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; /** * This class implements an http client. While http access is built-in in java @@ -959,7 +959,7 @@ public final class httpc { } public static byte[] singleGET( - URL u, + yacyURL u, String vhost, int timeout, String user, @@ -1017,7 +1017,7 @@ public final class httpc { } public static byte[] singlePOST( - URL u, + yacyURL u, String vhost, int timeout, String user, @@ -1049,7 +1049,7 @@ public final class httpc { } public static byte[] wget( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1090,7 +1090,7 @@ public final class httpc { return a; } - public static Map loadHashMap(URL url, httpRemoteProxyConfig proxy) { + public static Map loadHashMap(yacyURL url, httpRemoteProxyConfig proxy) { try { // should we use the proxy? boolean useProxy = (proxy != null) && @@ -1119,7 +1119,7 @@ public final class httpc { } public static httpHeader whead( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1130,7 +1130,7 @@ public final class httpc { } public static httpHeader whead( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1172,7 +1172,7 @@ public final class httpc { } public static byte[] wput( - URL url, + yacyURL url, String vhost, int timeout, String user, @@ -1217,7 +1217,7 @@ public final class httpc { httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort); try { - URL u = new URL(url); + yacyURL u = new yacyURL(url, null); text = nxTools.strings(wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig, null, null)); } catch (MalformedURLException e) { System.out.println("The url '" + url + "' is wrong."); diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index b7b8b421c..dc38d277b 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -51,7 +51,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; -import java.lang.reflect.Constructor; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URLDecoder; @@ -67,7 +66,6 @@ import java.util.StringTokenizer; import de.anomic.data.htmlTools; import de.anomic.data.userDB; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCodings; @@ -78,9 +76,9 @@ import de.anomic.server.serverHandler; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; -import de.anomic.soap.httpdSoapHandler; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; /** @@ -119,7 +117,6 @@ public final class httpd implements serverHandler { public 
static final String hline = "-------------------------------------------------------------------------------"; public static HashMap reverseMappingCache = new HashMap(); - private httpdSoapHandler soapHandler = null; private static plasmaSwitchboard switchboard = null; private static String virtualHost = null; @@ -488,36 +485,8 @@ public final class httpd implements serverHandler { if (this.prop.getProperty(httpHeader.CONNECTION_PROP_HOST).equals(virtualHost)) { // pass to server if (this.allowServer) { - - /* - * Handling SOAP Requests here ... - */ - if (this.prop.containsKey(httpHeader.CONNECTION_PROP_PATH) && this.prop.getProperty(httpHeader.CONNECTION_PROP_PATH).startsWith("/soap/")) { - if (this.soapHandler == null) { - try { - Class soapHandlerClass = Class.forName("de.anomic.soap.httpdSoapHandler"); - Constructor classConstructor = soapHandlerClass.getConstructor( new Class[] { serverSwitch.class } ); - this.soapHandler = (httpdSoapHandler) classConstructor.newInstance(new Object[] { switchboard }); - } catch (Exception e) { - sendRespondError(this.prop,this.session.out,4,501,null,"Error while initializing SOAP Excension",e); - return serverCore.TERMINATE_CONNECTION; - } catch (NoClassDefFoundError e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } catch (Error e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } - } - this.soapHandler.doGet(this.prop, header, this.session.out); - - /* - * Handling HTTP requests here ... - */ - } else { - if (this.handleServerAuthentication(header)) { - httpdFileHandler.doGet(this.prop, header, this.session.out); - } + if (this.handleServerAuthentication(header)) { + httpdFileHandler.doGet(this.prop, header, this.session.out); } } else { // not authorized through firewall blocking (ip does not match filter) @@ -637,40 +606,8 @@ public final class httpd implements serverHandler { if (prop.getProperty(httpHeader.CONNECTION_PROP_HOST).equals(virtualHost)) { // pass to server if (allowServer) { - - /* - * Handling SOAP Requests here ... - */ - if (this.prop.containsKey(httpHeader.CONNECTION_PROP_PATH) && this.prop.getProperty(httpHeader.CONNECTION_PROP_PATH).startsWith("/soap/")) { - if (this.soapHandler == null) { - try { - // creating the soap handler class by name - Class soapHandlerClass = Class.forName("de.anomic.soap.httpdSoapHandler"); - - // Look for the proper constructor - Constructor soapHandlerConstructor = soapHandlerClass.getConstructor( new Class[] { serverSwitch.class } ); - - // creating the new object - this.soapHandler = (httpdSoapHandler)soapHandlerConstructor.newInstance( new Object[] { switchboard } ); - } catch (Exception e) { - sendRespondError(this.prop,this.session.out,4,501,null,"Error while initializing SOAP Excension",e); - return serverCore.TERMINATE_CONNECTION; - } catch (NoClassDefFoundError e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } catch (Error e) { - sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e); - return serverCore.TERMINATE_CONNECTION; - } - } - this.soapHandler.doPost(this.prop, header, this.session.out, this.session.in); - /* - * Handling normal HTTP requests here ... 
- */ - } else { - if (handleServerAuthentication(header)) { - httpdFileHandler.doPost(prop, header, this.session.out, this.session.in); - } + if (handleServerAuthentication(header)) { + httpdFileHandler.doPost(prop, header, this.session.out, this.session.in); } } else { // not authorized through firewall blocking (ip does not match filter) @@ -1199,7 +1136,7 @@ public final class httpd implements serverHandler { String urlString; try { - urlString = (new URL((method.equals(httpHeader.METHOD_CONNECT)?"https":"http"), host, port, (args == null) ? path : path + "?" + args)).toString(); + urlString = (new yacyURL((method.equals(httpHeader.METHOD_CONNECT)?"https":"http"), host, port, (args == null) ? path : path + "?" + args)).toString(); } catch (MalformedURLException e) { urlString = "invalid URL"; } diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index 051662f6e..65f4f6435 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -962,7 +962,6 @@ public final class httpdFileHandler { } } - //System.out.println("**DEBUG** loading class file " + classFile); Class c = provider.loadClass(classFile); Class[] params = new Class[] { httpHeader.class, diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 64ef59f70..5faedd672 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -93,7 +93,6 @@ import java.util.zip.GZIPOutputStream; import de.anomic.htmlFilter.htmlFilterContentTransformer; import de.anomic.htmlFilter.htmlFilterTransformer; import de.anomic.htmlFilter.htmlFilterWriter; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -107,6 +106,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverMiniLogFormatter; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public final class httpdProxyHandler { @@ -319,7 +319,7 @@ public final class httpdProxyHandler { int pos=0; int port=0; - URL url = null; + yacyURL url = null; try { url = httpHeader.getRequestURL(conProp); @@ -329,11 +329,11 @@ public final class httpdProxyHandler { redirectorWriter.println(url.toNormalform(false, true)); redirectorWriter.flush(); } - String newUrl=redirectorReader.readLine(); - if(!newUrl.equals("")){ - try{ - url=new URL(newUrl); - }catch(MalformedURLException e){}//just keep the old one + String newUrl = redirectorReader.readLine(); + if (!newUrl.equals("")) { + try { + url = new yacyURL(newUrl, null); + } catch(MalformedURLException e){}//just keep the old one } conProp.setProperty(httpHeader.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort()); conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, url.getPath()); @@ -474,7 +474,7 @@ public final class httpdProxyHandler { } } - private static void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, File cacheFile, OutputStream respond) { + private static void fulfillRequestFromWeb(Properties conProp, yacyURL url,String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, File cacheFile, OutputStream respond) { GZIPOutputStream gzippedOut = null; httpChunkedOutputStream chunkedOut = null; @@ -727,7 +727,7 @@ public final class httpdProxyHandler { private static void fulfillRequestFromCache( 
Properties conProp, - URL url, + yacyURL url, String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, @@ -865,7 +865,7 @@ public final class httpdProxyHandler { httpc remote = null; httpc.response res = null; - URL url = null; + yacyURL url = null; try { // remembering the starting time of the request Date requestDate = new Date(); // remember the time... @@ -892,7 +892,7 @@ public final class httpdProxyHandler { } try { - url = new URL("http", host, port, (args == null) ? path : path + "?" + args); + url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args); } catch (MalformedURLException e) { String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; @@ -968,7 +968,7 @@ public final class httpdProxyHandler { public static void doPost(Properties conProp, httpHeader requestHeader, OutputStream respond, PushbackInputStream body) throws IOException { httpc remote = null; - URL url = null; + yacyURL url = null; try { // remembering the starting time of the request Date requestDate = new Date(); // remember the time... @@ -993,7 +993,7 @@ public final class httpdProxyHandler { } try { - url = new URL("http", host, port, (args == null) ? path : path + "?" + args); + url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args); } catch (MalformedURLException e) { String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; @@ -1308,7 +1308,7 @@ public final class httpdProxyHandler { out.flush(); } */ - private static void handleProxyException(Exception e, httpc remote, Properties conProp, OutputStream respond, URL url) { + private static void handleProxyException(Exception e, httpc remote, Properties conProp, OutputStream respond, yacyURL url) { // this may happen if // - the targeted host does not exist // - anything with the remote server was wrong. 
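On the proxy and header-parsing side the change is the same but uses the piecewise constructor: where a de.anomic.net.URL was previously assembled from scheme, host, port and path, a yacyURL is now built, and the intranet check moves onto the object (url.isLocal() instead of serverDomains.isLocal(url), as in the yacysearch hunk above). A hedged sketch of that call pattern, using only constructors and accessors shown in this patch; the class and method names are placeholders, not code from the repository:

    import java.net.MalformedURLException;
    import de.anomic.yacy.yacyURL;

    // Illustrative only, not part of the patch.
    final class ProxyUrlIdiom {
        // piecewise constructor, as in httpHeader.getRequestURL and the proxy handlers above
        static yacyURL requestURL(String host, int port, String path, String args) throws MalformedURLException {
            return new yacyURL("http", host, port, (args == null) ? path : path + "?" + args);
        }

        // isLocal() replaces the former serverDomains.isLocal(url) check
        static boolean isRemoteReferer(yacyURL url) {
            return url != null && !url.isLocal();
        }
    }
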
diff --git a/source/de/anomic/icap/icapd.java b/source/de/anomic/icap/icapd.java index dc5e172b9..de7ab0813 100644 --- a/source/de/anomic/icap/icapd.java +++ b/source/de/anomic/icap/icapd.java @@ -60,7 +60,6 @@ import java.util.Properties; import de.anomic.http.httpChunkedInputStream; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -71,6 +70,7 @@ import de.anomic.server.serverFileUtils; import de.anomic.server.serverHandler; import de.anomic.server.logging.serverLog; import de.anomic.server.serverCore.Session; +import de.anomic.yacy.yacyURL; /** * @author theli @@ -346,7 +346,7 @@ public class icapd implements serverHandler { httpHeader.handleTransparentProxySupport(httpReqHeader,httpReqProps,virtualHost,true); // getting the request URL - URL httpRequestURL = httpHeader.getRequestURL(httpReqProps); + yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps); /* ========================================================================= * Parsing response data diff --git a/source/de/anomic/index/indexURLEntry.java b/source/de/anomic/index/indexURLEntry.java index 5f89d4bac..bbc0cefcb 100644 --- a/source/de/anomic/index/indexURLEntry.java +++ b/source/de/anomic/index/indexURLEntry.java @@ -38,15 +38,14 @@ import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaSearchQuery; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverCodings; import de.anomic.server.serverDate; import de.anomic.tools.crypt; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; import de.anomic.index.indexRWIEntry; public class indexURLEntry { @@ -120,7 +119,7 @@ public class indexURLEntry { private indexRWIEntry word; // this is only used if the url is transported via remote search requests public indexURLEntry( - URL url, + yacyURL url, String descr, String author, String tags, @@ -143,12 +142,12 @@ public class indexURLEntry { int lapp) { // create new entry and store it into database this.entry = rowdef.newEntry(); - this.entry.setCol(col_hash, plasmaURL.urlHash(url), null); + this.entry.setCol(col_hash, url.hash(), null); this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag)); this.entry.setCol(col_mod, encodeDate(mod)); this.entry.setCol(col_load, encodeDate(load)); this.entry.setCol(col_fresh, encodeDate(fresh)); - this.entry.setCol(col_referrer, referrer.getBytes()); + this.entry.setCol(col_referrer, (referrer == null) ? 
null : referrer.getBytes()); this.entry.setCol(col_md5, md5); this.entry.setCol(col_size, size); this.entry.setCol(col_wc, wc); @@ -170,7 +169,7 @@ public class indexURLEntry { return kelondroNaturalOrder.encodeLong(d.getTime() / 86400000, 4); } - public static byte[] encodeComp(URL url, String descr, String author, String tags, String ETag) { + public static byte[] encodeComp(yacyURL url, String descr, String author, String tags, String ETag) { serverCharBuffer s = new serverCharBuffer(200); s.append(url.toNormalform(false, true)).append(10); s.append(descr).append(10); @@ -190,9 +189,9 @@ public class indexURLEntry { // generates an plasmaLURLEntry using the properties from the argument // the property names must correspond to the one from toString //System.out.println("DEBUG-ENTRY: prop=" + prop.toString()); - URL url; + yacyURL url; try { - url = new URL(crypt.simpleDecode(prop.getProperty("url", ""), null)); + url = new yacyURL(crypt.simpleDecode(prop.getProperty("url", ""), null), prop.getProperty("hash")); } catch (MalformedURLException e) { url = null; } @@ -202,7 +201,7 @@ public class indexURLEntry { String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null); if (ETag == null) ETag = ""; this.entry = rowdef.newEntry(); - this.entry.setCol(col_hash, plasmaURL.urlHash(url), null); + this.entry.setCol(col_hash, url.hash(), null); this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag)); try { this.entry.setCol(col_mod, encodeDate(serverDate.shortDayFormatter.parse(prop.getProperty("mod", "20000101")))); @@ -219,7 +218,7 @@ public class indexURLEntry { } catch (ParseException e) { this.entry.setCol(col_fresh, encodeDate(new Date())); } - this.entry.setCol(col_referrer, prop.getProperty("referrer", plasmaURL.dummyHash).getBytes()); + this.entry.setCol(col_referrer, prop.getProperty("referrer", yacyURL.dummyHash).getBytes()); this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", ""))); this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0"))); this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0"))); @@ -301,6 +300,7 @@ public class indexURLEntry { ArrayList cl = nxTools.strings(this.entry.getCol("comp", null), "UTF-8"); return new indexURLEntry.Components( (cl.size() > 0) ? ((String) cl.get(0)).trim() : "", + hash(), (cl.size() > 1) ? ((String) cl.get(1)).trim() : "", (cl.size() > 2) ? ((String) cl.get(2)).trim() : "", (cl.size() > 3) ? 
((String) cl.get(3)).trim() : "", @@ -442,12 +442,12 @@ public class indexURLEntry { } public class Components { - private URL url; + private yacyURL url; private String title, author, tags, ETag; - public Components(String url, String title, String author, String tags, String ETag) { + public Components(String url, String urlhash, String title, String author, String tags, String ETag) { try { - this.url = new URL(url); + this.url = new yacyURL(url, urlhash); } catch (MalformedURLException e) { this.url = null; } @@ -456,18 +456,18 @@ public class indexURLEntry { this.tags = tags; this.ETag = ETag; } - public Components(URL url, String descr, String author, String tags, String ETag) { + public Components(yacyURL url, String descr, String author, String tags, String ETag) { this.url = url; this.title = descr; this.author = author; this.tags = tags; this.ETag = ETag; } - public URL url() { return this.url; } - public String title() { return this.title; } - public String author() { return this.author; } - public String tags() { return this.tags; } - public String ETag() { return this.ETag; } + public yacyURL url() { return this.url; } + public String title() { return this.title; } + public String author() { return this.author; } + public String tags() { return this.tags; } + public String ETag() { return this.ETag; } } } \ No newline at end of file diff --git a/source/de/anomic/kelondro/kelondroAbstractRecords.java b/source/de/anomic/kelondro/kelondroAbstractRecords.java index 6bfefc616..002340989 100644 --- a/source/de/anomic/kelondro/kelondroAbstractRecords.java +++ b/source/de/anomic/kelondro/kelondroAbstractRecords.java @@ -307,7 +307,7 @@ public abstract class kelondroAbstractRecords implements kelondroRecords { } private synchronized void checkConsistency() { - if (debugmode) try { // in debug mode + if ((debugmode) && (entryFile != null)) try { // in debug mode long efl = entryFile.length(); assert ((efl - POS_NODES) % ((long) recordsize)) == 0 : "rest = " + ((entryFile.length() - POS_NODES) % ((long) recordsize)) + ", USEDC = " + this.USEDC + ", FREEC = " + this.FREEC + ", recordsize = " + recordsize + ", file = " + filename; long calculated_used = (efl - POS_NODES) / ((long) recordsize); diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 06d18b4ec..6406d1178 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -41,11 +41,11 @@ import java.util.TimeZone; import java.util.TreeMap; import de.anomic.index.indexContainer; -import de.anomic.plasma.plasmaURL; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; import de.anomic.server.serverMemory; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class kelondroCollectionIndex { @@ -762,7 +762,7 @@ public class kelondroCollectionIndex { while (i.hasNext()) { entry = (kelondroRow.Entry) i.next(); ref = entry.getColBytes(0); - if ((ref.length == 12) && (plasmaURL.probablyRootURL(new String(ref)))) { + if ((ref.length == 12) && (yacyURL.probablyRootURL(new String(ref)))) { survival.addUnique(entry); i.remove(); } diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java index 9cd2633e1..bca4e7108 100644 --- a/source/de/anomic/net/natLib.java +++ b/source/de/anomic/net/natLib.java @@ -53,6 +53,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverDomains; import 
de.anomic.tools.disorderHeap; import de.anomic.tools.nxTools; +import de.anomic.yacy.yacyURL; public class natLib { @@ -64,7 +65,7 @@ public class natLib { rm status.htm */ try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null, null, null)); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://192.168.0.1:80/status.htm", null), "192.168.0.1", 5000, "admin", password, null, null, null)); x = nxTools.grep(x, 1, "IP Address"); if ((x == null) || (x.size() == 0)) return null; String line = nxTools.tail1(x); @@ -76,7 +77,7 @@ public class natLib { private static String getWhatIsMyIP() { try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null, null, null)); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://www.whatismyip.com/", null), "www.whatsmyip.com", 5000, null, null, null, null, null)); x = nxTools.grep(x, 0, "Your IP is"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 4); @@ -87,7 +88,7 @@ public class natLib { private static String getStanford() { try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null, null, null)); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null), "www.slac.stanford.edu", 5000, null, null, null, null, null)); x = nxTools.grep(x, 0, "firewall protecting your browser"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 7); @@ -98,7 +99,7 @@ public class natLib { private static String getIPID() { try { - ArrayList x = nxTools.strings(httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null, null, null), "UTF-8"); + ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://ipid.shat.net/", null), "ipid.shat.net", 5000, null, null, null, null, null), "UTF-8"); x = nxTools.grep(x, 2, "Your IP address"); String line = nxTools.tail1(x); return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1); diff --git a/source/de/anomic/plasma/cache/IResourceInfo.java b/source/de/anomic/plasma/cache/IResourceInfo.java index 3edfd5440..8b58eecb0 100644 --- a/source/de/anomic/plasma/cache/IResourceInfo.java +++ b/source/de/anomic/plasma/cache/IResourceInfo.java @@ -51,7 +51,7 @@ package de.anomic.plasma.cache; import java.util.Date; import java.util.Map; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; /** * A class containing metadata about a downloaded resource @@ -66,13 +66,13 @@ public interface IResourceInfo { /** * @return the URL of this content */ - public URL getUrl(); + public yacyURL getUrl(); /** * Returns the referer URL of this URL * @return referer URL */ - public URL getRefererUrl(); + public yacyURL getRefererUrl(); /** * Returns the mimetype of the cached object @@ -92,11 +92,6 @@ public interface IResourceInfo { */ public Date getModificationDate(); - /** - * @return the url hash of the content URL - */ - public String getUrlHash(); - /** * Specifies if the resource was requested with a * if modified since date diff --git a/source/de/anomic/plasma/cache/ResourceInfoFactory.java b/source/de/anomic/plasma/cache/ResourceInfoFactory.java index 54f281dc4..1af06c977 100644 --- a/source/de/anomic/plasma/cache/ResourceInfoFactory.java +++ b/source/de/anomic/plasma/cache/ResourceInfoFactory.java @@ -51,11 +51,12 @@ package 
de.anomic.plasma.cache; import java.lang.reflect.Constructor; import java.util.Map; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; + public class ResourceInfoFactory { public IResourceInfo buildResourceInfoObj( - URL resourceURL, + yacyURL resourceURL, Map resourceMetadata ) throws UnsupportedProtocolException, IllegalAccessException { @@ -73,7 +74,7 @@ public class ResourceInfoFactory { // getting the constructor final Constructor classConstructor = moduleClass.getConstructor( new Class[] { - URL.class, + yacyURL.class, Map.class } ); diff --git a/source/de/anomic/plasma/cache/ftp/ResourceInfo.java b/source/de/anomic/plasma/cache/ftp/ResourceInfo.java index bf98cc639..76c7fb63e 100644 --- a/source/de/anomic/plasma/cache/ftp/ResourceInfo.java +++ b/source/de/anomic/plasma/cache/ftp/ResourceInfo.java @@ -51,10 +51,9 @@ import java.util.Date; import java.util.HashMap; import java.util.Map; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; +import de.anomic.yacy.yacyURL; public class ResourceInfo implements IResourceInfo { @@ -62,8 +61,7 @@ public class ResourceInfo implements IResourceInfo { public static final String MODIFICATION_DATE = "modificationDate"; public static final String REFERER = "referer"; - private URL url; - private String urlHash; + private yacyURL url; private HashMap propertyMap; /** @@ -71,24 +69,22 @@ public class ResourceInfo implements IResourceInfo { * @param objectURL * @param objectInfo */ - public ResourceInfo(URL objectURL, Map objectInfo) { + public ResourceInfo(yacyURL objectURL, Map objectInfo) { if (objectURL == null) throw new NullPointerException(); if (objectInfo == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); // create the http header object this.propertyMap = new HashMap(objectInfo); } - public ResourceInfo(URL objectURL, String refererUrl, String mimeType, Date fileDate) { + public ResourceInfo(yacyURL objectURL, String refererUrl, String mimeType, Date fileDate) { if (objectURL == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); // create the http header object this.propertyMap = new HashMap(); @@ -113,22 +109,18 @@ public class ResourceInfo implements IResourceInfo { return new Date(Long.valueOf((String) this.propertyMap.get(MODIFICATION_DATE)).longValue()); } - public URL getRefererUrl() { + public yacyURL getRefererUrl() { try { - return (this.propertyMap == null) ? null : new URL((String)this.propertyMap.get(REFERER)); + return (this.propertyMap == null) ? 
null : new yacyURL((String)this.propertyMap.get(REFERER), null); } catch (MalformedURLException e) { return null; } } - public URL getUrl() { + public yacyURL getUrl() { return this.url; } - - public String getUrlHash() { - return this.urlHash; - } - + public Date ifModifiedSince() { return null; } diff --git a/source/de/anomic/plasma/cache/http/ResourceInfo.java b/source/de/anomic/plasma/cache/http/ResourceInfo.java index d3cc7a657..c942cd2a5 100644 --- a/source/de/anomic/plasma/cache/http/ResourceInfo.java +++ b/source/de/anomic/plasma/cache/http/ResourceInfo.java @@ -52,16 +52,14 @@ import java.util.Date; import java.util.Map; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; import de.anomic.server.serverDate; +import de.anomic.yacy.yacyURL; public class ResourceInfo implements IResourceInfo { - private URL url; - private String urlHash; + private yacyURL url; private httpHeader responseHeader; private httpHeader requestHeader; @@ -70,25 +68,23 @@ public class ResourceInfo implements IResourceInfo { * @param objectURL * @param objectInfo */ - public ResourceInfo(URL objectURL, Map objectInfo) { + public ResourceInfo(yacyURL objectURL, Map objectInfo) { if (objectURL == null) throw new NullPointerException(); if (objectInfo == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); // create the http header object this.responseHeader = new httpHeader(null, objectInfo); } - public ResourceInfo(URL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) { + public ResourceInfo(yacyURL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) { if (objectURL == null) throw new NullPointerException(); if (responseHeaders == null) throw new NullPointerException(); // generating the url hash this.url = objectURL; - this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true)); this.requestHeader = requestHeaders; this.responseHeader = responseHeaders; @@ -131,10 +127,10 @@ public class ResourceInfo implements IResourceInfo { return docDate; } - public URL getRefererUrl() { + public yacyURL getRefererUrl() { if (this.requestHeader == null) return null; try { - return new URL((String) this.requestHeader.get(httpHeader.REFERER, "")); + return new yacyURL((String) this.requestHeader.get(httpHeader.REFERER, ""), null); } catch (Exception e) { return null; } @@ -143,7 +139,7 @@ public class ResourceInfo implements IResourceInfo { /** * @see de.anomic.plasma.cache.IResourceInfo#getUrl() */ - public URL getUrl() { + public yacyURL getUrl() { return this.url; } @@ -151,7 +147,7 @@ public class ResourceInfo implements IResourceInfo { * @see de.anomic.plasma.cache.IResourceInfo#getUrlHash() */ public String getUrlHash() { - return this.urlHash; + return this.url.hash(); } public void setRequestHeader(httpHeader reqestHeader) { diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index 88a46c0cc..d64b9a088 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -49,10 +49,9 @@ package de.anomic.plasma.crawler; import java.io.File; import java.io.IOException; +import java.net.MalformedURLException; import java.util.Date; -import 
de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlLoaderMessage; import de.anomic.plasma.plasmaCrawlProfile; @@ -61,6 +60,7 @@ import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlWorker { @@ -86,7 +86,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW * Crawl job specific variables * ============================================================ */ public plasmaCrawlLoaderMessage theMsg; - protected URL url; + protected yacyURL url; protected String name; protected String refererURLString; protected String initiator; @@ -281,7 +281,12 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW this.errorMessage = failreason; // convert the referrer URL into a hash value - String referrerHash = (this.refererURLString==null)?null:plasmaURL.urlHash(this.refererURLString); + String referrerHash; + try { + referrerHash = (this.refererURLString == null) ? null : (new yacyURL(this.refererURLString, null)).hash(); + } catch (MalformedURLException e) { + referrerHash = null; + } // create a new errorURL DB entry plasmaCrawlEntry bentry = new plasmaCrawlEntry( diff --git a/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java b/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java index ba5b8a60e..fbaaefcbc 100644 --- a/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java @@ -55,7 +55,6 @@ import java.io.PrintStream; import java.io.PrintWriter; import java.util.Date; -import de.anomic.net.URL; import de.anomic.net.ftpc; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaHTCache; @@ -68,6 +67,7 @@ import de.anomic.plasma.crawler.plasmaCrawlWorker; import de.anomic.plasma.crawler.plasmaCrawlerPool; import de.anomic.plasma.plasmaHTCache.Entry; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorker { @@ -188,7 +188,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke if (isFolder) { fullPath = fullPath + "/"; file = ""; - this.url = URL.newURL(this.url,fullPath); + this.url = yacyURL.newURL(this.url,fullPath); } } diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java index d977452ca..329d06da1 100644 --- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java @@ -60,8 +60,6 @@ import de.anomic.http.httpc; import de.anomic.http.httpdBoundedSizeOutputStream; import de.anomic.http.httpdLimitExceededException; import de.anomic.http.httpdProxyHandler; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaCrawlLoader; import de.anomic.plasma.plasmaHTCache; @@ -74,6 +72,7 @@ import de.anomic.plasma.crawler.plasmaCrawlerPool; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverSystem; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class CrawlWorker extends AbstractCrawlWorker { @@ -140,7 +139,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { return 
load(DEFAULT_CRAWLING_RETRY_COUNT); } - protected plasmaHTCache.Entry createCacheEntry(URL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) { + protected plasmaHTCache.Entry createCacheEntry(yacyURL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) { IResourceInfo resourceInfo = new ResourceInfo(requestUrl,requestHeader,response.responseHeader); return plasmaHTCache.newEntry( requestDate, @@ -314,7 +313,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { } // normalizing URL - URL redirectionUrl = URL.newURL(this.url, redirectionUrlString); + yacyURL redirectionUrl = yacyURL.newURL(this.url, redirectionUrlString); // returning the used httpc httpc.returnInstance(remote); @@ -332,7 +331,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { } // generating url hash - String urlhash = plasmaURL.urlHash(redirectionUrl); + String urlhash = redirectionUrl.hash(); // removing url from loader queue plasmaCrawlLoader.switchboard.noticeURL.remove(urlhash); diff --git a/source/de/anomic/plasma/dbImport/SitemapImporter.java b/source/de/anomic/plasma/dbImport/SitemapImporter.java index 57f2ff0d1..8109f12e0 100644 --- a/source/de/anomic/plasma/dbImport/SitemapImporter.java +++ b/source/de/anomic/plasma/dbImport/SitemapImporter.java @@ -47,14 +47,14 @@ package de.anomic.plasma.dbImport; import java.util.HashMap; import de.anomic.data.SitemapParser; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.yacy.yacyURL; public class SitemapImporter extends AbstractImporter implements dbImporter { private SitemapParser parser = null; - private URL sitemapURL = null; + private yacyURL sitemapURL = null; public SitemapImporter(plasmaSwitchboard switchboard) { super("sitemap",switchboard); @@ -110,7 +110,7 @@ public class SitemapImporter extends AbstractImporter implements dbImporter { try { // getting the sitemap URL - this.sitemapURL = new URL((String)initParams.get("sitemapURL")); + this.sitemapURL = new yacyURL((String)initParams.get("sitemapURL"), null); // getting the crawling profile to use plasmaCrawlProfile.entry profileEntry = this.sb.profiles.getEntry((String)initParams.get("crawlingProfile")); diff --git a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java index be2b0e6d4..55e5ca384 100644 --- a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java @@ -142,13 +142,13 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor this.urlCount++; nextEntry = this.importNurlDB.pop(stackTypes[stackType], false); - nextHash = nextEntry.urlhash(); + nextHash = nextEntry.url().hash(); } else { if (!entryIter.hasNext()) break; this.urlCount++; nextEntry = (plasmaCrawlEntry) entryIter.next(); - nextHash = nextEntry.urlhash(); + nextHash = nextEntry.url().hash(); } } catch (IOException e) { this.log.logWarning("Unable to import entry: " + e.toString()); diff --git a/source/de/anomic/plasma/parser/AbstractParser.java b/source/de/anomic/plasma/parser/AbstractParser.java index 4de015560..caa1c5001 100644 --- a/source/de/anomic/plasma/parser/AbstractParser.java +++ b/source/de/anomic/plasma/parser/AbstractParser.java @@ -52,11 +52,11 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import de.anomic.net.URL; import 
de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.server.serverThread; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; /** * New classes implementing the {@link de.anomic.plasma.parser.Parser} interface @@ -139,7 +139,7 @@ public abstract class AbstractParser implements Parser{ return tempFile; } - public int parseDir(URL location, String prefix, File dir, plasmaParserDocument doc) + public int parseDir(yacyURL location, String prefix, File dir, plasmaParserDocument doc) throws ParserException, InterruptedException, IOException { if (!dir.isDirectory()) throw new ParserException("tried to parse ordinary file " + dir + " as directory", location); @@ -153,7 +153,7 @@ public abstract class AbstractParser implements Parser{ if (file.isDirectory()) { result += parseDir(location, prefix, file, doc); } else try { - URL url = URL.newURL(location, "/" + prefix + "/" + yacyURL url = yacyURL.newURL(location, "/" + prefix + "/" // XXX: workaround for relative paths within document + file.getPath().substring(file.getPath().indexOf(File.separatorChar) + 1) + "/" + file.getName()); @@ -185,7 +185,7 @@ public abstract class AbstractParser implements Parser{ * @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, byte[]) */ public plasmaParserDocument parse( - URL location, + yacyURL location, String mimeType, String charset, byte[] source @@ -220,7 +220,7 @@ public abstract class AbstractParser implements Parser{ * @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.File) */ public plasmaParserDocument parse( - URL location, + yacyURL location, String mimeType, String charset, File sourceFile @@ -254,7 +254,7 @@ public abstract class AbstractParser implements Parser{ * * @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream) */ - public abstract plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException; + public abstract plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException; /** * @return Returns a list of library names that are needed by this parser diff --git a/source/de/anomic/plasma/parser/Parser.java b/source/de/anomic/plasma/parser/Parser.java index a1adeae06..5b0ec0aad 100644 --- a/source/de/anomic/plasma/parser/Parser.java +++ b/source/de/anomic/plasma/parser/Parser.java @@ -48,9 +48,9 @@ import java.io.File; import java.io.InputStream; import java.util.Hashtable; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; /** * This interface defines a list of methods that needs to be implemented @@ -74,7 +74,7 @@ public interface Parser { * * @throws ParserException if the content could not be parsed properly */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, byte[] source) + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, byte[] source) throws ParserException, InterruptedException; /** @@ -88,7 +88,7 @@ public interface Parser { * * @throws ParserException if the content could not be parsed properly */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File 
sourceFile) throws ParserException, InterruptedException; /** @@ -102,7 +102,7 @@ public interface Parser { * * @throws ParserException if the content could not be parsed properly */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException; /** diff --git a/source/de/anomic/plasma/parser/ParserException.java b/source/de/anomic/plasma/parser/ParserException.java index c05d9a484..4d94b7f9a 100644 --- a/source/de/anomic/plasma/parser/ParserException.java +++ b/source/de/anomic/plasma/parser/ParserException.java @@ -44,13 +44,13 @@ package de.anomic.plasma.parser; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEURL; +import de.anomic.yacy.yacyURL; public class ParserException extends Exception { private String errorCode = null; - private URL url = null; + private yacyURL url = null; private static final long serialVersionUID = 1L; @@ -58,21 +58,21 @@ public class ParserException extends Exception super(); } - public ParserException(String message, URL url) { + public ParserException(String message, yacyURL url) { this(message,url,plasmaCrawlEURL.DENIED_PARSER_ERROR); } - public ParserException(String message, URL url, String errorCode) { + public ParserException(String message, yacyURL url, String errorCode) { super(message); this.errorCode = errorCode; this.url = url; } - public ParserException(String message, URL url, Throwable cause) { + public ParserException(String message, yacyURL url, Throwable cause) { this(message,url,cause,plasmaCrawlEURL.DENIED_PARSER_ERROR); } - public ParserException(String message, URL url, Throwable cause, String errorCode) { + public ParserException(String message, yacyURL url, Throwable cause, String errorCode) { super(message, cause); this.errorCode = errorCode; this.url = url; @@ -82,7 +82,7 @@ public class ParserException extends Exception return this.errorCode; } - public URL getURL() { + public yacyURL getURL() { return this.url; } } diff --git a/source/de/anomic/plasma/parser/bzip/bzipParser.java b/source/de/anomic/plasma/parser/bzip/bzipParser.java index 53b2630dd..77bed5a05 100644 --- a/source/de/anomic/plasma/parser/bzip/bzipParser.java +++ b/source/de/anomic/plasma/parser/bzip/bzipParser.java @@ -50,12 +50,13 @@ import java.util.Hashtable; import org.apache.tools.bzip2.CBZip2InputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; + public class bzipParser extends AbstractParser implements Parser { /** @@ -87,7 +88,7 @@ public class bzipParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File tempFile = null; try { diff --git a/source/de/anomic/plasma/parser/doc/docParser.java b/source/de/anomic/plasma/parser/doc/docParser.java index 10dfdaa01..d25e4abcb 100644 --- a/source/de/anomic/plasma/parser/doc/docParser.java +++ b/source/de/anomic/plasma/parser/doc/docParser.java @@ -48,15 
+48,13 @@ import java.util.Hashtable; import org.textmining.text.extraction.WordExtractor; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; -public class docParser -extends AbstractParser -implements Parser { +public class docParser extends AbstractParser implements Parser { /** * a list of mime types that are supported by this parser class @@ -78,7 +76,7 @@ implements Parser { this.parserName = "Word Document Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { diff --git a/source/de/anomic/plasma/parser/gzip/gzipParser.java b/source/de/anomic/plasma/parser/gzip/gzipParser.java index a289eb361..c057377f8 100644 --- a/source/de/anomic/plasma/parser/gzip/gzipParser.java +++ b/source/de/anomic/plasma/parser/gzip/gzipParser.java @@ -49,12 +49,12 @@ import java.io.InputStream; import java.util.Hashtable; import java.util.zip.GZIPInputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; public class gzipParser extends AbstractParser implements Parser { @@ -83,7 +83,7 @@ public class gzipParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File tempFile = null; try { diff --git a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java index 0d4f90b7e..02fda681b 100644 --- a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java +++ b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java @@ -56,17 +56,15 @@ import net.sf.jmimemagic.MagicMatchNotFoundException; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; -public class mimeTypeParser -extends AbstractParser -implements Parser { +public class mimeTypeParser extends AbstractParser implements Parser { /** * a list of mime types that are supported by this parser class @@ -127,7 +125,7 @@ implements Parser { return null; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { String orgMimeType = mimeType; @@ -188,7 +186,7 @@ implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType,String charset, InputStream source) throws ParserException, 
InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType,String charset, InputStream source) throws ParserException, InterruptedException { File dstFile = null; try { dstFile = File.createTempFile("mimeTypeParser",".tmp"); diff --git a/source/de/anomic/plasma/parser/odt/odtParser.java b/source/de/anomic/plasma/parser/odt/odtParser.java index c0c7681fa..262e29760 100644 --- a/source/de/anomic/plasma/parser/odt/odtParser.java +++ b/source/de/anomic/plasma/parser/odt/odtParser.java @@ -59,7 +59,6 @@ import com.catcode.odf.OpenDocumentMetadata; import com.catcode.odf.OpenDocumentTextInputStream; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; @@ -67,6 +66,7 @@ import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class odtParser extends AbstractParser implements Parser { @@ -95,7 +95,7 @@ public class odtParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File dest) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File dest) throws ParserException, InterruptedException { Writer writer = null; File writerFile = null; @@ -209,7 +209,7 @@ public class odtParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File dest = null; try { // creating a tempfile @@ -241,7 +241,7 @@ public class odtParser extends AbstractParser implements Parser { if (args.length != 1) return; // getting the content URL - URL contentUrl = new URL(args[0]); + yacyURL contentUrl = new yacyURL(args[0], null); // creating a new parser odtParser testParser = new odtParser(); diff --git a/source/de/anomic/plasma/parser/pdf/pdfParser.java b/source/de/anomic/plasma/parser/pdf/pdfParser.java index 0e491ef35..2a2eab041 100644 --- a/source/de/anomic/plasma/parser/pdf/pdfParser.java +++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java @@ -55,13 +55,13 @@ import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDDocumentInformation; import org.pdfbox.util.PDFTextStripper; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCharBuffer; +import de.anomic.yacy.yacyURL; public class pdfParser extends AbstractParser implements Parser { @@ -89,7 +89,7 @@ public class pdfParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { PDDocument theDocument = null; Writer 
writer = null; diff --git a/source/de/anomic/plasma/parser/ppt/pptParser.java b/source/de/anomic/plasma/parser/ppt/pptParser.java index 6d6fa20d3..bd276028f 100644 --- a/source/de/anomic/plasma/parser/ppt/pptParser.java +++ b/source/de/anomic/plasma/parser/ppt/pptParser.java @@ -50,11 +50,11 @@ import java.util.Hashtable; import org.apache.poi.hslf.extractor.PowerPointExtractor; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; public class pptParser extends AbstractParser implements Parser { @@ -88,7 +88,7 @@ public class pptParser extends AbstractParser implements Parser { * parses the source documents and returns a plasmaParserDocument containing * all extracted information about the parsed document */ - public plasmaParserDocument parse(URL location, String mimeType, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { diff --git a/source/de/anomic/plasma/parser/ps/psParser.java b/source/de/anomic/plasma/parser/ps/psParser.java index 06abdb15f..3d7900647 100644 --- a/source/de/anomic/plasma/parser/ps/psParser.java +++ b/source/de/anomic/plasma/parser/ps/psParser.java @@ -52,12 +52,12 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.util.Hashtable; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class psParser extends AbstractParser implements Parser { @@ -114,7 +114,7 @@ public class psParser extends AbstractParser implements Parser { } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { File outputFile = null; try { @@ -281,7 +281,7 @@ public class psParser extends AbstractParser implements Parser { super.reset(); } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File tempFile = null; try { diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java index 1ac0be0dd..1296eb689 100644 --- a/source/de/anomic/plasma/parser/rpm/rpmParser.java +++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java @@ -53,12 +53,12 @@ import com.jguild.jrpm.io.RPMFile; import com.jguild.jrpm.io.datatype.DataTypeIf; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; /** * @author theli @@ -92,7 +92,7 @@ public class rpmParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String 
mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException { File dstFile = null; try { @@ -106,7 +106,7 @@ public class rpmParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { RPMFile rpmFile = null; try { String summary = null, description = null, packager = null, name = sourceFile.getName(); @@ -177,7 +177,7 @@ public class rpmParser extends AbstractParser implements Parser { public static void main(String[] args) { try { - URL contentUrl = new URL(args[0]); + yacyURL contentUrl = new yacyURL(args[0], null); rpmParser testParser = new rpmParser(); byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null, null); diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index 97fb61610..4ee94ea4f 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -56,7 +56,6 @@ import de.anomic.htmlFilter.htmlFilterAbstractScraper; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterWriter; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; @@ -66,6 +65,7 @@ import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; import de.anomic.xml.rssReader; import de.anomic.xml.rssReader.Item; +import de.anomic.yacy.yacyURL; public class rssParser extends AbstractParser implements Parser { @@ -92,7 +92,7 @@ public class rssParser extends AbstractParser implements Parser { this.parserName = "Rich Site Summary/Atom Feed Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { LinkedList feedSections = new LinkedList(); @@ -114,7 +114,7 @@ public class rssParser extends AbstractParser implements Parser { String feedDescription = reader.getChannel().getDescription(); if (reader.getImage() != null) { - images.add(new htmlFilterImageEntry(new URL(reader.getImage()), feedTitle, -1, -1)); + images.add(new htmlFilterImageEntry(new yacyURL(reader.getImage(), null), feedTitle, -1, -1)); } // loop through the feed items @@ -126,7 +126,7 @@ public class rssParser extends AbstractParser implements Parser { Item item = reader.getItem(i); String itemTitle = item.getTitle(); - URL itemURL = new URL(item.getLink()); + yacyURL itemURL = new yacyURL(item.getLink(), null); String itemDescr = item.getDescription(); String itemCreator = item.getCreator(); if (itemCreator != null && itemCreator.length() > 0) authors.append(",").append(itemCreator); diff --git a/source/de/anomic/plasma/parser/rtf/rtfParser.java b/source/de/anomic/plasma/parser/rtf/rtfParser.java index c3ef24b32..133bbe20b 100644 --- a/source/de/anomic/plasma/parser/rtf/rtfParser.java +++ b/source/de/anomic/plasma/parser/rtf/rtfParser.java @@ -49,15 
+49,13 @@ import java.util.Hashtable; import javax.swing.text.DefaultStyledDocument; import javax.swing.text.rtf.RTFEditorKit; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; -public class rtfParser -extends AbstractParser -implements Parser { +public class rtfParser extends AbstractParser implements Parser { /** * a list of mime types that are supported by this parser class @@ -80,7 +78,7 @@ implements Parser { this.parserName = "Rich Text Format Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { diff --git a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java index 64ace4f37..cea2a6066 100644 --- a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java +++ b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java @@ -48,13 +48,13 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCachedFileOutputStream; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; import SevenZip.ArchiveExtractCallback; import SevenZip.Archive.IInArchive; @@ -117,7 +117,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback { plasmaParserDocument theDoc; // workaround for relative links in file, normally '#' shall be used behind the location, see // below for reversion of the effects - URL url = URL.newURL(doc.getLocation(), this.prefix + "/" + super.filePath); + yacyURL url = yacyURL.newURL(doc.getLocation(), this.prefix + "/" + super.filePath); String mime = plasmaParser.getMimeTypeByFileExt(super.filePath.substring(super.filePath.lastIndexOf('.') + 1)); if (this.cfos.isFallback()) { theDoc = this.parser.parseSource(url, mime, null, this.cfos.getContentFile()); diff --git a/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java b/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java index 9032a9603..0c15fa30f 100644 --- a/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java +++ b/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java @@ -51,13 +51,13 @@ import SevenZip.IInStream; import SevenZip.MyRandomAccessFile; import SevenZip.Archive.SevenZip.Handler; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverCachedFileOutputStream; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class sevenzipParser extends AbstractParser implements Parser { @@ -81,7 +81,7 @@ public class sevenzipParser extends AbstractParser implements Parser { super.parserName = "7zip Archive Parser"; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String 
mimeType, String charset, IInStream source, long maxRamSize) throws ParserException, InterruptedException { plasmaParserDocument doc = new plasmaParserDocument(location, mimeType, charset); Handler archive; @@ -111,12 +111,12 @@ public class sevenzipParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, byte[] source) throws ParserException, InterruptedException { return parse(location, mimeType, charset, new ByteArrayIInStream(source), Parser.MAX_KEEP_IN_MEMORY_SIZE - source.length); } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException { try { return parse(location, mimeType, charset, new MyRandomAccessFile(sourceFile, "r"), Parser.MAX_KEEP_IN_MEMORY_SIZE); @@ -125,7 +125,7 @@ public class sevenzipParser extends AbstractParser implements Parser { } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { serverCachedFileOutputStream cfos = new serverCachedFileOutputStream(Parser.MAX_KEEP_IN_MEMORY_SIZE); diff --git a/source/de/anomic/plasma/parser/swf/swfParser.java b/source/de/anomic/plasma/parser/swf/swfParser.java index e5e92e4ce..9e47ff7db 100644 --- a/source/de/anomic/plasma/parser/swf/swfParser.java +++ b/source/de/anomic/plasma/parser/swf/swfParser.java @@ -44,7 +44,6 @@ package de.anomic.plasma.parser.swf; import java.io.InputStream; -import de.anomic.net.URL; import java.util.Hashtable; import java.util.HashMap; @@ -54,6 +53,7 @@ import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; public class swfParser extends AbstractParser implements Parser { @@ -90,7 +90,7 @@ public class swfParser extends AbstractParser implements Parser { * parses the source documents and returns a plasmaParserDocument containing * all extracted information about the parsed document */ - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { SWF2HTML swf2html = new SWF2HTML(); diff --git a/source/de/anomic/plasma/parser/tar/tarParser.java b/source/de/anomic/plasma/parser/tar/tarParser.java index f547e1af5..35ef62e4e 100644 --- a/source/de/anomic/plasma/parser/tar/tarParser.java +++ b/source/de/anomic/plasma/parser/tar/tarParser.java @@ -59,7 +59,6 @@ import java.util.zip.GZIPInputStream; import com.ice.tar.TarEntry; import com.ice.tar.TarInputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; @@ -67,6 +66,7 @@ import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class tarParser extends AbstractParser 
implements Parser { @@ -97,7 +97,7 @@ public class tarParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { long docTextLength = 0; OutputStream docText = null; @@ -166,7 +166,7 @@ public class tarParser extends AbstractParser implements Parser { checkInterruption(); // parsing the content - subDoc = theParser.parseSource(URL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile); + subDoc = theParser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile); } catch (ParserException e) { this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getMessage()); } finally { diff --git a/source/de/anomic/plasma/parser/vcf/vcfParser.java b/source/de/anomic/plasma/parser/vcf/vcfParser.java index 4a6f2f108..53f8ba869 100644 --- a/source/de/anomic/plasma/parser/vcf/vcfParser.java +++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java @@ -57,11 +57,11 @@ import org.apache.commons.codec.net.QuotedPrintableCodec; import de.anomic.http.httpc; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; /** * Vcard specification: http://www.imc.org/pdi/vcard-21.txt @@ -97,7 +97,7 @@ public class vcfParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { StringBuffer parsedTitle = new StringBuffer(); @@ -212,7 +212,7 @@ public class vcfParser extends AbstractParser implements Parser { parsedData.clear(); } else if (key.toUpperCase().startsWith("URL")) { try { - URL newURL = new URL(value); + yacyURL newURL = new yacyURL(value, null); anchors.put(newURL.toString(),newURL.toString()); //parsedData.put(key,value); } catch (MalformedURLException ex) {/* ignore this */} @@ -268,7 +268,7 @@ public class vcfParser extends AbstractParser implements Parser { public static void main(String[] args) { try { - URL contentUrl = new URL(args[0]); + yacyURL contentUrl = new yacyURL(args[0], null); vcfParser testParser = new vcfParser(); byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null, null); diff --git a/source/de/anomic/plasma/parser/xls/xlsParser.java b/source/de/anomic/plasma/parser/xls/xlsParser.java index ea5de3899..17780429f 100644 --- a/source/de/anomic/plasma/parser/xls/xlsParser.java +++ b/source/de/anomic/plasma/parser/xls/xlsParser.java @@ -56,11 +56,11 @@ import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; +import de.anomic.yacy.yacyURL; 
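
All of the parser plugins touched above and below (bzip, doc, gzip, mimeType, odt, pdf, ppt, ps, rpm, rss, rtf, sevenzip, swf, tar, vcf, xls, zip) receive the same mechanical change: every parse(...) overload now accepts a yacyURL as its location argument, and derived locations (archive members, tar/zip entries, files inside a parsed directory) are built with yacyURL.newURL(...) instead of URL.newURL(...). The yacyURL class itself is not part of this section; the sketch below only illustrates the subset of its interface that these hunks rely on. The constructor signature, hash(), newURL() and dummyHash follow from how they are called in this patch, while the bodies and the placeholder values are assumptions, not the actual implementation.

    // Sketch only -- not part of this patch. Bodies are illustrative stand-ins.
    package de.anomic.yacy;

    import java.net.MalformedURLException;
    import java.net.URI;

    public class yacyURL {
        public static final String dummyHash = "------------"; // placeholder value (assumption)

        private final String urlString;
        private String hash; // handed in when already known (e.g. from a database row), else null

        public yacyURL(String url, String hash) throws MalformedURLException {
            if (url == null) throw new MalformedURLException("url is null");
            this.urlString = url;
            this.hash = hash; // null means: compute lazily on the first call to hash()
        }

        public static yacyURL newURL(yacyURL base, String relPath) {
            try {
                // resolve a relative reference (e.g. "#entryName") against the base location
                return new yacyURL(URI.create(base.urlString).resolve(relPath).toString(), null);
            } catch (MalformedURLException e) {
                return null; // assumption: the real class reports this differently
            }
        }

        public String hash() {
            if (this.hash == null) this.hash = computeHash(this.urlString); // computed once, then kept
            return this.hash;
        }

        private static String computeHash(String url) {
            return Integer.toHexString(url.hashCode()); // stand-in for the real 12-character url hash
        }
    }
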
public class xlsParser extends AbstractParser implements Parser, HSSFListener { @@ -102,7 +102,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener { * parses the source documents and returns a plasmaParserDocument containing * all extracted information about the parsed document */ - public plasmaParserDocument parse(URL location, String mimeType, + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { try { diff --git a/source/de/anomic/plasma/parser/zip/zipParser.java b/source/de/anomic/plasma/parser/zip/zipParser.java index 1c6baff16..c4d0d39c7 100644 --- a/source/de/anomic/plasma/parser/zip/zipParser.java +++ b/source/de/anomic/plasma/parser/zip/zipParser.java @@ -57,7 +57,6 @@ import java.util.TreeSet; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; @@ -65,6 +64,7 @@ import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class zipParser extends AbstractParser implements Parser { @@ -95,7 +95,7 @@ public class zipParser extends AbstractParser implements Parser { return SUPPORTED_MIME_TYPES; } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { + public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { long docTextLength = 0; OutputStream docText = null; @@ -149,7 +149,7 @@ public class zipParser extends AbstractParser implements Parser { serverFileUtils.copy(zippedContent,subDocTempFile,entry.getSize()); // parsing the zip file entry - subDoc = theParser.parseSource(URL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile); + subDoc = theParser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile); } catch (ParserException e) { this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. 
" + e.getMessage()); } finally { diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index 177631ee3..fe2ed57e8 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -58,7 +58,6 @@ import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; import de.anomic.kelondro.kelondroAbstractRecords; -import de.anomic.server.serverDomains; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -269,24 +268,24 @@ public class plasmaCrawlBalancer { public synchronized void push(plasmaCrawlEntry entry) throws IOException { assert entry != null; - if (urlFileIndex.has(entry.urlhash().getBytes())) { - serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " in " + stackname + " - fixed"); + if (urlFileIndex.has(entry.url().hash().getBytes())) { + serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed"); return; } // extend domain stack - String dom = entry.urlhash().substring(6); + String dom = entry.url().hash().substring(6); LinkedList domainList = (LinkedList) domainStacks.get(dom); if (domainList == null) { // create new list domainList = new LinkedList(); synchronized (domainStacks) { - domainList.add(entry.urlhash()); + domainList.add(entry.url().hash()); domainStacks.put(dom, domainList); } } else { // extend existent domain list - domainList.addLast(entry.urlhash()); + domainList.addLast(entry.url().hash()); } // add to index @@ -430,7 +429,7 @@ public class plasmaCrawlBalancer { return null; } plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry); - long minimumDelta = (serverDomains.isLocal(crawlEntry.url())) ? minimumLocalDelta : minimumGlobalDelta; + long minimumDelta = (crawlEntry.url().isLocal()) ? minimumLocalDelta : minimumGlobalDelta; plasmaCrawlRobotsTxt.Entry robotsEntry = plasmaSwitchboard.robots.getEntry(crawlEntry.url().getHost()); Integer hostDelay = (robotsEntry == null) ? null : robotsEntry.getCrawlDelay(); long genericDelta = ((robotsEntry == null) || (hostDelay == null)) ? 
minimumDelta : Math.max(minimumDelta, hostDelay.intValue() * 1000); diff --git a/source/de/anomic/plasma/plasmaCrawlEntry.java b/source/de/anomic/plasma/plasmaCrawlEntry.java index 0d416d4e2..c3ba841a7 100644 --- a/source/de/anomic/plasma/plasmaCrawlEntry.java +++ b/source/de/anomic/plasma/plasmaCrawlEntry.java @@ -34,9 +34,9 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public class plasmaCrawlEntry { @@ -63,9 +63,8 @@ public class plasmaCrawlEntry { private String initiator; // the initiator hash, is NULL or "" if it is the own proxy; // if this is generated by a crawl, the own peer hash in entered - private String urlhash; // the url's hash private String referrer; // the url's referrer hash - private URL url; // the url as string + private yacyURL url; // the url private String name; // the name of the url, from anchor tag name private long appdate; // the time when the url was first time appeared private long loaddate; // the time when the url was loaded @@ -78,7 +77,7 @@ public class plasmaCrawlEntry { private kelondroBitfield flags; private int handle; - public plasmaCrawlEntry(URL url) { + public plasmaCrawlEntry(yacyURL url) { this(yacyCore.seedDB.mySeed.hash, url, null, null, new Date(), null, 0, 0, 0); } @@ -95,7 +94,7 @@ public class plasmaCrawlEntry { */ public plasmaCrawlEntry( String initiator, - URL url, + yacyURL url, String referrer, String name, Date appdate, @@ -106,10 +105,9 @@ ) { // create new entry and store it into database assert appdate != null; - this.urlhash = plasmaURL.urlHash(url); this.initiator = initiator; this.url = url; - this.referrer = (referrer == null) ? plasmaURL.dummyHash : referrer; + this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer; this.name = (name == null) ? "" : name; this.appdate = (appdate == null) ? 0 : appdate.getTime(); this.profileHandle = profileHandle; // must not be null @@ -131,10 +129,9 @@ private void insertEntry(kelondroRow.Entry entry) throws IOException { String urlstring = entry.getColString(2, null); if (urlstring == null) throw new IOException ("url string is null"); - this.urlhash = entry.getColString(0, null); this.initiator = entry.getColString(1, null); - this.url = new URL(urlstring); - this.referrer = (entry.empty(3)) ? plasmaURL.dummyHash : entry.getColString(3, null); + this.url = new yacyURL(urlstring, entry.getColString(0, null)); + this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null); this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); this.appdate = entry.getColLong(5); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); @@ -168,7 +165,7 @@ namebytes = this.name.getBytes(); } byte[][] entry = new byte[][] { - this.urlhash.getBytes(), + this.url.hash().getBytes(), (initiator == null) ?
"".getBytes() : this.initiator.getBytes(), this.url.toString().getBytes(), this.referrer.getBytes(), @@ -186,16 +183,11 @@ public class plasmaCrawlEntry { return rowdef.newEntry(entry); } - public URL url() { + public yacyURL url() { // the url return url; } - public String urlhash() { - // the hash of this url - return this.urlhash; - } - public String referrerhash() { // the urlhash of a referer url return this.referrer; diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 4492b8b59..055f6fe4c 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -69,11 +69,11 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverCodings; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlLURL { @@ -118,8 +118,8 @@ public final class plasmaCrawlLURL { public synchronized void stack(indexURLEntry e, String initiatorHash, String executorHash, int stackType) { if (e == null) { return; } try { - if (initiatorHash == null) { initiatorHash = plasmaURL.dummyHash; } - if (executorHash == null) { executorHash = plasmaURL.dummyHash; } + if (initiatorHash == null) { initiatorHash = yacyURL.dummyHash; } + if (executorHash == null) { executorHash = yacyURL.dummyHash; } switch (stackType) { case 0: break; case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break; @@ -161,6 +161,7 @@ public final class plasmaCrawlLURL { // - look into the hash cache // - look into the filed properties // if the url cannot be found, this returns null + if (urlHash == null) return null; try { kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes()); if (entry == null) return null; @@ -394,7 +395,7 @@ public final class plasmaCrawlLURL { if ((pos = oldUrlStr.indexOf("://")) != -1) { // trying to correct the url String newUrlStr = "http://" + oldUrlStr.substring(pos + 3); - URL newUrl = new URL(newUrlStr); + yacyURL newUrl = new yacyURL(newUrlStr, null); // doing a http head request to test if the url is correct theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig); @@ -534,7 +535,7 @@ public final class plasmaCrawlLURL { // returns url-hash if (args[0].equals("-h")) try { // arg 1 is url - System.out.println("HASH: " + plasmaURL.urlHash(new URL(args[1]))); + System.out.println("HASH: " + (new yacyURL(args[1], null)).hash()); } catch (MalformedURLException e) {} if (args[0].equals("-l")) try { // arg 1 is path to URLCache diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index 86545d1b2..328d6ec94 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -50,13 +50,13 @@ import java.util.HashSet; import org.apache.commons.pool.impl.GenericKeyedObjectPool; import org.apache.commons.pool.impl.GenericObjectPool; -import de.anomic.net.URL; import de.anomic.plasma.crawler.plasmaCrawlWorker; import de.anomic.plasma.crawler.plasmaCrawlerException; import de.anomic.plasma.crawler.plasmaCrawlerFactory; import de.anomic.plasma.crawler.plasmaCrawlerMsgQueue; import 
de.anomic.plasma.crawler.plasmaCrawlerPool; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlLoader extends Thread { @@ -215,7 +215,7 @@ public final class plasmaCrawlLoader extends Thread { } public plasmaHTCache.Entry loadSync( - URL url, + yacyURL url, String urlName, String referer, String initiator, @@ -267,7 +267,7 @@ public final class plasmaCrawlLoader extends Thread { } public void loadAsync( - URL url, + yacyURL url, String urlName, String referer, String initiator, diff --git a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java index cd6eb1cd8..382f0ff06 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java +++ b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java @@ -42,13 +42,13 @@ package de.anomic.plasma; -import de.anomic.net.URL; import de.anomic.server.serverSemaphore; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlLoaderMessage { public final int crawlingPriority; - public final URL url; + public final yacyURL url; public final String name; public final String referer; public final String initiator; @@ -64,7 +64,7 @@ public final class plasmaCrawlLoaderMessage { // loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) { public plasmaCrawlLoaderMessage( - URL url, + yacyURL url, String name, // the name of the url, from anchor tag name String referer, String initiator, diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 4af393197..5d2fcfc79 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -58,7 +58,6 @@ import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.data.robotsParser; import de.anomic.http.httpc; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroException; @@ -68,12 +67,12 @@ import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroTree; -import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverDomains; import de.anomic.server.serverSemaphore; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public final class plasmaCrawlStacker { @@ -190,7 +189,7 @@ public final class plasmaCrawlStacker { } public void enqueue( - URL nexturl, + yacyURL nexturl, String referrerhash, String initiatorHash, String name, @@ -247,7 +246,7 @@ public final class plasmaCrawlStacker { String reason = null; // failure reason // getting the initiator peer hash - if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash; + if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = yacyURL.dummyHash; // strange errors if (nexturlString == null) { @@ -257,21 +256,20 @@ public final class plasmaCrawlStacker { } // getting the referer url and url hash - URL referrerURL = null; + yacyURL referrerURL = null; if (referrerString != null) { try { - referrerURL = new URL(referrerString); + referrerURL = new yacyURL(referrerString, null); } catch (MalformedURLException e) { referrerURL = null; referrerString = null; } } - String referrerHash = (referrerString==null)?null:plasmaURL.urlHash(referrerString); 
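
Note on the hunk above: stackCrawl no longer computes a referrer hash from the raw string via plasmaURL.urlHash; the referrer is parsed into a yacyURL once, and its hash is read from that object further down, where the plasmaCrawlEntry is built with (referrerURL == null) ? null : referrerURL.hash(). The same pattern appears earlier in this patch in AbstractCrawlWorker. A condensed sketch of the pattern follows; the wrapper class is hypothetical, and only the variable names and the yacyURL calls are taken from these hunks.

    // Hypothetical helper, for illustration only.
    import java.net.MalformedURLException;
    import de.anomic.yacy.yacyURL;

    final class ReferrerHashSketch {
        static String referrerHashOf(String referrerString) {
            yacyURL referrerURL = null;
            if (referrerString != null) {
                try {
                    referrerURL = new yacyURL(referrerString, null);
                } catch (MalformedURLException e) {
                    referrerURL = null; // an unparseable referrer is treated as "no referrer"
                }
            }
            // the hash comes from the parsed object; it is not recomputed from the string
            return (referrerURL == null) ? null : referrerURL.hash();
        }
    }
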
// check for malformed urls - URL nexturl = null; + yacyURL nexturl = null; try { - nexturl = new URL(nexturlString); + nexturl = new yacyURL(nexturlString, null); } catch (MalformedURLException e) { reason = plasmaCrawlEURL.DENIED_MALFORMED_URL; this.log.logSevere("Wrong URL in stackCrawl: " + nexturlString + @@ -367,10 +365,9 @@ public final class plasmaCrawlStacker { // check if the url is double registered checkInterruption(); - String nexturlhash = plasmaURL.urlHash(nexturl); - String dbocc = this.sb.urlExists(nexturlhash); + String dbocc = this.sb.urlExists(nexturl.hash()); indexURLEntry oldEntry = null; - oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null); + oldEntry = this.sb.wordIndex.loadedURL.load(nexturl.hash(), null); boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder()); // apply recrawl rule if ((dbocc != null) && (!(recrawl))) { @@ -396,7 +393,7 @@ public final class plasmaCrawlStacker { } // store information - boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); + boolean local = ((initiatorHash.equals(yacyURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); boolean global = (profile != null) && (profile.remoteIndexing()) /* granted */ && @@ -415,7 +412,7 @@ public final class plasmaCrawlStacker { checkInterruption(); plasmaCrawlEntry ne = new plasmaCrawlEntry(initiatorHash, /* initiator, needed for p2p-feedback */ nexturl, /* url clear text string */ - referrerHash, /* last url in crawling queue */ + (referrerURL == null) ? null : referrerURL.hash(), /* last url in crawling queue */ name, /* load date */ loadDate, /* the anchor name */ (profile == null) ? null : profile.handle(), // profile must not be null! 
@@ -551,7 +548,7 @@ public final class plasmaCrawlStacker { synchronized(this.urlEntryHashCache) { kelondroRow.Entry oldValue = this.urlEntryCache.put(newMessage.toRow()); if (oldValue == null) { - insertionDoneSuccessfully = this.urlEntryHashCache.add(newMessage.urlhash()); + insertionDoneSuccessfully = this.urlEntryHashCache.add(newMessage.url().hash()); } } diff --git a/source/de/anomic/plasma/plasmaCrawlZURL.java b/source/de/anomic/plasma/plasmaCrawlZURL.java index 3c9ae85a8..c64513543 100644 --- a/source/de/anomic/plasma/plasmaCrawlZURL.java +++ b/source/de/anomic/plasma/plasmaCrawlZURL.java @@ -37,9 +37,9 @@ import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; -import de.anomic.net.URL; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public class plasmaCrawlZURL { @@ -83,12 +83,12 @@ public class plasmaCrawlZURL { public synchronized Entry newEntry( plasmaCrawlEntry bentry, String executor, Date workdate, int workcount, String anycause) { - if ((executor == null) || (executor.length() < yacySeedDB.commonHashLength)) executor = plasmaURL.dummyHash; + if ((executor == null) || (executor.length() < yacySeedDB.commonHashLength)) executor = yacyURL.dummyHash; if (anycause == null) anycause = "unknown"; return new Entry(bentry, executor, workdate, workcount, anycause); } - public synchronized Entry newEntry(URL url, String anycause) { + public synchronized Entry newEntry(yacyURL url, String anycause) { return new Entry(url, anycause); } @@ -139,13 +139,13 @@ public class plasmaCrawlZURL { public class Entry { plasmaCrawlEntry bentry; // the balancer entry - private String executor; // the crawling initiator - private Date workdate; // the time when the url was last time tried to load - private int workcount; // number of tryings - private String anycause; // string describing reason for load fail - private boolean stored; + private String executor; // the crawling initiator + private Date workdate; // the time when the url was last time tried to load + private int workcount; // number of tryings + private String anycause; // string describing reason for load fail + private boolean stored; - public Entry(URL url, String reason) { + public Entry(yacyURL url, String reason) { this(new plasmaCrawlEntry(url), null, new Date(), 0, reason); } @@ -181,7 +181,7 @@ public class plasmaCrawlZURL { this.workcount = (int) entry.getColLong(3); this.anycause = entry.getColString(4, "UTF-8"); this.bentry = new plasmaCrawlEntry(plasmaCrawlEntry.rowdef.newEntry(entry.getColBytes(5))); - assert ((new String(entry.getColBytes(0))).equals(bentry.urlhash())); + assert ((new String(entry.getColBytes(0))).equals(bentry.url().hash())); return; } @@ -190,7 +190,7 @@ public class plasmaCrawlZURL { if (this.stored) return; if (this.bentry == null) return; kelondroRow.Entry newrow = rowdef.newEntry(); - newrow.setCol(0, this.bentry.urlhash().getBytes()); + newrow.setCol(0, this.bentry.url().hash().getBytes()); newrow.setCol(1, this.executor.getBytes()); newrow.setCol(2, this.workdate.getTime()); newrow.setCol(3, this.workcount); @@ -204,7 +204,7 @@ public class plasmaCrawlZURL { } } - public URL url() { + public yacyURL url() { return this.bentry.url(); } @@ -217,7 +217,7 @@ public class plasmaCrawlZURL { // the result is a String of 12 bytes within a 72-bit space // (each byte has an 6-bit range) // that should be enough for all web pages on the world - 
return this.bentry.urlhash(); + return this.bentry.url().hash(); } public Date workdate() { diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index b79680b83..4b62afd8d 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -61,7 +61,6 @@ import java.io.IOException; import java.io.InputStream; import java.lang.StringBuffer; import java.net.InetAddress; -import java.net.MalformedURLException; import java.util.Collections; import java.util.Date; import java.util.HashSet; @@ -73,12 +72,10 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.cache.ResourceInfoFactory; import de.anomic.plasma.cache.UnsupportedProtocolException; @@ -92,6 +89,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.enumerateFiles; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaHTCache { @@ -114,6 +112,122 @@ public final class plasmaHTCache { private static ResourceInfoFactory objFactory = new ResourceInfoFactory(); private static serverThread cacheScanThread; + // doctypes: + public static final char DT_PDFPS = 'p'; + public static final char DT_TEXT = 't'; + public static final char DT_HTML = 'h'; + public static final char DT_DOC = 'd'; + public static final char DT_IMAGE = 'i'; + public static final char DT_MOVIE = 'm'; + public static final char DT_FLASH = 'f'; + public static final char DT_SHARE = 's'; + public static final char DT_AUDIO = 'a'; + public static final char DT_BINARY = 'b'; + public static final char DT_UNKNOWN = 'u'; + + // appearance locations: (used for flags) + public static final int AP_TITLE = 0; // title tag from html header + public static final int AP_H1 = 1; // headline - top level + public static final int AP_H2 = 2; // headline, second level + public static final int AP_H3 = 3; // headline, 3rd level + public static final int AP_H4 = 4; // headline, 4th level + public static final int AP_H5 = 5; // headline, 5th level + public static final int AP_H6 = 6; // headline, 6th level + public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam) + public static final int AP_DOM = 8; // word inside an url: in Domain + public static final int AP_PATH = 9; // word inside an url: in path + public static final int AP_IMG = 10; // tag inside image references + public static final int AP_ANCHOR = 11; // anchor description + public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance) + public static final int AP_BOLD = 13; // may be interpreted as emphasized + public static final int AP_ITALICS = 14; // may be interpreted as emphasized + public static final int AP_WEAK = 15; // for Text that is small or bareley visible + public static final int AP_INVISIBLE = 16; // good for spam detection + public static final int AP_TAG = 17; // for tagged indexeing (i.e. 
using mp3 tags) + public static final int AP_AUTHOR = 18; // word appears in author name + public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags) + public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags) + + // URL attributes + public static final int UA_LOCAL = 0; // URL was crawled locally + public static final int UA_TILDE = 1; // tilde appears in URL + public static final int UA_REDIRECT = 2; // The URL is a redirection + + // local flag attributes + public static final char LT_LOCAL = 'L'; + public static final char LT_GLOBAL = 'G'; + + // doctype calculation + public static char docType(yacyURL url) { + String path = url.getPath().toLowerCase(); + // serverLog.logFinest("PLASMA", "docType URL=" + path); + char doctype = DT_UNKNOWN; + if (path.endsWith(".gif")) { doctype = DT_IMAGE; } + else if (path.endsWith(".ico")) { doctype = DT_IMAGE; } + else if (path.endsWith(".bmp")) { doctype = DT_IMAGE; } + else if (path.endsWith(".jpg")) { doctype = DT_IMAGE; } + else if (path.endsWith(".jpeg")) { doctype = DT_IMAGE; } + else if (path.endsWith(".png")) { doctype = DT_IMAGE; } + else if (path.endsWith(".html")) { doctype = DT_HTML; } + else if (path.endsWith(".txt")) { doctype = DT_TEXT; } + else if (path.endsWith(".doc")) { doctype = DT_DOC; } + else if (path.endsWith(".rtf")) { doctype = DT_DOC; } + else if (path.endsWith(".pdf")) { doctype = DT_PDFPS; } + else if (path.endsWith(".ps")) { doctype = DT_PDFPS; } + else if (path.endsWith(".avi")) { doctype = DT_MOVIE; } + else if (path.endsWith(".mov")) { doctype = DT_MOVIE; } + else if (path.endsWith(".qt")) { doctype = DT_MOVIE; } + else if (path.endsWith(".mpg")) { doctype = DT_MOVIE; } + else if (path.endsWith(".md5")) { doctype = DT_SHARE; } + else if (path.endsWith(".mpeg")) { doctype = DT_MOVIE; } + else if (path.endsWith(".asf")) { doctype = DT_FLASH; } + return doctype; + } + + public static char docType(String mime) { + // serverLog.logFinest("PLASMA", "docType mime=" + mime); + char doctype = DT_UNKNOWN; + if (mime == null) doctype = DT_UNKNOWN; + else if (mime.startsWith("image/")) doctype = DT_IMAGE; + else if (mime.endsWith("/gif")) doctype = DT_IMAGE; + else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE; + else if (mime.endsWith("/png")) doctype = DT_IMAGE; + else if (mime.endsWith("/html")) doctype = DT_HTML; + else if (mime.endsWith("/rtf")) doctype = DT_DOC; + else if (mime.endsWith("/pdf")) doctype = DT_PDFPS; + else if (mime.endsWith("/octet-stream")) doctype = DT_BINARY; + else if (mime.endsWith("/x-shockwave-flash")) doctype = DT_FLASH; + else if (mime.endsWith("/msword")) doctype = DT_DOC; + else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC; + else if (mime.endsWith("/postscript")) doctype = DT_PDFPS; + else if (mime.startsWith("text/")) doctype = DT_TEXT; + else if (mime.startsWith("image/")) doctype = DT_IMAGE; + else if (mime.startsWith("audio/")) doctype = DT_AUDIO; + else if (mime.startsWith("video/")) doctype = DT_MOVIE; + //bz2 = application/x-bzip2 + //dvi = application/x-dvi + //gz = application/gzip + //hqx = application/mac-binhex40 + //lha = application/x-lzh + //lzh = application/x-lzh + //pac = application/x-ns-proxy-autoconfig + //php = application/x-httpd-php + //phtml = application/x-httpd-php + //rss = application/xml + //tar = application/tar + //tex = application/x-tex + //tgz = application/tar + //torrent = application/x-bittorrent + //xhtml = application/xhtml+xml + //xla = application/msexcel + //xls 
= application/msexcel + //xsl = application/xml + //xml = application/xml + //Z = application/x-compress + //zip = application/zip + return doctype; + } + public static void init(File htCachePath, long CacheSizeMax, long preloadTime, String layout, boolean migration) { cachePath = htCachePath; @@ -252,7 +366,7 @@ public final class plasmaHTCache { return (curCacheSize >= maxCacheSize) ? 0 : maxCacheSize - curCacheSize; } - public static boolean writeResourceContent(URL url, byte[] array) { + public static boolean writeResourceContent(yacyURL url, byte[] array) { if (array == null) return false; File file = getCachePath(url); try { @@ -288,16 +402,16 @@ public final class plasmaHTCache { } } - public static boolean deleteFile(URL url) { + public static boolean deleteFile(yacyURL url) { return deleteURLfromCache("", url, "FROM"); } - private static boolean deleteURLfromCache(String key, URL url, String msg) { + private static boolean deleteURLfromCache(String key, yacyURL url, String msg) { if (deleteFileandDirs(key, getCachePath(url), msg)) { try { // As the file is gone, the entry in responseHeader.db is not needed anymore log.logFinest("Trying to remove responseHeader from URL: " + url.toNormalform(false, true)); - responseHeaderDB.remove(plasmaURL.urlHash(url)); + responseHeaderDB.remove(url.hash()); } catch (IOException e) { resetResponseHeaderDB(); log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); @@ -356,10 +470,10 @@ public final class plasmaHTCache { log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash); responseHeaderDB.remove(urlHash); } else { - URL url = getURL(file); + yacyURL url = getURL(file); if (url != null) { log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true)); - responseHeaderDB.remove(plasmaURL.urlHash(url)); + responseHeaderDB.remove(url.hash()); } } } catch (IOException e) { @@ -497,13 +611,10 @@ public final class plasmaHTCache { * @throws UnsupportedProtocolException if the protocol is not supported and therefore the * info object couldn't be created */ - public static IResourceInfo loadResourceInfo(URL url) throws UnsupportedProtocolException, IllegalAccessException { - - // getting the URL hash - String urlHash = plasmaURL.urlHash(url.toNormalform(true, true)); + public static IResourceInfo loadResourceInfo(yacyURL url) throws UnsupportedProtocolException, IllegalAccessException { // loading data from database - Map hdb = responseHeaderDB.getMap(urlHash); + Map hdb = responseHeaderDB.getMap(url.hash()); if (hdb == null) return null; // generate the cached object @@ -601,7 +712,7 @@ public final class plasmaHTCache { * that path will be generated * @return new File */ - public static File getCachePath(final URL url) { + public static File getCachePath(final yacyURL url) { // this.log.logFinest("plasmaHTCache: getCachePath: IN=" + url.toString()); // peer.yacy || www.peer.yacy = http/yacy/peer @@ -662,18 +773,18 @@ public final class plasmaHTCache { if (cacheLayout.equals("tree")) { File FileTree = treeFile(fileName, "tree", path); if (cacheMigration) { - moveCachedObject(hashFile(fileName, "hash", extention, url), FileTree); - moveCachedObject(hashFile(fileName, null, extention, url), FileTree); // temporary migration + moveCachedObject(hashFile(fileName, "hash", extention, url.hash()), FileTree); + moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileTree); // temporary migration moveCachedObject(treeFile(fileName, null, path), FileTree); // temporary 
migration } return FileTree; } if (cacheLayout.equals("hash")) { - File FileFlat = hashFile(fileName, "hash", extention, url); + File FileFlat = hashFile(fileName, "hash", extention, url.hash()); if (cacheMigration) { moveCachedObject(treeFile(fileName, "tree", path), FileFlat); moveCachedObject(treeFile(fileName, null, path), FileFlat); // temporary migration - moveCachedObject(hashFile(fileName, null, extention, url), FileFlat); // temporary migration + moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileFlat); // temporary migration } return FileFlat; } @@ -688,8 +799,8 @@ public final class plasmaHTCache { return new File(cachePath, f.toString()); } - private static File hashFile(StringBuffer fileName, String prefix, String extention, URL url) { - String hexHash = yacySeed.b64Hash2hexHash(plasmaURL.urlHash(url)); + private static File hashFile(StringBuffer fileName, String prefix, String extention, String urlhash) { + String hexHash = yacySeed.b64Hash2hexHash(urlhash); StringBuffer f = new StringBuffer(fileName.length() + 30); f.append(fileName); if (prefix != null) f.append('/').append(prefix); @@ -720,11 +831,11 @@ public final class plasmaHTCache { * this is the reverse function to getCachePath: it constructs the url as string * from a given storage path */ - public static URL getURL(final File f) { + public static yacyURL getURL(final File f) { // this.log.logFinest("plasmaHTCache: getURL: IN: Path=[" + cachePath + "] File=[" + f + "]"); final String urlHash = getHash(f); if (urlHash != null) { - URL url = null; + yacyURL url = null; // try the urlPool try { url = plasmaSwitchboard.getSwitchboard().getURL(urlHash); @@ -744,7 +855,7 @@ public final class plasmaHTCache { String s = ((String)origRequestLine).substring(i).trim(); i = s.indexOf(" "); try { - url = new URL((i<0) ? s : s.substring(0,i)); + url = new yacyURL((i<0) ? s : s.substring(0,i), urlHash); } catch (final Exception e) { url = null; } @@ -831,7 +942,7 @@ public final class plasmaHTCache { // this.log.logFinest("plasmaHTCache: getURL: OUT=" + s); try { - return new URL(protocol + host + path); + return new yacyURL(protocol + host + path, null); } catch (final Exception e) { return null; } @@ -846,7 +957,7 @@ public final class plasmaHTCache { * is available or the cached file is not readable, null * is returned. 
*/ - public static InputStream getResourceContentStream(URL url) { + public static InputStream getResourceContentStream(yacyURL url) { // load the url as resource from the cache File f = getCachePath(url); if (f.exists() && f.canRead()) try { @@ -858,7 +969,7 @@ public final class plasmaHTCache { return null; } - public static long getResourceContentLength(URL url) { + public static long getResourceContentLength(yacyURL url) { // load the url as resource from the cache File f = getCachePath(url); if (f.exists() && f.canRead()) { @@ -886,7 +997,7 @@ public final class plasmaHTCache { public static Entry newEntry( Date initDate, int depth, - URL url, + yacyURL url, String name, //httpHeader requestHeader, String responseStatus, @@ -898,7 +1009,7 @@ public final class plasmaHTCache { return new Entry( initDate, depth, - url, + url, name, //requestHeader, responseStatus, @@ -919,10 +1030,8 @@ public final class plasmaHTCache { private String responseStatus; private File cacheFile; // the cache file private byte[] cacheArray; // or the cache as byte-array - private URL url; + private yacyURL url; private String name; // the name of the link, read as anchor from an -tag - private String nomalizedURLHash; - private String nomalizedURLString; //private int status; // cache load/hit/stale etc status private Date lastModified; private char doctype; @@ -933,7 +1042,7 @@ public final class plasmaHTCache { /** * protocolspecific information about the resource */ - private IResourceInfo resInfo; + private IResourceInfo resInfo; protected Object clone() throws CloneNotSupportedException { return new Entry( @@ -952,7 +1061,7 @@ public final class plasmaHTCache { public Entry(Date initDate, int depth, - URL url, + yacyURL url, String name, //httpHeader requestHeader, String responseStatus, @@ -966,22 +1075,11 @@ public final class plasmaHTCache { System.exit(0); } this.resInfo = resourceInfo; - - - // normalize url - this.nomalizedURLString = url.toNormalform(true, true); - - try { - this.url = new URL(this.nomalizedURLString); - } catch (MalformedURLException e) { - System.out.println("internal error at httpdProxyCache.Entry: " + e); - System.exit(-1); - } + this.url = url; this.name = name; this.cacheFile = getCachePath(this.url); - this.nomalizedURLHash = plasmaURL.urlHash(this.nomalizedURLString); - - // assigned: + + // assigned: this.initDate = initDate; this.depth = depth; //this.requestHeader = requestHeader; @@ -994,9 +1092,9 @@ public final class plasmaHTCache { this.lastModified = resourceInfo.getModificationDate(); // getting the doctype - this.doctype = plasmaURL.docType(resourceInfo.getMimeType()); - if (this.doctype == plasmaURL.DT_UNKNOWN) this.doctype = plasmaURL.docType(url); - this.language = plasmaURL.language(url); + this.doctype = docType(resourceInfo.getMimeType()); + if (this.doctype == DT_UNKNOWN) this.doctype = docType(url); + this.language = yacyURL.language(url); // to be defined later: this.cacheArray = null; @@ -1006,12 +1104,12 @@ public final class plasmaHTCache { return this.name; } - public URL url() { + public yacyURL url() { return this.url; } public String urlHash() { - return this.nomalizedURLHash; + return this.url.hash(); } public Date lastModified() { @@ -1041,8 +1139,8 @@ public final class plasmaHTCache { return this.depth; } - public URL referrerURL() { - return (this.resInfo==null)?null:this.resInfo.getRefererUrl(); + public yacyURL referrerURL() { + return (this.resInfo == null) ? 
null : this.resInfo.getRefererUrl(); } public File cacheFile() { @@ -1070,10 +1168,9 @@ public final class plasmaHTCache { } public boolean writeResourceInfo() { - assert(this.nomalizedURLHash != null) : "URL Hash is null"; if (this.resInfo == null) return false; try { - responseHeaderDB.set(this.nomalizedURLHash, this.resInfo.getMap()); + responseHeaderDB.set(this.url.hash(), this.resInfo.getMap()); } catch (Exception e) { resetResponseHeaderDB(); return false; @@ -1134,8 +1231,8 @@ public final class plasmaHTCache { // -CGI access in request // CGI access makes the page very individual, and therefore not usable in caches - if (isPOST(this.nomalizedURLString) && !this.profile.crawlingQ()) { return "dynamic_post"; } - if (isCGI(this.nomalizedURLString)) { return "dynamic_cgi"; } + if (isPOST(this.url.toNormalform(true, true)) && !this.profile.crawlingQ()) { return "dynamic_post"; } + if (isCGI(this.url.toNormalform(true, true))) { return "dynamic_cgi"; } if (this.resInfo != null) { return this.resInfo.shallStoreCacheForProxy(); @@ -1153,8 +1250,8 @@ public final class plasmaHTCache { // -CGI access in request // CGI access makes the page very individual, and therefore not usable in caches - if (isPOST(this.nomalizedURLString)) { return false; } - if (isCGI(this.nomalizedURLString)) { return false; } + if (isPOST(this.url.toNormalform(true, true))) { return false; } + if (isCGI(this.url.toNormalform(true, true))) { return false; } if (this.resInfo != null) { return this.resInfo.shallUseCacheForProxy(); diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 940d203c6..2d7e165e1 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -75,12 +75,12 @@ import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.htmlFilter.htmlFilterInputStream; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.parser.ParserInfo; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public final class plasmaParser { public static final String PARSER_MODE_PROXY = "PROXY"; @@ -322,11 +322,11 @@ public final class plasmaParser { } } - public static boolean supportedRealTimeContent(URL url, String mimeType) { + public static boolean supportedRealTimeContent(yacyURL url, String mimeType) { return realtimeParsableMimeTypesContains(mimeType) && supportedRealtimeFileExtContains(url); } - public static boolean supportedRealtimeFileExtContains(URL url) { + public static boolean supportedRealtimeFileExtContains(yacyURL url) { String fileExt = getFileExt(url); synchronized (supportedRealtimeFileExt) { return supportedRealtimeFileExt.contains(fileExt); @@ -334,7 +334,7 @@ public final class plasmaParser { } - public static String getFileExt(URL url) { + public static String getFileExt(yacyURL url) { // getting the file path String name = url.getPath(); @@ -566,7 +566,7 @@ public final class plasmaParser { } catch (Exception e) {/* ignore this */} } - public plasmaParserDocument parseSource(URL location, String mimeType, String charset, byte[] sourceArray) + public plasmaParserDocument parseSource(yacyURL location, String mimeType, String charset, byte[] sourceArray) throws InterruptedException, ParserException { ByteArrayInputStream byteIn = null; try { @@ -600,7 +600,7 @@ public final class 
plasmaParser { } - public plasmaParserDocument parseSource(URL location, String theMimeType, String theDocumentCharset, File sourceFile) throws InterruptedException, ParserException { + public plasmaParserDocument parseSource(yacyURL location, String theMimeType, String theDocumentCharset, File sourceFile) throws InterruptedException, ParserException { BufferedInputStream sourceStream = null; try { @@ -644,7 +644,7 @@ public final class plasmaParser { * @throws InterruptedException * @throws ParserException */ - public plasmaParserDocument parseSource(URL location, String theMimeType, String theDocumentCharset, long contentLength, InputStream sourceStream) throws InterruptedException, ParserException { + public plasmaParserDocument parseSource(yacyURL location, String theMimeType, String theDocumentCharset, long contentLength, InputStream sourceStream) throws InterruptedException, ParserException { Parser theParser = null; String mimeType = null; try { @@ -719,7 +719,7 @@ public final class plasmaParser { } } - private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, InputStream sourceStream) throws IOException, ParserException { + private plasmaParserDocument parseHtml(yacyURL location, String mimeType, String documentCharset, InputStream sourceStream) throws IOException, ParserException { // make a scraper and transformer htmlFilterInputStream htmlFilter = new htmlFilterInputStream(sourceStream,documentCharset,location,null,false); @@ -750,13 +750,13 @@ public final class plasmaParser { return transformScraper(location, mimeType, documentCharset, scraper); } - public plasmaParserDocument transformScraper(URL location, String mimeType, String charSet, htmlFilterContentScraper scraper) { + public plasmaParserDocument transformScraper(yacyURL location, String mimeType, String charSet, htmlFilterContentScraper scraper) { try { String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length]; int p = 0; for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j]; plasmaParserDocument ppd = new plasmaParserDocument( - new URL(location.toNormalform(true, true)), + new yacyURL(location.toNormalform(true, true), null), mimeType, charSet, scraper.getKeywords(), @@ -897,7 +897,7 @@ public final class plasmaParser { httpc remote = null; try { Object content = null; - URL contentURL = null; + yacyURL contentURL = null; long contentLength = -1; String contentMimeType = "application/octet-stream"; String charSet = "UTF-8"; @@ -909,9 +909,9 @@ public final class plasmaParser { String mode = args[0]; if (mode.equalsIgnoreCase("-f")) { content = new File(args[1]); - contentURL = new URL((File)content); + contentURL = new yacyURL((File)content); } else if (mode.equalsIgnoreCase("-u")) { - contentURL = new URL(args[1]); + contentURL = new yacyURL(args[1], null); // downloading the document content remote = httpc.getInstance( @@ -1003,7 +1003,7 @@ public final class plasmaParser { config.enableAllParsers(); } - public static boolean supportedContent(URL url, String mimeType) { + public static boolean supportedContent(yacyURL url, String mimeType) { if (url == null) throw new NullPointerException(); Iterator configs = parserConfigList.values().iterator(); @@ -1017,7 +1017,7 @@ public final class plasmaParser { return false; } - public static boolean supportedContent(String parserMode, URL url, String mimeType) 
{ + public static boolean supportedContent(String parserMode, yacyURL url, String mimeType) { if (!PARSER_MODE.contains(parserMode)) throw new IllegalArgumentException(); if (url == null) throw new NullPointerException(); diff --git a/source/de/anomic/plasma/plasmaParserConfig.java b/source/de/anomic/plasma/plasmaParserConfig.java index 883d8315a..4daf3c483 100644 --- a/source/de/anomic/plasma/plasmaParserConfig.java +++ b/source/de/anomic/plasma/plasmaParserConfig.java @@ -54,10 +54,10 @@ import java.util.Hashtable; import java.util.Iterator; import java.util.Set; -import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserInfo; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class plasmaParserConfig { /** @@ -85,7 +85,7 @@ public class plasmaParserConfig { this.parserMode = theParserMode; } - public boolean supportedContent(URL url, String mimeType) { + public boolean supportedContent(yacyURL url, String mimeType) { // TODO: we need some exceptions here to index URLs like this // http://www.musicabona.com/respighi/12668/cd/index.html.fr mimeType = plasmaParser.getRealMimeType(mimeType); @@ -112,7 +112,7 @@ public class plasmaParserConfig { } - public boolean supportedFileExt(URL url) { + public boolean supportedFileExt(yacyURL url) { if (url == null) throw new NullPointerException(); // getting the file path diff --git a/source/de/anomic/plasma/plasmaParserDocument.java b/source/de/anomic/plasma/plasmaParserDocument.java index 1ce0c1076..e709cb768 100644 --- a/source/de/anomic/plasma/plasmaParserDocument.java +++ b/source/de/anomic/plasma/plasmaParserDocument.java @@ -52,6 +52,7 @@ import java.net.MalformedURLException; import de.anomic.server.serverCachedFileOutputStream; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; import java.util.Arrays; import java.util.HashMap; @@ -62,12 +63,11 @@ import java.util.Map; import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterImageEntry; -import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; public class plasmaParserDocument { - private URL location; // the source url + private yacyURL location; // the source url private String mimeType; // mimeType as taken from http header private String charset; // the charset of the document private List keywords; // most resources provide a keyword field @@ -83,11 +83,11 @@ public class plasmaParserDocument { // text in image tags. 
private Map hyperlinks, audiolinks, videolinks, applinks; private Map emaillinks; - private URL favicon; + private yacyURL favicon; private boolean resorted; private InputStream textStream; - protected plasmaParserDocument(URL location, String mimeType, String charset, + protected plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, Object text, Map anchors, TreeSet images) { @@ -118,32 +118,32 @@ public class plasmaParserDocument { } } - public plasmaParserDocument(URL location, String mimeType, String charset) { + public plasmaParserDocument(yacyURL location, String mimeType, String charset) { this(location, mimeType, charset, null, null, null, null, null, (Object)null, null, null); } - public plasmaParserDocument(URL location, String mimeType, String charset, + public plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, byte[] text, Map anchors, TreeSet images) { this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); } - public plasmaParserDocument(URL location, String mimeType, String charset, + public plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, File text, Map anchors, TreeSet images) { this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); } - public plasmaParserDocument(URL location, String mimeType, String charset, + public plasmaParserDocument(yacyURL location, String mimeType, String charset, String[] keywords, String title, String author, String[] sections, String abstrct, serverCachedFileOutputStream text, Map anchors, TreeSet images) { this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); } - public URL getLocation() { + public yacyURL getLocation() { return this.location; } @@ -304,7 +304,7 @@ public class plasmaParserDocument { // extract hyperlinks, medialinks and emaillinks from anchorlinks Iterator i; - URL url; + yacyURL url; String u; int extpos, qpos; String ext = null; @@ -330,7 +330,7 @@ public class plasmaParserDocument { ext = u.substring(extpos + 1).toLowerCase(); } try { - url = new URL(u); + url = new yacyURL(u, null); u = url.toNormalform(true, true); if (plasmaParser.mediaExtContains(ext)) { // this is not a normal anchor, its a media link @@ -399,14 +399,14 @@ public class plasmaParserDocument { /** * @return the {@link URL} to the favicon that belongs to the document */ - public URL getFavicon() { + public yacyURL getFavicon() { return this.favicon; } /** * @param faviconURL the {@link URL} to the favicon that belongs to the document */ - public void setFavicon(URL faviconURL) { + public void setFavicon(yacyURL faviconURL) { this.favicon = faviconURL; } diff --git a/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java b/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java index 050feb12e..2ee68a7e1 100644 --- a/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java +++ b/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java @@ -53,12 +53,11 @@ import java.util.HashSet; import java.util.Iterator; import java.util.TreeSet; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroAttrSeq; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import 
de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public class plasmaRankingRCIEvaluation { @@ -177,8 +176,8 @@ public class plasmaRankingRCIEvaluation { dom = (String) i.next(); if (dom.startsWith("www.")) dom = dom.substring(4); try { - dommap.put(plasmaURL.urlHash(new URL("http://" + dom)).substring(6), dom); - dommap.put(plasmaURL.urlHash(new URL("http://www." + dom)).substring(6), "www." + dom); + dommap.put((new yacyURL("http://" + dom, null)).hash().substring(6), dom); + dommap.put((new yacyURL("http://www." + dom, null)).hash().substring(6), "www." + dom); } catch (MalformedURLException e) {} } return dommap; diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index b43b9c005..4064f43c6 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -41,12 +41,12 @@ import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroRow; -import de.anomic.net.URL; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class plasmaSearchEvent { @@ -213,7 +213,7 @@ public final class plasmaSearchEvent { IAneardhthash = wordhash; } IACount.put(wordhash, new Integer(container.size())); - IAResults.put(wordhash, plasmaURL.compressIndex(container, null, 1000).toString()); + IAResults.put(wordhash, plasmaSearchProcessing.compressIndex(container, null, 1000).toString()); } process.yield("abstract generation", searchContainerMaps[0].size()); } @@ -234,7 +234,7 @@ public final class plasmaSearchEvent { this.rankedCache = new plasmaSearchContainer(query, ranking, plasmaSearchQuery.cleanQuery(query.queryString)[0], rcLocal); } - + if (query.onlineSnippetFetch) { // start worker threads to fetch urls and snippets this.workerThreads = new resultWorker[workerThreadCount]; @@ -245,6 +245,7 @@ public final class plasmaSearchEvent { } else { // prepare result vector directly without worker threads int rankedIndex = 0; + process.startTimer(); while ((rankedIndex < rankedCache.container().size()) && (resultList.size() < (query.neededResults()))) { // fetch next entry to work on indexContainer c = rankedCache.container(); @@ -263,6 +264,7 @@ public final class plasmaSearchEvent { rankedCache.addReferences(resultEntry); } } + process.yield("offline snippet fetch", resultList.size()); } // remove old events in the event cache @@ -285,6 +287,7 @@ public final class plasmaSearchEvent { // load only urls if there was not yet a root url of that hash // find the url entry + indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry); if (page == null) { @@ -773,7 +776,7 @@ public final class plasmaSearchEvent { public String hash() { return urlentry.hash(); } - public URL url() { + public yacyURL url() { return urlcomps.url(); } public kelondroBitfield flags() { diff --git a/source/de/anomic/plasma/plasmaSearchImages.java b/source/de/anomic/plasma/plasmaSearchImages.java index 398c99625..bc4fedaca 100644 --- a/source/de/anomic/plasma/plasmaSearchImages.java +++ b/source/de/anomic/plasma/plasmaSearchImages.java @@ -48,15 +48,15 @@ import java.util.Map; import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterImageEntry; -import de.anomic.net.URL; import 
de.anomic.plasma.parser.ParserException; import de.anomic.server.serverDate; +import de.anomic.yacy.yacyURL; public final class plasmaSearchImages { private TreeSet images; - public plasmaSearchImages(long maxTime, URL url, int depth) { + public plasmaSearchImages(long maxTime, yacyURL url, int depth) { long start = System.currentTimeMillis(); this.images = new TreeSet(); if (maxTime > 10) { @@ -86,8 +86,8 @@ public final class plasmaSearchImages { Map.Entry e = (Map.Entry) i.next(); String nexturlstring; try { - nexturlstring = new URL((String) e.getKey()).toNormalform(true, true); - addAll(new plasmaSearchImages(serverDate.remainingTime(start, maxTime, 10), new URL(nexturlstring), depth - 1)); + nexturlstring = new yacyURL((String) e.getKey(), null).toNormalform(true, true); + addAll(new plasmaSearchImages(serverDate.remainingTime(start, maxTime, 10), new yacyURL(nexturlstring, null), depth - 1)); } catch (MalformedURLException e1) { e1.printStackTrace(); } diff --git a/source/de/anomic/plasma/plasmaSearchPreOrder.java b/source/de/anomic/plasma/plasmaSearchPreOrder.java index 5abdb1f61..1ccb91eeb 100644 --- a/source/de/anomic/plasma/plasmaSearchPreOrder.java +++ b/source/de/anomic/plasma/plasmaSearchPreOrder.java @@ -52,10 +52,10 @@ import java.util.TreeSet; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroBinSearch; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; +import de.anomic.yacy.yacyURL; public final class plasmaSearchPreOrder { @@ -143,7 +143,7 @@ public final class plasmaSearchPreOrder { entry = (Map.Entry) i.next(); iEntry = (indexRWIEntry) entry.getValue(); hashpart = iEntry.urlHash().substring(6); - isWordRootURL = plasmaURL.isWordRootURL(iEntry.urlHash(), querywords); + isWordRootURL = yacyURL.isWordRootURL(iEntry.urlHash(), querywords); if (isWordRootURL) { rootDoms.add(hashpart); } else { diff --git a/source/de/anomic/plasma/plasmaSearchProcessing.java b/source/de/anomic/plasma/plasmaSearchProcessing.java index 5a4496970..38e753744 100644 --- a/source/de/anomic/plasma/plasmaSearchProcessing.java +++ b/source/de/anomic/plasma/plasmaSearchProcessing.java @@ -32,8 +32,11 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import de.anomic.index.indexContainer; +import de.anomic.index.indexRWIEntry; +import de.anomic.server.serverByteBuffer; /** * @@ -179,4 +182,74 @@ public class plasmaSearchProcessing implements Cloneable { return rcLocal; } + + + public static final serverByteBuffer compressIndex(indexContainer inputContainer, indexContainer excludeContainer, long maxtime) { + // collect references according to domains + long timeout = (maxtime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxtime; + TreeMap doms = new TreeMap(); + synchronized (inputContainer) { + Iterator i = inputContainer.entries(); + indexRWIEntry iEntry; + String dom, paths; + while (i.hasNext()) { + iEntry = (indexRWIEntry) i.next(); + if ((excludeContainer != null) && (excludeContainer.get(iEntry.urlHash()) != null)) continue; // do not include urls that are in excludeContainer + dom = iEntry.urlHash().substring(6); + if ((paths = (String) doms.get(dom)) == null) { + doms.put(dom, iEntry.urlHash().substring(0, 6)); + } else { + doms.put(dom, paths + iEntry.urlHash().substring(0, 6)); + } + if (System.currentTimeMillis() > timeout) + break; + } + } + // construct a result string + serverByteBuffer bb = new serverByteBuffer(inputContainer.size() * 6); + bb.append('{'); + Iterator i = doms.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + bb.append((String) entry.getKey()); + bb.append(':'); + bb.append((String) entry.getValue()); + if (System.currentTimeMillis() > timeout) + break; + if (i.hasNext()) + bb.append(','); + } + bb.append('}'); + return bb; + } + + public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) { + // target is a mapping from url-hashes to a string of peer-hashes + if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) { + //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); + ci = ci.trim(1, ci.length() - 2); + String dom, url, peers; + while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { + assert ci.length() >= 6 : "ci.length() = " + ci.length(); + dom = ci.toString(0, 6); + ci.trim(7); + while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { + assert ci.length() >= 6 : "ci.length() = " + ci.length(); + url = ci.toString(0, 6) + dom; + ci.trim(6); + peers = (String) target.get(url); + if (peers == null) { + target.put(url, peerhash); + } else { + target.put(url, peers + peerhash); + } + //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); + } + if (ci.byteAt(0) == ',') ci.trim(1); + } + } + } + + } diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java index 57684572c..98fda8f1f 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java @@ -48,7 +48,7 @@ import java.util.Set; import java.util.TreeSet; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; +import de.anomic.yacy.yacyURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; @@ -252,7 +252,7 @@ public class plasmaSearchRankingProfile { public long preRanking(indexRWIEntry t, indexRWIEntry min, indexRWIEntry max, TreeSet searchedWords) { // the normalizedEntry must be a normalized indexEntry long ranking = 0; - ranking += (256 - plasmaURL.domLengthNormalized(t.urlHash())) << coeff_domlength; + ranking += (256 - yacyURL.domLengthNormalized(t.urlHash())) << coeff_domlength; ranking += plasmaSearchPreOrder.ybr_p(t.urlHash()) << coeff_ybr; ranking += (255 - (255 * (t.virtualAge() - min.virtualAge() ) / (1 + max.virtualAge() - min.virtualAge())) ) << coeff_date; ranking += (255 * (t.wordsintitle() - min.wordsintitle() ) / (1 + max.wordsintitle() - min.wordsintitle())) << coeff_wordsintitle; @@ -281,8 +281,8 @@ public class plasmaSearchRankingProfile { ranking += (flags.get(plasmaCondenser.flag_cat_hasvideo)) ? 
256 << coeff_cathasvideo : 0; ranking += (flags.get(plasmaCondenser.flag_cat_hasapp)) ? 256 << coeff_cathasapp : 0; - ranking += (plasmaURL.probablyRootURL(t.urlHash())) ? 16 << coeff_urllength : 0; - if (searchedWords != null) ranking += (plasmaURL.probablyWordURL(t.urlHash(), searchedWords) != null) ? 256 << coeff_appurl : 0; + ranking += (yacyURL.probablyRootURL(t.urlHash())) ? 16 << coeff_urllength : 0; + if (searchedWords != null) ranking += (yacyURL.probablyWordURL(t.urlHash(), searchedWords) != null) ? 256 << coeff_appurl : 0; return ranking; } diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index bda27351a..8664f8025 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -60,16 +60,15 @@ import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMSetTools; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.plasma.crawler.plasmaCrawlerException; import de.anomic.plasma.parser.ParserException; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public class plasmaSnippetCache { @@ -113,18 +112,18 @@ public class plasmaSnippetCache { } public static class TextSnippet { - private URL url; + private yacyURL url; private String line; private String error; private int errorCode; private Set remaingHashes; - private URL favicon; + private yacyURL favicon; - public TextSnippet(URL url, String line, int errorCode, Set remaingHashes, String errortext) { + public TextSnippet(yacyURL url, String line, int errorCode, Set remaingHashes, String errortext) { this(url,line,errorCode,remaingHashes,errortext,null); } - public TextSnippet(URL url, String line, int errorCode, Set remaingHashes, String errortext, URL favicon) { + public TextSnippet(yacyURL url, String line, int errorCode, Set remaingHashes, String errortext, yacyURL favicon) { this.url = url; this.line = line; this.errorCode = errorCode; @@ -132,7 +131,7 @@ public class plasmaSnippetCache { this.remaingHashes = remaingHashes; this.favicon = favicon; } - public URL getUrl() { + public yacyURL getUrl() { return this.url; } public boolean exists() { @@ -224,7 +223,7 @@ public class plasmaSnippetCache { return l.toString().trim(); } - public URL getFavicon() { + public yacyURL getFavicon() { return this.favicon; } } @@ -242,26 +241,26 @@ public class plasmaSnippetCache { } } - public static boolean existsInCache(URL url, Set queryhashes) { + public static boolean existsInCache(yacyURL url, Set queryhashes) { String hashes = yacySearch.set2string(queryhashes); - return retrieveFromCache(hashes, plasmaURL.urlHash(url)) != null; + return retrieveFromCache(hashes, url.hash()) != null; } - public static TextSnippet retrieveTextSnippet(URL url, Set queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int timeout) { + public static TextSnippet retrieveTextSnippet(yacyURL url, Set queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int timeout) { // heise = "0OQUNU3JSs05" + if (queryhashes.size() == 0) { //System.out.println("found no queryhashes for URL retrieve " + url); return new TextSnippet(url, null, ERROR_NO_HASH_GIVEN, queryhashes, "no query hashes given"); } - 
String urlhash = plasmaURL.urlHash(url); // try to get snippet from snippetCache int source = SOURCE_CACHE; String wordhashes = yacySearch.set2string(queryhashes); - String line = retrieveFromCache(wordhashes, urlhash); + String line = retrieveFromCache(wordhashes, url.hash()); if (line != null) { //System.out.println("found snippet for URL " + url + " in cache: " + line); - return new TextSnippet(url, line, source, null, null,(URL) faviconCache.get(urlhash)); + return new TextSnippet(url, line, source, null, null,(yacyURL) faviconCache.get(url.hash())); } /* =========================================================================== @@ -277,7 +276,7 @@ public class plasmaSnippetCache { if (resContent != null) { // if the content was found resContentLength = plasmaHTCache.getResourceContentLength(url); - } else if (fetchOnline) { + } else if (fetchOnline) { // if not found try to download it // download resource using the crawler and keep resource in memory if possible @@ -286,7 +285,7 @@ public class plasmaSnippetCache { // getting resource metadata (e.g. the http headers for http resources) if (entry != null) { resInfo = entry.getDocumentInfo(); - + // read resource body (if it is there) byte []resourceArray = entry.cacheArray(); if (resourceArray != null) { @@ -309,7 +308,7 @@ public class plasmaSnippetCache { if (!(e instanceof plasmaCrawlerException)) e.printStackTrace(); return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "error loading resource: " + e.getMessage()); } - + /* =========================================================================== * PARSING RESOURCE * =========================================================================== */ @@ -327,8 +326,8 @@ public class plasmaSnippetCache { /* =========================================================================== * COMPUTE SNIPPET * =========================================================================== */ - URL resFavicon = document.getFavicon(); - if (resFavicon != null) faviconCache.put(urlhash,resFavicon); + yacyURL resFavicon = document.getFavicon(); + if (resFavicon != null) faviconCache.put(url.hash(), resFavicon); // we have found a parseable non-empty file: use the lines // compute snippet from text @@ -356,7 +355,8 @@ public class plasmaSnippetCache { if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength); // finally store this snippet in our own cache - storeToCache(wordhashes, urlhash, line); + storeToCache(wordhashes, url.hash(), line); + document.close(); return new TextSnippet(url, line, source, null, null, resFavicon); } @@ -370,7 +370,7 @@ public class plasmaSnippetCache { * @param fetchOnline specifies if the resource should be loaded from web if it'as not available in the cache * @return the parsed document as {@link plasmaParserDocument} */ - public static plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline, int timeout, boolean forText) { + public static plasmaParserDocument retrieveDocument(yacyURL url, boolean fetchOnline, int timeout, boolean forText) { // load resource long resContentLength = 0; @@ -615,7 +615,7 @@ public class plasmaSnippetCache { } } - public static ArrayList retrieveMediaSnippets(URL url, Set queryhashes, int mediatype, boolean fetchOnline, int timeout) { + public static ArrayList retrieveMediaSnippets(yacyURL url, Set queryhashes, int mediatype, boolean fetchOnline, int timeout) { if (queryhashes.size() == 0) { serverLog.logFine("snippet fetch", "no query hashes given for url " + url); return new ArrayList(); @@ 
-723,7 +723,7 @@ public class plasmaSnippetCache { return map; } - public static plasmaParserDocument parseDocument(URL url, long contentLength, InputStream resourceStream) throws ParserException { + public static plasmaParserDocument parseDocument(yacyURL url, long contentLength, InputStream resourceStream) throws ParserException { return parseDocument(url, contentLength, resourceStream, null); } @@ -736,7 +736,7 @@ public class plasmaSnippetCache { * @return the extracted data * @throws ParserException */ - public static plasmaParserDocument parseDocument(URL url, long contentLength, InputStream resourceStream, IResourceInfo docInfo) throws ParserException { + public static plasmaParserDocument parseDocument(yacyURL url, long contentLength, InputStream resourceStream, IResourceInfo docInfo) throws ParserException { try { if (resourceStream == null) return null; @@ -809,7 +809,7 @@ public class plasmaSnippetCache { *
*
[1]the content-length as {@link Integer}
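The plasmaSnippetCache hunks above key the snippet and favicon caches by url.hash() rather than by a separately computed plasmaURL.urlHash(). A minimal usage sketch under that assumption; the page URL and the word hash are made-up example values, only the class and method names are taken from the hunks above:

    import java.util.TreeSet;
    import de.anomic.plasma.plasmaSnippetCache;
    import de.anomic.yacy.yacyURL;

    public class SnippetCacheSketch {
        public static void main(String[] args) throws Exception {
            yacyURL url = new yacyURL("http://example.net/page.html", null); // hash is computed once, on demand
            TreeSet queryhashes = new TreeSet();
            queryhashes.add("AAAAAAAAAAAA");                                 // hypothetical 12-character word hash
            // existsInCache() delegates to retrieveFromCache(set2string(queryhashes), url.hash())
            boolean cached = plasmaSnippetCache.existsInCache(url, queryhashes);
            System.out.println("snippet cached: " + cached);
        }
    }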
*/ - public static Object[] getResource(URL url, boolean fetchOnline, int socketTimeout, boolean forText) { + public static Object[] getResource(yacyURL url, boolean fetchOnline, int socketTimeout, boolean forText) { // load the url as resource from the web try { long contentLength = -1; @@ -845,7 +845,7 @@ public class plasmaSnippetCache { } public static plasmaHTCache.Entry loadResourceFromWeb( - URL url, + yacyURL url, int socketTimeout, boolean keepInMemory, boolean forText @@ -868,7 +868,7 @@ public class plasmaSnippetCache { public static String failConsequences(TextSnippet snippet, String eventID) { // problems with snippet fetch if (yacyCore.seedDB.mySeed.isVirgin()) return snippet.getError() + " (no consequences, no network connection)"; // no consequences if we do not have a network connection - String urlHash = plasmaURL.urlHash(snippet.getUrl()); + String urlHash = snippet.getUrl().hash(); String querystring = kelondroMSetTools.setToString(snippet.getRemainingHashes(), ' '); if ((snippet.getErrorCode() == ERROR_SOURCE_LOADING) || (snippet.getErrorCode() == ERROR_RESOURCE_LOADING) || diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 0518dc95b..40b04d178 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -141,7 +141,6 @@ import de.anomic.http.httpd; import de.anomic.http.httpdRobotsTxtConfig; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroCache; @@ -150,7 +149,6 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMapTable; import de.anomic.kelondro.kelondroNaturalOrder; -import de.anomic.net.URL; import de.anomic.plasma.dbImport.dbImportManager; import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.urlPattern.defaultURLPattern; @@ -165,6 +163,7 @@ import de.anomic.server.serverSwitch; import de.anomic.server.serverThread; import de.anomic.server.logging.serverLog; import de.anomic.tools.crypt; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; @@ -911,7 +910,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser Map initProps; if (networkUnitDefinition.startsWith("http://")) { try { - this.setConfig(httpc.loadHashMap(new URL(networkUnitDefinition), remoteProxyConfig)); + this.setConfig(httpc.loadHashMap(new yacyURL(networkUnitDefinition, null), remoteProxyConfig)); } catch (MalformedURLException e) { } } else { @@ -923,7 +922,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } if (networkGroupDefinition.startsWith("http://")) { try { - this.setConfig(httpc.loadHashMap(new URL(networkGroupDefinition), remoteProxyConfig)); + this.setConfig(httpc.loadHashMap(new yacyURL(networkGroupDefinition, null), remoteProxyConfig)); } catch (MalformedURLException e) { } } else { @@ -941,7 +940,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser location = getConfig("network.unit.update.location" + i, ""); if (location.length() == 0) break; try { - yacyVersion.latestReleaseLocations.add(new URL(location)); + yacyVersion.latestReleaseLocations.add(new yacyURL(location, null)); } catch (MalformedURLException e) { break; 
} @@ -1476,7 +1475,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } - public boolean acceptURL(URL url) { + public boolean acceptURL(yacyURL url) { // returns true if the url can be accepted accoring to network.unit.domain if (url == null) return false; String host = url.getHost(); @@ -1510,8 +1509,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser errorURL.remove(hash); } - public URL getURL(String urlhash) throws IOException { - if (urlhash.equals(plasmaURL.dummyHash)) return null; + public yacyURL getURL(String urlhash) throws IOException { + if (urlhash.equals(yacyURL.dummyHash)) return null; plasmaCrawlEntry ne = noticeURL.get(urlhash); if (ne != null) return ne.url(); indexURLEntry le = wordIndex.loadedURL.load(urlhash, null); @@ -1739,7 +1738,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // enqueue for further crawling enQueue(this.sbQueue.newEntry( entry.url(), - plasmaURL.urlHash(entry.referrerURL()), + (entry.referrerURL() == null) ? null : entry.referrerURL().hash(), entry.ifModifiedSince(), entry.requestWithCookie(), entry.initiator(), @@ -2363,7 +2362,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return parseResource(entry.url(), mimeType, charset, entry.cacheFile()); } - public plasmaParserDocument parseResource(URL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException, ParserException { + public plasmaParserDocument parseResource(yacyURL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException, ParserException { plasmaParserDocument doc = parser.parseSource(location, mimeType, documentCharset, sourceFile); assert(doc != null) : "Unexpected error. Parser returned null."; return doc; @@ -2387,8 +2386,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // 6) local fetching for global crawling (other known or unknwon initiator) int processCase = PROCESSCASE_0_UNKNOWN; yacySeed initiatorPeer = null; - String initiatorPeerHash = (entry.proxy()) ? plasmaURL.dummyHash : entry.initiator(); - if (initiatorPeerHash.equals(plasmaURL.dummyHash)) { + String initiatorPeerHash = (entry.proxy()) ? 
yacyURL.dummyHash : entry.initiator(); + if (initiatorPeerHash.equals(yacyURL.dummyHash)) { // proxy-load processCase = PROCESSCASE_4_PROXY_LOAD; } else if (initiatorPeerHash.equals(yacyCore.seedDB.mySeed.hash)) { @@ -2442,7 +2441,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser Map hl = document.getHyperlinks(); Iterator i = hl.entrySet().iterator(); String nextUrlString; - URL nextUrl; + yacyURL nextUrl; Map.Entry nextEntry; while (i.hasNext()) { // check for interruption @@ -2452,7 +2451,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser nextEntry = (Map.Entry) i.next(); nextUrlString = (String) nextEntry.getKey(); try { - nextUrl = new URL(nextUrlString); + nextUrl = new yacyURL(nextUrlString, null); // enqueue the hyperlink into the pre-notice-url db sbStackCrawlThread.enqueue(nextUrl, entry.urlHash(), initiatorPeerHash, (String) nextEntry.getValue(), docDate, entry.depth() + 1, entry.profile()); @@ -2467,9 +2466,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * CREATE INDEX * ========================================================================= */ String docDescription = document.getTitle(); - URL referrerURL = entry.referrerURL(); - String referrerUrlHash = plasmaURL.urlHash(referrerURL); - if (referrerUrlHash == null) referrerUrlHash = plasmaURL.dummyHash; + yacyURL referrerURL = entry.referrerURL(); String noIndexReason = plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR; if (processCase == PROCESSCASE_4_PROXY_LOAD) { @@ -2506,13 +2503,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser docDate, // modification date new Date(), // loaded date new Date(ldate + Math.max(0, ldate - docDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula - referrerUrlHash, // referer hash + (referrerURL == null) ? 
null : referrerURL.hash(), // referer hash new byte[0], // md5 (int) entry.size(), // size condenser.RESULT_NUMB_WORDS, // word count - plasmaURL.docType(document.getMimeType()), // doctype + plasmaHTCache.docType(document.getMimeType()), // doctype condenser.RESULT_FLAGS, // flags - plasmaURL.language(entry.url()), // language + yacyURL.language(entry.url()), // language ioLinks[0].intValue(), // llocal ioLinks[1].intValue(), // lother document.getAudiolinks().size(), // laudio @@ -2567,13 +2564,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * ======================================================================== */ words = wordIndex.addPageIndex( entry.url(), // document url - urlHash, // document url hash docDate, // document mod date (int) entry.size(), // document size document, // document content condenser, // document condenser - plasmaURL.language(entry.url()), // document language - plasmaURL.docType(document.getMimeType()), // document type + yacyURL.language(entry.url()), // document language + plasmaHTCache.docType(document.getMimeType()),// document type ioLinks[0].intValue(), // outlinkSame ioLinks[1].intValue() // outlinkOthers ); @@ -2586,8 +2582,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ArrayList tmpContainers = new ArrayList(condenser.words().size()); - String language = plasmaURL.language(entry.url()); - char doctype = plasmaURL.docType(document.getMimeType()); + String language = yacyURL.language(entry.url()); + char doctype = plasmaHTCache.docType(document.getMimeType()); indexURLEntry.Components comp = newEntry.comp(); int urlLength = comp.url().toNormalform(true, true).length(); int urlComps = htmlFilterContentScraper.urlComps(comp.url().toNormalform(true, true)).length; @@ -2645,14 +2641,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String error = (String) resultObj.get("result"); if (error != null) { words = wordIndex.addPageIndex( - entry.url(), - urlHash, + entry.url(), docDate, (int) entry.size(), document, condenser, - plasmaURL.language(entry.url()), - plasmaURL.docType(document.getMimeType()), + yacyURL.language(entry.url()), + plasmaHTCache.docType(document.getMimeType()), ioLinks[0].intValue(), ioLinks[1].intValue() ); @@ -2692,7 +2687,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } else { log.logFine("Not Indexed Resource '" + entry.url().toNormalform(false, true) + "': process case=" + processCase); - addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield()); } } catch (Exception ee) { if (ee instanceof InterruptedException) throw (InterruptedException)ee; @@ -2705,7 +2700,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash)); yacyClient.crawlReceipt(initiatorPeer, "crawl", "exception", ee.getMessage(), null, ""); } - addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), (referrerURL == null) ? 
null : referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield()); } } else { @@ -2713,7 +2708,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser checkInterruption(); log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason); - addURLtoErrorDB(entry.url(), referrerUrlHash, initiatorPeerHash, docDescription, noIndexReason, new kelondroBitfield()); + addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, docDescription, noIndexReason, new kelondroBitfield()); if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) { if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash)); yacyClient.crawlReceipt(initiatorPeer, "crawl", "rejected", noIndexReason, null, ""); @@ -2764,15 +2759,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // convert the referrer hash into the corresponding URL - URL refererURL = null; + yacyURL refererURL = null; String refererHash = urlEntry.referrerhash(); - if ((refererHash != null) && (!refererHash.equals(plasmaURL.dummyHash))) try { + if ((refererHash != null) && (!refererHash.equals(yacyURL.dummyHash))) try { refererURL = this.getURL(refererHash); } catch (IOException e) { refererURL = null; } cacheLoader.loadAsync(urlEntry.url(), urlEntry.name(), (refererURL!=null)?refererURL.toString():null, urlEntry.initiator(), urlEntry.depth(), profile, -1, false); - log.logInfo(stats + ": enqueued for load " + urlEntry.url() + " [" + urlEntry.urlhash() + "]"); + log.logInfo(stats + ": enqueued for load " + urlEntry.url() + " [" + urlEntry.url().hash() + "]"); return; } @@ -2800,8 +2795,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // check if peer for remote crawl is available yacySeed remoteSeed = ((this.isPublicRobinson()) && (getConfig("cluster.mode", "").equals("publiccluster"))) ? 
- yacyCore.dhtAgent.getPublicClusterCrawlSeed(urlEntry.urlhash(), this.clusterhashes) : - yacyCore.dhtAgent.getGlobalCrawlSeed(urlEntry.urlhash()); + yacyCore.dhtAgent.getPublicClusterCrawlSeed(urlEntry.url().hash(), this.clusterhashes) : + yacyCore.dhtAgent.getGlobalCrawlSeed(urlEntry.url().hash()); if (remoteSeed == null) { log.logFine("plasmaSwitchboard.processRemoteCrawlTrigger: no remote crawl seed available"); return false; @@ -2897,8 +2892,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // method for index deletion - public int removeAllUrlReferences(URL url, boolean fetchOnline) { - return removeAllUrlReferences(plasmaURL.urlHash(url), fetchOnline); + public int removeAllUrlReferences(yacyURL url, boolean fetchOnline) { + return removeAllUrlReferences(url.hash(), fetchOnline); } public int removeAllUrlReferences(String urlhash, boolean fetchOnline) { @@ -3231,7 +3226,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } private void addURLtoErrorDB( - URL url, + yacyURL url, String referrerHash, String initiator, String name, diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index 01f4c6542..53537107d 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -55,10 +55,10 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; -import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public class plasmaSwitchboardQueue { @@ -105,12 +105,12 @@ public class plasmaSwitchboardQueue { public synchronized void push(Entry entry) throws IOException { sbQueueStack.push(sbQueueStack.row().newEntry(new byte[][]{ entry.url.toString().getBytes(), - (entry.referrerHash == null) ? plasmaURL.dummyHash.getBytes() : entry.referrerHash.getBytes(), + (entry.referrerHash == null) ? yacyURL.dummyHash.getBytes() : entry.referrerHash.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong((entry.ifModifiedSince == null) ? 0 : entry.ifModifiedSince.getTime(), 11).getBytes(), new byte[]{entry.flags}, - (entry.initiator == null) ? plasmaURL.dummyHash.getBytes() : entry.initiator.getBytes(), + (entry.initiator == null) ? yacyURL.dummyHash.getBytes() : entry.initiator.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, rowdef.width(5)).getBytes(), - (entry.profileHandle == null) ? plasmaURL.dummyHash.getBytes() : entry.profileHandle.getBytes(), + (entry.profileHandle == null) ? yacyURL.dummyHash.getBytes() : entry.profileHandle.getBytes(), (entry.anchorName == null) ? 
"-".getBytes("UTF-8") : entry.anchorName.getBytes("UTF-8") })); } @@ -184,13 +184,13 @@ public class plasmaSwitchboardQueue { } } - public Entry newEntry(URL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, + public Entry newEntry(yacyURL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, String initiator, int depth, String profilehandle, String anchorName) { return new Entry(url, referrer, ifModifiedSince, requestWithCookie, initiator, depth, profilehandle, anchorName); } public class Entry { - private URL url; // plasmaURL.urlStringLength + private yacyURL url; // plasmaURL.urlStringLength private String referrerHash; // plasmaURL.urlHashLength private Date ifModifiedSince; // 6 private byte flags; // 1 @@ -202,9 +202,9 @@ public class plasmaSwitchboardQueue { // computed values private plasmaCrawlProfile.entry profileEntry; private IResourceInfo contentInfo; - private URL referrerURL; + private yacyURL referrerURL; - public Entry(URL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, + public Entry(yacyURL url, String referrer, Date ifModifiedSince, boolean requestWithCookie, String initiator, int depth, String profileHandle, String anchorName) { this.url = url; this.referrerHash = referrer; @@ -224,7 +224,7 @@ public class plasmaSwitchboardQueue { long ims = row.getColLong(2); byte flags = row.getColByte(3); try { - this.url = new URL(row.getColString(0, "UTF-8")); + this.url = new yacyURL(row.getColString(0, "UTF-8"), null); } catch (MalformedURLException e) { this.url = null; } @@ -245,7 +245,7 @@ public class plasmaSwitchboardQueue { long ims = (row[2] == null) ? 0 : kelondroBase64Order.enhancedCoder.decodeLong(new String(row[2], "UTF-8")); byte flags = (row[3] == null) ? 0 : row[3][0]; try { - this.url = new URL(new String(row[0], "UTF-8")); + this.url = new yacyURL(new String(row[0], "UTF-8"), null); } catch (MalformedURLException e) { this.url = null; } @@ -262,12 +262,12 @@ public class plasmaSwitchboardQueue { this.referrerURL = null; } - public URL url() { + public yacyURL url() { return url; } public String urlHash() { - return plasmaURL.urlHash(url); + return url.hash(); } public boolean requestedWithCookie() { @@ -279,7 +279,7 @@ public class plasmaSwitchboardQueue { } public boolean proxy() { - return (initiator == null) || (initiator.equals(plasmaURL.dummyHash)); + return (initiator == null) || (initiator.equals(yacyURL.dummyHash)); } public String initiator() { @@ -324,9 +324,9 @@ public class plasmaSwitchboardQueue { return (info == null) ? new Date() : info.getModificationDate(); } - public URL referrerURL() { + public yacyURL referrerURL() { if (referrerURL == null) { - if ((referrerHash == null) || (referrerHash.equals(plasmaURL.dummyHash))) return null; + if ((referrerHash == null) || (referrerHash.equals(yacyURL.dummyHash))) return null; indexURLEntry entry = lurls.load(referrerHash, null); if (entry == null) referrerURL = null; else referrerURL = entry.comp().url(); } diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java deleted file mode 100644 index 1a6cea2c1..000000000 --- a/source/de/anomic/plasma/plasmaURL.java +++ /dev/null @@ -1,744 +0,0 @@ -// indexURL.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany -// first published 20.05.2006 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.plasma; - -import java.net.MalformedURLException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; -import java.util.TreeSet; - -import de.anomic.index.indexContainer; -import de.anomic.index.indexRWIEntry; -import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; -import de.anomic.server.serverByteBuffer; -import de.anomic.server.serverCodings; -import de.anomic.server.serverDomains; -import de.anomic.yacy.yacySeedDB; - -public class plasmaURL { - - // TLD separation in political and cultural parts - // https://www.cia.gov/cia/publications/factbook/index.html - // http://en.wikipedia.org/wiki/List_of_countries_by_continent - - private static final String[] TLD_NorthAmericaOceania={ - // primary english-speaking countries - // english-speaking countries from central america are also included - // includes also dutch and french colonies in the caribbean sea - // and US/English/Australian military bases in asia - "EDU=US Educational", - "GOV=US Government", - "MIL=US Military", - "NET=Network", - "ORG=Non-Profit Organization", - "AN=Netherlands Antilles", - "AS=American Samoa", - "AG=Antigua and Barbuda", - "AI=Anguilla", - "AU=Australia", - "BB=Barbados", - "BZ=Belize", - "BM=Bermuda", - "BS=Bahamas", - "CA=Canada", - "CC=Cocos (Keeling) Islands", - "CK=Cook Islands", - "CX=Christmas Island", // located in the Indian Ocean, but belongs to Australia - "DM=Dominica", - "FM=Micronesia", - "FJ=Fiji", - "GD=Grenada", - "GP=Guadeloupe", - "GS=South Georgia and the South Sandwich Islands", // south of south america, but administrated by british, has only a scientific base - "GU=Guam", // strategical US basis close to Japan - "HM=Heard and McDonald Islands", // uninhabited, sub-Antarctic island, owned by Australia - "HT=Haiti", - "IO=British Indian Ocean Territory", // UK-US naval support facility in the Indian Ocean - "KI=Kiribati", // 33 coral atolls in the pacific, formerly owned by UK - "KN=Saint Kitts and Nevis", // islands in the carribean see - "KY=Cayman Islands", - "LC=Saint Lucia", - "MH=Marshall Islands", // formerly US atomic bomb test site, now a key installation in the US missile defense network - "MP=Northern Mariana Islands", // US strategic location in the western Pacific Ocean - "NC=New Caledonia", - "NF=Norfolk Island", - "NR=Nauru", // independent UN island - "NU=Niue", // one of world's largest coral islands - "NZ=New Zealand (Aotearoa)", - "PG=Papua New Guinea", - "PN=Pitcairn", // 
overseas territory of the UK - "PR=Puerto Rico", // territory of the US with commonwealth status - "PW=Palau", // was once governed by Micronesia - "Sb=Solomon Islands", - "TC=Turks and Caicos Islands", // overseas territory of the UK - "TK=Tokelau", // group of three atolls in the South Pacific Ocean, british protectorat - "TO=Tonga", - "TT=Trinidad and Tobago", - "TV=Tuvalu", // nine coral atolls in the South Pacific Ocean; in 2000, Tuvalu leased its TLD ".tv" for $50 million over a 12-year period - "UM=US Minor Outlying Islands", // nine insular United States possessions in the Pacific Ocean and the Caribbean Sea - "US=United States", - "VC=Saint Vincent and the Grenadines", - "VG=Virgin Islands (British)", - "VI=Virgin Islands (U.S.)", - "VU=Vanuatu", - "WF=Wallis and Futuna Islands", - "WS=Samoa" - }; - private static final String[] TLD_MiddleSouthAmerica = { - // primary spanish and portugese-speaking - "AR=Argentina", - "AW=Aruba", - "BR=Brazil", - "BO=Bolivia", - "CL=Chile", - "CO=Colombia", - "CR=Costa Rica", - "CU=Cuba", - "DO=Dominican Republic", - "EC=Ecuador", - "FK=Falkland Islands (Malvinas)", - "GF=French Guiana", - "GT=Guatemala", - "GY=Guyana", - "HN=Honduras", - "JM=Jamaica", - "MX=Mexico", - "NI=Nicaragua", - "PA=Panama", - "PE=Peru", - "PY=Paraguay", - "SR=Suriname", - "SV=El Salvador", - "UY=Uruguay", - "VE=Venezuela" - }; - private static final String[] TLD_EuropaRussia = { - // includes also countries that are mainly french- dutch- speaking - // and culturally close to europe - "AD=Andorra", - "AL=Albania", - "AQ=Antarctica", - "AT=Austria", - "BA=Bosnia and Herzegovina", - "BE=Belgium", - "BG=Bulgaria", - "BV=Bouvet Island", // this island is uninhabited and covered by ice, south of africa but governed by Norway - "BY=Belarus", - "CH=Switzerland", - "CS=Czechoslovakia (former)", - "CZ=Czech Republic", - "CY=Cyprus", - "DE=Germany", - "DK=Denmark", - "ES=Spain", - "EE=Estonia", - "FI=Finland", - "FO=Faroe Islands", // Viking Settlers - "FR=France", - "FX=France, Metropolitan", - "GB=Great Britain (UK)", - "GI=Gibraltar", - "GL=Greenland", - "GR=Greece", - "HR=Croatia (Hrvatska)", - "HU=Hungary", - "IE=Ireland", - "IS=Iceland", - "IT=Italy", - "LI=Liechtenstein", - "LT=Lithuania", - "LU=Luxembourg", - "LV=Latvia", - "MD=Moldova", - "MC=Monaco", - "MK=Macedonia", - "MN=Mongolia", - "MS=Montserrat", // British island in the Caribbean Sea, almost not populated because of strong vulcanic activity - "MT=Malta", - "MQ=Martinique", // island in the eastern Caribbean Sea, overseas department of France - "NATO=Nato field", - "NL=Netherlands", - "NO=Norway", - "PF=French Polynesia", // French annexed Polynesian island in the South Pacific, French atomic bomb test site - "PL=Poland", - "PM=St. 
Pierre and Miquelon", // french-administrated colony close to canada, belongs to France - "PT=Portugal", - "RO=Romania", - "RU=Russia", - "SE=Sweden", - "SI=Slovenia", - "SJ=Svalbard and Jan Mayen Islands", // part of Norway - "SM=San Marino", - "SK=Slovak Republic", - "SU=USSR (former)", - "TF=French Southern Territories", // islands in the arctic see, no inhabitants - "UK=United Kingdom", - "UA=Ukraine", - "VA=Vatican City State (Holy See)", - "YU=Yugoslavia" - }; - - private static final String[] TLD_MiddleEastWestAsia = { - // states that are influenced by islamic culture and arabic language - // includes also eurasia states and those that had been part of the former USSR and close to southwest asia - "AE=United Arab Emirates", - "AF=Afghanistan", - "AM=Armenia", - "AZ=Azerbaijan", - "BH=Bahrain", - "GE=Georgia", - "IL=Israel", - "IQ=Iraq", - "IR=Iran", - "JO=Jordan", - "KG=Kyrgyzstan", - "KZ=Kazakhstan", - "KW=Kuwait", - "LB=Lebanon", - "OM=Oman", - "QA=Qatar", - "SA=Saudi Arabia", - "SY=Syria", - "TJ=Tajikistan", - "TM=Turkmenistan", - "PK=Pakistan", - "TR=Turkey", - "UZ=Uzbekistan", - "YE=Yemen" - }; - private static final String[] TLD_SouthEastAsia = { - "BD=Bangladesh", - "BN=Brunei Darussalam", - "BT=Bhutan", - "CN=China", - "HK=Hong Kong", - "ID=Indonesia", - "IN=India", - "LA=Laos", - "NP=Nepal", - "JP=Japan", - "KH=Cambodia", - "KP=Korea (North)", - "KR=Korea (South)", - "LK=Sri Lanka", - "MY=Malaysia", - "MM=Myanmar", // formerly known as Burma - "MO=Macau", // Portuguese settlement, part of China, but has some autonomy - "MV=Maldives", // group of atolls in the Indian Ocean - "PH=Philippines", - "SG=Singapore", - "TP=East Timor", - "TH=Thailand", - "TW=Taiwan", - "VN=Viet Nam" - }; - private static final String[] TLD_Africa = { - "AO=Angola", - "BF=Burkina Faso", - "BI=Burundi", - "BJ=Benin", - "BW=Botswana", - "CF=Central African Republic", - "CG=Congo", - "CI=Cote D'Ivoire (Ivory Coast)", - "CM=Cameroon", - "CV=Cape Verde", - "DJ=Djibouti", - "DZ=Algeria", - "EG=Egypt", - "EH=Western Sahara", - "ER=Eritrea", - "ET=Ethiopia", - "GA=Gabon", - "GH=Ghana", - "GM=Gambia", - "GN=Guinea", - "GQ=Equatorial Guinea", - "GW=Guinea-Bissau", - "KE=Kenya", - "KM=Comoros", - "LR=Liberia", - "LS=Lesotho", - "LY=Libya", - "MA=Morocco", - "MG=Madagascar", - "ML=Mali", - "MR=Mauritania", - "MU=Mauritius", - "MW=Malawi", - "MZ=Mozambique", - "NA=Namibia", - "NE=Niger", - "NG=Nigeria", - "RE=Reunion", - "RW=Rwanda", - "SC=Seychelles", - "SD=Sudan", - "SH=St. 
Helena", - "SL=Sierra Leone", - "SN=Senegal", - "SO=Somalia", - "ST=Sao Tome and Principe", - "SZ=Swaziland", - "TD=Chad", - "TG=Togo", - "TN=Tunisia", - "TZ=Tanzania", - "UG=Uganda", - "ZA=South Africa", - "ZM=Zambia", - "ZR=Zaire", - "ZW=Zimbabwe", - "YT=Mayotte" - }; - private static final String[] TLD_Generic = { - "COM=US Commercial", - "AERO=", - "BIZ=", - "COOP=", - "INFO=", - "MUSEUM=", - "NAME=", - "PRO=", - "ARPA=", - "INT=International", - "ARPA=Arpanet", - "NT=Neutral Zone" - }; - - - /* - * TLDs: aero, biz, com, coop, edu, gov, info, int, mil, museum, name, net, - * org, pro, arpa AC, AD, AE, AERO, AF, AG, AI, AL, AM, AN, AO, AQ, AR, - * ARPA, AS, AT, AU, AW, AZ, BA, BB, BD, BE, BF, BG, BH, BI, BIZ, BJ, BM, - * BN, BO, BR, BS, BT, BV, BW, BY, BZ, CA, CC, CD, CF, CG, CH, CI, CK, CL, - * CM, CN, CO, COM, COOP, CR, CU, CV, CX, CY, CZ, DE, DJ, DK, DM, DO, DZ, - * EC, EDU, EE, EG, ER, ES, ET, EU, FI, FJ, FK, FM, FO, FR, GA, GB, GD, GE, - * GF, GG, GH, GI, GL, GM, GN, GOV, GP, GQ, GR, GS, GT, GU, GW, GY, HK, HM, - * HN, HR, HT, HU, ID, IE, IL, IM, IN, INFO, INT, IO, IQ, IR, IS, IT, JE, - * JM, JO, JOBS, JP, KE, KG, KH, KI, KM, KN, KR, KW, KY, KZ, LA, LB, LC, LI, - * LK, LR, LS, LT, LU, LV, LY, MA, MC, MD, MG, MH, MIL, MK, ML, MM, MN, MO, - * MOBI, MP, MQ, MR, MS, MT, MU, MUSEUM, MV, MW, MX, MY, MZ, NA, NAME, NC, - * NE, NET, NF, NG, NI, NL, NO, NP, NR, NU, NZ, OM, ORG, PA, PE, PF, PG, PH, - * PK, PL, PM, PN, PR, PRO, PS, PT, PW, PY, QA, RE, RO, RU, RW, SA, SB, SC, - * SD, SE, SG, SH, SI, SJ, SK, SL, SM, SN, SO, SR, ST, SU, SV, SY, SZ, TC, - * TD, TF, TG, TH, TJ, TK, TL, TM, TN, TO, TP, TR, TRAVEL, TT, TV, TW, TZ, - * UA, UG, UK, UM, US, UY, UZ, VA, VC, VE, VG, VI, VN, VU, WF, WS, YE, YT, - * YU, ZA, ZM, ZW - */ - - public static String dummyHash; - - private static HashMap TLDID = new HashMap(); - private static HashMap TLDName = new HashMap(); - - private static void insertTLDProps(String[] TLDList, int id) { - int p; - String tld, name; - Integer ID = new Integer(id); - for (int i = 0; i < TLDList.length; i++) { - p = TLDList[i].indexOf('='); - if (p > 0) { - tld = TLDList[i].substring(0, p).toLowerCase(); - name = TLDList[i].substring(p + 1); - TLDID.put(tld, ID); - TLDName.put(tld, name); - } - } - } - - static { - // create a dummy hash - dummyHash = ""; - for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-"; - - // assign TLD-ids and names - insertTLDProps(TLD_EuropaRussia, 0); - insertTLDProps(TLD_MiddleSouthAmerica, 1); - insertTLDProps(TLD_SouthEastAsia, 2); - insertTLDProps(TLD_MiddleEastWestAsia, 3); - insertTLDProps(TLD_NorthAmericaOceania, 4); - insertTLDProps(TLD_Africa, 5); - insertTLDProps(TLD_Generic, 6); - // the id=7 is used to flag local addresses - } - - public static final int flagTypeID(String hash) { - return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5; - } - - public static final int flagTLDID(String hash) { - return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 28) >> 2; - } - - public static final int flagLengthID(String hash) { - return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 3); - } - - public static final String urlHash(String url) { - if ((url == null) || (url.length() == 0)) - return null; - try { - return urlHash(new URL(url)); - } catch (MalformedURLException e) { - return null; - } - } - - public static final String urlHash(URL url) { - if (url == null) return null; - String host = url.getHost().toLowerCase(); - int p = host.lastIndexOf('.'); - String tld = 
"", dom = tld; - if (p > 0) { - tld = host.substring(p + 1); - dom = host.substring(0, p); - } - Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses - int id = (ID == null) ? 7 : ID.intValue(); // local addresses are flagged with id=7 - boolean isHTTP = url.getProtocol().equals("http"); - p = dom.lastIndexOf('.'); // locate subdomain - String subdom = ""; - if (p > 0) { - subdom = dom.substring(0, p); - dom = dom.substring(p + 1); - } - int port = url.getPort(); - if (port <= 0) { - if (isHTTP) { - port = 80; - } else if (url.getProtocol().equalsIgnoreCase("https")) { - port = 443; - } else { - port = 21; - } - } - String path = url.getPath(); - if (path.startsWith("/")) - path = path.substring(1); - if (path.endsWith("/")) - path = path.substring(0, path.length() - 1); - p = path.indexOf('/'); - String rootpath = ""; - if (p > 0) { - rootpath = path.substring(0, p); - } - - // we collected enough information to compute the fragments that are - // basis for hashes - int l = dom.length(); - int domlengthKey = (l <= 8) ? 0 : (l <= 12) ? 1 : (l <= 16) ? 2 : 3; - byte flagbyte = (byte) (((isHTTP) ? 0 : 32) | (id << 2) | domlengthKey); - - // combine the attributes - StringBuffer hash = new StringBuffer(12); - // form the 'local' part of the hash - hash.append(kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform(true, true))).substring(0, 5)); // 5 chars - hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char - // form the 'global' part of the hash - hash.append(protocolHostPort(url.getProtocol(), host, port)); // 5 chars - hash.append(kelondroBase64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char - - // return result hash - return new String(hash); - } - - private static char subdomPortPath(String subdom, int port, String rootpath) { - return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(subdom + ":" + port + ":" + rootpath)).charAt(0); - } - - private static final char rootURLFlag = subdomPortPath("www", 80, ""); - - public static final boolean probablyRootURL(String urlHash) { - return (urlHash.charAt(5) == rootURLFlag); - } - - private static String protocolHostPort(String protocol, String host, int port) { - return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(protocol + ":" + host + ":" + port)).substring(0, 5); - } - - private static String[] testTLDs = new String[] { "com", "net", "org", "uk", "fr", "de", "es", "it" }; - - public static final URL probablyWordURL(String urlHash, TreeSet words) { - Iterator wi = words.iterator(); - String word; - while (wi.hasNext()) { - word = (String) wi.next(); - if ((word == null) || (word.length() == 0)) continue; - String pattern = urlHash.substring(6, 11); - for (int i = 0; i < testTLDs.length; i++) { - if (pattern.equals(protocolHostPort("http", "www." + word.toLowerCase() + "." + testTLDs[i], 80))) - try { - return new URL("http://www." + word.toLowerCase() + "." 
+ testTLDs[i]); - } catch (MalformedURLException e) { - return null; - } - } - } - return null; - } - - public static final boolean isWordRootURL(String givenURLHash, TreeSet words) { - if (!(probablyRootURL(givenURLHash))) return false; - URL wordURL = probablyWordURL(givenURLHash, words); - if (wordURL == null) return false; - if (urlHash(wordURL).equals(givenURLHash)) return true; - return false; - } - - public static final int domLengthEstimation(String urlHash) { - // generates an estimation of the original domain length - assert (urlHash != null); - assert (urlHash.length() == 12) : "urlhash = " + urlHash; - int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); - int domLengthKey = flagbyte & 3; - switch (domLengthKey) { - case 0: - return 4; - case 1: - return 10; - case 2: - return 14; - case 3: - return 20; - } - return 20; - } - - public static int domLengthNormalized(String urlHash) { - return 255 * domLengthEstimation(urlHash) / 30; - } - - public static final int domDomain(String urlHash) { - // returns the ID of the domain of the domain - assert (urlHash != null); - assert (urlHash.length() == 12) : "urlhash = " + urlHash; - int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); - return (flagbyte & 12) >> 2; - } - - public static boolean isGlobalDomain(String urlhash) { - return domDomain(urlhash) != 7; - } - - public static final serverByteBuffer compressIndex(indexContainer inputContainer, indexContainer excludeContainer, long maxtime) { - // collect references according to domains - long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; - TreeMap doms = new TreeMap(); - synchronized (inputContainer) { - Iterator i = inputContainer.entries(); - indexRWIEntry iEntry; - String dom, paths; - while (i.hasNext()) { - iEntry = (indexRWIEntry) i.next(); - if ((excludeContainer != null) && (excludeContainer.get(iEntry.urlHash()) != null)) continue; // do not include urls that are in excludeContainer - dom = iEntry.urlHash().substring(6); - if ((paths = (String) doms.get(dom)) == null) { - doms.put(dom, iEntry.urlHash().substring(0, 6)); - } else { - doms.put(dom, paths + iEntry.urlHash().substring(0, 6)); - } - if (System.currentTimeMillis() > timeout) - break; - } - } - // construct a result string - serverByteBuffer bb = new serverByteBuffer(inputContainer.size() * 6); - bb.append('{'); - Iterator i = doms.entrySet().iterator(); - Map.Entry entry; - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - bb.append((String) entry.getKey()); - bb.append(':'); - bb.append((String) entry.getValue()); - if (System.currentTimeMillis() > timeout) - break; - if (i.hasNext()) - bb.append(','); - } - bb.append('}'); - return bb; - } - - public static final void decompressIndex(TreeMap target, serverByteBuffer ci, String peerhash) { - // target is a mapping from url-hashes to a string of peer-hashes - if ((ci.byteAt(0) == '{') && (ci.byteAt(ci.length() - 1) == '}')) { - //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); - ci = ci.trim(1, ci.length() - 2); - String dom, url, peers; - while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { - assert ci.length() >= 6 : "ci.length() = " + ci.length(); - dom = ci.toString(0, 6); - ci.trim(7); - while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { - assert ci.length() >= 6 : "ci.length() = " + ci.length(); - url = ci.toString(0, 6) + dom; - ci.trim(6); - peers = (String) target.get(url); - if (peers == null) { - target.put(url, peerhash); - } else 
{ - target.put(url, peers + peerhash); - } - //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); - } - if (ci.byteAt(0) == ',') ci.trim(1); - } - } - } - - - // doctypes: - public static final char DT_PDFPS = 'p'; - public static final char DT_TEXT = 't'; - public static final char DT_HTML = 'h'; - public static final char DT_DOC = 'd'; - public static final char DT_IMAGE = 'i'; - public static final char DT_MOVIE = 'm'; - public static final char DT_FLASH = 'f'; - public static final char DT_SHARE = 's'; - public static final char DT_AUDIO = 'a'; - public static final char DT_BINARY = 'b'; - public static final char DT_UNKNOWN = 'u'; - - // appearance locations: (used for flags) - public static final int AP_TITLE = 0; // title tag from html header - public static final int AP_H1 = 1; // headline - top level - public static final int AP_H2 = 2; // headline, second level - public static final int AP_H3 = 3; // headline, 3rd level - public static final int AP_H4 = 4; // headline, 4th level - public static final int AP_H5 = 5; // headline, 5th level - public static final int AP_H6 = 6; // headline, 6th level - public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam) - public static final int AP_DOM = 8; // word inside an url: in Domain - public static final int AP_PATH = 9; // word inside an url: in path - public static final int AP_IMG = 10; // tag inside image references - public static final int AP_ANCHOR = 11; // anchor description - public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance) - public static final int AP_BOLD = 13; // may be interpreted as emphasized - public static final int AP_ITALICS = 14; // may be interpreted as emphasized - public static final int AP_WEAK = 15; // for Text that is small or bareley visible - public static final int AP_INVISIBLE = 16; // good for spam detection - public static final int AP_TAG = 17; // for tagged indexeing (i.e. using mp3 tags) - public static final int AP_AUTHOR = 18; // word appears in author name - public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags) - public static final int AP_TRACK = 20; // word appears in track name (i.e. 
in mp3 tags) - - // URL attributes - public static final int UA_LOCAL = 0; // URL was crawled locally - public static final int UA_TILDE = 1; // tilde appears in URL - public static final int UA_REDIRECT = 2; // The URL is a redirection - - // local flag attributes - public static final char LT_LOCAL = 'L'; - public static final char LT_GLOBAL = 'G'; - - // doctype calculation - public static char docType(URL url) { - String path = url.getPath().toLowerCase(); - // serverLog.logFinest("PLASMA", "docType URL=" + path); - char doctype = DT_UNKNOWN; - if (path.endsWith(".gif")) { doctype = DT_IMAGE; } - else if (path.endsWith(".ico")) { doctype = DT_IMAGE; } - else if (path.endsWith(".bmp")) { doctype = DT_IMAGE; } - else if (path.endsWith(".jpg")) { doctype = DT_IMAGE; } - else if (path.endsWith(".jpeg")) { doctype = DT_IMAGE; } - else if (path.endsWith(".png")) { doctype = DT_IMAGE; } - else if (path.endsWith(".html")) { doctype = DT_HTML; } - else if (path.endsWith(".txt")) { doctype = DT_TEXT; } - else if (path.endsWith(".doc")) { doctype = DT_DOC; } - else if (path.endsWith(".rtf")) { doctype = DT_DOC; } - else if (path.endsWith(".pdf")) { doctype = DT_PDFPS; } - else if (path.endsWith(".ps")) { doctype = DT_PDFPS; } - else if (path.endsWith(".avi")) { doctype = DT_MOVIE; } - else if (path.endsWith(".mov")) { doctype = DT_MOVIE; } - else if (path.endsWith(".qt")) { doctype = DT_MOVIE; } - else if (path.endsWith(".mpg")) { doctype = DT_MOVIE; } - else if (path.endsWith(".md5")) { doctype = DT_SHARE; } - else if (path.endsWith(".mpeg")) { doctype = DT_MOVIE; } - else if (path.endsWith(".asf")) { doctype = DT_FLASH; } - return doctype; - } - - public static char docType(String mime) { - // serverLog.logFinest("PLASMA", "docType mime=" + mime); - char doctype = DT_UNKNOWN; - if (mime == null) doctype = DT_UNKNOWN; - else if (mime.startsWith("image/")) doctype = DT_IMAGE; - else if (mime.endsWith("/gif")) doctype = DT_IMAGE; - else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE; - else if (mime.endsWith("/png")) doctype = DT_IMAGE; - else if (mime.endsWith("/html")) doctype = DT_HTML; - else if (mime.endsWith("/rtf")) doctype = DT_DOC; - else if (mime.endsWith("/pdf")) doctype = DT_PDFPS; - else if (mime.endsWith("/octet-stream")) doctype = DT_BINARY; - else if (mime.endsWith("/x-shockwave-flash")) doctype = DT_FLASH; - else if (mime.endsWith("/msword")) doctype = DT_DOC; - else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC; - else if (mime.endsWith("/postscript")) doctype = DT_PDFPS; - else if (mime.startsWith("text/")) doctype = DT_TEXT; - else if (mime.startsWith("image/")) doctype = DT_IMAGE; - else if (mime.startsWith("audio/")) doctype = DT_AUDIO; - else if (mime.startsWith("video/")) doctype = DT_MOVIE; - //bz2 = application/x-bzip2 - //dvi = application/x-dvi - //gz = application/gzip - //hqx = application/mac-binhex40 - //lha = application/x-lzh - //lzh = application/x-lzh - //pac = application/x-ns-proxy-autoconfig - //php = application/x-httpd-php - //phtml = application/x-httpd-php - //rss = application/xml - //tar = application/tar - //tex = application/x-tex - //tgz = application/tar - //torrent = application/x-bittorrent - //xhtml = application/xhtml+xml - //xla = application/msexcel - //xls = application/msexcel - //xsl = application/xml - //xml = application/xml - //Z = application/x-compress - //zip = application/zip - return doctype; - } - - // language calculation - public static String language(URL url) { - String language = "uk"; - String host = url.getHost(); - 
int pos = host.lastIndexOf("."); - if ((pos > 0) && (host.length() - pos == 3)) language = host.substring(pos + 1).toLowerCase(); - return language; - } - -} diff --git a/source/de/anomic/plasma/plasmaWebStructure.java b/source/de/anomic/plasma/plasmaWebStructure.java index 10be39008..78c640917 100644 --- a/source/de/anomic/plasma/plasmaWebStructure.java +++ b/source/de/anomic/plasma/plasmaWebStructure.java @@ -29,6 +29,7 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; +import java.net.MalformedURLException; import java.util.ConcurrentModificationException; import java.util.Date; import java.util.Iterator; @@ -39,10 +40,10 @@ import java.util.SortedMap; import java.util.TreeSet; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.yacy.yacyURL; public class plasmaWebStructure { @@ -92,8 +93,8 @@ public class plasmaWebStructure { } } - public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(URL url, String baseurlhash, Date docDate, plasmaParserDocument document, plasmaCondenser condenser) { - assert plasmaURL.urlHash(url).equals(baseurlhash); + public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(yacyURL url, String baseurlhash, Date docDate, plasmaParserDocument document, plasmaCondenser condenser) { + assert url.hash().equals(baseurlhash); // generate citation reference Map hl = document.getHyperlinks(); @@ -105,18 +106,20 @@ public class plasmaWebStructure { int GCount = 0; int LCount = 0; while (it.hasNext()) { - nexturlhash = plasmaURL.urlHash((String) ((Map.Entry) it.next()).getKey()); - if (nexturlhash != null) { - if (nexturlhash.substring(6).equals(lhp)) { - // this is a inbound link - cpl.append(nexturlhash.substring(0, 6)); // store only local part - LCount++; - } else { - // this is a outbound link - cpg.append(nexturlhash); // store complete hash - GCount++; + try { + nexturlhash = (new yacyURL((String) ((Map.Entry) it.next()).getKey(), null)).hash(); + if (nexturlhash != null) { + if (nexturlhash.substring(6).equals(lhp)) { + // this is a inbound link + cpl.append(nexturlhash.substring(0, 6)); // store only local part + LCount++; + } else { + // this is a outbound link + cpg.append(nexturlhash); // store complete hash + GCount++; + } } - } + } catch (MalformedURLException e) {} } // append this reference to buffer @@ -270,8 +273,8 @@ public class plasmaWebStructure { } } - private void learn(URL url, StringBuffer reference /*string of b64(12digits)-hashes*/) { - String domhash = plasmaURL.urlHash(url).substring(6); + private void learn(yacyURL url, StringBuffer reference /*string of b64(12digits)-hashes*/) { + String domhash = url.hash().substring(6); // parse the new reference string and join it with the stored references Map refs = references(domhash); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index eba6fce1d..f3ce46eb6 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -49,12 +49,12 @@ import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroRotateIterator; -import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.logging.serverLog; import 
de.anomic.server.serverMemory; import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; public final class plasmaWordIndex implements indexRI { @@ -261,7 +261,7 @@ public final class plasmaWordIndex implements indexRI { return ((long) microDateDays) * ((long) day); } - public int addPageIndex(URL url, String urlHash, Date urlModified, int size, plasmaParserDocument document, plasmaCondenser condenser, String language, char doctype, int outlinksSame, int outlinksOther) { + public int addPageIndex(yacyURL url, Date urlModified, int size, plasmaParserDocument document, plasmaCondenser condenser, String language, char doctype, int outlinksSame, int outlinksOther) { // this is called by the switchboard to put a new page into the index // use all the words in one condenser object to simultaneously create index entries @@ -280,7 +280,7 @@ public final class plasmaWordIndex implements indexRI { word = (String) wentry.getKey(); wprop = (plasmaCondenser.wordStatProp) wentry.getValue(); assert (wprop.flags != null); - ientry = new indexRWIEntry(urlHash, + ientry = new indexRWIEntry(url.hash(), urlLength, urlComps, (document == null) ? urlLength : document.getTitle().length(), wprop.count, condenser.words().size(), @@ -560,7 +560,7 @@ public final class plasmaWordIndex implements indexRI { serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread started"); indexContainer container = null; indexRWIEntry entry = null; - URL url = null; + yacyURL url = null; HashSet urlHashs = new HashSet(); Iterator indexContainerIterator = indexContainerSet(startHash, false, false, 100).iterator(); while (indexContainerIterator.hasNext() && run) { diff --git a/source/de/anomic/plasma/urlPattern/abstractURLPattern.java b/source/de/anomic/plasma/urlPattern/abstractURLPattern.java index 73207cdcb..bf480f84d 100644 --- a/source/de/anomic/plasma/urlPattern/abstractURLPattern.java +++ b/source/de/anomic/plasma/urlPattern/abstractURLPattern.java @@ -55,7 +55,7 @@ import java.util.Iterator; import java.util.Set; import de.anomic.kelondro.kelondroMSetTools; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public abstract class abstractURLPattern implements plasmaURLPattern { @@ -193,22 +193,17 @@ public abstract class abstractURLPattern implements plasmaURLPattern { return urlHashCache.contains(urlHash); } - public boolean isListed(String blacklistType, String urlHash, URL url) { + public boolean isListed(String blacklistType, yacyURL url) { Set urlHashCache = getCacheUrlHashsSet(blacklistType); - if (!urlHashCache.contains(urlHash)) { + if (!urlHashCache.contains(url.hash())) { boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile()); if (temp) { - urlHashCache.add(urlHash); + urlHashCache.add(url.hash()); } return temp; } return true; } - public final boolean isListed(String blacklistType, URL url) { - if (url == null) { return true; } - return isListed(blacklistType, url.getHost().toLowerCase(), url.getFile()); - } - } diff --git a/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java b/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java index 4655f028a..59c826e28 100644 --- a/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java +++ b/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java @@ -2,7 +2,7 @@ package de.anomic.plasma.urlPattern; import java.io.File; -import de.anomic.net.URL; +import de.anomic.yacy.yacyURL; public interface plasmaURLPattern { @@ -46,10 +46,8 @@ public interface plasmaURLPattern { public boolean
hashInBlacklistedCache(String blacklistType, String urlHash); - - public boolean isListed(String blacklistType, String urlHash, URL url); - public boolean isListed(String blacklistType, URL url); + public boolean isListed(String blacklistType, yacyURL url); public boolean isListed(String blacklistType, String hostlow, String path); diff --git a/source/de/anomic/server/serverDomains.java b/source/de/anomic/server/serverDomains.java index b4e85ccd5..f00beceff 100644 --- a/source/de/anomic/server/serverDomains.java +++ b/source/de/anomic/server/serverDomains.java @@ -37,7 +37,6 @@ import java.util.Map; import java.util.Set; import de.anomic.kelondro.kelondroMScoreCluster; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; public class serverDomains { @@ -196,13 +195,6 @@ public class serverDomains { } - // checks for local/global IP range and local IP - public static boolean isLocal(URL url) { - InetAddress hostAddress = dnsResolve(url.getHost()); - if (hostAddress == null) /* we are offline */ return false; // it is rare to be offline in intranets - return hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); - } - private static InetAddress[] localAddresses = null; static { try { @@ -214,6 +206,9 @@ public class serverDomains { public static boolean isLocal(String address) { + // attention! because this method does a dns resolve to look up an IP address, + // the result may be very slow. Consider 100 milliseconds per access + assert (address != null); // check local ip addresses diff --git a/source/de/anomic/soap/AbstractService.java b/source/de/anomic/soap/AbstractService.java deleted file mode 100644 index a9bb37393..000000000 --- a/source/de/anomic/soap/AbstractService.java +++ /dev/null @@ -1,189 +0,0 @@ -//AbstractService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. 
The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap; - -import java.io.ByteArrayInputStream; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.message.SOAPEnvelope; -import org.apache.axis.message.SOAPHeaderElement; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.data.userDB; -import de.anomic.http.httpHeader; -import de.anomic.http.httpd; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverSwitch; - -public abstract class AbstractService { - protected serverSwitch switchboard; - protected httpHeader requestHeader; - protected MessageContext messageContext; - protected ServerContext serverContext; - - protected static final boolean NO_AUTHENTICATION = false; - protected static final boolean AUTHENTICATION_NEEDED = true; - - - /** - * This function is called by the available service functions to - * extract all needed informations from the SOAP message context. - * @throws AxisFault - */ - protected void extractMessageContext(boolean authenticate) throws AxisFault { - this.messageContext = MessageContext.getCurrentContext(); - - this.switchboard = (serverSwitch) this.messageContext.getProperty(httpdSoapHandler.MESSAGE_CONTEXT_SERVER_SWITCH); - this.requestHeader = (httpHeader) this.messageContext.getProperty(httpdSoapHandler.MESSAGE_CONTEXT_HTTP_HEADER); - this.serverContext = (ServerContext) this.messageContext.getProperty(httpdSoapHandler.MESSAGE_CONTEXT_SERVER_CONTEXT); - - if (authenticate) { - String authInfo = this.doAuthentication(); - - // modify headers - // This is needed for plasmaSwitchboard.adminAuthenticated to work - this.requestHeader.put(httpHeader.AUTHORIZATION,"Basic " + authInfo); - this.requestHeader.put("CLIENTIP","localhost"); - - } - } - - /** - * Doing the user authentication. To improve security, this client - * accepts the base64 encoded and md5 hashed password directly. 
- * - * @throws AxisFault if the authentication could not be done successfully - */ - protected String doAuthentication() throws AxisFault { - // accessing the SOAP request message - Message message = this.messageContext.getRequestMessage(); - - // getting the contained soap envelope - SOAPEnvelope envelope = message.getSOAPEnvelope(); - - // getting the proper soap header containing the authorization field - SOAPHeaderElement authElement = envelope.getHeaderByName(httpdSoapHandler.serviceHeaderNamespace, "Authorization"); - if (authElement != null) { - String adminAccountBase64MD5 = this.switchboard.getConfig(httpd.ADMIN_ACCOUNT_B64MD5,""); - - // the base64 encoded and md5 hashed authentication string - String authString = authElement.getValue(); - if (authString.length() == 0) throw new AxisFault("log-in required"); - - // validate MD5 hash against the user-DB - SOAPHeaderElement userElement = envelope.getHeaderByName(httpdSoapHandler.serviceHeaderNamespace, "Username"); - if (userElement != null) { - String userName = userElement.getValue(); - userDB.Entry userEntry = ((plasmaSwitchboard)this.switchboard).userDB.md5Auth(userName,authString); - if (userEntry.hasRight(userDB.Entry.SOAP_RIGHT)) - // we need to return the ADMIN_ACCOUNT_B64MD5 here because some servlets also do - // user/admin authentication - return adminAccountBase64MD5; - } - - // validate MD5 hash against the static-admin account - if (!(adminAccountBase64MD5.equals(authString))) { - throw new AxisFault("log-in required"); - } - return adminAccountBase64MD5; - } - throw new AxisFault("log-in required"); - } - - protected Document convertContentToXML(String contentString) throws Exception { - return convertContentToXML(contentString.getBytes("UTF-8")); - } - - protected Document convertContentToXML(byte[] content) throws Exception { - Document doc = null; - try { - DocumentBuilderFactory newDocBuilderFactory = DocumentBuilderFactory.newInstance(); - -// // disable dtd validation -// newDocBuilderFactory.setValidating(false); -// newDocBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); -// newDocBuilderFactory.setFeature("http://xml.org/sax/features/validation", false); -// - DocumentBuilder newDocBuilder = newDocBuilderFactory.newDocumentBuilder(); - - ByteArrayInputStream byteIn = new ByteArrayInputStream(content); - doc = newDocBuilder.parse(byteIn); - } catch (Exception e) { - String errorMessage = "Unable to parse the search result XML data. " + e.getClass().getName() + ". 
" + e.getMessage(); - throw new Exception(errorMessage); - } - - return doc; - } - - public Document createNewXMLDocument(String rootElementName) throws ParserConfigurationException { - // creating a new document builder factory - DocumentBuilderFactory newDocBuilderFactory = DocumentBuilderFactory.newInstance(); - - // creating a new document builder - DocumentBuilder newDocBuilder = newDocBuilderFactory.newDocumentBuilder(); - - // creating a new xml document - Document newXMLDocument = newDocBuilder.newDocument(); - - if (rootElementName != null) { - // creating the xml root document - Element rootElement = newXMLDocument.createElement(rootElementName); - newXMLDocument.appendChild(rootElement); - } - - return newXMLDocument; - } - - -} diff --git a/source/de/anomic/soap/ServerContext.java b/source/de/anomic/soap/ServerContext.java deleted file mode 100644 index 1ef84e3bf..000000000 --- a/source/de/anomic/soap/ServerContext.java +++ /dev/null @@ -1,234 +0,0 @@ -package de.anomic.soap; - -//ServerContext.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. 
- -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.HashMap; - -import org.apache.axis.AxisFault; - -import de.anomic.http.httpHeader; -import de.anomic.http.httpTemplate; -import de.anomic.server.serverClassLoader; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; - -public class ServerContext { - - protected String rootPath; - protected serverClassLoader provider; - protected HashMap templates; - protected serverSwitch switchboard; - - - public ServerContext(String root, serverClassLoader cLoader, HashMap templateMap, serverSwitch switchb) { - this.rootPath = root; - this.provider = cLoader; - this.templates = templateMap; - this.switchboard = switchb; - } - - public serverClassLoader getProvider() { - return this.provider; - } - - /** - * This function is called by the service functions to - * invoke the desired server-internal method and to generate - * a output document using one of the available templates. - * - * @param templateName - * @param args - * @return the output document - * @throws AxisFault - */ - public byte[] writeTemplate(String templateName, serverObjects args, httpHeader requestHeader) throws AxisFault { - try { - // invoke servlet - serverObjects tp = invokeServlet(templateName,args,requestHeader); - - // generate output - byte[] result = buildServletOutput(templateName, tp); - return result; - } catch (Exception e) { - if (e instanceof AxisFault) throw (AxisFault) e; - - // create a new AxisFault Object - throw new AxisFault(e.getMessage()); - } - } - - public byte[] buildServletOutput(String templateName, serverObjects tp) throws AxisFault { - try { - File templateFile = getTemplateFile(templateName); - - // generating the output document - ByteArrayOutputStream o = new ByteArrayOutputStream(); - FileInputStream fis = new FileInputStream(templateFile); - httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8")); - o.close(); - fis.close(); - - // convert it into a byte array and send it back as result - byte[] result = o.toByteArray(); - return result; - } catch (Exception e) { - if (e instanceof AxisFault) throw (AxisFault) e; - - // create a new AxisFault Object - throw new AxisFault(e.getMessage()); - } - } - - public serverObjects invokeServlet(String templateName, serverObjects args, httpHeader requestHeader) throws AxisFault { - try { - // getting the template class file - File rc = getServletClassFile(templateName); - - // invoke the desired method - serverObjects tp = (serverObjects) rewriteMethod(rc).invoke(null, new Object[] {requestHeader, args, this.switchboard}); - - // testing if a authentication was needed by the invoked method - validateAuthentication(tp); - - // adding all available templates - tp.putAll(this.templates); - - // return result - return tp; - } catch (Exception e) { - if (e instanceof AxisFault) throw (AxisFault) e; - - e.printStackTrace(); - - // create a new AxisFault Object - throw new AxisFault(e.getMessage()); - } - } - - public File getTemplateFile(String templateName) { - // determining the proper class that should be invoked - File file = new File(this.rootPath, templateName); - return file; - } - - public File getServletClassFile(String templateName) { - File templateFile = getTemplateFile(templateName); - File templateClassFile = getServletClassFile(templateFile); - return templateClassFile; - } - - public File getServletClassFile(File templateFile) 
{ - File templateClassFile = rewriteClassFile(templateFile); - return templateClassFile; - } - - /** - * This method was copied from the {@link httpdFileHandler httpdFileHandler-class} - * @param template - * @return the .class-{@link File} belonging to the given template or null - * if no fitting .class-{@link File} does exist - */ - protected File rewriteClassFile(File template) { - try { - String f = template.getCanonicalPath(); - int p = f.lastIndexOf("."); - if (p < 0) return null; - f = f.substring(0, p) + ".class"; - //System.out.println("constructed class path " + f); - File cf = new File(f); - if (cf.exists()) return cf; - return null; - } catch (IOException e) { - return null; - } - } - - /** - * This method was copied from the {@link httpdFileHandler httpdFileHandler-class} - * @param classFile - * @return the resond({@link httpHeader}, {@link serverObjects}, {@link serverSwitch}) - * {@link Method} of the specified class file or null if this file doesn't contain - * such a method - */ - protected Method rewriteMethod(File classFile) { - Method m = null; - // now make a class out of the stream - try { - //System.out.println("**DEBUG** loading class file " + classFile); - Class c = this.provider.loadClass(classFile); - Class[] params = new Class[] { - httpHeader.class, // Class.forName("de.anomic.http.httpHeader"), - serverObjects.class, // Class.forName("de.anomic.server.serverObjects"), - serverSwitch.class }; // Class.forName("de.anomic.server.serverSwitch")}; - m = c.getMethod("respond", params); - } catch (ClassNotFoundException e) { - System.out.println("INTERNAL ERROR: class " + classFile + " is missing:" + e.getMessage()); - } catch (NoSuchMethodException e) { - System.out.println("INTERNAL ERROR: method respond not found in class " + classFile + ": " + e.getMessage()); - } - //System.out.println("found method: " + m.toString()); - return m; - } - - - /** - * This function is used to test if an invoked method requires authentication - * - * @param tp the properties returned by a previous method invocation - * - * @throws AxisFault if an authentication was required. - */ - protected void validateAuthentication(serverObjects tp) throws AxisFault { - // check if the servlets requests authentification - if (tp.containsKey("AUTHENTICATE")) { - throw new AxisFault("log-in required"); - } - } -} diff --git a/source/de/anomic/soap/SoapException.java b/source/de/anomic/soap/SoapException.java deleted file mode 100644 index de79c1f9a..000000000 --- a/source/de/anomic/soap/SoapException.java +++ /dev/null @@ -1,132 +0,0 @@ -//SoapException.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. 
-// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap; - -import javax.xml.namespace.QName; - -import org.apache.axis.AxisFault; -import org.apache.axis.Constants; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.message.SOAPEnvelope; -import org.apache.axis.message.SOAPFault; - -import de.anomic.http.httpHeader; - -public class SoapException extends Exception { - - private static final long serialVersionUID = 1L; - private int statusCode = 500; - private String statusText = (String) httpHeader.http1_1.get(Integer.toString(this.statusCode)); - private AxisFault fault = new AxisFault(this.statusText); - - public SoapException(int httpStatusCode, String httpStatusText, String errorMsg) { - this.statusCode = httpStatusCode; - this.statusText = httpStatusText; - this.fault = new AxisFault(errorMsg); - } - - public SoapException(int httpStatusCode, String httpStatusText, Exception e) { - super(httpStatusCode + " " + httpStatusText); - - this.statusCode = httpStatusCode; - this.statusText = httpStatusText; - - // convert the exception into an axisfault - this.fault = AxisFault.makeFault(e); - } - - public SoapException(AxisFault soapFault) { - - QName faultCode = soapFault.getFaultCode(); - if (Constants.FAULT_SOAP12_SENDER.equals(faultCode)) { - this.statusCode = 400; - this.statusText = "Bad request"; - } else if ("Server.Unauthorized".equals(faultCode.getLocalPart())) { - this.statusCode = 401; - this.statusText = "Unauthorized"; - } else { - this.statusCode = 500; - this.statusText = "Internal server error"; - } - - // convert the exception into an axisfault - this.fault = soapFault; - } - - public int getStatusCode() { - return this.statusCode; - } - - public String getStatusText() { - return this.statusText; - } - - public Object getFault() { - return this.fault; - } - - public Message getFaultMessage(MessageContext msgContext) { - Message responseMsg = msgContext.getResponseMessage(); - if (responseMsg == null) { - responseMsg = new Message(this.fault); - responseMsg.setMessageContext(msgContext); - } else { - try { - SOAPEnvelope env = responseMsg.getSOAPEnvelope(); - 
env.clearBody(); - env.addBodyElement(new SOAPFault(this.fault)); - } catch (AxisFault e) { - // Should never reach here! - } - } - return responseMsg; - } - - public String getMessage() { - return this.statusCode + " " + this.statusText; - } -} diff --git a/source/de/anomic/soap/build.xml b/source/de/anomic/soap/build.xml deleted file mode 100644 index 7215354f8..000000000 --- a/source/de/anomic/soap/build.xml +++ /dev/null @@ -1,154 +0,0 @@ - - - - A SOAP API for YaCy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/source/de/anomic/soap/httpdSoapHandler.java b/source/de/anomic/soap/httpdSoapHandler.java deleted file mode 100644 index 9c2860e1a..000000000 --- a/source/de/anomic/soap/httpdSoapHandler.java +++ /dev/null @@ -1,777 +0,0 @@ -//httpdSoapHandler.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. 
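The removed SoapException maps SOAP fault information onto an HTTP status line: SOAP 1.2 "Sender" faults become 400 Bad request, a "Server.Unauthorized" fault becomes 401 Unauthorized, and anything else 500 Internal server error. A dependency-free sketch of that mapping follows; the names are illustrative, and it works on the fault code's local part as a plain string rather than on the org.apache.axis QName used in the removed code.

    // Illustrative sketch (not YaCy code) of the status mapping performed by
    // the removed SoapException constructor.
    final class FaultStatus {
        final int code;
        final String text;

        private FaultStatus(int code, String text) {
            this.code = code;
            this.text = text;
        }

        static FaultStatus fromFaultCode(String faultCodeLocalPart) {
            if ("Sender".equals(faultCodeLocalPart)) {
                // SOAP 1.2 "Sender" faults indicate a malformed client request
                return new FaultStatus(400, "Bad request");
            }
            if ("Server.Unauthorized".equals(faultCodeLocalPart)) {
                return new FaultStatus(401, "Unauthorized");
            }
            return new FaultStatus(500, "Internal server error");
        }
    }
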
- - -package de.anomic.soap; - -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PushbackInputStream; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.Properties; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -import javax.xml.namespace.QName; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.EngineConfiguration; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.WSDDEngineConfiguration; -import org.apache.axis.configuration.FileProvider; -import org.apache.axis.deployment.wsdd.WSDDDeployment; -import org.apache.axis.deployment.wsdd.WSDDDocument; -import org.apache.axis.server.AxisServer; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; - -import de.anomic.http.httpChunkedInputStream; -import de.anomic.http.httpChunkedOutputStream; -import de.anomic.http.httpContentLengthInputStream; -import de.anomic.http.httpHeader; -import de.anomic.http.httpd; -import de.anomic.plasma.plasmaParser; -import de.anomic.server.serverClassLoader; -import de.anomic.server.serverCore; -import de.anomic.server.serverFileUtils; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.server.logging.serverLog; - -/** - * Class to accept SOAP Requests and invoke the desired soapService. - * An example how to do a soap call from php: - * - * 1, - * "exceptions" => 1)); - * - * try - * { - * $result = $client->__call("crawling", array("http://test.at"), NULL, - * new SoapHeader("http://http.anomic.de/header", "Authorization", md5(base64_encode("admin:xxxxxxx")))); - * } - * catch (SoapFault $fault) - * { - * $result = $fault->faultstring; - * } - * - * print($result); - * ?> - * - * - * - * - * @author Martin Thelian - */ -public final class httpdSoapHandler { - public static final String SOAP_HANDLER_VERSION = "YaCySOAP V0.1"; - - private serverLog theLogger; - - /* =============================================================== - * Constants needed to set some SOAP properties - * =============================================================== */ - /** - * SOAP Header Namespace needed to access the soap header field containing - * the user authentication - */ - public static final String serviceHeaderNamespace = "http://http.anomic.de/header"; - - /** - * define the needed deployment strings - */ - public static final String serviceDeploymentString = - "" - + "" - + "" - + "" - + "" - + "" - + "" - + ""; - - private static final String[] defaultServices = new String[] { - "search=de.anomic.soap.services.SearchService", - "crawl=de.anomic.soap.services.CrawlService", - "status=de.anomic.soap.services.StatusService", - "admin=de.anomic.soap.services.AdminService", - "blacklist=de.anomic.soap.services.BlacklistService", - "share=de.anomic.soap.services.ShareService", - "bookmarks=de.anomic.soap.services.BookmarkService", - "messages=de.anomic.soap.services.MessageService" - }; - - /* =============================================================== - * Constants needed to set the SOAP message context - * =============================================================== */ - /** - * CONSTANT: tge server switchboard - */ - public static 
final String MESSAGE_CONTEXT_SERVER_SWITCH = "serverSwitch"; - /** - * CONSTANT: received http headers - */ - public static final String MESSAGE_CONTEXT_HTTP_HEADER = "httpHeader"; - /** - * CONSTANT: soap utility class - */ - public static final String MESSAGE_CONTEXT_SERVER_CONTEXT = "serverContext"; - - - /* =============================================================== - * Other object fields - * =============================================================== */ - private static final Object initSync = new Object(); - - private serverClassLoader provider = null; - private HashMap templates; - private serverSwitch switchboard; - - private static AxisServer engine = null; - private File htRootPath; - private File htTemplatePath; - - private static Properties additionalServices = null; - - /** - * Constructor of this class - * @param theSwitchboard - * @throws Exception - */ - public httpdSoapHandler(serverSwitch theSwitchboard) throws Exception { - super(); - - this.switchboard = theSwitchboard; - this.theLogger = new serverLog("SOAP"); - - // create a htRootPath: system pages - if (this.htRootPath == null) { - this.htRootPath = new File(this.switchboard.getRootPath(), this.switchboard.getConfig("htRootPath","htroot")); - // if (!(htRootPath.exists())) htRootPath.mkdir(); - } - - if (this.htTemplatePath == null) { - this.htTemplatePath = new File(theSwitchboard.getRootPath(), theSwitchboard.getConfig("htTemplatePath","htroot/env/templates")); - // if (!(this.htTemplatePath.exists())) this.htTemplatePath.mkdir(); - } - - if (this.provider == null) { - this.provider = new serverClassLoader(/*this.getClass().getClassLoader()*/); - } - - if (this.templates == null) { - this.templates = loadTemplates(this.htTemplatePath); - } - - // deploy default soap services - if (engine == null) synchronized (initSync) { deployDefaultServices(); } - - // init additional soap services - if (additionalServices == null) synchronized (initSync) { deployAdditionalServices(); } - } - - private void deployDefaultServices() throws Exception { - try { - // testing if a security manager is active. - SecurityManager sm = System.getSecurityManager(); - this.theLogger.logInfo("Security Manager is: " + ((sm==null)?"not ":"") + "active"); - - // create an Axis server - this.theLogger.logInfo("Init soap engine ..."); - engine = new AxisServer(new FileProvider("server-config.wsdd")); - - // setting some options ... 
- engine.setShouldSaveConfig(false); - - } catch (Exception e) { - this.theLogger.logSevere("Unable to initialize soap engine",e); - throw e; - } catch (Error e) { - this.theLogger.logSevere("Unable to initialize soap engine",e); - throw e; - } - - try { - this.theLogger.logInfo("Deploying default services ..."); - for (int i=0; i < defaultServices.length; i++) { - String[] nextService = defaultServices[i].split("="); - this.theLogger.logInfo("Deploying service " + nextService[0] + ": " + nextService[1]); - String deploymentStr = serviceDeploymentString - .replaceAll("@serviceName@", nextService[0]) - .replaceAll("@className@", nextService[1]); - - // deploy the service - deployService(deploymentStr,engine); - } - } catch (Exception e) { - this.theLogger.logSevere("Unable to deploy default soap services.",e); - throw e; - } catch (Error e) { - this.theLogger.logSevere("Unable to deploy default soap services.",e); - throw e; - } - } - - private void deployAdditionalServices() { - additionalServices = new Properties(); - - // getting the property filename containing the file list - String fileName = this.switchboard.getConfig("soap.serviceDeploymentList",""); - if (fileName.length() > 0) { - BufferedInputStream fileInput = null; - try { - File deploymentFile = new File(this.switchboard.getRootPath(),fileName); - fileInput = new BufferedInputStream(new FileInputStream(deploymentFile)); - - // load property list - additionalServices.load(fileInput); - fileInput.close(); - - // loop through and deploy services - if (additionalServices.size() > 0) { - Enumeration serviceNameEnum = additionalServices.keys(); - while (serviceNameEnum.hasMoreElements()) { - String serviceName = (String) serviceNameEnum.nextElement(); - String className = additionalServices.getProperty(serviceName); - - String deploymentStr = serviceDeploymentString - .replaceAll("@serviceName@", serviceName) - .replaceAll("@className@", className); - - // deploy the service - deployService(deploymentStr,engine); - } - } - } catch (Exception e) { - this.theLogger.logSevere("Unable to deploy additional services: " + e.getMessage(), e); - } finally { - if (fileInput != null) try { fileInput.close(); } catch (Exception e){/* ignore this */} - } - } - } - - private InputStream getBodyInputStream(httpHeader requestHeader, PushbackInputStream body) throws SoapException{ - InputStream input; - - // getting the content length - long contentLength = requestHeader.contentLength(); - String transferEncoding = (String) requestHeader.get(httpHeader.TRANSFER_ENCODING); - String contentEncoding = (String) requestHeader.get(httpHeader.CONTENT_ENCODING); - - /* =========================================================================== - * Handle TRANSFER ENCODING - * =========================================================================== */ - if (transferEncoding != null && !transferEncoding.equalsIgnoreCase("identity")) { - // read using transfer encoding - if (transferEncoding.equalsIgnoreCase("chunked")) { - input = new httpChunkedInputStream(body); - } else { - String errorMsg = "Unsupported transfer-encoding: "+ transferEncoding; - this.theLogger.logSevere(errorMsg); - throw new SoapException(501,"Not Implemented",errorMsg); - } - } else if (contentLength > 0) { - // read contentLength bytes - input = new httpContentLengthInputStream(body,contentLength); - } else { - // read until EOF - input = body; - } - - /* =========================================================================== - * Handle CONTENT ENCODING - * 
=========================================================================== */ - try { - if (contentEncoding != null && !contentEncoding.equals("identity")) { - if (contentEncoding.equalsIgnoreCase(httpHeader.CONTENT_ENCODING_GZIP)) { - input = new GZIPInputStream(input); - } else { - String errorMsg = "Unsupported content encoding: " + contentEncoding; - this.theLogger.logSevere(errorMsg); - throw new SoapException(415,"Unsupported Media Type",errorMsg); - } - } - } catch (IOException e) { - throw new SoapException(400,"Bad Request",e); - } - - return input; - } - - /** - * HTTP HEAD method. Not needed for soap. - * @param conProp - * @param header - * @param response - * @throws IOException - * - * @see de.anomic.http.httpdHandler#doHead(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream) - */ - public void doHead(Properties conProp, httpHeader requestHeader, OutputStream clientOut) throws IOException { - sendMessage(conProp, requestHeader, clientOut, 501, "Not Implemented", "Connection method is not supported by this handler",null); - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - - - /** - * HTTP Connect Method. Not needed for SOAP - * @param conProp - * @param requestHeader - * @param clientIn - * @param clientOut - * @throws IOException - * - * @see de.anomic.http.httpdHandler#doConnect(java.util.Properties, de.anomic.http.httpHeader, java.io.InputStream, java.io.OutputStream) - */ - public void doConnect(Properties conProp, httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException { - sendMessage(conProp, requestHeader, clientOut, 501, "Not Implemented", "Connection method is not supported by this handler",null); - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - - /** - * Handle http-GET requests. For soap this is usually a query for the wsdl-file. - * Therefore we always return the wsdl file for a get request - * - * @param conProp - * @param requestHeader all received http headers - * @param response {@link OutputStream} to the client - * - * @throws IOException - * - * @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream) - */ - public void doGet(Properties conProp, httpHeader requestHeader, OutputStream response) { - MessageContext msgContext = null; - String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); - - try { - // generating message context - msgContext = this.generateMessageContext(path, requestHeader, conProp); - - if (path.equals("/soap/")) { - serverObjects args = new serverObjects(); - args.put("SOAP.engine",httpdSoapHandler.engine); - ServerContext sContext = (ServerContext) msgContext.getProperty(MESSAGE_CONTEXT_SERVER_CONTEXT); - byte[] result = sContext.writeTemplate("soap/ServiceList.html", args, requestHeader); - sendMessage(conProp, requestHeader, response, 200, "OK", "text/html; charset=utf-8", result); - } else if (path.equals("/soap/favicon.ico")) { - sendMessage(conProp, requestHeader, response, 404, "File not found", "text/plain",null); - } else { - // generating wsdl file - Document doc = generateWSDL(msgContext); - - if (doc != null) { - // TODO: what about doc.getInputEncoding()? - // TODO: what about getXmlEncoding? 
- // Converting the the wsdl document into a byte-array - String responseDoc = XMLUtils.DocumentToString(doc); - byte[] result = responseDoc.getBytes("UTF-8"); - - // send back the result - sendMessage(conProp, requestHeader, response, 200, "OK", "text/xml; charset=utf-8", result); - - if (!(requestHeader.get(httpHeader.CONNECTION, "close").equals("keep-alive"))) { - // wait a little time until everything closes so that clients can read from the streams/sockets - try {Thread.currentThread().join(200);} catch (InterruptedException e) {/* ignore this */} - } - } else { - // if we where unable to generate the wsdl file .... - String errorMsg = "Internal Server Error: Unable to generate the WSDL file."; - sendMessage(conProp, requestHeader, response, 500, "Internal Error", "text/plain",errorMsg.getBytes("UTF-8")); - } - } - - return; - } catch (Exception e) { - // handle error - handleException(conProp,requestHeader,msgContext,response,e); - } - - } - - /** - * HTTP Post method. Needed to call a soap service on this server from a soap client - * @param conProp the connection properties - * @param requestHeader the received http headers - * @param response {@link OutputStream} to the client - * @param body the request body containing the SOAP message - * - * @see de.anomic.http.httpdHandler#doPost(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream, java.io.PushbackInputStream) - */ - public void doPost(Properties conProp, httpHeader requestHeader, OutputStream response, PushbackInputStream body) { - - MessageContext msgContext = null; - String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); - try { - /* ======================================================================== - * GENERATE REQUEST MESSAGE - * ======================================================================== */ - // read the request message - InputStream bodyStream = getBodyInputStream(requestHeader, body); - - // generating the SOAP message context that will be passed over to the invoked service - msgContext = this.generateMessageContext(path, requestHeader, conProp); - - // Generating a SOAP Request Message Object - String mime = plasmaParser.getRealMimeType(requestHeader.mime()); // this is important !!!! 
- Message requestMsg = new Message( - bodyStream, - false, - mime, - (String)requestHeader.get(httpHeader.CONTENT_LOCATION) - ); - msgContext.setRequestMessage(requestMsg); - - - /* ======================================================================== - * SERVICE INVOCATION - * ======================================================================== */ - Message responseMsg = this.invokeService(msgContext); - - if (responseMsg != null) { - sendMessage(conProp, requestHeader, response, 200, "OK", responseMsg); - } else { - sendMessage(conProp, requestHeader, response, 202, "Accepted", "text/plain", null); - } - - return; - } catch (Exception e) { - // handle error - handleException(conProp, requestHeader, msgContext, response,e); - } - } - - private void handleException(Properties conProp, httpHeader requestHeader, MessageContext messageContext, OutputStream response, Exception e) { - try { - Message soapErrorMsg = null; - - if (!conProp.containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { - // if no header was send until now we can send back an error message - - SoapException soapEx = null; - if (!(e instanceof SoapException)) { - soapEx = new SoapException(500,"internal server error",e); - } else { - soapEx = (SoapException) e; - } - // generating a soap error message - soapErrorMsg = soapEx.getFaultMessage(messageContext); - - // send error message back to the client - sendMessage(conProp,requestHeader,response,soapEx.getStatusCode(),soapEx.getStatusText(),soapErrorMsg); - } else { - this.theLogger.logSevere("Unexpected Exception while sending data to client",e); - - } - } catch (Exception ex) { - // the http response header was already send. Just log the error - this.theLogger.logSevere("Unexpected Exception while sending error message",e); - } finally { - // force connection close - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - } - - private Document generateWSDL(MessageContext msgContext) throws SoapException { - try { - engine.generateWSDL(msgContext); - Document doc = (Document) msgContext.getProperty("WSDL"); - return doc; - } catch (Exception ex) { - if (ex instanceof AxisFault) throw new SoapException((AxisFault)ex); - throw new SoapException(500,"Unable to generate WSDL",ex); - } - } - - protected Message invokeService(MessageContext msgContext) throws SoapException { - try { - // invoke the service - engine.invoke(msgContext); - - // Retrieve the response from Axis - return msgContext.getResponseMessage(); - } catch (Exception ex) { - if (ex instanceof AxisFault) throw new SoapException((AxisFault)ex); - throw new SoapException(500,"Unable to invoke service",ex); - } - } - - - - /** - * This function deplays all java classes that should be available via SOAP call. 
- * - * @param deploymentString the deployment string containing detailed information about - * the java class that should be deployed - * @param theAxisServer the apache axis engine where the service should be deployed - * - * @return true if the deployment was done successfully or false - * otherwise - */ - private static boolean deployService(String deploymentString, AxisServer theAxisServer) - { - // convert WSDD file string into bytestream for furhter processing - InputStream deploymentStream = null; - if (deploymentString != null) { - deploymentStream = new ByteArrayInputStream(deploymentString.getBytes()); - Document root = null; - - try { - // build XML document from stream - root = XMLUtils.newDocument(deploymentStream); - - // parse WSDD file - WSDDDocument wsddDoc = new WSDDDocument(root); - - // get the configuration of this axis engine - EngineConfiguration config = theAxisServer.getConfig(); - - if (config instanceof WSDDEngineConfiguration) { - // get the current configuration of the Axis engine - WSDDDeployment deploymentWSDD = - ((WSDDEngineConfiguration) config).getDeployment(); - - // undeply unneeded standard services - deploymentWSDD.undeployService(new QName("Version")); - deploymentWSDD.undeployService(new QName("AdminService")); - - // deploy the new service - // an existing service with the same name gets deleted - wsddDoc.deploy(deploymentWSDD); - } - } catch (ParserConfigurationException e) { - System.err.println("Could not deploy service."); - return false; - } catch (SAXException e) { - System.err.println("Could not deploy service."); - return false; - } catch (IOException e) { - System.err.println("Could not deploy service."); - return false; - } - } else { - System.err.println("Service deployment string is NULL! SOAP Service not deployed."); - return false; - } - return true; - } - - /** - * This function is used to generate the SOAP Message Context that is handed over to - * the called service. - * This message context contains some fields needed by the service to fullfil the request. - * - * @param path the path of the request - * @param requestHeader the http headers of the request - * @param conProps TODO - * @return the generated {@link MessageContext} - * @throws SoapException - * - * @throws Exception if the {@link MessageContext} could not be generated successfully. 
- */ - private MessageContext generateMessageContext(String path, httpHeader requestHeader, Properties conProps) throws SoapException { - try { - // getting the requestes service name - String serviceName = path.substring("/soap/".length()); - - // create and initialize a message context - MessageContext msgContext = new MessageContext(httpdSoapHandler.engine); - msgContext.setTransportName("YaCy-SOAP"); - msgContext.setProperty(MessageContext.TRANS_URL, "http://" + requestHeader.get(httpHeader.HOST) + ((((String)requestHeader.get(httpHeader.HOST)).indexOf(":") > -1)?"":Integer.toString(serverCore.getPortNr(this.switchboard.getConfig("port","8080")))) + - "/soap/" + serviceName); - - // the used http verson - String version = conProps.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); - msgContext.setProperty(MessageContext.HTTP_TRANSPORT_VERSION,version); - - // generate the serverContext object - ServerContext serverContext = new ServerContext(this.htRootPath.toString(),this.provider,this.templates,this.switchboard); - msgContext.setProperty(MESSAGE_CONTEXT_SERVER_CONTEXT ,serverContext); - - // YaCy specific objects - msgContext.setProperty(MESSAGE_CONTEXT_SERVER_SWITCH,this.switchboard); - msgContext.setProperty(MESSAGE_CONTEXT_HTTP_HEADER ,requestHeader); - - // setting the service to execute - msgContext.setTargetService(serviceName); - - return msgContext; - } catch (Exception e) { - if (e instanceof AxisFault) throw new SoapException((AxisFault)e); - throw new SoapException(500,"Unable to generate message context",e); - } - } - - /** - * This method was copied from {@link httpdFileHandler}. Maybe it would be a good idea - * to move this function up into {@link httpdAbstractHandler} - * - * @param path the path to the template dir - * @return a hasmap containing all templates - */ - private static HashMap loadTemplates(File path) { - // reads all templates from a path - // we use only the folder from the given file path - HashMap result = new HashMap(); - if (path == null) return result; - if (!(path.isDirectory())) path = path.getParentFile(); - if ((path == null) || (!(path.isDirectory()))) return result; - String[] templates = path.list(); - for (int i = 0; i < templates.length; i++) { - if (templates[i].endsWith(".template")) try { - //System.out.println("TEMPLATE " + templates[i].substring(0, templates[i].length() - 9) + ": " + new String(buf, 0, c)); - result.put(templates[i].substring(0, templates[i].length() - 9), - new String(serverFileUtils.read(new File(path, templates[i])), "UTF-8")); - } catch (Exception e) {} - } - return result; - } - - /** - * TODO: handle accept-charset http header - * TODO: what about content-encoding, transfer-encoding here? 
- */ - protected void sendMessage(Properties conProp, httpHeader requestHeader, OutputStream out, int statusCode, String statusText, String contentType, byte[] MessageBody) throws IOException { - // write out the response header - respondHeader(conProp, out, statusCode, statusText, (MessageBody==null)?null:contentType, (MessageBody==null)?-1:MessageBody.length, null, null); - - // write the message body - if (MessageBody != null) out.write(MessageBody); - out.flush(); - } - - /** - * TODO: handle accept-charset http header - */ - protected void sendMessage(Properties conProp, httpHeader requestHeader, OutputStream out, int statusCode, String statusText, Message soapMessage) throws IOException, SOAPException { - httpChunkedOutputStream chunkedOut = null; - GZIPOutputStream gzipOut = null; - OutputStream bodyOut = out; - - // getting the content type of the response - String contentType = soapMessage.getContentType(soapMessage.getMessageContext().getSOAPConstants()); - - // getting the content length - String transferEncoding = null; - String contentEncoding = null; - long contentLength = -1; - - if (httpHeader.supportChunkedEncoding(conProp)) { - // we use chunked transfer encoding - transferEncoding = "chunked"; - } else { - contentLength = soapMessage.getContentLength(); - } - if (requestHeader.acceptGzip()) { - // send the response gzip encoded - contentEncoding = "gzip"; - - // we don't know the content length of the compressed body - contentLength = -1; - - // if chunked transfer encoding is not used we need to close the connection - if (!transferEncoding.equals("chunked")) { - conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,"close"); - } - } - - // sending the soap header - respondHeader(conProp, out, statusCode, statusText, contentType, contentLength, contentEncoding, transferEncoding); - - if (transferEncoding != null) bodyOut = chunkedOut = new httpChunkedOutputStream(bodyOut); - if (contentEncoding != null) bodyOut = gzipOut = new GZIPOutputStream(bodyOut); - - // sending the body - soapMessage.writeTo(bodyOut); - bodyOut.flush(); - - if (gzipOut != null) { - gzipOut.flush(); - gzipOut.finish(); - } - if (chunkedOut != null) { - chunkedOut.finish(); - } - } - - - - protected void respondHeader( - Properties conProp, - OutputStream respond, - int httpStatusCode, - String httpStatusText, - String conttype, - long contlength, - String contentEncoding, - String transferEncoding - ) throws IOException { - httpHeader outgoingHeader = new httpHeader(); - outgoingHeader.put(httpHeader.SERVER, SOAP_HANDLER_VERSION); - if (conttype != null) outgoingHeader.put(httpHeader.CONTENT_TYPE,conttype); - if (contlength != -1) outgoingHeader.put(httpHeader.CONTENT_LENGTH, Long.toString(contlength)); - if (contentEncoding != null) outgoingHeader.put(httpHeader.CONTENT_ENCODING, contentEncoding); - if (transferEncoding != null) outgoingHeader.put(httpHeader.TRANSFER_ENCODING, transferEncoding); - - // getting the http version of the soap client - String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); - - // sending http headers - httpd.sendRespondHeader(conProp,respond,httpVer,httpStatusCode,httpStatusText,outgoingHeader); - } -} diff --git a/source/de/anomic/soap/services/AdminService.java b/source/de/anomic/soap/services/AdminService.java deleted file mode 100644 index 675ccd800..000000000 --- a/source/de/anomic/soap/services/AdminService.java +++ /dev/null @@ -1,837 +0,0 @@ -//AdminService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter 
Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. 
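The sendMessage()/respondHeader() pair removed above negotiates the response encoding: chunked transfer encoding when the connection supports it, otherwise an explicit Content-Length; gzip content encoding when the client accepts it, in which case the body length is unknown and, without chunking, the connection has to be closed after the response. A compact, framework-free sketch of that decision and of wrapping the body stream accordingly (class and field names are illustrative, not YaCy's):

    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.zip.GZIPOutputStream;

    // Illustrative sketch (not YaCy code) of the encoding negotiation done by
    // the removed sendMessage(): choose chunked vs. content-length framing,
    // optionally gzip, and note whether the connection must be closed.
    final class ResponsePlan {
        String transferEncoding;   // "chunked" or null
        String contentEncoding;    // "gzip" or null
        long contentLength = -1;   // only meaningful without chunking/gzip
        boolean closeConnection;

        static ResponsePlan negotiate(boolean chunkingSupported, boolean clientAcceptsGzip, long bodyLength) {
            ResponsePlan p = new ResponsePlan();
            if (chunkingSupported) {
                p.transferEncoding = "chunked";
            } else {
                p.contentLength = bodyLength;
            }
            if (clientAcceptsGzip) {
                p.contentEncoding = "gzip";
                p.contentLength = -1;          // compressed length is unknown up front
                if (p.transferEncoding == null) {
                    p.closeConnection = true;  // no framing left, so close after the body
                }
            }
            return p;
        }

        OutputStream wrapBody(OutputStream raw) throws IOException {
            // The removed code additionally wraps a chunked-encoding stream here;
            // that class (httpChunkedOutputStream) is YaCy-specific and omitted.
            return "gzip".equals(contentEncoding) ? new GZIPOutputStream(raw) : raw;
        }
    }
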
- - -package de.anomic.soap.services; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Properties; -import java.util.logging.Handler; -import java.util.logging.LogRecord; -import java.util.logging.Logger; -import java.util.logging.XMLFormatter; - -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.http.httpRemoteProxyConfig; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; -import de.anomic.server.serverObjects; -import de.anomic.server.serverThread; -import de.anomic.server.logging.GuiHandler; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyNewsPool; -import de.anomic.yacy.yacyNewsRecord; -import de.anomic.yacy.yacySeed; - -public class AdminService extends AbstractService { - - /* ===================================================================== - * Used Plasmaswitchboard config properties - * ===================================================================== */ - private static final String _10_HTTPD = "10_httpd"; - private static final String RESTART = "restart"; - - // peer properties - private static final String PORT = "port"; - private static final String PEER_NAME = "peerName"; - - // remote proxy properties - private static final String REMOTE_PROXY_USE = "remoteProxyUse"; - private static final String REMOTE_PROXY_USE4SSL = "remoteProxyUse4SSL"; - private static final String REMOTE_PROXY_USE4YACY = "remoteProxyUse4Yacy"; - private static final String REMOTE_PROXY_NO_PROXY = "remoteProxyNoProxy"; - private static final String REMOTE_PROXY_PWD = "remoteProxyPwd"; - private static final String REMOTE_PROXY_USER = "remoteProxyUser"; - private static final String REMOTE_PROXY_PORT = "remoteProxyPort"; - private static final String REMOTE_PROXY_HOST = "remoteProxyHost"; - - // remote triggered crawl properties - private static final String CRAWL_RESPONSE = "crawlResponse"; - private static final String _62_REMOTETRIGGEREDCRAWL_BUSYSLEEP = plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP; - private static final String _62_REMOTETRIGGEREDCRAWL = plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL; - - // index transfer properties - private static final String INDEX_RECEIVE_BLOCK_BLACKLIST = "indexReceiveBlockBlacklist"; - private static final String ALLOW_RECEIVE_INDEX = "allowReceiveIndex"; - private static final String ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING = "allowDistributeIndexWhileCrawling"; - private static final String ALLOW_DISTRIBUTE_INDEX = "allowDistributeIndex"; - - // message forwarding properties - private static final String MSG_FORWARDING_TO = "msgForwardingTo"; - private static final String MSG_FORWARDING_CMD = "msgForwardingCmd"; - private static final String MSG_FORWARDING_ENABLED = "msgForwardingEnabled"; - private static final String MSG_FORWARDING = "msgForwarding"; - - // peer profile - private static final String PEERPROFILE_COMMENT = "comment"; - private static final String PEERPROFILE_MSN = "msn"; - private static final String PEERPROFILE_YAHOO = "yahoo"; - private static final String PEERPROFILE_JABBER = "jabber"; - private static final String PEERPROFILE_ICQ = "icq"; - private static final String PEERPROFILE_EMAIL = "email"; - private static final String PEERPROFILE_HOMEPAGE = "homepage"; - private static final String 
PEERPROFILE_NICKNAME = "nickname"; - private static final String PEERPROFILE_NAME = "name"; - private static final String PEER_PROFILE_FETCH_SUCCESS = "success"; - private static final String PEER_HASH = "hash"; - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_CONFIG_XML = "xml/config_p.xml"; - private static final String TEMPLATE_VERSION_XML = "xml/version.xml"; - private static final String TEMPLATE_PROFILE_XML = "ViewProfile.xml"; - private static final String TEMPLATE_PERFORMANCE_QUEUES = "PerformanceQueues_p.xml"; - - /** - * This function can be used to set a configuration option - * @param key the name of the option - * @param value the value of the option as String - * @throws AxisFault if authentication failed - */ - public void setConfigProperty(String key, String value) throws AxisFault { - // Check for errors - if ((key == null)||(key.length() == 0)) throw new IllegalArgumentException("Key must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // add key to switchboard - if (value == null) value = ""; - this.switchboard.setConfig(key,value); - } - - /** - * This function can be used to set multiple configuration option - * @param keys an array containing the names of all options - * @param values an array containing the values of all options - * @throws AxisFault if authentication failed - * @throws IllegalArgumentException if key.length != value.length - */ - public void setProperties(String[] keys, String values[]) throws AxisFault{ - // Check for errors - if ((keys == null)||(keys.length == 0)) throw new IllegalArgumentException("Key array must not be null or empty."); - if ((values == null)||(values.length == 0)) throw new IllegalArgumentException("Values array must not be null or empty."); - if (keys.length != values.length) throw new IllegalArgumentException("Invalid input. 
" + keys.length + " keys but " + values.length + " values received."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - for (int i=0; i < keys.length; i++) { - // get the key - String nextKey = keys[i]; - if ((nextKey == null)||(nextKey.length() == 0)) throw new IllegalArgumentException("Key at position " + i + " was null or empty."); - - // get the value - String nextValue = values[i]; - if (nextValue == null) nextValue = ""; - - // save the value - this.switchboard.setConfig(nextKey,nextValue); - } - } - - /** - * This function can be used to geht the value of a single configuration option - * @param key the name of the option - * @return the value of the option as string - * @throws AxisFault if authentication failed - */ - public String getConfigProperty(String key) throws AxisFault { - // Check for errors - if ((key == null)||(key.length() == 0)) throw new IllegalArgumentException("Key must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the config property - return this.switchboard.getConfig(key,null); - } - - /** - * This function can be used to query the value of multiple configuration options - * @param keys an array containing the names of the configuration options to query - * @return an array containing the values of the configuration options as string - * @throws AxisFault if authentication failed - */ - public String[] getConfigProperties(String[] keys) throws AxisFault { - // Check for errors - if ((keys == null)||(keys.length== 0)) throw new IllegalArgumentException("Key array must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the properties - ArrayList returnValues = new ArrayList(keys.length); - for (int i=0; i < keys.length; i++) { - String nextKey = keys[i]; - if ((nextKey == null)||(nextKey.length() == 0)) throw new IllegalArgumentException("Key at position " + i + " was null or empty."); - - returnValues.add(this.switchboard.getConfig(nextKey,null)); - } - - // return the result - return (String[]) returnValues.toArray(new String[keys.length]); - } - - - /** - * Returns the current configuration of this peer as XML Document - * @return a XML document of the following format - *
-     * <?xml version="1.0"?>
-     * <settings>
-     *   <option>
-	 *     <key>option-name</key>
-	 *     <value>option-value</value>
-	 *   </option>
-	 * </settings>
-     * 
- * - * @throws AxisFault if authentication failed - * @throws Exception - */ - public Document getConfigPropertyList() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_CONFIG_XML, new serverObjects(),this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Returns detailed version information about this peer - * @return a XML document of the following format - *
-     * <?xml version="1.0"?>
-     * <version>
-     *	  <number>0.48202791</number>
-     *	  <svnRevision>2791</svnRevision>
-     *	  <buildDate>20061017</buildDate>
-	 * </version>
-     * 
- * @throws AxisFault if authentication failed - * @throws Exception - */ - public Document getVersion() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_VERSION_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - public Document getPerformanceQueues() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_PERFORMANCE_QUEUES, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * This function can be used to configure the peer name - * @param newName the new name of the peer - * @throws AxisFault if authentication failed or peer name was not accepted - */ - public void setPeerName(String newName) throws AxisFault { - // Check for errors - if ((newName == null)||(newName.length() == 0)) throw new IllegalArgumentException("The peer name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the previous name - String prevName = this.switchboard.getConfig(PEER_NAME, ""); - if (prevName.equals("newName")) return; - - // take a look if there is already an other peer with this name - yacySeed oldSeed = yacyCore.seedDB.lookupByName(newName); - if (oldSeed != null) throw new AxisFault("Other peer '" + oldSeed.getName() + "/" + oldSeed.getHexHash() + "' with this name found"); - - // name must not be too short - if (newName.length() < 3) throw new AxisFault("Name is too short"); - - // name must not be too long - if (newName.length() > 80) throw new AxisFault("Name is too long."); - - // check for invalid chars - for (int i = 0; i < newName.length(); i++) { - if ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_".indexOf(newName.charAt(i)) < 0) - throw new AxisFault("Invalid char at position " + i); - } - - // use the new name - this.switchboard.setConfig(PEER_NAME, newName); - } - - /** - * Changes the port the server socket is bound to. 
- * - * Please not that after the request was accepted the server waits - * a timeout of 5 seconds before the server port binding is changed - * - * @param newPort the new server port - * @throws AxisFault if authentication failed - */ - public void setPeerPort(int newPort) throws AxisFault { - if (newPort <= 0) throw new IllegalArgumentException("Invalid port number"); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // get the old value - int oldPort = (int) this.switchboard.getConfigLong(PORT, 8080); - if (oldPort == newPort) return; - - // getting the server thread - serverCore theServerCore = (serverCore) this.switchboard.getThread(_10_HTTPD); - - // store the new value - this.switchboard.setConfig(PORT, newPort); - - // restart the port listener - // TODO: check if the port is free - theServerCore.reconnect(5000); - } - - /** - * This function can be enabled the usage of an already configured remote proxy - * @param enableProxy true to enable and false to disable remote proxy usage - * @throws AxisFault if authentication failed or remote proxy configuration is missing - */ - public void enableRemoteProxy(boolean enableProxy) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // check for errors - String proxyHost = this.switchboard.getConfig(REMOTE_PROXY_HOST, ""); - if (proxyHost.length() == 0) throw new AxisFault("Remote proxy hostname is not configured"); - - String proxyPort = this.switchboard.getConfig(REMOTE_PROXY_PORT, ""); - if (proxyPort.length() == 0) throw new AxisFault("Remote proxy port is not configured"); - - // store the new state - plasmaSwitchboard sb = (plasmaSwitchboard) this.switchboard; - sb.setConfig(REMOTE_PROXY_USE,Boolean.toString(enableProxy)); - sb.remoteProxyConfig = httpRemoteProxyConfig.init(sb); - } - - /** - * This function can be used to configured another remote proxy that should be used by - * yacy as parent proxy. - * If a parameter value is null then the current configuration value is not - * changed. 
- * - * @param enableRemoteProxy to enable or disable remote proxy usage - * @param proxyHost the remote proxy host name - * @param proxyPort the remote proxy user name - * @param proxyUserName login name for the remote proxy - * @param proxyPwd password to login to the remote proxy - * @param noProxyList a list of addresses that should not be accessed via the remote proxy - * @param useProxy4YaCy specifies if the remote proxy should be used for the yacy core protocol - * @param useProxy4SSL specifies if the remote proxy should be used for ssl - * - * @throws AxisFault if authentication failed - */ - public void setRemoteProxy( - Boolean enableRemoteProxy, - String proxyHost, - Integer proxyPort, - String proxyUserName, - String proxyPwd, - String noProxyList, - Boolean useProxy4YaCy, - Boolean useProxy4SSL - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - if (proxyHost != null) - this.switchboard.setConfig(REMOTE_PROXY_HOST, proxyHost); - - if (proxyPort != null) - this.switchboard.setConfig(REMOTE_PROXY_PORT, proxyPort.toString()); - - if (proxyUserName != null) - this.switchboard.setConfig(REMOTE_PROXY_USER, proxyUserName); - - if (proxyPwd != null) - this.switchboard.setConfig(REMOTE_PROXY_PWD, proxyPwd); - - if (noProxyList != null) - this.switchboard.setConfig(REMOTE_PROXY_NO_PROXY, noProxyList); - - if (useProxy4YaCy != null) - this.switchboard.setConfig(REMOTE_PROXY_USE4YACY, useProxy4YaCy.toString()); - - if (useProxy4SSL != null) - this.switchboard.setConfig(REMOTE_PROXY_USE4SSL, useProxy4SSL.toString()); - - // enable remote proxy usage - if (enableRemoteProxy != null) this.enableRemoteProxy(enableRemoteProxy.booleanValue()); - } - - /** - * Shutdown this peer - * @throws AxisFault if authentication failed - */ - public void shutdownPeer() throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - this.switchboard.setConfig(RESTART, "false"); - - // Terminate the peer in 3 seconds (this gives us enough time to finish the request - ((plasmaSwitchboard)this.switchboard).terminate(3000); - } - - /** - * This function can be used to configure Remote Triggered Crawling for this peer. - * - * @param enableRemoteTriggeredCrawls to enable remote triggered crawling - * @param maximumAllowedPPM to configure the maximum allowed pages per minute that should be crawled. - * Set this to 0 for unlimited crawling. 
- * - * @throws AxisFault - */ - public void setDistributedCrawling( - Boolean enableRemoteTriggeredCrawls, - Integer maximumAllowedPPM - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // if the ppm was set, change it - if (maximumAllowedPPM != null) { - long newBusySleep; - - // calculate the new sleep time for the remote triggered crawl thread - if (maximumAllowedPPM.intValue() < 1) { - // unlimited crawling - newBusySleep = 100; - } else { - // limited crawling - newBusySleep = 60000 / maximumAllowedPPM.intValue(); - if (newBusySleep < 100) newBusySleep = 100; - } - - // get the server thread - serverThread rct = this.switchboard.getThread(_62_REMOTETRIGGEREDCRAWL); - - // set the new sleep time - if (rct != null) rct.setBusySleep(newBusySleep); - - // store it - this.switchboard.setConfig(_62_REMOTETRIGGEREDCRAWL_BUSYSLEEP, Long.toString(newBusySleep)); - } - - // if set enable/disable remote triggered crawls - if (enableRemoteTriggeredCrawls != null) { - this.switchboard.setConfig(CRAWL_RESPONSE, enableRemoteTriggeredCrawls.toString()); - } - } - - public void setTransferProperties( - Boolean indexDistribution, - Boolean indexDistributeWhileCrawling, - Boolean indexReceive, - Boolean indexReceiveBlockBlacklist - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // index Distribution on/off - if (indexDistribution != null) { - this.switchboard.setConfig(ALLOW_DISTRIBUTE_INDEX, indexDistribution.toString()); - } - - // Index Distribution while crawling - if (indexDistributeWhileCrawling != null) { - this.switchboard.setConfig(ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING, indexDistributeWhileCrawling.toString()); - } - - // Index Receive - if (indexReceive != null) { - this.switchboard.setConfig(ALLOW_RECEIVE_INDEX, indexReceive.toString()); - } - - // block URLs received by DHT by blocklist - if (indexReceiveBlockBlacklist != null) { - this.switchboard.setConfig(INDEX_RECEIVE_BLOCK_BLACKLIST, indexReceiveBlockBlacklist.toString()); - } - } - - public Document getTransferProperties() throws AxisFault, ParserConfigurationException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // creating XML document - Element xmlElement = null; - Document xmlDoc = createNewXMLDocument("transferProperties"); - Element xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement(ALLOW_DISTRIBUTE_INDEX); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(ALLOW_DISTRIBUTE_INDEX,true)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(ALLOW_DISTRIBUTE_INDEX_WHILE_CRAWLING,true)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(ALLOW_RECEIVE_INDEX); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(ALLOW_RECEIVE_INDEX,true)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(INDEX_RECEIVE_BLOCK_BLACKLIST); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(INDEX_RECEIVE_BLOCK_BLACKLIST,true)))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - - /** - * Function to configure the message forwarding settings of a peer. - * @see
Peer Configuration - Message Forwarding - * - * @param enableForwarding specifies if forwarding should be enabled - * @param forwardingCommand the forwarding command to use. e.g. /usr/sbin/sendmail - * @param forwardingTo the delivery destination. e.g. root@localhost - * - * @throws AxisFault if authentication failed - */ - public void setMessageForwarding( - Boolean enableForwarding, - String forwardingCommand, - String forwardingTo - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // index Distribution on/off - if (enableForwarding != null) { - this.switchboard.setConfig(MSG_FORWARDING_ENABLED, enableForwarding.toString()); - } - - if (forwardingCommand != null) { - this.switchboard.setConfig(MSG_FORWARDING_CMD, forwardingCommand); - } - - if (forwardingTo != null) { - this.switchboard.setConfig(MSG_FORWARDING_TO, forwardingTo); - } - } - - /** - * Function to query the current message forwarding configuration of a peer. - * @see Peer Configuration - Message Forwarding - * - * @return a XML document of the following format - *
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <msgForwarding>
-     *   <msgForwardingEnabled>false</msgForwardingEnabled>
-     *   <msgForwardingCmd>/usr/sbin/sendmail</msgForwardingCmd>
-     *   <msgForwardingTo>root@localhost</msgForwardingTo>
-     * </msgForwarding>
-     * 
- * - * @throws AxisFault if authentication failed - * @throws ParserConfigurationException on XML parser errors - */ - public Document getMessageForwarding() throws AxisFault, ParserConfigurationException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // creating XML document - Element xmlElement = null; - Document xmlDoc = createNewXMLDocument(MSG_FORWARDING); - Element xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement(MSG_FORWARDING_ENABLED); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(this.switchboard.getConfigBool(MSG_FORWARDING_ENABLED,false)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(MSG_FORWARDING_CMD); - xmlElement.appendChild(xmlDoc.createTextNode(this.switchboard.getConfig(MSG_FORWARDING_CMD,""))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(MSG_FORWARDING_TO); - xmlElement.appendChild(xmlDoc.createTextNode(this.switchboard.getConfig(MSG_FORWARDING_TO,""))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - - /** - * Function to query the last peer logging records. Please note that the maximum amount of records - * depends on the peer GuiHandler logging configuration.
- * Per default a maximum of 400 entries are kept in memory. - * - * See: DATA/LOG/yacy.logging: - *
de.anomic.server.logging.GuiHandler.size = 400
- * - * @param sequenceNumber all logging records with a squence number greater than this parameter are fetched. - * - * @return a XML document of the following format - *
<?xml version="1.0" encoding="UTF-8"?>
-     * <log>
-     * <record>
-     *   <date>2006-11-03T15:35:09</date>
-     *   <millis>1162564509850</millis>
-     *   <sequence>15</sequence>
-     *   <logger>KELONDRO</logger>
-     *   <level>FINE</level>
-     *   <thread>10</thread>
-     *   <message>KELONDRO DEBUG /home/yacy/DATA/PLASMADB/ACLUSTER/indexAssortment009.db: preloaded 1 records into cache</message>
-     * </record>
-     * [...]
-     * </log>
-     * 
- * This is the default format of the java logging {@link XMLFormatter} class. - * See: Sample XML Output - * - * @throws AxisFault if authentication failed - * @throws ParserConfigurationException on XML parser errors - **/ - public Document getServerLog(Long sequenceNumber) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - Handler logHandler = null; - LogRecord[] log = null; - - // getting the root handler - Logger logger = Logger.getLogger(""); - - // take a look for the GuiHandler - Handler[] handlers = logger.getHandlers(); - for (int i=0; i", "")); - - // format the logging entries - for (int i=0; i < log.length; i++) { - buffer.append(formatter.format(log[i])); - } - - // adding tailer - buffer.append(formatter.getTail(logHandler)); - - // convert into dom - return convertContentToXML(buffer.toString()); - } - - /** - * Function to configure the profile of this peer. - * If a input parameters is null the old value will not be overwritten. - * - * @param name the name of the peer owner - * @param nickname peer owner nick name - * @param homepage - * @param email - * @param icq - * @param jabber - * @param yahoo - * @param msn - * @param comments - * - * @throws AxisFault if authentication failed - */ - public void setLocalPeerProfile( - String name, - String nickname, - String homepage, - String email, - String icq, - String jabber, - String yahoo, - String msn, - String comments - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // load peer properties - final Properties profile = new Properties(); - FileInputStream fileIn = null; - try { - fileIn = new FileInputStream(new File("DATA/SETTINGS/profile.txt")); - profile.load(fileIn); - } catch(IOException e) { - throw new AxisFault("Unable to load the peer profile"); - } finally { - if (fileIn != null) try { fileIn.close(); } catch (Exception e) {/* */} - } - - // set all properties - if (name != null) profile.setProperty(PEERPROFILE_NAME,name); - if (nickname != null) profile.setProperty(PEERPROFILE_NICKNAME,nickname); - if (homepage != null) profile.setProperty(PEERPROFILE_HOMEPAGE,homepage); - if (email != null) profile.setProperty(PEERPROFILE_EMAIL,email); - if (icq != null) profile.setProperty(PEERPROFILE_ICQ,icq); - if (jabber != null) profile.setProperty(PEERPROFILE_JABBER,jabber); - if (yahoo != null) profile.setProperty(PEERPROFILE_YAHOO,yahoo); - if (msn != null) profile.setProperty(PEERPROFILE_MSN,msn); - if (comments != null) profile.setProperty(PEERPROFILE_COMMENT,comments); - - // store it - FileOutputStream fileOut = null; - try { - fileOut = new FileOutputStream(new File("DATA/SETTINGS/profile.txt")); - profile.store(fileOut , null ); - - // generate a news message - Properties news = profile; - news.remove(PEERPROFILE_COMMENT); - yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_PROFILE_UPDATE, news)); - } catch(IOException e) { - throw new AxisFault("Unable to write profile data to file"); - } finally { - if (fileOut != null) try { fileOut.close(); } catch (Exception e) {/* */} - } - } - - /** - * Returns the peer profile of this peer - * @return a xml document in the same format as returned by function {@link #getPeerProfile(String)} - * @throws Exception - */ - public Document getLocalPeerProfile() throws Exception { - return this.getPeerProfile("localhash"); - } - - /** - * Function to query the profile of a remote peer - * @param peerhash the peer hash - * @return a xml 
document in the following format - *
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <profile>
-     * 	<status code="3">Peer profile successfully fetched</status>
-     * 	<name><![CDATA[myName]]></name>
-     * 	<nickname><![CDATA[myNickName]]></nickname>
-     * 	<homepage><![CDATA[http://myhompage.de]]></homepage>
-     * 	<email/>
-     * 	<icq/>
-     * 	<jabber/>
-     * 	<yahoo/>
-     * 	<msn/>
-     * 	<comment><![CDATA[Comments]]></comment>
-     * </profile>
-     * 
- * @throws Exception if authentication failed - */ - public Document getPeerProfile(String peerhash) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - serverObjects args = new serverObjects(); - args.put(PEER_HASH,peerhash); - - // invoke servlet - serverObjects tp = this.serverContext.invokeServlet(TEMPLATE_PROFILE_XML,args, this.requestHeader); - - // query status - if (tp.containsKey(PEER_PROFILE_FETCH_SUCCESS)) { - String success = tp.get(PEER_PROFILE_FETCH_SUCCESS,"3"); - if (success.equals("0")) throw new AxisFault("Invalid parameters passed to servlet."); - else if (success.equals("1")) throw new AxisFault("The requested peer is unknown or can not be accessed."); - else if (success.equals("2")) throw new AxisFault("The requested peer is offline"); - } else { - throw new AxisFault("Unkown error. Unable to determine profile fetching status."); - } - - - // generate output - byte[] result = this.serverContext.buildServletOutput(TEMPLATE_PROFILE_XML, tp); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - public void doGarbageCollection() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // execute garbage collection - System.gc(); - } -} diff --git a/source/de/anomic/soap/services/BlacklistService.java b/source/de/anomic/soap/services/BlacklistService.java deleted file mode 100644 index 5f570ae7e..000000000 --- a/source/de/anomic/soap/services/BlacklistService.java +++ /dev/null @@ -1,608 +0,0 @@ -//BlacklistService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -// -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. 
-// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; - -import javax.activation.DataHandler; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.Attachments; -import org.w3c.dom.Document; - -import de.anomic.data.listManager; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.urlPattern.abstractURLPattern; -import de.anomic.plasma.urlPattern.plasmaURLPattern; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; - -public class BlacklistService extends AbstractService { - - - private static final String LIST_MANAGER_LISTS_PATH = "listManager.listsPath"; - private static final String BLACKLISTS = ".BlackLists"; - //private static final String BLACKLISTS_TYPES = "BlackLists.types"; - private final static String BLACKLIST_SHARED = "BlackLists.Shared"; - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_BLACKLIST_XML = "xml/blacklists_p.xml"; - - - public boolean urlIsBlacklisted(String blacklistType, String urlString) throws AxisFault, MalformedURLException { - if (blacklistType == null || blacklistType.length() == 0) throw new IllegalArgumentException("The blacklist type must not be null or empty."); - if (urlString == null || urlString.length() == 0) throw new IllegalArgumentException("The url must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // check if we know all type passed to this function - checkForKnownBlacklistTypes(new String[]{blacklistType}); - - // check for url validity - URL url = new URL(urlString); - String hostlow = url.getHost().toLowerCase(); - String file = url.getFile(); - - // check if the specified url is listed - return (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, file)); - } - - public Document getBlacklistList() throws Exception { - try { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_BLACKLIST_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - e.printStackTrace(); - throw e; - } - } - - public void createBlacklist(String blacklistName, boolean shareBlacklist, String[] activateForBlacklistTypes) throws IOException { - // Check for errors - if ((blacklistName == 
null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - if (blacklistName.indexOf("/") != -1) - throw new IllegalArgumentException("Blacklist name must not contain '/'."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // check if we know all types passed to this function - checkForKnownBlacklistTypes(activateForBlacklistTypes); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist already exists - if (blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' already exist."); - - // creating the new file - createBlacklistFile(blacklistName); - - // share the newly created blacklist - if (shareBlacklist) doShareBlacklist(blacklistName); - - // activate blacklist - this.activateBlacklistForTypes(blacklistName,activateForBlacklistTypes); - } - - public void deleteBlacklist(String blacklistName) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // deactivate list - deativateBlacklistForAllTypes(blacklistName); - - // unshare list - doUnshareBlacklist(blacklistName); - - // delete the file - deleteBlacklistFile(blacklistName); - } - - public void shareBlacklist(String blacklistName) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // share blacklist - this.doShareBlacklist(blacklistName); - } - - public void unshareBlacklist(String blacklistName) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // share blacklist - this.doUnshareBlacklist(blacklistName); - } - - public void activateBlacklist(String blacklistName, String[] activateForBlacklistTypes) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // check if we know all types 
passed to this function - checkForKnownBlacklistTypes(activateForBlacklistTypes); - - // activate blacklist - activateBlacklistForTypes(blacklistName, activateForBlacklistTypes); - } - - public void deactivateBlacklist(String blacklistName, String[] deactivateForBlacklistTypes) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - - // check if we know all types passed to this function - checkForKnownBlacklistTypes(deactivateForBlacklistTypes); - - // activate blacklist - deactivateBlacklistForTypes(blacklistName, deactivateForBlacklistTypes); - } - - public void addBlacklistItem(String blacklistName, String blacklistItem) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - if ((blacklistItem == null)||(blacklistItem.length() == 0)) - throw new IllegalArgumentException("Blacklist item must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // prepare item - blacklistItem = prepareBlacklistItem(blacklistItem); - - // TODO: check if the entry is already in there - - // append the line to the file - addBlacklistItemToFile(blacklistItem, blacklistName); - - // pass the entry to the blacklist engine - addBlacklistItemToBlacklist(blacklistItem, blacklistName); - } - - public void removeBlacklistItem(String blacklistName, String blacklistItem) throws AxisFault { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - if ((blacklistItem == null)||(blacklistItem.length() == 0)) - throw new IllegalArgumentException("Blacklist item must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) - throw new AxisFault("Blacklist with name '" + blacklistName + "' does not exist."); - - // prepare item - blacklistItem = prepareBlacklistItem(blacklistItem); - - // remove blacklist from file - removeBlacklistItemFromBlacklistFile(blacklistItem,blacklistName); - - // remove it from the blacklist engine - removeBlacklistItemFromBlacklist(blacklistItem,blacklistName); - } - - public void importBlacklist(String blacklistName) throws IOException, SOAPException { - // Check for errors - if ((blacklistName == null)||(blacklistName.length() == 0)) - throw new IllegalArgumentException("Blacklist name must not be null or empty."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // check if the blacklist file exists - if (!blacklistExists(blacklistName)) 
{ - // create blacklist - createBlacklistFile(blacklistName); - } - - // get attachment - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the request message - Message reqMsg = msgContext.getRequestMessage(); - - // getting the attachment implementation - Attachments messageAttachments = reqMsg.getAttachmentsImpl(); - if (messageAttachments == null) { - throw new AxisFault("Attachments not supported"); - } - - int attachmentCount= messageAttachments.getAttachmentCount(); - if (attachmentCount == 0) - throw new AxisFault("No attachment found"); - else if (attachmentCount != 1) - throw new AxisFault("Too many attachments as expected."); - - // getting the attachments - AttachmentPart[] attachments = (AttachmentPart[])messageAttachments.getAttachments().toArray(new AttachmentPart[attachmentCount]); - - // getting the content of the attachment - DataHandler dh = attachments[0].getDataHandler(); - - PrintWriter writer = null; - BufferedReader reader = null; - try { - // getting a reader - reader = new BufferedReader(new InputStreamReader(dh.getInputStream(),"UTF-8")); - - // getting blacklist file writer - writer = getBlacklistFileWriter(blacklistName); - - // read new item - String blacklistItem = null; - while ((blacklistItem = reader.readLine()) != null) { - // convert it into a proper format - blacklistItem = prepareBlacklistItem(blacklistItem); - - // TODO: check if the item already exits - - // write item to blacklist file - writer.println(blacklistItem); - writer.flush(); - - // inform blacklist engine about new item - addBlacklistItemToBlacklist(blacklistItem, blacklistName); - } - } finally { - if (reader != null) try { reader.close(); } catch (Exception e) {/* */} - if (writer != null) try { writer.close(); } catch (Exception e) {/* */} - } - } - - public String[] getBlacklistTypes() throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // initialize the list manager - initBlacklistManager(); - - // return supported types - return getSupportedBlacklistTypeArray(); - } - - private void addBlacklistItemToBlacklist(String blacklistItem, String blacklistName) { - // split the item into host part and path - String[] itemParts = getBlacklistItemParts(blacklistItem); - - // getting the supported blacklist types - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - - // loop through the various types - for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { - - // if the current blacklist is activated for the type, add the item to the list - if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + BLACKLISTS,blacklistName)) { - plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],itemParts[0], itemParts[1]); - } - } - } - - private void addBlacklistItemToFile(String blacklistItem, String blacklistName) throws AxisFault { - PrintWriter pw = null; - try { - pw = getBlacklistFileWriter(blacklistName); - pw.println(blacklistItem); - pw.flush(); - pw.close(); - } catch (IOException e) { - throw new AxisFault("Unable to append blacklist entry.",e); - } finally { - if (pw != null) try { pw.close(); } catch (Exception e){ /* */} - } - } - - private PrintWriter getBlacklistFileWriter(String blacklistName) throws AxisFault { - try { - return new PrintWriter(new FileWriter(getBlacklistFile(blacklistName), true)); - } catch (IOException e) { - throw new AxisFault("Unable to open blacklist file.",e); - } - } - - private void 
removeBlacklistItemFromBlacklistFile(String blacklistItem, String blacklistName) { - // load blacklist data from file - ArrayList list = listManager.getListArray(getBlacklistFile(blacklistName)); - - // delete the old entry from file - if (list != null) { - for (int i=0; i < list.size(); i++) { - if (((String)list.get(i)).equals(blacklistItem)) { - list.remove(i); - break; - } - } - listManager.writeList(getBlacklistFile(blacklistName), (String[])list.toArray(new String[list.size()])); - } - } - - private void removeBlacklistItemFromBlacklist(String blacklistItem, String blacklistName) { - String[] itemParts = getBlacklistItemParts(blacklistItem); - - // getting the supported blacklist types - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - - // loop through the various types - for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { - - // if the current blacklist is activated for the type, remove the item from the list - if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + BLACKLISTS,blacklistName)) { - plasmaSwitchboard.urlBlacklist.remove(supportedBlacklistTypes[blTypes],itemParts[0], itemParts[1]); - } - } - } - - private String prepareBlacklistItem(String blacklistItem) { - if (blacklistItem == null) throw new NullPointerException("Item is null"); - - // cut of heading http:// - if (blacklistItem.startsWith("http://") ){ - blacklistItem = blacklistItem.substring("http://".length()); - } - - // adding missing parts - int pos = blacklistItem.indexOf("/"); - if (pos < 0) { - // add default empty path pattern - blacklistItem = blacklistItem + "/.*"; - } - return blacklistItem; - } - - private String[] getBlacklistItemParts(String blacklistItem) { - if (blacklistItem == null) throw new NullPointerException("Item is null"); - - int pos = blacklistItem.indexOf("/"); - if (pos == -1) throw new IllegalArgumentException("Item format is not correct."); - - return new String[] { - blacklistItem.substring(0, pos), - blacklistItem.substring(pos + 1) - }; - } - - /* not used - private String[] getSharedBlacklistArray() { - String sharedBlacklists = this.switchboard.getConfig(BLACKLIST_SHARED, ""); - String[] supportedBlacklistTypeArray = sharedBlacklists.split(","); - return supportedBlacklistTypeArray; - } - */ - - private File getBlacklistFile(String blacklistName) { - File blacklistFile = new File(listManager.listsPath, blacklistName); - return blacklistFile; - } - - private boolean blacklistExists(String blacklistName) { - File blacklistFile = getBlacklistFile(blacklistName); - return blacklistFile.exists(); - } - - /* not used - private HashSet getSharedBlacklistSet() { - HashSet supportedTypesSet = new HashSet(Arrays.asList(getSharedBlacklistArray())); - return supportedTypesSet; - } - */ - - private String[] getSupportedBlacklistTypeArray() { - String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING; - String[] supportedBlacklistTypeArray = supportedBlacklistTypesStr.split(","); - return supportedBlacklistTypeArray; - } - - private void createBlacklistFile(String blacklistName) throws IOException { - File newFile = getBlacklistFile(blacklistName); - newFile.createNewFile(); - } - - private void deleteBlacklistFile(String blacklistName) { - File BlackListFile = new File(listManager.listsPath, blacklistName); - BlackListFile.delete(); - } - - private void doShareBlacklist(String blacklistName) { - listManager.addListToListslist(BLACKLIST_SHARED, blacklistName); - } - - private void doUnshareBlacklist(String 
blacklistName) { - listManager.removeListFromListslist(BLACKLIST_SHARED, blacklistName); - } - - private void initBlacklistManager() { - // init Manager properties - if (listManager.switchboard == null) - listManager.switchboard = (plasmaSwitchboard) this.switchboard; - - if (listManager.listsPath == null) - listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig(LIST_MANAGER_LISTS_PATH, "DATA/LISTS")); - } - - /* not used - private void ativateBlacklistForAllTypes(String blacklistName) { - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - this.activateBlacklistForTypes(blacklistName,supportedBlacklistTypes); - } - */ - - private void activateBlacklistForTypes(String blacklistName, String[] activateForBlacklistTypes) { - if (activateForBlacklistTypes == null) return; - - for (int blTypes=0; blTypes < activateForBlacklistTypes.length; blTypes++) { - listManager.addListToListslist(activateForBlacklistTypes[blTypes] + BLACKLISTS, blacklistName); - } - } - - private void deativateBlacklistForAllTypes(String blacklistName) { - String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray(); - this.deactivateBlacklistForTypes(blacklistName,supportedBlacklistTypes); - } - - private void deactivateBlacklistForTypes(String blacklistName, String[] deactivateForBlacklistTypes) { - if (deactivateForBlacklistTypes == null) return; - - for (int blTypes=0; blTypes < deactivateForBlacklistTypes.length; blTypes++) { - listManager.removeListFromListslist(deactivateForBlacklistTypes[blTypes] + BLACKLISTS, blacklistName); - } - } - - private HashSet getSupportedBlacklistTypeSet() { - HashSet supportedTypesSet = new HashSet(Arrays.asList(getSupportedBlacklistTypeArray())); - return supportedTypesSet; - } - - private void checkForKnownBlacklistTypes(String[] types) throws AxisFault { - if (types == null) return; - - // get kown blacklist types - HashSet supportedTypesSet = getSupportedBlacklistTypeSet(); - - // check if we know all types stored in the array - for (int i=0; i < types.length; i++) { - if (!supportedTypesSet.contains(types[i])) - throw new AxisFault("Unknown blaclist type '" + types[i] + "' at position " + i); - } - } - -} diff --git a/source/de/anomic/soap/services/BookmarkService.java b/source/de/anomic/soap/services/BookmarkService.java deleted file mode 100644 index f837445a6..000000000 --- a/source/de/anomic/soap/services/BookmarkService.java +++ /dev/null @@ -1,511 +0,0 @@ -//BookmarkService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. 
-// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - - -package de.anomic.soap.services; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashMap; -import java.util.HashSet; - -import javax.activation.DataHandler; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.Attachments; -import org.w3c.dom.Document; - -import de.anomic.data.bookmarksDB; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyNewsPool; -import de.anomic.yacy.yacyNewsRecord; - -public class BookmarkService extends AbstractService { - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_BOOKMARK_LIST_GET_XML = "xml/bookmarks/posts/get.xml"; - private static final String TEMPLATE_BOOKMARK_LIST_ALL_XML = "xml/bookmarks/posts/all.xml"; - private static final String TEMPLATE_BOOKMARK_TAGS_XML = "xml/bookmarks/tags/get.xml"; - - /** - * @return a handler to the YaCy Bookmark DB - */ - private bookmarksDB getBookmarkDB() { - assert (this.switchboard != null) : "Switchboard object is null"; - assert (this.switchboard instanceof plasmaSwitchboard) : "Incorrect switchboard object"; - assert (((plasmaSwitchboard)this.switchboard).bookmarksDB != null) : "Bookmark DB is null"; - - return ((plasmaSwitchboard)this.switchboard).bookmarksDB; - } - - /** - * @return returns the input stream of a soap attachment - * @throws AxisFault if no attachment was found or attachments are not supported - * @throws SOAPException if attachment decoding didn't work - * @throws IOException on attachment read errors - */ - private InputStream getAttachmentInputstream() throws AxisFault, SOAPException, IOException { - // 
get the current message context - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the request message - Message reqMsg = msgContext.getRequestMessage(); - - // getting the attachment implementation - Attachments messageAttachments = reqMsg.getAttachmentsImpl(); - if (messageAttachments == null) { - throw new AxisFault("Attachments not supported"); - } - - int attachmentCount= messageAttachments.getAttachmentCount(); - if (attachmentCount == 0) - throw new AxisFault("No attachment found"); - else if (attachmentCount != 1) - throw new AxisFault("Too many attachments as expected."); - - // getting the attachments - AttachmentPart[] attachments = (AttachmentPart[])messageAttachments.getAttachments().toArray(new AttachmentPart[attachmentCount]); - - // getting the content of the attachment - DataHandler dh = attachments[0].getDataHandler(); - - // return the input stream - return dh.getInputStream(); - } - - /** - * Converts an array of tags into a HashSet - * @param tagArray the array of tags - * @return the HashSet - */ - private HashSet tagArrayToHashSet(String[] tagArray) { - HashSet tagSet = new HashSet(); - if (tagArray == null) return tagSet; - - for (int i=0; i < tagArray.length; i++) { - String nextTag = tagArray[i].trim(); - if (nextTag.length() > 0) tagSet.add(nextTag); - } - - return tagSet; - } - - /** - * Converts the tag array into a space separated list - * @param tagArray the tag array - * @return space separated list of tags - */ - private String tagArrayToSepString(String[] tagArray, String sep) { - StringBuffer buffer = new StringBuffer(); - - for (int i=0; i < tagArray.length; i++) { - String nextTag = tagArray[i].trim(); - if (nextTag.length() > 0) { - if (i > 0) buffer.append(sep); - buffer.append(nextTag); - } - } - - return buffer.toString(); - } - - /** - * To publish a YaCy news message that a new bookmark was added. 
- * This is only done for public bookmarks - * @param url the url of the bookmark - * @param title bookmark title - * @param description bookmark description - * @param tags array of tags - */ - private void publisNewBookmarkNews(String url, String title, String description, String[] tags) { - if (title == null) title = ""; - if (description == null) description = ""; - if (tags == null || tags.length == 0) tags = new String[]{"unsorted"}; - - // convert tag array into hashset - String tagString = tagArrayToSepString(tags," "); - - // create a news message - HashMap map = new HashMap(); - map.put("url", url.replace(',', '|')); - map.put("title", title.replace(',', ' ')); - map.put("description", description.replace(',', ' ')); - map.put("tags", tagString); - yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_BOOKMARK_ADD, map)); - } - - /** - * Sets the properties of a {@link bookmarksDB.Bookmark} object - * @param isEdit specifies if we are in edit mode or would like to create a new bookmark - * @param bookmark the {@link bookmarksDB.Bookmark} object - * - * @param isPublic specifies if the bookmark is public - * @param url the url of the bookmark - * @param title bookmark title - * @param description bookmark description - * @param tags array of tags - */ - private void setBookmarkProperties(boolean isEdit, bookmarksDB.Bookmark bookmark, String url, String title, String description, Boolean isPublic, String[] tags) { - - if (!isEdit) { - if (url == null || url.length()==0) throw new IllegalArgumentException("The url must not be null or empty"); - if (title == null) title = ""; - if (description == null) description = ""; - if (tags == null || tags.length == 0) tags = new String[]{"unsorted"}; - if (isPublic == null) isPublic = Boolean.FALSE; - } - - // convert tag array into hashset - HashSet tagSet = null; - if (tags != null) tagSet = tagArrayToHashSet(tags); - - // set properties - if (url != null) bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_URL, url); - if (title != null) bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_TITLE, title); - if (description != null)bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_DESCRIPTION, description); - if (isPublic != null) bookmark.setPublic(isPublic.booleanValue()); - if (tags != null) bookmark.setTags(tagSet, true); - } - - /** - * Function to add a new bookmark to the yacy bookmark DB. 
- * - * @param isPublic specifies if the bookmark is public - * @param url the url of the bookmark - * @param title bookmark title - * @param description bookmark description - * @param tags array of tags - * - * @return the url hash of the newly created bookmark - * - * @throws AxisFault if authentication failed - */ - public String addBookmark( - String url, - String title, - String description, - String[] tags, - Boolean isPublic - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (url == null || url.length()==0) throw new IllegalArgumentException("The url must not be null or empty"); - - // create new bookmark object - bookmarksDB.Bookmark bookmark = getBookmarkDB().createBookmark(url, "admin"); //FIXME: "admin" can be user.getUserName() for users with bookmarkrights - - // set bookmark properties - if(bookmark != null){ - this.setBookmarkProperties(false,bookmark,url,title,description,isPublic,tags); - if (isPublic != null && isPublic.booleanValue()) { - // create a news message - publisNewBookmarkNews(url,title,description,tags); - } - getBookmarkDB().saveBookmark(bookmark); - } else { - throw new AxisFault("Unable to create bookmark. Unknown reason."); - } - - return bookmark.getUrlHash(); - } - - /** - * Function to delete a bookmark from the yacy bookmark db - * - * @param urlHash the url hash to identify the bookmark - * - * @throws AxisFault if authentication failed - */ - public void deleteBookmarkByHash(String urlHash) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url hash must not be null or empty"); - - // delete bookmark - getBookmarkDB().removeBookmark(urlHash); - } - - public void deleteBookmarksByHash(String[] urlHashs) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (urlHashs == null || urlHashs.length==0) throw new IllegalArgumentException("The url hash array must not be null or empty"); - - for (int i=0; i < urlHashs.length; i++) { - String urlHash = urlHashs[i]; - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url hash at position " + i + " is null or empty."); - - // delete bookmark - getBookmarkDB().removeBookmark(urlHash); - } - } - - public void deleteBookmark(String url) throws AxisFault { - if (url == null || url.length()==0) throw new IllegalArgumentException("The url must not be null or empty"); - - // generating the url hash - String hash = plasmaURL.urlHash(url); - - // delete url - this.deleteBookmarkByHash(hash); - } - - public void deleteBookmarks(String[] urls) throws AxisFault { - if (urls == null || urls.length==0) throw new IllegalArgumentException("The url array must not be null or empty"); - - String[] hashs = new String[urls.length]; - for (int i=0; i < urls.length; i++) { - String url = urls[i]; - if (url == null || url.length()==0) throw new IllegalArgumentException("The url at position " + i + " is null or empty"); - - // generating the url hash - hashs[i] = plasmaURL.urlHash(url); - } - - // delete url - this.deleteBookmarksByHash(hashs); - } - - - public String bookmarkIsKnown(String url) throws AxisFault { - String urlHash = plasmaURL.urlHash(url); - return this.bookmarkIsKnownByHash(urlHash); - } - - public String bookmarkIsKnownByHash(String urlHash) throws AxisFault { - // extracting the message context - 
extractMessageContext(AUTHENTICATION_NEEDED); - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url-hash must not be null or empty"); - - // get the bookmark object - bookmarksDB.Bookmark bookmark = getBookmarkDB().getBookmark(urlHash); - - // set bookmark properties - if(bookmark == null) return null; - return bookmark.getTagsString(); - } - - /** - * Function to change the properties of a bookmark stored in the YaCy Bookmark DB - * - * @param urlHash the url hash to identify the bookmark - * @param isPublic specifies if the bookmark is public - * @param url the changed url of the bookmark - * @param title the changed bookmark title - * @param description the changed bookmark description - * @param tags the changed array of tags - * - * @return the url hash of the changed bookmark (this will be different to the urlHash input parameter if the bookmark url was changed - * - * @throws AxisFault if authentication failed - */ - public String editBookmark( - String urlHash, - String url, - String title, - String description, - String[] tags, - Boolean isPublic - ) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - if (urlHash == null || urlHash.length()==0) throw new IllegalArgumentException("The url hash must not be null or empty"); - - // getting the bookmark - bookmarksDB.Bookmark bookmark = getBookmarkDB().getBookmark(urlHash); - if (bookmark == null) throw new AxisFault("Bookmark with hash " + urlHash + " could not be found"); - - // edit values - setBookmarkProperties(true,bookmark,url,title,description,isPublic,tags); - - // return the url has (may have been changed) - return bookmark.getUrlHash(); - } - - /** - * To rename a bookmark tag - * @param oldTagName the old tag name - * @param newTagName the new name - * @throws AxisFault if authentication failed - */ - public void renameTag(String oldTagName, String newTagName) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - if (oldTagName == null || oldTagName.length()==0) throw new IllegalArgumentException("The old tag name not be null or empty"); - if (newTagName == null || newTagName.length()==0) throw new IllegalArgumentException("The nwe tag name not be null or empty"); - - boolean success = getBookmarkDB().renameTag(oldTagName,newTagName); - if (!success) throw new AxisFault("Unable to rename tag. Unknown reason."); - } - - /** - * Returns the list of bookmarks stored in the bookmark db - * @param tag the tag name for which the corresponding bookmarks should be fetched - * @param date the bookmark date - * - * @return a XML document of the following format: - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <posts>
-	 *   <post description="YaCy Bookmarks Test" extended="YaCy Bookmarks junit test" hash="c294613d42343009949c0369bc56f722" href="http://www.yacy.de/testurl2" tag="bookmarks testing yacy" time="2006-11-04T14:33:01Z"/>
-	 * </posts>
-	 * 
-	 * 
-	 * @throws AxisFault if authentication failed
-	 * @throws Exception if xml generation failed
-	 */
-	public Document getBookmarkList(String tag, String date) throws Exception {
-		
-        // extracting the message context
-        extractMessageContext(AUTHENTICATION_NEEDED);          	
-        
-        // generating the template containing the network status information
-        serverObjects args = new serverObjects();
-        args.put("extendedXML", "");
-        if (tag != null) args.put("tag",tag);
-        if (date != null) args.put("date",date);
-        
-        byte[] result = this.serverContext.writeTemplate((date != null)?TEMPLATE_BOOKMARK_LIST_GET_XML:TEMPLATE_BOOKMARK_LIST_ALL_XML, args, this.requestHeader);
-        
-        // sending back the result to the client
-        return this.convertContentToXML(result);    		
-	}
-	
-	/**
-	 * Returns the list of bookmark tags for which bookmarks exists in the YaCy bookmark db
-	 * 
-	 * @return a XML document of the following format:
-	 * 
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <tags>
-	 *   <tag count="1" tag="bookmarks"/>
-	 *   <tag count="1" tag="testing"/>
-	 *   <tag count="1" tag="yacy"/>
-	 * </tags>
-	 * 
-	 * 
-	 * @throws AxisFault if authentication failed
-	 * @throws Exception if xml generation failed
-	 */	
-	public Document getBookmarkTagList() throws Exception {
-		
-        // extracting the message context
-        extractMessageContext(AUTHENTICATION_NEEDED);          	
-        
-        // generate the xml document
-        byte[] result = this.serverContext.writeTemplate(TEMPLATE_BOOKMARK_TAGS_XML, new serverObjects(), this.requestHeader);
-        
-        // sending back the result to the client
-        return this.convertContentToXML(result);    
-	}
-	
-	/**
-	 * Function to import YaCy from XML (transfered via SOAP Attachment).
- * This function expects a xml document in the same format as returned by - * function {@link #getBookmarkList(String, String)}. - * @param isPublic specifies if the imported bookmarks are public or local - * @return the amount of imported bookmarks - * - * @throws SOAPException if there is no data in the attachment - * @throws IOException if authentication failed or the attachment could not be read - */ - public int importBookmarkXML(boolean isPublic) throws SOAPException, IOException { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the attachment input stream - InputStream xmlIn = getAttachmentInputstream(); - - // import bookmarks - int importCount = getBookmarkDB().importFromXML(xmlIn, isPublic); - - // return amount of imported bookmarks - return importCount; - } - - /** - * Function to import YaCy from a html document (transfered via SOAP Attachment).
- * This function expects a well formed html document. - * - * @param baseURL the base url. This is needed to generate absolut URLs from relative URLs - * @param tags a list of bookmarks tags that should be assigned to the new bookmarks - * @param isPublic specifies if the imported bookmarks are public or local - * @return the amount of imported bookmarks - * - * @throws SOAPException if there is no data in the attachment - * @throws IOException if authentication failed or the attachment could not be read - */ - public int importHtmlBookmarkFile(String baseURL, String[] tags, boolean isPublic) throws SOAPException, IOException { - if (tags == null || tags.length == 0) tags = new String[]{"unsorted"}; - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the attachment input stream - InputStream htmlIn = getAttachmentInputstream(); - InputStreamReader htmlReader = new InputStreamReader(htmlIn,"UTF-8"); - - // import bookmarks - URL theBaseURL = new URL(baseURL); - String tagList = tagArrayToSepString(tags,","); - int importCount = getBookmarkDB().importFromBookmarks(theBaseURL,htmlReader, tagList,isPublic); - - // return amount of imported bookmarks - return importCount; - } -} diff --git a/source/de/anomic/soap/services/CrawlService.java b/source/de/anomic/soap/services/CrawlService.java deleted file mode 100644 index e856f92b9..000000000 --- a/source/de/anomic/soap/services/CrawlService.java +++ /dev/null @@ -1,229 +0,0 @@ -//CrawlService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. 
-// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; - -public class CrawlService extends AbstractService { - - private static final String GLOBALCRAWLTRIGGER = "globalcrawltrigger"; - private static final String REMOTETRIGGEREDCRAWL = "remotetriggeredcrawl"; - private static final String LOCAL_CRAWL = "localCrawl"; - private static final String CRAWL_STATE = "crawlState"; - - - /** - * Constant: template for crawling - */ - private static final String TEMPLATE_CRAWLING = "QuickCrawlLink_p.xml"; - - /** - * Function to crawl a single link with depth 0 - */ - public Document crawlSingleUrl(String crawlingURL) throws AxisFault { - return this.crawling(crawlingURL, "CRAWLING-ROOT", new Integer(0), ".*", Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, null, Boolean.TRUE); - } - - public Document crawling( - String crawlingURL, - String crawljobTitel, - Integer crawlingDepth, - String crawlingFilter, - Boolean indexText, - Boolean indexMedia, - Boolean crawlingQ, - Boolean storeHTCache, - Boolean crawlOrder, - String crawlOrderIntention, - Boolean xsstopw - ) throws AxisFault { - try { - // extracting the message context - extractMessageContext(true); - - // setting the crawling properties - serverObjects args = new serverObjects(); - args.put("url",crawlingURL); - if (crawljobTitel != null && crawljobTitel.length() > 0) - args.put("title",crawljobTitel); - if (crawlingFilter != null && crawlingFilter.length() > 0) - args.put("crawlingFilter",crawlingFilter); - if (crawlingDepth != null && crawlingDepth.intValue() > 0) - args.put("crawlingDepth",crawlingDepth.toString()); - if (indexText != null) - args.put("indexText",indexText.booleanValue()?"on":"off"); - if (indexMedia != null) - args.put("indexMedia",indexMedia.booleanValue()?"on":"off"); - if (crawlingQ != null) - args.put("crawlingQ",crawlingQ.booleanValue()?"on":"off"); - if (storeHTCache != null) - args.put("storeHTCache",storeHTCache.booleanValue()?"on":"off"); - if (crawlOrder != null) - args.put("crawlOrder",crawlOrder.booleanValue()?"on":"off"); - if (crawlOrderIntention != null) - args.put("intention",crawlOrderIntention); - if (xsstopw != null) - args.put("xsstopw",xsstopw.booleanValue()?"on":"off"); - - // triggering the crawling - byte[] result = this.serverContext.writeTemplate(TEMPLATE_CRAWLING, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - /** - * Function to pause crawling of local crawl jobs, remote crawl jobs and sending of remote crawl job triggers - * @throws AxisFault - */ - public void pauseCrawling() throws AxisFault { - this.pauseResumeCrawling(Boolean.TRUE, Boolean.TRUE, Boolean.TRUE); - } - - /** - * Function to resume crawling of local crawl jobs, remote crawl jobs and 
sending of remote crawl job triggers - * @throws AxisFault - */ - public void resumeCrawling() throws AxisFault { - this.pauseResumeCrawling(Boolean.FALSE, Boolean.FALSE, Boolean.FALSE); - } - - /** - * Function to pause or resume crawling of local crawl jobs, remote crawl jobs and sending of remote crawl job triggers - * @param localCrawl if null current status is not changed. pause local crawls if true or - * resumes local crawls if false - * @param remoteTriggeredCrawl if null current status is not changed. pause remote crawls if true or - * resumes remote crawls if false - * @param globalCrawlTrigger if null current status is not changed. stops sending of global crawl triggers to other peers if true or - * resumes sending of global crawl triggers if false - * @throws AxisFault - */ - public void pauseResumeCrawling(Boolean localCrawl, Boolean remoteTriggeredCrawl, Boolean globalCrawlTrigger) throws AxisFault { - // extracting the message context - extractMessageContext(true); - - if (localCrawl != null) { - if (localCrawl.booleanValue()) { - ((plasmaSwitchboard)this.switchboard).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); - } else { - ((plasmaSwitchboard)this.switchboard).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); - } - } - - if (remoteTriggeredCrawl != null) { - if (remoteTriggeredCrawl.booleanValue()) { - ((plasmaSwitchboard)this.switchboard).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - } else { - ((plasmaSwitchboard)this.switchboard).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - } - } - - if (globalCrawlTrigger != null) { - if (globalCrawlTrigger.booleanValue()) { - ((plasmaSwitchboard)this.switchboard).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); - } else { - ((plasmaSwitchboard)this.switchboard).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); - } - } - } - - /** - * Function to query the current state of the following crawling queues: - *
- * <ul>
- * <li>local crawl jobs</li>
- * <li>remote crawl jobs</li>
- * <li>sending of remote crawl job triggers</li>
- * </ul>
- * @return an XML document in the following format
- * <pre>
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <crawlState>
-     * 	<localCrawl>true</localCrawl>
-     * 	<remotetriggeredcrawl>false</remotetriggeredcrawl>
-     * 	<globalcrawltrigger>false</globalcrawltrigger>
-     * </crawlState>
-     * </pre>
- * @throws AxisFault if authentication failed - * @throws ParserConfigurationException if xml generation failed - */ - public Document getCrawlPauseResumeState() throws AxisFault, ParserConfigurationException { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - plasmaSwitchboard sb = (plasmaSwitchboard)this.switchboard; - - // creating XML document - Element xmlElement = null; - Document xmlDoc = createNewXMLDocument(CRAWL_STATE); - Element xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement(LOCAL_CRAWL); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(REMOTETRIGGEREDCRAWL); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement(GLOBALCRAWLTRIGGER); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER)))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - -} diff --git a/source/de/anomic/soap/services/MessageService.java b/source/de/anomic/soap/services/MessageService.java deleted file mode 100644 index 0fdcadc5d..000000000 --- a/source/de/anomic/soap/services/MessageService.java +++ /dev/null @@ -1,321 +0,0 @@ -//MessageService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. 
-// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; - -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import de.anomic.data.messageBoard; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyClient; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeed; - -public class MessageService extends AbstractService { - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_MESSAGE_HEADER_LIST_XML = "Messages_p.xml"; - - /* ===================================================================== - * Other used constants - * ===================================================================== */ - private static final String MESSAGES_CATEGORY_REMOTE = "remote"; - - /** - * @return a handler to the YaCy Messages DB - */ - private messageBoard getMessageDB() { - assert (this.switchboard != null) : "Switchboard object is null"; - assert (this.switchboard instanceof plasmaSwitchboard) : "Incorrect switchboard object"; - assert (((plasmaSwitchboard)this.switchboard).messageDB != null) : "Messsage DB is null"; - - return ((plasmaSwitchboard)this.switchboard).messageDB; - } - - /** - * Function to read the identifiers of all messages stored in the message db - * @return an array of message identifiers currently stored in the message DB - * @throws IOException if authentication failed or a DB read error occured - */ - public String[] getMessageIDs() throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the messageDB - messageBoard db = getMessageDB(); - - // loop through the messages and receive the message ids - ArrayList idList = new ArrayList(db.size()); - Iterator i = getMessageDB().keys(MESSAGES_CATEGORY_REMOTE, true); - while (i.hasNext()) { - String messageKey = (String) i.next(); - if (messageKey != null) idList.add(messageKey); - } - - //return array - return (String[]) idList.toArray(new String[idList.size()]); - } - - /** - * Returns a list with the sender, subject and date of all messages stored in the message db - * - * @return a xml document of the following format - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <messages>
-	 * 	<message id="remote______2005060901120600">
-	 * 		<date>2005/06/09 01:12:06</date>
-	 * 		<from hash="peerhash">SourcePeerName</from>
-	 * 		<to>DestPeerName</to>
-	 * 		<subject><![CDATA[Message subject]]></subject>
-	 * 	</message>
-	 * </messages>
-	 *
- * - * @throws Exception if authentication failed - */ - public Document getMessageHeaderList() throws Exception { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generate the xml document - serverObjects args = new serverObjects(); - args.put("action","list"); - - byte[] result = this.serverContext.writeTemplate(TEMPLATE_MESSAGE_HEADER_LIST_XML, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Function to geht detailes about a message stored in the message db - * @param messageID the identifier of the message to query - * @return a xml document of the following format - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <message id="remote______2005060901120600">
-	 * 	<date>2005/06/09 01:12:06</date>
-	 * 	<from hash="peerhash">sourcePeerName</from>
-	 * 	<to>destPeerName</to>
-	 * 	<subject><![CDATA[Test-Subject]]></subject>
-	 * 	<message><![CDATA[Message-Body]]></message>
-	 * </message>
-	 *
- * - * @throws Exception if authentication failed - */ - public Document getMessage(String messageID) throws Exception { - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (messageID == null || messageID.length() == 0) throw new IllegalArgumentException("The message id must not be null or empty."); - - // generate the xml document - serverObjects args = new serverObjects(); - args.put("action","view"); - args.put("object",messageID); - - byte[] result = this.serverContext.writeTemplate(TEMPLATE_MESSAGE_HEADER_LIST_XML, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Function to delete a message - * @param messageID the message identifier of the message that should be deleted - * @throws AxisFault if authentication failed or the message ID is unknown - */ - public void deleteMessage(String messageID) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (messageID == null || messageID.length() == 0) throw new IllegalArgumentException("The message id must not be null or empty."); - - // getting the messageDB - messageBoard db = getMessageDB(); - - // check if the message exists - if (db.read(messageID) == null) throw new AxisFault("Message with ID " + messageID + " does not exist."); - - // delete the message - db.remove(messageID); - } - - /** - * Function to delete multiple messages - * @param messageIDs an array of message ids - * @throws AxisFault if authentication failed or one of the message IDs is unknown - */ - public void deleteMessages(String[] messageIDs) throws AxisFault { - if (messageIDs == null || messageIDs.length == 0) throw new IllegalArgumentException("The message id array must not be null or empty."); - - // loop through the ids - for (int i=0; i < messageIDs.length; i++) { - String nextID = messageIDs[i]; - if (nextID == null || nextID.length() == 0) throw new IllegalArgumentException("The message id at position " + i + " is null or empty."); - - this.deleteMessage(nextID); - } - } - - /** - * A function to check if the destination peer will accept a message of this peer. - * @param destinationPeerHash the peer hash of the destination peer - * @return a XML document of the following format - *
-	 * <?xml version="1.0" encoding="UTF-8"?>
-	 * <messageSendPermission>
-	 * 	<permission>true</permission>
-	 * 	<response>Welcome to my peer!</response>
-	 * 	<messageSize>10240</messageSize>
-	 * 	<attachmentsize>0</attachmentsize>
-	 * </messageSendPermission>
-	 *
- * The tag permission specifies if we are allowed to send a messag to this peer. Response is a textual - * description why we are allowed or not allowed to send a message. messageSize specifies the maximum - * allowed message size. attachmentsize specifies the maximum attachment size accepted. - * - * @throws AxisFault if authentication failed or the destination peer is not reachable - * @throws ParserConfigurationException if xml generation failed - */ - public Document getMessageSendPermission(String destinationPeerHash) throws AxisFault, ParserConfigurationException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (destinationPeerHash == null || destinationPeerHash.length() == 0) throw new IllegalArgumentException("The destination peer hash must not be null or empty."); - - // get the peer from the db - yacySeed targetPeer = yacyCore.seedDB.getConnected(destinationPeerHash); - if (targetPeer == null) throw new AxisFault("The destination peer is not connected"); - - // check for permission to send message - HashMap result = yacyClient.permissionMessage(destinationPeerHash); - if (result == null) throw new AxisFault("No response received from peer"); - - boolean accepted = false; - String reason = "Unknown reason"; - if (result.containsKey("response")) { - String response = (String) result.get("response"); - if (response.equals("-1")) { - accepted = false; - reason = "request rejected"; - } else { - accepted = true; - reason = response; - } - } - - // return XML Document - Element xmlElement = null, xmlRoot; - Document xmlDoc = createNewXMLDocument("messageSendPermission"); - xmlRoot = xmlDoc.getDocumentElement(); - - xmlElement = xmlDoc.createElement("permission"); - xmlElement.appendChild(xmlDoc.createTextNode(Boolean.toString(accepted))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement("response"); - xmlElement.appendChild(xmlDoc.createTextNode(reason)); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement("messageSize"); - xmlElement.appendChild(xmlDoc.createTextNode((String)result.get("messagesize"))); - xmlRoot.appendChild(xmlElement); - - xmlElement = xmlDoc.createElement("attachmentsize"); - xmlElement.appendChild(xmlDoc.createTextNode((String)result.get("attachmentsize"))); - xmlRoot.appendChild(xmlElement); - - return xmlDoc; - } - - /** - * Function to send a message to a remote peer - * @param destinationPeerHash the peer hash of the remot peer - * @param subject the message subject - * @param message the message body - * - * @return the a response status message of the remote peer. 
- * - * @throws AxisFault if authentication failed - */ - public String sendMessage(String destinationPeerHash, String subject, String message) throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - if (destinationPeerHash == null || destinationPeerHash.length() == 0) throw new IllegalArgumentException("The destination peer hash must not be null or empty."); - if (subject == null || subject.length() == 0) throw new IllegalArgumentException("The subject must not be null or empty."); - if (message == null || message.length() == 0) throw new IllegalArgumentException("The message body must not be null or empty."); - - // convert the string into a byte array - byte[] mb; - try { - mb = message.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - mb = message.getBytes(); - } - - // send the message to the remote peer - HashMap result = yacyClient.postMessage(destinationPeerHash, subject, mb); - - // get the peer resonse - if (result == null) throw new AxisFault("No response received from peer"); - return (String) (result.containsKey("response") ? result.get("response") : "Unknown response"); - } -} diff --git a/source/de/anomic/soap/services/SearchService.java b/source/de/anomic/soap/services/SearchService.java deleted file mode 100644 index 8e6449c25..000000000 --- a/source/de/anomic/soap/services/SearchService.java +++ /dev/null @@ -1,346 +0,0 @@ -//httpdSoapService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. 
A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; - -import de.anomic.data.htmlTools; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaSearchPreOrder; -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; - -/** - * SOAP Service Class that will be invoked by the httpdSoapHandler - * - * @author Martin Thelian - */ -public class SearchService extends AbstractService -{ - /* ================================================================ - * Constants needed to set the template that should be used to - * fullfil the request - * ================================================================ */ - private static final String TEMPLATE_SEARCH = "yacysearch.soap"; - private static final String TEMPLATE_URLINFO = "ViewFile.soap"; - private static final String TEMPLATE_SNIPPET = "xml/snippet.xml"; - private static final String TEMPLATE_OPENSEARCH = "opensearchdescription.xml"; - private static final String TEMPLATE_SEARCHSTATS = "SearchStatistics_p.html"; - - /* ===================================================================== - * Constants needed to get search statistic info - * ===================================================================== */ - private static final int SEARCHSTATS_LOCAL_SEARCH_LOG = 1; - private static final int SEARCHSTATS_LOCAL_SEARCH_TRACKER = 2; - private static final int SEARCHSTATS_REMOTE_SEARCH_LOG = 3; - private static final int SEARCHSTATS_REMOTE_SEARCH_TRACKER = 4; - - /** - * Constructor of this class - */ - public SearchService() { - super(); - // nothing special todo here at the moment - } - - /** - * Service for doing a simple search with the standard settings - * - * @param searchString the search string that should be used - * @param maxSearchCount the maximum amount of search result that should be returned - * @param searchOrder can be a combination of YBR, Date and Quality, e.g. YBR-Date-Quality or Date-Quality-YBR - * @param searchMode can be global or local - * @param searchMode the total amount of seconds to use for the search - * @param urlMask if the urlMaskfilter parameter should be used - * @param urlMaskfilter e.g. .* - * @param prefermaskfilter - * @param category can be image or href - * - * @return an xml document containing the search results. - * - * @throws AxisFault if the service could not be executed propery. - */ - public Document search( - String searchString, - int maxSearchCount, - String searchOrder, - String searchMode, - int maxSearchTime, - boolean urlMask, - String urlMaskfilter, - String prefermaskfilter, - String category - ) - throws AxisFault { - try { - // extracting the message context - extractMessageContext(false); - - if ((searchMode == null) || !(searchMode.equalsIgnoreCase("global") || searchMode.equalsIgnoreCase("locale"))) { - searchMode = "global"; - } - - if (maxSearchCount < 0) { - maxSearchCount = 10; - } - - if (searchOrder == null || searchOrder.length() == 0) { - searchOrder = plasmaSearchPreOrder.canUseYBR() ? 
"YBR-Date-Quality" : "Date-Quality-YBR"; - } - - if (maxSearchTime < 0) { - maxSearchTime = 10; - } - - if (urlMaskfilter == null) { - urlMaskfilter = ".*"; - } - - if (prefermaskfilter == null) { - prefermaskfilter = ""; - } - - if (category == null || category.length() == 0) { - category = "href"; - } - - // setting the searching properties - serverObjects args = new serverObjects(); - args.put("search",searchString); - args.put("count",Integer.toString(maxSearchCount)); - args.put("order",searchOrder); - args.put("resource",searchMode); - args.put("time",Integer.toString(maxSearchTime)); - args.put("urlmask",(!urlMask)?"no":"yes"); - args.put("urlmaskfilter",urlMaskfilter); - args.put("prefermaskfilter",prefermaskfilter); - args.put("cat",category); - - args.put("Enter","Search"); - - // invoke servlet - serverObjects searchResult = this.serverContext.invokeServlet(TEMPLATE_SEARCH, args, this.requestHeader); - - // Postprocess search ... - int count = Integer.valueOf(searchResult.get("type_results","0")).intValue(); - for (int i=0; i < count; i++) { - searchResult.put("type_results_" + i + "_url",htmlTools.encodeUnicode2html(searchResult.get("type_results_" + i + "_url",""), false)); - searchResult.put("type_results_" + i + "_description",htmlTools.encodeUnicode2html(searchResult.get("type_results_" + i + "_description",""), true)); - searchResult.put("type_results_" + i + "_urlname",htmlTools.encodeUnicode2html(searchResult.get("type_results_" + i + "_urlname",""), true)); - } - - // format the result - byte[] result = this.serverContext.buildServletOutput(TEMPLATE_SEARCH, searchResult); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - - /** - * @param link the url - * @param viewMode one of (VIEW_MODE_AS_PLAIN_TEXT = 1, - * VIEW_MODE_AS_PARSED_TEXT = 2, - * VIEW_MODE_AS_PARSED_SENTENCES = 3) [Source: ViewFile.java] - * @return an xml document containing the url info. - * - * @throws AxisFault if the service could not be executed propery. - */ - public Document urlInfo(String urlStr, int viewMode) throws AxisFault { - try { - // getting the url hash for this url - URL url = new URL(urlStr); - String urlHash = plasmaURL.urlHash(url); - - // fetch urlInfo - return this.urlInfoByHash(urlHash, viewMode); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - /** - * @param urlHash the url hash - * @param viewMode one of (VIEW_MODE_AS_PLAIN_TEXT = 1, - * VIEW_MODE_AS_PARSED_TEXT = 2, - * VIEW_MODE_AS_PARSED_SENTENCES = 3) [Source: ViewFile.java] - * @return an xml document containing the url info. - * - * @throws AxisFault if the service could not be executed propery. 
- */ - public Document urlInfoByHash(String urlHash, int viewMode) throws AxisFault { - try { - // extracting the message context - extractMessageContext(true); - - if (urlHash == null || urlHash.trim().equals("")) { - throw new AxisFault("No Url-hash provided."); - } - - if (viewMode < 1 || viewMode > 3) { - viewMode = 2; - } - - String viewModeStr = "sentences"; - if (viewMode == 1) viewModeStr = "plain"; - else if (viewMode == 2) viewModeStr = "parsed"; - else if (viewMode == 3) viewModeStr = "sentences"; - - - // setting the properties - final serverObjects args = new serverObjects(); - args.put("urlHash",urlHash); - args.put("viewMode",viewModeStr); - - // generating the template containing the url info - byte[] result = this.serverContext.writeTemplate(TEMPLATE_URLINFO, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - public Document snippet(String url, String searchString) throws AxisFault { - try { - if (url == null || url.trim().equals("")) throw new AxisFault("No url provided."); - if (searchString == null || searchString.trim().equals("")) throw new AxisFault("No search string provided."); - - // extracting the message context - extractMessageContext(false); - - // setting the properties - final serverObjects args = new serverObjects(); - args.put("url",url); - args.put("search",searchString); - - // generating the template containing the url info - byte[] result = this.serverContext.writeTemplate(TEMPLATE_SNIPPET, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - - } catch (Exception e) { - throw new AxisFault(e.getMessage()); - } - } - - /** - * Returns the OpenSearch-Description of this peer - * @return a XML document of the following format: - *
-	* <?xml version="1.0" encoding="UTF-8"?>
-	* <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
-	*   <ShortName>YaCy/peerName</ShortName>
-	*   <LongName>YaCy.net - P2P WEB SEARCH</LongName>
-	*   <Image type="image/gif">http://ip-address:port/env/grafics/yacy.gif</Image>
-	*   <Image type="image/vnd.microsoft.icon">http://ip-address:port/env/grafics/yacy.ico</Image>
-	*   <Language>en-us</Language>
-	*   <OutputEncoding>UTF-8</OutputEncoding>
-	*   <InputEncoding>UTF-8</InputEncoding>
-	*   <AdultContent>true</AdultContent>
-	*   <Description>YaCy is open-source, GPL-licensed software that can be used for stand-alone search engine installations or as a client for a multi-user P2P-based web indexing cluster. This is the access to peer 'peername'.</Description>
-	*   <Url type="application/rss+xml" template="http://ip-address:port/yacysearch.rss?search={searchTerms}&Enter=Search" />
-	*   <Developer>See http://developer.berlios.de/projects/yacy/</Developer>
-	*   <Query role="example" searchTerms="yacy" />
-	*   <Tags>YaCy P2P Web Search</Tags>
-	*   <Contact>See http://ip-address:port/ViewProfile.html?hash=localhash</Contact>
-	*   <Attribution>YaCy Software &copy; 2004-2006 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
-	*   <SyndicationRight>open</SyndicationRight>
-	* </OpenSearchDescription>
-	*
- * @throws Exception - */ - public Document getOpenSearchDescription() throws Exception { - // extracting the message context - extractMessageContext(false); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_OPENSEARCH, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - private Document getSearchStatData(int page) throws Exception { - if (page < 1 || page > 4) throw new IllegalArgumentException("Illegal page number."); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - serverObjects post = new serverObjects(); - post.put("page", Integer.toString(page)); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_SEARCHSTATS, post, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - public Document getLocalSearchLog() throws Exception { - return this.getSearchStatData(SEARCHSTATS_LOCAL_SEARCH_LOG); - } - - public Document getLocalSearchTracker() throws Exception { - return this.getSearchStatData(SEARCHSTATS_LOCAL_SEARCH_TRACKER); - } - - public Document getRemoteSearchLog() throws Exception { - return this.getSearchStatData(SEARCHSTATS_REMOTE_SEARCH_LOG); - } - - public Document getRemoteSearchTracker() throws Exception { - return this.getSearchStatData(SEARCHSTATS_REMOTE_SEARCH_TRACKER); - } - -} diff --git a/source/de/anomic/soap/services/ShareService.java b/source/de/anomic/soap/services/ShareService.java deleted file mode 100644 index ff68a63d4..000000000 --- a/source/de/anomic/soap/services/ShareService.java +++ /dev/null @@ -1,714 +0,0 @@ -//ShareService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -// -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. 
The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import java.io.File; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.lang.reflect.Method; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.activation.FileDataSource; -import javax.xml.soap.AttachmentPart; -import javax.xml.soap.SOAPException; - -import org.apache.axis.AxisFault; -import org.apache.axis.Message; -import org.apache.axis.MessageContext; -import org.apache.axis.attachments.Attachments; -import org.w3c.dom.Document; - -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCodings; -import de.anomic.server.serverFileUtils; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSystem; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeed; - -public class ShareService extends AbstractService { - - private static final int FILEINFO_MD5_STRING = 0; - private static final int FILEINFO_COMMENT = 1; - - private static final int GENMD5_MD5_ARRAY = 0; - //private static final int GENMD5_MD5_STRING = 1; - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - private static final String TEMPLATE_SHARE_XML = "htdocsdefault/dir.xml"; - - /** - * @return the yacy HTDOCS directory, e.g. DATA/HTDOCS - * @throws AxisFault if the directory does not exist - */ - private File getHtDocsPath() throws AxisFault { - // get htroot path - File htdocs = new File(this.switchboard.getRootPath(), this.switchboard.getConfig("htDocsPath", "DATA/HTDOCS")); - if (!htdocs.exists()) throw new AxisFault("htDocsPath directory does not exists."); - return htdocs; - } - - /** - * @return the yacy fileshare directory, e.g. DATA/HTDOCS/share/ - * @throws AxisFault if the directory does not exist - */ - private File getShareDir() throws AxisFault { - File htdocs = getHtDocsPath(); - File share = new File(htdocs,"share/"); - if (!share.exists()) throw new AxisFault("Share directory does not exists."); - return share; - } - - /** - * Converts the relative path received as input parameter into an absolut - * path pointing to a location in the yacy file-share. 
- * @param path the relative path - * @return the absolut path - * - * @throws AxisFault if the directory does not exist - * @throws AxisFault if the directory is not a directory - * @throws AxisFault if the directory is not readable - * @throws AxisFault if the directory path is too long - * @throws AxisFault if the directory path is outside of the yacy share directory - * @throws IOException other io errors - */ - private File getWorkingDir(String path) throws IOException { - File share = getShareDir(); - - // cut of a tailing slash - if (path != null && path.startsWith("/")) path = path.substring(1); - - // construct directory - File workingDir = (path==null)?share:new File(share,path); - - if (!workingDir.exists()) - throw new AxisFault("Working directory does not exists"); - - if (!workingDir.isDirectory()) - throw new AxisFault("Working directory is not a directory"); - - if (!workingDir.canRead()) - throw new AxisFault("Working directory is not readable."); - - if (!workingDir.canWrite()) - throw new AxisFault("Working directory is not writeable."); - - if (workingDir.getAbsolutePath().length() > serverSystem.maxPathLength) - throw new AxisFault("Path name is too long"); - - if (!workingDir.getCanonicalPath().startsWith(share.getCanonicalPath())) - throw new AxisFault("Invalid path. Path does not start with " + share.getCanonicalPath()); - - return workingDir; - } - - /** - * Returns a file object representing a file in the yacy fileshare directory - * @param workingDir the current working directory (must be a subdirectory of the share directory) - * @param workingFileName the name of the file - * @return a file object pointing to a file or directory in the yacy fileshare directory - * - * @throws NullPointerException if the filename is null - * @throws AxisFault if the file name contains (back)slashes - * @throws AxisFault if the file path is too long - * @throws AxisFault if the file path is outside the yacy share directory - * @throws AxisFault if the file path is pointing to share itself - * - * @throws IOException on other io errors - */ - private File getWorkingFile(File workingDir, String workingFileName) throws AxisFault, IOException { - if (workingDir == null) throw new NullPointerException("Working dir is null"); - - // getting file-share directory - File share = getShareDir(); - - // check filename for illegal characters - if (workingFileName != null) { - if ((workingFileName.indexOf("/") != -1) || (workingFileName.indexOf("\\") != -1)) - throw new AxisFault("Filename contains illegal characters."); - } - - File workingFile = (workingFileName==null)?workingDir:new File(workingDir, workingFileName); - - if (workingFile.getAbsolutePath().length() > serverSystem.maxPathLength) - throw new AxisFault("Path name is too long"); - - if (!workingFile.getCanonicalPath().startsWith(workingDir.getCanonicalPath())) - throw new AxisFault("Invalid path. Path does not start with " + workingDir.getCanonicalPath()); - - if (share.getCanonicalPath().equals(workingFile.getCanonicalPath())) - throw new AxisFault("Invalid path. You can not operate on htroot."); - - return workingFile; - } - - /** - * Returns the md5 sum of a file - * @param theFile the file for which the MD5 sum should be calculated - * @return the md5 sum as byte array - */ - private byte[] generateFileMD5(File theFile) { - byte[] md5 = serverCodings.encodeMD5Raw(theFile); - return md5; - } - - /** - * Returns the hex. 
representation of a md5 sum array - * @param md5Array the md5 sum as byte array - * @return the string representation of the md5 sum - */ - private String convertMD5ArrayToString(byte[] md5Array) { - String md5s = serverCodings.encodeHex(md5Array); - return md5s; - } - - /** - * Returns a file object representing the md5-file that belongs to a regular yacy fileshare file - * @param theFile the original file - * @return the md5 file that belongs to the original file - * - * @throws IOException - */ - private File getFileMD5File(File theFile) throws IOException { - final File md5File = new File(theFile.getCanonicalPath() + ".md5"); - return md5File; - } - - private void deleteFileMD5File(File theFile) throws IOException { - File md5File = getFileMD5File(theFile); - if (md5File.exists()) { - md5File.delete(); - } - } - - /** - * Generates a md5 sum of a file and store it together with an optional comment - * in a special md5 file. - * @param theFile the original file - * @param comment description of the file - * @return an Object array containing - *
- * <ul>
- * <li>[{@link GENMD5_MD5_ARRAY}] the md5 sum of the file as byte array</li>
- * <li>[{@link GENMD5_MD5_STRING}] the md5 sum of the file as string</li>
- * </ul>
- * @throws UnsupportedEncodingException should never occur - * @throws IOException if the md5 file could not be written or the source file could not be read - */ - private Object[] generateMD5File(File theFile, String comment) throws UnsupportedEncodingException, IOException { - if (comment == null) comment = ""; - - // calculate md5 - byte[] md5b = generateFileMD5(theFile); - - // convert md5 sum to string - String md5s = convertMD5ArrayToString(md5b); - - // write comment + md5 to file - File md5File = getFileMD5File(theFile); - if (md5File.exists()) md5File.delete(); - serverFileUtils.write((md5s + "\n" + comment).getBytes("UTF-8"), md5File); - - return new Object[]{md5b,md5s}; - } - - /** - * Returns the content of the md5-file that belongs to a regular yacy file-share file - * @param theFile the regular file-share file - * @return an array containing - *
- * <ul>
- * <li>[{@link FILEINFO_MD5_STRING}] the md5 sum of the file as string</li>
- * <li>[{@link FILEINFO_COMMENT}] the comment</li>
- * </ul>
- * @throws IOException if the md5 file could not be read - */ - private String[] readFileInfo(File theFile) throws IOException { - File md5File = getFileMD5File(theFile); - - String md5s = ""; - String description = ""; - if (md5File.exists()) { - try { - md5s = new String(serverFileUtils.read(md5File),"UTF-8"); - int pos = md5s.indexOf('\n'); - if (pos >= 0) { - description = md5s.substring(pos + 1); - md5s = md5s.substring(0, pos); - } - } catch (IOException e) {/* */} - } - return new String[]{md5s,description}; - } - - private String readFileComment(File theFile) throws IOException { - String[] info = readFileInfo(theFile); - return info[FILEINFO_COMMENT]; - } - - private String readFileMD5String(File theFile) throws IOException { - String[] info = readFileInfo(theFile); - return info[FILEINFO_MD5_STRING]; - } - - private String yacyhURL(yacySeed seed, String filename, String md5) throws AxisFault { - try { - // getting the template class file - Class c = this.serverContext.getProvider().loadClass(this.serverContext.getServletClassFile(TEMPLATE_SHARE_XML)); - Method m = c.getMethod("yacyhURL", new Class[]{yacySeed.class,String.class,String.class}); - - // invoke the desired method - return (String) m.invoke(null, new Object[] {seed,filename,md5}); - } catch (Exception e) { - throw new AxisFault("Unable to generate the yacyhURL"); - } - } - - private void indexPhrase(String urlstring, String phrase, String descr, byte[] md5) throws AxisFault { - try { - // getting the template class file - Class c = this.serverContext.getProvider().loadClass(this.serverContext.getServletClassFile(TEMPLATE_SHARE_XML)); - Method m = c.getMethod("indexPhrase", new Class[]{plasmaSwitchboard.class,String.class,String.class,String.class,byte[].class}); - - // invoke the desired method - m.invoke(null, new Object[] {this.switchboard,urlstring,phrase,(descr==null)?"":descr,md5}); - } catch (Exception e) { - throw new AxisFault("Unable to index the file"); - } - } - - private void deletePhrase(String urlstring, String phrase, String descr) throws AxisFault { - try { - // getting the template class file - Class c = this.serverContext.getProvider().loadClass(this.serverContext.getServletClassFile(TEMPLATE_SHARE_XML)); - Method m = c.getMethod("deletePhrase", new Class[]{plasmaSwitchboard.class,String.class,String.class,String.class}); - - // invoke the desired method - m.invoke(null, new Object[] {this.switchboard,urlstring,phrase,(descr==null)?"":descr}); - } catch (Exception e) { - throw new AxisFault("Unable to index the file"); - } - } - - private String getPhrase(String filename) { - return filename.replace('.', ' ').replace('_', ' ').replace('-', ' '); - } - - private void indexFile(File newFile, String comment, byte[] md5b) throws IOException { - if (comment == null) comment = ""; - - // getting the file name - String newFileName = newFile.getName(); - String phrase = this.getPhrase(newFileName); - - // convert md5 sum to string - String md5s = convertMD5ArrayToString(md5b); - - // index file - String urlstring = yacyhURL(yacyCore.seedDB.mySeed, newFileName, md5s); - indexPhrase(urlstring, phrase, comment, md5b); - } - - private void unIndexFile(File file) throws IOException { - String filename = file.getName(); - String phrase = this.getPhrase(filename); - - // getting file info [0=md5s,1=comment] - String[] fileInfo = readFileInfo(file); - - // delete old indexed phrases - String urlstring = yacyhURL(yacyCore.seedDB.mySeed, filename, fileInfo[FILEINFO_MD5_STRING]); - deletePhrase(urlstring, phrase, 
fileInfo[FILEINFO_COMMENT]); - } - - - private void deleteRecursive(File file) throws IOException { - if (file == null) throw new NullPointerException("File object is null"); - if (!file.exists()) return; - if (!file.canWrite()) throw new IllegalArgumentException("File object can not be deleted. No write access."); - - if (file.isDirectory()) { - // delete all subdirectories and files - File[] subFiles = file.listFiles(); - for (int i = 0; i < subFiles.length; i++) deleteRecursive(subFiles[i]); - } else { - // unindex the file - this.unIndexFile(file); - - // delete md5 file - this.deleteFileMD5File(file); - } - - // delete file / directory - file.delete(); - } - - /** - * Returns a directory listing in xml format - * @param workingDirPath a relative path within the yacy file-share - * @return the directory listing of the specified path as XML document - * - * @throws Exception if the directory does not exist or can not be read - */ - public Document getDirList(String workingDirPath) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the htDocs and sub-directory - File htdocs = getHtDocsPath(); - File workingDir = getWorkingDir(workingDirPath); - - // generate the proper path for the servlet - workingDirPath = workingDir.getCanonicalPath().substring(htdocs.getCanonicalPath().length()+1); - if (!workingDirPath.endsWith("/")) workingDirPath = workingDirPath + "/"; - - // construct arguments - this.requestHeader.put("PATH",workingDirPath); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_SHARE_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Uploads a new file into the specified subdirectory of the yacy file-share directory. 
- * The Uploaded file must be passed via SOAP Attachment - * - * @param workingDirPath a relative path within the yacy file-share - * @param indexFile specifies if the file should be indexed by yacy - * @param comment a description of the file - * - * @throws IOException - * @throws SOAPException - */ - public void uploadFile(String workingDirPath, boolean indexFile, String comment) throws IOException, SOAPException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // get the current message context - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the request message - Message reqMsg = msgContext.getRequestMessage(); - - // getting the attachment implementation - Attachments messageAttachments = reqMsg.getAttachmentsImpl(); - if (messageAttachments == null) { - throw new AxisFault("Attachments not supported"); - } - - int attachmentCount= messageAttachments.getAttachmentCount(); - if (attachmentCount == 0) - throw new AxisFault("No attachment found"); - else if (attachmentCount != 1) - throw new AxisFault("Too many attachments as expected."); - - // getting the attachments - AttachmentPart[] attachments = (AttachmentPart[])messageAttachments.getAttachments().toArray(new AttachmentPart[attachmentCount]); - - // getting the content of the attachment - DataHandler dh = attachments[0].getDataHandler(); - String newFileName = attachments[0].getContentId(); - if (newFileName == null) newFileName = "newFile"; - - // getting directory to create - File newFile = getWorkingFile(workingDir,newFileName); - if (newFile.exists()) throw new AxisFault("File '" + newFileName + "' already exists"); - - // copy datahandler content to file - serverFileUtils.copy(dh.getInputStream(),newFile); - - // generate md5 sum and index the file - Object[] info = generateMD5File(newFile,comment); - if (indexFile) indexFile(newFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } - - /** - * Creates a new directory - * @param workingDirPath a relative path within the yacy file-share - * @param newDirName the name of the new directory - * @throws IOException - */ - public void createDirectory(String workingDirPath, String newDirName) throws IOException { - if (newDirName == null || newDirName.length() == 0) throw new AxisFault("The new directory name must not be null"); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting directory to create - File newDirFile = getWorkingFile(workingDir,newDirName); - if (newDirFile.exists()) - throw new AxisFault("Directory '" + newDirName + "' already exists"); - - // create Directory - newDirFile.mkdirs(); - } - - /** - * Deletes a file or directory located in the yacy file-share directory - * @param workingDirPath a relative path within the yacy file-share - * @param nameToDelete the name of the file or directory that should be deleted. 
- * Attention: Directories will be deleted recursively - * - * @throws IOException - */ - public void delete(String workingDirPath, String nameToDelete) throws IOException { - if (nameToDelete == null || nameToDelete.length() == 0) throw new AxisFault("The file name must not be null"); - - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting directory or file to delete - File fileToDelete = getWorkingFile(workingDir, nameToDelete); - - // delete file/dir - this.deleteRecursive(fileToDelete); - } - - - /** - * Reads the comment assigned to a file located in the yacy file-share - * @param workingDirPath a relative path within the yacy file-share - * @param fileName the name of the file - * @return the comment assigned to a file located in the yacy file-share or an emty string if no comment is available - * @throws AxisFault - * @throws IOException - */ - public String getFileComment(String workingDirPath, String fileName) throws AxisFault, IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the working directory - File workingDir = getWorkingDir(workingDirPath); - - // getting the working file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // get the old file comment - return this.readFileComment(workingFile); - } - - /** - * Reads the MD5 checksum of a file located in the yacy file-share - * @param workingDirPatha relative path within the yacy file-share - * @param fileName the name of the file - * @return the MD5 checksum of the file or an empty string if the checksum is not available - * @throws IOException - */ - public String getFileMD5(String workingDirPath, String fileName) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the working directory - File workingDir = getWorkingDir(workingDirPath); - - // getting the working file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // get the old file comment - return this.readFileMD5String(workingFile); - } - - /** - * To download a file located in the yacy file-share. - * This function returns the requested file as soap attachment to the caller of this function. 
- * - * @param workingDirPath a relative path within the yacy file-share - * @param fileName the name of the file that should be downloaded - * @return the md5 sum of the downloaded file - * - * @throws IOException - * @throws SOAPException - */ - public String getFile(String workingDirPath, String fileName) throws IOException, SOAPException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the working directory - File workingDir = getWorkingDir(workingDirPath); - - // getting the working file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // getting the md5 string and comment - String[] info = readFileInfo(workingFile); - - // get the current message context - MessageContext msgContext = MessageContext.getCurrentContext(); - - // getting the response message - Message respMsg = msgContext.getResponseMessage(); - - // creating a datasource and data handler - DataSource data = new FileDataSource(workingFile); - DataHandler attachmentFile = new DataHandler(data); - - AttachmentPart attachmentPart = respMsg.createAttachmentPart(); - attachmentPart.setDataHandler(attachmentFile); - attachmentPart.setContentId(workingFile.getName()); - - respMsg.addAttachmentPart(attachmentPart); - respMsg.saveChanges(); - - // return the md5 hash of the file as result - return info[FILEINFO_MD5_STRING]; - } - - public void renameFile(String workingDirPath, String oldFileName, String newFileName, boolean indexFile) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting file - File sourceFile = getWorkingFile(workingDir,oldFileName); - if (!sourceFile.exists()) throw new AxisFault("Source file does not exist."); - if (!sourceFile.isFile()) throw new AxisFault("Source file is not a file."); - - File destFile = getWorkingFile(workingDir,newFileName); - if (destFile.exists()) throw new AxisFault("Destination file already exists."); - - // get the old file comment - String comment = this.readFileComment(sourceFile); - - // unindex the old file and delete the old MD5 file - this.unIndexFile(sourceFile); - this.deleteFileMD5File(sourceFile); - - // rename file - sourceFile.renameTo(destFile); - - // generate MD5 file and index file - Object[] info = generateMD5File(destFile,comment); - if (indexFile) indexFile(destFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } - - /** - * To change the comment of a file located in the yacy file-share - * @param workingDirPatha relative path within the yacy file-share - * @param fileName the name of the file - * @param comment the new comment - * @param indexFile specifies if the file should be indexed by yacy - * - * @throws IOException - */ - public void changeComment(String workingDirPath, String fileName, String comment, boolean indexFile) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting the full path - File workingDir = getWorkingDir(workingDirPath); - - // getting wroking file - File workingFile = getWorkingFile(workingDir,fileName); - if (!workingFile.exists()) throw new AxisFault("Requested file does not exist."); - if (!workingFile.canRead())throw 
new AxisFault("Requested file can not be read."); - if (!workingFile.isFile()) throw new AxisFault("Requested file is not a file."); - - // unindex file and delete MD5 file - this.unIndexFile(workingFile); - this.deleteFileMD5File(workingFile); - - // generate new MD5 file and index file - Object[] info = generateMD5File(workingFile,comment); - if (indexFile) indexFile(workingFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } - - public void moveFile(String sourceDirName, String destDirName, String fileName, boolean indexFile) throws IOException { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // getting source and destination directory - File sourceDir = getWorkingDir(sourceDirName); - File destDir = getWorkingDir(destDirName); - - // getting source and destination file - File sourceFile = getWorkingFile(sourceDir,fileName); - if (!sourceFile.exists()) throw new AxisFault("Source file does not exist."); - if (!sourceFile.isFile()) throw new AxisFault("Source file is not a file."); - - File destFile = getWorkingFile(destDir,fileName); - if (destFile.exists()) throw new AxisFault("Destination file already exists."); - - // getting the old comment - String comment = this.readFileComment(sourceFile); - - // unindex old file and delete MD5 file - this.unIndexFile(sourceFile); - this.deleteFileMD5File(sourceFile); - - // rename file - sourceFile.renameTo(destFile); - - // index file and generate MD5 - Object[] info = generateMD5File(destFile,comment); - if (indexFile) indexFile(destFile,comment,(byte[]) info[GENMD5_MD5_ARRAY]); - } -} diff --git a/source/de/anomic/soap/services/StatusService.java b/source/de/anomic/soap/services/StatusService.java deleted file mode 100644 index 0a7f8f5fb..000000000 --- a/source/de/anomic/soap/services/StatusService.java +++ /dev/null @@ -1,261 +0,0 @@ -//StatusService.java -//------------------------ -//part of YaCy -//(C) by Michael Peter Christen; mc@anomic.de -//first published on http://www.anomic.de -//Frankfurt, Germany, 2005 -// -//this file was contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -//Using this software in any meaning (reading, learning, copying, compiling, -//running) means that you agree that the Author(s) is (are) not responsible -//for cost, loss of data or any harm that may be caused directly or indirectly -//by usage of this softare or this documentation. The usage of this software -//is on your own risk. 
The installation and usage (starting/running) of this -//software may allow other people or application to access your computer and -//any attached devices and is highly dependent on the configuration of the -//software which must be done by the user of the software; the author(s) is -//(are) also not responsible for proper configuration and usage of the -//software, even if provoked by documentation provided together with -//the software. -// -//Any changes to this file according to the GPL as documented in the file -//gpl.txt aside this file in the shipment you received can be done to the -//lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -//the intact and unchanged copyright notice. -//Contributions and changes to the program code must be marked as such. - - -package de.anomic.soap.services; - -import org.apache.axis.AxisFault; -import org.w3c.dom.Document; - -import de.anomic.server.serverObjects; -import de.anomic.soap.AbstractService; -import de.anomic.yacy.yacyCore; - -public class StatusService extends AbstractService { - - /* ===================================================================== - * Used XML Templates - * ===================================================================== */ - /** - * Constant: template for the network status page - */ - private static final String TEMPLATE_NETWORK_XML = "Network.xml"; - private static final String TEMPLATE_QUEUES_XML = "xml/queues_p.xml"; - private static final String TEMPLATE_STATUS_XML = "xml/status_p.xml"; - - /* ===================================================================== - * Constants needed to query the network status - * ===================================================================== */ - private static final int NETWORK_OVERVIEW = 0; - private static final int NETWORK_ACTIVE_PEERS = 1; - private static final int NETWORK_PASSIVE_PEERS = 2; - private static final int NETWORK_POTENTIAL_PEERS = 3; - - /** - * @return - * @throws Exception - * @deprecated kept for backward compatibility - */ - public Document network() throws Exception { - return this.getNetworkOverview(); - } - - public Document getNetworkOverview() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_OVERVIEW); - } - - public Document getActivePeers() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_ACTIVE_PEERS); - } - - public Document getPassivePeers() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_PASSIVE_PEERS); - } - - public Document getPotentialPeers() throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - // return DOM - return getNetworkData(NETWORK_POTENTIAL_PEERS); - } - - private Document getNetworkData(int page) throws Exception { - if (page < 0 || page > 3) page = 0; - - serverObjects post = new serverObjects(); - post.put("page", Integer.toString(page)); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_NETWORK_XML, post, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - - /** - * Returns a list of peers this peer currently knows - * @param 
peerType the peer types to query. This could be - *
<ul>
-     *   <li>active</li>
-     *   <li>passive</li>
-     *   <li>potential</li>
-     * </ul>
- * @param maxCount the maximum amount of records to return
- * @param details if detailed information should be returned
- *
- * @return an XML document of the following format
- *
-     * <?xml version="1.0" encoding="UTF-8"?>
-     * <peers>
-     *   <peer>
-     *     <hash>XXXXXXX</hash>
-     *     <fullname>Peer Name</fullname>
-     *     <version>0.424/01505</version>
-     *     <ppm>0</ppm>
-     *     <uptime>2 days 14:37</uptime>
-     *     <links>-</links>
-     *     <words>-</words>
-     *     <lastseen>48</lastseen>
-     *     <sendWords>-</sendWords>
-     *     <receivedWords>-</receivedWords>
-     *     <sendURLs>-</sendURLs>
-     *     <receivedURLs>-</receivedURLs>    
-     *     <age>369</age>
-     *     <seeds>61</seeds>
-     *     <connects>2</connects>
-     *     <address>127.0.0.1:8080</address>        
-     *   </peer>
-     * </peers>
-     * 
- * @throws Exception - */ - public Document peerList(String peerType, int maxCount, boolean details) throws Exception { - // extracting the message context - extractMessageContext(NO_AUTHENTICATION); - - if (peerType == null || peerType.length() == 0) throw new IllegalArgumentException("The peer type must not be null or empty."); - if (!(peerType.equalsIgnoreCase("active") || peerType.equalsIgnoreCase("passive") || peerType.equalsIgnoreCase("Potential"))) - throw new IllegalArgumentException("Unknown peer type. Should be (active|passive|potential)"); - - // configuring output mode - serverObjects args = new serverObjects(); - if (peerType.equalsIgnoreCase("active")) args.put("page","1"); - else if (peerType.equalsIgnoreCase("passive")) args.put("page","2"); - else if (peerType.equalsIgnoreCase("potential")) args.put("page","3"); - - // specifying if the detailed list should be returned - if (details) args.put("ip","1"); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_NETWORK_XML, args, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - - /** - * Returns the current status of the following queues - *
<ul>
-     *   <li>Indexing Queue</li>
-     *   <li>Loader Queue</li>
-     *   <li>Local Crawling Queue</li>
-     *   <li>Remote Triggered Crawling Queue</li>
-     * </ul>
- * @param localqueueCount the amount of items that should be returned. If this is null 10 items will be returned - * @param loaderqueueCount the amount of items that should be returned. This parameter will be ignored at the moment - * @param localcrawlerqueueCount the amount of items that should be returned. This parameter will be ignored at the moment - * @param remotecrawlerqueueCount the amount of items that should be returned. This parameter will be ignored at the moment - * @return a XML document containing the status information. For the detailed format, take a look into the template file - * htroot/xml/queues_p.xml - * - * @throws AxisFault if authentication failed - * @throws Exception on other unexpected errors - * - * @since 2835 - */ - public Document getQueueStatus( - Integer localqueueCount, - Integer loaderqueueCount, - Integer localcrawlerqueueCount, - Integer remotecrawlerqueueCount - ) throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // passing parameters to servlet - serverObjects input = new serverObjects(); - if (localqueueCount != null) input.put("num",localqueueCount.toString()); - //if (loaderqueueCount != null) input.put("num",loaderqueueCount.toString()); - //if (localcrawlerqueueCount != null) input.put("num",localcrawlerqueueCount.toString()); - //if (remotecrawlerqueueCount != null) input.put("num",remotecrawlerqueueCount.toString()); - - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_QUEUES_XML, input, this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - /** - * Query status information about this peer - * @return the status-{@link Document} - * @throws Exception - */ - public Document getStatus() throws Exception { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // generating the template containing the network status information - byte[] result = this.serverContext.writeTemplate(TEMPLATE_STATUS_XML, new serverObjects(), this.requestHeader); - - // sending back the result to the client - return this.convertContentToXML(result); - } - - - public String getPeerHash() throws AxisFault { - // extracting the message context - extractMessageContext(AUTHENTICATION_NEEDED); - - // return the peer hash - return yacyCore.seedDB.mySeed.hash; - } - -} diff --git a/source/de/anomic/soap/services/admin.wsdl b/source/de/anomic/soap/services/admin.wsdl deleted file mode 100644 index f212ac2f7..000000000 --- a/source/de/anomic/soap/services/admin.wsdl +++ /dev/null @@ -1,504 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
\ No newline at end of file
diff --git a/source/de/anomic/soap/services/blacklist.wsdl b/source/de/anomic/soap/services/blacklist.wsdl
deleted file mode 100644
index 2a1e21722..000000000
--- a/source/de/anomic/soap/services/blacklist.wsdl
+++ /dev/null
@@ -1,285 +0,0 @@
diff --git a/source/de/anomic/soap/services/bookmarks.wsdl b/source/de/anomic/soap/services/bookmarks.wsdl
deleted file mode 100644
index 38c608088..000000000
--- a/source/de/anomic/soap/services/bookmarks.wsdl
+++ /dev/null
@@ -1,318 +0,0 @@
diff --git a/source/de/anomic/soap/services/crawl.wsdl b/source/de/anomic/soap/services/crawl.wsdl
deleted file mode 100644
index 9ff191968..000000000
--- a/source/de/anomic/soap/services/crawl.wsdl
+++ /dev/null
@@ -1,168 +0,0 @@
diff --git a/source/de/anomic/soap/services/messages.wsdl b/source/de/anomic/soap/services/messages.wsdl
deleted file mode 100644
index 2396851c3..000000000
--- a/source/de/anomic/soap/services/messages.wsdl
+++ /dev/null
@@ -1,181 +0,0 @@
diff --git a/source/de/anomic/soap/services/search.wsdl b/source/de/anomic/soap/services/search.wsdl
deleted file mode 100644
index 2677c9ff9..000000000
--- a/source/de/anomic/soap/services/search.wsdl
+++ /dev/null
@@ -1,233 +0,0 @@
diff --git a/source/de/anomic/soap/services/share.wsdl b/source/de/anomic/soap/services/share.wsdl
deleted file mode 100644
index fb21158ab..000000000
--- a/source/de/anomic/soap/services/share.wsdl
+++ /dev/null
@@ -1,258 +0,0 @@
diff --git a/source/de/anomic/soap/services/status.wsdl b/source/de/anomic/soap/services/status.wsdl
deleted file mode 100644
index 49695ec88..000000000
--- a/source/de/anomic/soap/services/status.wsdl
+++ /dev/null
@@ -1,224 +0,0 @@
diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java
index 50690e377..99d1e22bc 100644
--- a/source/de/anomic/tools/loaderThreads.java
+++ b/source/de/anomic/tools/loaderThreads.java
@@ -46,7 +46,7 @@ import java.util.Hashtable;
 import de.anomic.http.httpRemoteProxyConfig;
 import de.anomic.http.httpc;
-import de.anomic.net.URL;
+import de.anomic.yacy.yacyURL;
 
 public class loaderThreads {
@@ -83,11 +83,11 @@ public class loaderThreads {
         this.failed = 0;
     }
 
-    public void newPropLoaderThread(String name, URL url) {
+    public void newPropLoaderThread(String name, yacyURL url) {
         newThread(name, url, new propLoader());
     }
 
-    public void newThread(String name, URL url, loaderProcess process) {
+    public void newThread(String name, yacyURL url, loaderProcess process) {
         Thread t = new loaderThread(url, process);
         threads.put(name, t);
         t.start();
@@ -130,13 +130,13 @@ public class loaderThreads {
     }
 
     protected class loaderThread extends Thread {
-        private URL url;
+        private yacyURL url;
         private Exception error;
         private loaderProcess process;
         private byte[] page;
         private boolean loaded;
 
-        public loaderThread(URL url, loaderProcess process) {
+        public loaderThread(yacyURL url, loaderProcess process) {
             this.url = url;
             this.process = process;
             this.error = null;
@@ -232,7 +232,7 @@ public class loaderThreads {
         httpRemoteProxyConfig proxyConfig = httpRemoteProxyConfig.init("192.168.1.122", 3128);
         loaderThreads loader = new loaderThreads(proxyConfig);
         try {
-            loader.newPropLoaderThread("load1", new URL("http://www.anomic.de/superseed.txt"));
+            loader.newPropLoaderThread("load1", new yacyURL("http://www.anomic.de/superseed.txt", null));
         } catch (MalformedURLException e) {
 
         }
diff --git
a/source/de/anomic/urlRedirector/urlRedirectord.java b/source/de/anomic/urlRedirector/urlRedirectord.java index 4a36021de..4e18d3fa7 100644 --- a/source/de/anomic/urlRedirector/urlRedirectord.java +++ b/source/de/anomic/urlRedirector/urlRedirectord.java @@ -10,8 +10,6 @@ import java.util.Date; import de.anomic.data.userDB; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.plasma.plasmaURL; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; @@ -20,6 +18,7 @@ import de.anomic.server.serverHandler; import de.anomic.server.logging.serverLog; import de.anomic.server.serverCore.Session; import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyURL; public class urlRedirectord implements serverHandler { @@ -180,7 +179,7 @@ public class urlRedirectord implements serverHandler { String reasonString = null; try { // generating URL Object - URL reqURL = new URL(this.nextURL); + yacyURL reqURL = new yacyURL(this.nextURL, null); // getting URL mimeType httpHeader header = httpc.whead(reqURL, reqURL.getHost(), 10000, null, null, switchboard.remoteProxyConfig); @@ -191,7 +190,7 @@ public class urlRedirectord implements serverHandler { header.mime()) ) { // first delete old entry, if exists - String urlhash = plasmaURL.urlHash(this.nextURL); + String urlhash = reqURL.hash(); switchboard.wordIndex.loadedURL.remove(urlhash); switchboard.noticeURL.remove(urlhash); switchboard.errorURL.remove(urlhash); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index ad1729aae..dd1c3abee 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -55,13 +55,12 @@ import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpc; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; -import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBitfield; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSearchContainer; +import de.anomic.plasma.plasmaSearchProcessing; import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; @@ -107,7 +106,7 @@ public final class yacyClient { yacyCore.log.logFine("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacting peer at " + address); // send request result = nxTools.table( - httpc.wput(new URL("http://" + address + "/yacy/hello.html"), + httpc.wput(new yacyURL("http://" + address + "/yacy/hello.html", null), yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", 12000, null, @@ -243,7 +242,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/query.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), target.getHexHash() + ".yacyh", 8000, null, @@ -274,7 +273,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/query.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), target.getHexHash() + ".yacyh", 8000, null, @@ -307,7 +306,7 @@ public final class yacyClient { // send request try { 
final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/query.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), target.getHexHash() + ".yacyh", 6000, null, @@ -390,7 +389,7 @@ public final class yacyClient { HashMap result = null; try { result = nxTools.table( - httpc.wput(new URL("http://" + target.getClusterAddress() + "/yacy/search.html"), + httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/search.html", null), target.getHexHash() + ".yacyh", 60000, null, @@ -538,7 +537,7 @@ public final class yacyClient { if (singleAbstract == null) singleAbstract = new TreeMap(); ci = new serverByteBuffer(((String) entry.getValue()).getBytes()); //System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString()); - plasmaURL.decompressIndex(singleAbstract, ci, target.hash); + plasmaSearchProcessing.decompressIndex(singleAbstract, ci, target.hash); abstractCache.put(wordhash, singleAbstract); } } @@ -583,7 +582,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + targetAddress(targetHash) + "/yacy/message.html"), + httpc.wput(new yacyURL("http://" + targetAddress(targetHash) + "/yacy/message.html", null), yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", 8000, null, @@ -619,7 +618,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + targetAddress(targetHash) + "/yacy/message.html"), + httpc.wput(new yacyURL("http://" + targetAddress(targetHash) + "/yacy/message.html", null), yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", 20000, null, @@ -663,7 +662,7 @@ public final class yacyClient { // send request try { - final URL url = new URL("http://" + targetAddress + "/yacy/transfer.html"); + final yacyURL url = new yacyURL("http://" + targetAddress + "/yacy/transfer.html", null); final HashMap result = nxTools.table( httpc.wput(url, url.getHost(), @@ -699,7 +698,7 @@ public final class yacyClient { // send request try { - final URL url = new URL("http://" + targetAddress + "/yacy/transfer.html"); + final yacyURL url = new yacyURL("http://" + targetAddress + "/yacy/transfer.html", null); final HashMap result = nxTools.table( httpc.wput(url, url.getHost(), @@ -743,11 +742,11 @@ public final class yacyClient { return "wrong protocol: " + protocol; } - public static HashMap crawlOrder(yacySeed targetSeed, URL url, URL referrer, int timeout) { - return crawlOrder(targetSeed, new URL[]{url}, new URL[]{referrer}, timeout); + public static HashMap crawlOrder(yacySeed targetSeed, yacyURL url, yacyURL referrer, int timeout) { + return crawlOrder(targetSeed, new yacyURL[]{url}, new yacyURL[]{referrer}, timeout); } - public static HashMap crawlOrder(yacySeed target, URL[] url, URL[] referrer, int timeout) { + public static HashMap crawlOrder(yacySeed target, yacyURL[] url, yacyURL[] referrer, int timeout) { assert (target != null); assert (yacyCore.seedDB.mySeed != null); assert (yacyCore.seedDB.mySeed != target); @@ -775,7 +774,7 @@ public final class yacyClient { // send request try { final HashMap result = nxTools.table( - httpc.wput(new URL("http://" + address + "/yacy/crawlOrder.html"), + httpc.wput(new yacyURL("http://" + address + "/yacy/crawlOrder.html", null), target.getHexHash() + ".yacyh", timeout, null, @@ -842,7 +841,7 @@ public final class yacyClient { // send request try { return nxTools.table( - 
httpc.wput(new URL("http://" + address + "/yacy/crawlReceipt.html"), + httpc.wput(new yacyURL("http://" + address + "/yacy/crawlReceipt.html", null), target.getHexHash() + ".yacyh", 60000, null, @@ -991,7 +990,7 @@ public final class yacyClient { try { final ArrayList v = nxTools.strings( httpc.wput( - new URL("http://" + address + "/yacy/transferRWI.html"), + new yacyURL("http://" + address + "/yacy/transferRWI.html", null), targetSeed.getHexHash() + ".yacyh", timeout, null, @@ -1046,7 +1045,7 @@ public final class yacyClient { try { final ArrayList v = nxTools.strings( httpc.wput( - new URL("http://" + address + "/yacy/transferURL.html"), + new yacyURL("http://" + address + "/yacy/transferURL.html", null), targetSeed.getHexHash() + ".yacyh", timeout, null, @@ -1081,7 +1080,7 @@ public final class yacyClient { try { return nxTools.table( httpc.wput( - new URL("http://" + address + "/yacy/profile.html"), + new yacyURL("http://" + address + "/yacy/profile.html", null), targetSeed.getHexHash() + ".yacyh", 10000, null, @@ -1118,14 +1117,14 @@ public final class yacyClient { final HashMap result = nxTools.table( httpc.wget( - new URL("http://" + target.getPublicAddress() + "/yacy/search.html" + + new yacyURL("http://" + target.getPublicAddress() + "/yacy/search.html" + "?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + "&youare=" + target.hash + "&key=" + "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + "&count=10" + "&resource=global" + "&query=" + wordhashe + - "&network.unit.name=" + plasmaSwitchboard.getSwitchboard().getConfig("network.unit.name", yacySeed.DFLT_NETWORK_UNIT)), + "&network.unit.name=" + plasmaSwitchboard.getSwitchboard().getConfig("network.unit.name", yacySeed.DFLT_NETWORK_UNIT), null), target.getHexHash() + ".yacyh", 5000, null, diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index d5f77dea2..43ce7edc2 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -70,7 +70,6 @@ import java.util.List; import java.util.Map; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverSemaphore; @@ -755,7 +754,7 @@ public class yacyCore { } // ensure that the seed file url is configured properly - URL seedURL; + yacyURL seedURL; try{ final String seedURLStr = sb.getConfig("seedURL", ""); if (seedURLStr.length() == 0) { throw new MalformedURLException("The seed-file url must not be empty."); } @@ -765,7 +764,7 @@ public class yacyCore { )){ throw new MalformedURLException("Unsupported protocol."); } - seedURL = new URL(seedURLStr); + seedURL = new yacyURL(seedURLStr, null); } catch(MalformedURLException e) { final String errorMsg = "Malformed seed file URL '" + sb.getConfig("seedURL", "") + "'. 
" + e.getMessage(); log.logWarning("SaveSeedList: " + errorMsg); @@ -783,7 +782,7 @@ public class yacyCore { "\n\tPrevious peerType is '" + seedDB.mySeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR) + "'."); // logt = seedDB.uploadCache(seedFTPServer, seedFTPAccount, seedFTPPassword, seedFTPPath, seedURL); - logt = seedDB.uploadCache(uploader,sb, seedDB, seedURL); + logt = seedDB.uploadCache(uploader, sb, seedDB, seedURL); if (logt != null) { if (logt.indexOf("Error") >= 0) { seedDB.mySeed.put(yacySeed.PEERTYPE, prevStatus); diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 9a5a5a44c..3785d006e 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -50,7 +50,6 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Map; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.urlPattern.plasmaURLPattern; @@ -326,13 +325,13 @@ public class yacyNewsPool { if (record.created().getTime() == 0) return; Map attributes = record.attributes(); if (attributes.containsKey("url")){ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new URL((String) attributes.get("url")))){ + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new yacyURL((String) attributes.get("url"), null))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url")); return; } } if (attributes.containsKey("startURL")){ - if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new URL((String) attributes.get("startURL")))){ + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new yacyURL((String) attributes.get("startURL"), null))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL")); return; } diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index fc1e32143..193a4ea56 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -51,7 +51,6 @@ import java.util.Iterator; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; @@ -145,7 +144,7 @@ public class yacyPeerActions { yacySeed ys; String seedListFileURL; - URL url; + yacyURL url; ArrayList seedList; Iterator enu; int lc; @@ -171,7 +170,7 @@ public class yacyPeerActions { reqHeader.put(httpHeader.PRAGMA,"no-cache"); reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache"); - url = new URL(seedListFileURL); + url = new yacyURL(seedListFileURL, null); long start = System.currentTimeMillis(); header = httpc.whead(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader); long loadtime = System.currentTimeMillis() - start; diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 35199a36a..1341d41b7 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -68,7 +68,6 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.net.URL; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; @@ 
-730,7 +729,7 @@ public final class yacySeedDB { // String seedFTPAccount, // String seedFTPPassword, // File seedFTPPath, - URL seedURL) throws Exception { + yacyURL seedURL) throws Exception { // upload a seed file, if possible if (seedURL == null) throw new NullPointerException("UPLOAD - Error: URL not given"); @@ -766,7 +765,7 @@ public final class yacySeedDB { return log; } - private ArrayList downloadSeedFile(URL seedURL) throws IOException { + private ArrayList downloadSeedFile(yacyURL seedURL) throws IOException { httpc remote = null; try { // init httpc diff --git a/source/de/anomic/net/URL.java b/source/de/anomic/yacy/yacyURL.java similarity index 53% rename from source/de/anomic/net/URL.java rename to source/de/anomic/yacy/yacyURL.java index 76f0eaaf4..ec1e2a302 100644 --- a/source/de/anomic/net/URL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -1,6 +1,6 @@ -// URL.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany -// first published 13.07.2006 on http://www.anomic.de +// yacyURL.java +// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 13.07.2006 on http://yacy.net // // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedRevision: 1986 $ @@ -22,27 +22,384 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -package de.anomic.net; +package de.anomic.yacy; // this class exsist to provide a system-wide normal form representation of urls, // and to prevent that java.net.URL usage causes DNS queries which are used in java.net. import java.io.File; +import java.net.InetAddress; import java.net.MalformedURLException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class URL { +import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.server.serverCodings; +import de.anomic.server.serverDomains; - private String protocol, host, userInfo, path, quest, ref; +public class yacyURL { + + + // TLD separation in political and cultural parts + // https://www.cia.gov/cia/publications/factbook/index.html + // http://en.wikipedia.org/wiki/List_of_countries_by_continent + + private static final String[] TLD_NorthAmericaOceania={ + // primary english-speaking countries + // english-speaking countries from central america are also included + // includes also dutch and french colonies in the caribbean sea + // and US/English/Australian military bases in asia + "EDU=US Educational", + "GOV=US Government", + "MIL=US Military", + "NET=Network", + "ORG=Non-Profit Organization", + "AN=Netherlands Antilles", + "AS=American Samoa", + "AG=Antigua and Barbuda", + "AI=Anguilla", + "AU=Australia", + "BB=Barbados", + "BZ=Belize", + "BM=Bermuda", + "BS=Bahamas", + "CA=Canada", + "CC=Cocos (Keeling) Islands", + "CK=Cook Islands", + "CX=Christmas Island", // located in the Indian Ocean, but belongs to Australia + "DM=Dominica", + "FM=Micronesia", + "FJ=Fiji", + "GD=Grenada", + "GP=Guadeloupe", + "GS=South Georgia and the South Sandwich Islands", // south of south america, but administrated by british, has only a scientific base + "GU=Guam", // strategical US basis close to Japan + "HM=Heard and McDonald Islands", // uninhabited, sub-Antarctic island, owned by Australia + "HT=Haiti", + "IO=British Indian Ocean Territory", // UK-US naval support facility in the Indian Ocean + "KI=Kiribati", // 33 coral atolls 
in the pacific, formerly owned by UK + "KN=Saint Kitts and Nevis", // islands in the carribean see + "KY=Cayman Islands", + "LC=Saint Lucia", + "MH=Marshall Islands", // formerly US atomic bomb test site, now a key installation in the US missile defense network + "MP=Northern Mariana Islands", // US strategic location in the western Pacific Ocean + "NC=New Caledonia", + "NF=Norfolk Island", + "NR=Nauru", // independent UN island + "NU=Niue", // one of world's largest coral islands + "NZ=New Zealand (Aotearoa)", + "PG=Papua New Guinea", + "PN=Pitcairn", // overseas territory of the UK + "PR=Puerto Rico", // territory of the US with commonwealth status + "PW=Palau", // was once governed by Micronesia + "Sb=Solomon Islands", + "TC=Turks and Caicos Islands", // overseas territory of the UK + "TK=Tokelau", // group of three atolls in the South Pacific Ocean, british protectorat + "TO=Tonga", + "TT=Trinidad and Tobago", + "TV=Tuvalu", // nine coral atolls in the South Pacific Ocean; in 2000, Tuvalu leased its TLD ".tv" for $50 million over a 12-year period + "UM=US Minor Outlying Islands", // nine insular United States possessions in the Pacific Ocean and the Caribbean Sea + "US=United States", + "VC=Saint Vincent and the Grenadines", + "VG=Virgin Islands (British)", + "VI=Virgin Islands (U.S.)", + "VU=Vanuatu", + "WF=Wallis and Futuna Islands", + "WS=Samoa" + }; + private static final String[] TLD_MiddleSouthAmerica = { + // primary spanish and portugese-speaking + "AR=Argentina", + "AW=Aruba", + "BR=Brazil", + "BO=Bolivia", + "CL=Chile", + "CO=Colombia", + "CR=Costa Rica", + "CU=Cuba", + "DO=Dominican Republic", + "EC=Ecuador", + "FK=Falkland Islands (Malvinas)", + "GF=French Guiana", + "GT=Guatemala", + "GY=Guyana", + "HN=Honduras", + "JM=Jamaica", + "MX=Mexico", + "NI=Nicaragua", + "PA=Panama", + "PE=Peru", + "PY=Paraguay", + "SR=Suriname", + "SV=El Salvador", + "UY=Uruguay", + "VE=Venezuela" + }; + private static final String[] TLD_EuropaRussia = { + // includes also countries that are mainly french- dutch- speaking + // and culturally close to europe + "AD=Andorra", + "AL=Albania", + "AQ=Antarctica", + "AT=Austria", + "BA=Bosnia and Herzegovina", + "BE=Belgium", + "BG=Bulgaria", + "BV=Bouvet Island", // this island is uninhabited and covered by ice, south of africa but governed by Norway + "BY=Belarus", + "CH=Switzerland", + "CS=Czechoslovakia (former)", + "CZ=Czech Republic", + "CY=Cyprus", + "DE=Germany", + "DK=Denmark", + "ES=Spain", + "EE=Estonia", + "FI=Finland", + "FO=Faroe Islands", // Viking Settlers + "FR=France", + "FX=France, Metropolitan", + "GB=Great Britain (UK)", + "GI=Gibraltar", + "GL=Greenland", + "GR=Greece", + "HR=Croatia (Hrvatska)", + "HU=Hungary", + "IE=Ireland", + "IS=Iceland", + "IT=Italy", + "LI=Liechtenstein", + "LT=Lithuania", + "LU=Luxembourg", + "LV=Latvia", + "MD=Moldova", + "MC=Monaco", + "MK=Macedonia", + "MN=Mongolia", + "MS=Montserrat", // British island in the Caribbean Sea, almost not populated because of strong vulcanic activity + "MT=Malta", + "MQ=Martinique", // island in the eastern Caribbean Sea, overseas department of France + "NATO=Nato field", + "NL=Netherlands", + "NO=Norway", + "PF=French Polynesia", // French annexed Polynesian island in the South Pacific, French atomic bomb test site + "PL=Poland", + "PM=St. 
Pierre and Miquelon", // french-administrated colony close to canada, belongs to France + "PT=Portugal", + "RO=Romania", + "RU=Russia", + "SE=Sweden", + "SI=Slovenia", + "SJ=Svalbard and Jan Mayen Islands", // part of Norway + "SM=San Marino", + "SK=Slovak Republic", + "SU=USSR (former)", + "TF=French Southern Territories", // islands in the arctic see, no inhabitants + "UK=United Kingdom", + "UA=Ukraine", + "VA=Vatican City State (Holy See)", + "YU=Yugoslavia" + }; + + private static final String[] TLD_MiddleEastWestAsia = { + // states that are influenced by islamic culture and arabic language + // includes also eurasia states and those that had been part of the former USSR and close to southwest asia + "AE=United Arab Emirates", + "AF=Afghanistan", + "AM=Armenia", + "AZ=Azerbaijan", + "BH=Bahrain", + "GE=Georgia", + "IL=Israel", + "IQ=Iraq", + "IR=Iran", + "JO=Jordan", + "KG=Kyrgyzstan", + "KZ=Kazakhstan", + "KW=Kuwait", + "LB=Lebanon", + "OM=Oman", + "QA=Qatar", + "SA=Saudi Arabia", + "SY=Syria", + "TJ=Tajikistan", + "TM=Turkmenistan", + "PK=Pakistan", + "TR=Turkey", + "UZ=Uzbekistan", + "YE=Yemen" + }; + private static final String[] TLD_SouthEastAsia = { + "BD=Bangladesh", + "BN=Brunei Darussalam", + "BT=Bhutan", + "CN=China", + "HK=Hong Kong", + "ID=Indonesia", + "IN=India", + "LA=Laos", + "NP=Nepal", + "JP=Japan", + "KH=Cambodia", + "KP=Korea (North)", + "KR=Korea (South)", + "LK=Sri Lanka", + "MY=Malaysia", + "MM=Myanmar", // formerly known as Burma + "MO=Macau", // Portuguese settlement, part of China, but has some autonomy + "MV=Maldives", // group of atolls in the Indian Ocean + "PH=Philippines", + "SG=Singapore", + "TP=East Timor", + "TH=Thailand", + "TW=Taiwan", + "VN=Viet Nam" + }; + private static final String[] TLD_Africa = { + "AO=Angola", + "BF=Burkina Faso", + "BI=Burundi", + "BJ=Benin", + "BW=Botswana", + "CF=Central African Republic", + "CG=Congo", + "CI=Cote D'Ivoire (Ivory Coast)", + "CM=Cameroon", + "CV=Cape Verde", + "DJ=Djibouti", + "DZ=Algeria", + "EG=Egypt", + "EH=Western Sahara", + "ER=Eritrea", + "ET=Ethiopia", + "GA=Gabon", + "GH=Ghana", + "GM=Gambia", + "GN=Guinea", + "GQ=Equatorial Guinea", + "GW=Guinea-Bissau", + "KE=Kenya", + "KM=Comoros", + "LR=Liberia", + "LS=Lesotho", + "LY=Libya", + "MA=Morocco", + "MG=Madagascar", + "ML=Mali", + "MR=Mauritania", + "MU=Mauritius", + "MW=Malawi", + "MZ=Mozambique", + "NA=Namibia", + "NE=Niger", + "NG=Nigeria", + "RE=Reunion", + "RW=Rwanda", + "SC=Seychelles", + "SD=Sudan", + "SH=St. 
Helena", + "SL=Sierra Leone", + "SN=Senegal", + "SO=Somalia", + "ST=Sao Tome and Principe", + "SZ=Swaziland", + "TD=Chad", + "TG=Togo", + "TN=Tunisia", + "TZ=Tanzania", + "UG=Uganda", + "ZA=South Africa", + "ZM=Zambia", + "ZR=Zaire", + "ZW=Zimbabwe", + "YT=Mayotte" + }; + private static final String[] TLD_Generic = { + "COM=US Commercial", + "AERO=", + "BIZ=", + "COOP=", + "INFO=", + "MUSEUM=", + "NAME=", + "PRO=", + "ARPA=", + "INT=International", + "ARPA=Arpanet", + "NT=Neutral Zone" + }; + + + /* + * TLDs: aero, biz, com, coop, edu, gov, info, int, mil, museum, name, net, + * org, pro, arpa AC, AD, AE, AERO, AF, AG, AI, AL, AM, AN, AO, AQ, AR, + * ARPA, AS, AT, AU, AW, AZ, BA, BB, BD, BE, BF, BG, BH, BI, BIZ, BJ, BM, + * BN, BO, BR, BS, BT, BV, BW, BY, BZ, CA, CC, CD, CF, CG, CH, CI, CK, CL, + * CM, CN, CO, COM, COOP, CR, CU, CV, CX, CY, CZ, DE, DJ, DK, DM, DO, DZ, + * EC, EDU, EE, EG, ER, ES, ET, EU, FI, FJ, FK, FM, FO, FR, GA, GB, GD, GE, + * GF, GG, GH, GI, GL, GM, GN, GOV, GP, GQ, GR, GS, GT, GU, GW, GY, HK, HM, + * HN, HR, HT, HU, ID, IE, IL, IM, IN, INFO, INT, IO, IQ, IR, IS, IT, JE, + * JM, JO, JOBS, JP, KE, KG, KH, KI, KM, KN, KR, KW, KY, KZ, LA, LB, LC, LI, + * LK, LR, LS, LT, LU, LV, LY, MA, MC, MD, MG, MH, MIL, MK, ML, MM, MN, MO, + * MOBI, MP, MQ, MR, MS, MT, MU, MUSEUM, MV, MW, MX, MY, MZ, NA, NAME, NC, + * NE, NET, NF, NG, NI, NL, NO, NP, NR, NU, NZ, OM, ORG, PA, PE, PF, PG, PH, + * PK, PL, PM, PN, PR, PRO, PS, PT, PW, PY, QA, RE, RO, RU, RW, SA, SB, SC, + * SD, SE, SG, SH, SI, SJ, SK, SL, SM, SN, SO, SR, ST, SU, SV, SY, SZ, TC, + * TD, TF, TG, TH, TJ, TK, TL, TM, TN, TO, TP, TR, TRAVEL, TT, TV, TW, TZ, + * UA, UG, UK, UM, US, UY, UZ, VA, VC, VE, VG, VI, VN, VU, WF, WS, YE, YT, + * YU, ZA, ZM, ZW + */ + + public static String dummyHash; + + private static HashMap TLDID = new HashMap(); + private static HashMap TLDName = new HashMap(); + + private static void insertTLDProps(String[] TLDList, int id) { + int p; + String tld, name; + Integer ID = new Integer(id); + for (int i = 0; i < TLDList.length; i++) { + p = TLDList[i].indexOf('='); + if (p > 0) { + tld = TLDList[i].substring(0, p).toLowerCase(); + name = TLDList[i].substring(p + 1); + TLDID.put(tld, ID); + TLDName.put(tld, name); + } + } + } + + static { + // create a dummy hash + dummyHash = ""; + for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-"; + + // assign TLD-ids and names + insertTLDProps(TLD_EuropaRussia, 0); + insertTLDProps(TLD_MiddleSouthAmerica, 1); + insertTLDProps(TLD_SouthEastAsia, 2); + insertTLDProps(TLD_MiddleEastWestAsia, 3); + insertTLDProps(TLD_NorthAmericaOceania, 4); + insertTLDProps(TLD_Africa, 5); + insertTLDProps(TLD_Generic, 6); + // the id=7 is used to flag local addresses + } + + // class variables + private String protocol, host, userInfo, path, quest, ref, hash; private int port; - public URL(String url) throws MalformedURLException { + public yacyURL(String url, String hash) throws MalformedURLException { if (url == null) throw new MalformedURLException("url string is null"); parseURLString(url); + this.hash = hash; } - public void parseURLString(String url) throws MalformedURLException { + private void parseURLString(String url) throws MalformedURLException { // identify protocol assert (url != null); url = url.trim(); @@ -100,40 +457,41 @@ public class URL { } } - public URL(File file) throws MalformedURLException { + public yacyURL(File file) throws MalformedURLException { this("file", "", -1, file.getAbsolutePath()); } - public static URL newURL(String baseURL, 
String relPath) throws MalformedURLException { + public static yacyURL newURL(String baseURL, String relPath) throws MalformedURLException { if ((baseURL == null) || (relPath.startsWith("http://")) || (relPath.startsWith("https://")) || (relPath.startsWith("ftp://")) || (relPath.startsWith("file://")) || (relPath.startsWith("smb://"))) { - return new URL(relPath); + return new yacyURL(relPath, null); } else { - return new URL(new URL(baseURL), relPath); + return new yacyURL(new yacyURL(baseURL, null), relPath); } } - public static URL newURL(URL baseURL, String relPath) throws MalformedURLException { + public static yacyURL newURL(yacyURL baseURL, String relPath) throws MalformedURLException { if ((baseURL == null) || (relPath.startsWith("http://")) || (relPath.startsWith("https://")) || (relPath.startsWith("ftp://")) || (relPath.startsWith("file://")) || (relPath.startsWith("smb://"))) { - return new URL(relPath); + return new yacyURL(relPath, null); } else { - return new URL(baseURL, relPath); + return new yacyURL(baseURL, relPath); } } - private URL(URL baseURL, String relPath) throws MalformedURLException { + private yacyURL(yacyURL baseURL, String relPath) throws MalformedURLException { if (baseURL == null) throw new MalformedURLException("base URL is null"); if (relPath == null) throw new MalformedURLException("relPath is null"); + this.hash = null; this.protocol = baseURL.protocol; this.host = baseURL.host; this.port = baseURL.port; @@ -176,12 +534,13 @@ public class URL { escape(); } - public URL(String protocol, String host, int port, String path) throws MalformedURLException { + public yacyURL(String protocol, String host, int port, String path) throws MalformedURLException { if (protocol == null) throw new MalformedURLException("protocol is null"); this.protocol = protocol; this.host = host; this.port = port; this.path = path; + this.hash = null; identRef(); identQuest(); escape(); @@ -400,11 +759,11 @@ public class URL { int r = this.host.indexOf(':'); if (r < 0) { this.port = dflt; - } else { + } else { try { - String portStr = this.host.substring(r + 1); + String portStr = this.host.substring(r + 1); if (portStr.trim().length() > 0) this.port = Integer.parseInt(portStr); - else this.port = -1; + else this.port = -1; this.host = this.host.substring(0, r); } catch (NumberFormatException e) { throw new MalformedURLException("wrong port in host fragment '" + this.host + "' of input url '" + inputURL + "'"); @@ -522,7 +881,7 @@ public class URL { this.getHost().toLowerCase() + ((defaultPort) ? 
("") : (":" + this.port)) + path; } - public boolean equals(URL other) { + public boolean equals(yacyURL other) { return (((this.protocol == other.protocol) || (this.protocol.equals(other.protocol))) && ((this.host == other.host ) || (this.host.equals(other.host))) && ((this.userInfo == other.userInfo) || (this.userInfo.equals(other.userInfo))) && @@ -537,10 +896,178 @@ public class URL { } public int compareTo(Object h) { - assert (h instanceof URL); - return this.toString().compareTo(((URL) h).toString()); + assert (h instanceof yacyURL); + return this.toString().compareTo(((yacyURL) h).toString()); } + // static methods from plasmaURL + + public static final int flagTypeID(String hash) { + return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5; + } + + public static final int flagTLDID(String hash) { + return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 28) >> 2; + } + + public static final int flagLengthID(String hash) { + return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 3); + } + + public final String hash() { + // in case that the object was initialized without a known url hash, compute it now + if (this.hash == null) this.hash = urlHashComputation(); + return this.hash; + } + + private final String urlHashComputation() { + // the url hash computation needs a DNS lookup to check if the addresses domain is local + // that causes that this method may be very slow + + assert this.hash == null; // should only be called if the hash was not computed bevore + + int p = this.host.lastIndexOf('.'); + String tld = "", dom = tld; + if (p > 0) { + tld = host.substring(p + 1); + dom = host.substring(0, p); + } + Integer ID = (serverDomains.isLocal(tld)) ? null : (Integer) TLDID.get(tld); // identify local addresses + int id = (ID == null) ? 7 : ID.intValue(); // local addresses are flagged with id=7 + boolean isHTTP = this.protocol.equals("http"); + p = dom.lastIndexOf('.'); // locate subdomain + String subdom = ""; + if (p > 0) { + subdom = dom.substring(0, p); + dom = dom.substring(p + 1); + } + + // find rootpath + String pathx = new String(this.path); + if (pathx.startsWith("/")) + pathx = pathx.substring(1); + if (pathx.endsWith("/")) + pathx = pathx.substring(0, pathx.length() - 1); + p = pathx.indexOf('/'); + String rootpath = ""; + if (p > 0) { + rootpath = pathx.substring(0, p); + } + + // we collected enough information to compute the fragments that are + // basis for hashes + int l = dom.length(); + int domlengthKey = (l <= 8) ? 0 : (l <= 12) ? 1 : (l <= 16) ? 2 : 3; + byte flagbyte = (byte) (((isHTTP) ? 
0 : 32) | (id << 2) | domlengthKey); + + // combine the attributes + StringBuffer hash = new StringBuffer(12); + // form the 'local' part of the hash + hash.append(kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(toNormalform(true, true))).substring(0, 5)); // 5 chars + hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char + // form the 'global' part of the hash + hash.append(protocolHostPort(this.protocol, host, port)); // 5 chars + hash.append(kelondroBase64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char + + // return result hash + return new String(hash); + } + + private static char subdomPortPath(String subdom, int port, String rootpath) { + return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(subdom + ":" + port + ":" + rootpath)).charAt(0); + } + + private static final char rootURLFlag = subdomPortPath("www", 80, ""); + + public static final boolean probablyRootURL(String urlHash) { + return (urlHash.charAt(5) == rootURLFlag); + } + + private static String protocolHostPort(String protocol, String host, int port) { + return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(protocol + ":" + host + ":" + port)).substring(0, 5); + } + + private static String[] testTLDs = new String[] { "com", "net", "org", "uk", "fr", "de", "es", "it" }; + + public static final yacyURL probablyWordURL(String urlHash, TreeSet words) { + Iterator wi = words.iterator(); + String word; + while (wi.hasNext()) { + word = (String) wi.next(); + if ((word == null) || (word.length() == 0)) continue; + String pattern = urlHash.substring(6, 11); + for (int i = 0; i < testTLDs.length; i++) { + if (pattern.equals(protocolHostPort("http", "www." + word.toLowerCase() + "." + testTLDs[i], 80))) + try { + return new yacyURL("http://www." + word.toLowerCase() + "." 
+ testTLDs[i], null); + } catch (MalformedURLException e) { + return null; + } + } + } + return null; + } + + public static final boolean isWordRootURL(String givenURLHash, TreeSet words) { + if (!(probablyRootURL(givenURLHash))) return false; + yacyURL wordURL = probablyWordURL(givenURLHash, words); + if (wordURL == null) return false; + if (wordURL.hash().equals(givenURLHash)) return true; + return false; + } + + public static final int domLengthEstimation(String urlHash) { + // generates an estimation of the original domain length + assert (urlHash != null); + assert (urlHash.length() == 12) : "urlhash = " + urlHash; + int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); + int domLengthKey = flagbyte & 3; + switch (domLengthKey) { + case 0: + return 4; + case 1: + return 10; + case 2: + return 14; + case 3: + return 20; + } + return 20; + } + + public static int domLengthNormalized(String urlHash) { + return 255 * domLengthEstimation(urlHash) / 30; + } + + public static final int domDomain(String urlHash) { + // returns the ID of the domain of the domain + assert (urlHash != null); + assert (urlHash.length() == 12) : "urlhash = " + urlHash; + int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); + return (flagbyte & 12) >> 2; + } + + public static boolean isGlobalDomain(String urlhash) { + return domDomain(urlhash) != 7; + } + + // checks for local/global IP range and local IP + public boolean isLocal() { + InetAddress hostAddress = serverDomains.dnsResolve(this.host); // TODO: use a check with the hash first + if (hostAddress == null) /* we are offline */ return false; // it is rare to be offline in intranets + return hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress(); + } + + // language calculation + public static String language(yacyURL url) { + String language = "uk"; + String host = url.getHost(); + int pos = host.lastIndexOf("."); + if ((pos > 0) && (host.length() - pos == 3)) language = host.substring(pos + 1).toLowerCase(); + return language; + } + + public static void main(String[] args) { String[][] test = new String[][]{ new String[]{null, "http://www.anomic.de/home/test?x=1#home"}, @@ -570,12 +1097,12 @@ public class URL { new String[]{null, "http://diskusjion.no/index.php?s=5bad5f431a106d9a8355429b81bb0ca5&showuser=23585"} }; String environment, url; - de.anomic.net.URL aURL, aURL1; + yacyURL aURL, aURL1; java.net.URL jURL; for (int i = 0; i < test.length; i++) { environment = test[i][0]; url = test[i][1]; - try {aURL = de.anomic.net.URL.newURL(environment, url);} catch (MalformedURLException e) {aURL = null;} + try {aURL = yacyURL.newURL(environment, url);} catch (MalformedURLException e) {aURL = null;} if (environment == null) { try {jURL = new java.net.URL(url);} catch (MalformedURLException e) {jURL = null;} } else { @@ -593,7 +1120,7 @@ public class URL { // check stability: the normalform of the normalform must be equal to the normalform if (aURL != null) try { - aURL1 = new de.anomic.net.URL(aURL.toNormalform(false, true)); + aURL1 = new yacyURL(aURL.toNormalform(false, true), null); if (!(aURL1.toNormalform(false, true).equals(aURL.toNormalform(false, true)))) { System.out.println("no stability for url:"); System.out.println("aURL0=" + aURL.toString()); diff --git a/source/de/anomic/yacy/yacyVersion.java b/source/de/anomic/yacy/yacyVersion.java index b86c0e36f..4c1f67f91 100644 --- a/source/de/anomic/yacy/yacyVersion.java +++ b/source/de/anomic/yacy/yacyVersion.java @@ -41,7 +41,6 @@ import 
java.util.regex.Pattern; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpc; -import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverSystem; @@ -73,10 +72,10 @@ public final class yacyVersion implements Comparator, Comparable { public String dateStamp; public int svn; public boolean proRelease, mainRelease; - public URL url; + public yacyURL url; public String name; - public yacyVersion(URL url) { + public yacyVersion(yacyURL url) { this(url.getFileName()); this.url = url; } @@ -256,7 +255,7 @@ public final class yacyVersion implements Comparator, Comparable { // {promainreleases, prodevreleases, stdmainreleases, stddevreleases} Object[] a = new Object[latestReleaseLocations.size()]; for (int j = 0; j < latestReleaseLocations.size(); j++) { - a[j] = getReleases((URL) latestReleaseLocations.get(j), force); + a[j] = getReleases((yacyURL) latestReleaseLocations.get(j), force); } TreeSet[] r = new TreeSet[4]; TreeSet s; @@ -270,7 +269,7 @@ public final class yacyVersion implements Comparator, Comparable { return r; } - private static TreeSet[] getReleases(URL location, boolean force) { + private static TreeSet[] getReleases(yacyURL location, boolean force) { // get release info from a internet resource // {promainreleases, prodevreleases, stdmainreleases, stddevreleases} TreeSet[] latestRelease = (TreeSet[]) latestReleases.get(location); @@ -286,7 +285,7 @@ public final class yacyVersion implements Comparator, Comparable { return latestRelease; } - private static TreeSet[] allReleaseFrom(URL url) { + private static TreeSet[] allReleaseFrom(yacyURL url) { // retrieves the latest info about releases // this is done by contacting a release location, // parsing the content and filtering+parsing links @@ -308,7 +307,7 @@ public final class yacyVersion implements Comparator, Comparable { yacyVersion release; while (i.hasNext()) { try { - url = new URL((String) i.next()); + url = new yacyURL((String) i.next(), null); } catch (MalformedURLException e1) { continue; // just ignore invalid urls } diff --git a/source/yacy.java b/source/yacy.java index d97e5c314..d4df71164 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -74,7 +74,6 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; -import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlLURL; @@ -92,6 +91,7 @@ import de.anomic.server.logging.serverLog; import de.anomic.tools.enumerateFiles; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacySeedDB; +import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyVersion; /** @@ -412,7 +412,7 @@ public final class yacy { server.terminate(false); server.interrupt(); if (server.isAlive()) try { - URL u = new URL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port)); + yacyURL u = new yacyURL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port), null); httpc.wget(u, u.getHost(), 1000, null, null, null, null, null); // kick server serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket"); } catch (IOException ee) { diff --git a/test/de/anomic/soap/services/AbstractServiceTest.java b/test/de/anomic/soap/services/AbstractServiceTest.java deleted file mode 100644 index 6bca3292a..000000000 --- 
a/test/de/anomic/soap/services/AbstractServiceTest.java +++ /dev/null @@ -1,77 +0,0 @@ -package de.anomic.soap.services; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.rmi.Remote; -import java.util.Hashtable; -import java.util.Properties; - -import javax.xml.rpc.ServiceException; - -import junit.framework.TestCase; - -import org.apache.axis.MessageContext; -import org.apache.axis.client.Stub; -import org.apache.axis.transport.http.HTTPConstants; - -import de.anomic.http.httpd; - -public abstract class AbstractServiceTest extends TestCase { - protected static final String SOAP_HEADER_NAMESPACE = "http://http.anomic.de/header"; - protected static final String SOAP_HEADER_AUTHORIZATION = "Authorization"; - - protected static String authString; - protected static String peerPort; - protected static Remote service; - - protected void setUp() throws Exception { - this.loadConfigProperties(); - super.setUp(); - } - - protected abstract void createServiceClass() throws ServiceException; - - protected String getBaseServiceURL() { - return "http://localhost:" + peerPort + "/soap/"; - } - - protected void loadConfigProperties() throws Exception { - BufferedInputStream fileInput = null; - try { - File configFile = new File("DATA/SETTINGS/httpProxy.conf"); - System.out.println("Reading config file: " + configFile.getAbsoluteFile().toString()); - fileInput = new BufferedInputStream(new FileInputStream(configFile)); - - // load property list - Properties peerProperties = new Properties(); - peerProperties.load(fileInput); - fileInput.close(); - - // getting admin account auth string - authString = peerProperties.getProperty(httpd.ADMIN_ACCOUNT_B64MD5); - if (authString == null) throw new Exception("Unable to find authentication information."); - - peerPort = peerProperties.getProperty("port"); - if (authString == null) throw new Exception("Unable to find peer port information."); - - // creating the service class - createServiceClass(); - - // setting the authentication header - ((Stub)service).setHeader(SOAP_HEADER_NAMESPACE,SOAP_HEADER_AUTHORIZATION,authString); - - // configure axis to use HTTP 1.1 - ((Stub)service)._setProperty(MessageContext.HTTP_TRANSPORT_VERSION,HTTPConstants.HEADER_PROTOCOL_V11); - - // configure axis to use chunked transfer encoding - Hashtable userHeaderTable = new Hashtable(); - userHeaderTable.put(HTTPConstants.HEADER_TRANSFER_ENCODING, HTTPConstants.HEADER_TRANSFER_ENCODING_CHUNKED); - ((Stub)service)._setProperty(HTTPConstants.REQUEST_HEADERS,userHeaderTable); - } catch (Exception e) { - e.printStackTrace(); - } finally { - if (fileInput != null) try { fileInput.close(); } catch (Exception e){/* ignore this */} - } - } -} diff --git a/test/de/anomic/soap/services/AdminServiceTest.java b/test/de/anomic/soap/services/AdminServiceTest.java deleted file mode 100644 index 10eb7861b..000000000 --- a/test/de/anomic/soap/services/AdminServiceTest.java +++ /dev/null @@ -1,75 +0,0 @@ -package de.anomic.soap.services; - -import java.rmi.RemoteException; -import java.util.HashMap; - -import javax.xml.rpc.ServiceException; -import javax.xml.transform.TransformerException; - -import org.apache.axis.utils.XMLUtils; -import org.apache.xpath.XPathAPI; -import org.w3c.dom.DOMException; -import org.w3c.dom.Document; - -import yacy.soap.admin.AdminService; -import yacy.soap.admin.AdminServiceServiceLocator; - -public class AdminServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { 
- // construct Soap object - AdminServiceServiceLocator locator = new AdminServiceServiceLocator(); - locator.setadminEndpointAddress(getBaseServiceURL() + "admin"); - - service = locator.getadmin(); - } - - private HashMap getMessageForwardingProperties(Document xml) throws DOMException, TransformerException { - HashMap result = new HashMap(); - - result.put("msgForwardingEnabled",Boolean.valueOf(XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingEnabled").getFirstChild().getNodeValue())); - result.put("msgForwardingCmd",XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingCmd").getFirstChild().getNodeValue()); - result.put("msgForwardingTo",XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingTo").getFirstChild().getNodeValue()); - - return result; - } - - public void testMessageForwarding() throws RemoteException, TransformerException { - // backup old values - HashMap oldValues = getMessageForwardingProperties(((AdminService)service).getMessageForwarding()); - - // set new values - Boolean msgEnabled = Boolean.TRUE; - String msgCmd = "/usr/sbin/sendmail"; - String msgTo = "yacy@localhost"; - ((AdminService)service).setMessageForwarding(msgEnabled.booleanValue(),msgCmd,msgTo); - - // query configured properties - Document xml = ((AdminService)service).getMessageForwarding(); - - // check if values are equal - assertEquals(msgEnabled,Boolean.valueOf(XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingEnabled").getFirstChild().getNodeValue())); - assertEquals(msgCmd,XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingCmd").getFirstChild().getNodeValue()); - assertEquals(msgTo,XPathAPI.selectSingleNode(xml,"/msgForwarding/msgForwardingTo").getFirstChild().getNodeValue()); - - // print it out - System.out.println(XMLUtils.DocumentToString(xml)); - - // set back to old values - ((AdminService)service).setMessageForwarding( - ((Boolean)oldValues.get("msgForwardingEnabled")).booleanValue(), - (String)oldValues.get("msgForwardingCmd"), - (String)oldValues.get("msgForwardingTo") - ); - } - - public void testGetServerLog() throws RemoteException { - Document xml = ((AdminService)service).getServerLog(0); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testGetPeerProfile() throws RemoteException { - Document xml = ((AdminService)service).getPeerProfile("localhash"); - System.out.println(XMLUtils.DocumentToString(xml)); - } -} diff --git a/test/de/anomic/soap/services/BlacklistServiceTest.java b/test/de/anomic/soap/services/BlacklistServiceTest.java deleted file mode 100644 index 29e4063c2..000000000 --- a/test/de/anomic/soap/services/BlacklistServiceTest.java +++ /dev/null @@ -1,103 +0,0 @@ -package de.anomic.soap.services; - -import java.io.IOException; -import java.rmi.RemoteException; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.xml.rpc.ServiceException; - -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.PlainTextDataSource; -import org.apache.axis.client.Stub; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.blacklist.BlacklistService; -import yacy.soap.blacklist.BlacklistServiceServiceLocator; - -public class BlacklistServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - BlacklistServiceServiceLocator locator = new BlacklistServiceServiceLocator(); - locator.setblacklistEndpointAddress(getBaseServiceURL() + "blacklist"); - - 
service = locator.getblacklist(); - } - - public void testGetBlacklistList() throws RemoteException { - Document xml = ((BlacklistService)service).getBlacklistList(); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testBlacklist() throws RemoteException { - BlacklistService bl = ((BlacklistService)service); - - // create new blacklist - String blacklistName = "junit_test_" + System.currentTimeMillis(); - bl.createBlacklist(blacklistName,false,null); - - // share blacklist - bl.shareBlacklist(blacklistName); - - // getting supported blacklist Types - String[] blTypes = bl.getBlacklistTypes(); - - // activate blacklist - bl.activateBlacklist(blacklistName,blTypes); - - // add blacklist item - String item = "http://www.yacy.net"; - bl.addBlacklistItem(blacklistName,item); - - // getting the blacklist list - Document xml = bl.getBlacklistList(); - System.out.println(XMLUtils.DocumentToString(xml)); - - // test is listed - boolean isListed = bl.urlIsBlacklisted("proxy","http://www.yacy.net/blacklisttest"); - assertEquals(true,isListed); - - // remove blacklist item - bl.removeBlacklistItem(blacklistName,item); - - // unshare - bl.unshareBlacklist(blacklistName); - - // deactivate for proxy and dht - bl.deactivateBlacklist(blacklistName,new String[]{"proxy","dht"}); - - // delete blacklist - bl.deleteBlacklist(blacklistName); - } - - public void testBacklistImport() throws IOException { - BlacklistService bl = ((BlacklistService)service); - - // create datasource to hold the attachment content - DataSource data = new PlainTextDataSource("import.txt","www.yacy.net/.*\r\n" + - "www.yacy-websuche.de/.*"); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - part.setDataHandler(attachmentFile); - part.setContentType("text/plain"); - - // setting the attachment format that should be used - ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME); - ((Stub)service).addAttachment(part); - - // import it - String blacklistName = "junit_test_" + System.currentTimeMillis(); - bl.importBlacklist(blacklistName); - - // clear attachment - ((Stub)service).clearAttachments(); - - // delete blacklist - bl.deleteBlacklist(blacklistName); - } - -} diff --git a/test/de/anomic/soap/services/BookmarkServiceTest.java b/test/de/anomic/soap/services/BookmarkServiceTest.java deleted file mode 100644 index 2203dab05..000000000 --- a/test/de/anomic/soap/services/BookmarkServiceTest.java +++ /dev/null @@ -1,153 +0,0 @@ -package de.anomic.soap.services; - -import java.net.MalformedURLException; -import java.rmi.RemoteException; -import java.util.Date; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.xml.rpc.ServiceException; - -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.PlainTextDataSource; -import org.apache.axis.client.Stub; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.bookmarks.BookmarkService; -import yacy.soap.bookmarks.BookmarkServiceServiceLocator; -import de.anomic.data.bookmarksDB; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaURL; - -public class BookmarkServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - BookmarkServiceServiceLocator locator = new BookmarkServiceServiceLocator(); - 
locator.setbookmarksEndpointAddress(getBaseServiceURL() + "bookmarks"); - - service = locator.getbookmarks(); - } - - public void testBookmarks() throws Exception { - BookmarkService bm = ((BookmarkService)service); - - String testURL1 = "http://www.yacy.de/testurl1"; - String testURL2 = "http://www.yacy.de/testurl2"; - - // create new bookmark - String urlHash = bm.addBookmark(testURL1,"YaCy Bookmarks Test","YaCy Bookmarks junit test",new String[]{"yacy","bookmarks","testing"},false); - - // change bookmark - urlHash = bm.editBookmark(urlHash,testURL2,null,null,null,false); - - // get bookmark listing - Document xml = bm.getBookmarkList("testing",bookmarksDB.dateToiso8601(new Date(System.currentTimeMillis()))); - System.out.println(XMLUtils.DocumentToString(xml)); - - // get tag list - xml = bm.getBookmarkTagList(); - System.out.println(XMLUtils.DocumentToString(xml)); - - // rename tag - bm.renameTag("testing","tested"); - - // delete tag - bm.deleteBookmarkByHash(urlHash); - } - - public void testImportHtmlBookmarklist() throws RemoteException { - BookmarkService bm = ((BookmarkService)service); - String[] hashs = new String[5]; - - // generate the html file - StringBuffer xmlStr = new StringBuffer(); - xmlStr.append(""); - for (int i=0; i < hashs.length; i++) { - String url = "/testxmlimport" + i; - String title = "YaCy Bookmark XML Import " + i; - String hash = plasmaURL.urlHash("http://www.yacy.de"+ url); - - xmlStr.append("\t").append(title).append("\r\n"); - - hashs[i] = hash; - } - xmlStr.append(""); - - // create datasource to hold the attachment content - DataSource data = new PlainTextDataSource("bookmarks.html",xmlStr.toString()); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - part.setDataHandler(attachmentFile); - - // setting the attachment format that should be used - ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME); - ((Stub)service).addAttachment(part); - - // import xml - int importCount = bm.importHtmlBookmarkFile("http://www.yacy.de/",new String[]{"yacy","bookmarks","htmlimport"},false); - assertEquals(hashs.length,importCount); - - // query imported documents - Document xml = bm.getBookmarkList("htmlimport",null); - System.out.println(XMLUtils.DocumentToString(xml)); - - // delete imported URLS - bm.deleteBookmarksByHash(hashs); - } - - public void testImportXML() throws MalformedURLException, RemoteException { - BookmarkService bm = ((BookmarkService)service); - - String dateString = bookmarksDB.dateToiso8601(new Date(System.currentTimeMillis())); - String[] hashs = new String[5]; - - // generate xml document to import - StringBuffer xmlStr = new StringBuffer(); - xmlStr.append("\r\n") - .append("\r\n"); - - for (int i=0; i < hashs.length; i++) { - URL url = new URL("http://www.yacy.de/testxmlimport" + i); - String title = "YaCy Bookmark XML Import " + i; - String description = "YaCy Bookmarkx XML Import junit test with url " + i; - String hash = plasmaURL.urlHash(url); - String tags = "yacy bookmarks xmlimport"; - - xmlStr.append("\t\r\n"); - - hashs[i] = hash; - } - - xmlStr.append(""); - - // create datasource to hold the attachment content - DataSource data = new PlainTextDataSource("bookmarks.xml",xmlStr.toString()); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - 
part.setDataHandler(attachmentFile); - - // setting the attachment format that should be used - ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME); - ((Stub)service).addAttachment(part); - - // import xml - int importCount = bm.importBookmarkXML(false); - assertEquals(hashs.length,importCount); - - // query imported documents - Document xml = bm.getBookmarkList("xmlimport",dateString); - System.out.println(XMLUtils.DocumentToString(xml)); - - // delete imported URLS - bm.deleteBookmarksByHash(hashs); - } - -} diff --git a/test/de/anomic/soap/services/CrawlServiceTest.java b/test/de/anomic/soap/services/CrawlServiceTest.java deleted file mode 100644 index 3c2ec5ddb..000000000 --- a/test/de/anomic/soap/services/CrawlServiceTest.java +++ /dev/null @@ -1,29 +0,0 @@ -package de.anomic.soap.services; - -import java.rmi.RemoteException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.rpc.ServiceException; - -import org.apache.axis.AxisFault; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.crawl.CrawlService; -import yacy.soap.crawl.CrawlServiceServiceLocator; - -public class CrawlServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - CrawlServiceServiceLocator locator = new CrawlServiceServiceLocator(); - locator.setcrawlEndpointAddress(getBaseServiceURL() + "crawl"); - - service = locator.getcrawl(); - } - - public void testGetCrawlPauseResumeState() throws RemoteException { - Document xml = ((CrawlService)service).getCrawlPauseResumeState(); - System.out.println(XMLUtils.DocumentToString(xml)); - } -} diff --git a/test/de/anomic/soap/services/MessageServiceTest.java b/test/de/anomic/soap/services/MessageServiceTest.java deleted file mode 100644 index 8b0a8bd37..000000000 --- a/test/de/anomic/soap/services/MessageServiceTest.java +++ /dev/null @@ -1,62 +0,0 @@ -package de.anomic.soap.services; - -import java.rmi.RemoteException; - -import javax.xml.rpc.ServiceException; - -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.messages.MessageService; -import yacy.soap.messages.MessageServiceServiceLocator; - - - -public class MessageServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - MessageServiceServiceLocator locator = new MessageServiceServiceLocator(); - locator.setmessagesEndpointAddress(getBaseServiceURL() + "messages"); - - service = locator.getmessages(); - } - - public void testGetMessageIDs() throws RemoteException { - MessageService ms = ((MessageService)service); - String[] IDs = ms.getMessageIDs(); - - StringBuffer idList = new StringBuffer(); - for (int i=0; i < IDs.length; i++) { - if (i > 0) idList.append(", "); - idList.append(IDs[i]); - } - - System.out.println(idList); - } - - public void testGetMessageHeaderList() throws RemoteException { - MessageService ms = ((MessageService)service); - Document xml = ms.getMessageHeaderList(); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testMessage() throws RemoteException { - MessageService ms = ((MessageService)service); - - // get message IDs - String[] IDs = ms.getMessageIDs(); - - if (IDs != null && IDs.length > 0) { - Document xml = ms.getMessage(IDs[0]); - System.out.println(XMLUtils.DocumentToString(xml)); - } - } - - public void 
testGetMessageSendPermission() throws RemoteException { - MessageService ms = ((MessageService)service); - - Document xml = ms.getMessageSendPermission("mseSVGrNKKnw"); - System.out.println(XMLUtils.DocumentToString(xml)); - } -} diff --git a/test/de/anomic/soap/services/ServiceTests.java b/test/de/anomic/soap/services/ServiceTests.java deleted file mode 100644 index fe51621aa..000000000 --- a/test/de/anomic/soap/services/ServiceTests.java +++ /dev/null @@ -1,21 +0,0 @@ -package de.anomic.soap.services; - -import junit.framework.Test; -import junit.framework.TestSuite; - -public class ServiceTests { - - public static Test suite() { - TestSuite suite = new TestSuite("Test for de.anomic.soap.services"); - //$JUnit-BEGIN$ - suite.addTestSuite(AdminServiceTest.class); - suite.addTestSuite(ShareServiceTest.class); - suite.addTestSuite(StatusServiceTest.class); - suite.addTestSuite(BlacklistServiceTest.class); - suite.addTestSuite(BookmarkServiceTest.class); - suite.addTestSuite(MessageServiceTest.class); - //$JUnit-END$ - return suite; - } - -} diff --git a/test/de/anomic/soap/services/ShareServiceTest.java b/test/de/anomic/soap/services/ShareServiceTest.java deleted file mode 100644 index 0f7dde64f..000000000 --- a/test/de/anomic/soap/services/ShareServiceTest.java +++ /dev/null @@ -1,109 +0,0 @@ -package de.anomic.soap.services; - -import java.io.IOException; -import java.util.Date; - -import javax.activation.DataHandler; -import javax.activation.DataSource; -import javax.xml.rpc.ServiceException; -import javax.xml.soap.SOAPException; - -import org.apache.axis.attachments.AttachmentPart; -import org.apache.axis.attachments.PlainTextDataSource; -import org.apache.axis.client.Stub; -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.share.ShareService; -import yacy.soap.share.ShareServiceServiceLocator; -import de.anomic.server.serverFileUtils; - -public class ShareServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - ShareServiceServiceLocator locator = new ShareServiceServiceLocator(); - locator.setshareEndpointAddress(getBaseServiceURL() + "share"); - service = locator.getshare(); - } - - public void testCreateDeleteDir() throws SOAPException, IOException { - String newDirName = "junit_test_" + System.currentTimeMillis(); - String newFileName = "import.txt"; - - /* =================================================================== - * Create directory - * =================================================================== */ - System.out.println("Creating new directory ..."); - ((ShareService)service).createDirectory("/",newDirName); - - /* =================================================================== - * Upload file - * =================================================================== */ - System.out.println("Uploading test file ..."); - - // create datasource to hold the attachment content - String testText = "Test text of the test file"; - DataSource data = new PlainTextDataSource(newFileName,testText); - DataHandler attachmentFile = new DataHandler(data); - - // creating attachment part - AttachmentPart part = new AttachmentPart(); - part.setDataHandler(attachmentFile); - part.setContentType("text/plain"); - part.setContentId(newFileName); - - // setting the attachment format that should be used - ((Stub)service)._setProperty(org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT,org.apache.axis.client.Call.ATTACHMENT_ENCAPSULATION_FORMAT_MIME); - 
((Stub)service).addAttachment(part); - ((ShareService)service).uploadFile(newDirName,true,"jUnit Testupload at " + new Date()); - - // clear attachment - ((Stub)service).clearAttachments(); - - /* =================================================================== - * Download file - * =================================================================== */ - System.out.println("Downloading test file ..."); - - // execute service call - String md5 = ((ShareService)service).getFile(newDirName,newFileName); - - // get received attachments - Object[] attachments = ((Stub)service).getAttachments(); - - assertTrue(attachments.length == 1); - assertTrue(attachments[0] instanceof AttachmentPart); - - // get datahandler - DataHandler dh = ((AttachmentPart)attachments[0]).getDataHandler(); - - // cread content - byte[] content = serverFileUtils.read(dh.getInputStream()); - assertTrue(content.length > 0); - - // convert it to string - String contentString = new String(content,"UTF-8"); - assertEquals(testText,contentString); - - /* =================================================================== - * Change file comment - * =================================================================== */ - System.out.println("Changing file comment ..."); - ((ShareService)service).changeComment(newDirName,newFileName,"New comment on this file",true); - - /* =================================================================== - * Get dirlist - * =================================================================== */ - System.out.println("Get dirlist ... "); - Document xml =((ShareService)service).getDirList(newDirName); - System.out.println(XMLUtils.DocumentToString(xml)); - - /* =================================================================== - * Delete directory - * =================================================================== */ - System.out.println("Deleting directory and testfile ... 
"); - ((ShareService)service).delete("/",newDirName); - } - -} diff --git a/test/de/anomic/soap/services/StatusServiceTest.java b/test/de/anomic/soap/services/StatusServiceTest.java deleted file mode 100644 index 3d8ffa846..000000000 --- a/test/de/anomic/soap/services/StatusServiceTest.java +++ /dev/null @@ -1,42 +0,0 @@ -package de.anomic.soap.services; - -import java.rmi.RemoteException; - -import javax.xml.rpc.ServiceException; - -import org.apache.axis.utils.XMLUtils; -import org.w3c.dom.Document; - -import yacy.soap.status.StatusService; -import yacy.soap.status.StatusServiceServiceLocator; - -public class StatusServiceTest extends AbstractServiceTest { - - protected void createServiceClass() throws ServiceException { - // construct Soap object - StatusServiceServiceLocator locator = new StatusServiceServiceLocator(); - locator.setstatusEndpointAddress(getBaseServiceURL() + "status"); - - service = locator.getstatus(); - } - - public void testNetworkOverview() throws RemoteException { - Document xml = ((StatusService)service).getNetworkOverview(); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testGetQueueStatus() throws RemoteException { - Document xml = ((StatusService)service).getQueueStatus(10,10,10,10); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testStatus() throws RemoteException { - Document xml = ((StatusService)service).getStatus(); - System.out.println(XMLUtils.DocumentToString(xml)); - } - - public void testPeerList() throws RemoteException { - Document xml = ((StatusService)service).peerList("active",300,true); - System.out.println(XMLUtils.DocumentToString(xml)); - } -}