* add a bit of documentation to DigestURI; use DigestURI(string) instead of DigestURI(string, null)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7276 6c8d7289-2bf4-0310-a012-ef5d649a1542
f1ori 15 years ago
parent 25a8e55bc9
commit 7d8de34778
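Every hunk below makes the same mechanical change: the explicit null hash argument is dropped in favor of the new one-argument convenience constructor, which delegates to DigestURI(url, null), so the hash is still computed lazily on the first hash() call. A minimal sketch of the call-site pattern (the package path and the surrounding scaffolding are assumptions for illustration, not part of this commit):

```java
import java.net.MalformedURLException;
import net.yacy.kelondro.data.meta.DigestURI; // assumed package path

public class CallSiteExample {
    public static void main(final String[] args) {
        final String urlstring = "http://yacy.net/";
        DigestURI testurl = null;
        try {
            // before this commit: testurl = new DigestURI(urlstring, null);
            testurl = new DigestURI(urlstring); // delegates to this(url, null)
        } catch (final MalformedURLException e) { testurl = null; }
        // the hash is computed on demand by hash()
        if (testurl != null) System.out.println(new String(testurl.hash()));
    }
}
```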

@@ -63,7 +63,7 @@ public class BlacklistTest_p {
!urlstring.startsWith("file://")) urlstring = "http://" + urlstring;
DigestURI testurl = null;
try {
- testurl = new DigestURI(urlstring, null);
+ testurl = new DigestURI(urlstring);
} catch (final MalformedURLException e) { testurl = null; }
if(testurl != null) {
prop.putHTML("url",testurl.toString());

@@ -92,7 +92,7 @@ public class Blacklist_p {
!urlstring.startsWith("file://")) urlstring = "http://"+urlstring;
DigestURI testurl = null;
try {
- testurl = new DigestURI(urlstring, null);
+ testurl = new DigestURI(urlstring);
} catch (final MalformedURLException e) { testurl = null; }
if(testurl != null) {
prop.putHTML("testlist_url",testurl.toString());

@@ -43,7 +43,7 @@ public class CacheResource_p {
final String u = post.get("url", "");
DigestURI url;
try {
- url = new DigestURI(u, null);
+ url = new DigestURI(u);
} catch (MalformedURLException e) {
Log.logException(e);
return prop;

@@ -98,7 +98,7 @@ public class ConfigAppearance_p {
Iterator<String> it;
try {
- final DigestURI u = new DigestURI(url, null);
+ final DigestURI u = new DigestURI(url);
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
} catch (final IOException e) {
prop.put("status", "1");// unable to get URL

@@ -102,7 +102,7 @@ public class ConfigLanguage_p {
final String url = post.get("url");
Iterator<String> it;
try{
- final DigestURI u = new DigestURI(url, null);
+ final DigestURI u = new DigestURI(url);
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
}catch(final IOException e){
prop.put("status", "1");//unable to get url

@@ -81,7 +81,7 @@ public class ConfigUpdate_p {
final String release = post.get("releasedownload", "");
if (release.length() > 0) {
try {
- yacyRelease versionToDownload = new yacyRelease(new DigestURI(release, null));
+ yacyRelease versionToDownload = new yacyRelease(new DigestURI(release));
// replace this version with version which contains public key
yacyRelease.DevAndMainVersions allReleases = yacyRelease.allReleases(false, false);

@@ -131,7 +131,7 @@ public class Crawler_p {
// normalize URL
DigestURI crawlingStartURL = null;
- try {crawlingStartURL = new DigestURI(crawlingStart, null);} catch (final MalformedURLException e1) {}
+ try {crawlingStartURL = new DigestURI(crawlingStart);} catch (final MalformedURLException e1) {}
crawlingStart = (crawlingStartURL == null) ? null : crawlingStartURL.toNormalform(true, true);
// set new properties
@@ -240,7 +240,7 @@ public class Crawler_p {
// stack request
// first delete old entry, if exists
- final DigestURI url = new DigestURI(crawlingStart, null);
+ final DigestURI url = new DigestURI(crawlingStart);
final byte[] urlhash = url.hash();
indexSegment.urlMetadata().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
@@ -358,7 +358,7 @@ public class Crawler_p {
FileUtils.copy(fileString, writer);
writer.close();
final Map<MultiProtocolURI, String> hyperlinks = scraper.getAnchors();
- final DigestURI crawlURL = new DigestURI("file://" + file.toString(), null);
+ final DigestURI crawlURL = new DigestURI("file://" + file.toString());
final CrawlProfile profile = new CrawlProfile(
fileName,
crawlURL,
@@ -413,7 +413,7 @@ public class Crawler_p {
} else if (crawlingMode.equals("sitemap")) {
String sitemapURLStr = post.get("sitemapURL","");
try {
- final DigestURI sitemapURL = new DigestURI(sitemapURLStr, null);
+ final DigestURI sitemapURL = new DigestURI(sitemapURLStr);
final CrawlProfile pe = new CrawlProfile(
sitemapURLStr,
sitemapURL,
@@ -443,7 +443,7 @@ public class Crawler_p {
}
} else if (crawlingMode.equals("sitelist")) {
try {
- final DigestURI sitelistURL = new DigestURI(crawlingStart, null);
+ final DigestURI sitelistURL = new DigestURI(crawlingStart);
// download document
ContentScraper scraper = null;
scraper = sb.loader.parseResource(sitelistURL, CrawlProfile.CacheStrategy.IFFRESH);

@@ -163,7 +163,7 @@ public class IndexControlURLs_p {
if (post.containsKey("urldelete")) {
try {
- urlhash = new String((new DigestURI(urlstring, null)).hash());
+ urlhash = new String((new DigestURI(urlstring)).hash());
} catch (final MalformedURLException e) {
urlhash = null;
}
@@ -179,7 +179,7 @@ public class IndexControlURLs_p {
if (post.containsKey("urlstringsearch")) {
try {
- final DigestURI url = new DigestURI(urlstring, null);
+ final DigestURI url = new DigestURI(urlstring);
urlhash = new String(url.hash());
prop.put("urlhash", urlhash);
final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes(), null, 0);

@@ -59,7 +59,7 @@ public class IndexImportOAIPMH_p {
if (oaipmhurl.indexOf("?") < 0) oaipmhurl = oaipmhurl + "?verb=ListRecords&metadataPrefix=oai_dc";
DigestURI url = null;
try {
- url = new DigestURI(oaipmhurl, null);
+ url = new DigestURI(oaipmhurl);
OAIPMHLoader r = new OAIPMHLoader(sb.loader, url, sb.surrogatesInPath, "oaipmh-one");
ResumptionToken rt = r.getResumptionToken();
prop.put("import-one", 1);
@@ -93,7 +93,7 @@ public class IndexImportOAIPMH_p {
sb.tables.recordAPICall(post, "IndexImportOAIPMH_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "OAI-PMH import for " + oaipmhurl);
DigestURI url = null;
try {
- url = new DigestURI(oaipmhurl, null);
+ url = new DigestURI(oaipmhurl);
OAIPMHImporter job = new OAIPMHImporter(sb.loader, url);
job.start();
prop.put("status", 1);
@@ -129,7 +129,7 @@ public class IndexImportOAIPMH_p {
while (sourceList.size() > 0) {
String oaipmhurl = sourceList.remove(r.nextInt(sourceList.size()));
try {
- url = new DigestURI(oaipmhurl, null);
+ url = new DigestURI(oaipmhurl);
OAIPMHImporter job = new OAIPMHImporter(sb.loader, url);
job.start();
} catch (MalformedURLException e) {

@@ -246,7 +246,7 @@ public class Load_RSS_p {
DigestURI url = null;
try {
- url = post.containsKey("url") ? new DigestURI(post.get("url", ""), null) : null;
+ url = post.containsKey("url") ? new DigestURI(post.get("url", "")) : null;
} catch (MalformedURLException e) {
Log.logWarning("Load_RSS_p", "url not well-formed: '" + post.get("url", "") + "'");
}
@@ -311,7 +311,7 @@ public class Load_RSS_p {
int i = 0;
for (final Hit item: feed) {
try {
- DigestURI messageurl = new DigestURI(item.getLink(), null);
+ DigestURI messageurl = new DigestURI(item.getLink());
author = item.getAuthor();
if (author == null) author = item.getCopyright();
pubDate = item.getPubDate();

@@ -125,12 +125,12 @@ public class QuickCrawlLink_p {
if (crawlingStart != null) {
crawlingStart = crawlingStart.trim();
- try {crawlingStart = new DigestURI(crawlingStart, null).toNormalform(true, true);} catch (final MalformedURLException e1) {}
+ try {crawlingStart = new DigestURI(crawlingStart).toNormalform(true, true);} catch (final MalformedURLException e1) {}
// check if url is proper
DigestURI crawlingStartURL = null;
try {
- crawlingStartURL = new DigestURI(crawlingStart, null);
+ crawlingStartURL = new DigestURI(crawlingStart);
} catch (final MalformedURLException e) {
prop.put("mode_status", "1");
prop.put("mode_code", "1");

@@ -243,13 +243,13 @@ public class Supporter {
// add/subtract votes and write record
if (entry != null) {
try {
- urlhash = new String((new DigestURI(url, null)).hash());
+ urlhash = new String((new DigestURI(url)).hash());
} catch (final MalformedURLException e) {
urlhash = null;
}
if (urlhash == null)
try {
- urlhash = new String((new DigestURI("http://" + url, null)).hash());
+ urlhash = new String((new DigestURI("http://" + url)).hash());
} catch (final MalformedURLException e) {
urlhash = null;
}

@@ -135,7 +135,7 @@ public class Surftips {
url = row.getColString(0, null);
try{
- if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url, null)))
+ if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url)))
continue;
}catch(final MalformedURLException e){continue;};
title = row.getColString(1,"UTF-8");
@@ -305,13 +305,13 @@ public class Surftips {
// add/subtract votes and write record
if (entry != null) {
try {
- urlhash = new String((new DigestURI(url, null)).hash());
+ urlhash = new String((new DigestURI(url)).hash());
} catch (final MalformedURLException e) {
urlhash = null;
}
if (urlhash == null)
try {
- urlhash = new String((new DigestURI("http://"+url, null)).hash());
+ urlhash = new String((new DigestURI("http://"+url)).hash());
} catch (final MalformedURLException e) {
urlhash = null;
}

@@ -149,7 +149,7 @@ public class ViewFile {
}
// define an url by post parameter
- url = new DigestURI(MultiProtocolURI.unescape(urlString), null);
+ url = new DigestURI(MultiProtocolURI.unescape(urlString));
urlHash = new String(url.hash());
pre = post.get("pre", "false").equals("true");
} catch (final MalformedURLException e) {}

@@ -68,7 +68,7 @@ public class ViewImage {
DigestURI url = null;
if ((urlString.length() > 0) && (auth)) try {
- url = new DigestURI(urlString, null);
+ url = new DigestURI(urlString);
} catch (final MalformedURLException e1) {
url = null;
}

@@ -104,7 +104,7 @@ public class WebStructurePicture_p {
// find start hash
String hash = null;
try {
- hash = new String((new DigestURI("http://" + host, null)).hash(), 6, 6);
+ hash = new String((new DigestURI("http://" + host)).hash(), 6, 6);
} catch (final MalformedURLException e) {Log.logException(e);}
//assert (sb.webStructure.outgoingReferences(hash) != null);

@@ -23,7 +23,7 @@ public class delete_p {
return prop;
}
try {
- if (post.containsKey("url") && switchboard.bookmarksDB.removeBookmark(new String((new DigestURI(post.get("url", "nourl"), null)).hash()))) {
+ if (post.containsKey("url") && switchboard.bookmarksDB.removeBookmark(new String((new DigestURI(post.get("url", "nourl"))).hash()))) {
prop.put("result", "1");
} else if (post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))) {
prop.put("result", "1");

@@ -50,7 +50,7 @@ public class getpageinfo_p {
if (actions.indexOf("title")>=0) {
DigestURI u = null;
try {
- u = new DigestURI(url, null);
+ u = new DigestURI(url);
} catch (final MalformedURLException e) {
// fail, do nothing
}
@@ -101,7 +101,7 @@ public class getpageinfo_p {
}
if (actions.indexOf("robots")>=0) {
try {
- final DigestURI theURL = new DigestURI(url, null);
+ final DigestURI theURL = new DigestURI(url);
// determine if crawling of the current URL is allowed
prop.put("robots-allowed", sb.robots.isDisallowed(theURL) ? "0" : "1");

@@ -47,7 +47,7 @@ public class webstructure {
DigestURI url = null;
if (about.length() > 6) {
try {
- url = new DigestURI(about, null);
+ url = new DigestURI(about);
about = new String(url.hash(), 6, 6);
} catch (MalformedURLException e) {
about = null;

@@ -75,7 +75,7 @@ public class yacydoc {
if (urlstring.length() > 0 && urlhash.length() == 0) {
try {
- DigestURI url = new DigestURI(urlstring, null);
+ DigestURI url = new DigestURI(urlstring);
urlhash = new String(url.hash());
} catch (MalformedURLException e) {
Log.logException(e);

@@ -61,7 +61,7 @@ public class rct_p {
// put url on remote crawl stack
DigestURI url;
try {
- url = new DigestURI(item.getLink(), null);
+ url = new DigestURI(item.getLink());
} catch (final MalformedURLException e) {
url = null;
}

@@ -141,7 +141,7 @@ public class sharedBlacklist_p {
// download the blacklist
try {
// get List
- DigestURI u = new DigestURI(downloadURLOld, null);
+ DigestURI u = new DigestURI(downloadURLOld);
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
} catch (final Exception e) {
@@ -159,7 +159,7 @@ public class sharedBlacklist_p {
prop.putHTML("page_source", downloadURL);
try {
- final DigestURI u = new DigestURI(downloadURL, null);
+ final DigestURI u = new DigestURI(downloadURL);
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
} catch (final Exception e) {
prop.put("status", STATUS_URL_PROBLEM);

@@ -108,7 +108,7 @@ public class yacysearchitem {
final int port=result.url().getPort();
DigestURI faviconURL = null;
if (isHtml && !sb.isIntranetMode() && !result.url().isLocal()) try {
- faviconURL = new DigestURI(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + port) : "") + "/favicon.ico", null);
+ faviconURL = new DigestURI(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + port) : "") + "/favicon.ico");
} catch (final MalformedURLException e1) {
Log.logException(e1);
faviconURL = null;

@@ -420,12 +420,12 @@ public class CrawlQueues {
// put url on remote crawl stack
try {
- url = new DigestURI(item.getLink(), null);
+ url = new DigestURI(item.getLink());
} catch (final MalformedURLException e) {
continue;
}
try {
- referrer = new DigestURI(item.getReferrer(), null);
+ referrer = new DigestURI(item.getReferrer());
} catch (final MalformedURLException e) {
referrer = null;
}

@@ -104,7 +104,7 @@ public class SitemapImporter extends Thread {
byte[] nexturlhash = null;
DigestURI url = null;
try {
- url = new DigestURI(entry.url(), null);
+ url = new DigestURI(entry.url());
nexturlhash = url.hash();
} catch (final MalformedURLException e1) {
}

@@ -747,7 +747,7 @@ public class Response {
try {
String r = requestHeader.get(RequestHeader.REFERER, null);
if (r == null) return null;
- return new DigestURI(r, null);
+ return new DigestURI(r);
} catch (final Exception e) {
return null;
}
@@ -758,7 +758,7 @@ public class Response {
String u = requestHeader.get(RequestHeader.REFERER, "");
if (u == null || u.length() == 0) return null;
try {
- return new DigestURI(u, null).hash();
+ return new DigestURI(u).hash();
} catch (final Exception e) {
return null;
}

@@ -76,7 +76,7 @@ public class URLAnalysis {
private static DigestURI poison = null;
static {
try {
- poison = new DigestURI("http://poison.org/poison", null);
+ poison = new DigestURI("http://poison.org/poison");
} catch (MalformedURLException e) {
poison = null;
}
@@ -177,7 +177,7 @@ public class URLAnalysis {
line = line.trim();
if (line.length() > 0) {
try {
- DigestURI url = new DigestURI(line, null);
+ DigestURI url = new DigestURI(line);
in.put(url);
} catch (InterruptedException e) {
Log.logException(e);
@@ -279,7 +279,7 @@ public class URLAnalysis {
line = line.trim();
if (line.length() > 0) {
try {
- DigestURI url = new DigestURI(line, null);
+ DigestURI url = new DigestURI(line);
hosts.add(url.getHost());
} catch (MalformedURLException e) {
continue;
@@ -369,7 +369,7 @@ public class URLAnalysis {
line = line.trim();
if (line.length() > 0) {
try {
- DigestURI url = new DigestURI(line, null);
+ DigestURI url = new DigestURI(line);
urls.add(url.toNormalform(true, true));
} catch (MalformedURLException e) {
continue;

@@ -491,7 +491,7 @@ public class bookmarksDB {
url="http://"+url;
}
try {
- this.urlHash = new String((new DigestURI(url, null)).hash());
+ this.urlHash = new String((new DigestURI(url)).hash());
} catch (final MalformedURLException e) {
this.urlHash = null;
}
@@ -512,7 +512,7 @@ public class bookmarksDB {
}
public Bookmark(final Map<String, String> map) throws MalformedURLException {
- this(new String((new DigestURI(map.get(BOOKMARK_URL), null)).hash()), map);
+ this(new String((new DigestURI(map.get(BOOKMARK_URL))).hash()), map);
}
Map<String, String> toMap() {

@@ -886,7 +886,7 @@ public final class HTTPDFileHandler {
// save position
fis.mark(1000);
// scrape document to look up charset
- final ScraperInputStream htmlFilter = new ScraperInputStream(fis,"UTF-8",new DigestURI("http://localhost", null),null,false);
+ final ScraperInputStream htmlFilter = new ScraperInputStream(fis,"UTF-8",new DigestURI("http://localhost"),null,false);
final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
if(charset != null)
mimeType = mimeType + "; charset="+charset;

@@ -324,7 +324,7 @@ public final class HTTPDProxyHandler {
final String newUrl = redirectorReader.readLine();
if (!newUrl.equals("")) {
try {
- url = new DigestURI(newUrl, null);
+ url = new DigestURI(newUrl);
} catch(final MalformedURLException e){}//just keep the old one
}
if (log.isFinest()) log.logFinest(reqID +" using redirector to "+ url);

@@ -286,7 +286,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
if ((pos = oldUrlStr.indexOf("://")) != -1) {
// trying to correct the url
final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
- final DigestURI newUrl = new DigestURI(newUrlStr, null);
+ final DigestURI newUrl = new DigestURI(newUrlStr);
// doing a http head request to test if the url is correct
// final Client client = new Client(10000);

@@ -731,7 +731,7 @@ public final class Switchboard extends serverSwitch {
netload: for (String netdef: netdefs) {
netdef = netdef.trim();
try {
- netdefmap = Switchboard.loadFileAsMap(new DigestURI(netdef, null));
+ netdefmap = Switchboard.loadFileAsMap(new DigestURI(netdef));
if (netdefmap == null || netdefmap.size() == 0) continue netload;
setConfig(netdefmap);
break netload;
@@ -748,7 +748,7 @@ public final class Switchboard extends serverSwitch {
}
if (networkGroupDefinition.startsWith("http://")) {
try {
- setConfig(Switchboard.loadFileAsMap(new DigestURI(networkGroupDefinition, null)));
+ setConfig(Switchboard.loadFileAsMap(new DigestURI(networkGroupDefinition)));
} catch (final MalformedURLException e) { }
} else {
final File networkGroupDefinitionFile = new File(getAppPath(), networkGroupDefinition);
@@ -769,7 +769,7 @@ public final class Switchboard extends serverSwitch {
DigestURI locationURL;
try {
// try to parse url
- locationURL = new DigestURI(location, null);
+ locationURL = new DigestURI(location);
} catch (final MalformedURLException e) {
break;
}
@@ -1812,7 +1812,7 @@ public final class Switchboard extends serverSwitch {
try {
crawlStacker.enqueueEntry(new Request(
response.initiator(),
- new DigestURI(u, null),
+ new DigestURI(u),
response.url().hash(),
nextEntry.getValue(),
new Date(),
@@ -2289,7 +2289,7 @@ public final class Switchboard extends serverSwitch {
// get the links for a specific site
DigestURI url;
try {
- url = new DigestURI(r, null);
+ url = new DigestURI(r);
} catch (MalformedURLException e) {
Log.logException(e);
return;
@@ -2325,7 +2325,7 @@ public final class Switchboard extends serverSwitch {
final String urlString = "http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=" + query.trim().replaceAll(" ", "+") + "&n=2";
DigestURI url;
try {
- url = new DigestURI(MultiProtocolURI.unescape(urlString), null);
+ url = new DigestURI(MultiProtocolURI.unescape(urlString));
} catch (MalformedURLException e1) {
return;
}
@@ -2441,7 +2441,7 @@ public final class Switchboard extends serverSwitch {
// load the seed list
try {
- url = new DigestURI(seedListFileURL, null);
+ url = new DigestURI(seedListFileURL);
//final long start = System.currentTimeMillis();
client.HEADResponse(url.toString());
header = new ResponseHeader(client.getHttpResponse().getAllHeaders());

@@ -161,8 +161,8 @@ public class RCIEvaluation {
dom = i.next();
if (dom.startsWith("www.")) dom = dom.substring(4);
try {
- dommap.put(new String((new DigestURI("http://" + dom, null)).hash(), 6, 6), dom);
- dommap.put(new String((new DigestURI("http://www." + dom, null)).hash(), 6, 6), "www." + dom);
+ dommap.put(new String((new DigestURI("http://" + dom)).hash(), 6, 6), dom);
+ dommap.put(new String((new DigestURI("http://www." + dom)).hash(), 6, 6), "www." + dom);
} catch (final MalformedURLException e) {}
}
return dommap;

@@ -102,7 +102,7 @@ public class OSMTile {
public static BufferedImage getSingleTile(final tileCoordinates tile, int retry) {
DigestURI tileURL;
try {
- tileURL = new DigestURI(tile.url(retry), null);
+ tileURL = new DigestURI(tile.url(retry));
} catch (final MalformedURLException e) {
return null;
}

@@ -627,7 +627,7 @@ public class yacyCore {
)) {
throw new MalformedURLException("Unsupported protocol.");
}
- seedURL = new DigestURI(seedURLStr, null);
+ seedURL = new DigestURI(seedURLStr);
} catch (final MalformedURLException e) {
final String errorMsg = "Malformed seed file URL '" + sb.peers.mySeed().get(yacySeed.SEEDLIST, "") + "'. " + e.getMessage();
log.logWarning("SaveSeedList: " + errorMsg);

@@ -351,13 +351,13 @@ public class yacyNewsPool {
if (record.created().getTime() == 0) return;
final Map<String, String> attributes = record.attributes();
if (attributes.containsKey("url")){
- if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url"), null))){
+ if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url"));
return;
}
}
if (attributes.containsKey("startURL")){
- if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL"), null))){
+ if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL"));
return;
}

@@ -115,7 +115,7 @@ public class DCEntry extends TreeMap<String, String> {
u = bestU(urls);
}
try {
- return new DigestURI(u, null);
+ return new DigestURI(u);
} catch (MalformedURLException e) {
if (useRelationAsAlternative) {
DigestURI relation = this.getRelation();
@@ -136,7 +136,7 @@ public class DCEntry extends TreeMap<String, String> {
u = bestU(urls);
}
try {
- return new DigestURI(u, null);
+ return new DigestURI(u);
} catch (MalformedURLException e) {
Log.logWarning("DCEntry", "getRelation: url is bad: " + e.getMessage());
return null;

@@ -260,7 +260,7 @@ public class PhpBB3Dao implements Dao {
) {
try {
// generate output file name and attributes
- String targethost = new DigestURI(this.urlstub, null).getHost();
+ String targethost = new DigestURI(this.urlstub).getHost();
int fc = 0;
File outputfiletmp = null, outputfile = null;

@@ -490,7 +490,7 @@ public class MediawikiImporter extends Thread implements Importer {
}
public void genDocument() throws Parser.Failure {
try {
- url = new DigestURI(urlStub + title, null);
+ url = new DigestURI(urlStub + title);
document = Document.mergeDocuments(url, "text/html", TextParser.parseSource(url, "text/html", "utf-8", html.getBytes("UTF-8")));
// the wiki parser is not able to find the proper title in the source text, so it must be set here
document.setTitle(title);

@@ -81,7 +81,7 @@ public class OAIListFriendsLoader {
Map<String, String> m;
for (Map.Entry<String, File> oaiFriend: listFriends.entrySet()) try {
if (!oaiFriend.getValue().exists()) {
- Response response = loader == null ? null : loader.load(loader.request(new DigestURI(oaiFriend.getKey(), null), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+ Response response = loader == null ? null : loader.load(loader.request(new DigestURI(oaiFriend.getKey()), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
if (response != null) FileUtils.copy(response.getContent(), oaiFriend.getValue());
}

@@ -74,7 +74,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPMHImporter> {
String url = ResumptionToken.truncatedURL(source);
if (!url.endsWith("?")) url = url + "?";
try {
- this.source = new DigestURI(url + "verb=ListRecords&metadataPrefix=oai_dc", null);
+ this.source = new DigestURI(url + "verb=ListRecords&metadataPrefix=oai_dc");
} catch (MalformedURLException e) {
// this should never happen
Log.logException(e);

@@ -135,7 +135,7 @@ public class ResumptionToken extends TreeMap<String, String> {
// encoded state
if (token.indexOf("from=") >= 0) {
- return new DigestURI(url + "verb=ListRecords&" + token, null);
+ return new DigestURI(url + "verb=ListRecords&" + token);
}
// cached result set
@@ -146,7 +146,7 @@ public class ResumptionToken extends TreeMap<String, String> {
// the resumption token is still fresh
}
String u = url + "verb=ListRecords&resumptionToken=" + escape(token);
- return new DigestURI(u, null);
+ return new DigestURI(u);
}
public static StringBuilder escape(final String s) {

@@ -581,7 +581,7 @@ public final class TransformerWriter extends Writer {
System.exit(0);
final char[] buffer = new char[512];
try {
- final ContentScraper scraper = new ContentScraper(new DigestURI("http://localhost:8080", null));
+ final ContentScraper scraper = new ContentScraper(new DigestURI("http://localhost:8080"));
final Transformer transformer = new ContentTransformer();
final Reader is = new FileReader(args[0]);
final FileOutputStream fos = new FileOutputStream(new File(args[0] + ".out"));

@@ -38,7 +38,13 @@ import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.ByteArray;
+ /**
+  * URI object providing YaCy hash computation.
+  *
+  * Hashes for URIs are split into several parts.
+  * For URIs pointing to resources that are not globally available,
+  * the domain-hash part gets one reserved value.
+  */
public class DigestURI extends MultiProtocolURI implements Serializable {
private static final long serialVersionUID = -1173233022912141885L;
@@ -47,12 +53,17 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
// class variables
private byte[] hash;
+ /**
+  * Shortcut: calculate the hash for a shortened URL/hostname
+  * @param host
+  * @return
+  */
public static String domhash(final String host) {
String h = host;
if (!h.startsWith("http://")) h = "http://" + h;
DigestURI url = null;
try {
- url = new DigestURI(h, null);
+ url = new DigestURI(h);
} catch (MalformedURLException e) {
Log.logException(e);
return null;
@@ -60,24 +71,45 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
return (url == null) ? null : new String(url.hash(), 6, 6);
}
+ /**
+  * DigestURI from File
+  */
public DigestURI(final File file) throws MalformedURLException {
this("file", "", -1, file.getAbsolutePath());
}
+ /**
+  * DigestURI from a URI string
+  */
+ public DigestURI(final String url) throws MalformedURLException {
+     this(url, null);
+ }
+ /**
+  * DigestURI from a URI string; the hash is already calculated
+  * @param url
+  * @param hash the already calculated hash for the url
+  * @throws MalformedURLException
+  */
public DigestURI(final String url, final byte[] hash) throws MalformedURLException {
super(url);
this.hash = hash;
}
+ /**
+  * DigestURI from a general URI
+  * @param baseURL
+  */
public DigestURI(final MultiProtocolURI baseURL) {
super(baseURL);
this.hash = null;
}
+ /**
+  * DigestURI from a general URI; the hash is already calculated
+  * @param baseURL
+  * @param hash
+  */
public DigestURI(final MultiProtocolURI baseURL, final byte[] hash) {
super(baseURL);
this.hash = hash;
@@ -113,6 +145,10 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
return (Base64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 3);
}
+ /**
+  * get the YaCy hash of this URI
+  * @return
+  */
public final byte[] hash() {
// in case that the object was initialized without a known url hash, compute it now
synchronized (this) {
@@ -121,6 +157,14 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
return this.hash;
}
+ /**
+  * calculate the YaCy hash of this URI
+  *
+  * @note needs a DNS lookup to check whether the address's domain is local,
+  *       which can make this method very slow
+  *
+  * @return hash
+  */
private final byte[] urlHashComputation() {
// the url hash computation needs a DNS lookup to check if the addresses domain is local
// that causes that this method may be very slow
@@ -266,7 +310,9 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
return domDomain(urlHash) == id;
}
- // checks for local/global IP range and local IP
+ /**
+  * checks for the local/global IP range and a local IP
+  */
public final boolean isLocal() {
if (this.isSMB() || this.isFile()) return true;
if (this.hash == null) synchronized (this) {
@@ -277,6 +323,11 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
return domDomain(this.hash) == 7;
}
+ /**
+  * checks whether the hash is in the local/global IP range
+  * @param urlhash
+  * @return
+  */
public static final boolean isLocal(final byte[] urlhash) {
return domDomain(urlhash) == 7;
}
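For reference, a short usage sketch of the API documented above (not part of the commit; the package path and the example class are assumptions for illustration):

```java
import java.net.MalformedURLException;
import net.yacy.kelondro.data.meta.DigestURI; // assumed package path

public class HashExample {
    public static void main(final String[] args) throws MalformedURLException {
        final DigestURI url = new DigestURI("http://www.example.org/page.html");
        final byte[] hash = url.hash(); // computed lazily; may need a DNS lookup
        System.out.println(new String(hash, 6, 6));           // the domain part of the hash
        System.out.println(DigestURI.domhash("example.org")); // same domain part via the shortcut
        System.out.println(url.isLocal()); // true for SMB/file URLs and local IP ranges
    }
}
```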
