joined anomic.net.URL, plasmaURL and url hash computation:

Search profiling showed that a major amount of time is wasted by computing url hashes. The computation does an intranet check, which needs a DNS lookup. This caused each url hash computation to take 100-200 milliseconds, which delayed remote searches by at least 1 second more than necessary. The solution to this problem is to attach a URL hash to the URL data structure, because that means that the url hash value can be filled in after retrieval of the URL from the database. The redesign of the url/urlhash management caused a major redesign of many parts of the software. Since some parts had been decided to be given up, they were removed during this change to avoid unnecessary maintenance of unused code.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4074 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 66905b7c97
commit daf0f74361

@ -221,7 +221,7 @@
<!-- compiling the main sources -->
<javac srcdir="${src}/" destdir="${build}"
excludes="de/anomic/plasma/parser/*/*,de/anomic/yacy/seedUpload/**,de/anomic/soap/**,yacy.java,de/anomic/server/portForwarding/*/*,de/anomic/data/rssReader.java"
excludes="de/anomic/plasma/parser/*/*,de/anomic/yacy/seedUpload/**,yacy.java,de/anomic/server/portForwarding/*/*,de/anomic/data/rssReader.java"
debug="true" debuglevel="lines,vars,source"
source="${javacSource}" target="${javacTarget}">
<classpath refid="project.class.path"/>
@ -241,7 +241,6 @@
<javac srcdir="${htroot}/"
classpathref="project.class.path"
debug="true" debuglevel="lines,vars,source"
excludes="soap/*"
source="${javacSource}" target="${javacTarget}"/>
</target>
@ -297,33 +296,6 @@
<fileset dir="${src}/" includes="de/anomic/yacy/seedUpload/yacySeedUpload*.xml"/>
</subant>
</target>
<!-- compiling optional soap API and building install packages -->
<!-- Delegates to de/anomic/soap/build.xml via subant, forwarding the shared
     build properties (source/build/libx/htroot paths, javac source/target
     levels) so the SOAP extension compiles with the same settings as the
     main sources. -->
<target name="compileSoap" depends="compileMain" description="Compiling and zipping additional yacy SOAP API">
<subant target="${extensionTarget}">
<property name="src" location="${src}"/>
<property name="build" location="${build}"/>
<property name="libx" location="${libx}"/>
<property name="htroot" value="${htroot}"/>
<property name="release" location="${release_ext}"/>
<property name="javacSource" value="${javacSource}"/>
<property name="javacTarget" value="${javacTarget}"/>
<fileset dir="${src}/" includes="de/anomic/soap/build.xml"/>
</subant>
</target>
<!-- Builds a client-stub jar for the YaCy SOAP API by delegating to the
     buildClientStubJar target in de/anomic/soap/build.xml, forwarding the
     shared build properties so stub classes match the main build settings. -->
<target name="distSoapClientStubJar" depends="init" description="Generates a jar file with all client stub classes for the YaCy SOAP API">
<subant target="buildClientStubJar">
<property name="src" location="${src}"/>
<property name="build" location="${build}"/>
<property name="libx" location="${libx}"/>
<property name="htroot" value="${htroot}"/>
<property name="release" location="${release_ext}"/>
<property name="javacSource" value="${javacSource}"/>
<property name="javacTarget" value="${javacTarget}"/>
<fileset dir="${src}/" includes="de/anomic/soap/build.xml"/>
</subant>
</target>
<target name="compilePortForwarding" depends="compileMain" description="Compiling and zipping additional port forwarder">
<javac srcdir="${src}/de/anomic/server/portForwarding" destdir="${build}" source="${javacSource}" target="${javacTarget}" debug="true" debuglevel="lines,vars,source">
@ -359,7 +331,7 @@
</target>
<!-- compile optional classes that were not compiled elsewhere -->
<target name="compileExtensions" depends="compileMain,compileParsers,compileSeedUploaders,compileSoap,compilePortForwarding">
<target name="compileExtensions" depends="compileMain,compileParsers,compileSeedUploaders,compilePortForwarding">
<!-- compile rss Reader -->
<javac srcdir="${src}" destdir="${build}"
@ -423,9 +395,7 @@
<!-- excluding all additional content parsers -->
<exclude name="de/anomic/plasma/parser/*/*"/>
<!-- excluding all additional seed uploaders -->
<exclude name="de/anomic/yacy/seedUpload/**"/>
<!-- excluding the soap handler -->
<exclude name="de/anomic/soap/**"/>
<exclude name="de/anomic/yacy/seedUpload/**"/>
<!-- excluding the port forwarder -->
<exclude name="de/anomic/server/portForwarding/*/*"/>
<!-- excluding rss Reader class -->
@ -527,8 +497,6 @@
<exclude name="de/anomic/plasma/parser/*/*"/>
<!-- excluding sources for additional seed uploaders -->
<exclude name="de/anomic/yacy/seedUpload/yacySeedUpload**"/>
<!-- excluding soap -->
<exclude name="de/anomic/soap/**"/>
<!-- excluding the port forwarder -->
<exclude name="de/anomic/server/portForwarding/*/*"/>
<!-- excluding rss Reader class -->
@ -545,7 +513,6 @@
<fileset dir="${htroot}">
<include name="**/*"/>
<exclude name="yacy/seedUpload/**"/>
<exclude name="soap/**"/>
</fileset>
<fileset dir="${htroot}">
<include name="yacy/seedUpload/yacySeedUploadFile.html"/>

@ -60,7 +60,6 @@ import java.util.TreeMap;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
@ -68,6 +67,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class Blacklist_p {
private final static String DISABLED = "disabled_";
@ -95,9 +95,9 @@ public class Blacklist_p {
prop.put("testlist",1);
String urlstring = post.get("testurl", "");
if(!urlstring.startsWith("http://")) urlstring = "http://"+urlstring;
URL testurl = null;
yacyURL testurl = null;
try {
testurl = new URL(urlstring);
testurl = new yacyURL(urlstring, null);
} catch (MalformedURLException e) { }
if(testurl != null) {
prop.put("testlist_url",testurl.toString());

@ -59,7 +59,6 @@ import de.anomic.data.bookmarksDB.Tag;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
@ -69,6 +68,7 @@ import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacyURL;
public class Bookmarks {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
@ -214,7 +214,7 @@ public class Bookmarks {
}
try {
File file=new File((String)post.get("bookmarksfile"));
switchboard.bookmarksDB.importFromBookmarks(new URL(file) , new String((byte[])post.get("bookmarksfile$file")), tags, isPublic);
switchboard.bookmarksDB.importFromBookmarks(new yacyURL(file) , new String((byte[])post.get("bookmarksfile$file")), tags, isPublic);
} catch (MalformedURLException e) {}
}else if(post.containsKey("xmlfile")){

@ -62,7 +62,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
@ -71,6 +70,7 @@ import de.anomic.plasma.cache.UnsupportedProtocolException;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class CacheAdmin_p {
@ -118,7 +118,7 @@ public class CacheAdmin_p {
final StringBuffer tree = new StringBuffer();
final StringBuffer info = new StringBuffer();
final URL url = plasmaHTCache.getURL(file);
final yacyURL url = plasmaHTCache.getURL(file);
String urlstr = "";

@ -58,11 +58,11 @@ import de.anomic.data.listManager;
import de.anomic.data.translator;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyURL;
public class ConfigLanguage_p {
@ -97,7 +97,7 @@ public class ConfigLanguage_p {
String url = (String)post.get("url");
ArrayList langVector;
try{
URL u = new URL(url);
yacyURL u = new yacyURL(url, null);
langVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8");
}catch(IOException e){
prop.put("status", 1);//unable to get url

@ -56,12 +56,12 @@ import java.util.Iterator;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyURL;
public class ConfigSkins_p {
@ -126,7 +126,7 @@ public class ConfigSkins_p {
String url = (String)post.get("url");
ArrayList skinVector;
try{
URL u = new URL(url);
yacyURL u = new yacyURL(url, null);
skinVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8");
}catch(IOException e){
prop.put("status", 1);//unable to get URL

@ -31,11 +31,11 @@ import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverSystem;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.yacyVersion;
public class ConfigUpdate_p {
@ -54,7 +54,7 @@ public class ConfigUpdate_p {
String release = post.get("releasedownload", "");
if (release.length() > 0) {
try {
yacyVersion.downloadRelease(new yacyVersion(new URL(release)));
yacyVersion.downloadRelease(new yacyVersion(new yacyURL(release, null)));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();

@ -30,7 +30,6 @@ import java.util.Locale;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@ -39,6 +38,7 @@ import de.anomic.server.logging.serverLog;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class CrawlResults {
@ -170,7 +170,7 @@ public class CrawlResults {
urlstr = comp.url().toNormalform(false, true);
urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL
cachepath = plasmaHTCache.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1);
cachepath = plasmaHTCache.getCachePath(new yacyURL(urlstr, null)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1);
prop.put("table_indexed_" + cnt + "_dark", (dark) ? 1 : 0);
if (showControl) {

@ -53,7 +53,7 @@
<td><label for="crawlingDepth">Crawling Range</label>:</td>
<td>
<input type="radio" name="range" value="wide" checked="checked" />Wide: depth <input name="crawlingDepth" id="crawlingDepth" type="text" size="2" maxlength="2" value="#[crawlingDepth]#" />&nbsp;&nbsp;|&nbsp;&nbsp;
<input type="radio" name="range" value="domain" />Complete Single Domain
<input type="radio" name="range" value="domain" />Complete Domain
</td>
<td>
The range defines if the crawl shall consider a complete domain, or a wide crawl up to a specific depth.

@ -28,7 +28,6 @@ import java.util.Enumeration;
import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -36,6 +35,7 @@ import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class CrawlStartSimple_p {
@ -152,8 +152,8 @@ public class CrawlStartSimple_p {
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed.isVirgin()) || (yacyCore.seedDB.mySeed.isJunior())) {
prop.put("remoteCrawlPeers", 0);
} else {
Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, true);
Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, false);
Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(yacyURL.dummyHash, true);
Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(yacyURL.dummyHash, false);
if ((!(crawlavail.hasMoreElements())) && (!(crawlpendi.hasMoreElements()))) {
prop.put("remoteCrawlPeers", 0); //no peers availible
} else {

@ -55,7 +55,6 @@ import de.anomic.data.URLFetcherStack;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
@ -64,6 +63,7 @@ import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class CrawlURLFetchStack_p {
@ -200,16 +200,16 @@ public class CrawlURLFetchStack_p {
prop.put("upload", 1);
} else if (type.equals("html")) {
try {
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL(file));
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL(file));
final Writer writer = new htmlFilterWriter(null, null, scraper, null, false);
serverFileUtils.write(content, writer);
writer.close();
final Iterator it = ((HashMap)scraper.getAnchors()).keySet().iterator();
int added = 0, failed = 0;
URL url;
yacyURL url;
while (it.hasNext()) try {
url = new URL((String)it.next());
url = new yacyURL((String) it.next(), null);
if (blCheck && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url)) {
failed++;
continue;
@ -264,7 +264,7 @@ public class CrawlURLFetchStack_p {
private static boolean addURL(String url, boolean blCheck, URLFetcherStack stack) {
try {
if (url == null || url.length() == 0) return false;
URL u = new URL(url);
yacyURL u = new yacyURL(url, null);
if (blCheck && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, u)) return false;
stack.push(u);
return true;
@ -288,7 +288,7 @@ public class CrawlURLFetchStack_p {
url = post.get("url" + i, null);
if (url == null || url.length() == 0) continue;
try {
stack.push(new URL(url));
stack.push(new yacyURL(url, null));
count++;
} catch (MalformedURLException e) {
serverLog.logInfo("URLFETCHER", "retrieved invalid url for adding to the stack: " + url);

@ -49,7 +49,6 @@ import java.util.Iterator;
import java.util.Random;
import java.util.TreeMap;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaCrawlZURL;
import de.anomic.plasma.plasmaSwitchboard;
@ -62,6 +61,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.yacyVersion;
public class CrawlURLFetch_p {
@ -139,10 +139,10 @@ public class CrawlURLFetch_p {
count,
frequency);
} else {
URL url = null;
yacyURL url = null;
if (post.get("source", "").equals("url")) {
try {
url = new URL(post.get("host", null));
url = new yacyURL(post.get("host", null), null);
if (!savedURLs.contains(url.toNormalform(true, true)))
savedURLs.add(url.toNormalform(true, true));
prop.put("host", post.get("host", url.toString()));
@ -152,7 +152,7 @@ public class CrawlURLFetch_p {
}
} else if (post.get("source", "").equals("savedURL")) {
try {
url = new URL(post.get("saved", ""));
url = new yacyURL(post.get("saved", ""), null);
} catch (MalformedURLException e) {
/* should never appear, except for invalid input, see above */
}
@ -355,7 +355,7 @@ public class CrawlURLFetch_p {
public String lastServerResponse = null;
public int lastFailed = 0;
public final URL url;
public final yacyURL url;
public final int count;
public long delay;
public final plasmaSwitchboard sb;
@ -363,7 +363,7 @@ public class CrawlURLFetch_p {
public boolean paused = false;
public static URL getListServletURL(String host, int mode, int count, String peerHash) {
public static yacyURL getListServletURL(String host, int mode, int count, String peerHash) {
String r = "http://" + host + "/yacy/list.html?list=queueUrls&display=";
switch (mode) {
@ -380,7 +380,7 @@ public class CrawlURLFetch_p {
}
try {
return new URL(r);
return new yacyURL(r, null);
} catch (MalformedURLException e) {
return null;
}
@ -389,7 +389,7 @@ public class CrawlURLFetch_p {
public URLFetcher(
serverSwitch env,
plasmaCrawlProfile.entry profile,
URL url,
yacyURL url,
int count,
long delayMs) {
if (env == null || profile == null || url == null)
@ -420,7 +420,7 @@ public class CrawlURLFetch_p {
public void run() {
this.paused = false;
long start;
URL url;
yacyURL url;
while (!isInterrupted()) {
try {
start = System.currentTimeMillis();
@ -449,7 +449,7 @@ public class CrawlURLFetch_p {
}
}
private URL getDLURL() {
private yacyURL getDLURL() {
if (this.url != null) return this.url;
// choose random seed
@ -493,7 +493,7 @@ public class CrawlURLFetch_p {
this.failed.put(urls[i], reason);
try {
plasmaCrawlZURL.Entry ee = this.sb.errorURL.newEntry(
new URL(urls[i]),
new yacyURL(urls[i], null),
reason);
ee.store();
this.sb.errorURL.stackPushEntry(ee);
@ -503,7 +503,7 @@ public class CrawlURLFetch_p {
return this.lastFetchedURLs;
}
private String[] getURLs(URL url) {
private String[] getURLs(yacyURL url) {
if (url == null) return null;
String[] r = null;
try {

@ -24,11 +24,11 @@
import java.net.MalformedURLException;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
import de.anomic.xml.rssReader;
import de.anomic.yacy.yacyURL;
// test url:
// http://localhost:8080/FeedReader_p.html?url=http://www.tagesthemen.de/xml/rss2
@ -40,9 +40,9 @@ public class FeedReader_p {
prop.put("page", 0);
if (post != null) {
URL url;
yacyURL url;
try {
url = new URL((String) post.get("url"));
url = new yacyURL((String) post.get("url"), null);
} catch (MalformedURLException e) {
prop.put("page", 2);
return prop;

@ -62,11 +62,9 @@ import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroRotateIterator;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
@ -76,6 +74,7 @@ import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class IndexControl_p {
@ -196,7 +195,11 @@ public class IndexControl_p {
}
if (post.containsKey("urldelete")) {
urlhash = plasmaURL.urlHash(urlstring);
try {
urlhash = (new yacyURL(urlstring, null)).hash();
} catch (MalformedURLException e) {
urlhash = null;
}
if ((urlhash == null) || (urlstring == null)) {
prop.put("result", "No input given; nothing deleted.");
} else {
@ -307,8 +310,8 @@ public class IndexControl_p {
if (post.containsKey("urlstringsearch")) {
try {
URL url = new URL(urlstring);
urlhash = plasmaURL.urlHash(url);
yacyURL url = new yacyURL(urlstring, null);
urlhash = url.hash();
prop.put("urlhash", urlhash);
indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null);
if (entry == null) {
@ -369,7 +372,7 @@ public class IndexControl_p {
try {
String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
URL url;
yacyURL url;
for (int i=0; i<urlx.length; i++) {
urlHashes.add(urlx[i]);
indexURLEntry e = switchboard.wordIndex.loadedURL.load(urlx[i], null);
@ -397,7 +400,7 @@ public class IndexControl_p {
try {
String[] supportedBlacklistTypes = abstractURLPattern.BLACKLIST_TYPES_STRING.split(",");
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
URL url;
yacyURL url;
for (int i=0; i<urlx.length; i++) {
urlHashes.add(urlx[i]);
indexURLEntry e = switchboard.wordIndex.loadedURL.load(urlx[i], null);
@ -460,7 +463,7 @@ public class IndexControl_p {
}
indexURLEntry.Components comp = entry.comp();
String referrer = null;
indexURLEntry le = switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null);
indexURLEntry le = (entry.referrerHash() == null) ? null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null);
if (le == null) {
referrer = "<unknown>";
} else {
@ -519,7 +522,7 @@ public class IndexControl_p {
}
}
URL url;
yacyURL url;
final Iterator iter = tm.keySet().iterator();
while (iter.hasNext()) {
us = iter.next().toString();
@ -536,7 +539,7 @@ public class IndexControl_p {
prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhash);
prop.put("genUrlList_urlList_"+i+"_urlExists_urlString", us);
prop.put("genUrlList_urlList_"+i+"_urlExists_pos", uh[1]);
url = new URL(us);
url = new yacyURL(us, null);
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", 1);
}

@ -50,7 +50,6 @@ import java.util.Iterator;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlZURL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
@ -59,6 +58,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class IndexCreateIndexingQueue_p {
@ -173,7 +173,7 @@ public class IndexCreateIndexingQueue_p {
prop.put("rejected_only-latest", 0);
}
dark = true;
URL url;
yacyURL url;
String initiatorHash, executorHash;
plasmaCrawlZURL.Entry entry;
yacySeed initiatorSeed, executorSeed;

@ -126,7 +126,7 @@ public class IndexCreateWWWGlobalQueue_p {
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true));
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.urlhash());
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
dark = !dark;
showNum++;
} else {

@ -144,7 +144,7 @@ public class IndexCreateWWWLocalQueue_p {
if (value != null) {
Matcher matcher = compiledPattern.matcher(value);
if (matcher.find()) {
switchboard.noticeURL.remove(entry.urlhash());
switchboard.noticeURL.remove(entry.url().hash());
}
}
}
@ -190,7 +190,7 @@ public class IndexCreateWWWLocalQueue_p {
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true));
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.urlhash());
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
dark = !dark;
showNum++;
} else {

@ -125,7 +125,7 @@ public class IndexCreateWWWRemoteQueue_p {
prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_" + showNum + "_anchor", urle.name());
prop.put("crawler-queue_list_" + showNum + "_url", urle.url().toString());
prop.put("crawler-queue_list_" + showNum + "_hash", urle.urlhash());
prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash());
dark = !dark;
showNum++;
} else {

@ -55,13 +55,12 @@ import java.util.Date;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaURL;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
public class QuickCrawlLink_p {
@ -128,19 +127,19 @@ public class QuickCrawlLink_p {
if (crawlingStart != null) {
crawlingStart = crawlingStart.trim();
try {crawlingStart = new URL(crawlingStart).toNormalform(true, true);} catch (MalformedURLException e1) {}
try {crawlingStart = new yacyURL(crawlingStart, null).toNormalform(true, true);} catch (MalformedURLException e1) {}
// check if url is proper
URL crawlingStartURL = null;
yacyURL crawlingStartURL = null;
try {
crawlingStartURL = new URL(crawlingStart);
crawlingStartURL = new yacyURL(crawlingStart, null);
} catch (MalformedURLException e) {
prop.put("mode_status", 1);
prop.put("mode_code", "1");
return prop;
}
String urlhash = plasmaURL.urlHash(crawlingStart);
String urlhash = crawlingStartURL.hash();
switchboard.wordIndex.loadedURL.remove(urlhash);
switchboard.noticeURL.remove(urlhash);
switchboard.errorURL.remove(urlhash);

@ -48,11 +48,10 @@ import java.util.Iterator;
import java.util.Map;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class Statistics {
@ -75,13 +74,13 @@ public class Statistics {
boolean dark = true;
Map map;
String urlString;
URL url;
yacyURL url;
while ((it.hasNext()) && (count < maxCount)) {
map = (Map) it.next();
if (count >= maxCount) break;
urlString = (String) map.get("key");
try { url = new URL(urlString); } catch (MalformedURLException e) { url = null; }
if ((url != null) && (!serverDomains.isLocal(url))) {
try { url = new yacyURL(urlString, null); } catch (MalformedURLException e) { url = null; }
if ((url != null) && (!url.isLocal())) {
prop.put("page_backlinks_list_" + count + "_dark", ((dark) ? 1 : 0)); dark =! dark;
prop.put("page_backlinks_list_" + count + "_url", urlString);
prop.put("page_backlinks_list_" + count + "_date", map.get("date"));

@ -54,7 +54,6 @@ import de.anomic.http.httpHeader;
import de.anomic.http.httpd;
import de.anomic.http.httpdByteCountInputStream;
import de.anomic.http.httpdByteCountOutputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
@ -64,6 +63,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.yacyVersion;
public class Status {
@ -120,7 +120,7 @@ public class Status {
String release = post.get("releasedownload", "");
if (release.length() > 0) {
try {
yacyVersion.downloadRelease(new yacyVersion(new URL(release)));
yacyVersion.downloadRelease(new yacyVersion(new yacyURL(release, null)));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();

@ -32,8 +32,6 @@ import java.util.HashMap;
import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaURL;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroNaturalOrder;
@ -48,6 +46,7 @@ import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class Supporter {
@ -125,10 +124,9 @@ public class Supporter {
if (row == null) continue;
url = row.getColString(0, null);
try{
if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url)))
continue;
}catch(MalformedURLException e){continue;};
try {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new yacyURL(url, urlhash))) continue;
} catch(MalformedURLException e) {continue;}
title = row.getColString(1,"UTF-8");
description = row.getColString(2,"UTF-8");
if ((url == null) || (title == null) || (description == null)) continue;
@ -241,10 +239,18 @@ public class Supporter {
// add/subtract votes and write record
if (entry != null) {
urlhash = plasmaURL.urlHash(url);
try {
urlhash = (new yacyURL(url, null)).hash();
} catch (MalformedURLException e) {
urlhash = null;
}
if (urlhash == null)
urlhash=plasmaURL.urlHash("http://"+url);
if(urlhash==null){
try {
urlhash = (new yacyURL("http://" + url, null)).hash();
} catch (MalformedURLException e) {
urlhash = null;
}
if (urlhash==null) {
System.out.println("Supporter: bad url '" + url + "' from news record " + record.toString());
continue;
}

@ -32,8 +32,6 @@ import java.util.HashMap;
import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaURL;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroNaturalOrder;
@ -48,6 +46,7 @@ import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class Surftips {
@ -134,7 +133,7 @@ public class Surftips {
url = row.getColString(0, null);
try{
if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url)))
if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new yacyURL(url, null)))
continue;
}catch(MalformedURLException e){continue;};
title = row.getColString(1,"UTF-8");
@ -302,10 +301,18 @@ public class Surftips {
// add/subtract votes and write record
if (entry != null) {
urlhash = plasmaURL.urlHash(url);
try {
urlhash = (new yacyURL(url, null)).hash();
} catch (MalformedURLException e) {
urlhash = null;
}
if (urlhash == null)
urlhash=plasmaURL.urlHash("http://"+url);
if(urlhash==null){
try {
urlhash = (new yacyURL("http://"+url, null)).hash();
} catch (MalformedURLException e) {
urlhash = null;
}
if (urlhash == null) {
System.out.println("Surftips: bad url '" + url + "' from news record " + record.toString());
continue;
}

@ -1,69 +0,0 @@
//Thumbnail.java
//------------
// part of YACY
//
// (C) 2007 Alexander Schier
//
// last change: $LastChangedDate: $ by $LastChangedBy: $
// $LastChangedRevision: $
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
/**
 * Servlet that generates a PNG thumbnail for the URL given in the "url" post
 * parameter by invoking an external thumbnail program (configured under the
 * "thumbnailProgram" key). The external program receives the URL and an output
 * file path and is expected to write the thumbnail image to that path.
 */
public class Thumbnail {

    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
        servletProperties prop = new servletProperties();
        String command = env.getConfig("thumbnailProgram", "");
        // Bail out when no generator program is configured or no URL was posted.
        if (command.equals("") || post == null || !post.containsKey("url")) {
            prop.put("image", "thumbnail cannot be generated"); // TODO: put a "thumbnail not possible" image.
            return prop;
        }

        String[] cmdline = new String[3];
        cmdline[0] = command;
        cmdline[1] = post.get("url", "");
        plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
        // Output file name is derived from the url hash, so identical URLs map
        // to the same thumbnail file.
        File path = new File(sb.workPath, plasmaURL.urlHash(cmdline[1]) + ".png");
        cmdline[2] = path.getAbsolutePath(); // does not contain an extension!

        BufferedReader br = null;
        try {
            Process p = Runtime.getRuntime().exec(cmdline);
            // FIX: wait for the external program to finish before reading its
            // output; the previous code read the file immediately after exec(),
            // racing against the generator and often seeing a partial file.
            p.waitFor();
            br = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
            String line;
            StringBuffer image = new StringBuffer();
            while ((line = br.readLine()) != null) {
                image.append(line);
            }
            // path.delete(); // we do not cache, yet.
            // NOTE(review): reading binary PNG data line-wise through a Reader
            // decodes it with the platform charset and drops line terminators,
            // which corrupts the image — confirm whether the consumer expects
            // raw bytes instead.
            prop.put("image", image.toString());
        } catch (IOException e) {
            prop.put("image", "error creating thumbnail"); // TODO: put a "thumbnail error" image.
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag
            prop.put("image", "error creating thumbnail");
        } finally {
            // FIX: close the reader; it was previously leaked on every request.
            if (br != null) {
                try { br.close(); } catch (IOException ignored) { /* nothing to recover */ }
            }
        }

        httpHeader out_header = new httpHeader();
        out_header.put(httpHeader.CONTENT_TYPE, "image/png");
        prop.setOutgoingHeader(out_header);
        return prop;
    }
}

@ -58,7 +58,6 @@ import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
@ -70,6 +69,7 @@ import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class ViewFile {
@ -99,7 +99,7 @@ public class ViewFile {
String viewMode = post.get("viewMode","sentences");
prop.put("error_vMode-" + viewMode, 1);
URL url = null;
yacyURL url = null;
String descr = "";
int wordCount = 0;
int size = 0;
@ -144,7 +144,7 @@ public class ViewFile {
}
// define an url by post parameter
url = new URL(urlString);
url = new yacyURL(urlString, null);
pre = post.get("pre", "false").equals("true");
} catch (MalformedURLException e) {}

@ -48,12 +48,12 @@ import java.io.InputStream;
import java.net.MalformedURLException;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
import de.anomic.ymage.ymageImageParser;
public class ViewImage {
@ -70,9 +70,9 @@ public class ViewImage {
String urlLicense = post.get("code", "");
boolean auth = ((String) header.get("CLIENTIP", "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights
URL url = null;
yacyURL url = null;
if ((urlString.length() > 0) && (auth)) try {
url = new URL(urlString);
url = new yacyURL(urlString, null);
} catch (MalformedURLException e1) {
url = null;
}

@ -38,11 +38,9 @@ import java.util.regex.PatternSyntaxException;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaCrawlZURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.dbImport.dbImporter;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
@ -50,6 +48,7 @@ import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacyURL;
public class WatchCrawler_p {
public static final String CRAWLING_MODE_URL = "url";
@ -101,12 +100,12 @@ public class WatchCrawler_p {
String newcrawlingfilter = post.get("crawlingFilter", ".*");
if (fullDomain) try {
newcrawlingfilter = ".*" + (new URL(post.get("crawlingURL",""))).getHost() + ".*";
newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*";
} catch (MalformedURLException e) {}
env.setConfig("crawlingFilter", newcrawlingfilter);
int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0"));
if (fullDomain) newcrawlingdepth = 99;
int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8"));
if (fullDomain) newcrawlingdepth = 8;
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
@ -158,12 +157,12 @@ public class WatchCrawler_p {
if (pos == -1) crawlingStart = "http://" + crawlingStart;
// normalizing URL
try {crawlingStart = new URL(crawlingStart).toNormalform(true, true);} catch (MalformedURLException e1) {}
try {crawlingStart = new yacyURL(crawlingStart, null).toNormalform(true, true);} catch (MalformedURLException e1) {}
// check if url is proper
URL crawlingStartURL = null;
yacyURL crawlingStartURL = null;
try {
crawlingStartURL = new URL(crawlingStart);
crawlingStartURL = new yacyURL(crawlingStart, null);
} catch (MalformedURLException e) {
crawlingStartURL = null;
}
@ -181,7 +180,7 @@ public class WatchCrawler_p {
// stack request
// first delete old entry, if exists
String urlhash = plasmaURL.urlHash(crawlingStart);
String urlhash = (new yacyURL(crawlingStart, null)).hash();
switchboard.wordIndex.loadedURL.remove(urlhash);
switchboard.noticeURL.remove(urlhash);
switchboard.errorURL.remove(urlhash);
@ -258,7 +257,7 @@ public class WatchCrawler_p {
String fileString = new String(fileContent,"UTF-8");
// parsing the bookmark file and fetching the headline and contained links
htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL(file));
htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL(file));
//OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
Writer writer = new htmlFilterWriter(null,null,scraper,null,false);
serverFileUtils.write(fileString,writer);
@ -282,12 +281,12 @@ public class WatchCrawler_p {
nexturlstring = nexturlstring.trim();
// normalizing URL
nexturlstring = new URL(nexturlstring).toNormalform(true, true);
nexturlstring = new yacyURL(nexturlstring, null).toNormalform(true, true);
// generating an url object
URL nexturlURL = null;
yacyURL nexturlURL = null;
try {
nexturlURL = new URL(nexturlstring);
nexturlURL = new yacyURL(nexturlstring, null);
} catch (MalformedURLException ex) {
nexturlURL = null;
c++;

@ -32,12 +32,11 @@ import java.util.Map;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaWebStructure;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
import de.anomic.ymage.ymageGraph;
import de.anomic.ymage.ymageMatrix;
import de.anomic.ymage.ymageToolPrint;
@ -92,7 +91,7 @@ public class WebStructurePicture_p {
// find start hash
String hash = null;
try {
hash = plasmaURL.urlHash(new URL("http://" + host)).substring(6);
hash = (new yacyURL("http://" + host, null)).hash().substring(6);
} catch (MalformedURLException e) {e.printStackTrace();}
assert (sb.webStructure.references(hash) != null);

@ -60,12 +60,11 @@ import de.anomic.data.userDB;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.http.httpd;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
@ -79,6 +78,7 @@ import de.anomic.tools.dirlistComparator;
import de.anomic.tools.md5DirFileFilter;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class dir {
@ -364,7 +364,7 @@ public class dir {
public static void indexPhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr, byte[] md5) {
try {
final URL url = new URL(urlstring);
final yacyURL url = new yacyURL(urlstring, null);
final plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(("yacyshare. " + phrase + ". " + descr).getBytes()), "UTF-8");
final indexURLEntry newEntry = new indexURLEntry(
url,
@ -379,7 +379,7 @@ public class dir {
md5, // md5
(long) phrase.length(), // size
condenser.RESULT_NUMB_WORDS, // word count
plasmaURL.DT_SHARE, // doctype
plasmaHTCache.DT_SHARE, // doctype
new kelondroBitfield(4),
"**", // language
0,0,0,0,0,0
@ -392,14 +392,13 @@ public class dir {
5 /*process case*/
);
final String urlHash = newEntry.hash();
/*final int words =*/ switchboard.wordIndex.addPageIndex(url, urlHash, new Date(), phrase.length() + descr.length() + 13, null, condenser, "**", plasmaURL.DT_SHARE, 0, 0);
/*final int words =*/ switchboard.wordIndex.addPageIndex(url, new Date(), phrase.length() + descr.length() + 13, null, condenser, "**", plasmaHTCache.DT_SHARE, 0, 0);
} catch (IOException e) {}
}
public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) {
try {
final String urlhash = plasmaURL.urlHash(new URL(urlstring));
final String urlhash = (new yacyURL(urlstring, null)).hash();
final Iterator words = plasmaCondenser.getWords(("yacyshare " + phrase + " " + descr).getBytes("UTF-8"), "UTF-8").keySet().iterator();
String word;
while (words.hasNext()) {

@ -33,15 +33,14 @@ import java.net.MalformedURLException;
import java.util.HashMap;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDomains;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class index {
@ -80,13 +79,13 @@ public class index {
final String referer = (String) header.get(httpHeader.REFERER);
if (referer != null) {
URL url;
yacyURL url;
try {
url = new URL(referer);
url = new yacyURL(referer, null);
} catch (MalformedURLException e) {
url = null;
}
if ((url != null) && (!serverDomains.isLocal(url))) {
if ((url != null) && (!url.isLocal())) {
final HashMap referrerprop = new HashMap();
referrerprop.put("count", "1");
referrerprop.put("clientip", header.get(httpHeader.CONNECTION_PROP_CLIENTIP));

@ -58,7 +58,6 @@ import java.util.HashSet;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.server.serverObjects;
@ -66,6 +65,7 @@ import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class sharedBlacklist_p {
@ -131,7 +131,7 @@ public class sharedBlacklist_p {
reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache");
// get List
URL u = new URL(downloadURL);
yacyURL u = new yacyURL(downloadURL, null);
otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader, null), "UTF-8");
} catch (Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
@ -147,7 +147,7 @@ public class sharedBlacklist_p {
prop.put("page_source", downloadURL);
try {
URL u = new URL(downloadURL);
yacyURL u = new yacyURL(downloadURL, null);
otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig, null, null), "UTF-8"); //get List
} catch (Exception e) {
prop.put("status", STATUS_URL_PROBLEM);

@ -1,33 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>SOAP Service List</title>
#%env/templates/metas.template%#
</head>
<body>
#%env/templates/header.template%#
<h2>Deployed SOAP Services</h2>
<p>Currently #[services]# services are deployed.</p>
<table>
<tr class="TableHeader">
<td>Service name</td>
<td>Operation</td>
<td>WSDL</td>
</tr>
#{services}#<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td><b>#[name]#</b>&nbsp;[#[style]#]</td>
<td>
<table>
#{methods}#<tr>
<td>#[name]#</td>
<td><tt>#[method]#</tt></td>
</tr>#{/methods}#
</table>
</td>
<td><a href="/soap/#[name]#?wsdl">WSDL</a></td>
</tr>#{/services}#
</table>
#%env/templates/footer.template%#
</body>
</html>

@ -1,105 +0,0 @@
// ServiceList.java
// -----------------------
// part of YaCy
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
//
// This File is contributed by Martin Thelian
//
// $LastChangedDate: 2007-02-24 13:56:32 +0000 (Sa, 24 Feb 2007) $
// $LastChangedRevision: 3391 $
// $LastChangedBy: karlchenofhell $
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
// You must compile this file with
// javac -classpath .:../classes Blacklist_p.java
// if the shell's current path is HTROOT
package soap;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.axis.AxisEngine;
import org.apache.axis.ConfigurationException;
import org.apache.axis.description.OperationDesc;
import org.apache.axis.description.ServiceDesc;
import de.anomic.http.httpHeader;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class ServiceList {

    /**
     * Servlet handler that lists all SOAP services deployed in the Axis engine,
     * together with the operations (name and Java method) each service exposes.
     * Fills the "services" template structure consumed by ServiceList.html.
     *
     * @param header incoming HTTP request header (unused here)
     * @param post   request parameters; must carry the engine under "SOAP.engine"
     * @param env    server switchboard (unused here)
     * @return template properties describing the deployed services
     * @throws ConfigurationException if the engine configuration cannot be read
     */
    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws ConfigurationException {
        serverObjects prop = new serverObjects();

        // obtain the SOAP engine that the request dispatcher attached to the post data
        AxisEngine engine = (AxisEngine) post.get("SOAP.engine");

        // enumerate every deployed service
        Iterator services = engine.getConfig().getDeployedServices();
        int serviceCount = 0;
        boolean dark = true;
        while (services.hasNext()) {
            ServiceDesc desc = (ServiceDesc) services.next();
            prop.put("services_" + serviceCount + "_name", desc.getName());
            prop.put("services_" + serviceCount + "_style", desc.getStyle());
            // alternate row shading in the output table
            prop.put("services_" + serviceCount + "_dark", (dark) ? 1 : 0);
            dark = !dark;

            // enumerate the operations offered by this service
            ArrayList operations = desc.getOperations();
            int methodCount = 0;
            for (; methodCount < operations.size(); methodCount++) {
                OperationDesc op = (OperationDesc) operations.get(methodCount);
                prop.put("services_" + serviceCount + "_methods_" + methodCount + "_name", op.getName());
                prop.put("services_" + serviceCount + "_methods_" + methodCount + "_method", op.getMethod());
            }
            prop.put("services_" + serviceCount + "_methods", methodCount);
            serviceCount++;
        }
        prop.put("services", serviceCount);
        return prop;
    }
}

@ -42,11 +42,13 @@
// Contributions and changes to the program code must be marked as such.
package xml.bookmarks.posts;
import java.net.MalformedURLException;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class delete_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
@ -54,13 +56,17 @@ public class delete_p {
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
serverObjects prop = new serverObjects();
if(post!= null){
if( post.containsKey("url") && switchboard.bookmarksDB.removeBookmark(plasmaURL.urlHash(post.get("url", "nourl"))) ){
prop.put("result", 1);
}else if(post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))){
prop.put("result", 1);
}else{
prop.put("result",0);
}
try {
if( post.containsKey("url") && switchboard.bookmarksDB.removeBookmark((new yacyURL(post.get("url", "nourl"), null)).hash())) {
prop.put("result", 1);
}else if(post.containsKey("urlhash") && switchboard.bookmarksDB.removeBookmark(post.get("urlhash", "nohash"))){
prop.put("result", 1);
}else{
prop.put("result",0);
}
} catch (MalformedURLException e) {
prop.put("result",0);
}
}else{
prop.put("result",0);
}

@ -200,7 +200,7 @@ public class queues_p {
prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate()));
prop.put(tableName + "_" + showNum + "_anchor", urle.name());
prop.put(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
prop.put(tableName + "_" + showNum + "_hash", urle.urlhash());
prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
showNum++;
}
}

@ -55,11 +55,11 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class getpageinfo_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
@ -82,7 +82,7 @@ public class getpageinfo_p {
}
if (actions.indexOf("title")>=0) {
try {
URL u = new URL(url);
yacyURL u = new yacyURL(url, null);
String contentString=new String(httpc.wget(u, u.getHost(), 6000, null, null, ((plasmaSwitchboard) env).remoteProxyConfig, null, null)) ;
htmlFilterContentScraper scraper = new htmlFilterContentScraper(u);
@ -110,13 +110,13 @@ public class getpageinfo_p {
}
if(actions.indexOf("robots")>=0){
try {
URL theURL = new URL(url);
yacyURL theURL = new yacyURL(url, null);
// determine if crawling of the current URL is allowed
prop.put("robots-allowed", robotsParser.isDisallowed(theURL) ? 0:1);
// get the sitemap URL of the domain
URL sitemapURL = robotsParser.getSitemapURL(theURL);
yacyURL sitemapURL = robotsParser.getSitemapURL(theURL);
prop.put("sitemap", (sitemapURL==null)?"":sitemapURL.toString());
} catch (MalformedURLException e) {}
}

@ -45,13 +45,12 @@
// You must compile this file with
// javac -classpath .:../classes crawlOrder.java
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -59,6 +58,7 @@ import de.anomic.tools.crypt;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNetwork;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public final class crawlOrder {
@ -182,16 +182,16 @@ public final class crawlOrder {
// old method: only one url
// normalizing URL
String newURL = new URL((String) urlv.get(0)).toNormalform(true, true);
String newURL = new yacyURL((String) urlv.get(0), null).toNormalform(true, true);
if (!newURL.equals(urlv.get(0))) {
env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0));
}
String refURL = (refv.get(0) == null) ? null : new URL((String) refv.get(0)).toNormalform(true, true);
String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true);
if ((refURL != null) && (!refURL.equals(refv.get(0)))) {
env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0));
}
if (!switchboard.acceptURL(new URL(newURL))) {
if (!switchboard.acceptURL(new yacyURL(newURL, null))) {
env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + newURL);
return null;
}
@ -263,7 +263,12 @@ public final class crawlOrder {
// case where we have already the url loaded;
reason = reasonString;
// send lurl-Entry as response
indexURLEntry entry = switchboard.wordIndex.loadedURL.load(plasmaURL.urlHash(url), null);
indexURLEntry entry;
try {
entry = switchboard.wordIndex.loadedURL.load((new yacyURL(url, null)).hash(), null);
} catch (MalformedURLException e) {
entry = null;
}
if (entry == null) {
response = "rejected";
lurl = "";

@ -56,7 +56,6 @@ import de.anomic.data.URLFetcherStack;
import de.anomic.data.htmlTools;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
@ -65,6 +64,7 @@ import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNetwork;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public final class list {
@ -121,7 +121,7 @@ public final class list {
if (count > 0 && db.size() > 0) {
final StringBuffer b = new StringBuffer();
URL url;
yacyURL url;
int cnt = 0;
for (int i=0; i<count; i++) {
if ((url = db.pop()) == null) continue;

@ -40,7 +40,6 @@ import de.anomic.kelondro.kelondroBitfield;
import de.anomic.index.indexContainer;
import de.anomic.net.natLib;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchProcessing;
@ -125,6 +124,7 @@ public final class search {
int indexabstractContainercount = 0;
int joincount = 0;
plasmaSearchQuery theQuery = null;
plasmaSearchProcessing localProcess = null;
ArrayList accu = null;
if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts
@ -133,10 +133,10 @@ public final class search {
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
// prepare a search profile
plasmaSearchProcessing localTiming = new plasmaSearchProcessing(theQuery.maximumTime, theQuery.displayResults());
localProcess = new plasmaSearchProcessing(theQuery.maximumTime, theQuery.displayResults());
//theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, sb.wordIndex, null);
Map[] containers = localTiming.localSearchContainers(theQuery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
Map[] containers = localProcess.localSearchContainers(theQuery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
if (containers != null) {
Iterator ci = containers[0].entrySet().iterator();
Map.Entry entry;
@ -146,7 +146,7 @@ public final class search {
wordhash = (String) entry.getKey();
indexContainer container = (indexContainer) entry.getValue();
indexabstractContainercount += container.size();
indexabstract.append("indexabstract." + wordhash + "=").append(plasmaURL.compressIndex(container, null, 1000).toString()).append(serverCore.crlfString);
indexabstract.append("indexabstract." + wordhash + "=").append(plasmaSearchProcessing.compressIndex(container, null, 1000).toString()).append(serverCore.crlfString);
}
}
@ -155,18 +155,17 @@ public final class search {
prop.putASIS("references", "");
} else {
// retrieve index containers from search request
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, duetime, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(plasmaSearchQuery.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile);
plasmaSearchProcessing localProcess = new plasmaSearchProcessing(theQuery.maximumTime, theQuery.displayResults());
//plasmaSearchProcessing remoteProcess = null;
localProcess = new plasmaSearchProcessing(theQuery.maximumTime, theQuery.displayResults());
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProcess, sb.wordIndex, null, true, abstractSet);
//Map[] containers = localProcess.localSearchContainers(theQuery, sb.wordIndex, plasmaSearchQuery.hashes2Set(urls));
// set statistic details of search result and find best result index set
if (theSearch.getLocalCount() == 0) {
prop.putASIS("indexcount", "");
@ -204,7 +203,7 @@ public final class search {
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific
// urls, because it is a re-search
if ((theSearch.IAmaxcounthash == null) || (urls.length() != 0) || (queryhashes.size() == 1) || (abstracts.length() == 0)) {
if ((theSearch.IAmaxcounthash == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.length() == 0)) {
prop.putASIS("indexabstract", "");
} else if (abstracts.equals("auto")) {
// automatically attach the index abstract for the index that has the most references. This should be our target dht position
@ -221,13 +220,15 @@ public final class search {
}
}
if (partitions > 0) sb.requestedQueries = sb.requestedQueries + 1d / (double) partitions; // increase query counter
// prepare reference hints
localProcess.startTimer();
Object[] ws = theSearch.references();
StringBuffer refstr = new StringBuffer();
for (int j = 0; j < ws.length; j++)
refstr.append(",").append((String) ws[j]);
prop.putASIS("references", (refstr.length() > 0) ? refstr.substring(1) : new String(refstr));
localProcess.yield("reference collection", ws.length);
}
prop.putASIS("indexabstract", new String(indexabstract));
@ -241,6 +242,7 @@ public final class search {
} else {
// result is a List of urlEntry elements
localProcess.startTimer();
StringBuffer links = new StringBuffer();
String resource = null;
plasmaSearchEvent.ResultEntry entry;
@ -253,6 +255,7 @@ public final class search {
}
prop.putASIS("links", new String(links));
prop.put("linkcount", accu.size());
localProcess.yield("result list preparation", accu.size());
}
// add information about forward peers
@ -278,7 +281,7 @@ public final class search {
yacyCore.log.logInfo("EXIT HASH SEARCH: " +
plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + joincount + " links found, " +
prop.get("linkcount", "?") + " links selected, " +
indexabstractContainercount + " index abstract references attached, " +
indexabstractContainercount + " index abstracts, " +
(System.currentTimeMillis() - timestamp) + " milliseconds");
prop.putASIS("searchtime", Long.toString(System.currentTimeMillis() - timestamp));

@ -137,7 +137,7 @@ public final class transferURL {
}
// check if the entry is blacklisted
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, lEntry.hash(), comp.url()))) {
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, comp.url()))) {
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + comp.url().toNormalform(false, true) + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null;

@ -72,7 +72,7 @@ public class urls {
prop.put("item_" + c + "_description", entry.name());
prop.put("item_" + c + "_author", "");
prop.put("item_" + c + "_pubDate", serverDate.shortSecondTime(entry.appdate()));
prop.put("item_" + c + "_guid", entry.urlhash());
prop.put("item_" + c + "_guid", entry.url().hash());
c++;
count--;
}

@ -58,7 +58,6 @@ import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSearchEvent;
@ -69,7 +68,6 @@ import de.anomic.plasma.plasmaSearchProcessing;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -78,6 +76,7 @@ import de.anomic.tools.crypt;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacyURL;
public class yacysearch {
@ -104,9 +103,9 @@ public class yacysearch {
// save referrer
// System.out.println("HEADER=" + header.toString());
if (referer != null) {
URL url;
try { url = new URL(referer); } catch (MalformedURLException e) { url = null; }
if ((url != null) && (!serverDomains.isLocal(url))) {
yacyURL url;
try { url = new yacyURL(referer, null); } catch (MalformedURLException e) { url = null; }
if ((url != null) && (!url.isLocal())) {
final HashMap referrerprop = new HashMap();
referrerprop.put("count", "1");
referrerprop.put("clientip", header.get("CLIENTIP"));
@ -454,8 +453,8 @@ public class yacysearch {
int depth = post.getInt("depth", 0);
int columns = post.getInt("columns", 6);
URL url = null;
try {url = new URL(post.get("url", ""));} catch (MalformedURLException e) {}
yacyURL url = null;
try {url = new yacyURL(post.get("url", ""), null);} catch (MalformedURLException e) {}
plasmaSearchImages si = new plasmaSearchImages(6000, url, depth);
Iterator i = si.entries();
htmlFilterImageEntry ie;

@ -30,13 +30,11 @@ import java.net.URLEncoder;
import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -44,6 +42,7 @@ import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
public class yacysearchitem {
@ -86,9 +85,9 @@ public class yacysearchitem {
prop.put("content_url", result.urlstring());
int port=result.url().getPort();
URL faviconURL;
yacyURL faviconURL;
try {
faviconURL = new URL(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + String.valueOf(port)) : "") + "/favicon.ico");
faviconURL = new yacyURL(result.url().getProtocol() + "://" + result.url().getHost() + ((port != -1) ? (":" + String.valueOf(port)) : "") + "/favicon.ico", null);
} catch (MalformedURLException e1) {
faviconURL = null;
}
@ -102,14 +101,14 @@ public class yacysearchitem {
prop.put("content_size", Long.toString(result.filesize()));
TreeSet[] query = theQuery.queryWords();
URL wordURL = null;
yacyURL wordURL = null;
try {
prop.put("content_words", URLEncoder.encode(query[0].toString(),"UTF-8"));
} catch (UnsupportedEncodingException e) {}
prop.put("content_former", theQuery.queryString);
prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.hash()) +
((plasmaURL.probablyRootURL(result.hash())) ? ", probablyRootURL" : "") +
(((wordURL = plasmaURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : ""));
prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + yacyURL.domLengthEstimation(result.hash()) +
((yacyURL.probablyRootURL(result.hash())) ? ", probablyRootURL" : "") +
(((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : ""));
prop.putASIS("content_snippet", result.textSnippet().getLineMarked(theQuery.queryHashes));

@ -60,14 +60,13 @@ import org.xml.sax.helpers.DefaultHandler;
import de.anomic.http.httpc;
import de.anomic.http.httpdByteCountInputStream;
import de.anomic.index.indexURLEntry;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaCrawlZURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverDate;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
/**
* Class to parse a sitemap file.<br>
@ -140,7 +139,7 @@ public class SitemapParser extends DefaultHandler {
/**
* The location of the sitemap file
*/
private URL siteMapURL = null;
private yacyURL siteMapURL = null;
/**
* The next URL to enqueue
@ -153,7 +152,7 @@ public class SitemapParser extends DefaultHandler {
private Date lastMod = null;
public SitemapParser(plasmaSwitchboard sb, URL sitemap, plasmaCrawlProfile.entry theCrawlingProfile) {
public SitemapParser(plasmaSwitchboard sb, yacyURL sitemap, plasmaCrawlProfile.entry theCrawlingProfile) {
if (sb == null) throw new NullPointerException("The switchboard must not be null");
if (sitemap == null) throw new NullPointerException("The sitemap URL must not be null");
this.switchboard = sb;
@ -276,7 +275,12 @@ public class SitemapParser extends DefaultHandler {
if (this.nextURL == null) return;
// get the url hash
String nexturlhash = plasmaURL.urlHash(this.nextURL);
String nexturlhash;
try {
nexturlhash = (new yacyURL(this.nextURL, null)).hash();
} catch (MalformedURLException e1) {
nexturlhash = null;
}
// check if the url is known and needs to be recrawled
if (this.lastMod != null) {
@ -314,7 +318,7 @@ public class SitemapParser extends DefaultHandler {
this.logger.logInfo("The URL '" + this.nextURL + "' can not be crawled. Reason: " + error);
// insert URL into the error DB
plasmaCrawlZURL.Entry ee = this.switchboard.errorURL.newEntry(new URL(this.nextURL), error);
plasmaCrawlZURL.Entry ee = this.switchboard.errorURL.newEntry(new yacyURL(this.nextURL, null), error);
ee.store();
this.switchboard.errorURL.stackPushEntry(ee);
} catch (MalformedURLException e) {/* ignore this */ }

@ -51,8 +51,8 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroStack;
import de.anomic.net.URL;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class URLFetcherStack {
@ -84,7 +84,7 @@ public class URLFetcherStack {
this.db.close();
}
public boolean push(URL url) {
public boolean push(yacyURL url) {
try {
this.db.push(this.db.row().newEntry(
new byte[][] { url.toNormalform(true, true).getBytes() }
@ -97,14 +97,14 @@ public class URLFetcherStack {
}
}
public URL pop() {
public yacyURL pop() {
try {
kelondroRow.Entry r = this.db.pop();
if (r == null) return null;
final String url = r.getColString(0, null);
try {
this.popped++;
return new URL(url);
return new yacyURL(url, null);
} catch (MalformedURLException e) {
this.log.logSevere("found invalid URL-entry: " + url);
return null;

@ -29,7 +29,7 @@ package de.anomic.data;
import java.util.HashMap;
import java.util.Random;
import de.anomic.net.URL;
import de.anomic.yacy.yacyURL;
public class URLLicense {
@ -46,7 +46,7 @@ public class URLLicense {
this.keylen = keylen;
}
public String aquireLicense(URL url) {
public String aquireLicense(yacyURL url) {
// generate license key
String license = "";
while (license.length() < keylen) license += Integer.toHexString(random.nextInt());
@ -59,10 +59,10 @@ public class URLLicense {
return license;
}
public URL releaseLicense(String license) {
URL url = null;
public yacyURL releaseLicense(String license) {
yacyURL url = null;
synchronized (permissions) {
url = (URL) permissions.remove(license);
url = (yacyURL) permissions.remove(license);
}
/*
if (url == null) {

@ -48,6 +48,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@ -72,7 +73,6 @@ import org.xml.sax.SAXException;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaURL;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroException;
@ -80,10 +80,10 @@ import de.anomic.kelondro.kelondroMapObjects;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroObjects;
import de.anomic.kelondro.kelondroObjectsMapEntry;
import de.anomic.net.URL;
import de.anomic.server.serverDate;
import de.anomic.server.serverFileUtils;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class bookmarksDB {
kelondroMapObjects tagsTable;
@ -390,7 +390,7 @@ public class bookmarksDB {
}
public int importFromBookmarks(URL baseURL, String input, String tag, boolean importPublic){
public int importFromBookmarks(yacyURL baseURL, String input, String tag, boolean importPublic){
try {
// convert string to inputstream
ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8"));
@ -402,7 +402,7 @@ public class bookmarksDB {
return 0;
}
}
public int importFromBookmarks(URL baseURL, InputStreamReader input, String tag, boolean importPublic){
public int importFromBookmarks(yacyURL baseURL, InputStreamReader input, String tag, boolean importPublic){
int importCount = 0;
HashMap links=new HashMap();
@ -712,7 +712,11 @@ public class bookmarksDB {
if(!url.toLowerCase().startsWith("http://") && !url.toLowerCase().startsWith("https://")){
url="http://"+url;
}
this.urlHash=plasmaURL.urlHash(url);
try {
this.urlHash=(new yacyURL(url, null)).hash();
} catch (MalformedURLException e) {
this.urlHash = null;
}
entry.put(BOOKMARK_URL, url);
this.timestamp=System.currentTimeMillis();
tags=new HashSet();
@ -728,7 +732,7 @@ public class bookmarksDB {
removeBookmark(this.urlHash); //prevent empty tags
}
public Bookmark(String urlHash, URL url){
public Bookmark(String urlHash, yacyURL url){
super();
this.urlHash=urlHash;
entry.put(BOOKMARK_URL, url.toNormalform(false, true));
@ -742,9 +746,9 @@ public class bookmarksDB {
tags=new HashSet();
timestamp=System.currentTimeMillis();
}
public Bookmark(kelondroObjectsMapEntry map) {
this(plasmaURL.urlHash((String)map.map().get(BOOKMARK_URL)), map.map());
public Bookmark(kelondroObjectsMapEntry map) throws MalformedURLException {
this((new yacyURL((String)map.map().get(BOOKMARK_URL), null)).hash(), map.map());
}
private Map toMap(){

@ -56,11 +56,11 @@ import java.util.Date;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlRobotsTxt;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
/*
* A class for Parsing robots.txt files.
@ -227,7 +227,7 @@ public final class robotsParser{
return new Object[]{denyList,sitemap,crawlDelay};
}
private static final int getPort(URL theURL) {
private static final int getPort(yacyURL theURL) {
int port = theURL.getPort();
if (port == -1) {
if (theURL.getProtocol().equalsIgnoreCase("http")) {
@ -240,7 +240,7 @@ public final class robotsParser{
return port;
}
private static final String getHostPort(URL theURL) {
private static final String getHostPort(yacyURL theURL) {
String urlHostPort = null;
int port = getPort(theURL);
urlHostPort = theURL.getHost() + ":" + port;
@ -249,9 +249,9 @@ public final class robotsParser{
return urlHostPort;
}
public static URL getSitemapURL(URL theURL) {
public static yacyURL getSitemapURL(yacyURL theURL) {
if (theURL == null) throw new IllegalArgumentException();
URL sitemapURL = null;
yacyURL sitemapURL = null;
// generating the hostname:port string needed to do a DB lookup
String urlHostPort = getHostPort(theURL);
@ -265,13 +265,13 @@ public final class robotsParser{
try {
String sitemapUrlStr = robotsTxt4Host.getSitemap();
if (sitemapUrlStr != null) sitemapURL = new URL(sitemapUrlStr);
if (sitemapUrlStr != null) sitemapURL = new yacyURL(sitemapUrlStr, null);
} catch (MalformedURLException e) {/* ignore this */}
return sitemapURL;
}
public static Integer getCrawlDelay(URL theURL) {
public static Integer getCrawlDelay(yacyURL theURL) {
if (theURL == null) throw new IllegalArgumentException();
Integer crawlDelay = null;
@ -292,7 +292,7 @@ public final class robotsParser{
return crawlDelay;
}
public static boolean isDisallowed(URL nexturl) {
public static boolean isDisallowed(yacyURL nexturl) {
if (nexturl == null) throw new IllegalArgumentException();
// generating the hostname:port string needed to do a DB lookup
@ -309,10 +309,10 @@ public final class robotsParser{
(robotsTxt4Host.getLoadedDate() == null) ||
(System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() > 7*24*60*60*1000)
) {
URL robotsURL = null;
yacyURL robotsURL = null;
// generating the proper url to download the robots txt
try {
robotsURL = new URL(nexturl.getProtocol(),nexturl.getHost(),getPort(nexturl),"/robots.txt");
robotsURL = new yacyURL(nexturl.getProtocol(),nexturl.getHost(),getPort(nexturl),"/robots.txt");
} catch (MalformedURLException e) {
serverLog.logSevere("ROBOTS","Unable to generate robots.txt URL for URL '" + nexturl.toString() + "'.");
return false;
@ -371,7 +371,7 @@ public final class robotsParser{
return false;
}
static Object[] downloadRobotsTxt(URL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception {
static Object[] downloadRobotsTxt(yacyURL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception {
if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null};
redirectionCount--;
@ -392,7 +392,7 @@ public final class robotsParser{
httpHeader reqHeaders = new httpHeader();
// adding referer
reqHeaders.put(httpHeader.REFERER, (URL.newURL(robotsURL,"/")).toNormalform(true, true));
reqHeaders.put(httpHeader.REFERER, (yacyURL.newURL(robotsURL,"/")).toNormalform(true, true));
if (entry != null) {
oldEtag = entry.getETag();
@ -447,7 +447,7 @@ public final class robotsParser{
redirectionUrlString = redirectionUrlString.trim();
// generating the new URL object
URL redirectionUrl = URL.newURL(robotsURL, redirectionUrlString);
yacyURL redirectionUrl = yacyURL.newURL(robotsURL, redirectionUrlString);
// returning the used httpc
httpc.returnInstance(con);

@ -314,12 +314,11 @@ public final class userDB {
public static final String BLOG_RIGHT = "blogRight";
public static final String WIKIADMIN_RIGHT = "wikiAdminRight";
public static final String BOOKMARK_RIGHT = "bookmarkRight";
public static final String SOAP_RIGHT = "soapRight";
//to create new rights, you just need to edit this strings
public static final String RIGHT_TYPES=
ADMIN_RIGHT+","+DOWNLOAD_RIGHT+","+UPLOAD_RIGHT+","+PROXY_RIGHT+","+
BLOG_RIGHT+","+BOOKMARK_RIGHT+","+WIKIADMIN_RIGHT+","+SOAP_RIGHT;
BLOG_RIGHT+","+BOOKMARK_RIGHT+","+WIKIADMIN_RIGHT;
public static final String RIGHT_NAMES="Admin,Download,Upload,Proxy usage,Blog,Bookmark,Wiki Admin,SOAP";
public static final int PROXY_ALLOK = 0; //can Surf

@ -62,10 +62,10 @@ import java.util.TreeSet;
import javax.swing.event.EventListenerList;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper {
@ -112,14 +112,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
/**
* {@link URL} to the favicon that belongs to the document
*/
private URL favicon;
private yacyURL favicon;
/**
* The document root {@link URL}
*/
private URL root;
private yacyURL root;
public htmlFilterContentScraper(URL root) {
public htmlFilterContentScraper(yacyURL root) {
// the root value here will not be used to load the resource.
// it is only the reference for relative links
super(linkTags0, linkTags1);
@ -161,7 +161,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
private String absolutePath(String relativePath) {
try {
return URL.newURL(root, relativePath).toNormalform(false, true);
return yacyURL.newURL(root, relativePath).toNormalform(false, true);
} catch (Exception e) {
return "";
}
@ -175,13 +175,13 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
height = Integer.parseInt(tagopts.getProperty("height", "-1"));
} catch (NumberFormatException e) {}
try {
URL url = new URL(absolutePath(tagopts.getProperty("src", "")));
yacyURL url = new yacyURL(absolutePath(tagopts.getProperty("src", "")), null);
htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt",""), width, height);
images.add(ie);
} catch (MalformedURLException e) {}
}
if (tagname.equalsIgnoreCase("base")) try {
root = new URL(tagopts.getProperty("href", ""));
root = new yacyURL(tagopts.getProperty("href", ""), null);
} catch (MalformedURLException e) {}
if (tagname.equalsIgnoreCase("frame")) {
anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
@ -204,9 +204,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (href.length() > 0) anchors.put(absolutePath(href), areatitle);
}
if (tagname.equalsIgnoreCase("link")) {
URL newLink = null;
yacyURL newLink = null;
try {
newLink = new URL(absolutePath(tagopts.getProperty("href", "")));
newLink = new yacyURL(absolutePath(tagopts.getProperty("href", "")), null);
} catch (MalformedURLException e) {}
if (newLink != null) {
@ -363,7 +363,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
/**
* @return the {@link URL} to the favicon that belongs to the document
*/
public URL getFavicon() {
public yacyURL getFavicon() {
return this.favicon;
}
@ -478,7 +478,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
}
public static htmlFilterContentScraper parseResource(URL location) throws IOException {
public static htmlFilterContentScraper parseResource(yacyURL location) throws IOException {
// load page
byte[] page = httpc.wget(
location,

@ -40,22 +40,22 @@
package de.anomic.htmlFilter;
import de.anomic.net.URL;
import de.anomic.yacy.yacyURL;
public class htmlFilterImageEntry implements Comparable {
private URL url;
private yacyURL url;
private String alt;
private int width, height;
public htmlFilterImageEntry(URL url, String alt, int width, int height) {
public htmlFilterImageEntry(yacyURL url, String alt, int width, int height) {
this.url = url;
this.alt = alt;
this.width = width;
this.height = height;
}
public URL url() {
public yacyURL url() {
return this.url;
}

@ -36,8 +36,7 @@ import java.io.Writer;
import java.util.Properties;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.yacy.yacyURL;
public class htmlFilterInputStream extends InputStream implements htmlFilterEventListener {
@ -59,7 +58,7 @@ public class htmlFilterInputStream extends InputStream implements htmlFilterEven
public htmlFilterInputStream(
InputStream inStream,
String inputStreamCharset,
URL rooturl,
yacyURL rooturl,
htmlFilterTransformer transformer,
boolean passbyIfBinarySuspect
) throws UnsupportedEncodingException {

@ -63,8 +63,8 @@ import java.net.MalformedURLException;
import java.util.Enumeration;
import java.util.Properties;
import de.anomic.net.URL;
import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacyURL;
public final class htmlFilterWriter extends Writer {
@ -508,7 +508,7 @@ public final class htmlFilterWriter extends Writer {
if (args.length != 1) return;
char[] buffer = new char[512];
try {
htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL("http://localhost:8080"));
htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost:8080", null));
htmlFilterTransformer transformer = new htmlFilterContentTransformer();
// TODO: this does not work at the moment
System.exit(0);

@ -73,9 +73,9 @@ import java.util.TimeZone;
import java.util.TreeMap;
import java.util.Vector;
import de.anomic.net.URL;
import de.anomic.server.serverCore;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public final class httpHeader extends TreeMap implements Map {
@ -675,7 +675,7 @@ public final class httpHeader extends TreeMap implements Map {
}
public static boolean supportChunkedEncoding(Properties conProp) {
// getting the http version of the soap client
// getting the http version of the client
String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
// only clients with http version 1.1 supports chunk
@ -793,7 +793,7 @@ public final class httpHeader extends TreeMap implements Map {
theHeader.append("\r\n");
}
public static URL getRequestURL(Properties conProp) throws MalformedURLException {
public static yacyURL getRequestURL(Properties conProp) throws MalformedURLException {
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
@ -807,7 +807,7 @@ public final class httpHeader extends TreeMap implements Map {
host = host.substring(0, pos);
}
URL url = new URL("http", host, port, (args == null) ? path : path + "?" + args);
yacyURL url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args);
return url;
}

@ -76,7 +76,6 @@ import javax.net.ssl.X509TrustManager;
import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
@ -84,6 +83,7 @@ import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyURL;
/**
* This class implements an http client. While http access is built-in in java
@ -959,7 +959,7 @@ public final class httpc {
}
public static byte[] singleGET(
URL u,
yacyURL u,
String vhost,
int timeout,
String user,
@ -1017,7 +1017,7 @@ public final class httpc {
}
public static byte[] singlePOST(
URL u,
yacyURL u,
String vhost,
int timeout,
String user,
@ -1049,7 +1049,7 @@ public final class httpc {
}
public static byte[] wget(
URL url,
yacyURL url,
String vhost,
int timeout,
String user,
@ -1090,7 +1090,7 @@ public final class httpc {
return a;
}
public static Map loadHashMap(URL url, httpRemoteProxyConfig proxy) {
public static Map loadHashMap(yacyURL url, httpRemoteProxyConfig proxy) {
try {
// should we use the proxy?
boolean useProxy = (proxy != null) &&
@ -1119,7 +1119,7 @@ public final class httpc {
}
public static httpHeader whead(
URL url,
yacyURL url,
String vhost,
int timeout,
String user,
@ -1130,7 +1130,7 @@ public final class httpc {
}
public static httpHeader whead(
URL url,
yacyURL url,
String vhost,
int timeout,
String user,
@ -1172,7 +1172,7 @@ public final class httpc {
}
public static byte[] wput(
URL url,
yacyURL url,
String vhost,
int timeout,
String user,
@ -1217,7 +1217,7 @@ public final class httpc {
httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort);
try {
URL u = new URL(url);
yacyURL u = new yacyURL(url, null);
text = nxTools.strings(wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig, null, null));
} catch (MalformedURLException e) {
System.out.println("The url '" + url + "' is wrong.");

@ -51,7 +51,6 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Constructor;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URLDecoder;
@ -67,7 +66,6 @@ import java.util.StringTokenizer;
import de.anomic.data.htmlTools;
import de.anomic.data.userDB;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCodings;
@ -78,9 +76,9 @@ import de.anomic.server.serverHandler;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.soap.httpdSoapHandler;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
/**
@ -119,7 +117,6 @@ public final class httpd implements serverHandler {
public static final String hline = "-------------------------------------------------------------------------------";
public static HashMap reverseMappingCache = new HashMap();
private httpdSoapHandler soapHandler = null;
private static plasmaSwitchboard switchboard = null;
private static String virtualHost = null;
@ -488,36 +485,8 @@ public final class httpd implements serverHandler {
if (this.prop.getProperty(httpHeader.CONNECTION_PROP_HOST).equals(virtualHost)) {
// pass to server
if (this.allowServer) {
/*
* Handling SOAP Requests here ...
*/
if (this.prop.containsKey(httpHeader.CONNECTION_PROP_PATH) && this.prop.getProperty(httpHeader.CONNECTION_PROP_PATH).startsWith("/soap/")) {
if (this.soapHandler == null) {
try {
Class soapHandlerClass = Class.forName("de.anomic.soap.httpdSoapHandler");
Constructor classConstructor = soapHandlerClass.getConstructor( new Class[] { serverSwitch.class } );
this.soapHandler = (httpdSoapHandler) classConstructor.newInstance(new Object[] { switchboard });
} catch (Exception e) {
sendRespondError(this.prop,this.session.out,4,501,null,"Error while initializing SOAP Excension",e);
return serverCore.TERMINATE_CONNECTION;
} catch (NoClassDefFoundError e) {
sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e);
return serverCore.TERMINATE_CONNECTION;
} catch (Error e) {
sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e);
return serverCore.TERMINATE_CONNECTION;
}
}
this.soapHandler.doGet(this.prop, header, this.session.out);
/*
* Handling HTTP requests here ...
*/
} else {
if (this.handleServerAuthentication(header)) {
httpdFileHandler.doGet(this.prop, header, this.session.out);
}
if (this.handleServerAuthentication(header)) {
httpdFileHandler.doGet(this.prop, header, this.session.out);
}
} else {
// not authorized through firewall blocking (ip does not match filter)
@ -637,40 +606,8 @@ public final class httpd implements serverHandler {
if (prop.getProperty(httpHeader.CONNECTION_PROP_HOST).equals(virtualHost)) {
// pass to server
if (allowServer) {
/*
* Handling SOAP Requests here ...
*/
if (this.prop.containsKey(httpHeader.CONNECTION_PROP_PATH) && this.prop.getProperty(httpHeader.CONNECTION_PROP_PATH).startsWith("/soap/")) {
if (this.soapHandler == null) {
try {
// creating the soap handler class by name
Class soapHandlerClass = Class.forName("de.anomic.soap.httpdSoapHandler");
// Look for the proper constructor
Constructor soapHandlerConstructor = soapHandlerClass.getConstructor( new Class[] { serverSwitch.class } );
// creating the new object
this.soapHandler = (httpdSoapHandler)soapHandlerConstructor.newInstance( new Object[] { switchboard } );
} catch (Exception e) {
sendRespondError(this.prop,this.session.out,4,501,null,"Error while initializing SOAP Excension",e);
return serverCore.TERMINATE_CONNECTION;
} catch (NoClassDefFoundError e) {
sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e);
return serverCore.TERMINATE_CONNECTION;
} catch (Error e) {
sendRespondError(this.prop,this.session.out,4,503,null,"SOAP Extension not installed",e);
return serverCore.TERMINATE_CONNECTION;
}
}
this.soapHandler.doPost(this.prop, header, this.session.out, this.session.in);
/*
* Handling normal HTTP requests here ...
*/
} else {
if (handleServerAuthentication(header)) {
httpdFileHandler.doPost(prop, header, this.session.out, this.session.in);
}
if (handleServerAuthentication(header)) {
httpdFileHandler.doPost(prop, header, this.session.out, this.session.in);
}
} else {
// not authorized through firewall blocking (ip does not match filter)
@ -1199,7 +1136,7 @@ public final class httpd implements serverHandler {
String urlString;
try {
urlString = (new URL((method.equals(httpHeader.METHOD_CONNECT)?"https":"http"), host, port, (args == null) ? path : path + "?" + args)).toString();
urlString = (new yacyURL((method.equals(httpHeader.METHOD_CONNECT)?"https":"http"), host, port, (args == null) ? path : path + "?" + args)).toString();
} catch (MalformedURLException e) {
urlString = "invalid URL";
}

@ -962,7 +962,6 @@ public final class httpdFileHandler {
}
}
//System.out.println("**DEBUG** loading class file " + classFile);
Class c = provider.loadClass(classFile);
Class[] params = new Class[] {
httpHeader.class,

@ -93,7 +93,6 @@ import java.util.zip.GZIPOutputStream;
import de.anomic.htmlFilter.htmlFilterContentTransformer;
import de.anomic.htmlFilter.htmlFilterTransformer;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
@ -107,6 +106,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.server.logging.serverMiniLogFormatter;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
public final class httpdProxyHandler {
@ -319,7 +319,7 @@ public final class httpdProxyHandler {
int pos=0;
int port=0;
URL url = null;
yacyURL url = null;
try {
url = httpHeader.getRequestURL(conProp);
@ -329,11 +329,11 @@ public final class httpdProxyHandler {
redirectorWriter.println(url.toNormalform(false, true));
redirectorWriter.flush();
}
String newUrl=redirectorReader.readLine();
if(!newUrl.equals("")){
try{
url=new URL(newUrl);
}catch(MalformedURLException e){}//just keep the old one
String newUrl = redirectorReader.readLine();
if (!newUrl.equals("")) {
try {
url = new yacyURL(newUrl, null);
} catch(MalformedURLException e){}//just keep the old one
}
conProp.setProperty(httpHeader.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort());
conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, url.getPath());
@ -474,7 +474,7 @@ public final class httpdProxyHandler {
}
}
private static void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, File cacheFile, OutputStream respond) {
private static void fulfillRequestFromWeb(Properties conProp, yacyURL url,String ext, httpHeader requestHeader, httpHeader cachedResponseHeader, File cacheFile, OutputStream respond) {
GZIPOutputStream gzippedOut = null;
httpChunkedOutputStream chunkedOut = null;
@ -727,7 +727,7 @@ public final class httpdProxyHandler {
private static void fulfillRequestFromCache(
Properties conProp,
URL url,
yacyURL url,
String ext,
httpHeader requestHeader,
httpHeader cachedResponseHeader,
@ -865,7 +865,7 @@ public final class httpdProxyHandler {
httpc remote = null;
httpc.response res = null;
URL url = null;
yacyURL url = null;
try {
// remembering the starting time of the request
Date requestDate = new Date(); // remember the time...
@ -892,7 +892,7 @@ public final class httpdProxyHandler {
}
try {
url = new URL("http", host, port, (args == null) ? path : path + "?" + args);
url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args);
} catch (MalformedURLException e) {
String errorMsg = "ERROR: internal error with url generation: host=" +
host + ", port=" + port + ", path=" + path + ", args=" + args;
@ -968,7 +968,7 @@ public final class httpdProxyHandler {
public static void doPost(Properties conProp, httpHeader requestHeader, OutputStream respond, PushbackInputStream body) throws IOException {
httpc remote = null;
URL url = null;
yacyURL url = null;
try {
// remembering the starting time of the request
Date requestDate = new Date(); // remember the time...
@ -993,7 +993,7 @@ public final class httpdProxyHandler {
}
try {
url = new URL("http", host, port, (args == null) ? path : path + "?" + args);
url = new yacyURL("http", host, port, (args == null) ? path : path + "?" + args);
} catch (MalformedURLException e) {
String errorMsg = "ERROR: internal error with url generation: host=" +
host + ", port=" + port + ", path=" + path + ", args=" + args;
@ -1308,7 +1308,7 @@ public final class httpdProxyHandler {
out.flush();
}
*/
private static void handleProxyException(Exception e, httpc remote, Properties conProp, OutputStream respond, URL url) {
private static void handleProxyException(Exception e, httpc remote, Properties conProp, OutputStream respond, yacyURL url) {
// this may happen if
// - the targeted host does not exist
// - anything with the remote server was wrong.

@ -60,7 +60,6 @@ import java.util.Properties;
import de.anomic.http.httpChunkedInputStream;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
@ -71,6 +70,7 @@ import de.anomic.server.serverFileUtils;
import de.anomic.server.serverHandler;
import de.anomic.server.logging.serverLog;
import de.anomic.server.serverCore.Session;
import de.anomic.yacy.yacyURL;
/**
* @author theli
@ -346,7 +346,7 @@ public class icapd implements serverHandler {
httpHeader.handleTransparentProxySupport(httpReqHeader,httpReqProps,virtualHost,true);
// getting the request URL
URL httpRequestURL = httpHeader.getRequestURL(httpReqProps);
yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps);
/* =========================================================================
* Parsing response data

@ -38,15 +38,14 @@ import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverCodings;
import de.anomic.server.serverDate;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyURL;
import de.anomic.index.indexRWIEntry;
public class indexURLEntry {
@ -120,7 +119,7 @@ public class indexURLEntry {
private indexRWIEntry word; // this is only used if the url is transported via remote search requests
public indexURLEntry(
URL url,
yacyURL url,
String descr,
String author,
String tags,
@ -143,12 +142,12 @@ public class indexURLEntry {
int lapp) {
// create new entry and store it into database
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, plasmaURL.urlHash(url), null);
this.entry.setCol(col_hash, url.hash(), null);
this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag));
this.entry.setCol(col_mod, encodeDate(mod));
this.entry.setCol(col_load, encodeDate(load));
this.entry.setCol(col_fresh, encodeDate(fresh));
this.entry.setCol(col_referrer, referrer.getBytes());
this.entry.setCol(col_referrer, (referrer == null) ? null : referrer.getBytes());
this.entry.setCol(col_md5, md5);
this.entry.setCol(col_size, size);
this.entry.setCol(col_wc, wc);
@ -170,7 +169,7 @@ public class indexURLEntry {
return kelondroNaturalOrder.encodeLong(d.getTime() / 86400000, 4);
}
public static byte[] encodeComp(URL url, String descr, String author, String tags, String ETag) {
public static byte[] encodeComp(yacyURL url, String descr, String author, String tags, String ETag) {
serverCharBuffer s = new serverCharBuffer(200);
s.append(url.toNormalform(false, true)).append(10);
s.append(descr).append(10);
@ -190,9 +189,9 @@ public class indexURLEntry {
// generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
URL url;
yacyURL url;
try {
url = new URL(crypt.simpleDecode(prop.getProperty("url", ""), null));
url = new yacyURL(crypt.simpleDecode(prop.getProperty("url", ""), null), prop.getProperty("hash"));
} catch (MalformedURLException e) {
url = null;
}
@ -202,7 +201,7 @@ public class indexURLEntry {
String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null); if (ETag == null) ETag = "";
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, plasmaURL.urlHash(url), null);
this.entry.setCol(col_hash, url.hash(), null);
this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag));
try {
this.entry.setCol(col_mod, encodeDate(serverDate.shortDayFormatter.parse(prop.getProperty("mod", "20000101"))));
@ -219,7 +218,7 @@ public class indexURLEntry {
} catch (ParseException e) {
this.entry.setCol(col_fresh, encodeDate(new Date()));
}
this.entry.setCol(col_referrer, prop.getProperty("referrer", plasmaURL.dummyHash).getBytes());
this.entry.setCol(col_referrer, prop.getProperty("referrer", yacyURL.dummyHash).getBytes());
this.entry.setCol(col_md5, serverCodings.decodeHex(prop.getProperty("md5", "")));
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
@ -301,6 +300,7 @@ public class indexURLEntry {
ArrayList cl = nxTools.strings(this.entry.getCol("comp", null), "UTF-8");
return new indexURLEntry.Components(
(cl.size() > 0) ? ((String) cl.get(0)).trim() : "",
hash(),
(cl.size() > 1) ? ((String) cl.get(1)).trim() : "",
(cl.size() > 2) ? ((String) cl.get(2)).trim() : "",
(cl.size() > 3) ? ((String) cl.get(3)).trim() : "",
@ -442,12 +442,12 @@ public class indexURLEntry {
}
public class Components {
private URL url;
private yacyURL url;
private String title, author, tags, ETag;
public Components(String url, String title, String author, String tags, String ETag) {
public Components(String url, String urlhash, String title, String author, String tags, String ETag) {
try {
this.url = new URL(url);
this.url = new yacyURL(url, urlhash);
} catch (MalformedURLException e) {
this.url = null;
}
@ -456,18 +456,18 @@ public class indexURLEntry {
this.tags = tags;
this.ETag = ETag;
}
public Components(URL url, String descr, String author, String tags, String ETag) {
public Components(yacyURL url, String descr, String author, String tags, String ETag) {
this.url = url;
this.title = descr;
this.author = author;
this.tags = tags;
this.ETag = ETag;
}
public URL url() { return this.url; }
public String title() { return this.title; }
public String author() { return this.author; }
public String tags() { return this.tags; }
public String ETag() { return this.ETag; }
public yacyURL url() { return this.url; }
public String title() { return this.title; }
public String author() { return this.author; }
public String tags() { return this.tags; }
public String ETag() { return this.ETag; }
}
}

@ -307,7 +307,7 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
}
private synchronized void checkConsistency() {
if (debugmode) try { // in debug mode
if ((debugmode) && (entryFile != null)) try { // in debug mode
long efl = entryFile.length();
assert ((efl - POS_NODES) % ((long) recordsize)) == 0 : "rest = " + ((entryFile.length() - POS_NODES) % ((long) recordsize)) + ", USEDC = " + this.USEDC + ", FREEC = " + this.FREEC + ", recordsize = " + recordsize + ", file = " + filename;
long calculated_used = (efl - POS_NODES) / ((long) recordsize);

@ -41,11 +41,11 @@ import java.util.TimeZone;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverCodings;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class kelondroCollectionIndex {
@ -762,7 +762,7 @@ public class kelondroCollectionIndex {
while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next();
ref = entry.getColBytes(0);
if ((ref.length == 12) && (plasmaURL.probablyRootURL(new String(ref)))) {
if ((ref.length == 12) && (yacyURL.probablyRootURL(new String(ref)))) {
survival.addUnique(entry);
i.remove();
}

@ -53,6 +53,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDomains;
import de.anomic.tools.disorderHeap;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyURL;
public class natLib {
@ -64,7 +65,7 @@ public class natLib {
rm status.htm
*/
try {
ArrayList x = nxTools.strings(httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null, null, null));
ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://192.168.0.1:80/status.htm", null), "192.168.0.1", 5000, "admin", password, null, null, null));
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
String line = nxTools.tail1(x);
@ -76,7 +77,7 @@ public class natLib {
private static String getWhatIsMyIP() {
try {
ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null, null, null));
ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://www.whatismyip.com/", null), "www.whatsmyip.com", 5000, null, null, null, null, null));
x = nxTools.grep(x, 0, "Your IP is");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
@ -87,7 +88,7 @@ public class natLib {
private static String getStanford() {
try {
ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null, null, null));
ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null), "www.slac.stanford.edu", 5000, null, null, null, null, null));
x = nxTools.grep(x, 0, "firewall protecting your browser");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
@ -98,7 +99,7 @@ public class natLib {
private static String getIPID() {
try {
ArrayList x = nxTools.strings(httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null, null, null), "UTF-8");
ArrayList x = nxTools.strings(httpc.wget(new yacyURL("http://ipid.shat.net/", null), "ipid.shat.net", 5000, null, null, null, null, null), "UTF-8");
x = nxTools.grep(x, 2, "Your IP address");
String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);

@ -51,7 +51,7 @@ package de.anomic.plasma.cache;
import java.util.Date;
import java.util.Map;
import de.anomic.net.URL;
import de.anomic.yacy.yacyURL;
/**
* A class containing metadata about a downloaded resource
@ -66,13 +66,13 @@ public interface IResourceInfo {
/**
* @return the URL of this content
*/
public URL getUrl();
public yacyURL getUrl();
/**
* Returns the referer URL of this URL
* @return referer URL
*/
public URL getRefererUrl();
public yacyURL getRefererUrl();
/**
* Returns the mimetype of the cached object
@ -92,11 +92,6 @@ public interface IResourceInfo {
*/
public Date getModificationDate();
/**
* @return the url hash of the content URL
*/
public String getUrlHash();
/**
* Specifies if the resource was requested with a
* if modified since date

@ -51,11 +51,12 @@ package de.anomic.plasma.cache;
import java.lang.reflect.Constructor;
import java.util.Map;
import de.anomic.net.URL;
import de.anomic.yacy.yacyURL;
public class ResourceInfoFactory {
public IResourceInfo buildResourceInfoObj(
URL resourceURL,
yacyURL resourceURL,
Map resourceMetadata
) throws UnsupportedProtocolException, IllegalAccessException {
@ -73,7 +74,7 @@ public class ResourceInfoFactory {
// getting the constructor
final Constructor classConstructor = moduleClass.getConstructor( new Class[] {
URL.class,
yacyURL.class,
Map.class
} );

@ -51,10 +51,9 @@ import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import de.anomic.plasma.plasmaURL;
import de.anomic.net.URL;
import de.anomic.plasma.cache.IResourceInfo;
import de.anomic.plasma.cache.ResourceInfoFactory;
import de.anomic.yacy.yacyURL;
public class ResourceInfo implements IResourceInfo {
@ -62,8 +61,7 @@ public class ResourceInfo implements IResourceInfo {
public static final String MODIFICATION_DATE = "modificationDate";
public static final String REFERER = "referer";
private URL url;
private String urlHash;
private yacyURL url;
private HashMap propertyMap;
/**
@ -71,24 +69,22 @@ public class ResourceInfo implements IResourceInfo {
* @param objectURL
* @param objectInfo
*/
public ResourceInfo(URL objectURL, Map objectInfo) {
public ResourceInfo(yacyURL objectURL, Map objectInfo) {
if (objectURL == null) throw new NullPointerException();
if (objectInfo == null) throw new NullPointerException();
// generating the url hash
this.url = objectURL;
this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true));
// create the http header object
this.propertyMap = new HashMap(objectInfo);
}
public ResourceInfo(URL objectURL, String refererUrl, String mimeType, Date fileDate) {
public ResourceInfo(yacyURL objectURL, String refererUrl, String mimeType, Date fileDate) {
if (objectURL == null) throw new NullPointerException();
// generating the url hash
this.url = objectURL;
this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true));
// create the http header object
this.propertyMap = new HashMap();
@ -113,22 +109,18 @@ public class ResourceInfo implements IResourceInfo {
return new Date(Long.valueOf((String) this.propertyMap.get(MODIFICATION_DATE)).longValue());
}
public URL getRefererUrl() {
public yacyURL getRefererUrl() {
try {
return (this.propertyMap == null) ? null : new URL((String)this.propertyMap.get(REFERER));
return (this.propertyMap == null) ? null : new yacyURL((String)this.propertyMap.get(REFERER), null);
} catch (MalformedURLException e) {
return null;
}
}
public URL getUrl() {
public yacyURL getUrl() {
return this.url;
}
public String getUrlHash() {
return this.urlHash;
}
public Date ifModifiedSince() {
return null;
}

@ -52,16 +52,14 @@ import java.util.Date;
import java.util.Map;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaURL;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.cache.IResourceInfo;
import de.anomic.plasma.cache.ResourceInfoFactory;
import de.anomic.server.serverDate;
import de.anomic.yacy.yacyURL;
public class ResourceInfo implements IResourceInfo {
private URL url;
private String urlHash;
private yacyURL url;
private httpHeader responseHeader;
private httpHeader requestHeader;
@ -70,25 +68,23 @@ public class ResourceInfo implements IResourceInfo {
* @param objectURL
* @param objectInfo
*/
public ResourceInfo(URL objectURL, Map objectInfo) {
public ResourceInfo(yacyURL objectURL, Map objectInfo) {
if (objectURL == null) throw new NullPointerException();
if (objectInfo == null) throw new NullPointerException();
// generating the url hash
this.url = objectURL;
this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true));
// create the http header object
this.responseHeader = new httpHeader(null, objectInfo);
}
public ResourceInfo(URL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) {
public ResourceInfo(yacyURL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) {
if (objectURL == null) throw new NullPointerException();
if (responseHeaders == null) throw new NullPointerException();
// generating the url hash
this.url = objectURL;
this.urlHash = plasmaURL.urlHash(this.url.toNormalform(true, true));
this.requestHeader = requestHeaders;
this.responseHeader = responseHeaders;
@ -131,10 +127,10 @@ public class ResourceInfo implements IResourceInfo {
return docDate;
}
public URL getRefererUrl() {
public yacyURL getRefererUrl() {
if (this.requestHeader == null) return null;
try {
return new URL((String) this.requestHeader.get(httpHeader.REFERER, ""));
return new yacyURL((String) this.requestHeader.get(httpHeader.REFERER, ""), null);
} catch (Exception e) {
return null;
}
@ -143,7 +139,7 @@ public class ResourceInfo implements IResourceInfo {
/**
* @see de.anomic.plasma.cache.IResourceInfo#getUrl()
*/
public URL getUrl() {
public yacyURL getUrl() {
return this.url;
}
@ -151,7 +147,7 @@ public class ResourceInfo implements IResourceInfo {
* @see de.anomic.plasma.cache.IResourceInfo#getUrlHash()
*/
public String getUrlHash() {
return this.urlHash;
return this.url.hash();
}
public void setRequestHeader(httpHeader reqestHeader) {

@ -49,10 +49,9 @@ package de.anomic.plasma.crawler;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date;
import de.anomic.plasma.plasmaURL;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
import de.anomic.plasma.plasmaCrawlProfile;
@ -61,6 +60,7 @@ import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlWorker {
@ -86,7 +86,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
* Crawl job specific variables
* ============================================================ */
public plasmaCrawlLoaderMessage theMsg;
protected URL url;
protected yacyURL url;
protected String name;
protected String refererURLString;
protected String initiator;
@ -281,7 +281,12 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
this.errorMessage = failreason;
// convert the referrer URL into a hash value
String referrerHash = (this.refererURLString==null)?null:plasmaURL.urlHash(this.refererURLString);
String referrerHash;
try {
referrerHash = (this.refererURLString == null) ? null : (new yacyURL(this.refererURLString, null)).hash();
} catch (MalformedURLException e) {
referrerHash = null;
}
// create a new errorURL DB entry
plasmaCrawlEntry bentry = new plasmaCrawlEntry(

@ -55,7 +55,6 @@ import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.Date;
import de.anomic.net.URL;
import de.anomic.net.ftpc;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaHTCache;
@ -68,6 +67,7 @@ import de.anomic.plasma.crawler.plasmaCrawlWorker;
import de.anomic.plasma.crawler.plasmaCrawlerPool;
import de.anomic.plasma.plasmaHTCache.Entry;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorker {
@ -188,7 +188,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
if (isFolder) {
fullPath = fullPath + "/";
file = "";
this.url = URL.newURL(this.url,fullPath);
this.url = yacyURL.newURL(this.url,fullPath);
}
}

@ -60,8 +60,6 @@ import de.anomic.http.httpc;
import de.anomic.http.httpdBoundedSizeOutputStream;
import de.anomic.http.httpdLimitExceededException;
import de.anomic.http.httpdProxyHandler;
import de.anomic.plasma.plasmaURL;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLoader;
import de.anomic.plasma.plasmaHTCache;
@ -74,6 +72,7 @@ import de.anomic.plasma.crawler.plasmaCrawlerPool;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverSystem;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public final class CrawlWorker extends AbstractCrawlWorker {
@ -140,7 +139,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
return load(DEFAULT_CRAWLING_RETRY_COUNT);
}
protected plasmaHTCache.Entry createCacheEntry(URL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) {
protected plasmaHTCache.Entry createCacheEntry(yacyURL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) {
IResourceInfo resourceInfo = new ResourceInfo(requestUrl,requestHeader,response.responseHeader);
return plasmaHTCache.newEntry(
requestDate,
@ -314,7 +313,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
}
// normalizing URL
URL redirectionUrl = URL.newURL(this.url, redirectionUrlString);
yacyURL redirectionUrl = yacyURL.newURL(this.url, redirectionUrlString);
// returning the used httpc
httpc.returnInstance(remote);
@ -332,7 +331,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
}
// generating url hash
String urlhash = plasmaURL.urlHash(redirectionUrl);
String urlhash = redirectionUrl.hash();
// removing url from loader queue
plasmaCrawlLoader.switchboard.noticeURL.remove(urlhash);

@ -47,14 +47,14 @@ package de.anomic.plasma.dbImport;
import java.util.HashMap;
import de.anomic.data.SitemapParser;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.yacy.yacyURL;
public class SitemapImporter extends AbstractImporter implements dbImporter {
private SitemapParser parser = null;
private URL sitemapURL = null;
private yacyURL sitemapURL = null;
public SitemapImporter(plasmaSwitchboard switchboard) {
super("sitemap",switchboard);
@ -110,7 +110,7 @@ public class SitemapImporter extends AbstractImporter implements dbImporter {
try {
// getting the sitemap URL
this.sitemapURL = new URL((String)initParams.get("sitemapURL"));
this.sitemapURL = new yacyURL((String)initParams.get("sitemapURL"), null);
// getting the crawling profile to use
plasmaCrawlProfile.entry profileEntry = this.sb.profiles.getEntry((String)initParams.get("crawlingProfile"));

@ -142,13 +142,13 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
this.urlCount++;
nextEntry = this.importNurlDB.pop(stackTypes[stackType], false);
nextHash = nextEntry.urlhash();
nextHash = nextEntry.url().hash();
} else {
if (!entryIter.hasNext()) break;
this.urlCount++;
nextEntry = (plasmaCrawlEntry) entryIter.next();
nextHash = nextEntry.urlhash();
nextHash = nextEntry.url().hash();
}
} catch (IOException e) {
this.log.logWarning("Unable to import entry: " + e.toString());

@ -52,11 +52,11 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.server.serverThread;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
/**
* New classes implementing the {@link de.anomic.plasma.parser.Parser} interface
@ -139,7 +139,7 @@ public abstract class AbstractParser implements Parser{
return tempFile;
}
public int parseDir(URL location, String prefix, File dir, plasmaParserDocument doc)
public int parseDir(yacyURL location, String prefix, File dir, plasmaParserDocument doc)
throws ParserException, InterruptedException, IOException {
if (!dir.isDirectory())
throw new ParserException("tried to parse ordinary file " + dir + " as directory", location);
@ -153,7 +153,7 @@ public abstract class AbstractParser implements Parser{
if (file.isDirectory()) {
result += parseDir(location, prefix, file, doc);
} else try {
URL url = URL.newURL(location, "/" + prefix + "/"
yacyURL url = yacyURL.newURL(location, "/" + prefix + "/"
// XXX: workaround for relative paths within document
+ file.getPath().substring(file.getPath().indexOf(File.separatorChar) + 1)
+ "/" + file.getName());
@ -185,7 +185,7 @@ public abstract class AbstractParser implements Parser{
* @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, byte[])
*/
public plasmaParserDocument parse(
URL location,
yacyURL location,
String mimeType,
String charset,
byte[] source
@ -220,7 +220,7 @@ public abstract class AbstractParser implements Parser{
* @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.File)
*/
public plasmaParserDocument parse(
URL location,
yacyURL location,
String mimeType,
String charset,
File sourceFile
@ -254,7 +254,7 @@ public abstract class AbstractParser implements Parser{
*
* @see de.anomic.plasma.parser.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream)
*/
public abstract plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
public abstract plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
/**
* @return Returns a list of library names that are needed by this parser

@ -48,9 +48,9 @@ import java.io.File;
import java.io.InputStream;
import java.util.Hashtable;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
/**
* This interface defines a list of methods that needs to be implemented
@ -74,7 +74,7 @@ public interface Parser {
*
* @throws ParserException if the content could not be parsed properly
*/
public plasmaParserDocument parse(URL location, String mimeType, String charset, byte[] source)
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, byte[] source)
throws ParserException, InterruptedException;
/**
@ -88,7 +88,7 @@ public interface Parser {
*
* @throws ParserException if the content could not be parsed properly
*/
public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile)
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile)
throws ParserException, InterruptedException;
/**
@ -102,7 +102,7 @@ public interface Parser {
*
* @throws ParserException if the content could not be parsed properly
*/
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source)
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source)
throws ParserException, InterruptedException;
/**

@ -44,13 +44,13 @@
package de.anomic.plasma.parser;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.yacy.yacyURL;
public class ParserException extends Exception
{
private String errorCode = null;
private URL url = null;
private yacyURL url = null;
private static final long serialVersionUID = 1L;
@ -58,21 +58,21 @@ public class ParserException extends Exception
super();
}
public ParserException(String message, URL url) {
public ParserException(String message, yacyURL url) {
this(message,url,plasmaCrawlEURL.DENIED_PARSER_ERROR);
}
public ParserException(String message, URL url, String errorCode) {
public ParserException(String message, yacyURL url, String errorCode) {
super(message);
this.errorCode = errorCode;
this.url = url;
}
public ParserException(String message, URL url, Throwable cause) {
public ParserException(String message, yacyURL url, Throwable cause) {
this(message,url,cause,plasmaCrawlEURL.DENIED_PARSER_ERROR);
}
public ParserException(String message, URL url, Throwable cause, String errorCode) {
public ParserException(String message, yacyURL url, Throwable cause, String errorCode) {
super(message, cause);
this.errorCode = errorCode;
this.url = url;
@ -82,7 +82,7 @@ public class ParserException extends Exception
return this.errorCode;
}
public URL getURL() {
public yacyURL getURL() {
return this.url;
}
}

@ -50,12 +50,13 @@ import java.util.Hashtable;
import org.apache.tools.bzip2.CBZip2InputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class bzipParser extends AbstractParser implements Parser {
/**
@ -87,7 +88,7 @@ public class bzipParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
File tempFile = null;
try {

@ -48,15 +48,13 @@ import java.util.Hashtable;
import org.textmining.text.extraction.WordExtractor;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class docParser
extends AbstractParser
implements Parser {
public class docParser extends AbstractParser implements Parser {
/**
* a list of mime types that are supported by this parser class
@ -78,7 +76,7 @@ implements Parser {
this.parserName = "Word Document Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset,
InputStream source) throws ParserException, InterruptedException {

@ -49,12 +49,12 @@ import java.io.InputStream;
import java.util.Hashtable;
import java.util.zip.GZIPInputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class gzipParser extends AbstractParser implements Parser {
@ -83,7 +83,7 @@ public class gzipParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
File tempFile = null;
try {

@ -56,17 +56,15 @@ import net.sf.jmimemagic.MagicMatchNotFoundException;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
public class mimeTypeParser
extends AbstractParser
implements Parser {
public class mimeTypeParser extends AbstractParser implements Parser {
/**
* a list of mime types that are supported by this parser class
@ -127,7 +125,7 @@ implements Parser {
return null;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException {
String orgMimeType = mimeType;
@ -188,7 +186,7 @@ implements Parser {
}
}
public plasmaParserDocument parse(URL location, String mimeType,String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType,String charset, InputStream source) throws ParserException, InterruptedException {
File dstFile = null;
try {
dstFile = File.createTempFile("mimeTypeParser",".tmp");

@ -59,7 +59,6 @@ import com.catcode.odf.OpenDocumentMetadata;
import com.catcode.odf.OpenDocumentTextInputStream;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
@ -67,6 +66,7 @@ import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class odtParser extends AbstractParser implements Parser {
@ -95,7 +95,7 @@ public class odtParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, File dest) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File dest) throws ParserException, InterruptedException {
Writer writer = null;
File writerFile = null;
@ -209,7 +209,7 @@ public class odtParser extends AbstractParser implements Parser {
}
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
File dest = null;
try {
// creating a tempfile
@ -241,7 +241,7 @@ public class odtParser extends AbstractParser implements Parser {
if (args.length != 1) return;
// getting the content URL
URL contentUrl = new URL(args[0]);
yacyURL contentUrl = new yacyURL(args[0], null);
// creating a new parser
odtParser testParser = new odtParser();

@ -55,13 +55,13 @@ import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
import org.pdfbox.util.PDFTextStripper;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacyURL;
public class pdfParser extends AbstractParser implements Parser {
@ -89,7 +89,7 @@ public class pdfParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
PDDocument theDocument = null;
Writer writer = null;

@ -50,11 +50,11 @@ import java.util.Hashtable;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class pptParser extends AbstractParser implements Parser {
@ -88,7 +88,7 @@ public class pptParser extends AbstractParser implements Parser {
* parses the source documents and returns a plasmaParserDocument containing
* all extracted information about the parsed document
*/
public plasmaParserDocument parse(URL location, String mimeType,
public plasmaParserDocument parse(yacyURL location, String mimeType,
String charset, InputStream source) throws ParserException,
InterruptedException {
try {

@ -52,12 +52,12 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Hashtable;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
public class psParser extends AbstractParser implements Parser {
@ -114,7 +114,7 @@ public class psParser extends AbstractParser implements Parser {
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException {
File outputFile = null;
try {
@ -281,7 +281,7 @@ public class psParser extends AbstractParser implements Parser {
super.reset();
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
File tempFile = null;
try {

@ -53,12 +53,12 @@ import com.jguild.jrpm.io.RPMFile;
import com.jguild.jrpm.io.datatype.DataTypeIf;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
/**
* @author theli
@ -92,7 +92,7 @@ public class rpmParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset,
InputStream source) throws ParserException {
File dstFile = null;
try {
@ -106,7 +106,7 @@ public class rpmParser extends AbstractParser implements Parser {
}
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, File sourceFile) throws ParserException, InterruptedException {
RPMFile rpmFile = null;
try {
String summary = null, description = null, packager = null, name = sourceFile.getName();
@ -177,7 +177,7 @@ public class rpmParser extends AbstractParser implements Parser {
public static void main(String[] args) {
try {
URL contentUrl = new URL(args[0]);
yacyURL contentUrl = new yacyURL(args[0], null);
rpmParser testParser = new rpmParser();
byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null, null);

@ -56,7 +56,6 @@ import de.anomic.htmlFilter.htmlFilterAbstractScraper;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
@ -66,6 +65,7 @@ import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.xml.rssReader;
import de.anomic.xml.rssReader.Item;
import de.anomic.yacy.yacyURL;
public class rssParser extends AbstractParser implements Parser {
@ -92,7 +92,7 @@ public class rssParser extends AbstractParser implements Parser {
this.parserName = "Rich Site Summary/Atom Feed Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try {
LinkedList feedSections = new LinkedList();
@ -114,7 +114,7 @@ public class rssParser extends AbstractParser implements Parser {
String feedDescription = reader.getChannel().getDescription();
if (reader.getImage() != null) {
images.add(new htmlFilterImageEntry(new URL(reader.getImage()), feedTitle, -1, -1));
images.add(new htmlFilterImageEntry(new yacyURL(reader.getImage(), null), feedTitle, -1, -1));
}
// loop through the feed items
@ -126,7 +126,7 @@ public class rssParser extends AbstractParser implements Parser {
Item item = reader.getItem(i);
String itemTitle = item.getTitle();
URL itemURL = new URL(item.getLink());
yacyURL itemURL = new yacyURL(item.getLink(), null);
String itemDescr = item.getDescription();
String itemCreator = item.getCreator();
if (itemCreator != null && itemCreator.length() > 0) authors.append(",").append(itemCreator);

@ -49,15 +49,13 @@ import java.util.Hashtable;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.rtf.RTFEditorKit;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class rtfParser
extends AbstractParser
implements Parser {
public class rtfParser extends AbstractParser implements Parser {
/**
* a list of mime types that are supported by this parser class
@ -80,7 +78,7 @@ implements Parser {
this.parserName = "Rich Text Format Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try {

@ -48,13 +48,13 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverCachedFileOutputStream;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
import SevenZip.ArchiveExtractCallback;
import SevenZip.Archive.IInArchive;
@ -117,7 +117,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
plasmaParserDocument theDoc;
// workaround for relative links in file, normally '#' shall be used behind the location, see
// below for reversion of the effects
URL url = URL.newURL(doc.getLocation(), this.prefix + "/" + super.filePath);
yacyURL url = yacyURL.newURL(doc.getLocation(), this.prefix + "/" + super.filePath);
String mime = plasmaParser.getMimeTypeByFileExt(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
if (this.cfos.isFallback()) {
theDoc = this.parser.parseSource(url, mime, null, this.cfos.getContentFile());

@ -51,13 +51,13 @@ import SevenZip.IInStream;
import SevenZip.MyRandomAccessFile;
import SevenZip.Archive.SevenZip.Handler;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverCachedFileOutputStream;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
public class sevenzipParser extends AbstractParser implements Parser {
@ -81,7 +81,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
super.parserName = "7zip Archive Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset,
IInStream source, long maxRamSize) throws ParserException, InterruptedException {
plasmaParserDocument doc = new plasmaParserDocument(location, mimeType, charset);
Handler archive;
@ -111,12 +111,12 @@ public class sevenzipParser extends AbstractParser implements Parser {
}
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset,
byte[] source) throws ParserException, InterruptedException {
return parse(location, mimeType, charset, new ByteArrayIInStream(source), Parser.MAX_KEEP_IN_MEMORY_SIZE - source.length);
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset,
File sourceFile) throws ParserException, InterruptedException {
try {
return parse(location, mimeType, charset, new MyRandomAccessFile(sourceFile, "r"), Parser.MAX_KEEP_IN_MEMORY_SIZE);
@ -125,7 +125,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
}
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset,
InputStream source) throws ParserException, InterruptedException {
try {
serverCachedFileOutputStream cfos = new serverCachedFileOutputStream(Parser.MAX_KEEP_IN_MEMORY_SIZE);

@ -44,7 +44,6 @@
package de.anomic.plasma.parser.swf;
import java.io.InputStream;
import de.anomic.net.URL;
import java.util.Hashtable;
import java.util.HashMap;
@ -54,6 +53,7 @@ import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class swfParser extends AbstractParser implements Parser {
@ -90,7 +90,7 @@ public class swfParser extends AbstractParser implements Parser {
* parses the source documents and returns a plasmaParserDocument containing
* all extracted information about the parsed document
*/
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try {
SWF2HTML swf2html = new SWF2HTML();

@ -59,7 +59,6 @@ import java.util.zip.GZIPInputStream;
import com.ice.tar.TarEntry;
import com.ice.tar.TarInputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
@ -67,6 +66,7 @@ import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
public class tarParser extends AbstractParser implements Parser {
@ -97,7 +97,7 @@ public class tarParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
long docTextLength = 0;
OutputStream docText = null;
@ -166,7 +166,7 @@ public class tarParser extends AbstractParser implements Parser {
checkInterruption();
// parsing the content
subDoc = theParser.parseSource(URL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile);
subDoc = theParser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile);
} catch (ParserException e) {
this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getMessage());
} finally {

@ -57,11 +57,11 @@ import org.apache.commons.codec.net.QuotedPrintableCodec;
import de.anomic.http.httpc;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
/**
* Vcard specification: http://www.imc.org/pdi/vcard-21.txt
@ -97,7 +97,7 @@ public class vcfParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try {
StringBuffer parsedTitle = new StringBuffer();
@ -212,7 +212,7 @@ public class vcfParser extends AbstractParser implements Parser {
parsedData.clear();
} else if (key.toUpperCase().startsWith("URL")) {
try {
URL newURL = new URL(value);
yacyURL newURL = new yacyURL(value, null);
anchors.put(newURL.toString(),newURL.toString());
//parsedData.put(key,value);
} catch (MalformedURLException ex) {/* ignore this */}
@ -268,7 +268,7 @@ public class vcfParser extends AbstractParser implements Parser {
public static void main(String[] args) {
try {
URL contentUrl = new URL(args[0]);
yacyURL contentUrl = new yacyURL(args[0], null);
vcfParser testParser = new vcfParser();
byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null, null);

@ -56,11 +56,11 @@ import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
public class xlsParser extends AbstractParser implements Parser, HSSFListener {
@ -102,7 +102,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
* parses the source documents and returns a plasmaParserDocument containing
* all extracted information about the parsed document
*/
public plasmaParserDocument parse(URL location, String mimeType,
public plasmaParserDocument parse(yacyURL location, String mimeType,
String charset, InputStream source) throws ParserException,
InterruptedException {
try {

@ -57,7 +57,6 @@ import java.util.TreeSet;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
@ -65,6 +64,7 @@ import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
public class zipParser extends AbstractParser implements Parser {
@ -95,7 +95,7 @@ public class zipParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES;
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
long docTextLength = 0;
OutputStream docText = null;
@ -149,7 +149,7 @@ public class zipParser extends AbstractParser implements Parser {
serverFileUtils.copy(zippedContent,subDocTempFile,entry.getSize());
// parsing the zip file entry
subDoc = theParser.parseSource(URL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile);
subDoc = theParser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile);
} catch (ParserException e) {
this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. " + e.getMessage());
} finally {

@ -58,7 +58,6 @@ import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroStack;
import de.anomic.kelondro.kelondroAbstractRecords;
import de.anomic.server.serverDomains;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
@ -269,24 +268,24 @@ public class plasmaCrawlBalancer {
public synchronized void push(plasmaCrawlEntry entry) throws IOException {
assert entry != null;
if (urlFileIndex.has(entry.urlhash().getBytes())) {
serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " in " + stackname + " - fixed");
if (urlFileIndex.has(entry.url().hash().getBytes())) {
serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed");
return;
}
// extend domain stack
String dom = entry.urlhash().substring(6);
String dom = entry.url().hash().substring(6);
LinkedList domainList = (LinkedList) domainStacks.get(dom);
if (domainList == null) {
// create new list
domainList = new LinkedList();
synchronized (domainStacks) {
domainList.add(entry.urlhash());
domainList.add(entry.url().hash());
domainStacks.put(dom, domainList);
}
} else {
// extend existent domain list
domainList.addLast(entry.urlhash());
domainList.addLast(entry.url().hash());
}
// add to index
@ -430,7 +429,7 @@ public class plasmaCrawlBalancer {
return null;
}
plasmaCrawlEntry crawlEntry = new plasmaCrawlEntry(rowEntry);
long minimumDelta = (serverDomains.isLocal(crawlEntry.url())) ? minimumLocalDelta : minimumGlobalDelta;
long minimumDelta = (crawlEntry.url().isLocal()) ? minimumLocalDelta : minimumGlobalDelta;
plasmaCrawlRobotsTxt.Entry robotsEntry = plasmaSwitchboard.robots.getEntry(crawlEntry.url().getHost());
Integer hostDelay = (robotsEntry == null) ? null : robotsEntry.getCrawlDelay();
long genericDelta = ((robotsEntry == null) || (hostDelay == null)) ? minimumDelta : Math.max(minimumDelta, hostDelay.intValue() * 1000);

@ -34,9 +34,9 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.net.URL;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.yacyURL;
public class plasmaCrawlEntry {
@ -63,9 +63,8 @@ public class plasmaCrawlEntry {
private String initiator; // the initiator hash, is NULL or "" if it is the own proxy;
// if this is generated by a crawl, the own peer hash in entered
private String urlhash; // the url's hash
private String referrer; // the url's referrer hash
private URL url; // the url as string
private yacyURL url; // the url as string
private String name; // the name of the url, from anchor tag <a>name</a>
private long appdate; // the time when the url was first time appeared
private long loaddate; // the time when the url was loaded
@ -78,7 +77,7 @@ public class plasmaCrawlEntry {
private kelondroBitfield flags;
private int handle;
public plasmaCrawlEntry(URL url) {
public plasmaCrawlEntry(yacyURL url) {
this(yacyCore.seedDB.mySeed.hash, url, null, null, new Date(), null, 0, 0, 0);
}
@ -95,7 +94,7 @@ public class plasmaCrawlEntry {
*/
public plasmaCrawlEntry(
String initiator,
URL url,
yacyURL url,
String referrer,
String name,
Date appdate,
@ -106,10 +105,9 @@ public class plasmaCrawlEntry {
) {
// create new entry and store it into database
assert appdate != null;
this.urlhash = plasmaURL.urlHash(url);
this.initiator = initiator;
this.url = url;
this.referrer = (referrer == null) ? plasmaURL.dummyHash : referrer;
this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer;
this.name = (name == null) ? "" : name;
this.appdate = (appdate == null) ? 0 : appdate.getTime();
this.profileHandle = profileHandle; // must not be null
@ -131,10 +129,9 @@ public class plasmaCrawlEntry {
private void insertEntry(kelondroRow.Entry entry) throws IOException {
String urlstring = entry.getColString(2, null);
if (urlstring == null) throw new IOException ("url string is null");
this.urlhash = entry.getColString(0, null);
this.initiator = entry.getColString(1, null);
this.url = new URL(urlstring);
this.referrer = (entry.empty(3)) ? plasmaURL.dummyHash : entry.getColString(3, null);
this.url = new yacyURL(urlstring, entry.getColString(0, null));
this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
this.appdate = entry.getColLong(5);
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
@ -168,7 +165,7 @@ public class plasmaCrawlEntry {
namebytes = this.name.getBytes();
}
byte[][] entry = new byte[][] {
this.urlhash.getBytes(),
this.url.hash().getBytes(),
(initiator == null) ? "".getBytes() : this.initiator.getBytes(),
this.url.toString().getBytes(),
this.referrer.getBytes(),
@ -186,16 +183,11 @@ public class plasmaCrawlEntry {
return rowdef.newEntry(entry);
}
public URL url() {
public yacyURL url() {
// the url
return url;
}
public String urlhash() {
// the hash of this url
return this.urlhash;
}
public String referrerhash() {
// the urlhash of a referer url
return this.referrer;

@ -69,11 +69,11 @@ import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.yacyURL;
public final class plasmaCrawlLURL {
@ -118,8 +118,8 @@ public final class plasmaCrawlLURL {
public synchronized void stack(indexURLEntry e, String initiatorHash, String executorHash, int stackType) {
if (e == null) { return; }
try {
if (initiatorHash == null) { initiatorHash = plasmaURL.dummyHash; }
if (executorHash == null) { executorHash = plasmaURL.dummyHash; }
if (initiatorHash == null) { initiatorHash = yacyURL.dummyHash; }
if (executorHash == null) { executorHash = yacyURL.dummyHash; }
switch (stackType) {
case 0: break;
case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break;
@ -161,6 +161,7 @@ public final class plasmaCrawlLURL {
// - look into the hash cache
// - look into the filed properties
// if the url cannot be found, this returns null
if (urlHash == null) return null;
try {
kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes());
if (entry == null) return null;
@ -394,7 +395,7 @@ public final class plasmaCrawlLURL {
if ((pos = oldUrlStr.indexOf("://")) != -1) {
// trying to correct the url
String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
URL newUrl = new URL(newUrlStr);
yacyURL newUrl = new yacyURL(newUrlStr, null);
// doing a http head request to test if the url is correct
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
@ -534,7 +535,7 @@ public final class plasmaCrawlLURL {
// returns url-hash
if (args[0].equals("-h")) try {
// arg 1 is url
System.out.println("HASH: " + plasmaURL.urlHash(new URL(args[1])));
System.out.println("HASH: " + (new yacyURL(args[1], null)).hash());
} catch (MalformedURLException e) {}
if (args[0].equals("-l")) try {
// arg 1 is path to URLCache

@ -50,13 +50,13 @@ import java.util.HashSet;
import org.apache.commons.pool.impl.GenericKeyedObjectPool;
import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.net.URL;
import de.anomic.plasma.crawler.plasmaCrawlWorker;
import de.anomic.plasma.crawler.plasmaCrawlerException;
import de.anomic.plasma.crawler.plasmaCrawlerFactory;
import de.anomic.plasma.crawler.plasmaCrawlerMsgQueue;
import de.anomic.plasma.crawler.plasmaCrawlerPool;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public final class plasmaCrawlLoader extends Thread {
@ -215,7 +215,7 @@ public final class plasmaCrawlLoader extends Thread {
}
public plasmaHTCache.Entry loadSync(
URL url,
yacyURL url,
String urlName,
String referer,
String initiator,
@ -267,7 +267,7 @@ public final class plasmaCrawlLoader extends Thread {
}
public void loadAsync(
URL url,
yacyURL url,
String urlName,
String referer,
String initiator,

@ -42,13 +42,13 @@
package de.anomic.plasma;
import de.anomic.net.URL;
import de.anomic.server.serverSemaphore;
import de.anomic.yacy.yacyURL;
public final class plasmaCrawlLoaderMessage {
public final int crawlingPriority;
public final URL url;
public final yacyURL url;
public final String name;
public final String referer;
public final String initiator;
@ -64,7 +64,7 @@ public final class plasmaCrawlLoaderMessage {
// loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) {
public plasmaCrawlLoaderMessage(
URL url,
yacyURL url,
String name, // the name of the url, from anchor tag <a>name</a>
String referer,
String initiator,

@ -58,7 +58,6 @@ import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.data.robotsParser;
import de.anomic.http.httpc;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroException;
@ -68,12 +67,12 @@ import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverDomains;
import de.anomic.server.serverSemaphore;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyURL;
public final class plasmaCrawlStacker {
@ -190,7 +189,7 @@ public final class plasmaCrawlStacker {
}
public void enqueue(
URL nexturl,
yacyURL nexturl,
String referrerhash,
String initiatorHash,
String name,
@ -247,7 +246,7 @@ public final class plasmaCrawlStacker {
String reason = null; // failure reason
// getting the initiator peer hash
if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash;
if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = yacyURL.dummyHash;
// strange errors
if (nexturlString == null) {
@ -257,21 +256,20 @@ public final class plasmaCrawlStacker {
}
// getting the referer url and url hash
URL referrerURL = null;
yacyURL referrerURL = null;
if (referrerString != null) {
try {
referrerURL = new URL(referrerString);
referrerURL = new yacyURL(referrerString, null);
} catch (MalformedURLException e) {
referrerURL = null;
referrerString = null;
}
}
String referrerHash = (referrerString==null)?null:plasmaURL.urlHash(referrerString);
// check for malformed urls
URL nexturl = null;
yacyURL nexturl = null;
try {
nexturl = new URL(nexturlString);
nexturl = new yacyURL(nexturlString, null);
} catch (MalformedURLException e) {
reason = plasmaCrawlEURL.DENIED_MALFORMED_URL;
this.log.logSevere("Wrong URL in stackCrawl: " + nexturlString +
@ -367,10 +365,9 @@ public final class plasmaCrawlStacker {
// check if the url is double registered
checkInterruption();
String nexturlhash = plasmaURL.urlHash(nexturl);
String dbocc = this.sb.urlExists(nexturlhash);
String dbocc = this.sb.urlExists(nexturl.hash());
indexURLEntry oldEntry = null;
oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null);
oldEntry = this.sb.wordIndex.loadedURL.load(nexturl.hash(), null);
boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder());
// apply recrawl rule
if ((dbocc != null) && (!(recrawl))) {
@ -396,7 +393,7 @@ public final class plasmaCrawlStacker {
}
// store information
boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)));
boolean local = ((initiatorHash.equals(yacyURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)));
boolean global =
(profile != null) &&
(profile.remoteIndexing()) /* granted */ &&
@ -415,7 +412,7 @@ public final class plasmaCrawlStacker {
checkInterruption();
plasmaCrawlEntry ne = new plasmaCrawlEntry(initiatorHash, /* initiator, needed for p2p-feedback */
nexturl, /* url clear text string */
referrerHash, /* last url in crawling queue */
(referrerURL == null) ? null : referrerURL.hash(), /* last url in crawling queue */
name, /* load date */
loadDate, /* the anchor name */
(profile == null) ? null : profile.handle(), // profile must not be null!
@ -551,7 +548,7 @@ public final class plasmaCrawlStacker {
synchronized(this.urlEntryHashCache) {
kelondroRow.Entry oldValue = this.urlEntryCache.put(newMessage.toRow());
if (oldValue == null) {
insertionDoneSuccessfully = this.urlEntryHashCache.add(newMessage.urlhash());
insertionDoneSuccessfully = this.urlEntryHashCache.add(newMessage.url().hash());
}
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save