diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java
index c6f051e11..77bed6ae1 100644
--- a/htroot/CacheAdmin_p.java
+++ b/htroot/CacheAdmin_p.java
@@ -97,7 +97,7 @@ public class CacheAdmin_p {
prop.put("info", 0);
path.append((pathString.length() == 0) ? linkPathString("/", true) : linkPathString(pathString, false));
- urlstr = htmlFilterContentScraper.urlNormalform(url);
+ urlstr = url.toNormalform();
prop.put("info_url", urlstr);
info.ensureCapacity(40000);
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 63013c174..574c0565f 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -56,7 +56,6 @@ import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
@@ -214,7 +213,7 @@ public class IndexControl_p {
try {
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.getEntry(urlhash, null);
URL url = entry.url();
- urlstring = htmlFilterContentScraper.urlNormalform(url);
+ urlstring = url.toNormalform();
prop.put("urlstring", "");
switchboard.urlPool.loadedURL.remove(urlhash);
prop.put("result", "Removed URL " + urlstring);
@@ -393,7 +392,7 @@ public class IndexControl_p {
}
if (url == null) { return "No entry found for URL-hash " + urlhash; }
String result = "
" +
- "URL String | " + htmlFilterContentScraper.urlNormalform(url) + " |
" +
+ "URL String | " + url.toNormalform() + " |
" +
"Hash | " + urlhash + " |
" +
"Description | " + entry.descr() + " |
" +
"Modified-Date | " + entry.moddate() + " |
" +
diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java
index e5f9e968b..d26024631 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/IndexCreate_p.java
@@ -141,7 +141,7 @@ public class IndexCreate_p {
if (!(crawlingStart.startsWith("http"))) crawlingStart = "http://" + crawlingStart;
// normalizing URL
- crawlingStart = htmlFilterContentScraper.urlNormalform(null, crawlingStart);
+ try {crawlingStart = new URL(crawlingStart).toNormalform();} catch (MalformedURLException e1) {/* unparsable input: crawlingStart keeps its un-normalized value; presumably rejected by the URL check below - TODO confirm */}
// check if url is proper
URL crawlingStartURL = null;
@@ -243,7 +243,7 @@ public class IndexCreate_p {
nexturlstring = nexturlstring.trim();
// normalizing URL
- nexturlstring = htmlFilterContentScraper.urlNormalform(null, nexturlstring);
+ nexturlstring = new URL(nexturlstring).toNormalform();
// generating an url object
URL nexturlURL = null;
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index 93753be0c..0ce745935 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -54,7 +54,6 @@ import de.anomic.net.URL;
import java.net.URLDecoder;
import java.util.Date;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlProfile;
@@ -137,7 +136,7 @@ public class QuickCrawlLink_p {
if (crawlingStart != null) {
crawlingStart = crawlingStart.trim();
- crawlingStart = htmlFilterContentScraper.urlNormalform(null, crawlingStart);
+ try {crawlingStart = new URL(crawlingStart).toNormalform();} catch (MalformedURLException e1) {/* unparsable input: crawlingStart keeps its un-normalized value; presumably rejected by the URL check below - TODO confirm */}
// check if url is proper
URL crawlingStartURL = null;
diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java
index 7cad3b892..9af1f6a97 100644
--- a/htroot/yacy/crawlOrder.java
+++ b/htroot/yacy/crawlOrder.java
@@ -48,9 +48,9 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
+import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@@ -173,11 +173,11 @@ public final class crawlOrder {
// old method: only one url
// normalizing URL
- String newURL = htmlFilterContentScraper.urlNormalform(null, (String)urlv.get(0));
+ String newURL = new URL((String) urlv.get(0)).toNormalform();
if (!newURL.equals(urlv.get(0))) {
env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0));
}
- String refURL = htmlFilterContentScraper.urlNormalform(null, (String) refv.get(0));
+ String refURL = new URL((String) refv.get(0)).toNormalform();
if ((refURL != null) && (!refURL.equals(refv.get(0)))) {
env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0));
}
diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java
index a84f03ce2..6b210e230 100644
--- a/source/de/anomic/data/robotsParser.java
+++ b/source/de/anomic/data/robotsParser.java
@@ -282,7 +282,7 @@ public final class robotsParser{
}
}
- if (robotsTxt4Host.isDisallowed(nexturl.getFile())) {
+ if (robotsTxt4Host.isDisallowed(nexturl.getPath())) {
return true;
}
return false;
@@ -327,7 +327,7 @@ public final class robotsParser{
}
- httpc.response res = con.GET(robotsURL.getFile(), reqHeaders);
+ httpc.response res = con.GET(robotsURL.getPath(), reqHeaders);
if (res.status.startsWith("2")) {
if (!res.responseHeader.mime().startsWith("text/plain")) {
robotsTxt = null;
diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index 418b3e0d2..db116699e 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -43,7 +43,6 @@
package de.anomic.htmlFilter;
-import de.anomic.server.logging.serverLog;
import de.anomic.server.serverByteBuffer;
import de.anomic.net.URL;
@@ -55,8 +54,6 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import java.util.TreeSet;
public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper {
@@ -117,7 +114,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if ((content.length() != 0) && (content.byteAt(content.length() - 1) != 32)) content.append(32);
content.append(super.stripAll(new serverByteBuffer(newtext, newtext.length + 1)).trim()).append(32);
}
-
+/*
public static String urlNormalform(URL url) {
boolean defaultPort = false;
// serverLog.logFinest("htmlFilter", "urlNormalform: '" + url.toString() + "'");
@@ -154,7 +151,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
return null;
}
}
-
+ */
public static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"';
public static String[] urlComps(String normalizedURL) {
return normalizedURL.toLowerCase().split(splitrex); // word components of the url
@@ -162,7 +159,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
private String absolutePath(String relativePath) {
try {
- return urlNormalform(new URL(root, relativePath));
+ return new URL(root, relativePath).toNormalform(); // normal form (no "#ref"), like the other former urlNormalform call sites
} catch (Exception e) {
return "";
}
diff --git a/source/de/anomic/index/indexEntryAttribute.java b/source/de/anomic/index/indexEntryAttribute.java
index 6eaaa6651..e5e45ede6 100644
--- a/source/de/anomic/index/indexEntryAttribute.java
+++ b/source/de/anomic/index/indexEntryAttribute.java
@@ -30,7 +30,6 @@ package de.anomic.index;
import de.anomic.net.URL;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.server.serverCodings;
import de.anomic.yacy.yacySeedDB;
@@ -93,7 +92,7 @@ public class indexEntryAttribute {
// doctype calculation
public static char docType(URL url) {
- String path = htmlFilterContentScraper.urlNormalform(url);
+ String path = url.getPath();
// serverLog.logFinest("PLASMA", "docType URL=" + path);
char doctype = doctype = indexEntryAttribute.DT_UNKNOWN;
if (path.endsWith(".gif")) { doctype = indexEntryAttribute.DT_IMAGE; }
diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java
index 85fc7c1d7..8dc692c19 100644
--- a/source/de/anomic/index/indexURL.java
+++ b/source/de/anomic/index/indexURL.java
@@ -32,7 +32,6 @@ import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroTree;
import de.anomic.server.serverCodings;
@@ -501,7 +500,7 @@ public class indexURL {
int domlengthKey = (l <= 8) ? 0 : (l <= 12) ? 1 : (l <= 16) ? 2 : 3;
byte flagbyte = (byte) (((isHTTP) ? 0 : 32) | (id << 2) | domlengthKey);
// form the 'local' part of the hash
- String hash3 = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(htmlFilterContentScraper.urlNormalform(url))).substring(0, 5);
+ String hash3 = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform())).substring(0, 5);
char hash2 = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(subdom + ":" + port + ":" + rootpath)).charAt(0);
// form the 'global' part of the hash
String hash1 = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.getProtocol() + ":" + host + ":" + port)).substring(0, 5);
@@ -529,13 +528,13 @@ public class indexURL {
public static final String oldurlHash(URL url) {
if (url == null) return null;
- String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(htmlFilterContentScraper.urlNormalform(url))).substring(0, urlHashLength);
+ String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform())).substring(0, urlHashLength);
return hash;
}
- public static final String oldurlHash(String url) {
+ public static final String oldurlHash(String url) throws MalformedURLException {
if ((url == null) || (url.length() < 10)) return null;
- String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(htmlFilterContentScraper.urlNormalform(null, url))).substring(0, urlHashLength);
+ String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(new URL(url).toNormalform())).substring(0, urlHashLength);
return hash;
}
diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java
index a7cdbbeb6..3990e7ca0 100644
--- a/source/de/anomic/kelondro/kelondroRow.java
+++ b/source/de/anomic/kelondro/kelondroRow.java
@@ -50,18 +50,6 @@ public class kelondroRow {
}
}
- /*
- public kelondroRow(int[] rowi) {
- this.row = new kelondroColumn[rowi.length];
- this.colstart = new int[rowi.length];
- this.objectsize = 0;
- for (int i = 0; i < rowi.length; i++) {
- this.row[i] = new kelondroColumn("col_" + i, kelondroColumn.celltype_undefined, kelondroColumn.encoder_none, rowi[i], "");
- this.colstart[i] = this.objectsize;
- this.objectsize += this.row[i].cellwidth();
- }
- }
- */
public kelondroRow(String structure) {
// define row with row syntax
// example:
diff --git a/source/de/anomic/net/URL.java b/source/de/anomic/net/URL.java
index b9e8c99b0..3c06cc96f 100644
--- a/source/de/anomic/net/URL.java
+++ b/source/de/anomic/net/URL.java
@@ -150,9 +150,13 @@ public class URL {
}
public String getFile() {
+ return getFile(true);
+ }
+
+ public String getFile(boolean includeReference) {
// this is the path plus quest plus ref
if (quest != null) return path + "?" + quest;
- if (ref != null) return path + "#" + ref;
+ if ((ref != null) && (includeReference)) return path + "#" + ref;
return path;
}
@@ -188,7 +192,15 @@ public class URL {
return quest;
}
+ public String toNormalform() {
+ return toString(false);
+ }
+
public String toString() {
+ return toString(true);
+ }
+
+ public String toString(boolean includeReference) {
// generates a normal form of the URL
boolean defaultPort = false;
if (this.protocol.equals("http")) {
@@ -198,7 +210,7 @@ public class URL {
} else if (this.protocol.equals("https")) {
if (this.port < 0 || this.port == 443) { defaultPort = true; }
}
- String path = this.getFile();
+ String path = this.getFile(includeReference);
if (path.length() == 0 || path.charAt(0) != '/') { path = "/" + path; }
@@ -208,8 +220,9 @@ public class URL {
path = matcher.replaceAll("");
matcher.reset(path);
}
-
- return this.protocol + "://" + this.getHost().toLowerCase() + ((defaultPort) ? "" : (":" + this.port)) + getFile();
+
+ if (defaultPort) { return this.protocol + "://" + this.getHost().toLowerCase() + path; }
+ return this.protocol + "://" + this.getHost().toLowerCase() + ":" + this.port + path; // only reached for a non-default port
}
public boolean equals(URL other) {
@@ -233,7 +246,8 @@ public class URL {
public static void main(String[] args) {
URL u;
- try {u = new URL("http://www.anomic.de/home/test?x=1#home"); System.out.println(u.toString());} catch (MalformedURLException e) {}
-
+ try {u = new URL("http://www.anomic.de/home/test?x=1#home"); System.out.println("toString=" + u.toString() + "\ntoNormalform=" + u.toNormalform());} catch (MalformedURLException e) {}
+ try {u = new URL("http://www.anomic.de/home/test?x=1"); System.out.println("toString=" + u.toString() + "\ntoNormalform=" + u.toNormalform());} catch (MalformedURLException e) {}
+ try {u = new URL("http://www.anomic.de/home/test#home"); System.out.println("toString=" + u.toString() + "\ntoNormalform=" + u.toNormalform());} catch (MalformedURLException e) {}
}
}
diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java
index 86b469cfe..89c9833dd 100644
--- a/source/de/anomic/plasma/plasmaCrawlWorker.java
+++ b/source/de/anomic/plasma/plasmaCrawlWorker.java
@@ -53,7 +53,6 @@ import java.net.SocketException;
import de.anomic.net.URL;
import java.net.UnknownHostException;
import java.util.Date;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
@@ -427,7 +426,7 @@ public final class plasmaCrawlWorker extends Thread {
}
// normalizing URL
- redirectionUrlString = htmlFilterContentScraper.urlNormalform(url, redirectionUrlString);
+ redirectionUrlString = new URL(url, redirectionUrlString).toNormalform();
// generating the new URL object
URL redirectionUrl = new URL(redirectionUrlString);
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index 3e2a69df1..237defdd5 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -53,7 +53,6 @@
package de.anomic.plasma;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpc;
import de.anomic.http.httpHeader;
import de.anomic.index.indexEntryAttribute;
@@ -727,7 +726,7 @@ public final class plasmaHTCache {
// normalize url
// serverLog.logFine("PLASMA", "Entry: URL=" + url.toString());
- this.nomalizedURLString = htmlFilterContentScraper.urlNormalform(url);
+ this.nomalizedURLString = url.toNormalform();
try {
this.url = new URL(this.nomalizedURLString);
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index 661bb69b6..bd085f096 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -298,16 +298,10 @@ public final class plasmaParser {
public static String getFileExt(URL url) {
// getting the file path
- String name = url.getFile();
-
- // chopping http parameters from the url
- int p = name.lastIndexOf('?');
- if (p != -1) {
- name = name.substring(0,p);
- }
+ String name = url.getPath();
// tetermining last position of / in the file path
- p = name.lastIndexOf('/');
+ int p = name.lastIndexOf('/');
if (p != -1) {
name = name.substring(p);
}
@@ -574,7 +568,7 @@ public final class plasmaParser {
String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length];
int p = 0;
for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j];
- plasmaParserDocument ppd = new plasmaParserDocument(new URL(htmlFilterContentScraper.urlNormalform(location)),
+ plasmaParserDocument ppd = new plasmaParserDocument(new URL(location.toNormalform()),
mimeType, null, null, scraper.getTitle(),
sections, null,
scraper.getText(), scraper.getAnchors(), scraper.getImages());
diff --git a/source/de/anomic/plasma/plasmaParserDocument.java b/source/de/anomic/plasma/plasmaParserDocument.java
index 8b1e0e3f6..b3e3feb95 100644
--- a/source/de/anomic/plasma/plasmaParserDocument.java
+++ b/source/de/anomic/plasma/plasmaParserDocument.java
@@ -42,7 +42,6 @@
package de.anomic.plasma;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import java.io.ByteArrayInputStream;
@@ -192,7 +191,9 @@ public class plasmaParserDocument {
} else {
ext = url.substring(extpos).toLowerCase();
}
- normal = htmlFilterContentScraper.urlNormalform(null, url);
+ try {normal = new URL(url).toNormalform();} catch (MalformedURLException e1) {
+ normal = null;
+ }
if (normal != null) { //TODO: extension function is not correct
if (plasmaParser.mediaExtContains(ext.substring(1))) {
// this is not a normal anchor, its a media link
@@ -216,7 +217,7 @@ public class plasmaParserDocument {
htmlFilterImageEntry iEntry;
while (i.hasNext()) {
iEntry = (htmlFilterImageEntry) i.next();
- normal = htmlFilterContentScraper.urlNormalform(iEntry.url());
+ normal = iEntry.url().toNormalform();
if (normal != null) medialinks.put(normal, iEntry.alt()); // avoid NullPointerException
}
diff --git a/source/de/anomic/plasma/plasmaSearchImages.java b/source/de/anomic/plasma/plasmaSearchImages.java
index 8b437e0bb..3782ff752 100644
--- a/source/de/anomic/plasma/plasmaSearchImages.java
+++ b/source/de/anomic/plasma/plasmaSearchImages.java
@@ -47,7 +47,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.server.serverDate;
@@ -72,10 +71,13 @@ public final class plasmaSearchImages {
Iterator i = hl.entrySet().iterator();
while (i.hasNext()) {
Map.Entry e = (Map.Entry) i.next();
- String nexturlstring = htmlFilterContentScraper.urlNormalform(null, (String) e.getKey());
+ String nexturlstring;
try {
+ nexturlstring = new URL((String) e.getKey()).toNormalform();
addAll(new plasmaSearchImages(sc, serverDate.remainingTime(start, maxTime, 10), new URL(nexturlstring), depth - 1));
- } catch (MalformedURLException e2) {}
+ } catch (MalformedURLException e1) {
+ e1.printStackTrace();
+ }
}
}
}
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 4bda60f06..f354db86b 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -54,7 +54,6 @@ import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySearch;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
@@ -438,7 +437,7 @@ public class plasmaSnippetCache {
while ((acc.hasMoreElements()) && (i < fetchcount) && (System.currentTimeMillis() < limitTime)) {
urlentry = acc.nextElement();
if (urlentry.url().getHost().endsWith(".yacyh")) continue;
- urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url());
+ urlstring = urlentry.url().toNormalform();
if ((urlstring.matches(urlmask)) &&
(!(existsInCache(urlentry.url(), queryhashes)))) {
new Fetcher(urlentry.url(), queryhashes).start();
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 9a521e208..d477426ca 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -107,6 +107,8 @@ import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
+import java.net.MalformedURLException;
+
import de.anomic.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
@@ -1402,7 +1404,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
while (i.hasNext()) {
e = (Map.Entry) i.next();
nexturlstring = (String) e.getKey();
- nexturlstring = htmlFilterContentScraper.urlNormalform(null, nexturlstring);
+ try {nexturlstring = new URL(nexturlstring).toNormalform();} catch (MalformedURLException e1) {/* unparsable link: the raw string is enqueued unchanged */}
sbStackCrawlThread.enqueue(nexturlstring, entry.url().toString(), initiatorHash, (String) e.getValue(), docDate, entry.depth() + 1, entry.profile());
@@ -1883,9 +1885,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
url = new URL("http://" + address + "/" + host.substring(0, p) + filename);
urlname = "http://share." + seed.getName() + ".yacy" + filename;
if ((p = urlname.indexOf("?")) > 0) urlname = urlname.substring(0, p);
- urlstring = htmlFilterContentScraper.urlNormalform(url);
+ urlstring = url.toNormalform();
} else {
- urlstring = htmlFilterContentScraper.urlNormalform(url);
+ urlstring = url.toNormalform();
urlname = urlstring;
}
descr = urlentry.descr();
diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
index bf9ef13b8..984caa675 100644
--- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
@@ -44,7 +44,6 @@
package de.anomic.plasma;
-import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
@@ -276,7 +275,7 @@ public class plasmaSwitchboardQueue {
}
public String normalizedURLString() {
- return htmlFilterContentScraper.urlNormalform(url);
+ return url.toNormalform();
}
public String urlHash() {