From 4f9dae257102a71b4655394d4935bc50629d05bb Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 19 Feb 2009 22:58:00 +0000 Subject: [PATCH] remove reference in crawl entries git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5623 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/CrawlEntry.java | 1 + source/de/anomic/crawler/CrawlQueues.java | 2 +- source/de/anomic/yacy/yacyURL.java | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/crawler/CrawlEntry.java b/source/de/anomic/crawler/CrawlEntry.java index 67d0757f1..30982e8a2 100755 --- a/source/de/anomic/crawler/CrawlEntry.java +++ b/source/de/anomic/crawler/CrawlEntry.java @@ -132,6 +132,7 @@ public class CrawlEntry extends serverProcessorJob { assert url != null; assert initiator != null; assert profileHandle == null || profileHandle.length() == yacySeedDB.commonHashLength : profileHandle + " != " + yacySeedDB.commonHashLength; + url.removeRef(); // remove anchor reference this.initiator = initiator; this.url = url; this.refhash = (referrerhash == null) ? "" : referrerhash; diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index f4caad1f6..24588f2f2 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -508,7 +508,7 @@ public class CrawlQueues { protected final class crawlWorker extends Thread { - public CrawlEntry entry; + private CrawlEntry entry; private final Integer code; public crawlWorker(final CrawlEntry entry) { diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index 363505904..4665b902d 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -526,6 +526,10 @@ public class yacyURL implements Serializable { return ref; } + public void removeRef() { + ref = null; + } + public String getUserInfo() { return userInfo; } @@ -680,7 +684,7 @@ public class yacyURL implements Serializable { // the url hash computation needs a DNS lookup to check if the addresses domain is local // that causes that this method may be very slow - assert this.hash == null; // should only be called if the hash was not computed bevore + assert this.hash == null; // should only be called if the hash was not computed before final int id = serverDomains.getDomainID(this.host); // id=7: tld is local final boolean isHTTP = this.protocol.equals("http"); @@ -894,6 +898,7 @@ public class yacyURL implements Serializable { environment = test[i][0]; url = test[i][1]; try {aURL = yacyURL.newURL(environment, url);} catch (final MalformedURLException e) {aURL = null;} + if (aURL != null) System.out.println("normalized: " + aURL.toNormalform(true, true)); if (environment == null) { try {jURL = new java.net.URL(url);} catch (final MalformedURLException e) {jURL = null;} } else {