From e6bf9d90a526314feb084a3c5f4298dfdb5883c2 Mon Sep 17 00:00:00 2001 From: theli Date: Tue, 15 Nov 2005 09:07:00 +0000 Subject: [PATCH] *) Fixing Problems with MalformedURLs during Word Selection - removing (lurl.toString() == null) comparison because toString() is never null - adding (lurl.url() == null) condition because url() is null if we have selected a word entry with a malformed URL git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1083 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../plasma/plasmaWordIndexDistribution.java | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java index ea4841c4a..e98647245 100644 --- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java +++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java @@ -327,21 +327,18 @@ public final class plasmaWordIndexDistribution { while (urlIter.hasNext()) { indexEntry = (plasmaWordIndexEntry) urlIter.next(); lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash()); - if ((lurl == null) || (lurl.toString() == null)) { + if ((lurl == null) || (lurl.url() == null)) { unknownURLEntries.add(indexEntry.getUrlHash()); } else { - if (lurl.toString() == null) { - this.urlPool.loadedURL.remove(indexEntry.getUrlHash()); - unknownURLEntries.add(indexEntry.getUrlHash()); - } else { - knownURLs.put(indexEntry.getUrlHash(), lurl); - } + knownURLs.put(indexEntry.getUrlHash(), lurl); } } // now delete all entries that have no url entry hashIter = unknownURLEntries.iterator(); while (hashIter.hasNext()) { - indexEntity.removeEntry((String) hashIter.next(), false); + String nextUrlHash = (String) hashIter.next(); + indexEntity.removeEntry(nextUrlHash, false); + this.urlPool.loadedURL.remove(nextUrlHash); } if (indexEntity.size() == 0) { @@ -366,23 +363,20 @@ public final class plasmaWordIndexDistribution { while ((urlIter.hasNext()) && (count > 0)) { indexEntry = (plasmaWordIndexEntry) urlIter.next(); lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash()); - if (lurl == null) { + if ((lurl == null) || (lurl.url()==null)) { unknownURLEntries.add(indexEntry.getUrlHash()); } else { - if (lurl.toString() == null) { - this.urlPool.loadedURL.remove(indexEntry.getUrlHash()); - unknownURLEntries.add(indexEntry.getUrlHash()); - } else { - knownURLs.put(indexEntry.getUrlHash(), lurl); - tmpEntity.addEntry(indexEntry); - count--; - } + knownURLs.put(indexEntry.getUrlHash(), lurl); + tmpEntity.addEntry(indexEntry); + count--; } } // now delete all entries that have no url entry hashIter = unknownURLEntries.iterator(); while (hashIter.hasNext()) { - indexEntity.removeEntry((String) hashIter.next(), true); + String nextUrlHash = (String) hashIter.next(); + indexEntity.removeEntry(nextUrlHash, true); + this.urlPool.loadedURL.remove(nextUrlHash); } // use whats remaining this.log.logFine("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());