From e6bf9d90a526314feb084a3c5f4298dfdb5883c2 Mon Sep 17 00:00:00 2001
From: theli <theli@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 15 Nov 2005 09:07:00 +0000
Subject: [PATCH] *) Fixing problems with malformed URLs during word selection
  - removing the (lurl.toString() == null) comparison because toString() never
 returns null  - adding the (lurl.url() == null) condition because url() returns
 null if we have selected a word entry with a malformed URL

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1083 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../plasma/plasmaWordIndexDistribution.java   | 30 ++++++++-----------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
index ea4841c4a..e98647245 100644
--- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java
+++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
@@ -327,21 +327,18 @@ public final class plasmaWordIndexDistribution {
                         while (urlIter.hasNext()) {
                             indexEntry = (plasmaWordIndexEntry) urlIter.next();                            
                             lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
-                            if ((lurl == null) || (lurl.toString() == null)) {
+                            if ((lurl == null) || (lurl.url() == null)) {
                                 unknownURLEntries.add(indexEntry.getUrlHash());
                             } else {
-                                if (lurl.toString() == null) {
-                                    this.urlPool.loadedURL.remove(indexEntry.getUrlHash());
-                                    unknownURLEntries.add(indexEntry.getUrlHash());
-                                } else {
-                                    knownURLs.put(indexEntry.getUrlHash(), lurl);
-                                }
+                                knownURLs.put(indexEntry.getUrlHash(), lurl);
                             }
                         }
                         // now delete all entries that have no url entry
                         hashIter = unknownURLEntries.iterator();
                         while (hashIter.hasNext()) {
-                            indexEntity.removeEntry((String) hashIter.next(), false);
+                            String nextUrlHash = (String) hashIter.next();
+                            indexEntity.removeEntry(nextUrlHash, false);
+                            this.urlPool.loadedURL.remove(nextUrlHash);
                         }
                         
                         if (indexEntity.size() == 0) {
@@ -366,23 +363,20 @@ public final class plasmaWordIndexDistribution {
                         while ((urlIter.hasNext()) && (count > 0)) {
                             indexEntry = (plasmaWordIndexEntry) urlIter.next();
                             lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
-                            if (lurl == null) {
+                            if ((lurl == null) || (lurl.url()==null)) {
                                 unknownURLEntries.add(indexEntry.getUrlHash());
                             } else {
-                                if (lurl.toString() == null) {
-                                    this.urlPool.loadedURL.remove(indexEntry.getUrlHash());
-                                    unknownURLEntries.add(indexEntry.getUrlHash());
-                                } else {
-                                    knownURLs.put(indexEntry.getUrlHash(), lurl);
-                                    tmpEntity.addEntry(indexEntry);
-                                    count--;
-                                }
+                                knownURLs.put(indexEntry.getUrlHash(), lurl);
+                                tmpEntity.addEntry(indexEntry);
+                                count--;
                             }
                         }
                         // now delete all entries that have no url entry
                         hashIter = unknownURLEntries.iterator();
                         while (hashIter.hasNext()) {
-                            indexEntity.removeEntry((String) hashIter.next(), true);
+                            String nextUrlHash = (String) hashIter.next();
+                            indexEntity.removeEntry(nextUrlHash, true);
+                            this.urlPool.loadedURL.remove(nextUrlHash);
                         }
                         // use whats remaining
                         this.log.logFine("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());