From ff4362b02dd07ccf83232c7bcbc6ea6849467b17 Mon Sep 17 00:00:00 2001 From: hermens Date: Thu, 7 Sep 2006 14:32:46 +0000 Subject: [PATCH] some more fixes for new plasmaCrawlLURL.load behavior git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2511 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Bookmarks.java | 8 ++-- htroot/IndexControl_p.java | 46 +++++++++++++------ htroot/yacy/crawlOrder.java | 11 +++-- .../de/anomic/plasma/plasmaSwitchboard.java | 2 + source/de/anomic/plasma/plasmaWordIndex.java | 9 +++- 5 files changed, 54 insertions(+), 22 deletions(-) diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index 4b4f02392..5c7f58dd6 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -138,9 +138,11 @@ public class Bookmarks { try { plasmaCrawlLURL.Entry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null); prop.put("mode_edit", 0); // create mode - prop.put("mode_title", urlentry.descr()); - prop.put("mode_description", urlentry.descr()); - prop.put("mode_url", urlentry.url()); + if (urlentry != null) { + prop.put("mode_title", urlentry.descr()); + prop.put("mode_description", urlentry.descr()); + prop.put("mode_url", urlentry.url()); + } prop.put("mode_tags", ""); prop.put("mode_public", 0); } catch (IOException e) { diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index c570e1520..a426554e6 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -213,11 +213,15 @@ public class IndexControl_p { if (post.containsKey("urlhashdelete")) { try { plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null); - URL url = entry.url(); - urlstring = url.toNormalform(); - prop.put("urlstring", ""); - switchboard.urlPool.loadedURL.remove(urlhash); - prop.put("result", "Removed URL " + urlstring); + if (entry != null) { + URL url = entry.url(); + urlstring = url.toNormalform(); + prop.put("urlstring", ""); + switchboard.urlPool.loadedURL.remove(urlhash); + prop.put("result", "Removed URL " + urlstring); + } else { + prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted."); + } } catch (IOException e) { prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted."); } @@ -263,7 +267,7 @@ public class IndexControl_p { iEntry = (indexEntry) urlIter.next(); try { lurl = switchboard.urlPool.loadedURL.load(iEntry.urlHash(), null); - if (lurl.toString() == null) { + if ((lurl == null)||(lurl.toString() == null)) { unknownURLEntries.add(iEntry.urlHash()); urlIter.remove(); } else { @@ -325,10 +329,14 @@ public class IndexControl_p { if (post.containsKey("urlhashsearch")) { try { plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null); - URL url = entry.url(); - urlstring = url.toString(); - prop.put("urlstring", urlstring); - prop.put("result", genUrlProfile(switchboard, entry, urlhash)); + if (entry != null) { + URL url = entry.url(); + urlstring = url.toString(); + prop.put("urlstring", urlstring); + prop.put("result", genUrlProfile(switchboard, entry, urlhash)); + } else { + prop.put("result", "No Entry for URL hash " + urlhash); + } } catch (IOException e) { prop.put("result", "No Entry for URL hash " + urlhash); } @@ -387,7 +395,12 @@ public class IndexControl_p { URL url = entry.url(); String referrer = null; try { - referrer = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null).url().toString(); + plasmaCrawlLURL.Entry referrerEntry = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null); + if (referrerEntry != null) { + referrer = referrerEntry.url().toString(); + } else { + referrer = ""; + } } catch (IOException e) { referrer = ""; } @@ -444,8 +457,13 @@ public class IndexControl_p { xi = (indexEntry) en.next(); uh = new String[]{xi.urlHash(), Integer.toString(xi.posintext())}; try { - us = switchboard.urlPool.loadedURL.load(uh[0], null).url().toString(); - tm.put(us, uh); + plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(uh[0], null); + if (entry != null) { + us = entry.url().toString(); + tm.put(us, uh); + } else { + tm.put(uh[0], uh); + } } catch (IOException e) { tm.put(uh[0], uh); } @@ -498,4 +516,4 @@ public class IndexControl_p { } } -} \ No newline at end of file +} diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java index 4eb2ce83a..c52de8975 100644 --- a/htroot/yacy/crawlOrder.java +++ b/htroot/yacy/crawlOrder.java @@ -251,9 +251,14 @@ public final class crawlOrder { // send lurl-Entry as response try { plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(indexURL.urlHash(url), null); - response = "double"; - switchboard.urlPool.loadedURL.notifyGCrawl(entry.hash(), iam, youare); - lurl = crypt.simpleEncode(entry.toString()); + if (entry != null) { + response = "double"; + switchboard.urlPool.loadedURL.notifyGCrawl(entry.hash(), iam, youare); + lurl = crypt.simpleEncode(entry.toString()); + } else { + response = "rejected"; + lurl = ""; + } } catch (IOException e) { response = "rejected"; lurl = ""; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 5b36c4af9..3a3ac2cb7 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2159,6 +2159,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // determine the url string try { plasmaCrawlLURL.Entry entry = urlPool.loadedURL.load(urlhash, null); + if (entry == null) + return 0; URL url = entry.url(); if (url == null) return 0; diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index acaaa2504..f0818f5e0 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -692,8 +692,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { // System.out.println("Wordhash: "+wordHash+" UrlHash: // "+entry.getUrlHash()); try { - url = lurl.load(entry.urlHash(), null).url(); - if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url) == true)) { + plasmaCrawlLURL.Entry lurlEntry = lurl.load(entry.urlHash(), null); + if (lurlEntry != null) { + url = lurlEntry.url(); + if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url) == true)) { + urlHashs.add(entry.urlHash()); + } + } else { urlHashs.add(entry.urlHash()); } } catch (IOException e) {