From 159f795f658c97ab45d167368b488b8cd73e94ac Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 17 Jul 2005 22:25:50 +0000 Subject: [PATCH] bugfix (null pointer exception in assortments) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@404 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaSwitchboard.java | 16 +++++++--------- .../de/anomic/plasma/plasmaWordIndexCache.java | 10 +++++++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d3cb26872..c1e0292ab 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -932,13 +932,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } if (noIndexReason == null) { // strip out words - log.logDebug("(Profile) Condensing for '" + entry.normalizedURLString() + "'"); + log.logDebug("Condensing for '" + entry.normalizedURLString() + "'"); plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(document.getText())); //log.logInfo("INDEXING HEADLINE:" + descr); try { - log.logDebug("(Profile) Create LURL-Entry for '" + entry.normalizedURLString() + "', " + - "responseHeader=" + entry.responseHeader().toString()); + //log.logDebug("Create LURL-Entry for '" + entry.normalizedURLString() + "', " + + // "responseHeader=" + entry.responseHeader().toString()); Date lastModified = entry.responseHeader().lastModified(); if (lastModified == null) lastModified = entry.responseHeader().date(); if (lastModified == null) lastModified = new Date(); @@ -957,20 +957,18 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ); String urlHash = newEntry.hash(); - log.logDebug("(Profile) Remove NURL for '" + entry.normalizedURLString() + "'"); + //log.logDebug("Remove NURL for '" + entry.normalizedURLString() + "'"); urlPool.noticeURL.remove(urlHash); // worked-off if (((processCase == 4) || (processCase == 5) || (processCase == 6)) && (entry.profile().localIndexing())) { // remove stopwords - log.logDebug("(Profile) Exclude Stopwords for '" + entry.normalizedURLString() + "'"); log.logInfo("Excluded " + condenser.excludeWords(stopwords) + " words in URL " + entry.url()); - //System.out.println("DEBUG: words left to be indexed: " + condenser.getWords()); // do indexing - log.logDebug("(Profile) Create Index for '" + entry.normalizedURLString() + "'"); + //log.logDebug("Create Index for '" + entry.normalizedURLString() + "'"); int words = searchManager.addPageIndex(entry.url(), urlHash, loadDate, condenser, plasmaWordIndexEntry.language(entry.url()), plasmaWordIndexEntry.docType(entry.responseHeader().mime())); - log.logInfo("Indexed " + words + " words in URL " + entry.url() + " (" + descr + ")"); + log.logInfo("*Indexed " + words + " words in URL " + entry.url() + " (" + descr + ")"); // if this was performed for a remote crawl request, notify requester if ((processCase == 6) && (initiator != null)) { @@ -978,7 +976,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser yacyClient.crawlReceipt(initiator, "crawl", "fill", "indexed", newEntry, ""); } } else { - log.logDebug("Resource '" + entry.normalizedURLString() + "' not indexed (indexing is off)"); + log.logDebug("Not Indexed Resource '" + entry.normalizedURLString() + "': process case=" + processCase); } } catch (Exception ee) { log.logError("Could not index URL " + entry.url() + ": " + ee.getMessage()); diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index a7486c495..ea79e5932 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -318,9 +318,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { if (cache.size() == 0) return; flushThread.pause(); try { - int count = hashScore.getMaxScore(); String hash = (String) hashScore.getMaxObject(); - long time = (hash == null) ? System.currentTimeMillis() : longTime(hashDate.getScore(hash)); + if (hash == null) { + flushThread.proceed(); + return; + } + int count = hashScore.getMaxScore(); + long time = longTime(hashDate.getScore(hash)); if ((count > ramcacheLimit) && (System.currentTimeMillis() - time > 10000)) { // flush high-score entries flushFromMem(hash, true); @@ -364,7 +368,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(key, container); if (feedback == null) { return container.size(); - } else if (reintegrate) { + } else if ((container.size() != feedback.size()) && (reintegrate)) { // put assortmentRecord together with container back to ram synchronized (cache) { cache.put(key, feedback);