From 3a4933b63cbb2e391c1a6253d4e00c4ff461497e Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 5 Dec 2006 12:32:19 +0000 Subject: [PATCH] bugfix for http://www.yacy-forum.de/viewtopic.php?p=28493#28493 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3045 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/index/indexContainer.java | 6 +++++- source/de/anomic/index/indexRWIEntryNew.java | 6 ++++-- source/de/anomic/index/indexURLEntryNew.java | 15 +++++++++------ source/de/anomic/plasma/plasmaCrawlLURL.java | 4 ++-- source/de/anomic/plasma/plasmaDHTChunk.java | 7 ++++--- source/de/anomic/plasma/plasmaSearchEvent.java | 11 +++++------ source/de/anomic/plasma/plasmaWordIndex.java | 6 +----- .../anomic/plasma/plasmaWordIndexAssortment.java | 4 ++-- source/de/anomic/plasma/plasmaWordIndexFile.java | 8 ++++++-- .../anomic/plasma/plasmaWordIndexFileCluster.java | 5 +++-- source/de/anomic/yacy/yacyClient.java | 6 +----- 11 files changed, 42 insertions(+), 36 deletions(-) diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java index 0717eb431..dc8fe5960 100644 --- a/source/de/anomic/index/indexContainer.java +++ b/source/de/anomic/index/indexContainer.java @@ -34,6 +34,7 @@ import java.util.Set; import java.util.TreeMap; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; @@ -123,8 +124,11 @@ public class indexContainer extends kelondroRowSet { indexRWIEntry oldEntry; if (entry instanceof indexRWIEntryNew) oldEntry = new indexRWIEntryNew(oldEntryRow); - else + else try { oldEntry = new indexRWIEntryNew(new indexRWIEntryOld(oldEntryRow)); + } catch (kelondroException e) { + return false; + } if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container this.put(oldEntry.toKelondroEntry()); // put it back return false; diff --git a/source/de/anomic/index/indexRWIEntryNew.java b/source/de/anomic/index/indexRWIEntryNew.java index f064b1844..549c0791f 100644 --- a/source/de/anomic/index/indexRWIEntryNew.java +++ b/source/de/anomic/index/indexRWIEntryNew.java @@ -29,6 +29,7 @@ package de.anomic.index; import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroColumn; +import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow.Entry; import de.anomic.plasma.plasmaURL; @@ -148,8 +149,9 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { this.entry.setCol(col_reserve, 0); } - public indexRWIEntryNew(indexRWIEntryOld oldEntry) { - assert oldEntry.urlHash() != null; + public indexRWIEntryNew(indexRWIEntryOld oldEntry) throws kelondroException { + if (oldEntry.urlHash() == null) throw new kelondroException("hash is null"); + if (oldEntry.urlHash().length() != 12) throw new kelondroException("hash has wrong length"); this.entry = urlEntryRow.newEntry(); int mddlm = plasmaWordIndex.microDateDays(oldEntry.lastModified()); this.entry.setCol(col_urlhash, oldEntry.urlHash(), null); diff --git a/source/de/anomic/index/indexURLEntryNew.java b/source/de/anomic/index/indexURLEntryNew.java index 04fb99e5e..809b5cc20 100644 --- a/source/de/anomic/index/indexURLEntryNew.java +++ b/source/de/anomic/index/indexURLEntryNew.java @@ -8,6 +8,7 @@ import java.util.Properties; import java.util.ArrayList; import de.anomic.kelondro.kelondroBitfield; +import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroRow; @@ -64,7 +65,7 @@ public class indexURLEntryNew implements indexURLEntry { private kelondroRow.Entry entry; private String snippet; - private indexRWIEntry word; // this is only used if the url is transported via remote search requests + private indexRWIEntryNew word; // this is only used if the url is transported via remote search requests public indexURLEntryNew( URL url, @@ -127,7 +128,7 @@ public class indexURLEntryNew implements indexURLEntry { return s.toString().getBytes(); } - public indexURLEntryNew(kelondroRow.Entry entry, indexRWIEntry searchedWord) { + public indexURLEntryNew(kelondroRow.Entry entry, indexRWIEntryNew searchedWord) { this.entry = entry; this.snippet = null; this.word = searchedWord; @@ -182,8 +183,11 @@ public class indexURLEntryNew implements indexURLEntry { this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0"))); this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null); this.word = null; - if (prop.containsKey("word")) { - this.word = new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))); + if (prop.containsKey("word")) try { + // convert old data format + this.word = new indexRWIEntryNew(new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", "")))); + } catch (kelondroException e) { + this.word = null; } if (prop.containsKey("wi")) { this.word = new indexRWIEntryNew(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))); @@ -221,8 +225,7 @@ public class indexURLEntryNew implements indexURLEntry { if (this.word != null) { // append also word properties - if (this.word instanceof indexRWIEntryOld) s.append(",word=").append(kelondroBase64Order.enhancedCoder.encodeString(word.toPropertyForm())); - if (this.word instanceof indexRWIEntryNew) s.append(",wi=").append(kelondroBase64Order.enhancedCoder.encodeString(word.toPropertyForm())); + s.append(",wi=").append(kelondroBase64Order.enhancedCoder.encodeString(word.toPropertyForm())); } return s; diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 75368f06d..027636571 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -62,8 +62,8 @@ import java.util.LinkedList; import de.anomic.http.httpc; import de.anomic.http.httpc.response; -import de.anomic.index.indexRWIEntry; import de.anomic.plasma.plasmaURL; +import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntryNew; import de.anomic.kelondro.kelondroBitfield; @@ -195,7 +195,7 @@ public final class plasmaCrawlLURL { return 0; } - public synchronized indexURLEntry load(String urlHash, indexRWIEntry searchedWord) { + public synchronized indexURLEntry load(String urlHash, indexRWIEntryNew searchedWord) { // generates an plasmaLURLEntry using the url hash // to speed up the access, the url-hashes are buffered // in the hash cache. diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java index 7fac09f00..5b9741ce2 100644 --- a/source/de/anomic/plasma/plasmaDHTChunk.java +++ b/source/de/anomic/plasma/plasmaDHTChunk.java @@ -48,6 +48,7 @@ import java.util.Iterator; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; +import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; @@ -200,7 +201,7 @@ public class plasmaDHTChunk { Iterator indexContainerIterator = wordIndex.indexContainerSet(hash, ram, true, maxcount).iterator(); indexContainer container; Iterator urlIter; - indexRWIEntry iEntry; + indexRWIEntryNew iEntry; indexURLEntry lurl; int refcount = 0; int wholesize; @@ -227,7 +228,7 @@ public class plasmaDHTChunk { urlIter = container.entries(); // iterate over indexes to fetch url entries and store them in the urlCache while ((urlIter.hasNext()) && (maxcount > refcount) && (System.currentTimeMillis() < timeout)) { - iEntry = (indexRWIEntry) urlIter.next(); + iEntry = (indexRWIEntryNew) urlIter.next(); if ((iEntry == null) || (iEntry.urlHash() == null)) { urlIter.remove(); continue; @@ -247,7 +248,7 @@ public class plasmaDHTChunk { // remove all remaining; we have enough while (urlIter.hasNext()) { - iEntry = (indexRWIEntry) urlIter.next(); + iEntry = (indexRWIEntryNew) urlIter.next(); urlIter.remove(); } diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index a1669603a..eea50b4ef 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -39,7 +39,6 @@ // the intact and unchanged copyright notice. // Contributions and changes to the program code must be marked as such. - package de.anomic.plasma; import java.util.Collection; @@ -51,7 +50,7 @@ import java.util.Set; import java.util.TreeMap; import de.anomic.index.indexContainer; -import de.anomic.index.indexRWIEntry; +import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMSetTools; @@ -380,7 +379,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { //if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty //if (searchResult.size() == 0) return acc; // case that we have nothing to do - indexRWIEntry entry; + indexRWIEntryNew entry; indexURLEntry page; Long preranking; Object[] preorderEntry; @@ -389,7 +388,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { while (preorder.hasNext()) { if ((System.currentTimeMillis() >= postorderLimitTime) && (acc.sizeFetched() >= minEntries)) break; preorderEntry = preorder.next(); - entry = (indexRWIEntry) preorderEntry[0]; + entry = (indexRWIEntryNew) preorderEntry[0]; // load only urls if there was not yet a root url of that hash preranking = (Long) preorderEntry[1]; // find the url entry @@ -442,11 +441,11 @@ public final class plasmaSearchEvent extends Thread implements Runnable { preorder.remove(true, true); // start url-fetch - indexRWIEntry entry; + indexRWIEntryNew entry; try { while (preorder.hasNext()) { if (System.currentTimeMillis() >= timeout) break; - entry = (indexRWIEntry) (preorder.next()[0]); + entry = (indexRWIEntryNew) (preorder.next()[0]); // find and fetch the url entry urlStore.load(entry.urlHash(), entry); } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 08df1d71d..f49aef5e5 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -43,7 +43,6 @@ import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRAMRI; import de.anomic.index.indexRI; import de.anomic.index.indexRWIEntryNew; -import de.anomic.index.indexRWIEntryOld; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroMergeIterator; @@ -150,10 +149,7 @@ public final class plasmaWordIndex implements indexRI { } public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) { - if (entry instanceof indexRWIEntryOld) { - if (entry.urlHash() == null) return; - entry = new indexRWIEntryNew((indexRWIEntryOld) entry); - } + assert (entry instanceof indexRWIEntryNew); // set dhtInCase depending on wordHash if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true; diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index 7a04961a6..e0b2b0936 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -124,10 +124,10 @@ public final class plasmaWordIndexAssortment { final long updateTime = row.getColLong(2); indexContainer container = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow); int al = assortmentCapacity(row.objectsize()); - for (int i = 0; i < al; i++) { + for (int i = 0; i < al; i++) try { // fill AND convert old entries to new entries container.add(new indexRWIEntry[] { new indexRWIEntryNew(new indexRWIEntryOld(row.getColBytes(3 + i))) }, updateTime); - } + } catch (kelondroException e) {} return container; } diff --git a/source/de/anomic/plasma/plasmaWordIndexFile.java b/source/de/anomic/plasma/plasmaWordIndexFile.java index d1bd492cd..40afbd6e7 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFile.java +++ b/source/de/anomic/plasma/plasmaWordIndexFile.java @@ -132,7 +132,9 @@ public final class plasmaWordIndexFile { public indexRWIEntry getEntry(String urlhash) throws IOException { kelondroRow.Entry n = theIndex.get(urlhash.getBytes()); if (n == null) return null; - return new indexRWIEntryNew(new indexRWIEntryOld(n.getColString(0, null), n.getColString(1, null))); + try { + return new indexRWIEntryNew(new indexRWIEntryOld(n.getColString(0, null), n.getColString(1, null))); + } catch (kelondroException e) {return null;} } public boolean contains(String urlhash) throws IOException { @@ -208,7 +210,9 @@ public final class plasmaWordIndexFile { public Object next() { if (i == null) return null; kelondroRow.Entry n = (kelondroRow.Entry) i.next(); - return new indexRWIEntryNew(new indexRWIEntryOld(n.getColString(0, null), n.getColString(1, null))); + try { + return new indexRWIEntryNew(new indexRWIEntryOld(n.getColString(0, null), n.getColString(1, null))); + } catch (kelondroException e) { return null; } } public void remove() { throw new UnsupportedOperationException(); diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index b14146c2a..c49d6e035 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -54,6 +54,7 @@ import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRI; import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexRWIEntryOld; +import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.yacy.yacySeedDB; @@ -231,10 +232,10 @@ public class plasmaWordIndexFileCluster implements indexRI { indexContainer container = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow); indexRWIEntryNew entry; Iterator i = entity.elements(true); - while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) { + while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) try { entry = new indexRWIEntryNew((indexRWIEntryOld) i.next()); if ((urlselection == null) || (urlselection.contains(entry.urlHash()))) container.add(entry); - } + } catch (kelondroException e) {} return container; } else { return new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 2134dcf90..61c000ddc 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -56,7 +56,6 @@ import de.anomic.http.httpc; import de.anomic.index.indexContainer; import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRWIEntryNew; -import de.anomic.index.indexRWIEntryOld; import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; @@ -533,10 +532,7 @@ public final class yacyClient { } // add the url entry to the word indexes for (int m = 0; m < words; m++) { - if (entry instanceof indexRWIEntryOld) { - if (entry.urlHash() == null) continue; - entry = new indexRWIEntryNew((indexRWIEntryOld) entry); - } + assert (entry instanceof indexRWIEntryNew); container[m].add(new indexRWIEntry[]{entry}, System.currentTimeMillis()); } // store url hash for statistics