From 283a7181c61868fa9c594ad67397326324689a03 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Tue, 14 Mar 2006 23:22:49 +0000
Subject: [PATCH] try to fix new 100% cpu bug, possibly caused by iterator method
 see http://www.yacy-forum.de/viewtopic.php?p=18900#18900

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1888 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/plasma/plasmaDHTChunk.java  | 100 +++++++++++++++++++-
 source/de/anomic/plasma/plasmaWordIndex.java |  26 +++++
 2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java
index 2dd7004d8..253b73adc 100644
--- a/source/de/anomic/plasma/plasmaDHTChunk.java
+++ b/source/de/anomic/plasma/plasmaDHTChunk.java
@@ -169,6 +169,103 @@ public class plasmaDHTChunk {
         return;
     }
 
+    /**
+     * Selects index containers for a DHT transfer, starting at the given word hash.
+     * Word hashes are fetched from the word index in one batch and visited in order;
+     * for every word its container is loaded and each entry whose URL can be resolved
+     * in lurls is recorded in urlCache. Entries that cannot be resolved are removed
+     * from the container and from the word index.
+     * Selection stops when maxcount URL references were collected, when the word hashes
+     * are exhausted, or when the next word is at a DHT distance of 0.2 or more from the
+     * first selected container.
+     * Sets indexContainers, urlCache and status as a side effect.
+     *
+     * @param hash          start hash from where the indexes are picked
+     * @param resourceLevel resource level passed through to the word index
+     * @param maxcount      maximum number of URL references to select
+     * @return the number of selected URL references, 0 if nothing could be selected
+     */
+    private int selectTransferContainersResource(String hash, int resourceLevel, int maxcount) {
+        ArrayList tmpContainers = new ArrayList(maxcount);
+        try {
+            String[] wordHashes = wordIndex.wordHashes(hash, resourceLevel, true, maxcount);
+            plasmaWordIndexEntryContainer indexContainer;
+            Iterator urlIter;
+            plasmaWordIndexEntry indexEntry;
+            plasmaCrawlLURL.Entry lurl;
+            String wordHash;
+            int refcount = 0;  // number of url references selected so far
+            int wordcount = 0; // position of the next word hash to visit
+
+            urlCache = new HashMap();
+            // wordHashes must be indexed by its own counter: refcount counts urls, not words,
+            // and using it as index skipped words, could run past the end of the array and
+            // never advanced (endless loop) when a word had no bound url
+            while ((wordcount < wordHashes.length) && (maxcount > refcount) && ((tmpContainers.size() == 0) || (yacyDHTAction.dhtDistance(wordHashes[wordcount], ((plasmaWordIndexEntryContainer) tmpContainers.get(0)).wordHash()) < 0.2))) {
+                wordHash = wordHashes[wordcount++];
+                // make an on-the-fly entity and insert values
+                indexContainer = wordIndex.getContainer(wordHash, true, 10000);
+                int notBoundCounter = 0;
+                try {
+                    urlIter = indexContainer.entries();
+                    // iterate over indexes to fetch url entries and store them in the urlCache
+                    while ((urlIter.hasNext()) && (maxcount > refcount)) {
+                        indexEntry = (plasmaWordIndexEntry) urlIter.next();
+                        try {
+                            lurl = lurls.getEntry(indexEntry.getUrlHash(), indexEntry);
+                            if ((lurl == null) || (lurl.url() == null)) {
+                                notBoundCounter++;
+                                urlIter.remove();
+                                wordIndex.removeEntries(wordHash, new String[] { indexEntry.getUrlHash() }, true);
+                            } else {
+                                urlCache.put(indexEntry.getUrlHash(), lurl);
+                                refcount++;
+                            }
+                        } catch (IOException e) {
+                            notBoundCounter++;
+                            urlIter.remove();
+                            wordIndex.removeEntries(wordHash, new String[] { indexEntry.getUrlHash() }, true);
+                        }
+                    }
+
+                    // remove all remaining; we have enough
+                    while (urlIter.hasNext()) {
+                        urlIter.next();
+                        urlIter.remove();
+                    }
+
+                    // use whats left
+                    log.logFine("Selected partial index (" + indexContainer.size() + " from " + wordIndex.indexSize(wordHash) + " URLs, " + notBoundCounter + " not bound) for word " + indexContainer.wordHash());
+                    tmpContainers.add(indexContainer);
+                } catch (kelondroException e) {
+                    log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + wordHash, e);
+                    wordIndex.deleteIndex(wordHash);
+                }
+            }
+            // create result
+            indexContainers = (plasmaWordIndexEntryContainer[]) tmpContainers.toArray(new plasmaWordIndexEntryContainer[tmpContainers.size()]);
+
+            if ((indexContainers == null) || (indexContainers.length == 0)) {
+                log.logFine("No index available for index transfer, hash start-point " + startPointHash);
+                this.status = chunkStatus_FAILED;
+                return 0;
+            }
+
+            this.status = chunkStatus_FILLED;
+
+            return refcount;
+        } catch (kelondroException e) {
+            log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
+            indexContainers = new plasmaWordIndexEntryContainer[0];
+            urlCache = new HashMap();
+
+            this.status = chunkStatus_FAILED;
+
+            return 0;
+        }
+    }
+
+    /*
     private int selectTransferContainersResource(String hash, int resourceLevel, int maxcount) {
         // the hash is a start hash from where the indexes are picked
         ArrayList tmpContainers = new ArrayList(maxcount);
@@ -245,7 +342,8 @@ public class plasmaDHTChunk {
             return 0;
         }
     }
-    
+    */
+
     public int deleteTransferIndexes() {
         Iterator urlIter;
         plasmaWordIndexEntry indexEntry;
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 0ffafc48a..ba738c65e 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -380,6 +380,32 @@ public final class plasmaWordIndex {
     public static final int RL_ASSORTMENTS = 2;
     public static final int RL_WORDFILES = 3;
 
+    /**
+     * Collects up to count word hashes from the word hash iterator that starts at startHash.
+     *
+     * @param startHash     hash where the enumeration starts
+     * @param resourceLevel resource level passed to the word hash iterator
+     * @param rot           passed to the word hash iterator; selects the rotating iterator
+     * @param count         maximum number of hashes to return; values below 1 yield an empty array
+     * @return the hashes in iteration order; the array length equals the number of hashes
+     *         found and it never contains null padding
+     */
+    public synchronized String[] wordHashes(String startHash, int resourceLevel, boolean rot, int count) {
+        if (count <= 0) return new String[0];
+        String[] hashes = new String[count];
+        Iterator i = wordHashes(startHash, resourceLevel, rot);
+        int found = 0;
+        while ((found < count) && (i.hasNext())) {
+            hashes[found++] = (String) i.next();
+        }
+        if (found == count) return hashes;
+        // the iterator ended early: trim the result. The former test on the post-decremented
+        // count missed the case of exactly count - 1 hashes and returned a trailing null
+        String[] trimmed = new String[found];
+        System.arraycopy(hashes, 0, trimmed, 0, found);
+        return trimmed;
+    }
+
     public Iterator wordHashes(String startHash, int resourceLevel, boolean rot) {
         if (rot) return new rotatingWordIterator(startHash, resourceLevel);
         else return new correctedWordIterator(startHash, resourceLevel, rot); // use correction until bug is found