- bugfixes and debug code

- new generalized index class indexCachedRI

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2930 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent de6295d0ae
commit ba967c4875

@ -0,0 +1,356 @@
// indexCachedRI.java
// -----------------------------
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 7.11.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.server.logging.serverLog;
/**
 * An {@link indexRI} implementation that places two RAM caches
 * ({@code dhtOutCache} and {@code dhtInCache}) in front of a backend index.
 *
 * Writes go to one of the two RAM caches; containers are moved from the
 * caches to the backend by {@link #flushControl()} and
 * {@link #flushCacheSome(boolean)}. Reads merge the content of both caches
 * and the backend.
 *
 * NOTE(review): judging by the names, dhtInCache holds entries received from
 * other peers and dhtOutCache holds locally produced entries - confirm.
 */
public class indexCachedRI implements indexRI {

    private kelondroRow   payloadrow;
    private kelondroOrder indexOrder = new kelondroNaturalOrder(true);
    private indexRAMRI    dhtOutCache, dhtInCache;
    private indexRI       backend;

    // shows if a cache flush is currently performed;
    // volatile because it is written outside of any lock and is public, so it may be read by other threads
    public volatile boolean busyCacheFlush;

    // divisors applied to the cache size to compute how many containers flushCacheSome moves
    private int idleDivisor, busyDivisor;

    /**
     * Creates the cached index.
     *
     * @param dhtOutCache RAM cache that receives entries added with dhtInCase == false
     * @param dhtInCache  RAM cache that receives entries added with dhtInCase == true
     * @param backend     index that receives the containers flushed from the caches
     * @param payloadrow  row definition used when new containers are created
     * @param log         currently not used by this class
     */
    public indexCachedRI(indexRAMRI dhtOutCache, indexRAMRI dhtInCache, indexRI backend, kelondroRow payloadrow, serverLog log) {
        this.dhtOutCache = dhtOutCache;
        this.dhtInCache = dhtInCache;
        this.backend = backend;
        this.payloadrow = payloadrow;
        this.busyCacheFlush = false;
        this.busyDivisor = 5000;
        this.idleDivisor = 420;
    }

    /**
     * @return the row definition that was passed to the constructor
     */
    public kelondroRow payloadrow() {
        return payloadrow;
    }

    /**
     * Sets the divisors used by {@link #flushCacheSome(boolean)}: the number of
     * containers flushed per call is the cache size divided by the divisor
     * (limited to the range 1..100).
     *
     * @param idleDivisor divisor used when flushCacheSome is called with busy == false
     * @param busyDivisor divisor used when flushCacheSome is called with busy == true
     */
    public void setWordFlushDivisor(int idleDivisor, int busyDivisor) {
        this.idleDivisor = idleDivisor;
        this.busyDivisor = busyDivisor;
    }

    /**
     * Forced flush: for each RAM cache that holds more words than its
     * maximum word count, flushes the excess plus 500 containers to the backend.
     */
    public void flushControl() {
        // check for forced flush
        synchronized (this) {
            if (dhtOutCache.size() > dhtOutCache.getMaxWordCount()) {
                flushCache(dhtOutCache, dhtOutCache.size() + 500 - dhtOutCache.getMaxWordCount());
            }
            if (dhtInCache.size() > dhtInCache.getMaxWordCount()) {
                flushCache(dhtInCache, dhtInCache.size() + 500 - dhtInCache.getMaxWordCount());
            }
        }
    }

    /**
     * @param wordHash the word hash to look up
     * @return the update time of the merged container for the word hash,
     *         or 0 if no container exists
     */
    public long getUpdateTime(String wordHash) {
        indexContainer entries = getContainer(wordHash, null, false, -1);
        if (entries == null) return 0;
        return entries.updated();
    }

    /**
     * @param wordHash the word hash of the new container
     * @return a new, empty container for the word hash using this index' payload row
     */
    public indexContainer emptyContainer(String wordHash) {
        return new indexContainer(wordHash, payloadrow);
    }

    /**
     * Adds a single entry to one of the RAM caches.
     * Adding to the dhtOut cache triggers {@link #flushControl()}.
     *
     * @param wordHash   the word hash the entry belongs to
     * @param entry      the entry to add
     * @param updateTime the update time passed on to the cache
     * @param dhtInCase  true: store in dhtInCache; false: store in dhtOutCache
     * @return always null
     */
    public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
        // add the entry
        if (dhtInCase) {
            dhtInCache.addEntry(wordHash, entry, updateTime, true);
        } else {
            dhtOutCache.addEntry(wordHash, entry, updateTime, false);
            flushControl();
        }
        return null;
    }

    /**
     * Adds a whole container to one of the RAM caches.
     * Adding to the dhtOut cache triggers {@link #flushControl()}.
     *
     * @param entries    the container to add
     * @param updateTime the update time passed on to the cache
     * @param dhtInCase  true: store in dhtInCache; false: store in dhtOutCache
     * @return always null
     */
    public indexContainer addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
        // add the entries
        if (dhtInCase) {
            dhtInCache.addEntries(entries, updateTime, true);
        } else {
            dhtOutCache.addEntries(entries, updateTime, false);
            flushControl();
        }
        return null;
    }

    /**
     * Flushes a small number of containers from both RAM caches to the backend.
     *
     * @param busy selects the divisor (busy or idle) that determines the flush count
     */
    public void flushCacheSome(boolean busy) {
        flushCacheSome(dhtOutCache, busy);
        flushCacheSome(dhtInCache, busy);
    }

    /**
     * Computes the flush count for one RAM cache and flushes that many containers.
     * If the cache exceeds its maximum word count, the excess plus 100 is flushed;
     * otherwise size/divisor, limited to at most 100 and at least 1 (0 for an empty cache).
     */
    private void flushCacheSome(indexRAMRI ram, boolean busy) {
        int flushCount;
        if (ram.size() > ram.getMaxWordCount()) {
            flushCount = ram.size() + 100 - ram.getMaxWordCount();
        } else {
            int divisor = (busy) ? busyDivisor : idleDivisor;
            // a divisor of 0 would cause an ArithmeticException; fall back to the minimum flush count
            flushCount = (divisor == 0) ? 0 : ram.size() / divisor;
            if (flushCount > 100) flushCount = 100;
            if (flushCount < 1) flushCount = Math.min(1, ram.size());
        }
        flushCache(ram, flushCount);
    }

    /**
     * Moves up to count containers from the given RAM cache to the backend.
     * While the flush is running, {@link #busyCacheFlush} is true; the flag is
     * reset even if the flush terminates with an exception.
     *
     * @throws RuntimeException if the backend returns a feedback container
     */
    private void flushCache(indexRAMRI ram, int count) {
        if (count <= 0) return;
        boolean interrupted = false;
        busyCacheFlush = true;
        try {
            String wordHash;
            for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
                if (ram.size() == 0) break;
                synchronized (this) {
                    wordHash = ram.bestFlushWordHash();

                    // flush the wordHash
                    indexContainer c = ram.deleteContainer(wordHash);
                    if (c != null) {
                        indexContainer feedback = backend.addEntries(c, c.updated(), false);
                        if (feedback != null) {
                            throw new RuntimeException("indexCollectionRI shall not return feedback entries; feedback = " + feedback.toString());
                        }
                    }

                    // pause to next loop to give other processes a chance to use IO
                    try {
                        this.wait(8);
                    } catch (InterruptedException e) {
                        // finish the flush, but remember the interrupt for the caller
                        interrupted = true;
                    }
                }
            }
        } finally {
            busyCacheFlush = false;
            if (interrupted) Thread.currentThread().interrupt();
        }
    }

    private static final int hour = 3600000;  // milliseconds per hour
    private static final int day  = 86400000; // milliseconds per day

    /**
     * @param modified the date to convert
     * @return see {@link #microDateDays(long)}
     */
    public static int microDateDays(Date modified) {
        return microDateDays(modified.getTime());
    }

    /**
     * Calculates a virtual age from a given date: the number of whole days
     * contained in the given millisecond value, taken modulo 64**3 = 262144
     * (the mask of the storage).
     *
     * @param modified time in milliseconds
     * @return (modified / milliseconds-per-day) mod 262144
     */
    public static int microDateDays(long modified) {
        return (int) ((modified / day) % 262144);
    }

    /**
     * @param time time in milliseconds
     * @return {@link #microDateHoursInt(long)} encoded with the enhanced base64 coder
     *         (encodeLong is called with a length argument of 3)
     */
    public static String microDateHoursStr(long time) {
        return kelondroBase64Order.enhancedCoder.encodeLong(microDateHoursInt(time), 3);
    }

    /**
     * @param time time in milliseconds
     * @return (time / milliseconds-per-hour) mod 262144
     */
    public static int microDateHoursInt(long time) {
        return (int) ((time / hour) % 262144);
    }

    /**
     * @param mdhs a string as produced by {@link #microDateHoursStr(long)}
     * @return the difference between the current micro-date-hours value and the decoded one.
     *         NOTE(review): both values are taken mod 262144, so the result
     *         can become negative after a wrap-around.
     */
    public static int microDateHoursAge(String mdhs) {
        return microDateHoursInt(System.currentTimeMillis()) - (int) kelondroBase64Order.enhancedCoder.decodeLong(mdhs);
    }

    /**
     * @param microDateDays a day count as produced by {@link #microDateDays(long)}
     * @return the day count multiplied by the milliseconds per day
     */
    public static long reverseMicroDateDays(int microDateDays) {
        return ((long) microDateDays) * ((long) day);
    }

    /**
     * Collects the entries for a word hash from both RAM caches and the backend
     * and merges them into one container.
     *
     * NOTE(review): the deleteIfEmpty argument is not evaluated here; all three
     * sources are always queried with deleteIfEmpty == true - confirm that this is intended.
     *
     * @param wordHash      the word hash to look up
     * @param urlselection  passed through to the caches and the backend
     * @param deleteIfEmpty currently ignored, see note above
     * @param maxTime       time limit passed through to the sources; negative values are
     *                      passed to the backend as -1
     * @return the merged container, or null if none of the sources returned one
     */
    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
        // get from cache
        indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, maxTime);
        if (container == null) {
            container = dhtInCache.getContainer(wordHash, urlselection, true, maxTime);
        } else {
            container.add(dhtInCache.getContainer(wordHash, urlselection, true, maxTime), maxTime);
        }

        // get from collection index
        if (container == null) {
            container = backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
        } else {
            container.add(backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), maxTime);
        }
        return container;
    }

    /**
     * Retrieves the containers for a set of word hashes.
     * The remaining time budget is divided among the hashes that are still to be
     * fetched; if the budget is used up, a budget of 100 is assumed for the rest.
     *
     * @param wordHashes       set of word hashes (Strings)
     * @param urlselection     passed through to {@link #getContainer}
     * @param deleteIfEmpty    passed through to {@link #getContainer}
     * @param interruptIfEmpty if true, an empty map is returned as soon as one
     *                         word hash has no or an empty container
     * @param maxTime          overall time budget; a negative value means no limit (-1 is passed on)
     * @return map of wordhash:indexContainer
     */
    public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
        // retrieve entities that belong to the hashes
        HashMap containers = new HashMap();
        String singleHash;
        indexContainer singleContainer;
        Iterator i = wordHashes.iterator();
        long start = System.currentTimeMillis();
        long remaining;
        while (i.hasNext()) {
            // check time
            remaining = maxTime - (System.currentTimeMillis() - start);
            //if ((maxTime > 0) && (remaining <= 0)) break;
            if ((maxTime >= 0) && (remaining <= 0)) remaining = 100;

            // get next word hash:
            singleHash = (String) i.next();

            // retrieve index
            singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));

            // check result
            if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();

            containers.put(singleHash, singleContainer);
        }
        return containers;
    }

    /**
     * @return the maximum of the sizes of the backend and the two RAM caches
     *         (not their sum)
     */
    public int size() {
        return java.lang.Math.max(backend.size(), java.lang.Math.max(dhtInCache.size(), dhtOutCache.size()));
    }

    /**
     * @param wordHash the word hash to look up
     * @return the sum of the index sizes reported for the word hash by the backend and both caches
     */
    public int indexSize(String wordHash) {
        int size = backend.indexSize(wordHash);
        size += dhtInCache.indexSize(wordHash);
        size += dhtOutCache.indexSize(wordHash);
        return size;
    }

    /**
     * Closes both RAM caches and the backend.
     *
     * @param waitingBoundSeconds passed to the RAM caches; the backend is closed with -1
     */
    public void close(int waitingBoundSeconds) {
        synchronized (this) {
            dhtInCache.close(waitingBoundSeconds);
            dhtOutCache.close(waitingBoundSeconds);
            backend.close(-1);
        }
    }

    /**
     * Deletes the containers for a word hash from both caches and the backend.
     *
     * @param wordHash the word hash to delete
     * @return a new container into which the deleted containers have been added
     */
    public indexContainer deleteContainer(String wordHash) {
        indexContainer c = new indexContainer(wordHash, payloadrow);
        c.add(dhtInCache.deleteContainer(wordHash), -1);
        c.add(dhtOutCache.deleteContainer(wordHash), -1);
        c.add(backend.deleteContainer(wordHash), -1);
        return c;
    }

    /**
     * Removes one url reference of a word from both caches and the backend.
     *
     * @return true if at least one of the three sources reported a removal
     */
    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
        boolean removed = false;
        // the non-short-circuit '|' ensures that the removal is attempted on all three sources
        removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
        removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
        removed = removed | (backend.removeEntry(wordHash, urlHash, deleteComplete));
        return removed;
    }

    /**
     * Removes a set of url references of a word from both caches and the backend.
     *
     * @return the sum of the removal counts reported by the three sources
     */
    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
        int removed = 0;
        removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
        removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
        removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
        return removed;
    }

    /**
     * Same removal as {@link #removeEntries}, but reports the individual counts.
     *
     * @return the removal counts of dhtInCache, dhtOutCache and backend (in this order),
     *         each followed by ", "
     */
    public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
        String removed = "";
        removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
        removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
        removed += backend.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
        return removed;
    }

    /**
     * Creates a set of indexContainers, ordered by a container order that is
     * rotated to startHash. This does not use the dhtInCache.
     *
     * @param startHash the word hash where the collection starts
     * @param ramOnly   if true, only the dhtOutCache is read and count is limited to its size
     * @param rot       if true, the underlying iteration rotates (see {@link #wordContainers(String, boolean, boolean)})
     * @param count     maximum number of non-empty containers to collect
     * @return the collected containers
     */
    public TreeSet indexContainerSet(String startHash, boolean ramOnly, boolean rot, int count) {
        kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
        containerOrder.rotate(startHash.getBytes());
        TreeSet containers = new TreeSet(containerOrder);
        Iterator i = wordContainers(startHash, ramOnly, rot);
        if (ramOnly) count = Math.min(dhtOutCache.size(), count);
        indexContainer container;
        while ((count > 0) && (i.hasNext())) {
            container = (indexContainer) i.next();
            if ((container != null) && (container.size() > 0)) {
                containers.add(container);
                count--;
            }
        }
        return containers;
    }

    /**
     * @param startHash the word hash where the iteration starts
     * @param rot       if true, the iteration rotates
     * @return an iteration of indexContainers over dhtOutCache and backend
     */
    public Iterator wordContainers(String startHash, boolean rot) {
        return wordContainers(startHash, false, rot);
    }

    /**
     * Returns an iteration of indexContainers.
     *
     * @param startHash the word hash where the iteration starts
     * @param ramOnly   if true, only the dhtOutCache is iterated; otherwise the
     *                  dhtOutCache and the backend are merged
     * @param rot       if true, the iteration restarts at the first hash when its end is reached
     * @return the iterator
     */
    public Iterator wordContainers(String startHash, boolean ramOnly, boolean rot) {
        if (rot) return new rotatingContainerIterator(startHash, ramOnly);
        if (ramOnly) {
            return dhtOutCache.wordContainers(startHash, false);
        }
        return new kelondroMergeIterator(
                        dhtOutCache.wordContainers(startHash, false),
                        backend.wordContainers(startHash, false),
                        new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
                        indexContainer.containerMergeMethod,
                        true);
    }

    /**
     * Iterator that starts at a given word hash and, when the underlying
     * iteration is exhausted, continues with a new iteration that starts at
     * the hash "------------".
     */
    private class rotatingContainerIterator implements Iterator {
        Iterator i;
        boolean ramOnly;

        public rotatingContainerIterator(String startWordHash, boolean ramOnly) {
            this.ramOnly = ramOnly;
            // the three-argument method must be used here: the two-argument
            // wordContainers(String, boolean) would interpret ramOnly as the rot flag
            i = wordContainers(startWordHash, ramOnly, false);
        }

        public boolean hasNext() {
            if (i.hasNext()) return true;
            // end of the iteration reached: rotate to the beginning
            i = wordContainers("------------", ramOnly, false);
            return i.hasNext();
        }

        public Object next() {
            return i.next();
        }

        public void remove() {
            throw new java.lang.UnsupportedOperationException("rotatingWordIterator does not support remove");
        }
    } // class rotatingContainerIterator
}

@ -1,4 +1,4 @@
// indexRAMCacheRI.java
// indexRAMRI.java
// (C) 2005, 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 2005 on http://www.anomic.de
//

@ -1,11 +1,16 @@
// indexRI.java
// -----------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 6.5.2005
// (C) 2005 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 6.5.2005 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -19,25 +24,6 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.index;
@ -52,7 +38,7 @@ public interface indexRI {
public Iterator wordContainers(String startWordHash, boolean rot); // method to replace wordHashes
public long getUpdateTime(String wordHash);
public int indexSize(String wordHash);
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime);
public indexContainer deleteContainer(String wordHash);

@ -700,6 +700,10 @@ public class kelondroRecords {
if (value == null) {
while (valuewidth-- > 0) targetarray[targetoffset++] = 0;
} else {
assert ((valueoffset >= 0) && (valueoffset < value.length)) : "valueoffset = " + valueoffset;
assert ((valueoffset + valuewidth <= value.length)) : "valueoffset = " + valueoffset + ", valuewidth = " + valuewidth + ", value.length = " + value.length;
assert ((targetoffset >= 0) && (targetoffset < targetarray.length)) : "targetoffset = " + targetoffset;
assert ((targetoffset + valuewidth <= targetarray.length)) : "targetoffset = " + targetoffset + ", valuewidth = " + valuewidth + ", targetarray.length = " + targetarray.length;
System.arraycopy(value, valueoffset, targetarray, targetoffset, Math.min(value.length, valuewidth)); // error?
while (valuewidth-- > value.length) targetarray[targetoffset + valuewidth] = 0;
}

@ -513,7 +513,12 @@ public final class plasmaCrawlStacker {
this.anchors = (int) entry.getColLong(8);
this.forkfactor = (int) entry.getColLong(9);
this.flags = new bitfield(entry.getColBytes(10));
this.handle = Integer.parseInt(new String(entry.getColBytes(11), "UTF-8"));
try {
this.handle = Integer.parseInt(new String(entry.getColBytes(11), "UTF-8"));
} catch (NumberFormatException ee) {
System.out.println("BUG in stackCrawlMessage. entry = " + entry.toString());
throw new RuntimeException(ee.getMessage());
}
} catch (Exception e) {
e.printStackTrace();
throw new IllegalStateException(e.toString());

@ -66,7 +66,7 @@ public class plasmaParserDocument {
String charset; // the charset of the document
String[] keywords; // most resources provide a keyword field
String shortTitle; // a shortTitle mostly appears in the window header (border)
String longTitle; // the real title of the document, commonly h1-tags
private String longTitle; // the real title of the document, commonly h1-tags
String[] sections; // if present: more titles/headlines appearing in the document
String abstrct; // an abstract, if present: short content description
private Object text; // the clear text, all that is visible

@ -1677,7 +1677,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlHash,
urlLength, urlComps,
wordStat.count,
document.longTitle.length(),
document.getMainLongTitle().length(),
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
wordStat.posInText,

@ -64,7 +64,7 @@ public final class plasmaWordIndex implements indexRI {
private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
private final indexRAMRI dhtOutCache, dhtInCache;
private final indexRAMRI dhtOutCache, dhtInCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private int assortmentBufferSize; // kb
private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI
@ -334,7 +334,7 @@ public final class plasmaWordIndex implements indexRI {
// if ((s.length() > 4) && (c > 1)) System.out.println("# " + s + ":" + c);
wordHash = indexEntryAttribute.word2hash(word);
ientry = new indexURLEntry(urlHash,
urlLength, urlComps, (document == null) ? urlLength : document.longTitle.length(),
urlLength, urlComps, (document == null) ? urlLength : document.getMainLongTitle().length(),
wprop.count,
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
@ -630,7 +630,7 @@ public final class plasmaWordIndex implements indexRI {
return null;
}
private class rotatingContainerIterator implements Iterator {
public class rotatingContainerIterator implements Iterator {
Iterator i;
int resourceLevel;

@ -334,4 +334,8 @@ public class plasmaWordIndexFileCluster implements indexRI {
}
public int indexSize(String wordHash) {
throw new UnsupportedOperationException();
}
}

@ -45,7 +45,6 @@
package de.anomic.server.logging;
import java.util.ArrayList;
import java.util.Date;
import java.util.logging.ErrorManager;
import java.util.logging.Filter;
import java.util.logging.Formatter;

@ -205,7 +205,13 @@ public final class yacySeedDB {
private synchronized kelondroMap openSeedTable(File seedDBFile) {
new File(seedDBFile.getParent()).mkdirs();
return new kelondroMap(kelondroDyn.open(seedDBFile, (seedDBBufferKB * 0x400) / 3, preloadTime / 3, commonHashLength, 480, '#'), sortFields, accFields);
try {
return new kelondroMap(kelondroDyn.open(seedDBFile, (seedDBBufferKB * 0x400) / 3, preloadTime / 3, commonHashLength, 480, '#'), sortFields, accFields);
} catch (Exception e) {
seedDBFile.delete();
// try again
return new kelondroMap(kelondroDyn.open(seedDBFile, (seedDBBufferKB * 0x400) / 3, preloadTime / 3, commonHashLength, 480, '#'), sortFields, accFields);
}
}
protected synchronized kelondroMap resetSeedTable(kelondroMap seedDB, File seedDBFile) {

Loading…
Cancel
Save