(this commit is just to move development files to my other computer, no functionality change so far) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5509 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
d399444e49
commit
b74159feb8
@ -0,0 +1,214 @@
|
||||
// indexCell.java
|
||||
// (C) 2005, 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 2005 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate: 2009-01-02 12:38:20 +0100 (Fr, 02 Jan 2009) $
|
||||
// $LastChangedRevision: 5432 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.index;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import de.anomic.kelondro.kelondroCloneableIterator;
|
||||
import de.anomic.kelondro.kelondroMergeIterator;
|
||||
import de.anomic.kelondro.kelondroOrder;
|
||||
import de.anomic.kelondro.kelondroRow;
|
||||
|
||||
/*
|
||||
* an index cell is a part of the horizontal index in the new segment-oriented index
|
||||
* data structure of YaCy. If there is no filter in front of a cell, it might also be
|
||||
* the organization for a complete segment index. Each cell consists of a number of BLOB files, that
|
||||
* must be merged to represent a single index. In fact these index files are only merged on demand
|
||||
* if there are too many of them. An index merge can be done with a stream read and stream write operation.
|
||||
* in normal operation, there are only a number of read-only BLOB files and a single RAM cache that is
|
||||
* kept in the RAM until a given limit of entries is reached. Then the cache is flushed and becomes
|
||||
* another BLOB file in the index array.
|
||||
*/
|
||||
|
||||
public final class indexCell implements indexRI {
|
||||
|
||||
// class variables
|
||||
private indexContainerBLOBArray array;
|
||||
private indexContainerRAMHeap ram;
|
||||
private int maxRamEntries;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public indexCell(
|
||||
final File cellPath,
|
||||
final kelondroRow payloadrow,
|
||||
final int maxRamEntries
|
||||
) throws IOException {
|
||||
this.array = new indexContainerBLOBArray(cellPath, payloadrow);
|
||||
this.ram = new indexContainerRAMHeap(payloadrow);
|
||||
this.maxRamEntries = maxRamEntries;
|
||||
}
|
||||
|
||||
private void cacheDump() throws IOException {
|
||||
// dump the ram
|
||||
File dumpFile = this.array.newContainerBLOBFile();
|
||||
this.ram.dump(dumpFile);
|
||||
// get a fresh ram cache
|
||||
this.ram = new indexContainerRAMHeap(this.array.rowdef());
|
||||
// add the dumped indexContainerBLOB to the array
|
||||
this.array.mountBLOBContainer(dumpFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* add entries to the cell: this adds the new entries always to the RAM part, never to BLOBs
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized void addEntries(indexContainer newEntries) throws IOException {
|
||||
this.ram.add(newEntries);
|
||||
if (this.ram.size() > this.maxRamEntries) cacheDump();
|
||||
}
|
||||
|
||||
/**
|
||||
* clear the RAM and BLOB part, deletes everything in the cell
|
||||
*/
|
||||
public synchronized void clear() throws IOException {
|
||||
this.ram.clear();
|
||||
this.array.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* when a cell is closed, the current RAM is dumped to a file which will be opened as
|
||||
* BLOB file the next time a cell is opened. A name for the dump is automatically generated
|
||||
* and is composed of the current date and the cell salt
|
||||
*/
|
||||
public synchronized void close() {
|
||||
// dump the ram
|
||||
try {
|
||||
this.ram.dump(this.array.newContainerBLOBFile());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
// close all
|
||||
this.ram.close();
|
||||
this.array.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* deleting a container affects the containers in RAM and all the BLOB files
|
||||
* the deleted containers are merged and returned as result of the method
|
||||
*/
|
||||
public indexContainer deleteContainer(String wordHash) throws IOException {
|
||||
indexContainer c0 = this.ram.delete(wordHash);
|
||||
indexContainer c1 = this.array.get(wordHash);
|
||||
if (c1 == null) {
|
||||
if (c0 == null) return null;
|
||||
return c0;
|
||||
}
|
||||
this.array.delete(wordHash);
|
||||
if (c0 == null) return c1;
|
||||
return c1.merge(c0);
|
||||
}
|
||||
|
||||
/**
|
||||
* all containers in the BLOBs and the RAM are merged and returned
|
||||
*/
|
||||
public indexContainer getContainer(String wordHash, Set<String> urlselection) throws IOException {
|
||||
indexContainer c0 = this.ram.get(wordHash);
|
||||
indexContainer c1 = this.array.get(wordHash);
|
||||
if (c1 == null) {
|
||||
if (c0 == null) return null;
|
||||
return c0;
|
||||
}
|
||||
if (c0 == null) return c1;
|
||||
return c1.merge(c0);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks if there is any container for this wordHash, either in RAM or any BLOB
|
||||
*/
|
||||
public boolean hasContainer(String wordHash) {
|
||||
if (this.ram.has(wordHash)) return true;
|
||||
return this.array.has(wordHash);
|
||||
}
|
||||
|
||||
public int minMem() {
|
||||
return 10 * 1024 * 1024;
|
||||
}
|
||||
|
||||
/**
|
||||
* remove url references from a selected word hash. this deletes also in the BLOB
|
||||
* files, which means that there exists new gap entries after the deletion
|
||||
* The gaps are never merged in place, but can be eliminated when BLOBs are merged into
|
||||
* new BLOBs. This returns the sum of all url references that have been removed
|
||||
* @throws IOException
|
||||
* @throws IOException
|
||||
*/
|
||||
public int removeEntries(String wordHash, Set<String> urlHashes) throws IOException {
|
||||
int reduced = this.array.replace(wordHash, new RemoveRewriter(urlHashes));
|
||||
return reduced / this.array.rowdef().objectsize;
|
||||
}
|
||||
|
||||
public boolean removeEntry(String wordHash, String urlHash) throws IOException {
|
||||
int reduced = this.array.replace(wordHash, new RemoveRewriter(urlHash));
|
||||
return reduced > 0;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return this.ram.size() + this.array.size();
|
||||
}
|
||||
|
||||
public kelondroCloneableIterator<indexContainer> wordContainers(String startWordHash, boolean rot) throws IOException {
|
||||
return wordContainers(startWordHash, rot, false);
|
||||
}
|
||||
|
||||
public synchronized kelondroCloneableIterator<indexContainer> wordContainers(final String startWordHash, boolean rot, final boolean ramOnly) throws IOException {
|
||||
final kelondroOrder<indexContainer> containerOrder = new indexContainerOrder(this.ram.rowdef().getOrdering().clone());
|
||||
containerOrder.rotate(new indexContainer(startWordHash, this.ram.rowdef(), 0));
|
||||
if (ramOnly) {
|
||||
return this.ram.wordContainers(startWordHash, false);
|
||||
}
|
||||
return new kelondroMergeIterator<indexContainer>(
|
||||
this.ram.wordContainers(startWordHash, false),
|
||||
this.array.wordContainers(startWordHash, false),
|
||||
containerOrder,
|
||||
indexContainer.containerMergeMethod,
|
||||
true);
|
||||
}
|
||||
|
||||
public class RemoveRewriter implements indexContainerBLOBArray.ContainerRewriter {
|
||||
|
||||
Set<String> urlHashes;
|
||||
|
||||
public RemoveRewriter(Set<String> urlHashes) {
|
||||
this.urlHashes = urlHashes;
|
||||
}
|
||||
|
||||
public RemoveRewriter(String urlHash) {
|
||||
this.urlHashes = new HashSet<String>();
|
||||
this.urlHashes.add(urlHash);
|
||||
}
|
||||
|
||||
public indexContainer rewrite(indexContainer container) {
|
||||
container.removeEntries(urlHashes);
|
||||
return container;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue