// indexRAMRI.java // (C) 2005, 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 2005 on http://yacy.net // // This is a part of YaCy, a peer-to-peer based web search engine // // $LastChangedDate: 2009-01-02 12:38:20 +0100 (Fr, 02 Jan 2009) $ // $LastChangedRevision: 5432 $ // $LastChangedBy: orbiter $ // // LICENSE // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package de.anomic.index; import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.Set; import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.coding.kelondroOrder; import de.anomic.kelondro.index.Row; /* * an index cell is a part of the horizontal index in the new segment-oriented index * data structure of YaCy. If there is no filter in front of a cell, it might also be * the organization for a complete segment index. Each cell consists of a number of BLOB files, that * must be merged to represent a single index. In fact these index files are only merged on demand * if there are too many of them. An index merge can be done with a stream read and stream write operation. * in normal operation, there are only a number of read-only BLOB files and a single RAM cache that is * kept in the RAM as long as a given limit of entries is reached. Then the cache is flushed and becomes * another BLOB file in the index array. */ public final class indexCell implements indexRI { // class variables private indexContainerBLOBArray array; private indexContainerRAMHeap ram; private int maxRamEntries; @SuppressWarnings("unchecked") public indexCell( final File cellPath, final Row payloadrow, final int maxRamEntries ) throws IOException { this.array = new indexContainerBLOBArray(cellPath, payloadrow); this.ram = new indexContainerRAMHeap(payloadrow); this.maxRamEntries = maxRamEntries; } private void cacheDump() throws IOException { // dump the ram File dumpFile = this.array.newContainerBLOBFile(); this.ram.dump(dumpFile); // get a fresh ram cache this.ram = new indexContainerRAMHeap(this.array.rowdef()); // add the dumped indexContainerBLOB to the array this.array.mountBLOBContainer(dumpFile); } /** * add entries to the cell: this adds the new entries always to the RAM part, never to BLOBs * @throws IOException */ public synchronized void addEntries(indexContainer newEntries) throws IOException { this.ram.add(newEntries); if (this.ram.size() > this.maxRamEntries) cacheDump(); } /** * clear the RAM and BLOB part, deletes everything in the cell */ public synchronized void clear() throws IOException { this.ram.clear(); this.array.clear(); } /** * when a cell is closed, the current RAM is dumped to a file which will be opened as * BLOB file the next time a cell is opened. A name for the dump is automatically generated * and is composed of the current date and the cell salt */ public synchronized void close() { // dump the ram try { this.ram.dump(this.array.newContainerBLOBFile()); } catch (IOException e) { e.printStackTrace(); } // close all this.ram.close(); this.array.close(); } /** * deleting a container affects the containers in RAM and all the BLOB files * the deleted containers are merged and returned as result of the method */ public indexContainer deleteContainer(String wordHash) throws IOException { indexContainer c0 = this.ram.delete(wordHash); indexContainer c1 = this.array.get(wordHash); if (c1 == null) { if (c0 == null) return null; return c0; } this.array.delete(wordHash); if (c0 == null) return c1; return c1.merge(c0); } /** * all containers in the BLOBs and the RAM are merged and returned */ public indexContainer getContainer(String wordHash, Set urlselection) throws IOException { indexContainer c0 = this.ram.get(wordHash); indexContainer c1 = this.array.get(wordHash); if (c1 == null) { if (c0 == null) return null; return c0; } if (c0 == null) return c1; return c1.merge(c0); } /** * checks if there is any container for this wordHash, either in RAM or any BLOB */ public boolean hasContainer(String wordHash) { if (this.ram.has(wordHash)) return true; return this.array.has(wordHash); } public int minMem() { return 10 * 1024 * 1024; } /** * remove url references from a selected word hash. this deletes also in the BLOB * files, which means that there exists new gap entries after the deletion * The gaps are never merged in place, but can be eliminated when BLOBs are merged into * new BLOBs. This returns the sum of all url references that have been removed * @throws IOException * @throws IOException */ public int removeEntries(String wordHash, Set urlHashes) throws IOException { int reduced = this.array.replace(wordHash, new RemoveRewriter(urlHashes)); return reduced / this.array.rowdef().objectsize; } public boolean removeEntry(String wordHash, String urlHash) throws IOException { int reduced = this.array.replace(wordHash, new RemoveRewriter(urlHash)); return reduced > 0; } public int size() { return this.ram.size() + this.array.size(); } public kelondroCloneableIterator wordContainers(String startWordHash, boolean rot) throws IOException { return wordContainers(startWordHash, rot, false); } public synchronized kelondroCloneableIterator wordContainers(final String startWordHash, boolean rot, final boolean ramOnly) throws IOException { final kelondroOrder containerOrder = new indexContainerOrder(this.ram.rowdef().getOrdering().clone()); containerOrder.rotate(new indexContainer(startWordHash, this.ram.rowdef(), 0)); if (ramOnly) { return this.ram.wordContainers(startWordHash, false); } return new kelondroMergeIterator( this.ram.wordContainers(startWordHash, false), this.array.wordContainers(startWordHash, false), containerOrder, indexContainer.containerMergeMethod, true); } public class RemoveRewriter implements indexContainerBLOBArray.ContainerRewriter { Set urlHashes; public RemoveRewriter(Set urlHashes) { this.urlHashes = urlHashes; } public RemoveRewriter(String urlHash) { this.urlHashes = new HashSet(); this.urlHashes.add(urlHash); } public indexContainer rewrite(indexContainer container) { container.removeEntries(urlHashes); return container; } } }