git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@86 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
3f85978519
commit
1d7fed87dc
@ -0,0 +1,41 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>YaCy: Press Material, Publications, Presentations</title>
|
||||||
|
<meta http-equiv="content-type" content="text/html;charset=iso-8859-1">
|
||||||
|
<!-- <meta name="Content-Language" content="German, Deutsch, de, at, ch"> -->
|
||||||
|
<meta name="Content-Language" content="English, Englisch">
|
||||||
|
<meta name="keywords" content="YaCy HTTP Proxy search engine spider indexer java network open free download Mac Windows Software development">
|
||||||
|
<meta name="description" content="YaCy Software HTTP Proxy Freeware Home Page">
|
||||||
|
<meta name="copyright" content="Michael Christen">
|
||||||
|
<script src="navigation.js" type="text/javascript"></script>
|
||||||
|
<link rel="stylesheet" media="all" href="style.css">
|
||||||
|
<!-- Realisation: Michael Christen; Contact: mc<at>anomic.de-->
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#fefefe" marginheight="0" marginwidth="0" leftmargin="0" topmargin="0">
|
||||||
|
<SCRIPT LANGUAGE="JavaScript1.1"><!--
|
||||||
|
globalheader();
|
||||||
|
//--></SCRIPT>
|
||||||
|
<!-- ----- HERE STARTS CONTENT PART ----- -->
|
||||||
|
|
||||||
|
<h2>Press Material, Publications, Presentations</h2>
|
||||||
|
|
||||||
|
<p>Here you can find links to documents that had been published about YaCy by YaCy-Authors</p><br>
|
||||||
|
|
||||||
|
<p>Deutsche Dokumentation / German-only documents
|
||||||
|
<ul>
|
||||||
|
<li><a href="http://www.yacy.net/yacy/material/YaCy-Datenschleuder086.pdf"><b>"YaCy -- Peer-to-Peer Web-Suchmaschine"</b></a> - Veröffentlichung in der Datenschleuder #086; technische Details zur Funktionsweise</li>
|
||||||
|
<li><a href="http://www.yacy.net/yacy/material/YaCy-nichtMonopolisierbar.pdf"><b>Vortrag zur SuMa-eV Veranstaltung: "Portale/Suchmaschinen - und ihre Grenzen"</b> - pdf/präsentierfertige Folien</a></li>
|
||||||
|
<li><a href="http://www.yacy.net/yacy/material/YaCy-nichtMonopolisierbar/index.html"><b>Vortrag zur SuMa-eV Veranstaltung: "Portale/Suchmaschinen - und ihre Grenzen"</b> - Web-Präsentation</a></li>
|
||||||
|
<li><a href="http://www.yacy.net/yacy/material/YaCy-FlyerD.pdf"><b>Flyer "Das Wichtigste zu YaCy im Überblick"</b></a></li>
|
||||||
|
</ul></p><br>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ----- HERE ENDS CONTENT PART ----- -->
|
||||||
|
<SCRIPT LANGUAGE="JavaScript1.1"><!--
|
||||||
|
globalfooter();
|
||||||
|
//--></SCRIPT>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,380 @@
|
|||||||
|
// plasmaWordIndexCache.java
|
||||||
|
// -------------------------
|
||||||
|
// part of YACY
|
||||||
|
// (C) by Michael Peter Christen; mc@anomic.de
|
||||||
|
// first published on http://www.anomic.de
|
||||||
|
// Frankfurt, Germany, 2005
|
||||||
|
// last major change: 6.5.2005
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
//
|
||||||
|
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||||
|
// running) means that you agree that the Author(s) is (are) not responsible
|
||||||
|
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||||
|
// by usage of this softare or this documentation. The usage of this software
|
||||||
|
// is on your own risk. The installation and usage (starting/running) of this
|
||||||
|
// software may allow other people or application to access your computer and
|
||||||
|
// any attached devices and is highly dependent on the configuration of the
|
||||||
|
// software which must be done by the user of the software; the author(s) is
|
||||||
|
// (are) also not responsible for proper configuration and usage of the
|
||||||
|
// software, even if provoked by documentation provided together with
|
||||||
|
// the software.
|
||||||
|
//
|
||||||
|
// Any changes to this file according to the GPL as documented in the file
|
||||||
|
// gpl.txt aside this file in the shipment you received can be done to the
|
||||||
|
// lines that follows this copyright notice here, but changes must not be
|
||||||
|
// done inside the copyright notive above. A re-distribution must contain
|
||||||
|
// the intact and unchanged copyright notice.
|
||||||
|
// Contributions and changes to the program code must be marked as such.
|
||||||
|
|
||||||
|
|
||||||
|
package de.anomic.plasma;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
import java.lang.RuntimeException;
|
||||||
|
import de.anomic.kelondro.*;
|
||||||
|
import de.anomic.server.serverLog;
|
||||||
|
import de.anomic.yacy.yacySeedDB;
|
||||||
|
|
||||||
|
public class plasmaWordIndexCache implements plasmaWordIndexInterface {
|
||||||
|
|
||||||
|
private static final String indexDumpFileName = "indexDump.stack";
|
||||||
|
|
||||||
|
static String minKey, maxKey;
|
||||||
|
|
||||||
|
// class variables
|
||||||
|
private File databaseRoot;
|
||||||
|
private plasmaWordIndexInterface backend;
|
||||||
|
private TreeMap cache;
|
||||||
|
private kelondroMScoreCluster hashScore;
|
||||||
|
private HashMap hashDate;
|
||||||
|
private int maxWords;
|
||||||
|
private serverLog log;
|
||||||
|
|
||||||
|
// Precompute the lexicographic bounds of the word-hash key space:
// maxKey is the largest possible hash ('z' repeated over the hash length),
// minKey the smallest ('-' repeated).
static {
    maxKey = "";
    for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z';
    minKey = "";
    // bug fix: this loop previously appended to maxKey again, leaving minKey empty
    for (int i = 0; i < yacySeedDB.commonHashLength; i++) minKey += '-';
}
|
||||||
|
|
||||||
|
/**
 * Creates a RAM word-index cache that sits in front of a backend index.
 * Any dump written by a previous session is read back immediately; a failed
 * restore is logged but does not prevent construction.
 *
 * @param databaseRoot root directory holding the dump file
 * @param backend      persistent index that overflowing words are flushed to
 * @param log          logger for status and error messages
 */
public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, serverLog log) {
    this.databaseRoot = databaseRoot;
    this.backend = backend;
    this.log = log;
    this.cache = new TreeMap();
    this.hashScore = new kelondroMScoreCluster();
    this.hashDate = new HashMap();
    this.maxWords = 10000;
    try {
        restore();
    } catch (IOException e) {
        log.logError("unable to restore cache dump: " + e.getMessage());
        e.printStackTrace();
    }
}
|
||||||
|
|
||||||
|
private void dump(int waitingSeconds) throws IOException {
|
||||||
|
log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)");
|
||||||
|
File indexDumpFile = new File(databaseRoot, indexDumpFileName);
|
||||||
|
if (indexDumpFile.exists()) indexDumpFile.delete();
|
||||||
|
kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0, new int[]{plasmaWordIndexEntry.wordHashLength, 4, 8, plasmaWordIndexEntry.attrSpaceLong});
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
long messageTime = System.currentTimeMillis() + 5000;
|
||||||
|
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
|
||||||
|
synchronized (cache) {
|
||||||
|
Iterator i = cache.entrySet().iterator();
|
||||||
|
Map.Entry entry;
|
||||||
|
String wordHash;
|
||||||
|
plasmaWordIndexEntryContainer container;
|
||||||
|
long creationTime;
|
||||||
|
plasmaWordIndexEntry wordEntry;
|
||||||
|
byte[][] row = new byte[4][];
|
||||||
|
while (i.hasNext()) {
|
||||||
|
// get entries
|
||||||
|
entry = (Map.Entry) i.next();
|
||||||
|
wordHash = (String) entry.getKey();
|
||||||
|
creationTime = getCreationTime(wordHash);
|
||||||
|
container = (plasmaWordIndexEntryContainer) entry.getValue();
|
||||||
|
|
||||||
|
// put entries on stack
|
||||||
|
if (container != null) {
|
||||||
|
Iterator ci = container.entries();
|
||||||
|
while (ci.hasNext()) {
|
||||||
|
wordEntry = (plasmaWordIndexEntry) ci.next();
|
||||||
|
row[0] = wordHash.getBytes();
|
||||||
|
row[1] = kelondroRecords.long2bytes(container.size(), 4);
|
||||||
|
row[2] = kelondroRecords.long2bytes(creationTime, 8);
|
||||||
|
row[3] = wordEntry.toEncodedForm(true).getBytes();
|
||||||
|
dumpStack.push(row);
|
||||||
|
urlcount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wordcount++;
|
||||||
|
|
||||||
|
// write a log
|
||||||
|
if (System.currentTimeMillis() > messageTime) {
|
||||||
|
wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime);
|
||||||
|
log.logInfo("dumping status: " + wordcount + " words done, " + ((cache.size() - wordcount) / wordsPerSecond) + " seconds remaining");
|
||||||
|
messageTime = System.currentTimeMillis() + 5000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.logSystem("dumped " + urlcount + " word/url relations in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
|
||||||
|
}
|
||||||
|
|
||||||
|
private long restore() throws IOException {
|
||||||
|
File indexDumpFile = new File(databaseRoot, indexDumpFileName);
|
||||||
|
if (!(indexDumpFile.exists())) return 0;
|
||||||
|
kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0);
|
||||||
|
log.logSystem("restore dump of index cache, " + dumpStack.size() + " word/url relations");
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
long messageTime = System.currentTimeMillis() + 5000;
|
||||||
|
long urlCount = 0, urlsPerSecond = 0;
|
||||||
|
synchronized (cache) {
|
||||||
|
Iterator i = dumpStack.iterator();
|
||||||
|
kelondroRecords.Node node;
|
||||||
|
String wordHash;
|
||||||
|
plasmaWordIndexEntryContainer container;
|
||||||
|
long creationTime;
|
||||||
|
plasmaWordIndexEntry wordEntry;
|
||||||
|
byte[][] row = new byte[4][];
|
||||||
|
while (i.hasNext()) {
|
||||||
|
// get out one entry
|
||||||
|
node = (kelondroRecords.Node) i.next();
|
||||||
|
row = node.getValues();
|
||||||
|
wordHash = new String(row[0]);
|
||||||
|
creationTime = kelondroRecords.bytes2long(row[2]);
|
||||||
|
wordEntry = new plasmaWordIndexEntry(wordHash, new String(row[3]));
|
||||||
|
|
||||||
|
// store to cache
|
||||||
|
addEntry(wordHash, wordEntry, creationTime);
|
||||||
|
urlCount++;
|
||||||
|
|
||||||
|
// write a log
|
||||||
|
if (System.currentTimeMillis() > messageTime) {
|
||||||
|
urlsPerSecond = urlCount * 1000 / (1 + System.currentTimeMillis() - startTime);
|
||||||
|
log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpStack.size() - urlCount) / urlsPerSecond) + " seconds remaining");
|
||||||
|
messageTime = System.currentTimeMillis() + 5000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.logSystem("restored " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
|
||||||
|
return urlCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int maxURLinWordCache() {
|
||||||
|
return hashScore.getScore(hashScore.getMaxObject());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int wordCacheRAMSize() {
|
||||||
|
return cache.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxWords(int maxWords) {
|
||||||
|
this.maxWords = maxWords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
if (backend.size() > cache.size()) return backend.size(); else return cache.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator wordHashes(String startWordHash, boolean up) {
|
||||||
|
if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up");
|
||||||
|
return new iterateCombined(cache.keySet().iterator(), backend.wordHashes(startWordHash, true), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class iterateCombined implements Iterator {
|
||||||
|
|
||||||
|
Comparator comp;
|
||||||
|
Iterator a, b;
|
||||||
|
String na, nb;
|
||||||
|
boolean up;
|
||||||
|
|
||||||
|
public iterateCombined(Iterator a, Iterator b, boolean up) {
|
||||||
|
this.a = a;
|
||||||
|
this.b = b;
|
||||||
|
this.up = up;
|
||||||
|
this.comp = kelondroMSetTools.fastStringComparator(up);
|
||||||
|
nexta();
|
||||||
|
nextb();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void nexta() {
|
||||||
|
if (a.hasNext()) na = (String) a.next(); else na = null;
|
||||||
|
}
|
||||||
|
private void nextb() {
|
||||||
|
if (b.hasNext()) nb = (String) b.next(); else nb = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return (na != null) || (nb != null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object next() {
|
||||||
|
String s;
|
||||||
|
if (na == null) {
|
||||||
|
s = nb;
|
||||||
|
nextb();
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
if (nb == null) {
|
||||||
|
s = na;
|
||||||
|
nexta();
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
// compare the strings
|
||||||
|
int c = comp.compare(na, nb);
|
||||||
|
if (c == 0) {
|
||||||
|
s = na;
|
||||||
|
//System.out.println("Iterate Hash: take " + s + " from file&cache");
|
||||||
|
nexta();
|
||||||
|
nextb();
|
||||||
|
return s;
|
||||||
|
} else if ((up) && (c < 0)) {
|
||||||
|
s = na;
|
||||||
|
nexta();
|
||||||
|
return s;
|
||||||
|
} else {
|
||||||
|
s = nb;
|
||||||
|
nextb();
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int flushKey(String key) {
|
||||||
|
plasmaWordIndexEntryContainer container = null;
|
||||||
|
long time;
|
||||||
|
synchronized (cache) {
|
||||||
|
container = (plasmaWordIndexEntryContainer) cache.get(key);
|
||||||
|
if (container == null) return 0; // flushing of nonexisting key
|
||||||
|
time = getCreationTime(key);
|
||||||
|
cache.remove(key);
|
||||||
|
hashScore.deleteScore(key);
|
||||||
|
hashDate.remove(key);
|
||||||
|
}
|
||||||
|
return backend.addEntries(container, time);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int flushToLimit() {
|
||||||
|
if ((hashScore.size() == 0) && (cache.size() == 0)) {
|
||||||
|
serverLog.logDebug("PLASMA INDEXING", "flushToLimit: called but cache is empty");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if ((hashScore.size() == 0) && (cache.size() != 0)) {
|
||||||
|
serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=0 but cache.size=" + cache.size());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if ((hashScore.size() != 0) && (cache.size() == 0)) {
|
||||||
|
serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=" + hashScore.size() + " but cache.size=0");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size());
|
||||||
|
int total = 0;
|
||||||
|
synchronized (hashScore) {
|
||||||
|
String key;
|
||||||
|
int count;
|
||||||
|
Long createTime;
|
||||||
|
while (hashScore.size() >= maxWords) {
|
||||||
|
key = (String) hashScore.getMaxObject();
|
||||||
|
createTime = (Long) hashDate.get(key);
|
||||||
|
count = hashScore.getScore(key);
|
||||||
|
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 9000)) {
|
||||||
|
log.logDebug("key " + key + " is too fresh, abandon flush (count=" + count + ", cachesize=" + cache.size() + ")");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (count < 5) log.logWarning("flushing of key " + key + " not appropriate (too less entries, count=" + count + "): increase cache size");
|
||||||
|
log.logDebug("flushing key " + key + ", count=" + count + ", cachesize=" + cache.size());
|
||||||
|
total += flushKey(key);
|
||||||
|
if (total > 100) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
|
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
|
||||||
|
flushKey(wordHash);
|
||||||
|
return backend.getIndex(wordHash, deleteIfEmpty);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getCreationTime(String wordHash) {
|
||||||
|
Long time = (Long) hashDate.get(wordHash);
|
||||||
|
if (time == null) return 0;
|
||||||
|
return time.longValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void deleteIndex(String wordHash) {
|
||||||
|
synchronized (cache) {
|
||||||
|
cache.remove(wordHash);
|
||||||
|
hashScore.deleteScore(wordHash);
|
||||||
|
hashDate.remove(wordHash);
|
||||||
|
}
|
||||||
|
backend.deleteIndex(wordHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
|
||||||
|
flushKey(wordHash);
|
||||||
|
return backend.removeEntries(wordHash, urlHashes, deleteComplete);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
|
||||||
|
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
|
||||||
|
flushToLimit();
|
||||||
|
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
|
||||||
|
|
||||||
|
// put new words into cache
|
||||||
|
int added = 0;
|
||||||
|
synchronized (cache) {
|
||||||
|
String wordHash = container.wordHash();
|
||||||
|
plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
|
||||||
|
if (entries == null) entries = new plasmaWordIndexEntryContainer(wordHash);
|
||||||
|
added = entries.add(container);
|
||||||
|
if (added > 0) {
|
||||||
|
cache.put(wordHash, entries);
|
||||||
|
hashScore.addScore(wordHash, added);
|
||||||
|
hashDate.put(wordHash, new Long(creationTime));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//System.out.println("DEBUG: cache = " + cache.toString());
|
||||||
|
return added;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addEntry(String wordHash, plasmaWordIndexEntry newEntry, long creationTime) {
|
||||||
|
plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash);
|
||||||
|
if (entries == null) entries = new plasmaWordIndexEntryContainer(wordHash);
|
||||||
|
if (entries.add(newEntry)) {
|
||||||
|
cache.put(wordHash, entries);
|
||||||
|
hashScore.incScore(wordHash);
|
||||||
|
hashDate.put(wordHash, new Long(creationTime));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close(int waitingSeconds) {
|
||||||
|
try {
|
||||||
|
dump(waitingSeconds);
|
||||||
|
} catch (IOException e){
|
||||||
|
log.logError("unable to dump cache: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,255 @@
|
|||||||
|
// plasmaWordIndexClassicDB.java
|
||||||
|
// -----------------------------
|
||||||
|
// part of YACY
|
||||||
|
// (C) by Michael Peter Christen; mc@anomic.de
|
||||||
|
// first published on http://www.anomic.de
|
||||||
|
// Frankfurt, Germany, 2005
|
||||||
|
// last major change: 6.5.2005
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
//
|
||||||
|
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||||
|
// running) means that you agree that the Author(s) is (are) not responsible
|
||||||
|
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||||
|
// by usage of this softare or this documentation. The usage of this software
|
||||||
|
// is on your own risk. The installation and usage (starting/running) of this
|
||||||
|
// software may allow other people or application to access your computer and
|
||||||
|
// any attached devices and is highly dependent on the configuration of the
|
||||||
|
// software which must be done by the user of the software; the author(s) is
|
||||||
|
// (are) also not responsible for proper configuration and usage of the
|
||||||
|
// software, even if provoked by documentation provided together with
|
||||||
|
// the software.
|
||||||
|
//
|
||||||
|
// Any changes to this file according to the GPL as documented in the file
|
||||||
|
// gpl.txt aside this file in the shipment you received can be done to the
|
||||||
|
// lines that follows this copyright notice here, but changes must not be
|
||||||
|
// done inside the copyright notive above. A re-distribution must contain
|
||||||
|
// the intact and unchanged copyright notice.
|
||||||
|
// Contributions and changes to the program code must be marked as such.
|
||||||
|
|
||||||
|
|
||||||
|
package de.anomic.plasma;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import de.anomic.kelondro.*;
|
||||||
|
import de.anomic.server.serverLog;
|
||||||
|
import de.anomic.yacy.yacySeedDB;
|
||||||
|
|
||||||
|
public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
|
||||||
|
|
||||||
|
|
||||||
|
// class variables
|
||||||
|
private File databaseRoot;
|
||||||
|
private serverLog log;
|
||||||
|
private int size;
|
||||||
|
|
||||||
|
/**
 * Classic file-based word index: each word hash is stored as its own file
 * below the databaseRoot/WORDS directory tree.
 *
 * @param databaseRoot root directory of the database
 * @param log          logger for error messages
 */
public plasmaWordIndexClassicDB(File databaseRoot, serverLog log) throws IOException {
    this.databaseRoot = databaseRoot;
    this.log = log;
    this.size = 0;
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator wordHashes(String startHash, boolean up) {
|
||||||
|
return new iterateFiles(startHash, up);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Iterator over the word hashes stored as files below the WORDS directory.
 * The tree is traversed depth-first: each TreeSet on the `hierarchy` stack
 * holds the not-yet-visited entries of one folder level, sorted by `comp`.
 * A one-element prefetch (`buffer`) makes hasNext() cheap.
 */
public class iterateFiles implements Iterator {

    private ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains File Entries
    private Comparator comp;     // for string-compare
    private String buffer;       // the prefetch-buffer

    public iterateFiles(String startHash, boolean up) {
        this.hierarchy = new ArrayList();
        this.comp = kelondroMSetTools.fastStringComparator(up);

        // the we initially fill the hierarchy with the content of the root folder
        String path = "WORDS";
        TreeSet list = list(new File(databaseRoot, path));

        // if we have a start hash then we find the appropriate subdirectory to start
        if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
            delete(startHash.substring(0, 1), list);
            if (list.size() > 0) {
                hierarchy.add(list);
                // the subdirectory names are derived from hash prefixes:
                // 1 char, 1 char, 2 chars, 2 chars
                String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
                int pathc = 0;
                // descend only while the first remaining entry matches the prefix exactly
                while ((pathc < paths.length) &&
                       (comp.compare((String) list.first(), paths[pathc]) == 0)) {
                    path = path + "/" + paths[pathc];
                    list = list(new File(databaseRoot, path));
                    delete(paths[pathc], list);
                    if (list.size() == 0) break;
                    hierarchy.add(list);
                    pathc++;
                }
            }
            // skip everything that sorts before the start hash
            while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
        } else {
            hierarchy.add(list);
            buffer = next0();
        }
    }

    // removes from the set all names whose file name sorts before the pattern
    private synchronized void delete(String pattern, TreeSet names) {
        String name;
        while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
    }

    // lists a folder into a TreeSet ordered by the iterator's comparator;
    // a missing/unreadable folder yields an empty set
    private TreeSet list(File path) {
        TreeSet t = new TreeSet(comp);
        String[] l = path.list();
        if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
        return t;
    }

    // fetches the next word-hash file name, descending into subfolders as
    // needed; returns null when the traversal is exhausted
    private synchronized String next0() {
        // the object is a File pointing to the corresponding file
        File f;
        String n;
        TreeSet t;
        do {
            // find the deepest non-empty folder level
            t = null;
            while ((t == null) && (hierarchy.size() > 0)) {
                t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
                if (t.size() == 0) {
                    hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
                    t = null;
                }
            }
            if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
            // fetch value
            f = new File(n = (String) t.first());
            t.remove(n);
            // if the value represents another folder, we step into the next hierarchy
            if (f.isDirectory()) {
                t = list(f);
                if (t.size() == 0) {
                    // the folder is empty, delete it
                    f.delete();
                } else {
                    hierarchy.add(t);
                }
                f = null;
            }
        } while (f == null);
        // thats it: the file name is truncated to the hash length
        if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
            return null;
        } else {
            return n.substring(0, yacySeedDB.commonHashLength);
        }
    }

    public boolean hasNext() {
        return buffer != null;
    }

    public Object next() {
        String r = buffer;
        // prefetch the next element, skipping anything that sorts before r
        while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
        return r;
    }

    public void remove() {
        // removal is not supported for this read-only traversal
    }
}
|
||||||
|
|
||||||
|
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
|
||||||
|
try {
|
||||||
|
return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.logError("plasmaWordIndexClassic.getIndex: " + e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getCreationTime(String wordHash) {
|
||||||
|
File f = plasmaWordIndexEntity.wordHash2path(databaseRoot, wordHash);
|
||||||
|
if (f.exists()) return f.lastModified(); else return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void deleteIndex(String wordHash) {
|
||||||
|
try {
|
||||||
|
plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.logError("plasmaWordIndexClassic.deleteIndex: " + e.getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
|
||||||
|
// removes all given url hashes from a single word index. Returns number of deletions.
|
||||||
|
plasmaWordIndexEntity pi = getIndex(wordHash, true);
|
||||||
|
int count = 0;
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < urlHashes.length; i++)
|
||||||
|
if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
|
||||||
|
int size = pi.size();
|
||||||
|
pi.close(); pi = null;
|
||||||
|
// check if we can remove the index completely
|
||||||
|
if ((deleteComplete) && (size == 0)) deleteIndex(wordHash);
|
||||||
|
return count;
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.logError("plasmaWordIndexClassic.removeEntries: " + e.getMessage());
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
|
||||||
|
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
|
||||||
|
// fetch the index cache
|
||||||
|
if (container.size() == 0) return 0;
|
||||||
|
|
||||||
|
// open file
|
||||||
|
try {
|
||||||
|
plasmaWordIndexEntity pi = new plasmaWordIndexEntity(databaseRoot, container.wordHash(), false);
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
// write from vector
|
||||||
|
if (container != null) {
|
||||||
|
Iterator i = container.entries();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
if (pi.addEntry((plasmaWordIndexEntry) i.next())) count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// close and return
|
||||||
|
pi.close();
|
||||||
|
pi = null;
|
||||||
|
return count;
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.logError("plasmaWordIndexClassic.addEntries: " + e.getMessage());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close(int waitingSeconds) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,97 @@
|
|||||||
|
// plasmaIndexEntryContainer.java
|
||||||
|
// ------------------------------
|
||||||
|
// part of YaCy
|
||||||
|
// (C) by Michael Peter Christen; mc@anomic.de
|
||||||
|
// first published on http://www.anomic.de
|
||||||
|
// Frankfurt, Germany, 2005
|
||||||
|
// last major change: 07.05.2005
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
//
|
||||||
|
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||||
|
// running) means that you agree that the Author(s) is (are) not responsible
|
||||||
|
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||||
|
// by usage of this softare or this documentation. The usage of this software
|
||||||
|
// is on your own risk. The installation and usage (starting/running) of this
|
||||||
|
// software may allow other people or application to access your computer and
|
||||||
|
// any attached devices and is highly dependent on the configuration of the
|
||||||
|
// software which must be done by the user of the software; the author(s) is
|
||||||
|
// (are) also not responsible for proper configuration and usage of the
|
||||||
|
// software, even if provoked by documentation provided together with
|
||||||
|
// the software.
|
||||||
|
//
|
||||||
|
// Any changes to this file according to the GPL as documented in the file
|
||||||
|
// gpl.txt aside this file in the shipment you received can be done to the
|
||||||
|
// lines that follows this copyright notice here, but changes must not be
|
||||||
|
// done inside the copyright notive above. A re-distribution must contain
|
||||||
|
// the intact and unchanged copyright notice.
|
||||||
|
// Contributions and changes to the program code must be marked as such.
|
||||||
|
|
||||||
|
package de.anomic.plasma;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class plasmaWordIndexEntryContainer {
|
||||||
|
|
||||||
|
private String wordHash;
|
||||||
|
private HashMap container;
|
||||||
|
|
||||||
|
public plasmaWordIndexEntryContainer(String wordHash) {
|
||||||
|
this.wordHash = wordHash;
|
||||||
|
container = new HashMap(); // a urlhash/plasmaWordIndexEntry - relation
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return container.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String wordHash() {
|
||||||
|
return wordHash;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean add(plasmaWordIndexEntry entry) {
|
||||||
|
// returns true if the new entry was added, false if it already existet
|
||||||
|
String urlHash = entry.getUrlHash();
|
||||||
|
if (container.containsKey(urlHash)) return false;
|
||||||
|
container.put(urlHash, entry);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int add(plasmaWordIndexEntryContainer c) {
|
||||||
|
// returns the number of new elements
|
||||||
|
Iterator i = c.entries();
|
||||||
|
int x = 0;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
if (add((plasmaWordIndexEntry) i.next())) x++;
|
||||||
|
}
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator entries() {
|
||||||
|
// returns an iterator of plasmaWordIndexEntry objects
|
||||||
|
return container.values().iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static plasmaWordIndexEntryContainer instantContainer(String wordHash, plasmaWordIndexEntry entry) {
|
||||||
|
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
|
||||||
|
c.add(entry);
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "C[" + wordHash + "] has " + container.size() + " entries";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,275 +0,0 @@
|
|||||||
// plasmaWordIndexFileCache.java
|
|
||||||
// -----------------------------
|
|
||||||
// part of YACY
|
|
||||||
// (C) by Michael Peter Christen; mc@anomic.de
|
|
||||||
// first published on http://www.anomic.de
|
|
||||||
// Frankfurt, Germany, 2004
|
|
||||||
// last major change: 22.01.2004
|
|
||||||
//
|
|
||||||
// This program is free software; you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation; either version 2 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with this program; if not, write to the Free Software
|
|
||||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
//
|
|
||||||
// Using this software in any meaning (reading, learning, copying, compiling,
|
|
||||||
// running) means that you agree that the Author(s) is (are) not responsible
|
|
||||||
// for cost, loss of data or any harm that may be caused directly or indirectly
|
|
||||||
// by usage of this softare or this documentation. The usage of this software
|
|
||||||
// is on your own risk. The installation and usage (starting/running) of this
|
|
||||||
// software may allow other people or application to access your computer and
|
|
||||||
// any attached devices and is highly dependent on the configuration of the
|
|
||||||
// software which must be done by the user of the software; the author(s) is
|
|
||||||
// (are) also not responsible for proper configuration and usage of the
|
|
||||||
// software, even if provoked by documentation provided together with
|
|
||||||
// the software.
|
|
||||||
//
|
|
||||||
// Any changes to this file according to the GPL as documented in the file
|
|
||||||
// gpl.txt aside this file in the shipment you received can be done to the
|
|
||||||
// lines that follows this copyright notice here, but changes must not be
|
|
||||||
// done inside the copyright notive above. A re-distribution must contain
|
|
||||||
// the intact and unchanged copyright notice.
|
|
||||||
// Contributions and changes to the program code must be marked as such.
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
The plasmaIndexCache manages a database table with a list of
|
|
||||||
indexEntries in it. This is done in a completely different fashion
|
|
||||||
as organized by the plasmaIndex tables. The entries are not
|
|
||||||
sorted and just stored in a buffer.
|
|
||||||
Whenever during a seach an index is retrieved, first it's buffer
|
|
||||||
is flushed into the corresponding index table, so that it can be
|
|
||||||
sorted into the remaining index entry elements.
|
|
||||||
The cache database consist of
|
|
||||||
- the word hash as primary key
|
|
||||||
- one column with a one-byte counter
|
|
||||||
- a number of more columns with indexEntry elements
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// compile with
|
|
||||||
// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java
|
|
||||||
|
|
||||||
package de.anomic.plasma;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Vector;
|
|
||||||
|
|
||||||
import de.anomic.kelondro.kelondroException;
|
|
||||||
import de.anomic.kelondro.kelondroTree;
|
|
||||||
import de.anomic.server.serverLog;
|
|
||||||
|
|
||||||
/**
 * File-backed buffer for word-index entries. Each word hash owns one row in a
 * kelondroTree table ("indexCache.db"): column 0 is the word hash, column 1 a
 * one-byte entry counter, and up to `buffers` further columns hold encoded
 * urlHash+entry pairs. When a row fills up (or an index is read), the buffered
 * entries are flushed into the per-word plasmaWordIndexEntity file.
 */
public class plasmaWordIndexFileCache {

    private static final String indexCacheFileName = "indexCache.db";
    private static final int buffers = 50; // number of buffered entries per word

    // class variables
    private File databaseRoot;       // directory that holds indexCache.db and the per-word index files
    private kelondroTree indexCache; // the buffer table; null after close() or an unrecoverable reset failure
    private int bufferkb;            // node cache size for the kelondroTree, in KB

    /**
     * Opens the existing cache file under databaseRoot, or creates a fresh one.
     * @throws IOException if the cache file cannot be opened or created
     */
    public plasmaWordIndexFileCache(File databaseRoot, int bufferkb) throws IOException {
        this.databaseRoot = databaseRoot;
        this.bufferkb = bufferkb;
        File indexCacheFile = new File(databaseRoot, indexCacheFileName);
        if (indexCacheFile.exists()) {
            // simply open the file
            indexCache = new kelondroTree(indexCacheFile, bufferkb * 0x400);
        } else {
            createCacheFile(indexCacheFile);
        }
    }

    // Deletes and re-creates indexCache.db, discarding all buffered entries.
    private void resetCacheFile() {
        // this has to be used in emergencies only
        // it can happen that there is a serious db inconsistency; in that case we re-create the indexCache
        try { indexCache.close(); } catch (IOException e) {}
        File indexCacheFile = new File(databaseRoot, indexCacheFileName);
        if (indexCacheFile.exists()) indexCacheFile.delete();
        try {
            createCacheFile(indexCacheFile);
        } catch (IOException e) {
            // even re-creation failed: leave indexCache null (size() reports 0 in that state)
            de.anomic.server.serverLog.logError("PLASMA", "plasmaWordIndexFileCache.resetCacheFile: serious failure creating the cache file: " + e.getMessage());
            indexCache = null;
        }
    }

    // Creates a new cache file with the fixed row layout described on the class.
    private void createCacheFile(File indexCacheFile) throws IOException {
        // create a new file
        int[] columns = new int[buffers + 2];
        columns[0] = plasmaWordIndexEntry.wordHashLength;
        columns[1] = 1; // one-byte counter of used entry slots
        for (int i = 0; i < buffers; i++) columns[i + 2] = plasmaCrawlLURL.urlHashLength + plasmaWordIndexEntry.attrSpaceShort;
        indexCache = new kelondroTree(indexCacheFile, bufferkb * 0x400, columns);
    }

    /** Closes the underlying table; the object must not be used afterwards. */
    protected void close() throws IOException {
        indexCache.close();
        indexCache = null;
    }

    // Reads the buffer row for a word hash; if none exists (or the stored row
    // is corrupt) a fresh empty row with counter 0 is constructed instead.
    private byte[][] getCache(String wordHash) throws IOException {
        // read one line from the cache; if none exists: construct one
        byte[][] row;
        try {
            row = indexCache.get(wordHash.getBytes());
        } catch (Exception e) {
            // we had some negativeSeekOffsetExceptions here, and also loops may cause this
            // in that case the indexCache is corrupt
            System.out.println("Error in plasmaWordINdexFileCache.getCache: index for hash " + wordHash + " is corrupt:" + e.toString());
            //e.printStackTrace();
            row = null;
        }
        if (row == null) {
            row = new byte[indexCache.columns()][];
            row[0] = wordHash.getBytes();
            row[1] = new byte[1];
            row[1][0] = (byte) 0;
        }
        return row;
    }

    /**
     * Iterates the word hashes stored in the buffer table, starting at wordHash
     * (or at the first/last row when null), ascending if up is true.
     * On a kelondro failure the index for that word is deleted and an empty
     * iterator is returned instead of propagating the error.
     */
    protected Iterator wordHashes(String wordHash, boolean up) throws IOException {
        try {
            return indexCache.rows(up, false, (wordHash == null) ? null : wordHash.getBytes());
        } catch (kelondroException e) {
            de.anomic.server.serverLog.logError("PLASMA", "kelondro error in plasmaWordIndexFileCache: " + e.getMessage() + "; deleting index for " + wordHash);
            deleteComplete(wordHash);
            return new HashSet().iterator();
        }
    }

    /**
     * Returns the on-disk index entity for a word hash, first flushing and
     * removing any buffered entries so the entity is complete.
     */
    protected plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) throws IOException {
        // first flush the index cache, if there is any for that word hash
        byte[][] row = indexCache.get(wordHash.getBytes());
        if (row != null) {
            int entries = (int) row[1][0];
            if (entries != 0) flushCache(row, null); // if the cache has entries, flush it
            indexCache.remove(wordHash.getBytes()); // delete the cache index row; suppose to be empty now
        }
        // then return the index from the uncached file (with new entries)
        return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty);
    }

    /**
     * Buffers new entries for a word hash. If the row would overflow, the row
     * plus the new entries are flushed straight to the entity file instead.
     * On a write failure the whole cache file is reset (buffered data is lost).
     * @param newEntries Vector of plasmaWordIndexEntry objects; empty is a no-op
     */
    protected void addEntriesToIndex(String wordHash, Vector /* of plasmaIndexEntry */ newEntries) throws IOException {
        //System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
        // fetch the index cache
        if (newEntries.size() == 0) return;
        byte[][] row = getCache(wordHash);
        int entries = (int) row[1][0];
        // check if the index cache is full
        if (entries + 2 + newEntries.size() >= indexCache.columns()) {
            flushCache(row, newEntries); // and put in new values
            entries = 0;
            row[1][0] = (byte) 0; // set number of entries to zero
        } else {
            // put in the new values
            String newEntry;
            for (int i = 0; i < newEntries.size(); i++) {
                // slot format: urlHash immediately followed by the short encoded entry attributes
                newEntry = ((plasmaWordIndexEntry) newEntries.elementAt(i)).getUrlHash() + ((plasmaWordIndexEntry) newEntries.elementAt(i)).toEncodedForm(false);
                row[entries + 2] = newEntry.getBytes();
                entries++;
            }
            row[1][0] = (byte) entries;
            try {
                indexCache.put(row);
            } catch (kelondroException e) {
                // this is a very bad case; a database inconsistency occurred
                serverLog.logError("PLASMA", "fatal error in plasmaWordIndexFileCache.addEntriesToIndex: write of " + wordHash + " to index cache failed - " + e.getMessage() + " - indexCache.db deleted");
                resetCacheFile();
            } catch (IOException e) {
                // this is a very bad case; a database inconsistency occurred
                serverLog.logError("PLASMA", "fatal error in plasmaWordIndexFileCache.addEntriesToIndex: write of " + wordHash + " to index cache failed - " + e.getMessage() + " - indexCache.db deleted");
                resetCacheFile();
            }
        }
        // finished!
    }

    /** Removes both the on-disk index and the buffer row of a word hash. */
    protected void deleteComplete(String wordHash) throws IOException {
        plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash);
        indexCache.remove(wordHash.getBytes());
    }

    /**
     * Removes the given url hashes from one word index (after getIndex() has
     * flushed the buffer). If deleteComplete is set and the index becomes
     * empty, the index file and the buffer row are removed entirely.
     * @return number of entries actually deleted
     */
    protected int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException {
        // removes all given url hashes from a single word index. Returns number of deletions.
        plasmaWordIndexEntity pi = getIndex(wordHash, true);
        int count = 0;
        for (int i = 0; i < urlHashes.length; i++) if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
        int size = pi.size();
        pi.close(); pi = null;
        // check if we can remove the index completely
        if ((deleteComplete) && (size == 0)) {
            // remove index
            if (!(plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash)))
                System.out.println("DEBUG: cannot remove index file for word hash " + wordHash);
            // remove cache
            indexCache.remove(wordHash.getBytes());
        }
        return count;
    }

    // Writes the entries of a buffer row (and, optionally, additional entries
    // from indexEntries) into the per-word plasmaWordIndexEntity file.
    // NOTE(review): the row itself is not removed here; callers handle that.
    private synchronized void flushCache(byte[][] row, Vector indexEntries) throws IOException {
        String wordHash = new String(row[0]);
        int entries = (int) row[1][0];
        if ((entries == 0) && ((indexEntries == null) || (indexEntries.size() == 0))) return;

        // open file
        plasmaWordIndexEntity pi = new plasmaWordIndexEntity(databaseRoot, wordHash, false);

        // write from array
        plasmaWordIndexEntry entry;
        for (int i = 0; i < entries; i++) {
            // decode slot: first urlHashLength bytes are the urlHash, the rest the encoded attributes
            entry = new plasmaWordIndexEntry(new String(row[i + 2], 0, plasmaCrawlLURL.urlHashLength),
                                             new String(row[i + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort));
            pi.addEntry(entry);
        }

        // write from vector
        if (indexEntries != null) {
            for (int i = 0; i < indexEntries.size(); i++)
                pi.addEntry((plasmaWordIndexEntry) indexEntries.elementAt(i));
        }

        // close and return
        pi.close();
        pi = null;
    }

    // Number of buffered entries for one word hash (0 when no row exists).
    private int size(String wordHash) throws IOException {
        // return number of entries in specific cache
        byte[][] row = indexCache.get(wordHash.getBytes());
        if (row == null) return 0;
        return (int) row[1][0];
    }

    /** @return number of word hashes that currently have a buffer row (0 after a failed reset) */
    protected int size() {
        if (indexCache == null) return 0; else return indexCache.size();
    }

    /*
    private plasmaIndex getIndexF(String wordHash) throws IOException {
        return new plasmaIndex(databaseRoot, wordHash);
    }

    private void addEntryToIndexF(String wordHash, plasmaIndexEntry entry) throws IOException {
        plasmaIndex pi = new plasmaIndex(databaseRoot, wordHash);
        pi.addEntry(entry);
        pi.close();
    }
    */

}
|
|
@ -0,0 +1,62 @@
|
|||||||
|
// plasmaWordIndexInterface.java
|
||||||
|
// -----------------------------
|
||||||
|
// part of YACY
|
||||||
|
// (C) by Michael Peter Christen; mc@anomic.de
|
||||||
|
// first published on http://www.anomic.de
|
||||||
|
// Frankfurt, Germany, 2005
|
||||||
|
// last major change: 6.5.2005
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
//
|
||||||
|
// Using this software in any meaning (reading, learning, copying, compiling,
|
||||||
|
// running) means that you agree that the Author(s) is (are) not responsible
|
||||||
|
// for cost, loss of data or any harm that may be caused directly or indirectly
|
||||||
|
// by usage of this softare or this documentation. The usage of this software
|
||||||
|
// is on your own risk. The installation and usage (starting/running) of this
|
||||||
|
// software may allow other people or application to access your computer and
|
||||||
|
// any attached devices and is highly dependent on the configuration of the
|
||||||
|
// software which must be done by the user of the software; the author(s) is
|
||||||
|
// (are) also not responsible for proper configuration and usage of the
|
||||||
|
// software, even if provoked by documentation provided together with
|
||||||
|
// the software.
|
||||||
|
//
|
||||||
|
// Any changes to this file according to the GPL as documented in the file
|
||||||
|
// gpl.txt aside this file in the shipment you received can be done to the
|
||||||
|
// lines that follows this copyright notice here, but changes must not be
|
||||||
|
// done inside the copyright notive above. A re-distribution must contain
|
||||||
|
// the intact and unchanged copyright notice.
|
||||||
|
// Contributions and changes to the program code must be marked as such.
|
||||||
|
|
||||||
|
|
||||||
|
package de.anomic.plasma;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
 * Common contract for word-index implementations: lookup, enumeration,
 * insertion and removal of per-word URL entries.
 */
public interface plasmaWordIndexInterface {

    /** @return number of words known to this index */
    public int size();

    /**
     * Iterates word hashes starting at startWordHash,
     * ascending when up is true.
     */
    public Iterator wordHashes(String startWordHash, boolean up);

    /**
     * Returns the index entity for one word hash;
     * deleteIfEmpty presumably removes an empty index on access — TODO confirm against implementations.
     */
    public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty);
    /** @return creation time of the index for the given word hash */
    public long getCreationTime(String wordHash);
    /** Removes the complete index of one word hash. */
    public void deleteIndex(String wordHash);

    /**
     * Removes the given url hashes from one word index.
     * @return number of entries removed
     */
    public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
    /**
     * Adds a container of entries (all for one word hash) with the given creation time.
     * @return number of entries that were new to the index
     */
    public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime);

    /** Shuts the index down, waiting at most waitingSeconds for pending work. */
    public void close(int waitingSeconds);

}
|
@ -1,253 +0,0 @@
|
|||||||
// plasmaIndexRAMCache.java
|
|
||||||
// -----------------------
|
|
||||||
// part of YACY
|
|
||||||
// (C) by Michael Peter Christen; mc@anomic.de
|
|
||||||
// first published on http://www.anomic.de
|
|
||||||
// Frankfurt, Germany, 2004
|
|
||||||
// last major change: 22.12.2004
|
|
||||||
//
|
|
||||||
// This program is free software; you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation; either version 2 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with this program; if not, write to the Free Software
|
|
||||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
//
|
|
||||||
// Using this software in any meaning (reading, learning, copying, compiling,
|
|
||||||
// running) means that you agree that the Author(s) is (are) not responsible
|
|
||||||
// for cost, loss of data or any harm that may be caused directly or indirectly
|
|
||||||
// by usage of this softare or this documentation. The usage of this software
|
|
||||||
// is on your own risk. The installation and usage (starting/running) of this
|
|
||||||
// software may allow other people or application to access your computer and
|
|
||||||
// any attached devices and is highly dependent on the configuration of the
|
|
||||||
// software which must be done by the user of the software; the author(s) is
|
|
||||||
// (are) also not responsible for proper configuration and usage of the
|
|
||||||
// software, even if provoked by documentation provided together with
|
|
||||||
// the software.
|
|
||||||
//
|
|
||||||
// Any changes to this file according to the GPL as documented in the file
|
|
||||||
// gpl.txt aside this file in the shipment you received can be done to the
|
|
||||||
// lines that follows this copyright notice here, but changes must not be
|
|
||||||
// done inside the copyright notive above. A re-distribution must contain
|
|
||||||
// the intact and unchanged copyright notice.
|
|
||||||
// Contributions and changes to the program code must be marked as such.
|
|
||||||
|
|
||||||
// compile with
|
|
||||||
// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java
|
|
||||||
|
|
||||||
|
|
||||||
package de.anomic.plasma;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.Vector;
|
|
||||||
|
|
||||||
import de.anomic.kelondro.kelondroMScoreCluster;
|
|
||||||
import de.anomic.server.serverLog;
|
|
||||||
import de.anomic.yacy.yacySeedDB;
|
|
||||||
|
|
||||||
public class plasmaWordIndexRAMCache extends Thread {
|
|
||||||
|
|
||||||
static String minKey, maxKey;
|
|
||||||
|
|
||||||
// class variables
|
|
||||||
TreeMap cache;
|
|
||||||
kelondroMScoreCluster hashScore;
|
|
||||||
plasmaWordIndexFileCache pic;
|
|
||||||
boolean terminate;
|
|
||||||
long terminateUntil;
|
|
||||||
int maxWords;
|
|
||||||
|
|
||||||
static {
|
|
||||||
maxKey = "";
|
|
||||||
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z';
|
|
||||||
minKey = "";
|
|
||||||
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
|
|
||||||
}
|
|
||||||
|
|
||||||
public plasmaWordIndexRAMCache(File databaseRoot, int bufferkb) throws IOException {
|
|
||||||
this.pic = new plasmaWordIndexFileCache(databaseRoot, bufferkb);
|
|
||||||
this.cache = new TreeMap();
|
|
||||||
this.hashScore = new kelondroMScoreCluster();
|
|
||||||
this.maxWords = 1000;
|
|
||||||
this.terminate = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int maxURLinWordCache() {
|
|
||||||
return hashScore.getScore(hashScore.getMaxObject());
|
|
||||||
}
|
|
||||||
|
|
||||||
public int wordCacheRAMSize() {
|
|
||||||
return cache.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setMaxWords(int maxWords) {
|
|
||||||
this.maxWords = maxWords;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void run() {
|
|
||||||
serverLog.logSystem("PLASMA INDEXING", "started word cache management");
|
|
||||||
int check;
|
|
||||||
// permanently flush cache elements
|
|
||||||
while (!(terminate)) {
|
|
||||||
if (hashScore.size() < 100) try {Thread.currentThread().sleep(10000);} catch (InterruptedException e) {}
|
|
||||||
while ((!(terminate)) && (cache != null) && (hashScore.size() > 0)) try {
|
|
||||||
check = hashScore.size();
|
|
||||||
flushSpecific(false);
|
|
||||||
//serverLog.logDebug("PLASMA INDEXING", "single flush. bevore=" + check + "; after=" + hashScore.size());
|
|
||||||
try {Thread.currentThread().sleep(10 + ((maxWords / 10) / (1 + hashScore.size())));} catch (InterruptedException e) {}
|
|
||||||
} catch (IOException e) {
|
|
||||||
serverLog.logError("PLASMA INDEXING", "PANIK! exception in main cache loop: " + e.getMessage());
|
|
||||||
e.printStackTrace();
|
|
||||||
terminate = true;
|
|
||||||
cache = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
serverLog.logSystem("PLASMA INDEXING", "CATCHED TERMINATION SIGNAL: start final flush");
|
|
||||||
|
|
||||||
// close all;
|
|
||||||
try {
|
|
||||||
// first flush everything
|
|
||||||
while ((hashScore.size() > 0) && (System.currentTimeMillis() < terminateUntil)) {
|
|
||||||
flushSpecific(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
// then close file cache:
|
|
||||||
pic.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
serverLog.logDebug("PLASMA INDEXING", "interrupted final flush: " + e.toString());
|
|
||||||
}
|
|
||||||
// report
|
|
||||||
if (hashScore.size() == 0)
|
|
||||||
serverLog.logSystem("PLASMA INDEXING", "finished final flush; flushed all words");
|
|
||||||
else
|
|
||||||
serverLog.logError("PLASMA INDEXING", "terminated final flush; " + hashScore.size() + " words lost");
|
|
||||||
|
|
||||||
// delete data
|
|
||||||
cache = null;
|
|
||||||
hashScore = null;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close(int waitingBoundSeconds) {
|
|
||||||
terminate = true;
|
|
||||||
// wait until terination is done
|
|
||||||
// we can do at least 6 flushes/second
|
|
||||||
int waitingtime = 10 + (((cache == null) ? 0 : cache.size()) / 5); // seconds
|
|
||||||
if (waitingtime > waitingBoundSeconds) waitingtime = waitingBoundSeconds; // upper bound
|
|
||||||
this.terminateUntil = System.currentTimeMillis() + (waitingtime * 1000);
|
|
||||||
terminate = true;
|
|
||||||
while ((cache != null) && (waitingtime > 0)) {
|
|
||||||
serverLog.logDebug("PLASMA INDEXING", "final word flush; cache.size=" + cache.size() + "; time-out in " + waitingtime + " seconds");
|
|
||||||
try {Thread.currentThread().sleep(5000);} catch (InterruptedException e) {}
|
|
||||||
waitingtime -= 5;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private int flushSpecific(boolean greatest) throws IOException {
|
|
||||||
//System.out.println("DEBUG: plasmaIndexRAMCache.flushSpecific(" + ((greatest) ? "greatest" : "smallest") + "); cache.size() = " + cache.size());
|
|
||||||
if ((hashScore.size() == 0) && (cache.size() == 0)) {
|
|
||||||
serverLog.logDebug("PLASMA INDEXING", "flushSpecific: called but cache is empty");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if ((hashScore.size() == 0) && (cache.size() != 0)) {
|
|
||||||
serverLog.logError("PLASMA INDEXING", "flushSpecific: hashScore.size=0 but cache.size=" + cache.size());
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if ((hashScore.size() != 0) && (cache.size() == 0)) {
|
|
||||||
serverLog.logError("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + " but cache.size=0");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size());
|
|
||||||
|
|
||||||
String key = (String) ((greatest) ? hashScore.getMaxObject() : hashScore.getMinObject());
|
|
||||||
return flushKey(key, "flushSpecific");
|
|
||||||
}
|
|
||||||
|
|
||||||
private int flushKey(String key, String caller) throws IOException {
|
|
||||||
Vector v = null;
|
|
||||||
v = (Vector) cache.get(key);
|
|
||||||
if (v == null) return 0; // flushing of nonexisting key
|
|
||||||
synchronized (cache) {
|
|
||||||
cache.remove(key);
|
|
||||||
hashScore.deleteScore(key);
|
|
||||||
}
|
|
||||||
pic.addEntriesToIndex(key, v);
|
|
||||||
return v.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized Iterator wordHashesMem(String wordHash, int count) throws IOException {
|
|
||||||
// returns a list of hashes from a specific start point
|
|
||||||
// we need to flush some of the elements in the cache first
|
|
||||||
// maybe we flush too much, but this is not easy to find out and it does not matter
|
|
||||||
TreeMap subMap = new TreeMap(cache.subMap((wordHash == null) ? minKey : wordHash, maxKey));
|
|
||||||
int flushcount = subMap.size();
|
|
||||||
if (flushcount > count) flushcount = count;
|
|
||||||
String key;
|
|
||||||
for (int i = 0; i < flushcount ; i++) {
|
|
||||||
key = (String) subMap.firstKey();
|
|
||||||
flushKey(key, "getSequentialWordHashesMem");
|
|
||||||
subMap.remove(key);
|
|
||||||
}
|
|
||||||
// finally return the result from the underlying hash list:
|
|
||||||
return pic.wordHashes(wordHash, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public plasmaWordIndexEntity getIndexMem(String wordHash, boolean deleteIfEmpty) throws IOException {
|
|
||||||
flushKey(wordHash, "getIndexMem");
|
|
||||||
return pic.getIndex(wordHash, deleteIfEmpty);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int addEntryToIndexMem(String wordHash, plasmaWordIndexEntry entry) throws IOException {
|
|
||||||
// make space for new words
|
|
||||||
int flushc = 0;
|
|
||||||
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
|
|
||||||
synchronized (hashScore) {
|
|
||||||
while (hashScore.size() > maxWords) flushc += flushSpecific(true);
|
|
||||||
}
|
|
||||||
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
|
|
||||||
|
|
||||||
// put new words into cache
|
|
||||||
synchronized (cache) {
|
|
||||||
Vector v = (Vector) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
|
|
||||||
if (v == null) v = new Vector();
|
|
||||||
v.add(entry);
|
|
||||||
cache.put(wordHash, v);
|
|
||||||
hashScore.incScore(wordHash);
|
|
||||||
}
|
|
||||||
return flushc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized void deleteComplete(String wordHash) throws IOException {
|
|
||||||
cache.remove(wordHash);
|
|
||||||
hashScore.deleteScore(wordHash);
|
|
||||||
pic.deleteComplete(wordHash);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int removeEntriesMem(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException {
|
|
||||||
flushKey(wordHash, "removeEntriesMem");
|
|
||||||
return pic.removeEntries(wordHash, urlHashes, deleteComplete);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int sizeMin() {
|
|
||||||
// it is not easy to find out the correct size of the cache
|
|
||||||
// to make the result correct, it would be necessary to flush the complete ram cache
|
|
||||||
// instead, we return the minimum size of the cache, which is the maximun of either the
|
|
||||||
// ram or table cache
|
|
||||||
if ((hashScore == null) || (pic == null)) return 0;
|
|
||||||
return (hashScore.size() < pic.size()) ? pic.size() : hashScore.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
@ -1,8 +1,2 @@
|
|||||||
#plasmaParser configuration file
|
#plasmaParser configuration file
|
||||||
#Mon May 02 10:12:02 CEST 2005
|
#Sat May 07 22:32:33 CEST 2005
|
||||||
application/atom+xml=de.anomic.plasma.parser.rss.rssParser
|
|
||||||
text/rss=de.anomic.plasma.parser.rss.rssParser
|
|
||||||
application/rss+xml=de.anomic.plasma.parser.rss.rssParser
|
|
||||||
application/rdf+xml=de.anomic.plasma.parser.rss.rssParser
|
|
||||||
application/msword=de.anomic.plasma.parser.doc.docParser
|
|
||||||
application/pdf=de.anomic.plasma.parser.pdf.pdfParser
|
|
||||||
|
Loading…
Reference in new issue