From 32b5b057b9b30742afba2edce4319afa8be3a7a6 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Tue, 29 Apr 2008 22:31:05 +0000
Subject: [PATCH] - modified and simplified the old kelondroHTCache object; I
 believe it should be replaced by something completely new - removed the tree
 data type in kelondroHTCache - added the new class kelondroHeap, which may
 become the core of a storage object that will eventually replace the
 many-files strategy of kelondroHTCache - removed the compatibility mode in
 indexRAMRI
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4747 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
defaults/yacy.init | 19 --
.../de/anomic/index/indexContainerHeap.java | 75 +------
source/de/anomic/index/indexRAMRI.java | 15 +-
source/de/anomic/kelondro/kelondroHeap.java | 194 ++++++++++++++++++
source/de/anomic/plasma/plasmaHTCache.java | 188 +++++------------
.../de/anomic/plasma/plasmaSwitchboard.java | 76 +------
6 files changed, 246 insertions(+), 321 deletions(-)
create mode 100755 source/de/anomic/kelondro/kelondroHeap.java
diff --git a/defaults/yacy.init b/defaults/yacy.init
index a31e2361b..f8c4d3b76 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -206,25 +206,6 @@ proxyCacheSize__pro = 1024
# storage place for new releases
releases = DATA/RELEASE
-# use the mostly direct mapping of URLs to Filenames
-# makes it easy watching the content of the cache using file browsers
-# problems arise when a file already exists where a new entry expects a directory
-# or vice versa.
-# when set to false, the file names are set to the hash of the URL and the
-# directory is build from protokoll, hostname and port, as with the old
-# layout.
-# the advantage of this scheme is that no directory/file collisions can
-# occurr.
-# switching this flag will take effect after a restart of yacy.
-# files that are present under the previously used layout will be renamed
-# to the new location and thus be accessible immediately. so an accumulated
-# cache is still usable after the switch.
-# possible values are {tree, hash}
-proxyCacheLayout = hash
-
-# the migration flag shows, if the different layout shall be migrated from one to another
-proxyCacheMigration = true
-
# the following mime-types are the whitelist for indexing
#
# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
diff --git a/source/de/anomic/index/indexContainerHeap.java b/source/de/anomic/index/indexContainerHeap.java
index fe8a015d0..bcd1bf78f 100755
--- a/source/de/anomic/index/indexContainerHeap.java
+++ b/source/de/anomic/index/indexContainerHeap.java
@@ -42,19 +42,12 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
-import de.anomic.kelondro.kelondroBase64Order;
-import de.anomic.kelondro.kelondroBufferedRA;
import de.anomic.kelondro.kelondroByteOrder;
import de.anomic.kelondro.kelondroBytesLongMap;
import de.anomic.kelondro.kelondroCloneableIterator;
-import de.anomic.kelondro.kelondroException;
-import de.anomic.kelondro.kelondroFixedWidthArray;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
-import de.anomic.kelondro.kelondroRow.EntryIndex;
-import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog;
-import de.anomic.yacy.yacySeedDB;
public final class indexContainerHeap {
@@ -154,7 +147,7 @@ public final class indexContainerHeap {
synchronized (index) {
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
- // dont test available() here because this does not work for files > 2GB
+ // don't test available() here because this does not work for files > 2GB
loop: while (true) {
// remember seek position
seek0 = seek;
@@ -476,70 +469,4 @@ public final class indexContainerHeap {
cache.put(wordHash, container);
}
- /**
- * this is a compatibility method for a old heap dump format. don't use it if not necessary
- * @param indexArrayFile
- * @throws IOException
- */
- public void restoreArray(File indexArrayFile) throws IOException {
- // is only here to read old array data structures
- if (!(indexArrayFile.exists())) return;
- this.readOnlyMode = false;
- kelondroFixedWidthArray dumpArray;
- kelondroBufferedRA readBuffer = null;
- kelondroRow bufferStructureBasis = new kelondroRow(
- "byte[] wordhash-" + yacySeedDB.commonHashLength + ", " +
- "Cardinal occ-4 {b256}, " +
- "Cardinal time-8 {b256}, " +
- "byte[] urlprops-" + payloadrow.objectsize,
- kelondroBase64Order.enhancedCoder, 0);
- dumpArray = new kelondroFixedWidthArray(indexArrayFile, bufferStructureBasis, 0);
- log.logInfo("started restore of ram cache '" + indexArrayFile.getName() + "', " + dumpArray.size() + " word/URL relations");
- long startTime = System.currentTimeMillis();
- long messageTime = System.currentTimeMillis() + 5000;
- long urlCount = 0, urlsPerSecond = 0;
- this.cache = Collections.synchronizedSortedMap(new TreeMap<String, indexContainer>(new kelondroByteOrder.StringOrder(payloadrow.getOrdering())));
- try {
- Iterator<kelondroRow.EntryIndex> i = dumpArray.contentRows(-1);
- String wordHash;
- //long creationTime;
- indexRWIRowEntry wordEntry;
- kelondroRow.EntryIndex row;
- while (i.hasNext()) {
- // get out one entry
- row = i.next();
- if ((row == null) || (row.empty(0)) || (row.empty(3))) continue;
- wordHash = row.getColString(0, "UTF-8");
- //creationTime = kelondroRecords.bytes2long(row[2]);
- wordEntry = new indexRWIRowEntry(row.getColBytes(3));
-
- // store to cache
- indexContainer container = cache.get(wordHash);
- if (container == null) container = new indexContainer(wordHash, payloadrow, 1);
- container.put(wordEntry);
- cache.put(wordHash, container);
-
- urlCount++;
- // protect against memory shortage
- //while (serverMemory.free() < 1000000) {flushFromMem(); java.lang.System.gc();}
- // write a log
- if (System.currentTimeMillis() > messageTime) {
- serverMemory.gc(1000, "indexRAMRI, for better statistic-2"); // for better statistic - thq
- urlsPerSecond = 1 + urlCount * 1000 / (1 + System.currentTimeMillis() - startTime);
- log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpArray.size() - urlCount) / urlsPerSecond) + " seconds remaining, free mem = " + (serverMemory.free() / 1024 / 1024) + "MB");
- messageTime = System.currentTimeMillis() + 5000;
- }
- }
- if (readBuffer != null) readBuffer.close();
- dumpArray.close();
- dumpArray = null;
- log.logInfo("finished restore: " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
- } catch (kelondroException e) {
- // restore failed
- log.logSevere("failed restore of indexCache array dump: " + e.getMessage(), e);
- } finally {
- if (dumpArray != null) try {dumpArray.close();}catch(Exception e){}
- }
- }
-
}
diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java
index c6dd02800..ca1dc2644 100644
--- a/source/de/anomic/index/indexRAMRI.java
+++ b/source/de/anomic/index/indexRAMRI.java
@@ -61,24 +61,11 @@ public final class indexRAMRI implements indexRI, indexRIReader {
this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit;
this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit;
this.log = log;
- File indexArrayFile = new File(databaseRoot, oldArrayName);
this.indexHeapFile = new File(databaseRoot, newHeapName);
this.heap = new indexContainerHeap(payloadrow, log);
// read in dump of last session
- if (indexArrayFile.exists()) {
- try {
- heap.restoreArray(indexArrayFile);
- for (indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) {
- this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote()));
- this.hashScore.setScore(ic.getWordHash(), ic.size());
- }
- } catch (IOException e){
- log.logSevere("unable to restore cache dump: " + e.getMessage(), e);
- }
- indexArrayFile.delete();
- if (indexArrayFile.exists()) log.logSevere("cannot delete old array file: " + indexArrayFile.toString() + "; please delete manually");
- } else if (indexHeapFile.exists()) {
+ if (indexHeapFile.exists()) {
try {
heap.initWriteMode(indexHeapFile);
for (indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) {
diff --git a/source/de/anomic/kelondro/kelondroHeap.java b/source/de/anomic/kelondro/kelondroHeap.java
new file mode 100755
index 000000000..f4aecf26d
--- /dev/null
+++ b/source/de/anomic/kelondro/kelondroHeap.java
@@ -0,0 +1,194 @@
+// kelondroHeap.java
+// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 30.04.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
+// $LastChangedRevision: 4558 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+package de.anomic.kelondro;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+import de.anomic.kelondro.kelondroByteOrder;
+import de.anomic.kelondro.kelondroBytesLongMap;
+
+public final class kelondroHeap {
+
+ private kelondroBytesLongMap index;
+ private File heapFile;
+ private kelondroByteOrder ordering;
+
+ /**
+ * create a heap file: an arbitrary number of BLOBs, indexed by an access key.
+ * The heap file will be opened at initialization time, indexed and closed again.
+ * Heap files are only opened again when BLOBs are read from them or new ones are appended
+ * @param heapFile
+ * @param keylength
+ * @param ordering
+ * @throws IOException
+ */
+ public kelondroHeap(File heapFile, int keylength, kelondroByteOrder ordering) throws IOException {
+ this.index = null;
+ this.ordering = ordering;
+ this.heapFile = heapFile;
+ if (!(heapFile.exists())) throw new IOException("file " + heapFile + " does not exist");
+ if (heapFile.length() >= (long) Integer.MAX_VALUE) throw new IOException("file " + heapFile + " too large, an index can only be created for files smaller than 2GB");
+
+ this.index = new kelondroBytesLongMap(keylength, this.ordering, 0);
+ DataInputStream is = null;
+ String keystring;
+ byte[] key = new byte[keylength];
+ int reclen;
+ long seek = 0, seek0;
+ is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
+
+ // don't test available() here because this does not work for files > 2GB
+ loop: while (true) {
+ // remember seek position
+ seek0 = seek;
+
+ // read length of the following record without the length of the record size bytes
+ try {
+ reclen = is.readInt();
+ } catch (IOException e) {
+ break loop; // terminate loop
+ }
+ seek += 4L;
+
+ // read key
+ try {
+ is.readFully(key);
+ } catch (IOException e) {
+ break loop; // terminate loop
+ }
+ keystring = new String(key);
+ seek += (long) keystring.length();
+
+ // skip content
+ seek += (long) reclen;
+ while (reclen > 0) reclen -= is.skip(reclen);
+
+ // store access address to entry
+ try {
+ index.addl(key, seek0);
+ } catch (IOException e) {
+ e.printStackTrace();
+ break loop;
+ }
+ }
+ is.close();
+ }
+
+ /**
+ * the number of BLOBs in the heap
+ * @return the number of BLOBs in the heap
+ */
+ public int size() {
+ return this.index.size();
+ }
+
+ /**
+ * test if a key is in the heap file
+ * @param key
+ * @return true if the key exists, false otherwise
+ */
+ public boolean has(String key) {
+ assert index != null;
+ assert index.row().primaryKeyLength == key.length();
+
+ // check if the index contains the key
+ try {
+ return index.getl(key.getBytes()) >= 0;
+ } catch (IOException e) {
+ e.printStackTrace();
+ return false;
+ }
+ }
+
+ /**
+ * add a BLOB to the heap
+ * @param key
+ * @param blob
+ * @throws IOException
+ */
+ public synchronized void add(String key, byte[] blob) throws IOException {
+ add(key, blob, 0, blob.length);
+ }
+
+ /**
+ * add a BLOB to the heap
+ * @param key
+ * @param blob
+ * @throws IOException
+ */
+ public synchronized void add(String key, byte[] blob, int offset, int len) throws IOException {
+ assert index.row().primaryKeyLength == key.length();
+ if ((blob == null) || (blob.length == 0)) return;
+ // the new record is appended at the current end of the heap file
+ long pos = heapFile.length();
+ DataOutputStream os = null;
+ try {
+ // open in append mode so records already stored in the heap file are preserved
+ os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(heapFile, true)));
+ } catch (FileNotFoundException e) {
+ throw new IOException(e.getMessage());
+ }
+ os.writeInt(len);
+ os.write(key.getBytes());
+ os.write(blob, offset, len);
+ os.close();
+ index.putl(key.getBytes(), pos);
+ }
+
+ /**
+ * read a blob from the heap
+ * @param key
+ * @return the BLOB content, or null if the key is not in the index
+ * @throws IOException
+ */
+ public byte[] get(String key) throws IOException {
+ assert index.row().primaryKeyLength == key.length();
+
+ // check if the index contains the key
+ long pos = index.getl(key.getBytes());
+ if (pos < 0) return null;
+
+ // access the file and read the record at the indexed position
+ RandomAccessFile raf = new RandomAccessFile(heapFile, "r");
+ raf.seek(pos);
+ int len = raf.readInt();
+ byte[] record = new byte[len];
+ // skip the key bytes and read the payload
+ raf.seek(pos + 4 + index.row().primaryKeyLength);
+ raf.readFully(record);
+
+ raf.close();
+ return record;
+ }
+
+}
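As a usage note for reviewers (outside the diff): a minimal sketch of how the kelondroHeap class added above could be driven. It assumes that kelondroBase64Order.enhancedCoder can serve as the kelondroByteOrder (it is used as an ordering elsewhere in this patch), uses a hypothetical file location, pre-creates an empty file because the constructor only opens existing heap files, and uses 12-character keys, the common YaCy hash length.

    import java.io.File;
    import java.io.IOException;

    import de.anomic.kelondro.kelondroBase64Order;
    import de.anomic.kelondro.kelondroHeap;

    public class kelondroHeapSketch {
        public static void main(String[] args) throws IOException {
            // hypothetical location; the constructor refuses files that do not exist,
            // so an empty heap file is created first
            File heapFile = new File("DATA/INDEX/test.heap");
            if (!heapFile.exists()) heapFile.createNewFile();

            // keys must have exactly the key length given here (12 characters)
            kelondroHeap heap = new kelondroHeap(heapFile, 12, kelondroBase64Order.enhancedCoder);

            // append a BLOB under a key and read it back
            heap.add("000000000000", "hello heap".getBytes());
            if (heap.has("000000000000")) {
                byte[] blob = heap.get("000000000000");
                System.out.println(new String(blob) + " / " + heap.size() + " BLOBs indexed");
            }
        }
    }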
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index 47c01f0b9..b248fdf1a 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -61,14 +61,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.lang.StringBuffer;
import java.net.InetAddress;
-import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.Map;
-import java.util.TreeMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -100,16 +98,14 @@ public final class plasmaHTCache {
public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
static kelondroMapObjects responseHeaderDB = null;
- private static final LinkedList<Entry> cacheStack = new LinkedList<Entry>();
- private static final Map<String, File> cacheAge = Collections.synchronizedMap(new TreeMap<String, File>()); // a <date, file> - relation
+ private static final ConcurrentLinkedQueue<Entry> cacheStack = new ConcurrentLinkedQueue<Entry>();
+ private static final ConcurrentHashMap<String, File> cacheAge = new ConcurrentHashMap<String, File>(); // a <date, file> - relation
public static long curCacheSize = 0;
public static long maxCacheSize;
public static File cachePath;
public static final serverLog log = new serverLog("HTCACHE");
- public static final HashSet filesInUse = new HashSet(); // can we delete this file
- public static String cacheLayout;
- public static boolean cacheMigration;
-
+ private static long lastcleanup = System.currentTimeMillis();
+
private static ResourceInfoFactory objFactory = new ResourceInfoFactory();
private static serverThread cacheScanThread;
@@ -126,29 +122,6 @@ public final class plasmaHTCache {
public static final char DT_BINARY = 'b';
public static final char DT_UNKNOWN = 'u';
- // appearance locations: (used for flags)
- public static final int AP_TITLE = 0; // title tag from html header
- public static final int AP_H1 = 1; // headline - top level
- public static final int AP_H2 = 2; // headline, second level
- public static final int AP_H3 = 3; // headline, 3rd level
- public static final int AP_H4 = 4; // headline, 4th level
- public static final int AP_H5 = 5; // headline, 5th level
- public static final int AP_H6 = 6; // headline, 6th level
- public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam)
- public static final int AP_DOM = 8; // word inside an url: in Domain
- public static final int AP_PATH = 9; // word inside an url: in path
- public static final int AP_IMG = 10; // tag inside image references
- public static final int AP_ANCHOR = 11; // anchor description
- public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance)
- public static final int AP_BOLD = 13; // may be interpreted as emphasized
- public static final int AP_ITALICS = 14; // may be interpreted as emphasized
- public static final int AP_WEAK = 15; // for Text that is small or bareley visible
- public static final int AP_INVISIBLE = 16; // good for spam detection
- public static final int AP_TAG = 17; // for tagged indexeing (i.e. using mp3 tags)
- public static final int AP_AUTHOR = 18; // word appears in author name
- public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags)
- public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags)
-
// URL attributes
public static final int UA_LOCAL = 0; // URL was crawled locally
public static final int UA_TILDE = 1; // tilde appears in URL
@@ -229,11 +202,9 @@ public final class plasmaHTCache {
return doctype;
}
- public static void init(File htCachePath, long CacheSizeMax, String layout, boolean migration) {
+ public static void init(File htCachePath, long CacheSizeMax) {
cachePath = htCachePath;
- cacheLayout = layout;
- cacheMigration = migration;
maxCacheSize = CacheSizeMax;
@@ -328,9 +299,7 @@ public final class plasmaHTCache {
}
public static int size() {
- synchronized (cacheStack) {
- return cacheStack.size();
- }
+ return cacheStack.size();
}
public static int dbSize() {
@@ -338,17 +307,11 @@ public final class plasmaHTCache {
}
public static void push(Entry entry) {
- synchronized (cacheStack) {
- cacheStack.add(entry);
- }
+ cacheStack.add(entry);
}
public static Entry pop() {
- synchronized (cacheStack) {
- if (cacheStack.size() > 0)
- return cacheStack.removeFirst();
- return null;
- }
+ return cacheStack.poll();
}
/**
@@ -388,18 +351,15 @@ public final class plasmaHTCache {
return true;
}
- private static long lastcleanup = System.currentTimeMillis();
public static void writeFileAnnouncement(File file) {
- synchronized (cacheAge) {
- if (file.exists()) {
- curCacheSize += file.length();
- if (System.currentTimeMillis() - lastcleanup > 300000) {
- // call the cleanup job only every 5 minutes
- cleanup();
- lastcleanup = System.currentTimeMillis();
- }
- cacheAge.put(ageString(file.lastModified(), file), file);
+ if (file.exists()) {
+ curCacheSize += file.length();
+ if (System.currentTimeMillis() - lastcleanup > 300000) {
+ // call the cleanup job only every 5 minutes
+ cleanup();
+ lastcleanup = System.currentTimeMillis();
}
+ cacheAge.put(ageString(file.lastModified(), file), file);
}
}
@@ -419,7 +379,7 @@ public final class plasmaHTCache {
}
private static boolean deleteFile(File obj) {
- if (obj.exists() && !filesInUse.contains(obj)) {
+ if (obj.exists()) {
long size = obj.length();
if (obj.delete()) {
curCacheSize -= size;
@@ -446,41 +406,38 @@ public final class plasmaHTCache {
private static void cleanupDoIt(long newCacheSize) {
File file;
- synchronized (cacheAge) {
- Iterator<Map.Entry<String, File>> iter = cacheAge.entrySet().iterator();
- Map.Entry<String, File> entry;
- while (iter.hasNext() && curCacheSize >= newCacheSize) {
- if (Thread.currentThread().isInterrupted()) return;
- entry = iter.next();
- String key = entry.getKey();
- file = entry.getValue();
- long t = Long.parseLong(key.substring(0, 16), 16);
- if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted
- if (file != null) {
- if (filesInUse.contains(file)) continue;
- if (log.isFinest()) log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString());
- // This needs to be called *before* the file is deleted
- String urlHash = getHash(file);
- if (deleteFileandDirs(file, "OLD")) {
- try {
- // As the file is gone, the entry in responseHeader.db is not needed anymore
- if (urlHash != null) {
- if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash);
- responseHeaderDB.remove(urlHash);
- } else {
- yacyURL url = getURL(file);
- if (url != null) {
- if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true));
- responseHeaderDB.remove(url.hash());
- }
+ Iterator<Map.Entry<String, File>> iter = cacheAge.entrySet().iterator();
+ Map.Entry<String, File> entry;
+ while (iter.hasNext() && curCacheSize >= newCacheSize) {
+ if (Thread.currentThread().isInterrupted()) return;
+ entry = iter.next();
+ String key = entry.getKey();
+ file = entry.getValue();
+ long t = Long.parseLong(key.substring(0, 16), 16);
+ if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted
+ if (file != null) {
+ if (log.isFinest()) log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString());
+ // This needs to be called *before* the file is deleted
+ String urlHash = getHash(file);
+ if (deleteFileandDirs(file, "OLD")) {
+ try {
+ // As the file is gone, the entry in responseHeader.db is not needed anymore
+ if (urlHash != null) {
+ if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash);
+ responseHeaderDB.remove(urlHash);
+ } else {
+ yacyURL url = getURL(file);
+ if (url != null) {
+ if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true));
+ responseHeaderDB.remove(url.hash());
}
- } catch (IOException e) {
- log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
}
+ } catch (IOException e) {
+ log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
}
}
- iter.remove();
}
+ iter.remove();
}
}
@@ -664,29 +621,6 @@ public final class plasmaHTCache {
return plasmaParser.mediaExtContains(urlString);
}
- /**
- * This function moves an old cached object (if it exists) to the new position
- */
- private static void moveCachedObject(File oldpath, File newpath) {
- try {
- if (oldpath.exists() && oldpath.isFile() && (!newpath.exists())) {
- long d = oldpath.lastModified();
- newpath.getParentFile().mkdirs();
- if (oldpath.renameTo(newpath)) {
- cacheAge.put(ageString(d, newpath), newpath);
- File obj = oldpath.getParentFile();
- while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) {
- if (obj.delete()) if (log.isFine()) log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString());
- obj = obj.getParentFile();
- }
- }
- }
- } catch (Exception e) {
- if (log.isFine()) log.logFine("moveCachedObject('" + oldpath.toString() + "','" +
- newpath.toString() + "')", e);
- }
- }
-
private static String replaceRegex(String input, String regex, String replacement) {
if (input == null) { return ""; }
if (input.length() > 0) {
@@ -767,34 +701,9 @@ public final class plasmaHTCache {
fileName.append('!').append(port);
}
- // generate cache path according to storage method
- if (cacheLayout.equals("tree")) {
- File FileTree = treeFile(fileName, "tree", path);
- if (cacheMigration) {
- moveCachedObject(hashFile(fileName, "hash", extention, url.hash()), FileTree);
- moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileTree); // temporary migration
- moveCachedObject(treeFile(fileName, null, path), FileTree); // temporary migration
- }
- return FileTree;
- }
- if (cacheLayout.equals("hash")) {
- File FileFlat = hashFile(fileName, "hash", extention, url.hash());
- if (cacheMigration) {
- moveCachedObject(treeFile(fileName, "tree", path), FileFlat);
- moveCachedObject(treeFile(fileName, null, path), FileFlat); // temporary migration
- moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileFlat); // temporary migration
- }
- return FileFlat;
- }
- return null;
- }
-
- private static File treeFile(StringBuffer fileName, String prefix, String path) {
- StringBuffer f = new StringBuffer(fileName.length() + 30);
- f.append(fileName);
- if (prefix != null) f.append('/').append(prefix);
- f.append(path);
- return new File(cachePath, f.toString());
+ // generate cache path
+ File FileFlat = hashFile(fileName, "hash", extention, url.hash());
+ return FileFlat;
}
private static File hashFile(StringBuffer fileName, String prefix, String extention, String urlhash) {
@@ -807,7 +716,6 @@ public final class plasmaHTCache {
return new File(cachePath, f.toString());
}
-
/**
* This is a helper function that extracts the Hash from the filename
*/
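A note on the cacheAge bookkeeping changed above: writeFileAnnouncement() registers each cached file under a key built by ageString(), and cleanupDoIt() recovers the registration time by parsing the first 16 characters of that key as a hex number. ageString() itself is not touched by this patch and is not shown here; the sketch below only illustrates that contract with a hypothetical key builder, so the substring(0, 16) parse used in cleanupDoIt() can be followed.

    import java.io.File;

    public class cacheAgeKeySketch {
        // hypothetical stand-in for plasmaHTCache.ageString(): the only property the
        // cleanup code relies on is that characters 0..15 are the zero-padded hex
        // encoding of the file's lastModified() timestamp
        static String ageKey(long lastModified, File file) {
            String hex = Long.toHexString(lastModified);
            while (hex.length() < 16) hex = "0" + hex;
            return hex + file.hashCode();
        }

        public static void main(String[] args) {
            File f = new File("DATA/HTCACHE/hash/example.html");
            String key = ageKey(System.currentTimeMillis(), f);

            // this is the same parse that cleanupDoIt() performs on each key
            long t = Long.parseLong(key.substring(0, 16), 16);
            System.out.println("registered " + (System.currentTimeMillis() - t) + " ms ago");
        }
    }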
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 3fb42e891..73d7c8503 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -593,46 +593,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch
- /**
- * public static final String PROXY_CACHE_LAYOUT_TREE = "tree"
- * Setting the file-/folder-structure for {@link #PROXY_CACHE_LAYOUT}. Websites are stored in a folder-layout
- * according to the layout, the URL purported. The first folder is either http or https
- * depending on the protocol used to fetch the website, descending follows the hostname and the sub-folders on the
- * website up to the actual file itself.
- * When using tree, be aware that
- * the possibility of inconsistencies between folders and files with the same name may occur which prevent proper
- * storage of the fetched site. Below is an example how files are stored:
- *
- * /html/
- * /html/www.example.com/
- * /html/www.example.com/index/
- * /html/www.example.com/index/en/
- * /html/www.example.com/index/en/index.html
- */
- public static final String PROXY_CACHE_LAYOUT_TREE = "tree";
- /**
- * public static final String PROXY_CACHE_LAYOUT_HASH = "hash"
- * Setting the file-/folder-structure for {@link #PROXY_CACHE_LAYOUT}. Websites are stored using the MD5-sum of
- * their respective URLs. This method prevents collisions on some websites caused by using the {@link #PROXY_CACHE_LAYOUT_TREE}
- * layout.
- * Similarly to {@link #PROXY_CACHE_LAYOUT_TREE}, the top-folders name is given by the protocol used to fetch the site,
- * followed by either www or – if the hostname does not start with "www" – other.
- * Afterwards the next folder has the rest of the hostname as name, followed by a folder hash which contains
- * a folder consisting of the first two letters of the hash. Another folder named after the 3rd and 4th letters of the
- * hash follows which finally contains the file named after the full 18-characters long hash.
- * Below is an example how files are stored:
- *
- * /html/
- * /html/www/
- * /html/www/example.com/
- * /html/www/example.com/hash/
- * /html/www/example.com/hash/0d/
- * /html/www/example.com/hash/0d/f8/
- * /html/www/example.com/hash/0d/f8/0df83a8444f48317d8
- */
- public static final String PROXY_CACHE_LAYOUT_HASH = "hash";
- public static final String PROXY_CACHE_MIGRATION = "proxyCacheMigration";
//////////////////////////////////////////////////////////////////////////////////////////////
// Cluster settings
@@ -1087,9 +1047,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch
- Iterator<plasmaSwitchboardQueue.Entry> i1 = sbQueue.entryIterator(true);
- while (i1.hasNext()) {
- queueEntry = i1.next();
- if ((queueEntry != null) && (queueEntry.url() != null) && (queueEntry.cacheFile().exists())) {
- plasmaHTCache.filesInUse.add(queueEntry.cacheFile());
- count++;
- }
- }
- this.log.logConfig(count + " files in htcache reported to the cachemanager as in use.");
-
// define an extension-blacklist
log.logConfig("Parser: Initializing Extension Mappings for Media/Parser");
plasmaParser.initMediaExt(plasmaParser.extString2extList(getConfig(PARSER_MEDIA_EXT,"")));
@@ -1696,12 +1641,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch