From 32b5b057b9b30742afba2edce4319afa8be3a7a6 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 29 Apr 2008 22:31:05 +0000 Subject: [PATCH] - modified, simplified old kelondroHTCache object; I believe it should be replaced by something completely new - removed tree data type in kelondroHTCache - added new class kelondroHeap; may be the core for a storage object that will once replace the many-files strategy of kelondroHTCache - removed compatibility mode in indexRAMRI git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4747 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- defaults/yacy.init | 19 -- .../de/anomic/index/indexContainerHeap.java | 75 +------ source/de/anomic/index/indexRAMRI.java | 15 +- source/de/anomic/kelondro/kelondroHeap.java | 194 ++++++++++++++++++ source/de/anomic/plasma/plasmaHTCache.java | 188 +++++------------ .../de/anomic/plasma/plasmaSwitchboard.java | 76 +------ 6 files changed, 246 insertions(+), 321 deletions(-) create mode 100755 source/de/anomic/kelondro/kelondroHeap.java diff --git a/defaults/yacy.init b/defaults/yacy.init index a31e2361b..f8c4d3b76 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -206,25 +206,6 @@ proxyCacheSize__pro = 1024 # storage place for new releases releases = DATA/RELEASE -# use the mostly direct mapping of URLs to Filenames -# makes it easy watching the content of the cache using file browsers -# problems arise when a file already exists where a new entry expects a directory -# or vice versa. -# when set to false, the file names are set to the hash of the URL and the -# directory is build from protokoll, hostname and port, as with the old -# layout. -# the advantage of this scheme is that no directory/file collisions can -# occurr. -# switching this flag will take effect after a restart of yacy. -# files that are present under the previously used layout will be renamed -# to the new location and thus be accessible immediately. so an accumulated -# cache is still usable after the switch. 
-# possible values are {tree, hash} -proxyCacheLayout = hash - -# the migration flag shows, if the different layout shall be migrated from one to another -proxyCacheMigration = true - # the following mime-types are the whitelist for indexing # # parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser diff --git a/source/de/anomic/index/indexContainerHeap.java b/source/de/anomic/index/indexContainerHeap.java index fe8a015d0..bcd1bf78f 100755 --- a/source/de/anomic/index/indexContainerHeap.java +++ b/source/de/anomic/index/indexContainerHeap.java @@ -42,19 +42,12 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroBufferedRA; import de.anomic.kelondro.kelondroByteOrder; import de.anomic.kelondro.kelondroBytesLongMap; import de.anomic.kelondro.kelondroCloneableIterator; -import de.anomic.kelondro.kelondroException; -import de.anomic.kelondro.kelondroFixedWidthArray; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; -import de.anomic.kelondro.kelondroRow.EntryIndex; -import de.anomic.server.serverMemory; import de.anomic.server.logging.serverLog; -import de.anomic.yacy.yacySeedDB; public final class indexContainerHeap { @@ -154,7 +147,7 @@ public final class indexContainerHeap { synchronized (index) { is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024)); - // dont test available() here because this does not work for files > 2GB + // don't test available() here because this does not work for files > 2GB loop: while (true) { // remember seek position seek0 = seek; @@ -476,70 +469,4 @@ public final class indexContainerHeap { cache.put(wordHash, container); } - /** - * this is a compatibility method for a old heap dump format. 
don't use it if not necessary - * @param indexArrayFile - * @throws IOException - */ - public void restoreArray(File indexArrayFile) throws IOException { - // is only here to read old array data structures - if (!(indexArrayFile.exists())) return; - this.readOnlyMode = false; - kelondroFixedWidthArray dumpArray; - kelondroBufferedRA readBuffer = null; - kelondroRow bufferStructureBasis = new kelondroRow( - "byte[] wordhash-" + yacySeedDB.commonHashLength + ", " + - "Cardinal occ-4 {b256}, " + - "Cardinal time-8 {b256}, " + - "byte[] urlprops-" + payloadrow.objectsize, - kelondroBase64Order.enhancedCoder, 0); - dumpArray = new kelondroFixedWidthArray(indexArrayFile, bufferStructureBasis, 0); - log.logInfo("started restore of ram cache '" + indexArrayFile.getName() + "', " + dumpArray.size() + " word/URL relations"); - long startTime = System.currentTimeMillis(); - long messageTime = System.currentTimeMillis() + 5000; - long urlCount = 0, urlsPerSecond = 0; - this.cache = Collections.synchronizedSortedMap(new TreeMap(new kelondroByteOrder.StringOrder(payloadrow.getOrdering()))); - try { - Iterator i = dumpArray.contentRows(-1); - String wordHash; - //long creationTime; - indexRWIRowEntry wordEntry; - kelondroRow.EntryIndex row; - while (i.hasNext()) { - // get out one entry - row = i.next(); - if ((row == null) || (row.empty(0)) || (row.empty(3))) continue; - wordHash = row.getColString(0, "UTF-8"); - //creationTime = kelondroRecords.bytes2long(row[2]); - wordEntry = new indexRWIRowEntry(row.getColBytes(3)); - - // store to cache - indexContainer container = cache.get(wordHash); - if (container == null) container = new indexContainer(wordHash, payloadrow, 1); - container.put(wordEntry); - cache.put(wordHash, container); - - urlCount++; - // protect against memory shortage - //while (serverMemory.free() < 1000000) {flushFromMem(); java.lang.System.gc();} - // write a log - if (System.currentTimeMillis() > messageTime) { - serverMemory.gc(1000, "indexRAMRI, for better statistic-2"); // for better statistic - thq - urlsPerSecond = 1 + urlCount * 1000 / (1 + System.currentTimeMillis() - startTime); - log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpArray.size() - urlCount) / urlsPerSecond) + " seconds remaining, free mem = " + (serverMemory.free() / 1024 / 1024) + "MB"); - messageTime = System.currentTimeMillis() + 5000; - } - } - if (readBuffer != null) readBuffer.close(); - dumpArray.close(); - dumpArray = null; - log.logInfo("finished restore: " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds"); - } catch (kelondroException e) { - // restore failed - log.logSevere("failed restore of indexCache array dump: " + e.getMessage(), e); - } finally { - if (dumpArray != null) try {dumpArray.close();}catch(Exception e){} - } - } - } diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java index c6dd02800..ca1dc2644 100644 --- a/source/de/anomic/index/indexRAMRI.java +++ b/source/de/anomic/index/indexRAMRI.java @@ -61,24 +61,11 @@ public final class indexRAMRI implements indexRI, indexRIReader { this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit; this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit; this.log = log; - File indexArrayFile = new File(databaseRoot, oldArrayName); this.indexHeapFile = new File(databaseRoot, newHeapName); this.heap = new indexContainerHeap(payloadrow, log); // read in dump of last session - if (indexArrayFile.exists()) { - try { - 
heap.restoreArray(indexArrayFile); - for (indexContainer ic : (Iterable) heap.wordContainers(null, false)) { - this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote())); - this.hashScore.setScore(ic.getWordHash(), ic.size()); - } - } catch (IOException e){ - log.logSevere("unable to restore cache dump: " + e.getMessage(), e); - } - indexArrayFile.delete(); - if (indexArrayFile.exists()) log.logSevere("cannot delete old array file: " + indexArrayFile.toString() + "; please delete manually"); - } else if (indexHeapFile.exists()) { + if (indexHeapFile.exists()) { try { heap.initWriteMode(indexHeapFile); for (indexContainer ic : (Iterable) heap.wordContainers(null, false)) { diff --git a/source/de/anomic/kelondro/kelondroHeap.java b/source/de/anomic/kelondro/kelondroHeap.java new file mode 100755 index 000000000..f4aecf26d --- /dev/null +++ b/source/de/anomic/kelondro/kelondroHeap.java @@ -0,0 +1,194 @@ +// kelondroHeap.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 30.04.2008 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $ +// $LastChangedRevision: 4558 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; + +import de.anomic.kelondro.kelondroByteOrder; +import de.anomic.kelondro.kelondroBytesLongMap; + +public final class kelondroHeap { + + private kelondroBytesLongMap index; + private File heapFile; + private kelondroByteOrder ordering; + + /** + * create a heap file: a arbitrary number of BLOBs, indexed by an access key + * The heap file will be opened at initialization time, indexed and closed again. 
+ * Heap files are only opened when BLOBs are read from it or new one are appended + * @param heapFile + * @param keylength + * @param ordering + * @throws IOException + */ + public kelondroHeap(File heapFile, int keylength, kelondroByteOrder ordering) throws IOException { + this.index = null; + this.ordering = ordering; + this.heapFile = heapFile; + if (!(heapFile.exists())) throw new IOException("file " + heapFile + " does not exist"); + if (heapFile.length() >= (long) Integer.MAX_VALUE) throw new IOException("file " + heapFile + " too large, index can only be crated for files less than 2GB"); + + this.index = new kelondroBytesLongMap(keylength, this.ordering, 0); + DataInputStream is = null; + String keystring; + byte[] key = new byte[keylength]; + int reclen; + long seek = 0, seek0; + is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024)); + + // don't test available() here because this does not work for files > 2GB + loop: while (true) { + // remember seek position + seek0 = seek; + + // read length of the following record without the length of the record size bytes + try { + reclen = is.readInt(); + } catch (IOException e) { + break loop; // terminate loop + } + seek += 4L; + + // read key + try { + is.readFully(key); + } catch (IOException e) { + break loop; // terminate loop + } + keystring = new String(key); + seek += (long) keystring.length(); + + // skip content + seek += (long) reclen; + while (reclen > 0) reclen -= is.skip(reclen); + + // store access address to entry + try { + index.addl(key, seek0); + } catch (IOException e) { + e.printStackTrace(); + break loop; + } + } + is.close(); + } + + /** + * the number of BLOBs in the heap + * @return the number of BLOBs in the heap + */ + public int size() { + return this.index.size(); + } + + /** + * test if a key is in the heap file + * @param key + * @return true if the key exists, false othervise + */ + public boolean has(String key) { + assert index != null; + assert index.row().primaryKeyLength == key.length(); + + // check if the index contains the key + try { + return index.getl(key.getBytes()) >= 0; + } catch (IOException e) { + e.printStackTrace(); + return false; + } + } + + /** + * add a BLOB to the heap + * @param key + * @param blob + * @throws IOException + */ + public synchronized void add(String key, byte[] blob) throws IOException { + add(key, blob, 0, blob.length); + } + + /** + * add a BLOB to the heap + * @param key + * @param blob + * @throws IOException + */ + public synchronized void add(String key, byte[] blob, int offset, int len) throws IOException { + assert index.row().primaryKeyLength == key.length(); + if ((blob == null) || (blob.length == 0)) return; + DataOutputStream os = null; + try { + os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(heapFile))); + } catch (FileNotFoundException e) { + throw new IOException(e.getMessage()); + } + int pos = os.size(); + os.writeInt(len); + os.write(key.getBytes()); + os.write(blob, offset, len); + os.close(); + index.putl(key.getBytes(), pos); + } + + /** + * read a blob from the heap + * @param key + * @return + * @throws IOException + */ + public byte[] get(String key) throws IOException { + assert index.row().primaryKeyLength == key.length(); + + // check if the index contains the key + long pos = index.getl(key.getBytes()); + if (pos < 0) return null; + + // access the file and read the container + RandomAccessFile raf = new RandomAccessFile(heapFile, "r"); + int len = raf.readInt(); + byte[] record = 
new byte[len]; + + raf.seek(pos + 4 + index.row().primaryKeyLength); + raf.readFully(record); + + raf.close(); + return record; + } + +} diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 47c01f0b9..b248fdf1a 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -61,14 +61,12 @@ import java.io.IOException; import java.io.InputStream; import java.lang.StringBuffer; import java.net.InetAddress; -import java.util.Collections; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; import java.util.Map; -import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -100,16 +98,14 @@ public final class plasmaHTCache { public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day static kelondroMapObjects responseHeaderDB = null; - private static final LinkedList cacheStack = new LinkedList(); - private static final Map cacheAge = Collections.synchronizedMap(new TreeMap()); // a - relation + private static final ConcurrentLinkedQueue cacheStack = new ConcurrentLinkedQueue(); + private static final ConcurrentHashMap cacheAge = new ConcurrentHashMap(); // a - relation public static long curCacheSize = 0; public static long maxCacheSize; public static File cachePath; public static final serverLog log = new serverLog("HTCACHE"); - public static final HashSet filesInUse = new HashSet(); // can we delete this file - public static String cacheLayout; - public static boolean cacheMigration; - + private static long lastcleanup = System.currentTimeMillis(); + private static ResourceInfoFactory objFactory = new ResourceInfoFactory(); private static serverThread cacheScanThread; @@ -126,29 +122,6 @@ public final class plasmaHTCache { public static final char DT_BINARY = 'b'; public static final char DT_UNKNOWN = 'u'; - // appearance locations: (used for flags) - public static final int AP_TITLE = 0; // title tag from html header - public static final int AP_H1 = 1; // headline - top level - public static final int AP_H2 = 2; // headline, second level - public static final int AP_H3 = 3; // headline, 3rd level - public static final int AP_H4 = 4; // headline, 4th level - public static final int AP_H5 = 5; // headline, 5th level - public static final int AP_H6 = 6; // headline, 6th level - public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam) - public static final int AP_DOM = 8; // word inside an url: in Domain - public static final int AP_PATH = 9; // word inside an url: in path - public static final int AP_IMG = 10; // tag inside image references - public static final int AP_ANCHOR = 11; // anchor description - public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance) - public static final int AP_BOLD = 13; // may be interpreted as emphasized - public static final int AP_ITALICS = 14; // may be interpreted as emphasized - public static final int AP_WEAK = 15; // for Text that is small or bareley visible - public static final int AP_INVISIBLE = 16; // good for spam detection - public static final int AP_TAG = 17; // for tagged indexeing (i.e. 
using mp3 tags) - public static final int AP_AUTHOR = 18; // word appears in author name - public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags) - public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags) - // URL attributes public static final int UA_LOCAL = 0; // URL was crawled locally public static final int UA_TILDE = 1; // tilde appears in URL @@ -229,11 +202,9 @@ public final class plasmaHTCache { return doctype; } - public static void init(File htCachePath, long CacheSizeMax, String layout, boolean migration) { + public static void init(File htCachePath, long CacheSizeMax) { cachePath = htCachePath; - cacheLayout = layout; - cacheMigration = migration; maxCacheSize = CacheSizeMax; @@ -328,9 +299,7 @@ public final class plasmaHTCache { } public static int size() { - synchronized (cacheStack) { - return cacheStack.size(); - } + return cacheStack.size(); } public static int dbSize() { @@ -338,17 +307,11 @@ public final class plasmaHTCache { } public static void push(Entry entry) { - synchronized (cacheStack) { - cacheStack.add(entry); - } + cacheStack.add(entry); } public static Entry pop() { - synchronized (cacheStack) { - if (cacheStack.size() > 0) - return cacheStack.removeFirst(); - return null; - } + return cacheStack.poll(); } /** @@ -388,18 +351,15 @@ public final class plasmaHTCache { return true; } - private static long lastcleanup = System.currentTimeMillis(); public static void writeFileAnnouncement(File file) { - synchronized (cacheAge) { - if (file.exists()) { - curCacheSize += file.length(); - if (System.currentTimeMillis() - lastcleanup > 300000) { - // call the cleanup job only every 5 minutes - cleanup(); - lastcleanup = System.currentTimeMillis(); - } - cacheAge.put(ageString(file.lastModified(), file), file); + if (file.exists()) { + curCacheSize += file.length(); + if (System.currentTimeMillis() - lastcleanup > 300000) { + // call the cleanup job only every 5 minutes + cleanup(); + lastcleanup = System.currentTimeMillis(); } + cacheAge.put(ageString(file.lastModified(), file), file); } } @@ -419,7 +379,7 @@ public final class plasmaHTCache { } private static boolean deleteFile(File obj) { - if (obj.exists() && !filesInUse.contains(obj)) { + if (obj.exists()) { long size = obj.length(); if (obj.delete()) { curCacheSize -= size; @@ -446,41 +406,38 @@ public final class plasmaHTCache { private static void cleanupDoIt(long newCacheSize) { File file; - synchronized (cacheAge) { - Iterator> iter = cacheAge.entrySet().iterator(); - Map.Entry entry; - while (iter.hasNext() && curCacheSize >= newCacheSize) { - if (Thread.currentThread().isInterrupted()) return; - entry = iter.next(); - String key = entry.getKey(); - file = entry.getValue(); - long t = Long.parseLong(key.substring(0, 16), 16); - if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted - if (file != null) { - if (filesInUse.contains(file)) continue; - if (log.isFinest()) log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString()); - // This needs to be called *before* the file is deleted - String urlHash = getHash(file); - if (deleteFileandDirs(file, "OLD")) { - try { - // As the file is gone, the entry in responseHeader.db is not needed anymore - if (urlHash != null) { - if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash); - responseHeaderDB.remove(urlHash); - } else { - yacyURL url = 
getURL(file); - if (url != null) { - if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true)); - responseHeaderDB.remove(url.hash()); - } + Iterator> iter = cacheAge.entrySet().iterator(); + Map.Entry entry; + while (iter.hasNext() && curCacheSize >= newCacheSize) { + if (Thread.currentThread().isInterrupted()) return; + entry = iter.next(); + String key = entry.getKey(); + file = entry.getValue(); + long t = Long.parseLong(key.substring(0, 16), 16); + if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted + if (file != null) { + if (log.isFinest()) log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString()); + // This needs to be called *before* the file is deleted + String urlHash = getHash(file); + if (deleteFileandDirs(file, "OLD")) { + try { + // As the file is gone, the entry in responseHeader.db is not needed anymore + if (urlHash != null) { + if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash); + responseHeaderDB.remove(urlHash); + } else { + yacyURL url = getURL(file); + if (url != null) { + if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true)); + responseHeaderDB.remove(url.hash()); } - } catch (IOException e) { - log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); } + } catch (IOException e) { + log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); } } - iter.remove(); } + iter.remove(); } } @@ -664,29 +621,6 @@ public final class plasmaHTCache { return plasmaParser.mediaExtContains(urlString); } - /** - * This function moves an old cached object (if it exists) to the new position - */ - private static void moveCachedObject(File oldpath, File newpath) { - try { - if (oldpath.exists() && oldpath.isFile() && (!newpath.exists())) { - long d = oldpath.lastModified(); - newpath.getParentFile().mkdirs(); - if (oldpath.renameTo(newpath)) { - cacheAge.put(ageString(d, newpath), newpath); - File obj = oldpath.getParentFile(); - while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) { - if (obj.delete()) if (log.isFine()) log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString()); - obj = obj.getParentFile(); - } - } - } - } catch (Exception e) { - if (log.isFine()) log.logFine("moveCachedObject('" + oldpath.toString() + "','" + - newpath.toString() + "')", e); - } - } - private static String replaceRegex(String input, String regex, String replacement) { if (input == null) { return ""; } if (input.length() > 0) { @@ -767,34 +701,9 @@ public final class plasmaHTCache { fileName.append('!').append(port); } - // generate cache path according to storage method - if (cacheLayout.equals("tree")) { - File FileTree = treeFile(fileName, "tree", path); - if (cacheMigration) { - moveCachedObject(hashFile(fileName, "hash", extention, url.hash()), FileTree); - moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileTree); // temporary migration - moveCachedObject(treeFile(fileName, null, path), FileTree); // temporary migration - } - return FileTree; - } - if (cacheLayout.equals("hash")) { - File FileFlat = hashFile(fileName, "hash", extention, url.hash()); - if (cacheMigration) { - moveCachedObject(treeFile(fileName, "tree", path), FileFlat); - moveCachedObject(treeFile(fileName, null, path), FileFlat); // temporary migration - 
moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileFlat); // temporary migration - } - return FileFlat; - } - return null; - } - - private static File treeFile(StringBuffer fileName, String prefix, String path) { - StringBuffer f = new StringBuffer(fileName.length() + 30); - f.append(fileName); - if (prefix != null) f.append('/').append(prefix); - f.append(path); - return new File(cachePath, f.toString()); + // generate cache path + File FileFlat = hashFile(fileName, "hash", extention, url.hash()); + return FileFlat; } private static File hashFile(StringBuffer fileName, String prefix, String extention, String urlhash) { @@ -807,7 +716,6 @@ public final class plasmaHTCache { return new File(cachePath, f.toString()); } - /** * This is a helper function that extracts the Hash from the filename */ diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 3fb42e891..73d7c8503 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -593,46 +593,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitchpublic static final String PROXY_CACHE_LAYOUT_TREE = "tree"

-     *
-     * Setting for the file-/folder-structure of {@link #PROXY_CACHE_LAYOUT}. Websites are stored in a folder layout
-     * that mirrors the structure of the URL: the first folder is either http or https,
-     * depending on the protocol used to fetch the website, followed by the hostname and the sub-folders of the
-     * website, down to the actual file itself.
-     *
-     * When using tree, be aware that collisions between folders and files of the same name
-     * can occur and prevent proper storage of a fetched site. Below is an example of how files are stored:
-     *
-     * /html/
-     * /html/www.example.com/
-     * /html/www.example.com/index/
-     * /html/www.example.com/index/en/
-     * /html/www.example.com/index/en/index.html
-     */
-    public static final String PROXY_CACHE_LAYOUT_TREE = "tree";
-
-    /**
-     * public static final String PROXY_CACHE_LAYOUT_HASH = "hash"
-     *
-     * Setting for the file-/folder-structure of {@link #PROXY_CACHE_LAYOUT}. Websites are stored under the MD5 sum of
-     * their respective URLs. This method prevents the collisions that the {@link #PROXY_CACHE_LAYOUT_TREE}
-     * layout can cause on some websites.
-     *
-     * Similarly to {@link #PROXY_CACHE_LAYOUT_TREE}, the top folder is named after the protocol used to fetch the site,
-     * followed by either www or, if the hostname does not start with "www", other.
-     * The next folder is named after the rest of the hostname, followed by a folder named hash, which contains
-     * a folder named after the first two letters of the hash, then a folder named after the 3rd and 4th letters of the
-     * hash, which finally contains the file named after the full 18-character hash.
-     * Below is an example of how files are stored:
-     *
-     * /html/
-     * /html/www/
-     * /html/www/example.com/
-     * /html/www/example.com/hash/
-     * /html/www/example.com/hash/0d/
-     * /html/www/example.com/hash/0d/f8/
-     * /html/www/example.com/hash/0d/f8/0df83a8444f48317d8
- */ - public static final String PROXY_CACHE_LAYOUT_HASH = "hash"; - public static final String PROXY_CACHE_MIGRATION = "proxyCacheMigration"; ////////////////////////////////////////////////////////////////////////////////////////////// // Cluster settings @@ -1087,9 +1047,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch i1 = sbQueue.entryIterator(true); - while (i1.hasNext()) { - queueEntry = i1.next(); - if ((queueEntry != null) && (queueEntry.url() != null) && (queueEntry.cacheFile().exists())) { - plasmaHTCache.filesInUse.add(queueEntry.cacheFile()); - count++; - } - } - this.log.logConfig(count + " files in htcache reported to the cachemanager as in use."); - // define an extension-blacklist log.logConfig("Parser: Initializing Extension Mappings for Media/Parser"); plasmaParser.initMediaExt(plasmaParser.extString2extList(getConfig(PARSER_MEDIA_EXT,""))); @@ -1696,12 +1641,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch
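
Note (illustration, not part of the patch above): kelondroHeap stores an arbitrary number of BLOBs in a single file and keeps only a key-to-offset index in memory. As the diff shows, each on-disk record is a 4-byte payload length, followed by the fixed-width key, followed by the payload bytes; the constructor rebuilds the index by scanning these records once. The following self-contained Java sketch mirrors that record layout. The class name, the demo file, the 12-byte key width, and the use of a plain HashMap in place of kelondroBytesLongMap are assumptions made for the example, not code from YaCy.

// HeapRecordSketch.java - illustrative sketch only, not part of this patch.
// Record layout assumed from kelondroHeap: [int payload length][fixed-width key][payload], repeated.
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class HeapRecordSketch {

    private static final int KEY_LENGTH = 12; // assumed key width for this example

    // append one record: length of the payload, then the key, then the payload
    public static void append(File heap, byte[] key, byte[] payload) throws IOException {
        DataOutputStream os = new DataOutputStream(new FileOutputStream(heap, true)); // true = append
        try {
            os.writeInt(payload.length);
            os.write(key);
            os.write(payload);
        } finally {
            os.close();
        }
    }

    // scan the file once and build a key -> seek-position index,
    // analogous to what the kelondroHeap constructor does with kelondroBytesLongMap
    public static Map<String, Long> index(File heap) throws IOException {
        Map<String, Long> index = new HashMap<String, Long>();
        DataInputStream is = new DataInputStream(new BufferedInputStream(new FileInputStream(heap), 64 * 1024));
        try {
            byte[] key = new byte[KEY_LENGTH];
            long seek = 0;
            while (true) {
                int len;
                try {
                    len = is.readInt();             // payload length, without key and length field
                } catch (EOFException e) {
                    break;                          // clean end of file
                }
                is.readFully(key);
                int toSkip = len;
                while (toSkip > 0) toSkip -= is.skipBytes(toSkip); // jump over the payload
                index.put(new String(key), Long.valueOf(seek));    // remember where this record starts
                seek += 4L + KEY_LENGTH + len;                     // start of the next record
            }
        } finally {
            is.close();
        }
        return index;
    }

    public static void main(String[] args) throws IOException {
        File heap = new File("demo.heap");                              // hypothetical demo file
        append(heap, "AAAAAAAAAAAA".getBytes(), "hello".getBytes());
        append(heap, "BBBBBBBBBBBB".getBytes(), "world".getBytes());
        System.out.println(index(heap));  // prints key -> offset, e.g. AAAAAAAAAAAA=0, BBBBBBBBBBBB=21
    }
}

Keeping only offsets in RAM is what would let such a heap replace the many-files strategy mentioned in the commit message: one append-only file plus an index, instead of one file per cached object.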
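
Note (illustration, not part of the patch above): in plasmaHTCache the commit swaps the externally synchronized LinkedList and TreeMap for java.util.concurrent collections, so push, pop, size and the file announcement no longer need synchronized blocks. A minimal sketch of that pattern follows; the class name and the simplified age key are hypothetical stand-ins.

// CacheQueueSketch.java - illustrative sketch only, not part of this patch.
import java.io.File;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;

public class CacheQueueSketch {

    private static final ConcurrentLinkedQueue<String> cacheStack = new ConcurrentLinkedQueue<String>();
    private static final ConcurrentHashMap<String, File> cacheAge = new ConcurrentHashMap<String, File>();

    public static void push(String entry) {
        cacheStack.add(entry);               // was: synchronized add on a LinkedList
    }

    public static String pop() {
        return cacheStack.poll();            // was: synchronized removeFirst; poll() returns null if empty
    }

    public static int size() {
        return cacheStack.size();            // note: size() walks the queue and is O(n)
    }

    public static void writeFileAnnouncement(File file) {
        if (!file.exists()) return;
        // simplified stand-in for the ageString(lastModified, file) key used in the patch
        cacheAge.put(Long.toHexString(file.lastModified()) + "/" + file.getName(), file);
    }
}

A design consequence of the swap: the old TreeMap iterated cacheAge in key order (oldest age string first), while ConcurrentHashMap iterates in no particular order, so the cleanup loop no longer visits the oldest files first.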