Removed htcache statuses (superfluous now)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@396 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 277048501e
commit 2f0d7ea8d3

@ -499,6 +499,13 @@ public final class httpc {
return ((status.charAt(0) == '2') || (status.charAt(0) == '3')); return ((status.charAt(0) == '2') || (status.charAt(0) == '3'));
} }
/**
 * Collects the response content in an in-memory buffer and returns its bytes.
 *
 * <p>The Content-Length reported by the response header is used only to choose
 * the initial size of the buffer. A value that is negative (including the -1
 * "unknown" marker) or that does not fit into an {@code int} falls back to a
 * default of 8192, so a bogus or oversized header can no longer turn into a
 * negative buffer size through the narrowing cast.
 *
 * @return the bytes held by the buffer after {@code writeContentX} has run
 * @throws IOException if {@code writeContentX} fails
 */
public byte[] writeContent() throws IOException {
    // Keep the declared length in its wide form first; casting to int before
    // validating would silently wrap values above Integer.MAX_VALUE.
    final long declaredLength = this.responseHeader.contentLength();
    final boolean usable = (declaredLength >= 0) && (declaredLength <= Integer.MAX_VALUE);
    final int initialSize = usable ? (int) declaredLength : 8192;
    // NOTE(review): a very large but valid declared length is still passed through
    // as the initial size - confirm whether serverByteBuffer allocates it eagerly.
    serverByteBuffer sbb = new serverByteBuffer(initialSize);
    // No OutputStream is supplied here (first argument is null), only the buffer.
    writeContentX(null, sbb, httpc.this.clientInput);
    return sbb.getBytes();
}
public byte[] writeContent(OutputStream procOS) throws IOException { public byte[] writeContent(OutputStream procOS) throws IOException {
int contentLength = (int) this.responseHeader.contentLength(); int contentLength = (int) this.responseHeader.contentLength();
serverByteBuffer sbb = new serverByteBuffer((contentLength==-1)?8192:contentLength); serverByteBuffer sbb = new serverByteBuffer((contentLength==-1)?8192:contentLength);

@ -618,19 +618,19 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
if (sizeBeforeDelete == -1) { if (sizeBeforeDelete == -1) {
// totally fresh file // totally fresh file
cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
cacheEntry.cacheArray = cacheArray; cacheEntry.cacheArray = cacheArray;
cacheManager.push(cacheEntry); cacheManager.push(cacheEntry);
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
} else if (sizeBeforeDelete == cacheArray.length) { } else if (sizeBeforeDelete == cacheArray.length) {
// before we came here we deleted a cache entry // before we came here we deleted a cache entry
cacheArray = null; cacheArray = null;
cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD;
cacheManager.push(cacheEntry); // unnecessary update //cacheManager.push(cacheEntry); // unnecessary update
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REF_FAIL_HIT"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REF_FAIL_HIT");
} else { } else {
// before we came here we deleted a cache entry // before we came here we deleted a cache entry
cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD;
cacheEntry.cacheArray = cacheArray; cacheEntry.cacheArray = cacheArray;
cacheManager.push(cacheEntry); // necessary update, write response header to cache cacheManager.push(cacheEntry); // necessary update, write response header to cache
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS");
@ -644,17 +644,17 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
this.theLogger.logDebug("for write-file of " + url + ": contentLength = " + contentLength + ", sizeBeforeDelete = " + sizeBeforeDelete); this.theLogger.logDebug("for write-file of " + url + ": contentLength = " + contentLength + ", sizeBeforeDelete = " + sizeBeforeDelete);
if (sizeBeforeDelete == -1) { if (sizeBeforeDelete == -1) {
// totally fresh file // totally fresh file
cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
cacheManager.push(cacheEntry); cacheManager.push(cacheEntry);
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
} else if (sizeBeforeDelete == cacheFile.length()) { } else if (sizeBeforeDelete == cacheFile.length()) {
// before we came here we deleted a cache entry // before we came here we deleted a cache entry
cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD;
cacheManager.push(cacheEntry); // unnecessary update //cacheManager.push(cacheEntry); // unnecessary update
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REF_FAIL_HIT"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REF_FAIL_HIT");
} else { } else {
// before we came here we deleted a cache entry // before we came here we deleted a cache entry
cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD;
cacheManager.push(cacheEntry); // necessary update, write response header to cache cacheManager.push(cacheEntry); // necessary update, write response header to cache
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS");
} }
@ -668,12 +668,12 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
if (sizeBeforeDelete == -1) { if (sizeBeforeDelete == -1) {
// no old file and no load. just data passing // no old file and no load. just data passing
cacheEntry.status = plasmaHTCache.CACHE_PASSING; //cacheEntry.status = plasmaHTCache.CACHE_PASSING;
cacheManager.push(cacheEntry); //cacheManager.push(cacheEntry);
} else { } else {
// before we came here we deleted a cache entry // before we came here we deleted a cache entry
cacheEntry.status = plasmaHTCache.CACHE_STALE_NO_RELOAD; //cacheEntry.status = plasmaHTCache.CACHE_STALE_NO_RELOAD;
cacheManager.push(cacheEntry); //cacheManager.push(cacheEntry);
} }
conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
} }

@ -324,11 +324,11 @@ public final class plasmaCrawlWorker extends Thread {
File cacheFile = cacheManager.getCachePath(url); File cacheFile = cacheManager.getCachePath(url);
try { try {
String error = null; String error = null;
if (!(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime()))) { if ((!(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime()))) &&
(!(plasmaParser.supportedFileExt(url)))) {
// if the response has not the right file type then reject file // if the response has not the right file type then reject file
remote.close(); remote.close();
log.logInfo("REJECTED WRONG MIME TYPE " + res.responseHeader.mime() + " for url " + url.toString()); log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for url " + url.toString());
htCache.status = plasmaHTCache.CACHE_UNFILLED;
} else { } else {
// we write the new cache entry to file system directly // we write the new cache entry to file system directly
cacheFile.getParentFile().mkdirs(); cacheFile.getParentFile().mkdirs();
@ -337,11 +337,11 @@ public final class plasmaCrawlWorker extends Thread {
fos = new FileOutputStream(cacheFile); fos = new FileOutputStream(cacheFile);
res.writeContent(fos); // superfluous write to array res.writeContent(fos); // superfluous write to array
htCache.cacheArray = null; htCache.cacheArray = null;
cacheManager.writeFileAnnouncement(cacheFile);
//htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file //htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
} finally { } finally {
if (fos!=null)try{fos.close();}catch(Exception e){} if (fos!=null)try{fos.close();}catch(Exception e){}
} }
htCache.status = plasmaHTCache.CACHE_FILL;
} }
// enQueue new entry with response header // enQueue new entry with response header
if (profile != null) { if (profile != null) {

@ -86,6 +86,7 @@ public final class plasmaHTCache {
public final File cachePath; public final File cachePath;
public static serverLog log; public static serverLog log;
/*
public static final int CACHE_UNFILLED = 0; // default case without assignment public static final int CACHE_UNFILLED = 0; // default case without assignment
public static final int CACHE_FILL = 1; // this means: update == true public static final int CACHE_FILL = 1; // this means: update == true
public static final int CACHE_HIT = 2; // the best case: reading from Cache public static final int CACHE_HIT = 2; // the best case: reading from Cache
@ -93,7 +94,8 @@ public final class plasmaHTCache {
public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true
public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content
public static final int CACHE_PASSING = 6; // does not touch cache, just passing public static final int CACHE_PASSING = 6; // does not touch cache, just passing
*/
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) { public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
//this.switchboard = switchboard; //this.switchboard = switchboard;
@ -156,8 +158,7 @@ public final class plasmaHTCache {
responseHeaderDB.set(urlHash, responseHeader); responseHeaderDB.set(urlHash, responseHeader);
} }
public boolean deleteFile(URL url) { private boolean deleteFile(File file) {
File file = getCachePath(url);
if (file.exists()) { if (file.exists()) {
currCacheSize -= file.length(); currCacheSize -= file.length();
return file.delete(); return file.delete();
@ -166,18 +167,17 @@ public final class plasmaHTCache {
} }
} }
/**
 * Deletes the cache file that belongs to the given URL.
 *
 * @param url the URL whose cache path is looked up via {@code getCachePath}
 * @return the result of {@code deleteFile(File)} for that cache path
 */
public boolean deleteFile(URL url) {
    final File cachedFile = getCachePath(url);
    return deleteFile(cachedFile);
}
public boolean writeFile(URL url, byte[] array) { public boolean writeFile(URL url, byte[] array) {
if (array == null) return false; if (array == null) return false;
File file = getCachePath(url);
try { try {
File file = getCachePath(url); deleteFile(file);
if (file.exists()) {
currCacheSize -= file.length();
file.delete();
}
file.getParentFile().mkdirs(); file.getParentFile().mkdirs();
serverFileUtils.write(array, file); serverFileUtils.write(array, file);
currCacheSize += file.length();
cacheAge.put(ageString(file.lastModified(), file), file);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
// this is the case of a "(Not a directory)" error, which should be prohibited // this is the case of a "(Not a directory)" error, which should be prohibited
// by the shallStoreCache() property. However, sometimes the error still occurs // by the shallStoreCache() property. However, sometimes the error still occurs
@ -188,10 +188,18 @@ public final class plasmaHTCache {
log.logError("File storage failed (IO error): " + e.getMessage()); log.logError("File storage failed (IO error): " + e.getMessage());
return false; return false;
} }
cleanup(); writeFileAnnouncement(file);
return true; return true;
} }
/**
 * Registers a file that has been written into the cache directory.
 *
 * <p>If the file exists, its length is added to {@code currCacheSize}, it is
 * entered into {@code cacheAge} under the key built by {@code ageString} from
 * its last-modified time, and {@code cleanup()} is run. A file that does not
 * exist is ignored.
 *
 * @param file the file to register
 */
public void writeFileAnnouncement(File file) {
    if (!file.exists()) {
        // nothing was written, so there is nothing to account for
        return;
    }
    currCacheSize += file.length();
    final long modified = file.lastModified();
    cacheAge.put(ageString(modified, file), file);
    cleanup();
}
private void cleanup() { private void cleanup() {
// clean up cache to have enough space for next entries // clean up cache to have enough space for next entries
File f; File f;
@ -509,7 +517,6 @@ public final class plasmaHTCache {
// to be defined later: // to be defined later:
this.cacheArray = null; this.cacheArray = null;
this.status = CACHE_UNFILLED;
} }
public String name() { public String name() {
@ -534,10 +541,11 @@ public final class plasmaHTCache {
} }
} }
/*
public boolean update() { public boolean update() {
return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD)); return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));
} }
*/
// the following three methods for cache read/write granting shall be as loose as possible // the following three methods for cache read/write granting shall be as loose as possible
// but also as strict as necessary to enable caching of most items // but also as strict as necessary to enable caching of most items

@ -216,7 +216,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
addEntry(wordHash, wordEntry, creationTime); addEntry(wordHash, wordEntry, creationTime);
urlCount++; urlCount++;
// protect against memory shortage // protect against memory shortage
while (rt.freeMemory() < 1000000) flushFromMem(); while (rt.freeMemory() < 1000000) {flushFromMem(); java.lang.System.gc();}
// write a log // write a log
if (System.currentTimeMillis() > messageTime) { if (System.currentTimeMillis() > messageTime) {
System.gc(); // for better statistic System.gc(); // for better statistic

Loading…
Cancel
Save