*) setting htCache.Entry fields to private

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2484 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent ab5a9bee66
commit 393a7d10be

@ -630,7 +630,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String storeError = cacheEntry.shallStoreCacheForProxy(); String storeError = cacheEntry.shallStoreCacheForProxy();
boolean storeHTCache = cacheEntry.profile.storeHTCache(); boolean storeHTCache = cacheEntry.profile.storeHTCache();
boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url,cacheEntry.responseHeader.mime()); boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url(),cacheEntry.responseHeader.mime());
if ( if (
/* /*
* Now we store the response into the htcache directory if * Now we store the response into the htcache directory if

@ -200,7 +200,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
htCache = createCacheEntry(requestDate, requestHeader, res); htCache = createCacheEntry(requestDate, requestHeader, res);
// aborting download if content is to long ... // aborting download if content is to long ...
if (htCache.cacheFile.getAbsolutePath().length() > serverSystem.maxPathLength) { if (htCache.cacheFile().getAbsolutePath().length() > serverSystem.maxPathLength) {
remote.close(); remote.close();
this.log.logInfo("REJECTED URL " + this.url.toString() + " because path too long '" + this.cacheManager.cachePath.getAbsolutePath() + "'"); this.log.logInfo("REJECTED URL " + this.url.toString() + " because path too long '" + this.cacheManager.cachePath.getAbsolutePath() + "'");
addURLtoErrorDB(plasmaCrawlEURL.DENIED_CACHEFILE_PATH_TOO_LONG); addURLtoErrorDB(plasmaCrawlEURL.DENIED_CACHEFILE_PATH_TOO_LONG);
@ -208,11 +208,11 @@ public final class CrawlWorker extends AbstractCrawlWorker {
} }
// reserve cache entry // reserve cache entry
if (!htCache.cacheFile.getCanonicalPath().startsWith(this.cacheManager.cachePath.getCanonicalPath())) { if (!htCache.cacheFile().getCanonicalPath().startsWith(this.cacheManager.cachePath.getCanonicalPath())) {
// if the response has not the right file type then reject file // if the response has not the right file type then reject file
remote.close(); remote.close();
this.log.logInfo("REJECTED URL " + this.url.toString() + " because of an invalid file path ('" + this.log.logInfo("REJECTED URL " + this.url.toString() + " because of an invalid file path ('" +
htCache.cacheFile.getCanonicalPath() + "' does not start with '" + htCache.cacheFile().getCanonicalPath() + "' does not start with '" +
this.cacheManager.cachePath.getAbsolutePath() + "')."); this.cacheManager.cachePath.getAbsolutePath() + "').");
addURLtoErrorDB(plasmaCrawlEURL.DENIED_INVALID_CACHEFILE_PATH); addURLtoErrorDB(plasmaCrawlEURL.DENIED_INVALID_CACHEFILE_PATH);
return (htCache = null); return (htCache = null);
@ -231,7 +231,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
try { try {
fos = new FileOutputStream(cacheFile); fos = new FileOutputStream(cacheFile);
res.writeContent(fos); // superfluous write to array res.writeContent(fos); // superfluous write to array
htCache.cacheArray = null; htCache.setCacheArray(null);
this.cacheManager.writeFileAnnouncement(cacheFile); this.cacheManager.writeFileAnnouncement(cacheFile);
//htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file //htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
} finally { } finally {

@ -701,23 +701,23 @@ public final class plasmaHTCache {
public final class Entry { public final class Entry {
// the class objects // the class objects
public Date initDate; // the date when the request happened; will be used as a key private Date initDate; // the date when the request happened; will be used as a key
public int depth; // the depth of prefetching private int depth; // the depth of prefetching
public httpHeader requestHeader; // we carry also the header to prevent too many file system access private httpHeader requestHeader; // we carry also the header to prevent too many file system access
public String responseStatus; private String responseStatus;
public httpHeader responseHeader; // we carry also the header to prevent too many file system access private httpHeader responseHeader; // we carry also the header to prevent too many file system access
public File cacheFile; // the cache file private File cacheFile; // the cache file
public byte[] cacheArray; // or the cache as byte-array private byte[] cacheArray; // or the cache as byte-array
public URL url; private URL url;
public String name; // the name of the link, read as anchor from an <a>-tag private String name; // the name of the link, read as anchor from an <a>-tag
public String nomalizedURLHash; private String nomalizedURLHash;
public String nomalizedURLString; private String nomalizedURLString;
public int status; // cache load/hit/stale etc status private int status; // cache load/hit/stale etc status
public Date lastModified; private Date lastModified;
public char doctype; private char doctype;
public String language; private String language;
public plasmaCrawlProfile.entry profile; private plasmaCrawlProfile.entry profile;
private String initiator; private String initiator;
protected Object clone() throws CloneNotSupportedException { protected Object clone() throws CloneNotSupportedException {
return new Entry( return new Entry(
@ -793,6 +793,19 @@ public final class plasmaHTCache {
public String name() { public String name() {
return this.name; return this.name;
} }
/**
 * Returns the URL stored in this entry.
 *
 * @return the current value of the {@code url} field
 */
public URL url() {
return this.url;
}
/**
 * Returns the URL hash stored in this entry.
 *
 * @return the current value of the {@code nomalizedURLHash} field
 *         (the field name is spelled this way in the class)
 */
public String urlHash() {
return this.nomalizedURLHash;
}
/**
 * Returns the crawl profile entry attached to this entry.
 *
 * @return the current value of the {@code profile} field
 */
public plasmaCrawlProfile.entry profile() {
return this.profile;
}
public String initiator() { public String initiator() {
return this.initiator; return this.initiator;
} }
@ -804,6 +817,10 @@ public final class plasmaHTCache {
return this.cacheArray.length; return this.cacheArray.length;
} }
/**
 * Returns the prefetch depth recorded for this entry.
 *
 * @return the current value of the {@code depth} field
 */
public int depth() {
return this.depth;
}
public URL referrerURL() { public URL referrerURL() {
if (this.requestHeader == null) return null; if (this.requestHeader == null) return null;
try { try {
@ -813,6 +830,26 @@ public final class plasmaHTCache {
} }
} }
/**
 * Returns the cache file associated with this entry.
 *
 * @return the current value of the {@code cacheFile} field
 */
public File cacheFile() {
return this.cacheFile;
}
/**
 * Replaces the byte-array form of the cached content held by this entry.
 * The given reference is stored as-is; no copy is made.
 *
 * @param data the new content array; {@code null} is accepted and simply
 *             stored, leaving the entry without an in-memory array
 */
public void setCacheArray(byte[] data) {
this.cacheArray = data;
}
/**
 * Returns the byte-array form of the cached content held by this entry.
 * The internal array reference is returned directly, not a copy.
 *
 * @return the current value of the {@code cacheArray} field, which is
 *         {@code null} if no array is set
 */
public byte[] cacheArray() {
return this.cacheArray;
}
/**
 * Returns the request header carried by this entry.
 *
 * @return the current value of the {@code requestHeader} field; may be
 *         {@code null}, so callers should check before dereferencing
 */
public httpHeader requestHeader() {
return this.requestHeader;
}
/**
 * Returns the response header carried by this entry.
 *
 * @return the current value of the {@code responseHeader} field; may be
 *         {@code null}, so callers should check before dereferencing
 */
public httpHeader responseHeader() {
return this.responseHeader;
}
/* /*
public boolean update() { public boolean update() {
return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD)); return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));

@ -173,7 +173,7 @@ public class plasmaSnippetCache {
if ((fetchOnline) && (resource == null)) { if ((fetchOnline) && (resource == null)) {
plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000); plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000);
if (entry != null) { if (entry != null) {
header = entry.responseHeader; header = entry.responseHeader();
} }
resource = cacheManager.loadResource(url); resource = cacheManager.loadResource(url);
source = SOURCE_WEB; source = SOURCE_WEB;

@ -814,7 +814,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* Testing if the content type is supported by the available parsers * Testing if the content type is supported by the available parsers
* ========================================================================= */ * ========================================================================= */
boolean isSupportedContent = (entry.responseHeader != null) && boolean isSupportedContent = (entry.responseHeader != null) &&
plasmaParser.supportedContent(entry.url,entry.responseHeader.mime()); plasmaParser.supportedContent(entry.url(),entry.responseHeader.mime());
/* ========================================================================= /* =========================================================================
* INDEX CONTROL HEADER * INDEX CONTROL HEADER
@ -823,10 +823,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* yacy to index the response returned as answer to a request * yacy to index the response returned as answer to a request
* ========================================================================= */ * ========================================================================= */
boolean doIndexing = true; boolean doIndexing = true;
if (entry.requestHeader != null) { if (entry.requestHeader() != null) {
if ( if (
(entry.requestHeader.containsKey(httpHeader.X_YACY_INDEX_CONTROL)) && (entry.requestHeader().containsKey(httpHeader.X_YACY_INDEX_CONTROL)) &&
(((String) entry.requestHeader.get(httpHeader.X_YACY_INDEX_CONTROL)).toUpperCase().equals("NO-INDEX")) (((String) entry.requestHeader().get(httpHeader.X_YACY_INDEX_CONTROL)).toUpperCase().equals("NO-INDEX"))
) { ) {
doIndexing = false; doIndexing = false;
} }
@ -837,17 +837,17 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* *
* check if ip is local ip address * check if ip is local ip address
* ========================================================================= */ * ========================================================================= */
InetAddress hostAddress = httpc.dnsResolve(entry.url.getHost()); InetAddress hostAddress = httpc.dnsResolve(entry.url().getHost());
if (hostAddress == null) { if (hostAddress == null) {
if (this.remoteProxyConfig == null || !this.remoteProxyConfig.useProxy()) { if (this.remoteProxyConfig == null || !this.remoteProxyConfig.useProxy()) {
this.log.logFine("Unknown host in URL '" + entry.url + "'. Will not be indexed."); this.log.logFine("Unknown host in URL '" + entry.url() + "'. Will not be indexed.");
doIndexing = false; doIndexing = false;
} }
} else if (hostAddress.isSiteLocalAddress()) { } else if (hostAddress.isSiteLocalAddress()) {
this.log.logFine("Host in URL '" + entry.url + "' has private ip address. Will not be indexed."); this.log.logFine("Host in URL '" + entry.url() + "' has private ip address. Will not be indexed.");
doIndexing = false; doIndexing = false;
} else if (hostAddress.isLoopbackAddress()) { } else if (hostAddress.isLoopbackAddress()) {
this.log.logFine("Host in URL '" + entry.url + "' has loopback ip address. Will not be indexed."); this.log.logFine("Host in URL '" + entry.url() + "' has loopback ip address. Will not be indexed.");
doIndexing = false; doIndexing = false;
} }
@ -859,25 +859,25 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* b) the content should be indexed * b) the content should be indexed
* ========================================================================= */ * ========================================================================= */
if ( if (
(entry.profile.storeHTCache()) || (entry.profile().storeHTCache()) ||
(doIndexing && isSupportedContent) (doIndexing && isSupportedContent)
) { ) {
// store response header // store response header
if (entry.responseHeader != null) { if (entry.responseHeader != null) {
this.cacheManager.storeHeader(entry.nomalizedURLHash, entry.responseHeader); this.cacheManager.storeHeader(entry.urlHash(), entry.responseHeader);
this.log.logInfo("WROTE HEADER for " + entry.cacheFile); this.log.logInfo("WROTE HEADER for " + entry.cacheFile());
} }
// work off unwritten files // work off unwritten files
if (entry.cacheArray == null) { if (entry.cacheArray() == null) {
//this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile); //this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile);
} else { } else {
String error = entry.shallStoreCacheForProxy(); String error = entry.shallStoreCacheForProxy();
if (error == null) { if (error == null) {
this.cacheManager.writeFile(entry.url, entry.cacheArray); this.cacheManager.writeFile(entry.url(), entry.cacheArray());
this.log.logFine("WROTE FILE (" + entry.cacheArray.length + " bytes) for " + entry.cacheFile); this.log.logFine("WROTE FILE (" + entry.cacheArray().length + " bytes) for " + entry.cacheFile());
} else { } else {
this.log.logFine("WRITE OF FILE " + entry.cacheFile + " FORBIDDEN: " + error); this.log.logFine("WRITE OF FILE " + entry.cacheFile() + " FORBIDDEN: " + error);
} }
} }
} }
@ -888,24 +888,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (doIndexing && isSupportedContent){ if (doIndexing && isSupportedContent){
// registering the cachefile as in use // registering the cachefile as in use
if (entry.cacheFile.exists()) { if (entry.cacheFile().exists()) {
plasmaHTCache.filesInUse.add(entry.cacheFile); plasmaHTCache.filesInUse.add(entry.cacheFile());
} }
// enqueue for further crawling // enqueue for further crawling
enQueue(this.sbQueue.newEntry( enQueue(this.sbQueue.newEntry(
entry.url, entry.url(),
indexURL.urlHash(entry.referrerURL()), indexURL.urlHash(entry.referrerURL()),
entry.requestHeader.ifModifiedSince(), entry.requestHeader().ifModifiedSince(),
entry.requestHeader.containsKey(httpHeader.COOKIE), entry.requestHeader().containsKey(httpHeader.COOKIE),
entry.initiator(), entry.initiator(),
entry.depth, entry.depth(),
entry.profile.handle(), entry.profile().handle(),
entry.name() entry.name()
)); ));
} else { } else {
if (!entry.profile.storeHTCache() && entry.cacheFile.exists()) { if (!entry.profile().storeHTCache() && entry.cacheFile().exists()) {
this.cacheManager.deleteFile(entry.url); this.cacheManager.deleteFile(entry.url());
} }
} }

Loading…
Cancel
Save