*) setting htCache.Entry fields to private

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2484 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent ab5a9bee66
commit 393a7d10be

@ -630,7 +630,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String storeError = cacheEntry.shallStoreCacheForProxy();
boolean storeHTCache = cacheEntry.profile.storeHTCache();
boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url,cacheEntry.responseHeader.mime());
boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url(),cacheEntry.responseHeader.mime());
if (
/*
* Now we store the response into the htcache directory if

@ -200,7 +200,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
htCache = createCacheEntry(requestDate, requestHeader, res);
// abort the download if the cache file path is too long ...
if (htCache.cacheFile.getAbsolutePath().length() > serverSystem.maxPathLength) {
if (htCache.cacheFile().getAbsolutePath().length() > serverSystem.maxPathLength) {
remote.close();
this.log.logInfo("REJECTED URL " + this.url.toString() + " because path too long '" + this.cacheManager.cachePath.getAbsolutePath() + "'");
addURLtoErrorDB(plasmaCrawlEURL.DENIED_CACHEFILE_PATH_TOO_LONG);
@ -208,11 +208,11 @@ public final class CrawlWorker extends AbstractCrawlWorker {
}
// reserve cache entry
if (!htCache.cacheFile.getCanonicalPath().startsWith(this.cacheManager.cachePath.getCanonicalPath())) {
if (!htCache.cacheFile().getCanonicalPath().startsWith(this.cacheManager.cachePath.getCanonicalPath())) {
// reject the file if its canonical path does not lie inside the cache directory
remote.close();
this.log.logInfo("REJECTED URL " + this.url.toString() + " because of an invalid file path ('" +
htCache.cacheFile.getCanonicalPath() + "' does not start with '" +
htCache.cacheFile().getCanonicalPath() + "' does not start with '" +
this.cacheManager.cachePath.getAbsolutePath() + "').");
addURLtoErrorDB(plasmaCrawlEURL.DENIED_INVALID_CACHEFILE_PATH);
return (htCache = null);
@ -231,7 +231,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
try {
fos = new FileOutputStream(cacheFile);
res.writeContent(fos); // superfluous write to array
htCache.cacheArray = null;
htCache.setCacheArray(null);
this.cacheManager.writeFileAnnouncement(cacheFile);
//htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
} finally {

@ -701,23 +701,23 @@ public final class plasmaHTCache {
public final class Entry {
// the class objects
public Date initDate; // the date when the request happened; will be used as a key
public int depth; // the depth of prefetching
public httpHeader requestHeader; // we carry also the header to prevent too many file system access
public String responseStatus;
public httpHeader responseHeader; // we carry also the header to prevent too many file system access
public File cacheFile; // the cache file
public byte[] cacheArray; // or the cache as byte-array
public URL url;
public String name; // the name of the link, read as anchor from an <a>-tag
public String nomalizedURLHash;
public String nomalizedURLString;
public int status; // cache load/hit/stale etc status
public Date lastModified;
public char doctype;
public String language;
public plasmaCrawlProfile.entry profile;
private String initiator;
private Date initDate; // the date when the request happened; will be used as a key
private int depth; // the depth of prefetching
private httpHeader requestHeader; // we carry also the header to prevent too many file system access
private String responseStatus;
private httpHeader responseHeader; // we carry also the header to prevent too many file system access
private File cacheFile; // the cache file
private byte[] cacheArray; // or the cache as byte-array
private URL url;
private String name; // the name of the link, read as anchor from an <a>-tag
private String nomalizedURLHash;
private String nomalizedURLString;
private int status; // cache load/hit/stale etc status
private Date lastModified;
private char doctype;
private String language;
private plasmaCrawlProfile.entry profile;
private String initiator;
protected Object clone() throws CloneNotSupportedException {
return new Entry(
@ -793,6 +793,19 @@ public final class plasmaHTCache {
/**
 * Returns the name of the link this entry was created for, i.e. the anchor
 * read from an {@code <a>}-tag.
 *
 * @return the link name held by this entry
 */
public String name() {
    return name;
}
/**
 * Returns the URL this cache entry belongs to.
 *
 * @return the value of the {@code url} field
 */
public URL url() {
    return url;
}
/**
 * Returns the URL hash of this entry.
 *
 * @return the value of the {@code nomalizedURLHash} field (going by the field
 *         name, the hash of the normalized URL)
 */
public String urlHash() {
    return nomalizedURLHash;
}
/**
 * Returns the crawl profile attached to this entry.
 *
 * @return the value of the {@code profile} field
 */
public plasmaCrawlProfile.entry profile() {
    return profile;
}
/**
 * Returns the initiator recorded for this entry.
 *
 * @return the value of the {@code initiator} field
 */
public String initiator() {
    return initiator;
}
@ -804,6 +817,10 @@ public final class plasmaHTCache {
return this.cacheArray.length;
}
/**
 * Returns the depth of prefetching stored in this entry.
 *
 * @return the value of the {@code depth} field
 */
public int depth() {
    return depth;
}
public URL referrerURL() {
if (this.requestHeader == null) return null;
try {
@ -813,6 +830,26 @@ public final class plasmaHTCache {
}
}
/**
 * Returns the cache file of this entry.
 *
 * @return the value of the {@code cacheFile} field
 */
public File cacheFile() {
    return cacheFile;
}
/**
 * Replaces the in-memory copy of the cached content.
 * The array is stored as passed in, without copying.
 *
 * @param data the new content bytes; callers pass {@code null} to drop the
 *             in-memory copy
 */
public void setCacheArray(byte[] data) {
    cacheArray = data;
}
/**
 * Returns the cached content held as a byte array.
 * The internal array itself is returned, not a copy.
 *
 * @return the value of the {@code cacheArray} field; callers check it
 *         against {@code null} before use
 */
public byte[] cacheArray() {
    return cacheArray;
}
/**
 * Returns the request header carried along with this entry.
 *
 * @return the value of the {@code requestHeader} field; callers check it
 *         against {@code null} before use
 */
public httpHeader requestHeader() {
    return requestHeader;
}
/**
 * Returns the response header carried along with this entry.
 *
 * @return the value of the {@code responseHeader} field; callers check it
 *         against {@code null} before use
 */
public httpHeader responseHeader() {
    return responseHeader;
}
/*
public boolean update() {
return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));

@ -173,7 +173,7 @@ public class plasmaSnippetCache {
if ((fetchOnline) && (resource == null)) {
plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000);
if (entry != null) {
header = entry.responseHeader;
header = entry.responseHeader();
}
resource = cacheManager.loadResource(url);
source = SOURCE_WEB;

@ -814,7 +814,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* Testing if the content type is supported by the available parsers
* ========================================================================= */
boolean isSupportedContent = (entry.responseHeader != null) &&
plasmaParser.supportedContent(entry.url,entry.responseHeader.mime());
plasmaParser.supportedContent(entry.url(),entry.responseHeader.mime());
/* =========================================================================
* INDEX CONTROL HEADER
@ -823,10 +823,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* yacy to index the response returned as answer to a request
* ========================================================================= */
boolean doIndexing = true;
if (entry.requestHeader != null) {
if (entry.requestHeader() != null) {
if (
(entry.requestHeader.containsKey(httpHeader.X_YACY_INDEX_CONTROL)) &&
(((String) entry.requestHeader.get(httpHeader.X_YACY_INDEX_CONTROL)).toUpperCase().equals("NO-INDEX"))
(entry.requestHeader().containsKey(httpHeader.X_YACY_INDEX_CONTROL)) &&
(((String) entry.requestHeader().get(httpHeader.X_YACY_INDEX_CONTROL)).toUpperCase().equals("NO-INDEX"))
) {
doIndexing = false;
}
@ -837,17 +837,17 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
*
* check if ip is local ip address
* ========================================================================= */
InetAddress hostAddress = httpc.dnsResolve(entry.url.getHost());
InetAddress hostAddress = httpc.dnsResolve(entry.url().getHost());
if (hostAddress == null) {
if (this.remoteProxyConfig == null || !this.remoteProxyConfig.useProxy()) {
this.log.logFine("Unknown host in URL '" + entry.url + "'. Will not be indexed.");
this.log.logFine("Unknown host in URL '" + entry.url() + "'. Will not be indexed.");
doIndexing = false;
}
} else if (hostAddress.isSiteLocalAddress()) {
this.log.logFine("Host in URL '" + entry.url + "' has private ip address. Will not be indexed.");
this.log.logFine("Host in URL '" + entry.url() + "' has private ip address. Will not be indexed.");
doIndexing = false;
} else if (hostAddress.isLoopbackAddress()) {
this.log.logFine("Host in URL '" + entry.url + "' has loopback ip address. Will not be indexed.");
this.log.logFine("Host in URL '" + entry.url() + "' has loopback ip address. Will not be indexed.");
doIndexing = false;
}
@ -859,25 +859,25 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
* b) the content should be indexed
* ========================================================================= */
if (
(entry.profile.storeHTCache()) ||
(entry.profile().storeHTCache()) ||
(doIndexing && isSupportedContent)
) {
// store response header
if (entry.responseHeader != null) {
this.cacheManager.storeHeader(entry.nomalizedURLHash, entry.responseHeader);
this.log.logInfo("WROTE HEADER for " + entry.cacheFile);
this.cacheManager.storeHeader(entry.urlHash(), entry.responseHeader);
this.log.logInfo("WROTE HEADER for " + entry.cacheFile());
}
// work off unwritten files
if (entry.cacheArray == null) {
if (entry.cacheArray() == null) {
//this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile);
} else {
String error = entry.shallStoreCacheForProxy();
if (error == null) {
this.cacheManager.writeFile(entry.url, entry.cacheArray);
this.log.logFine("WROTE FILE (" + entry.cacheArray.length + " bytes) for " + entry.cacheFile);
this.cacheManager.writeFile(entry.url(), entry.cacheArray());
this.log.logFine("WROTE FILE (" + entry.cacheArray().length + " bytes) for " + entry.cacheFile());
} else {
this.log.logFine("WRITE OF FILE " + entry.cacheFile + " FORBIDDEN: " + error);
this.log.logFine("WRITE OF FILE " + entry.cacheFile() + " FORBIDDEN: " + error);
}
}
}
@ -888,24 +888,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (doIndexing && isSupportedContent){
// registering the cachefile as in use
if (entry.cacheFile.exists()) {
plasmaHTCache.filesInUse.add(entry.cacheFile);
if (entry.cacheFile().exists()) {
plasmaHTCache.filesInUse.add(entry.cacheFile());
}
// enqueue for further crawling
enQueue(this.sbQueue.newEntry(
entry.url,
entry.url(),
indexURL.urlHash(entry.referrerURL()),
entry.requestHeader.ifModifiedSince(),
entry.requestHeader.containsKey(httpHeader.COOKIE),
entry.requestHeader().ifModifiedSince(),
entry.requestHeader().containsKey(httpHeader.COOKIE),
entry.initiator(),
entry.depth,
entry.profile.handle(),
entry.depth(),
entry.profile().handle(),
entry.name()
));
} else {
if (!entry.profile.storeHTCache() && entry.cacheFile.exists()) {
this.cacheManager.deleteFile(entry.url);
if (!entry.profile().storeHTCache() && entry.cacheFile().exists()) {
this.cacheManager.deleteFile(entry.url());
}
}

Loading…
Cancel
Save