Fix NullPointerException when profile() returns null; other minor changes.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2009 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
borg-0300 19 years ago
parent 7f167be945
commit 92110aea32

@ -44,21 +44,22 @@
package de.anomic.plasma; package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroStack; import de.anomic.kelondro.kelondroStack;
import de.anomic.server.serverDate;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.server.serverDate;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
public class plasmaSwitchboardQueue { public class plasmaSwitchboardQueue {
private kelondroStack sbQueueStack; private kelondroStack sbQueueStack;
@ -327,16 +328,26 @@ public class plasmaSwitchboardQueue {
* if the answer is 'NO' (do not index), it returns a string with the reason * if the answer is 'NO' (do not index), it returns a string with the reason
* to reject the crawling demand in clear text * to reject the crawling demand in clear text
*/ */
public String shallIndexCacheForProxy() { public final String shallIndexCacheForProxy() {
if (profile() == null) {
return "shallIndexCacheForProxy: profile() is null !";
}
// check profile // check profile
if (!profile().localIndexing()) { return "Indexing_Not_Allowed"; } if (!profile().localIndexing()) {
return "Indexing_Not_Allowed";
}
String nURL = normalizedURLString(); String nURL = normalizedURLString();
// -CGI access in request // -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches // CGI access makes the page very individual, and therefore not usable in caches
if (!profile().crawlingQ()) { if (!profile().crawlingQ()) {
if (plasmaHTCache.isPOST(nURL)) { return "Dynamic_(POST)"; } if (plasmaHTCache.isPOST(nURL)) {
if (plasmaHTCache.isCGI(nURL)) { return "Dynamic_(CGI)"; } return "Dynamic_(POST)";
}
if (plasmaHTCache.isCGI(nURL)) {
return "Dynamic_(CGI)";
}
} }
// -authorization cases in request // -authorization cases in request
@ -346,7 +357,9 @@ public class plasmaSwitchboardQueue {
// we checked that in shallStoreCache // we checked that in shallStoreCache
// a picture cannot be indexed // a picture cannot be indexed
if (plasmaHTCache.noIndexingURL(nURL)) { return "Media_Content_(forbidden)"; } if (plasmaHTCache.noIndexingURL(nURL)) {
return "Media_Content_(forbidden)";
}
// -cookies in request // -cookies in request
// unfortunately, we cannot index pages which have been requested with a cookie // unfortunately, we cannot index pages which have been requested with a cookie
@ -361,15 +374,21 @@ public class plasmaSwitchboardQueue {
// thus we do not care about it here for indexing // thus we do not care about it here for indexing
if (responseHeader() != null) { if (responseHeader() != null) {
// a picture cannot be indexed // a picture cannot be indexed
if (plasmaHTCache.isPicture(responseHeader())) return "Media_Content_(Picture)"; if (plasmaHTCache.isPicture(responseHeader())) {
if (!plasmaHTCache.isText(responseHeader())) return "Media_Content_(not_text)"; return "Media_Content_(Picture)";
}
if (!plasmaHTCache.isText(responseHeader())) {
return "Media_Content_(not_text)";
}
// -if-modified-since in request // -if-modified-since in request
// if the page is fresh at the very moment we can index it // if the page is fresh at the very moment we can index it
if ((ifModifiedSince != null) && (responseHeader().containsKey(httpHeader.LAST_MODIFIED))) { if ((ifModifiedSince != null) && (responseHeader().containsKey(httpHeader.LAST_MODIFIED))) {
// parse date // parse date
Date d = responseHeader().lastModified(); Date d = responseHeader().lastModified();
if (d == null) d = new Date(serverDate.correctedUTCTime()); if (d == null) {
d = new Date(serverDate.correctedUTCTime());
}
// finally, we shall treat the cache as stale if the modification time is after the if-.. time // finally, we shall treat the cache as stale if the modification time is after the if-.. time
if (d.after(ifModifiedSince)) { if (d.after(ifModifiedSince)) {
//System.out.println("***not indexed because if-modified-since"); //System.out.println("***not indexed because if-modified-since");
@ -392,9 +411,9 @@ public class plasmaSwitchboardQueue {
// the expires value gives us a very easy hint when the cache is stale // the expires value gives us a very easy hint when the cache is stale
// sometimes, the expires date is set to the past to prevent that a page is cached // sometimes, the expires date is set to the past to prevent that a page is cached
// we use that information to see if we should index it // we use that information to see if we should index it
Date expires = responseHeader().expires(); final Date expires = responseHeader().expires();
if (expires != null) { if (expires != null && expires.before(new Date(serverDate.correctedUTCTime()))) {
if (expires.before(new Date(serverDate.correctedUTCTime()))) return "Stale_(Expired)"; return "Stale_(Expired)";
} }
// -lastModified in cached response // -lastModified in cached response
@ -415,15 +434,17 @@ public class plasmaSwitchboardQueue {
cacheControl.startsWith("NO-CACHE") || cacheControl.startsWith("NO-CACHE") ||
cacheControl.startsWith("NO-STORE")) { cacheControl.startsWith("NO-STORE")) {
// easy case // easy case
return "Stale_(denied_by_cache-control=" + cacheControl+ ")"; return "Stale_(denied_by_cache-control=" + cacheControl + ")";
// } else if (cacheControl.startsWith("PUBLIC")) { // } else if (cacheControl.startsWith("PUBLIC")) {
// // ok, do nothing // // ok, do nothing
} else if (cacheControl.startsWith("MAX-AGE=")) { } else if (cacheControl.startsWith("MAX-AGE=")) {
// we need also the load date // we need also the load date
Date date = responseHeader().date(); final Date date = responseHeader().date();
if (date == null) return "Stale_(no_date_given_in_response)"; if (date == null) {
return "Stale_(no_date_given_in_response)";
}
try { try {
long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live final long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
if (serverDate.correctedUTCTime() - date.getTime() > ttl) { if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
//System.out.println("***not indexed because cache-control"); //System.out.println("***not indexed because cache-control");
return "Stale_(expired_by_cache-control)"; return "Stale_(expired_by_cache-control)";
@ -443,11 +464,15 @@ public class plasmaSwitchboardQueue {
* if the answer is 'NO' (do not index), it returns a string with the reason * if the answer is 'NO' (do not index), it returns a string with the reason
* to reject the crawling demand in clear text * to reject the crawling demand in clear text
*/ */
public String shallIndexCacheForCrawler() { public final String shallIndexCacheForCrawler() {
if (profile() == null) {
return "shallIndexCacheForCrawler: profile() is null !";
}
// check profile // check profile
if (!profile().localIndexing()) { return "Indexing_Not_Allowed"; } if (!profile().localIndexing()) { return "Indexing_Not_Allowed"; }
String nURL = normalizedURLString(); final String nURL = normalizedURLString();
// -CGI access in request // -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches // CGI access makes the page very individual, and therefore not usable in caches
if (!profile().crawlingQ()) { if (!profile().crawlingQ()) {

Loading…
Cancel
Save