added framework for session id filtering (not ready yet)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6671 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 2bc36de336
commit d8d9984913

@ -0,0 +1 @@
PHPSESSIONID

@ -222,6 +222,9 @@ fileHost = localpeer
# specify the path to the MIME matching file table
mimeTable = defaults/httpd.mime
# specify the path to the file that lists session id names (e.g. PHPSESSIONID)
sessionidNamesFile = defaults/sessionid.names
# a path to the file cache, used for the internal proxy and as crawl buffer
# This will be used if the server is addressed as a proxy
proxyCache = DATA/HTCACHE

@ -275,7 +275,12 @@ public final class Switchboard extends serverSwitch {
this.log.logConfig("Work Path: " + this.workPath.toString());
this.dictionariesPath = getConfigPath(SwitchboardConstants.DICTIONARY_SOURCE_PATH, SwitchboardConstants.DICTIONARY_SOURCE_PATH_DEFAULT);
this.log.logConfig("Dictionaries Path:" + this.dictionariesPath.toString());
// init sessionid name file
final String sessionidNamesFile = getConfig("sessionidNamesFile","");
this.log.logConfig("Loading sessionid file " + sessionidNamesFile);
DigestURI.initSessionIDNames(new File(getRootPath(), sessionidNamesFile));
// init tables
this.tables = new WorkTables(this.workPath);
@ -1545,7 +1550,7 @@ public final class Switchboard extends serverSwitch {
return ((Boolean)status[SwitchboardConstants.CRAWLJOB_STATUS]).booleanValue();
}
}
public indexingQueueEntry parseDocument(final indexingQueueEntry in) {
in.queueEntry.updateStatus(Response.QUEUE_STATE_PARSING);

@ -30,7 +30,9 @@ package net.yacy.kelondro.data.meta;
import java.io.File;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.text.Collator;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -39,6 +41,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.Domains;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.Punycode;
import net.yacy.kelondro.util.Punycode.PunycodeException;
@ -53,6 +56,23 @@ public class DigestURI implements Serializable {
private static final Pattern patternAmp = Pattern.compile("&");
private static final Pattern patternMail = Pattern.compile("^[a-z]+:.*?");
// session id handling
// Collator (US locale) that defines the ordering of the session id name set.
// It is configured in the static initializer below with SECONDARY strength and
// no decomposition.
// NOTE(review): per the java.text.Collator documentation, SECONDARY strength
// commonly ignores upper/lower case differences, which would match the
// "insensitive" name -- confirm for Locale.US.
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
// Set of session id names; ordering and element equality are decided by
// insensitiveCollator. Filled by initSessionIDNames(File).
private static final TreeSet<String> sessionIDnames;
static {
// the collator must be fully configured before the set that uses it is created
insensitiveCollator.setStrength(Collator.SECONDARY);
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
sessionIDnames = new TreeSet<String>(insensitiveCollator);
}
/**
 * Adds the session id names found in the given file to the static name set.
 * Each entry returned by FileUtils.loadList is trimmed; null entries and
 * entries that are empty after trimming are skipped. Names that are already
 * in the set are kept, the set is not cleared before loading.
 *
 * @param idNamesFile the file handed to FileUtils.loadList
 */
public static final void initSessionIDNames(File idNamesFile) {
    for (final String rawEntry : FileUtils.loadList(idNamesFile)) {
        if (rawEntry != null) {
            final String name = rawEntry.trim();
            if (name.length() != 0) {
                sessionIDnames.add(name);
            }
        }
    }
}
// class variables
private String protocol, host, userInfo, path, quest, ref, hash;
private int port;
@ -554,15 +574,15 @@ public class DigestURI implements Serializable {
}
public String getFile() {
return getFile(true);
return getFile(false);
}
public String getFile(final boolean includeReference) {
public String getFile(final boolean excludeReference) {
// this is the path plus quest plus ref
// if there is no quest and no ref the result is identical to getPath
// this is defined according to http://java.sun.com/j2se/1.4.2/docs/api/java/net/URL.html#getFile()
if (quest != null) return ((includeReference) && (ref != null)) ? path + "?" + quest + "#" + ref : path + "?" + quest;
return ((includeReference) && (ref != null)) ? path + "#" + ref : path;
if (quest == null) return (excludeReference || ref == null) ? path : path + "#" + ref;
return (excludeReference || ref == null) ? path + "?" + quest : path + "?" + quest + "#" + ref;
}
public String getFileName() {
@ -636,15 +656,15 @@ public class DigestURI implements Serializable {
return toNormalform(false, true);
}
public String toNormalform(final boolean stripReference, final boolean stripAmp) {
String result = toNormalform(!stripReference);
public String toNormalform(final boolean excludeReference, final boolean stripAmp) {
String result = toNormalform(excludeReference);
if (stripAmp) {
result = result.replaceAll("&amp;", "&");
}
return result;
}
private String toNormalform(final boolean includeReference) {
private String toNormalform(final boolean excludeReference) {
// generates a normal form of the URL
boolean defaultPort = false;
if (this.protocol.equals("mailto")) {
@ -658,7 +678,7 @@ public class DigestURI implements Serializable {
} else if (this.protocol.equals("file")) {
defaultPort = true;
}
final String path = this.getFile(includeReference);
final String path = this.getFile(excludeReference);
if (defaultPort) {
return

Loading…
Cancel
Save