From f23471c47130dfa0233d42f599f54790f74cac1c Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 25 Nov 2013 00:14:53 +0100 Subject: [PATCH] add check to prevent index entries containing url_file_ext_s with ";jsession=xyz" note: the check could be implemented in MultiProtocolURL (but the possible implications could not be fully assessed at this time) --- source/net/yacy/search/schema/CollectionConfiguration.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index b3f6908e7..136a9805d 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -213,6 +213,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri String filename = digestURL.getFileName(); String extension = MultiProtocolURL.getFileExtension(filename); String filenameStub = filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename; + // remove possible jsession (or other url parm like "img.jpg;jsession=123") + // TODO: consider to implement ";jsession=123" check in getFileExtension() + if (extension.indexOf(';') >= 0) extension = extension.substring(0,extension.indexOf(';')); + if (allAttr || contains(CollectionSchema.url_chars_i)) add(doc, CollectionSchema.url_chars_i, us.length()); if (allAttr || contains(CollectionSchema.url_protocol_s)) add(doc, CollectionSchema.url_protocol_s, digestURL.getProtocol()); if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, digestURL.getPaths());