added url_file_name_s in default collection schema for the file name

without the file extension. This part of the file path is removed from the multi-field url_paths_sxt, which therefore no longer contains the file name as the last element of the path list. The same applies to the new fields source_file_name_s and target_file_name_s in the webgraph schema.
12 years ago · 16d1d744fa
parent 8d1c4c423d
commit 16d1d744fa
26 changed files with 136 additions and 116 deletions
--- a/defaults/solr.collection.schema
+++ b/defaults/solr.collection.schema
@ -334,12 +334,15 @@ underline_txt
 ## the protocol of the url
 url_protocol_s

-## all path elements in the url
-url_paths_sxt
+## the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension
+url_file_name_s

 ## the file name extension
 url_file_ext_s

+## all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name
+url_paths_sxt
+
 ## number of key-value pairs in search part of the url
 #url_parameter_i

--- a/defaults/solr.webgraph.schema
+++ b/defaults/solr.webgraph.schema
@ -41,6 +41,9 @@ source_id_s
 ## the url without the protocol (source)
 #source_urlstub_s

+## the file name without the extension (source)
+#source_file_name_s
+
 ## the file name extension (source)
 #source_file_ext_s

@ -53,7 +56,7 @@ source_id_s
 ## count of all path elements in the url (source)
 #source_path_folders_count_i

-## all path elements in the url (source)
+## all path elements in the url without the file name (source)
 #source_path_folders_sxt

 ## number of key-value pairs in search part of the url (source)
@ -132,6 +135,9 @@ target_protocol_s
 ## the url without the protocol (target)
 target_urlstub_s

+## the file name without the extension (target)
+target_file_name_s
+
 ## the file name extension (target)
 target_file_ext_s

@ -144,7 +150,7 @@ target_file_ext_s
 ## count of all path elements in the url (target)
 #target_path_folders_count_i

-## all path elements in the url (target)
+## all path elements in the url without the file name (target)
 target_path_folders_sxt

 ## number of key-value pairs in search part of the url (target)
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@ -185,7 +185,7 @@ public class ViewFile {
        }

        final String[] wordArray = wordArray(post.get("words", null));
-
+        final String ext = MultiProtocolURI.getFileExtension(url.getFileName());
        if (viewMode.equals("plain")) {

            // TODO: how to handle very large files here ?
@ -209,7 +209,6 @@ public class ViewFile {

        } else if (viewMode.equals("iframeCache")) {
            prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
-            final String ext = url.getFileExtension();
            prop.put("viewMode_png", 0);
            prop.put("viewMode_html", 0);
            if (ext.length() > 0 && "jpg.jpeg.png.gif".indexOf(ext) >= 0) {
@ -389,7 +388,7 @@ public class ViewFile {
            prop.put("error_md5", urlEntry.md5());
            prop.put("error_lat", urlEntry.lat());
            prop.put("error_lon", urlEntry.lon());
-            prop.put("error_doctype", Response.doctype2mime(url.getFileExtension(), urlEntry.doctype()));
+            prop.put("error_doctype", Response.doctype2mime(ext, urlEntry.doctype()));
            prop.put("error_language", urlEntry.language());
            prop.put("error_flags", urlEntry.flags().toString());
            prop.put("error_wordCount", urlEntry.wordCount());
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@ -29,6 +29,7 @@ import java.util.List;

 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.RSSMessage;
 import net.yacy.cora.document.analysis.Classification;
 import net.yacy.cora.document.analysis.Classification.ContentDomain;
@ -189,6 +190,7 @@ public class yacysearchitem {
 //            prop.putHTML("content_value", Interaction.TripleGet(result.urlstring(), "http://virtual.x/hasvalue", "anonymous"));
 // END interaction

+            String resultFileName = resultURL.getFileName();
            prop.putHTML("content_target", target);
            if (faviconURL != null && fileType == FileType.HTML) sb.loader.loadIfNotExistBackground(faviconURL, 1024 * 1024 * 10, null, TextSnippet.snippetMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
            prop.putHTML("content_faviconCode", URLLicense.aquireLicense(faviconURL)); // acquire license for favicon url loading
@ -210,7 +212,7 @@ public class yacysearchitem {
            prop.putHTML("content_sizename", RSSMessage.sizename(result.filesize()));
            prop.putHTML("content_showSize_sizename", RSSMessage.sizename(result.filesize()));
            prop.putHTML("content_host", resultURL.getHost() == null ? "" : resultURL.getHost());
-            prop.putHTML("content_file", resultURL.getFileName());
+            prop.putHTML("content_file", resultFileName);
            prop.putHTML("content_path", resultURL.getPath());
            prop.put("content_nl", (item == theSearch.query.offset) ? 0 : 1);
            prop.putHTML("content_publisher", result.publisher());
@ -243,7 +245,7 @@ public class yacysearchitem {
                prop.put("content_heuristic_name", heuristic.heuristicName);
            }
            EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "" + item, 0, 0), false);
-            final String ext = resultURL.getFileExtension().toLowerCase();
+            final String ext = MultiProtocolURI.getFileExtension(resultFileName).toLowerCase();
            if (ext.equals("png") || ext.equals("jpg") || ext.equals("gif")) {
                final String license = URLLicense.aquireLicense(resultURL);
                prop.put("content_code", license);
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@ -269,7 +269,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU

    public final ContentDomain getContentDomain() {
        if (this.contentDomain == null) {
-            this.contentDomain = Classification.getContentDomain(this.getFileExtension());
+            this.contentDomain = Classification.getContentDomain(getFileExtension(this.getFileName()));
        }
        return this.contentDomain;
    }
@ -711,14 +711,10 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
        return this.path.substring(p + 1); // the 'real' file name
    }

-    public String getFileExtension() {
-        return getFileExtension(getFileName());
-    }
-
    public static String getFileExtension(final String fileName) {
        final int p = fileName.lastIndexOf('.');
        if (p < 0) return "";
-        return fileName.substring(p + 1);
+        return fileName.substring(p + 1).toLowerCase();
    }

    public String getPath() {
@ -726,7 +722,12 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
    }

    public String[] getPaths() {
-        return this.path == null ? null : this.path.charAt(0) == '/' ? CommonPattern.SLASH.split(this.path.substring(1)) : CommonPattern.SLASH.split(this.path);
+        String s = this.path == null ? "" : this.path.charAt(0) == '/' ? this.path.substring(1) : this.path;
+        int p = s.lastIndexOf('/');
+        if (p < 0) return new String[0];
+        s = s.substring(0, p); // the paths do not contain the last part, which is considered as the getFileName() part.
+        String[] paths = CommonPattern.SLASH.split(s);
+        return paths;
    }

    /**
@ -973,15 +974,12 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
        return (this.searchpart != null) && (this.searchpart.length() > 0);
    }

-    public final boolean isCGI() {
-        final String ls = unescape(this.path.toLowerCase());
-        return ls.indexOf(".cgi",0) >= 0 ||
-               ls.indexOf(".exe",0) >= 0;
+    public static final boolean isCGI(final String extension) { // expects the bare, dot-less file extension
+        return extension != null && !extension.isEmpty() && "cgi.exe".indexOf(extension.toLowerCase()) >= 0; // image extensions are not CGI; "" would otherwise match at index 0
+    }

-    public final boolean isImage() {
-        final String ext = getFileExtension().toLowerCase();
-        return "png.gif.jpg.jpeg".indexOf(ext) >= 0;
+    public static final boolean isImage(final String extension) { // expects the bare, dot-less file extension
+        return extension != null && !extension.isEmpty() && "png.gif.jpg.jpeg".indexOf(extension.toLowerCase()) >= 0; // "" would otherwise match at index 0 and flag extension-less URLs as images
+    }

    public final boolean isIndividual() {
--- a/source/net/yacy/cora/document/analysis/Classification.java
+++ b/source/net/yacy/cora/document/analysis/Classification.java
@ -201,10 +201,10 @@ public class Classification {
    }

    public static String url2mime(final MultiProtocolURI url, final String dfltMime) {
-        return url == null ? "application/octet-stream" : ext2mime(url.getFileExtension(), dfltMime);
+        return url == null ? "application/octet-stream" : ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()), dfltMime);
    }

    public static String url2mime(final MultiProtocolURI url) {
-        return url == null ? "application/octet-stream" : ext2mime(url.getFileExtension());
+        return url == null ? "application/octet-stream" : ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()));
    }
 }
--- a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java
+++ b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java
@ -146,43 +146,43 @@ public class SchemaConfiguration extends Configuration implements Serializable {
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final String value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final Date value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.getTime() > 0))) key.add(doc, value);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final String[] value) {
-        assert key.isMultiValued();
+        assert key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final Integer[] value) {
-        assert key.isMultiValued();
+        assert key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final List<?> values) {
-        assert key.isMultiValued();
+        assert key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || (values != null && !values.isEmpty()))) key.add(doc, values);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final int value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final long value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
        if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
    }

    public void add(final SolrInputDocument doc, final SchemaDeclaration key, final boolean value) {
-        assert !key.isMultiValued();
-        if (isEmpty() || contains(key)) key.add(doc, value);
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
+        if ((isEmpty() || contains(key)) && (!this.lazy || value)) key.add(doc, value);
    }

    public static Date getDate(SolrInputDocument doc, final SchemaDeclaration key) {
--- a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
@ -148,14 +148,16 @@ public class JsonResponseWriter implements QueryResponseWriter {
                    solitaireTag(writer, stag, value.stringValue());
                    continue;
                }
-                
                // some special handling here
                if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) {
                    String u = value.stringValue();
                    try {
                        url = new MultiProtocolURI(u);
+                        String filename = url.getFileName();
                        solitaireTag(writer, "link", u);
-                        solitaireTag(writer, "file", url.getFileName());
+                        solitaireTag(writer, "file", filename);
+                        // get image license
+                        if (MultiProtocolURI.isImage(filename)) URLLicense.aquireLicense(urlhash, url.toNormalform(true));
                    } catch (MalformedURLException e) {}
                    continue;
                }
@ -206,9 +208,6 @@ public class JsonResponseWriter implements QueryResponseWriter {
                //missing: "code","faviconCode"
            }
            
-            // get image license
-            if (url.isImage()) URLLicense.aquireLicense(urlhash, url.toNormalform(true));
-            
            // compute snippet from texts            
            solitaireTag(writer, "path", path.toString());
            solitaireTag(writer, "title", title.length() == 0 ? (texts.size() == 0 ? path.toString() : texts.get(0)) : title);
--- a/source/net/yacy/crawler/data/Latency.java
+++ b/source/net/yacy/crawler/data/Latency.java
@ -204,7 +204,7 @@ public class Latency {
        // for CGI accesses, we double the minimum time
        // mostly there is a database access in the background
        // which creates a lot of unwanted IO on target site
-        if (url.isCGI()) waiting = waiting * 2;
+        if (MultiProtocolURI.isCGI(url.getFileName())) waiting = waiting * 2;

        // if we have accessed the domain many times, get slower (the flux factor)
        if (!local) waiting += host.flux(waiting);
@ -238,7 +238,7 @@ public class Latency {
        // for CGI accesses, we double the minimum time
        // mostly there is a database access in the background
        // which creates a lot of unwanted IO on target site
-        if (url.isCGI()) { waiting = waiting * 2; s.append(", isCGI = true -> double"); }
+        if (MultiProtocolURI.isCGI(url.getFileName())) { waiting = waiting * 2; s.append(", isCGI = true -> double"); }

        // if we have accessed the domain many times, get slower (the flux factor)
        int flux = host.flux(waiting);
--- a/source/net/yacy/crawler/data/ResultImages.java
+++ b/source/net/yacy/crawler/data/ResultImages.java
@ -74,7 +74,7 @@ public class ResultImages {
                image.height() > 100 &&
                image.width() < 1200 &&
                image.height() < 1000 &&
-                !"gif".equals(image.url().getFileExtension())) {
+                !"gif".equals(MultiProtocolURI.getFileExtension(image.url().getFileName()))) {
                // && ((urlString.lastIndexOf(".jpg") != -1)) ||
                // ((urlString.lastIndexOf(".png") != -1)){

--- a/source/net/yacy/crawler/retrieval/FileLoader.java
+++ b/source/net/yacy/crawler/retrieval/FileLoader.java
@ -31,6 +31,7 @@ import java.util.Date;
 import java.util.List;

 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.document.analysis.Classification;
 import net.yacy.cora.protocol.HeaderFramework;
@ -94,7 +95,7 @@ public class FileLoader {
        }

        // create response header
-        String mime = Classification.ext2mime(url.getFileExtension());
+        String mime = Classification.ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()));
        ResponseHeader responseHeader = new ResponseHeader(200);
        responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date(url.lastModified())));
        responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
--- a/source/net/yacy/crawler/retrieval/Response.java
+++ b/source/net/yacy/crawler/retrieval/Response.java
@ -70,7 +70,7 @@ public class Response {

    // doctype calculation
    public static char docType(final MultiProtocolURI url) {
-        String ext = url.getFileExtension();
+        String ext = MultiProtocolURI.getFileExtension(url.getFileName());
        if (ext == null) return DT_UNKNOWN;
        if (ext.equals(".gif"))  return DT_IMAGE;
        if (ext.equals(".ico"))  return DT_IMAGE;
@ -169,7 +169,7 @@ public class Response {
        // request and response headers may be zero in case that we process surrogates
        this.requestHeader = new RequestHeader();
        this.responseHeader = new ResponseHeader(200);
-        this.responseHeader.put(HeaderFramework.CONTENT_TYPE, Classification.ext2mime(request.url().getFileExtension(), "text/plain")); // tell parser how to handle the content
+        this.responseHeader.put(HeaderFramework.CONTENT_TYPE, Classification.ext2mime(MultiProtocolURI.getFileExtension(request.url().getFileName()), "text/plain")); // tell parser how to handle the content
        if (!request.isEmpty()) this.responseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(request.size()));
        this.profile = profile;
        this.status = QUEUE_STATE_FRESH;
@ -291,7 +291,7 @@ public class Response {
            return "dynamic_post";
        }

-        if (url().isCGI()) {
+        if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) {
            return "dynamic_cgi";
        }

@ -390,7 +390,7 @@ public class Response {
        if (url().isPOST()) {
            return false;
        }
-        if (url().isCGI()) {
+        if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) {
            return false;
        }

@ -541,7 +541,7 @@ public class Response {
            if (url().isPOST()) {
                return "Dynamic_(POST)";
            }
-            if (url().isCGI()) {
+            if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) {
                return "Dynamic_(CGI)";
            }
        }
@ -684,7 +684,7 @@ public class Response {
        // CGI access makes the page very individual, and therefore not usable in caches
        if (!profile().crawlingQ()) {
            if (url().isPOST()) { return "Dynamic_(POST)"; }
-            if (url().isCGI()) { return "Dynamic_(CGI)"; }
+            if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) { return "Dynamic_(CGI)"; }
        }

        // -authorization cases in request
--- a/source/net/yacy/crawler/retrieval/SMBLoader.java
+++ b/source/net/yacy/crawler/retrieval/SMBLoader.java
@ -113,7 +113,7 @@ public class SMBLoader {
        }

        // create response header
-        String mime = Classification.ext2mime(url.getFileExtension());
+        String mime = Classification.ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()));
        ResponseHeader responseHeader = new ResponseHeader(200);
        responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date(url.lastModified())));
        responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
--- a/source/net/yacy/data/ymark/YMarkAutoTagger.java
+++ b/source/net/yacy/data/ymark/YMarkAutoTagger.java
@ -10,6 +10,7 @@ import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.concurrent.ArrayBlockingQueue;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.crawler.retrieval.Response;
@ -161,7 +162,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
 			}
 			final String clean =  YMarkUtil.cleanTagsString(buffer.toString());
 			if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
-				return document.getFileExtension();
+				return MultiProtocolURI.getFileExtension(document.dc_source().getFileName());
 			}
 			return clean;
 		} finally {
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@ -153,8 +153,8 @@ public class Document {
        return this.languages;
    }

-    public String getFileExtension() {
-    	return this.source.getFileExtension();
+    public String getFileName() {
+    	return this.source.getFileName();
    }

    public Map<String, Set<String>> getGenericFacets() {
--- a/source/net/yacy/document/LibraryProvider.java
+++ b/source/net/yacy/document/LibraryProvider.java
@ -90,7 +90,7 @@ public class LibraryProvider {

        private Dictionary(final String nickname, final String url) {
            try {
-                this.filename = new MultiProtocolURI(url).getFileName();
+                this.filename = (new MultiProtocolURI(url)).getFileName();
            } catch ( final MalformedURLException e ) {
                assert false;
            }
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@ -194,7 +194,7 @@ public final class TextParser {
        try {
            idioms = parsers(location, mimeType);
        } catch (final Parser.Failure e) {
-            final String errorMsg = "Parser Failure for extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "': " + e.getMessage();
+            final String errorMsg = "Parser Failure for extension '" + MultiProtocolURI.getFileExtension(location.getFileName()) + "' or mimetype '" + mimeType + "': " + e.getMessage();
            AbstractParser.log.logWarning(errorMsg);
            throw new Parser.Failure(errorMsg, location);
        }
@ -218,7 +218,7 @@ public final class TextParser {
        try {
            idioms = parsers(location, mimeType);
        } catch (final Parser.Failure e) {
-            final String errorMsg = "Parser Failure for extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "': " + e.getMessage();
+            final String errorMsg = "Parser Failure for extension '" + MultiProtocolURI.getFileExtension(location.getFileName()) + "' or mimetype '" + mimeType + "': " + e.getMessage();
            AbstractParser.log.logWarning(errorMsg);
            throw new Parser.Failure(errorMsg, location);
        }
@ -252,7 +252,7 @@ public final class TextParser {
            final InputStream sourceStream
        ) throws Parser.Failure {
        if (AbstractParser.log.isFine()) AbstractParser.log.logFine("Parsing '" + location + "' from stream");
-        final String fileExt = location.getFileExtension();
+        final String fileExt = MultiProtocolURI.getFileExtension(location.getFileName());
        final String documentCharset = htmlParser.patchCharsetEncoding(charset);
        assert parser != null;

@ -272,7 +272,7 @@ public final class TextParser {
            final String charset,
            final byte[] sourceArray
        ) throws Parser.Failure {
-        final String fileExt = location.getFileExtension();
+        final String fileExt = MultiProtocolURI.getFileExtension(location.getFileName());
        if (AbstractParser.log.isFine()) AbstractParser.log.logFine("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "' from byte[]");
        final String documentCharset = htmlParser.patchCharsetEncoding(charset);
        assert !parsers.isEmpty();
@ -312,7 +312,7 @@ public final class TextParser {

        if (docs == null) {
            if (failedParser.isEmpty()) {
-                final String errorMsg = "Parsing content with file extension '" + location.getFileExtension() + "' and mimetype '" + mimeType + "' failed.";
+                final String errorMsg = "Parsing content with file extension '" + fileExt + "' and mimetype '" + mimeType + "' failed.";
                //log.logWarning("Unable to parse '" + location + "'. " + errorMsg);
                throw new Parser.Failure(errorMsg, location);
            }
@ -362,7 +362,7 @@ public final class TextParser {
        final Set<Parser> idioms = new HashSet<Parser>(2);

        // check extension
-        String ext = url.getFileExtension();
+        String ext = MultiProtocolURI.getFileExtension(url.getFileName());
        Set<Parser> idiom;
        if (ext != null && ext.length() > 0) {
            ext = ext.toLowerCase();
@ -428,11 +428,11 @@ public final class TextParser {
     * @return an error if the extension is not supported, null otherwise
     */
    public static String supportsExtension(final MultiProtocolURI url) {
-        return supportsExtension(url.getFileExtension().toLowerCase());
+        return supportsExtension(MultiProtocolURI.getFileExtension(url.getFileName()).toLowerCase());
    }

    public static String mimeOf(final MultiProtocolURI url) {
-        return mimeOf(url.getFileExtension());
+        return mimeOf(MultiProtocolURI.getFileExtension(url.getFileName()));
    }

    public static String mimeOf(final String ext) {
--- a/source/net/yacy/document/parser/audioTagParser.java
+++ b/source/net/yacy/document/parser/audioTagParser.java
@ -72,8 +72,9 @@ public class audioTagParser extends AbstractParser implements Parser {
            final String charset, final InputStream source)
            throws Parser.Failure, InterruptedException {

-    	final String filename = location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName());
-   	    final String fileext = '.'+location.getFileExtension();
+        String filename = location.getFileName();
+        final String fileext = '.' + MultiProtocolURI.getFileExtension(filename);
+        filename = filename.isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(filename);
    	String mime = mimeType;
   	    
    	// fix mimeType
@ -190,7 +191,7 @@ public class audioTagParser extends AbstractParser implements Parser {
 	                this,
 	                null,
 	                null,
-	                singleList(location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName())), // title
+	                singleList(filename), // title
 	                "", // author
 	                location.getHost(),
 	                null,
--- a/source/net/yacy/document/parser/genericParser.java
+++ b/source/net/yacy/document/parser/genericParser.java
@ -47,7 +47,7 @@ public class genericParser extends AbstractParser implements Parser {
    public Document[] parse(final DigestURI location, final String mimeType,
            final String charset, final InputStream source1)
            throws Parser.Failure, InterruptedException {
-
+        String filename = location.getFileName();
        final Document[] docs = new Document[]{new Document(
                location,
                mimeType,
@ -55,7 +55,7 @@ public class genericParser extends AbstractParser implements Parser {
                this,
                null,
                null,
-                singleList(location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName())), // title
+                singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(filename)), // title
                "", // author
                location.getHost(),
                null,
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@ -473,10 +473,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            final String href = tagopts.getProperty("href", EMPTY_STRING);
            DigestURI url;
            if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
-                final String f = url.getFileName();
-                final int p = f.lastIndexOf('.');
-                final String type = (p < 0) ? EMPTY_STRING : f.substring(p + 1);
-                if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg") || type.equals("tiff") || type.equals("tif")) {
+                final String ext = MultiProtocolURI.getFileExtension(url.getFileName());
+                if (ext.equals("png") || ext.equals("gif") || ext.equals("jpg") || ext.equals("jpeg") || ext.equals("tiff") || ext.equals("tif")) {
                    // special handling of such urls: put them to the image urls
                    final ImageEntry ie = new ImageEntry(url, recursiveParse(text), -1, -1, -1);
                    addImage(this.images, ie);
@ -656,7 +654,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        String ext;
        ArrayList<DigestURI> f = new ArrayList<DigestURI>();
        for (final DigestURI url: this.anchors.keySet()) {
-            ext = url.getFileExtension();
+            ext = MultiProtocolURI.getFileExtension(url.getFileName());
            if (ext == null) continue;
            if (ext.equals("swf")) f.add(url);
        }
@ -666,7 +664,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    public boolean containsFlash() {
        String ext;
        for (final MultiProtocolURI url: this.anchors.keySet()) {
-            ext = url.getFileExtension();
+            ext = MultiProtocolURI.getFileExtension(url.getFileName());
            if (ext == null) continue;
            if (ext.equals("swf")) return true;
        }
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@ -99,8 +99,9 @@ public class genericImageParser extends AbstractParser implements Parser {
        String author = null;
        String keywords = null;
        String description = null;
-        if (mimeType.equals("image/bmp") ||
-            location.getFileExtension().equalsIgnoreCase("bmp")) {
+        String filename = location.getFileName();
+        String ext = MultiProtocolURI.getFileExtension(filename);
+        if (mimeType.equals("image/bmp") || ext.equalsIgnoreCase("bmp")) {
            byte[] b;
            try {
                b = FileUtils.read(sourceStream);
@ -110,10 +111,7 @@ public class genericImageParser extends AbstractParser implements Parser {
            }
            final IMAGEMAP imap = bmpParser.parse(b);
            ii = parseJavaImage(location, imap.getImage());
-        } else if (mimeType.equals("image/jpeg") ||
-                   location.getFileExtension().equalsIgnoreCase("jpg") ||
-                   location.getFileExtension().equalsIgnoreCase("jpeg") ||
-                   location.getFileExtension().equalsIgnoreCase("jpe")) {
+        } else if (mimeType.equals("image/jpeg") || ext.equalsIgnoreCase("jpg") || ext.equalsIgnoreCase("jpeg") || ext.equalsIgnoreCase("jpe")) {
            // use the exif parser from
            // http://www.drewnoakes.com/drewnoakes.com/code/exif/
            // javadoc is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/javadoc/
@ -190,7 +188,7 @@ public class genericImageParser extends AbstractParser implements Parser {
        final String infoString = ii.info.toString();
        images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));

-        if (title == null || title.isEmpty()) title = MultiProtocolURI.unescape(location.getFileName());
+        if (title == null || title.isEmpty()) title = MultiProtocolURI.unescape(filename);

        return new Document[]{new Document(
             location,
@ -297,7 +295,7 @@ public class genericImageParser extends AbstractParser implements Parser {
        DigestURI uri;
        try {
            uri = new DigestURI("http://localhost/" + image.getName());
-            final Document[] document = parser.parse(uri, "image/" + uri.getFileExtension(), "UTF-8", new FileInputStream(image));
+            final Document[] document = parser.parse(uri, "image/" + MultiProtocolURI.getFileExtension(uri.getFileName()), "UTF-8", new FileInputStream(image));
            System.out.println(document[0].toString());
        } catch (final MalformedURLException e) {
            e.printStackTrace();
--- a/source/net/yacy/document/parser/tarParser.java
+++ b/source/net/yacy/document/parser/tarParser.java
@ -33,6 +33,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.GZIPInputStream;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
@ -64,7 +65,7 @@ public class tarParser extends AbstractParser implements Parser {

        final List<Document> docacc = new ArrayList<Document>();
        Document[] subDocs = null;
-        final String ext = url.getFileExtension().toLowerCase();
+        final String ext = MultiProtocolURI.getFileExtension(url.getFileName()).toLowerCase();
        if (ext.equals("gz") || ext.equals("tgz")) {
            try {
                source = new GZIPInputStream(source);
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@ -35,10 +35,10 @@ import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.BlockingQueue;
@ -79,6 +79,7 @@ import net.yacy.kelondro.util.Bitfield;
 import net.yacy.search.index.Segment;
 import net.yacy.search.index.Segment.ReferenceReport;
 import net.yacy.search.index.Segment.ReferenceReportCache;
+import net.yacy.search.schema.WebgraphConfiguration.Subgraph;

 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
@ -256,8 +257,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
            add(doc, CollectionSchema.description_words_val, cv);
        }

+        String filename = digestURI.getFileName();
+        String extension = MultiProtocolURI.getFileExtension(filename);
        if (allAttr || contains(CollectionSchema.author)) add(doc, CollectionSchema.author, md.dc_creator());
-        if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype()));
+        if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, Response.doctype2mime(extension, md.doctype()));
        if (allAttr || contains(CollectionSchema.last_modified)) add(doc, CollectionSchema.last_modified, md.moddate());
        if (allAttr || contains(CollectionSchema.wordcount_i)) add(doc, CollectionSchema.wordcount_i, md.wordCount());

@ -274,7 +277,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri

        // path elements of link
        if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, digestURI.getPaths());
-        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, digestURI.getFileExtension());
+        if (allAttr || contains(CollectionSchema.url_file_name_s)) add(doc, CollectionSchema.url_file_name_s, filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename);
+        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, extension);

        if (allAttr || contains(CollectionSchema.imagescount_i)) add(doc, CollectionSchema.imagescount_i, md.limage());
        if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, md.llocal());
@ -474,8 +478,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
        add(doc, CollectionSchema.fuzzy_signature_unique_b, true); // this must be corrected afterwards!

        // path elements of link
+        String filename = digestURI.getFileName();
+        String extension = MultiProtocolURI.getFileExtension(filename);
        if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, digestURI.getPaths());
-        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, digestURI.getFileExtension());
+        if (allAttr || contains(CollectionSchema.url_file_name_s)) add(doc, CollectionSchema.url_file_name_s, filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename);
+        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, extension);

        // get list of all links; they will be shrinked by urls that appear in other fields of the solr schema
        Set<DigestURI> inboundLinks = document.inboundLinks();
@ -695,8 +702,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                    outboundLinks.remove(canonical);
                    add(doc, CollectionSchema.canonical_s, canonical.toNormalform(false));
                    // set a flag if this is equal to sku
-                    if (contains(CollectionSchema.canonical_equal_sku_b) && canonical.equals(docurl)) {
-                        add(doc, CollectionSchema.canonical_equal_sku_b, true);
+                    if (contains(CollectionSchema.canonical_equal_sku_b)) {
+                        add(doc, CollectionSchema.canonical_equal_sku_b, canonical.equals(docurl));
                    }
                }
            }
@ -784,9 +791,16 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
        if (allAttr || contains(CollectionSchema.inboundlinksnofollowcount_i)) add(doc, CollectionSchema.inboundlinksnofollowcount_i, document.inboundLinkNofollowCount());
        if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, outboundLinks.size());
        if (allAttr || contains(CollectionSchema.outboundlinksnofollowcount_i)) add(doc, CollectionSchema.outboundlinksnofollowcount_i, document.outboundLinkNofollowCount());
+        Map<DigestURI, Properties> alllinks = document.getAnchors();
        
+        // create a subgraph
+        Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size());
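+        //if () { // NOTE(review): disabled placeholder guard with no condition yet; both addEdges calls below currently run unconditionally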
+            webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, true, inboundLinks, citations);
+            webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, false, outboundLinks, citations);
+        //}
+            
        // list all links
-        WebgraphConfiguration.Subgraph subgraph = webgraph.edges(digestURI, responseHeader, collections, clickdepth, document.getAnchors(), images, inboundLinks, outboundLinks, citations);
        doc.webgraphDocuments.addAll(subgraph.edges);
        if (allAttr || contains(CollectionSchema.inboundlinks_protocol_sxt)) add(doc, CollectionSchema.inboundlinks_protocol_sxt, protocolList2indexedList(subgraph.urlProtocols[0]));
        if (allAttr || contains(CollectionSchema.inboundlinks_urlstub_txt)) add(doc, CollectionSchema.inboundlinks_urlstub_txt, subgraph.urlStubs[0]);
@ -1164,8 +1178,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
        if (contains(CollectionSchema.load_date_dt)) add(solrdoc, CollectionSchema.load_date_dt, new Date());

        // path elements of link
+        String filename = digestURI.getFileName();
+        String extension = MultiProtocolURI.getFileExtension(filename);
        if (contains(CollectionSchema.url_paths_sxt)) add(solrdoc, CollectionSchema.url_paths_sxt, digestURI.getPaths());
-        if (contains(CollectionSchema.url_file_ext_s)) add(solrdoc, CollectionSchema.url_file_ext_s, digestURI.getFileExtension());
+        if (contains(CollectionSchema.url_file_name_s)) add(solrdoc, CollectionSchema.url_file_name_s, filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename);
+        if (contains(CollectionSchema.url_file_ext_s)) add(solrdoc, CollectionSchema.url_file_ext_s, extension);
        
        // fail reason and status
        if (contains(CollectionSchema.failreason_s)) add(solrdoc, CollectionSchema.failreason_s, failReason);
--- a/source/net/yacy/search/schema/CollectionSchema.java
+++ b/source/net/yacy/search/schema/CollectionSchema.java
@ -152,8 +152,9 @@ public enum CollectionSchema implements SchemaDeclaration {
    publisher_url_s(SolrType.string, true, true, false, false, false, "publisher url as defined in http://support.google.com/plus/answer/1713826?hl=de"),
    
    url_protocol_s(SolrType.string, true, true, false, false, false, "the protocol of the url"),
-    url_paths_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url"),
+    url_file_name_s(SolrType.string, true, true, false, false, false, "the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension"),
    url_file_ext_s(SolrType.string, true, true, false, false, false, "the file name extension"),
+    url_paths_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"),
    url_parameter_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url"),
    url_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url"),
    url_parameter_value_sxt(SolrType.string, true, true, true, false, false, "the values from key-value pairs in the search part of the url"),
--- a/source/net/yacy/search/schema/WebgraphConfiguration.java
+++ b/source/net/yacy/search/schema/WebgraphConfiguration.java
@ -42,6 +42,7 @@ import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;

 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.federate.solr.ProcessType;
 import net.yacy.cora.federate.solr.SchemaConfiguration;
 import net.yacy.cora.federate.solr.SchemaDeclaration;
@ -111,31 +112,13 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
        }
    }
    
-    public Subgraph edges(
-            final DigestURI source, final ResponseHeader responseHeader, String[] collections, int clickdepth_source,
-            final Map<DigestURI, Properties> alllinks,
-            final Map<DigestURI, ImageEntry> images,
-            final Set<DigestURI> inboundLinks,
-            final Set<DigestURI> outboundLinks,
-            IndexCell<CitationReference> citations
-            ) {
-        boolean allAttr = this.isEmpty();
-        Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size());
-        addEdges(
-                subgraph, source, responseHeader, collections, clickdepth_source,
-                allAttr, alllinks, images, true, inboundLinks, citations);
-        addEdges(
-                subgraph, source, responseHeader, collections, clickdepth_source,
-                allAttr, alllinks, images, false, outboundLinks, citations);
-        return subgraph;
-    }
-    
-    private void addEdges(
+    public void addEdges(
            final Subgraph subgraph,
            final DigestURI source, final ResponseHeader responseHeader, String[] collections, int clickdepth_source,
-            final boolean allAttr, final Map<DigestURI, Properties> alllinks, final Map<DigestURI, ImageEntry> images,
+            final Map<DigestURI, Properties> alllinks, final Map<DigestURI, ImageEntry> images,
            final boolean inbound, final Set<DigestURI> links,
            final IndexCell<CitationReference> citations) {
+        boolean allAttr = this.isEmpty();
        for (final DigestURI target_url: links) {

            Set<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
@ -194,7 +177,12 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
                if (allAttr || contains(WebgraphSchema.source_host_organizationdnc_s)) add(edge, WebgraphSchema.source_host_organizationdnc_s, orga + '.' + dnc);
                if (allAttr || contains(WebgraphSchema.source_host_subdomain_s)) add(edge, WebgraphSchema.source_host_subdomain_s, subdom);
            }
-            if (allAttr || contains(WebgraphSchema.source_file_ext_s)) add(edge, WebgraphSchema.source_file_ext_s, source.getFileExtension());
+            if (allAttr || contains(WebgraphSchema.source_file_ext_s) || contains(WebgraphSchema.source_file_name_s)) {
+                String source_file_name = source.getFileName();
+                String source_file_ext = MultiProtocolURI.getFileExtension(source_file_name);
+                add(edge, WebgraphSchema.source_file_name_s, source_file_name.toLowerCase().endsWith("." + source_file_ext) ? source_file_name.substring(0, source_file_name.length() - source_file_ext.length() - 1) : source_file_name);
+                add(edge, WebgraphSchema.source_file_ext_s, source_file_ext);
+            }
            if (allAttr || contains(WebgraphSchema.source_path_s)) add(edge, WebgraphSchema.source_path_s, source.getPath());
            if (allAttr || contains(WebgraphSchema.source_path_folders_count_i) || contains(WebgraphSchema.source_path_folders_sxt)) {
                String[] paths = source.getPaths();
@ -251,7 +239,12 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
                if (allAttr || contains(WebgraphSchema.target_host_organizationdnc_s)) add(edge, WebgraphSchema.target_host_organizationdnc_s, orga + '.' + dnc);
                if (allAttr || contains(WebgraphSchema.target_host_subdomain_s)) add(edge, WebgraphSchema.target_host_subdomain_s, subdom);
            }
-            if (allAttr || contains(WebgraphSchema.target_file_ext_s)) add(edge, WebgraphSchema.target_file_ext_s, target_url.getFileExtension());
+            if (allAttr || contains(WebgraphSchema.target_file_ext_s) || contains(WebgraphSchema.target_file_name_s)) {
+                String target_file_name = target_url.getFileName();
+                String target_file_ext = MultiProtocolURI.getFileExtension(target_file_name);
+                add(edge, WebgraphSchema.target_file_name_s, target_file_name.toLowerCase().endsWith("." + target_file_ext) ? target_file_name.substring(0, target_file_name.length() - target_file_ext.length() - 1) : target_file_name);
+                add(edge, WebgraphSchema.target_file_ext_s, target_file_ext);
+            }
            if (allAttr || contains(WebgraphSchema.target_path_s)) add(edge, WebgraphSchema.target_path_s, target_url.getPath());
            if (allAttr || contains(WebgraphSchema.target_path_folders_count_i) || contains(WebgraphSchema.target_path_folders_sxt)) {
                String[] paths = target_url.getPaths();
--- a/source/net/yacy/search/schema/WebgraphSchema.java
+++ b/source/net/yacy/search/schema/WebgraphSchema.java
@ -41,11 +41,12 @@ public enum WebgraphSchema implements SchemaDeclaration {
    source_id_s(SolrType.string, true, true, false, false, false, "primary key of document, the URL hash (source)"),
    source_protocol_s(SolrType.string, true, true, false, false, false, "the protocol of the url (source)"),
    source_urlstub_s(SolrType.string, true, true, false, false, false, "the url without the protocol (source)"),
+    source_file_name_s(SolrType.string, true, true, false, false, false, "the file name without the extension (source)"),
    source_file_ext_s(SolrType.string, true, true, false, false, false, "the file name extension (source)"),
    source_chars_i(SolrType.num_integer, true, true, false, false, false, "number of all characters in the url (source)"),
    source_path_s(SolrType.string, true, true, false, false, false, "path of the url (source)"),
    source_path_folders_count_i(SolrType.num_integer, true, true, false, false, false, "count of all path elements in the url (source)"),
-    source_path_folders_sxt(SolrType.string, true, true, true, false, false, "all path elements in the url (source)"),
+    source_path_folders_sxt(SolrType.string, true, true, true, false, false, "all path elements in the url without the file name (source)"),
    source_parameter_count_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url (source)"),
    source_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url (source)"),
    source_parameter_value_sxt(SolrType.string, true, true, true, false, false, "the values from key-value pairs in the search part of the url (source)"),
@ -73,11 +74,12 @@ public enum WebgraphSchema implements SchemaDeclaration {
    target_id_s(SolrType.string, true, true, false, false, false, "primary key of document, the URL hash (target)"),
    target_protocol_s(SolrType.string, true, true, false, false, false, "the protocol of the url (target)"),
    target_urlstub_s(SolrType.string, true, true, false, false, false, "the url without the protocol (target)"),
+    target_file_name_s(SolrType.string, true, true, false, false, false, "the file name without the extension (target)"),
    target_file_ext_s(SolrType.string, true, true, false, false, true, "the file name extension (target)"),
    target_chars_i(SolrType.num_integer, true, true, false, false, false, "number of all characters in the url (target)"),
    target_path_s(SolrType.string, true, true, false, false, false, "path of the url (target)"),
    target_path_folders_count_i(SolrType.num_integer, true, true, false, false, false, "count of all path elements in the url (target)"),
-    target_path_folders_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url (target)"),
+    target_path_folders_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url without the file name (target)"),
    target_parameter_count_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url (target)"),
    target_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url (target)"),
    target_parameter_value_sxt(SolrType.string, true, true, true, false, true, "the values from key-value pairs in the search part of the url (target)"),