From 16d1d744faa4c1bf82403c852c273e70b24179a8 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Tue, 25 Jun 2013 16:27:20 +0200
Subject: [PATCH] added url_file_name_s to the default collection schema for
 the file name without the file extension. This part of the file path is
 removed from the multi-valued field url_paths_sxt, which no longer contains
 the file name as the last element of the path list.

The same applies to the new fields source_file_name_s and
target_file_name_s in the webgraph schema.
---
 defaults/solr.collection.schema               |  7 +++-
 defaults/solr.webgraph.schema                 | 10 ++++-
 htroot/ViewFile.java                          |  5 +--
 htroot/yacysearchitem.java                    |  6 ++-
 .../yacy/cora/document/MultiProtocolURI.java  | 26 ++++++-------
 .../document/analysis/Classification.java     |  4 +-
 .../federate/solr/SchemaConfiguration.java    | 18 ++++-----
 .../responsewriter/JsonResponseWriter.java    |  9 ++---
 source/net/yacy/crawler/data/Latency.java     |  4 +-
 .../net/yacy/crawler/data/ResultImages.java   |  2 +-
 .../yacy/crawler/retrieval/FileLoader.java    |  3 +-
 .../net/yacy/crawler/retrieval/Response.java  | 12 +++---
 .../net/yacy/crawler/retrieval/SMBLoader.java |  2 +-
 .../net/yacy/data/ymark/YMarkAutoTagger.java  |  3 +-
 source/net/yacy/document/Document.java        |  4 +-
 source/net/yacy/document/LibraryProvider.java |  2 +-
 source/net/yacy/document/TextParser.java      | 16 ++++----
 .../yacy/document/parser/audioTagParser.java  |  7 ++--
 .../yacy/document/parser/genericParser.java   |  4 +-
 .../document/parser/html/ContentScraper.java  | 10 ++---
 .../parser/images/genericImageParser.java     | 14 +++----
 .../net/yacy/document/parser/tarParser.java   |  3 +-
 .../schema/CollectionConfiguration.java       | 33 ++++++++++++----
 .../yacy/search/schema/CollectionSchema.java  |  3 +-
 .../search/schema/WebgraphConfiguration.java  | 39 ++++++++-----------
 .../yacy/search/schema/WebgraphSchema.java    |  6 ++-
 26 files changed, 136 insertions(+), 116 deletions(-)

diff --git a/defaults/solr.collection.schema b/defaults/solr.collection.schema
index 4c10cc5b8..a37a5bfff 100644
--- a/defaults/solr.collection.schema
+++ b/defaults/solr.collection.schema
@@ -334,12 +334,15 @@ underline_txt
 ## the protocol of the url
 url_protocol_s
 
-## all path elements in the url
-url_paths_sxt
+## the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension
+url_file_name_s
 
 ## the file name extension
 url_file_ext_s
 
+## all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name
+url_paths_sxt
+
 ## number of key-value pairs in search part of the url
 #url_parameter_i
 
diff --git a/defaults/solr.webgraph.schema b/defaults/solr.webgraph.schema
index dec0dcb02..d2c505d17 100644
--- a/defaults/solr.webgraph.schema
+++ b/defaults/solr.webgraph.schema
@@ -41,6 +41,9 @@ source_id_s
 ## the url without the protocol (source)
 #source_urlstub_s
 
+## the file name without the extension (source)
+#source_file_name_s
+
 ## the file name extension (source)
 #source_file_ext_s
 
@@ -53,7 +56,7 @@ source_id_s
 ## count of all path elements in the url (source)
 #source_path_folders_count_i
 
-## all path elements in the url (source)
+## all path elements in the url without the file name (source)
 #source_path_folders_sxt
 
 ## number of key-value pairs in search part of the url (source)
@@ -132,6 +135,9 @@ target_protocol_s
 ## the url without the protocol (target)
 target_urlstub_s
 
+## the file name without the extension (target)
+target_file_name_s
+
 ## the file name extension (target)
 target_file_ext_s
 
@@ -144,7 +150,7 @@ target_file_ext_s
 ## count of all path elements in the url (target)
 #target_path_folders_count_i
 
-## all path elements in the url (target)
+## all path elements in the url without the file name (target)
 target_path_folders_sxt
 
 ## number of key-value pairs in search part of the url (target)
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index a67cd4921..223589113 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -185,7 +185,7 @@ public class ViewFile {
         }
 
         final String[] wordArray = wordArray(post.get("words", null));
-
+        final String ext = MultiProtocolURI.getFileExtension(url.getFileName());
         if (viewMode.equals("plain")) {
 
             // TODO: how to handle very large files here ?
@@ -209,7 +209,6 @@ public class ViewFile {
 
         } else if (viewMode.equals("iframeCache")) {
             prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
-            final String ext = url.getFileExtension();
             prop.put("viewMode_png", 0);
             prop.put("viewMode_html", 0);
             if (ext.length() > 0 && "jpg.jpeg.png.gif".indexOf(ext) >= 0) {
@@ -389,7 +388,7 @@ public class ViewFile {
             prop.put("error_md5", urlEntry.md5());
             prop.put("error_lat", urlEntry.lat());
             prop.put("error_lon", urlEntry.lon());
-            prop.put("error_doctype", Response.doctype2mime(url.getFileExtension(), urlEntry.doctype()));
+            prop.put("error_doctype", Response.doctype2mime(ext, urlEntry.doctype()));
             prop.put("error_language", urlEntry.language());
             prop.put("error_flags", urlEntry.flags().toString());
             prop.put("error_wordCount", urlEntry.wordCount());
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index 80783193a..dc21ee141 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -29,6 +29,7 @@ import java.util.List;
 
 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.RSSMessage;
 import net.yacy.cora.document.analysis.Classification;
 import net.yacy.cora.document.analysis.Classification.ContentDomain;
@@ -189,6 +190,7 @@ public class yacysearchitem {
 //            prop.putHTML("content_value", Interaction.TripleGet(result.urlstring(), "http://virtual.x/hasvalue", "anonymous"));
 // END interaction
 
+            String resultFileName = resultURL.getFileName();
             prop.putHTML("content_target", target);
             if (faviconURL != null && fileType == FileType.HTML) sb.loader.loadIfNotExistBackground(faviconURL, 1024 * 1024 * 10, null, TextSnippet.snippetMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
             prop.putHTML("content_faviconCode", URLLicense.aquireLicense(faviconURL)); // acquire license for favicon url loading
@@ -210,7 +212,7 @@ public class yacysearchitem {
             prop.putHTML("content_sizename", RSSMessage.sizename(result.filesize()));
             prop.putHTML("content_showSize_sizename", RSSMessage.sizename(result.filesize()));
             prop.putHTML("content_host", resultURL.getHost() == null ? "" : resultURL.getHost());
-            prop.putHTML("content_file", resultURL.getFileName());
+            prop.putHTML("content_file", resultFileName);
             prop.putHTML("content_path", resultURL.getPath());
             prop.put("content_nl", (item == theSearch.query.offset) ? 0 : 1);
             prop.putHTML("content_publisher", result.publisher());
@@ -243,7 +245,7 @@ public class yacysearchitem {
                 prop.put("content_heuristic_name", heuristic.heuristicName);
             }
             EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "" + item, 0, 0), false);
-            final String ext = resultURL.getFileExtension().toLowerCase();
+            final String ext = MultiProtocolURI.getFileExtension(resultFileName).toLowerCase();
             if (ext.equals("png") || ext.equals("jpg") || ext.equals("gif")) {
                 final String license = URLLicense.aquireLicense(resultURL);
                 prop.put("content_code", license);
diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java
index 394aaa87c..f7dcb5b3d 100644
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@@ -269,7 +269,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 
     public final ContentDomain getContentDomain() {
         if (this.contentDomain == null) {
-            this.contentDomain = Classification.getContentDomain(this.getFileExtension());
+            this.contentDomain = Classification.getContentDomain(getFileExtension(this.getFileName()));
         }
         return this.contentDomain;
     }
@@ -711,14 +711,10 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
         return this.path.substring(p + 1); // the 'real' file name
     }
 
-    public String getFileExtension() {
-        return getFileExtension(getFileName());
-    }
-
     public static String getFileExtension(final String fileName) {
         final int p = fileName.lastIndexOf('.');
         if (p < 0) return "";
-        return fileName.substring(p + 1);
+        return fileName.substring(p + 1).toLowerCase();
     }
 
     public String getPath() {
@@ -726,7 +722,12 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
     }
 
     public String[] getPaths() {
-        return this.path == null ? null : this.path.charAt(0) == '/' ? CommonPattern.SLASH.split(this.path.substring(1)) : CommonPattern.SLASH.split(this.path);
+        String s = this.path == null ? "" : this.path.charAt(0) == '/' ? this.path.substring(1) : this.path;
+        int p = s.lastIndexOf('/');
+        if (p < 0) return new String[0];
+        s = s.substring(0, p); // the paths do not contain the last part, which is considered as the getFileName() part.
+        String[] paths = CommonPattern.SLASH.split(s);
+        return paths;
     }
 
     /**
@@ -973,15 +974,25 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
         return (this.searchpart != null) && (this.searchpart.length() > 0);
     }
 
-    public final boolean isCGI() {
-        final String ls = unescape(this.path.toLowerCase());
-        return ls.indexOf(".cgi",0) >= 0 ||
-               ls.indexOf(".exe",0) >= 0;
+    /**
+     * Checks whether a file extension denotes a CGI-style resource.
+     * The match is exact and case-insensitive; a null or empty extension never matches.
+     * @param extension the file extension without the leading dot
+     * @return true if the extension is "cgi" or "exe"
+     */
+    public static final boolean isCGI(final String extension) {
+        return "cgi".equalsIgnoreCase(extension) || "exe".equalsIgnoreCase(extension);
     }
 
-    public final boolean isImage() {
-        final String ext = getFileExtension().toLowerCase();
-        return "png.gif.jpg.jpeg".indexOf(ext) >= 0;
+    /**
+     * Checks whether a file extension denotes one of the image formats png, gif, jpg or jpeg.
+     * The match is exact and case-insensitive; a null or empty extension never matches.
+     * @param extension the file extension without the leading dot
+     * @return true if the extension is "png", "gif", "jpg" or "jpeg"
+     */
+    public static final boolean isImage(final String extension) {
+        return "png".equalsIgnoreCase(extension) || "gif".equalsIgnoreCase(extension)
+            || "jpg".equalsIgnoreCase(extension) || "jpeg".equalsIgnoreCase(extension);
     }
 
     public final boolean isIndividual() {
diff --git a/source/net/yacy/cora/document/analysis/Classification.java b/source/net/yacy/cora/document/analysis/Classification.java
index 0d32da96b..61bfd5e31 100644
--- a/source/net/yacy/cora/document/analysis/Classification.java
+++ b/source/net/yacy/cora/document/analysis/Classification.java
@@ -201,10 +201,10 @@ public class Classification {
     }
 
     public static String url2mime(final MultiProtocolURI url, final String dfltMime) {
-        return url == null ? "application/octet-stream" : ext2mime(url.getFileExtension(), dfltMime);
+        return url == null ? "application/octet-stream" : ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()), dfltMime);
     }
 
     public static String url2mime(final MultiProtocolURI url) {
-        return url == null ? "application/octet-stream" : ext2mime(url.getFileExtension());
+        return url == null ? "application/octet-stream" : ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()));
     }
 }
diff --git a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java
index 27c4211d8..f6dccd598 100644
--- a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java
+++ b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java
@@ -146,43 +146,43 @@ public class SchemaConfiguration extends Configuration implements Serializable {
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final String value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final Date value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.getTime() > 0))) key.add(doc, value);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final String[] value) {
-        assert key.isMultiValued();
+        assert key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final Integer[] value) {
-        assert key.isMultiValued();
+        assert key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && value.length > 0))) key.add(doc, value);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final List<?> values) {
-        assert key.isMultiValued();
+        assert key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || (values != null && !values.isEmpty()))) key.add(doc, values);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final int value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final long value) {
-        assert !key.isMultiValued();
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
         if ((isEmpty() || contains(key)) && (!this.lazy || value != 0)) key.add(doc, value);
     }
 
     public void add(final SolrInputDocument doc, final SchemaDeclaration key, final boolean value) {
-        assert !key.isMultiValued();
-        if (isEmpty() || contains(key)) key.add(doc, value);
+        assert !key.isMultiValued() : "key = " + key.getSolrFieldName();
+        if ((isEmpty() || contains(key)) && (!this.lazy || value)) key.add(doc, value);
     }
 
     public static Date getDate(SolrInputDocument doc, final SchemaDeclaration key) {
diff --git a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
index 1dfd38d1c..0aa684f27 100644
--- a/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/JsonResponseWriter.java
@@ -148,14 +148,16 @@ public class JsonResponseWriter implements QueryResponseWriter {
                     solitaireTag(writer, stag, value.stringValue());
                     continue;
                 }
-                
                 // some special handling here
                 if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) {
                     String u = value.stringValue();
                     try {
                         url = new MultiProtocolURI(u);
+                        String filename = url.getFileName();
                         solitaireTag(writer, "link", u);
-                        solitaireTag(writer, "file", url.getFileName());
+                        solitaireTag(writer, "file", filename);
+                        // get image license; isImage expects the file extension, not the whole file name
+                        if (MultiProtocolURI.isImage(MultiProtocolURI.getFileExtension(filename))) URLLicense.aquireLicense(urlhash, url.toNormalform(true));
                     } catch (MalformedURLException e) {}
                     continue;
                 }
@@ -206,9 +208,6 @@ public class JsonResponseWriter implements QueryResponseWriter {
                 //missing: "code","faviconCode"
             }
             
-            // get image license
-            if (url.isImage()) URLLicense.aquireLicense(urlhash, url.toNormalform(true));
-            
             // compute snippet from texts            
             solitaireTag(writer, "path", path.toString());
             solitaireTag(writer, "title", title.length() == 0 ? (texts.size() == 0 ? path.toString() : texts.get(0)) : title);
diff --git a/source/net/yacy/crawler/data/Latency.java b/source/net/yacy/crawler/data/Latency.java
index ea5b5ca12..4e5d86918 100644
--- a/source/net/yacy/crawler/data/Latency.java
+++ b/source/net/yacy/crawler/data/Latency.java
@@ -204,7 +204,7 @@ public class Latency {
         // for CGI accesses, we double the minimum time
         // mostly there is a database access in the background
         // which creates a lot of unwanted IO on target site
-        if (url.isCGI()) waiting = waiting * 2;
+        if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url.getFileName()))) waiting = waiting * 2;
 
         // if we have accessed the domain many times, get slower (the flux factor)
         if (!local) waiting += host.flux(waiting);
@@ -238,7 +238,7 @@ public class Latency {
         // for CGI accesses, we double the minimum time
         // mostly there is a database access in the background
         // which creates a lot of unwanted IO on target site
-        if (url.isCGI()) { waiting = waiting * 2; s.append(", isCGI = true -> double"); }
+        if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url.getFileName()))) { waiting = waiting * 2; s.append(", isCGI = true -> double"); }
 
         // if we have accessed the domain many times, get slower (the flux factor)
         int flux = host.flux(waiting);
diff --git a/source/net/yacy/crawler/data/ResultImages.java b/source/net/yacy/crawler/data/ResultImages.java
index 62d76ce5d..1d9fdab5a 100644
--- a/source/net/yacy/crawler/data/ResultImages.java
+++ b/source/net/yacy/crawler/data/ResultImages.java
@@ -74,7 +74,7 @@ public class ResultImages {
                 image.height() > 100 &&
                 image.width() < 1200 &&
                 image.height() < 1000 &&
-                !"gif".equals(image.url().getFileExtension())) {
+                !"gif".equals(MultiProtocolURI.getFileExtension(image.url().getFileName()))) {
                 // && ((urlString.lastIndexOf(".jpg") != -1)) ||
                 // ((urlString.lastIndexOf(".png") != -1)){
 
diff --git a/source/net/yacy/crawler/retrieval/FileLoader.java b/source/net/yacy/crawler/retrieval/FileLoader.java
index 526762c28..049f6b031 100644
--- a/source/net/yacy/crawler/retrieval/FileLoader.java
+++ b/source/net/yacy/crawler/retrieval/FileLoader.java
@@ -31,6 +31,7 @@ import java.util.Date;
 import java.util.List;
 
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.document.analysis.Classification;
 import net.yacy.cora.protocol.HeaderFramework;
@@ -94,7 +95,7 @@ public class FileLoader {
         }
 
         // create response header
-        String mime = Classification.ext2mime(url.getFileExtension());
+        String mime = Classification.ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()));
         ResponseHeader responseHeader = new ResponseHeader(200);
         responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date(url.lastModified())));
         responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java
index 5230bbe2b..d398388fc 100644
--- a/source/net/yacy/crawler/retrieval/Response.java
+++ b/source/net/yacy/crawler/retrieval/Response.java
@@ -70,7 +70,7 @@ public class Response {
 
     // doctype calculation
     public static char docType(final MultiProtocolURI url) {
-        String ext = url.getFileExtension();
+        String ext = MultiProtocolURI.getFileExtension(url.getFileName());
         if (ext == null) return DT_UNKNOWN;
         if (ext.equals(".gif"))  return DT_IMAGE;
         if (ext.equals(".ico"))  return DT_IMAGE;
@@ -169,7 +169,7 @@ public class Response {
         // request and response headers may be zero in case that we process surrogates
         this.requestHeader = new RequestHeader();
         this.responseHeader = new ResponseHeader(200);
-        this.responseHeader.put(HeaderFramework.CONTENT_TYPE, Classification.ext2mime(request.url().getFileExtension(), "text/plain")); // tell parser how to handle the content
+        this.responseHeader.put(HeaderFramework.CONTENT_TYPE, Classification.ext2mime(MultiProtocolURI.getFileExtension(request.url().getFileName()), "text/plain")); // tell parser how to handle the content
         if (!request.isEmpty()) this.responseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(request.size()));
         this.profile = profile;
         this.status = QUEUE_STATE_FRESH;
@@ -291,7 +291,7 @@ public class Response {
             return "dynamic_post";
         }
 
-        if (url().isCGI()) {
+        if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) {
             return "dynamic_cgi";
         }
 
@@ -390,7 +390,7 @@ public class Response {
         if (url().isPOST()) {
             return false;
         }
-        if (url().isCGI()) {
+        if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) {
             return false;
         }
 
@@ -541,7 +541,7 @@ public class Response {
             if (url().isPOST()) {
                 return "Dynamic_(POST)";
             }
-            if (url().isCGI()) {
+            if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) {
                 return "Dynamic_(CGI)";
             }
         }
@@ -684,7 +684,7 @@ public class Response {
         // CGI access makes the page very individual, and therefore not usable in caches
         if (!profile().crawlingQ()) {
             if (url().isPOST()) { return "Dynamic_(POST)"; }
-            if (url().isCGI()) { return "Dynamic_(CGI)"; }
+            if (MultiProtocolURI.isCGI(MultiProtocolURI.getFileExtension(url().getFileName()))) { return "Dynamic_(CGI)"; }
         }
 
         // -authorization cases in request
diff --git a/source/net/yacy/crawler/retrieval/SMBLoader.java b/source/net/yacy/crawler/retrieval/SMBLoader.java
index 26c68fc04..3b120c240 100644
--- a/source/net/yacy/crawler/retrieval/SMBLoader.java
+++ b/source/net/yacy/crawler/retrieval/SMBLoader.java
@@ -113,7 +113,7 @@ public class SMBLoader {
         }
 
         // create response header
-        String mime = Classification.ext2mime(url.getFileExtension());
+        String mime = Classification.ext2mime(MultiProtocolURI.getFileExtension(url.getFileName()));
         ResponseHeader responseHeader = new ResponseHeader(200);
         responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date(url.lastModified())));
         responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
diff --git a/source/net/yacy/data/ymark/YMarkAutoTagger.java b/source/net/yacy/data/ymark/YMarkAutoTagger.java
index 92ddcf083..f72266176 100644
--- a/source/net/yacy/data/ymark/YMarkAutoTagger.java
+++ b/source/net/yacy/data/ymark/YMarkAutoTagger.java
@@ -10,6 +10,7 @@ import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.concurrent.ArrayBlockingQueue;
 
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.crawler.retrieval.Response;
@@ -161,7 +162,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
 			}
 			final String clean =  YMarkUtil.cleanTagsString(buffer.toString());
 			if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
-				return document.getFileExtension();
+				return MultiProtocolURI.getFileExtension(document.dc_source().getFileName());
 			}
 			return clean;
 		} finally {
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index d5e7bbda6..6d5e76fbe 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -153,8 +153,8 @@ public class Document {
         return this.languages;
     }
 
-    public String getFileExtension() {
-    	return this.source.getFileExtension();
+    public String getFileName() {
+    	return this.source.getFileName();
     }
 
     public Map<String, Set<String>> getGenericFacets() {
diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java
index 8b31a6363..c7771a973 100644
--- a/source/net/yacy/document/LibraryProvider.java
+++ b/source/net/yacy/document/LibraryProvider.java
@@ -90,7 +90,7 @@ public class LibraryProvider {
 
         private Dictionary(final String nickname, final String url) {
             try {
-                this.filename = new MultiProtocolURI(url).getFileName();
+                this.filename = (new MultiProtocolURI(url)).getFileName();
             } catch ( final MalformedURLException e ) {
                 assert false;
             }
diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java
index 25f74a7d2..cb965d46e 100644
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@@ -194,7 +194,7 @@ public final class TextParser {
         try {
             idioms = parsers(location, mimeType);
         } catch (final Parser.Failure e) {
-            final String errorMsg = "Parser Failure for extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "': " + e.getMessage();
+            final String errorMsg = "Parser Failure for extension '" + MultiProtocolURI.getFileExtension(location.getFileName()) + "' or mimetype '" + mimeType + "': " + e.getMessage();
             AbstractParser.log.logWarning(errorMsg);
             throw new Parser.Failure(errorMsg, location);
         }
@@ -218,7 +218,7 @@ public final class TextParser {
         try {
             idioms = parsers(location, mimeType);
         } catch (final Parser.Failure e) {
-            final String errorMsg = "Parser Failure for extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "': " + e.getMessage();
+            final String errorMsg = "Parser Failure for extension '" + MultiProtocolURI.getFileExtension(location.getFileName()) + "' or mimetype '" + mimeType + "': " + e.getMessage();
             AbstractParser.log.logWarning(errorMsg);
             throw new Parser.Failure(errorMsg, location);
         }
@@ -252,7 +252,7 @@ public final class TextParser {
             final InputStream sourceStream
         ) throws Parser.Failure {
         if (AbstractParser.log.isFine()) AbstractParser.log.logFine("Parsing '" + location + "' from stream");
-        final String fileExt = location.getFileExtension();
+        final String fileExt = MultiProtocolURI.getFileExtension(location.getFileName());
         final String documentCharset = htmlParser.patchCharsetEncoding(charset);
         assert parser != null;
 
@@ -272,7 +272,7 @@ public final class TextParser {
             final String charset,
             final byte[] sourceArray
         ) throws Parser.Failure {
-        final String fileExt = location.getFileExtension();
+        final String fileExt = MultiProtocolURI.getFileExtension(location.getFileName());
         if (AbstractParser.log.isFine()) AbstractParser.log.logFine("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "' from byte[]");
         final String documentCharset = htmlParser.patchCharsetEncoding(charset);
         assert !parsers.isEmpty();
@@ -312,7 +312,7 @@ public final class TextParser {
 
         if (docs == null) {
             if (failedParser.isEmpty()) {
-                final String errorMsg = "Parsing content with file extension '" + location.getFileExtension() + "' and mimetype '" + mimeType + "' failed.";
+                final String errorMsg = "Parsing content with file extension '" + fileExt + "' and mimetype '" + mimeType + "' failed.";
                 //log.logWarning("Unable to parse '" + location + "'. " + errorMsg);
                 throw new Parser.Failure(errorMsg, location);
             }
@@ -362,7 +362,7 @@ public final class TextParser {
         final Set<Parser> idioms = new HashSet<Parser>(2);
 
         // check extension
-        String ext = url.getFileExtension();
+        String ext = MultiProtocolURI.getFileExtension(url.getFileName());
         Set<Parser> idiom;
         if (ext != null && ext.length() > 0) {
             ext = ext.toLowerCase();
@@ -428,11 +428,11 @@ public final class TextParser {
      * @return an error if the extension is not supported, null otherwise
      */
     public static String supportsExtension(final MultiProtocolURI url) {
-        return supportsExtension(url.getFileExtension().toLowerCase());
+        return supportsExtension(MultiProtocolURI.getFileExtension(url.getFileName()).toLowerCase());
     }
 
     public static String mimeOf(final MultiProtocolURI url) {
-        return mimeOf(url.getFileExtension());
+        return mimeOf(MultiProtocolURI.getFileExtension(url.getFileName()));
     }
 
     public static String mimeOf(final String ext) {
diff --git a/source/net/yacy/document/parser/audioTagParser.java b/source/net/yacy/document/parser/audioTagParser.java
index dd3cc2d44..aef05c58d 100644
--- a/source/net/yacy/document/parser/audioTagParser.java
+++ b/source/net/yacy/document/parser/audioTagParser.java
@@ -72,8 +72,9 @@ public class audioTagParser extends AbstractParser implements Parser {
             final String charset, final InputStream source)
             throws Parser.Failure, InterruptedException {
 
-    	final String filename = location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName());
-   	    final String fileext = '.'+location.getFileExtension();
+        String filename = location.getFileName();
+        final String fileext = '.' + MultiProtocolURI.getFileExtension(filename);
+        filename = filename.isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(filename);
     	String mime = mimeType;
    	    
     	// fix mimeType
@@ -190,7 +191,7 @@ public class audioTagParser extends AbstractParser implements Parser {
 	                this,
 	                null,
 	                null,
-	                singleList(location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName())), // title
+	                singleList(filename), // title
 	                "", // author
 	                location.getHost(),
 	                null,
diff --git a/source/net/yacy/document/parser/genericParser.java b/source/net/yacy/document/parser/genericParser.java
index 9ad666367..359746844 100644
--- a/source/net/yacy/document/parser/genericParser.java
+++ b/source/net/yacy/document/parser/genericParser.java
@@ -47,7 +47,7 @@ public class genericParser extends AbstractParser implements Parser {
     public Document[] parse(final DigestURI location, final String mimeType,
             final String charset, final InputStream source1)
             throws Parser.Failure, InterruptedException {
-
+        String filename = location.getFileName();
         final Document[] docs = new Document[]{new Document(
                 location,
                 mimeType,
@@ -55,7 +55,7 @@ public class genericParser extends AbstractParser implements Parser {
                 this,
                 null,
                 null,
-                singleList(location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName())), // title
+                singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURI.unescape(filename)), // title
                 "", // author
                 location.getHost(),
                 null,
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 854e5666d..1d10b350e 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -473,10 +473,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             final String href = tagopts.getProperty("href", EMPTY_STRING);
             DigestURI url;
             if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
-                final String f = url.getFileName();
-                final int p = f.lastIndexOf('.');
-                final String type = (p < 0) ? EMPTY_STRING : f.substring(p + 1);
-                if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg") || type.equals("tiff") || type.equals("tif")) {
+                final String ext = MultiProtocolURI.getFileExtension(url.getFileName());
+                if (ext.equals("png") || ext.equals("gif") || ext.equals("jpg") || ext.equals("jpeg") || ext.equals("tiff") || ext.equals("tif")) {
                     // special handling of such urls: put them to the image urls
                     final ImageEntry ie = new ImageEntry(url, recursiveParse(text), -1, -1, -1);
                     addImage(this.images, ie);
@@ -656,7 +654,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         String ext;
         ArrayList<DigestURI> f = new ArrayList<DigestURI>();
         for (final DigestURI url: this.anchors.keySet()) {
-            ext = url.getFileExtension();
+            ext = MultiProtocolURI.getFileExtension(url.getFileName());
             if (ext == null) continue;
             if (ext.equals("swf")) f.add(url);
         }
@@ -666,7 +664,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public boolean containsFlash() {
         String ext;
         for (final MultiProtocolURI url: this.anchors.keySet()) {
-            ext = url.getFileExtension();
+            ext = MultiProtocolURI.getFileExtension(url.getFileName());
             if (ext == null) continue;
             if (ext.equals("swf")) return true;
         }
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index 8d306ebfe..7203f75ae 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -99,8 +99,9 @@ public class genericImageParser extends AbstractParser implements Parser {
         String author = null;
         String keywords = null;
         String description = null;
-        if (mimeType.equals("image/bmp") ||
-            location.getFileExtension().equalsIgnoreCase("bmp")) {
+        String filename = location.getFileName();
+        String ext = MultiProtocolURI.getFileExtension(filename);
+        if (mimeType.equals("image/bmp") || ext.equalsIgnoreCase("bmp")) {
             byte[] b;
             try {
                 b = FileUtils.read(sourceStream);
@@ -110,10 +111,7 @@ public class genericImageParser extends AbstractParser implements Parser {
             }
             final IMAGEMAP imap = bmpParser.parse(b);
             ii = parseJavaImage(location, imap.getImage());
-        } else if (mimeType.equals("image/jpeg") ||
-                   location.getFileExtension().equalsIgnoreCase("jpg") ||
-                   location.getFileExtension().equalsIgnoreCase("jpeg") ||
-                   location.getFileExtension().equalsIgnoreCase("jpe")) {
+        } else if (mimeType.equals("image/jpeg") || ext.equalsIgnoreCase("jpg") || ext.equalsIgnoreCase("jpeg") || ext.equalsIgnoreCase("jpe")) {
             // use the exif parser from
             // http://www.drewnoakes.com/drewnoakes.com/code/exif/
             // javadoc is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/javadoc/
@@ -190,7 +188,7 @@ public class genericImageParser extends AbstractParser implements Parser {
         final String infoString = ii.info.toString();
         images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));
 
-        if (title == null || title.isEmpty()) title = MultiProtocolURI.unescape(location.getFileName());
+        if (title == null || title.isEmpty()) title = MultiProtocolURI.unescape(filename);
 
         return new Document[]{new Document(
              location,
@@ -297,7 +295,7 @@ public class genericImageParser extends AbstractParser implements Parser {
         DigestURI uri;
         try {
             uri = new DigestURI("http://localhost/" + image.getName());
-            final Document[] document = parser.parse(uri, "image/" + uri.getFileExtension(), "UTF-8", new FileInputStream(image));
+            final Document[] document = parser.parse(uri, "image/" + MultiProtocolURI.getFileExtension(uri.getFileName()), "UTF-8", new FileInputStream(image));
             System.out.println(document[0].toString());
         } catch (final MalformedURLException e) {
             e.printStackTrace();
diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java
index d2507cf15..8f03b6b85 100644
--- a/source/net/yacy/document/parser/tarParser.java
+++ b/source/net/yacy/document/parser/tarParser.java
@@ -33,6 +33,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.GZIPInputStream;
 
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
@@ -64,7 +65,7 @@ public class tarParser extends AbstractParser implements Parser {
 
         final List<Document> docacc = new ArrayList<Document>();
         Document[] subDocs = null;
-        final String ext = url.getFileExtension().toLowerCase();
+        final String ext = MultiProtocolURI.getFileExtension(url.getFileName()).toLowerCase();
         if (ext.equals("gz") || ext.equals("tgz")) {
             try {
                 source = new GZIPInputStream(source);
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index c93142c5d..b75ae0459 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -35,10 +35,10 @@ import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.BlockingQueue;
@@ -79,6 +79,7 @@ import net.yacy.kelondro.util.Bitfield;
 import net.yacy.search.index.Segment;
 import net.yacy.search.index.Segment.ReferenceReport;
 import net.yacy.search.index.Segment.ReferenceReportCache;
+import net.yacy.search.schema.WebgraphConfiguration.Subgraph;
 
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
@@ -256,8 +257,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
             add(doc, CollectionSchema.description_words_val, cv);
         }
 
+        String filename = digestURI.getFileName();
+        String extension = MultiProtocolURI.getFileExtension(filename);
         if (allAttr || contains(CollectionSchema.author)) add(doc, CollectionSchema.author, md.dc_creator());
-        if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype()));
+        if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, Response.doctype2mime(extension, md.doctype()));
         if (allAttr || contains(CollectionSchema.last_modified)) add(doc, CollectionSchema.last_modified, md.moddate());
         if (allAttr || contains(CollectionSchema.wordcount_i)) add(doc, CollectionSchema.wordcount_i, md.wordCount());
 
@@ -274,7 +277,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
 
         // path elements of link
         if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, digestURI.getPaths());
-        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, digestURI.getFileExtension());
+        if (allAttr || contains(CollectionSchema.url_file_name_s)) add(doc, CollectionSchema.url_file_name_s, filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename);
+        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, extension);
 
         if (allAttr || contains(CollectionSchema.imagescount_i)) add(doc, CollectionSchema.imagescount_i, md.limage());
         if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, md.llocal());
@@ -474,8 +478,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         add(doc, CollectionSchema.fuzzy_signature_unique_b, true); // this must be corrected afterwards!
 
         // path elements of link
+        String filename = digestURI.getFileName();
+        String extension = MultiProtocolURI.getFileExtension(filename);
         if (allAttr || contains(CollectionSchema.url_paths_sxt)) add(doc, CollectionSchema.url_paths_sxt, digestURI.getPaths());
-        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, digestURI.getFileExtension());
+        if (allAttr || contains(CollectionSchema.url_file_name_s)) add(doc, CollectionSchema.url_file_name_s, filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename);
+        if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, extension);
 
         // get list of all links; they will be shrinked by urls that appear in other fields of the solr schema
         Set<DigestURI> inboundLinks = document.inboundLinks();
@@ -695,8 +702,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
                     outboundLinks.remove(canonical);
                     add(doc, CollectionSchema.canonical_s, canonical.toNormalform(false));
                     // set a flag if this is equal to sku
-                    if (contains(CollectionSchema.canonical_equal_sku_b) && canonical.equals(docurl)) {
-                        add(doc, CollectionSchema.canonical_equal_sku_b, true);
+                    if (contains(CollectionSchema.canonical_equal_sku_b)) {
+                        add(doc, CollectionSchema.canonical_equal_sku_b, canonical.equals(docurl));
                     }
                 }
             }
@@ -784,9 +791,16 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         if (allAttr || contains(CollectionSchema.inboundlinksnofollowcount_i)) add(doc, CollectionSchema.inboundlinksnofollowcount_i, document.inboundLinkNofollowCount());
         if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, outboundLinks.size());
         if (allAttr || contains(CollectionSchema.outboundlinksnofollowcount_i)) add(doc, CollectionSchema.outboundlinksnofollowcount_i, document.outboundLinkNofollowCount());
+        Map<DigestURI, Properties> alllinks = document.getAnchors();
         
+        // create a subgraph
+        Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size());
+        //if () {
+            webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, true, inboundLinks, citations);
+            webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, false, outboundLinks, citations);
+        //}
+            
         // list all links
-        WebgraphConfiguration.Subgraph subgraph = webgraph.edges(digestURI, responseHeader, collections, clickdepth, document.getAnchors(), images, inboundLinks, outboundLinks, citations);
         doc.webgraphDocuments.addAll(subgraph.edges);
         if (allAttr || contains(CollectionSchema.inboundlinks_protocol_sxt)) add(doc, CollectionSchema.inboundlinks_protocol_sxt, protocolList2indexedList(subgraph.urlProtocols[0]));
         if (allAttr || contains(CollectionSchema.inboundlinks_urlstub_txt)) add(doc, CollectionSchema.inboundlinks_urlstub_txt, subgraph.urlStubs[0]);
@@ -1164,8 +1178,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         if (contains(CollectionSchema.load_date_dt)) add(solrdoc, CollectionSchema.load_date_dt, new Date());
 
         // path elements of link
+        String filename = digestURI.getFileName();
+        String extension = MultiProtocolURI.getFileExtension(filename);
         if (contains(CollectionSchema.url_paths_sxt)) add(solrdoc, CollectionSchema.url_paths_sxt, digestURI.getPaths());
-        if (contains(CollectionSchema.url_file_ext_s)) add(solrdoc, CollectionSchema.url_file_ext_s, digestURI.getFileExtension());
+        if (contains(CollectionSchema.url_file_name_s)) add(solrdoc, CollectionSchema.url_file_name_s, filename.toLowerCase().endsWith("." + extension) ? filename.substring(0, filename.length() - extension.length() - 1) : filename);
+        if (contains(CollectionSchema.url_file_ext_s)) add(solrdoc, CollectionSchema.url_file_ext_s, extension);
         
         // fail reason and status
         if (contains(CollectionSchema.failreason_s)) add(solrdoc, CollectionSchema.failreason_s, failReason);
diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java
index 1e49c8148..295171f6a 100644
--- a/source/net/yacy/search/schema/CollectionSchema.java
+++ b/source/net/yacy/search/schema/CollectionSchema.java
@@ -152,8 +152,9 @@ public enum CollectionSchema implements SchemaDeclaration {
     publisher_url_s(SolrType.string, true, true, false, false, false, "publisher url as defined in http://support.google.com/plus/answer/1713826?hl=de"),
     
     url_protocol_s(SolrType.string, true, true, false, false, false, "the protocol of the url"),
-    url_paths_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url"),
+    url_file_name_s(SolrType.string, true, true, false, false, false, "the file name (which is the string after the last '/' and before the query part from '?' on) without the file extension"),
     url_file_ext_s(SolrType.string, true, true, false, false, false, "the file name extension"),
+    url_paths_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url hpath (see: http://www.ietf.org/rfc/rfc1738.txt) without the file name"),
     url_parameter_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url"),
     url_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url"),
     url_parameter_value_sxt(SolrType.string, true, true, true, false, false, "the values from key-value pairs in the search part of the url"),
diff --git a/source/net/yacy/search/schema/WebgraphConfiguration.java b/source/net/yacy/search/schema/WebgraphConfiguration.java
index bd377175d..7773fb9a5 100644
--- a/source/net/yacy/search/schema/WebgraphConfiguration.java
+++ b/source/net/yacy/search/schema/WebgraphConfiguration.java
@@ -42,6 +42,7 @@ import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
 
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.federate.solr.ProcessType;
 import net.yacy.cora.federate.solr.SchemaConfiguration;
 import net.yacy.cora.federate.solr.SchemaDeclaration;
@@ -111,31 +112,13 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
         }
     }
     
-    public Subgraph edges(
-            final DigestURI source, final ResponseHeader responseHeader, String[] collections, int clickdepth_source,
-            final Map<DigestURI, Properties> alllinks,
-            final Map<DigestURI, ImageEntry> images,
-            final Set<DigestURI> inboundLinks,
-            final Set<DigestURI> outboundLinks,
-            IndexCell<CitationReference> citations
-            ) {
-        boolean allAttr = this.isEmpty();
-        Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size());
-        addEdges(
-                subgraph, source, responseHeader, collections, clickdepth_source,
-                allAttr, alllinks, images, true, inboundLinks, citations);
-        addEdges(
-                subgraph, source, responseHeader, collections, clickdepth_source,
-                allAttr, alllinks, images, false, outboundLinks, citations);
-        return subgraph;
-    }
-    
-    private void addEdges(
+    public void addEdges(
             final Subgraph subgraph,
             final DigestURI source, final ResponseHeader responseHeader, String[] collections, int clickdepth_source,
-            final boolean allAttr, final Map<DigestURI, Properties> alllinks, final Map<DigestURI, ImageEntry> images,
+            final Map<DigestURI, Properties> alllinks, final Map<DigestURI, ImageEntry> images,
             final boolean inbound, final Set<DigestURI> links,
             final IndexCell<CitationReference> citations) {
+        boolean allAttr = this.isEmpty();
         for (final DigestURI target_url: links) {
 
             Set<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
@@ -194,7 +177,12 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
                 if (allAttr || contains(WebgraphSchema.source_host_organizationdnc_s)) add(edge, WebgraphSchema.source_host_organizationdnc_s, orga + '.' + dnc);
                 if (allAttr || contains(WebgraphSchema.source_host_subdomain_s)) add(edge, WebgraphSchema.source_host_subdomain_s, subdom);
             }
-            if (allAttr || contains(WebgraphSchema.source_file_ext_s)) add(edge, WebgraphSchema.source_file_ext_s, source.getFileExtension());
+            if (allAttr || contains(WebgraphSchema.source_file_ext_s) || contains(WebgraphSchema.source_file_name_s)) {
+                String source_file_name = source.getFileName();
+                String source_file_ext = MultiProtocolURI.getFileExtension(source_file_name);
+                add(edge, WebgraphSchema.source_file_name_s, source_file_name.toLowerCase().endsWith("." + source_file_ext) ? source_file_name.substring(0, source_file_name.length() - source_file_ext.length() - 1) : source_file_name);
+                add(edge, WebgraphSchema.source_file_ext_s, source_file_ext);
+            }
             if (allAttr || contains(WebgraphSchema.source_path_s)) add(edge, WebgraphSchema.source_path_s, source.getPath());
             if (allAttr || contains(WebgraphSchema.source_path_folders_count_i) || contains(WebgraphSchema.source_path_folders_sxt)) {
                 String[] paths = source.getPaths();
@@ -251,7 +239,12 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
                 if (allAttr || contains(WebgraphSchema.target_host_organizationdnc_s)) add(edge, WebgraphSchema.target_host_organizationdnc_s, orga + '.' + dnc);
                 if (allAttr || contains(WebgraphSchema.target_host_subdomain_s)) add(edge, WebgraphSchema.target_host_subdomain_s, subdom);
             }
-            if (allAttr || contains(WebgraphSchema.target_file_ext_s)) add(edge, WebgraphSchema.target_file_ext_s, target_url.getFileExtension());
+            if (allAttr || contains(WebgraphSchema.target_file_ext_s) || contains(WebgraphSchema.target_file_name_s)) {
+                String target_file_name = target_url.getFileName();
+                String target_file_ext = MultiProtocolURI.getFileExtension(target_file_name);
+                add(edge, WebgraphSchema.target_file_name_s, target_file_name.toLowerCase().endsWith("." + target_file_ext) ? target_file_name.substring(0, target_file_name.length() - target_file_ext.length() - 1) : target_file_name);
+                add(edge, WebgraphSchema.target_file_ext_s, target_file_ext);
+            }
             if (allAttr || contains(WebgraphSchema.target_path_s)) add(edge, WebgraphSchema.target_path_s, target_url.getPath());
             if (allAttr || contains(WebgraphSchema.target_path_folders_count_i) || contains(WebgraphSchema.target_path_folders_sxt)) {
                 String[] paths = target_url.getPaths();
diff --git a/source/net/yacy/search/schema/WebgraphSchema.java b/source/net/yacy/search/schema/WebgraphSchema.java
index 15d257263..3086edf50 100644
--- a/source/net/yacy/search/schema/WebgraphSchema.java
+++ b/source/net/yacy/search/schema/WebgraphSchema.java
@@ -41,11 +41,12 @@ public enum WebgraphSchema implements SchemaDeclaration {
     source_id_s(SolrType.string, true, true, false, false, false, "primary key of document, the URL hash (source)"),
     source_protocol_s(SolrType.string, true, true, false, false, false, "the protocol of the url (source)"),
     source_urlstub_s(SolrType.string, true, true, false, false, false, "the url without the protocol (source)"),
+    source_file_name_s(SolrType.string, true, true, false, false, false, "the file name without the extension (source)"),
     source_file_ext_s(SolrType.string, true, true, false, false, false, "the file name extension (source)"),
     source_chars_i(SolrType.num_integer, true, true, false, false, false, "number of all characters in the url (source)"),
     source_path_s(SolrType.string, true, true, false, false, false, "path of the url (source)"),
     source_path_folders_count_i(SolrType.num_integer, true, true, false, false, false, "count of all path elements in the url (source)"),
-    source_path_folders_sxt(SolrType.string, true, true, true, false, false, "all path elements in the url (source)"),
+    source_path_folders_sxt(SolrType.string, true, true, true, false, false, "all path elements in the url without the file name (source)"),
     source_parameter_count_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url (source)"),
     source_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url (source)"),
     source_parameter_value_sxt(SolrType.string, true, true, true, false, false, "the values from key-value pairs in the search part of the url (source)"),
@@ -73,11 +74,12 @@ public enum WebgraphSchema implements SchemaDeclaration {
     target_id_s(SolrType.string, true, true, false, false, false, "primary key of document, the URL hash (target)"),
     target_protocol_s(SolrType.string, true, true, false, false, false, "the protocol of the url (target)"),
     target_urlstub_s(SolrType.string, true, true, false, false, false, "the url without the protocol (target)"),
+    target_file_name_s(SolrType.string, true, true, false, false, false, "the file name without the extension (target)"),
     target_file_ext_s(SolrType.string, true, true, false, false, true, "the file name extension (target)"),
     target_chars_i(SolrType.num_integer, true, true, false, false, false, "number of all characters in the url (target)"),
     target_path_s(SolrType.string, true, true, false, false, false, "path of the url (target)"),
     target_path_folders_count_i(SolrType.num_integer, true, true, false, false, false, "count of all path elements in the url (target)"),
-    target_path_folders_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url (target)"),
+    target_path_folders_sxt(SolrType.string, true, true, true, false, true, "all path elements in the url without the file name (target)"),
     target_parameter_count_i(SolrType.num_integer, true, true, false, false, false, "number of key-value pairs in search part of the url (target)"),
     target_parameter_key_sxt(SolrType.string, true, true, true, false, false, "the keys from key-value pairs in the search part of the url (target)"),
     target_parameter_value_sxt(SolrType.string, true, true, true, false, true, "the values from key-value pairs in the search part of the url (target)"),