diff --git a/source/net/yacy/document/AbstractParser.java b/source/net/yacy/document/AbstractParser.java index 15f300d7b..daf4a91a2 100644 --- a/source/net/yacy/document/AbstractParser.java +++ b/source/net/yacy/document/AbstractParser.java @@ -39,7 +39,8 @@ public abstract class AbstractParser implements Parser { protected final Set SUPPORTED_MIME_TYPES = new LinkedHashSet(); protected final Set SUPPORTED_EXTENSIONS = new HashSet(); private final String name; - + protected Object scraperObject; // used scraper or source object if any, otherwise null + /** * initialize a parser with a name * @param name diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 75b56606f..419dcb625 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -92,13 +92,13 @@ public class Document { private final Set languages; private boolean indexingDenied; private final double lon, lat; - private final Object parserObject; // the source object that was used to create the Document + private final Parser parserObject; // the source object that was used to create the Document private final Map> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document private final Date lastModified; private int crawldepth; public Document(final DigestURL location, final String mimeType, final String charset, - final Object parserObject, + final Parser parserObject, final Set languages, final String[] keywords, final List titles, @@ -160,11 +160,29 @@ public class Document { if (contentDomain != ContentDomain.ALL) return contentDomain; return this.dc_source().getContentDomainFromExt(); } - - public Object getParserObject() { + + /** + * The parser used to generate the document + * @return Parser + */ + public Parser getParserObject() { return this.parserObject; } + /** + * Convenient call to get the source/scraper object of the underlying parser + * if the parser 
uses a scraper, like htmlParser + * @return scraper object typically of type ContentScraper but may also be of type DCEntry + */ + public Object getScraperObject() { + if (this.parserObject instanceof AbstractParser) { + if (((AbstractParser) this.parserObject).scraperObject != null) { + return ((AbstractParser) this.parserObject).scraperObject; + } + } + return null; + } + public Set getContentLanguages() { return this.languages; } @@ -931,9 +949,9 @@ dc_rights // clean up parser data for (final Document doc: docs) { - Object parserObject = doc.getParserObject(); - if (parserObject instanceof ContentScraper) { - final ContentScraper html = (ContentScraper) parserObject; + Object scraper = doc.getScraperObject(); + if (scraper instanceof ContentScraper) { + final ContentScraper html = (ContentScraper) scraper; html.close(); } } @@ -979,9 +997,9 @@ dc_rights if (!entry.getKey().attachedNofollow()) result.put(entry.getKey(), entry.getValue()); } } - final Object parser = d.getParserObject(); - if (parser instanceof ContentScraper) { - final ContentScraper html = (ContentScraper) parser; + final Object scraper = d.getScraperObject(); + if (scraper instanceof ContentScraper) { + final ContentScraper html = (ContentScraper) scraper; String refresh = html.getRefreshPath(); if (refresh != null && refresh.length() > 0) try {result.put(new AnchorURL(refresh), "refresh");} catch (final MalformedURLException e) {} AnchorURL canonical = html.getCanonical(); diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java index f60af57d3..8d54f93c3 100644 --- a/source/net/yacy/document/content/DCEntry.java +++ b/source/net/yacy/document/content/DCEntry.java @@ -45,6 +45,8 @@ import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.Document; +import net.yacy.document.parser.genericParser; +import net.yacy.search.schema.CollectionSchema; public 
class DCEntry extends MultiMapSolrParams { @@ -330,11 +332,15 @@ public class DCEntry extends MultiMapSolrParams { languages.add(getLanguage()); List t = new ArrayList(1); t.add(getTitle()); + + // for processing during indexing, embed entry as source scraperObject in a standard parserobj object + genericParser parserobj = new genericParser(this); // init the simplest parser with DCEntry as source/scraperObject used during indexing + return new Document( getIdentifier(true), "text/html", StandardCharsets.UTF_8.name(), - this, + parserobj, languages, getSubject(), // might be null t, @@ -343,7 +349,7 @@ public class DCEntry extends MultiMapSolrParams { null, getDescriptions(), getLon(), getLat(), - get("text_t", ""), + get(CollectionSchema.text_t.name(), ""), null, null, null, diff --git a/source/net/yacy/document/parser/genericParser.java b/source/net/yacy/document/parser/genericParser.java index 0d6d64d6b..1d9f2d1c8 100644 --- a/source/net/yacy/document/parser/genericParser.java +++ b/source/net/yacy/document/parser/genericParser.java @@ -44,6 +44,17 @@ public class genericParser extends AbstractParser implements Parser { // this parser is used if no other fits. 
This parser fits all } + /** + * Constructor to allow to set a scraperObject + * because it is desired to keep the scraper/source object protected + * This is used for surrogate parsers to set another source/scraper than ContentScraper + * @param scraper + */ + public genericParser(Object scraper) { + super("Generic Parser"); + this.scraperObject = scraper; + } + @Override public Document[] parse( final DigestURL location, diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index f668d3001..b293404bb 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -34,9 +34,7 @@ import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; import java.nio.charset.UnsupportedCharsetException; -import java.util.HashSet; import java.util.LinkedHashMap; -import java.util.Set; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.DigestURL; @@ -60,21 +58,29 @@ public class htmlParser extends AbstractParser implements Parser { private static final int maxLinks = 10000; - public final static String[] htmlExtensions = new String[]{ - "htm","html","shtml","shtm","stm","xhtml","phtml","phtm", - "tpl","php","php2","php3","php4","php5","cfm","asp","aspx","tex","txt","msg" - }; - - public final static Set htmlExtensionsSet; - - static { - htmlExtensionsSet = new HashSet<>(htmlExtensions.length); - for (String ext: htmlExtensions) htmlExtensionsSet.add(ext); - } - public htmlParser() { super("Streaming HTML Parser"); - this.SUPPORTED_EXTENSIONS.addAll(htmlExtensionsSet); + this.SUPPORTED_EXTENSIONS.add("htm"); + this.SUPPORTED_EXTENSIONS.add("html"); + this.SUPPORTED_EXTENSIONS.add("shtml"); + this.SUPPORTED_EXTENSIONS.add("shtm"); + this.SUPPORTED_EXTENSIONS.add("stm"); + this.SUPPORTED_EXTENSIONS.add("xhtml"); + this.SUPPORTED_EXTENSIONS.add("phtml"); + 
this.SUPPORTED_EXTENSIONS.add("phtm"); + this.SUPPORTED_EXTENSIONS.add("tpl"); + this.SUPPORTED_EXTENSIONS.add("php"); + this.SUPPORTED_EXTENSIONS.add("php2"); + this.SUPPORTED_EXTENSIONS.add("php3"); + this.SUPPORTED_EXTENSIONS.add("php4"); + this.SUPPORTED_EXTENSIONS.add("php5"); + this.SUPPORTED_EXTENSIONS.add("cfm"); + this.SUPPORTED_EXTENSIONS.add("asp"); + this.SUPPORTED_EXTENSIONS.add("aspx"); + this.SUPPORTED_EXTENSIONS.add("tex"); + this.SUPPORTED_EXTENSIONS.add("txt"); + this.SUPPORTED_EXTENSIONS.add("msg"); + this.SUPPORTED_MIME_TYPES.add("text/html"); this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml"); this.SUPPORTED_MIME_TYPES.add("application/xhtml+xml"); @@ -97,7 +103,8 @@ public class htmlParser extends AbstractParser implements Parser { try { // first get a document from the parsed html Charset[] detectedcharsetcontainer = new Charset[]{null}; - final ContentScraper scraper = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks); + scraperObject = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks); + ContentScraper scraper = (ContentScraper)scraperObject; // shortcut to access ContentScraper methods // parseToScraper also detects/corrects/sets charset from html content tag final Document document = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraper); Document documentSnapshot = null; @@ -130,7 +137,7 @@ public class htmlParser extends AbstractParser implements Parser { * @param scraper * @return */ - private static Document transformScraper(final DigestURL location, final String mimeType, final String charSet, final ContentScraper scraper) { + private Document transformScraper(final DigestURL location, final String mimeType, final String charSet, final ContentScraper scraper) { final String[] sections = new String[ scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + @@ 
-150,7 +157,7 @@ public class htmlParser extends AbstractParser implements Parser { location, mimeType, charSet, - scraper, + this, scraper.getContentLanguages(), scraper.getKeywords(), scraper.getTitles(), @@ -178,7 +185,7 @@ public class htmlParser extends AbstractParser implements Parser { } catch (UnsupportedEncodingException e) { sourceStream = new ByteArrayInputStream(UTF8.getBytes(input)); } - ContentScraper scraper; + ContentScraper scraper; // for this static method no need to init local this.scraperObject try { scraper = parseToScraper(location, documentCharset, vocabularyScraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks); } catch (Failure e) { @@ -242,6 +249,7 @@ public class htmlParser extends AbstractParser implements Parser { } // parsing the content + // for this static method no need to init local this.scraperObject here final ContentScraper scraper = new ContentScraper(location, maxLinks, vocabularyScraper, timezoneOffset); final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false, Math.max(64, Math.min(4096, sourceStream.available()))); try { diff --git a/source/net/yacy/document/parser/swfParser.java b/source/net/yacy/document/parser/swfParser.java index 81bd0473d..f0c7a163f 100644 --- a/source/net/yacy/document/parser/swfParser.java +++ b/source/net/yacy/document/parser/swfParser.java @@ -68,10 +68,9 @@ public class swfParser extends AbstractParser implements Parser { try { final SWF2HTML swf2html = new SWF2HTML(); String contents = ""; - ContentScraper htmlscraper=null; try { - contents = swf2html.convertSWFToHTML(source); - htmlscraper = htmlParser.parseToScraper(location, charset, scraper, timezoneOffset, contents, 100); + contents = swf2html.convertSWFToHTML(source); + scraperObject = htmlParser.parseToScraper(location, charset, scraper, timezoneOffset, contents, 100); } catch (final NegativeArraySizeException e) { throw new Parser.Failure(e.getMessage(), location); } catch (final 
IOException e) { @@ -79,29 +78,9 @@ public class swfParser extends AbstractParser implements Parser { } catch (final Exception e) { throw new Parser.Failure(e.getMessage(), location); } - /* - String url = null; - String urlnr = null; - final String linebreak = System.getProperty("line.separator"); - final List anchors = new ArrayList(); - int urls = 0; - int urlStart = -1; - int urlEnd = 0; - int p0 = 0; - //extracting urls - while ((urlStart = contents.indexOf("http://",urlEnd)) >= 0){ - urlEnd = contents.indexOf(linebreak,urlStart); - url = contents.substring(urlStart,urlEnd); - urlnr = Integer.toString(++urls); - AnchorURL u = new AnchorURL(url); - u.setNameProperty(urlnr); - anchors.add(u); - contents = contents.substring(0,urlStart)+contents.substring(urlEnd); - } - */ - - // As the result of parsing this function must return a plasmaParserDocument object + // As the result of parsing this function must return a plasmaParserDocument object + ContentScraper htmlscraper = (ContentScraper) this.scraperObject; // shortcut to access ContentScraper methods return new Document[]{new Document( location, // url of the source document mimeType, // the documents mime type diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 6814f8737..be50a96bc 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -601,11 +601,14 @@ public class Segment { crawlProfile != null && document.getDepth() <= crawlProfile.snapshotMaxdepth() && !crawlProfile.snapshotsMustnotmatch().matcher(urlNormalform).matches()) { // load pdf in case that is wanted. 
This can later be used to compute a web page preview in the search results - String ext = MultiProtocolURL.getFileExtension(url.getFile()).toLowerCase(); - if (ext.length() == 0 || url.getFile().length() <= 1 || htmlParser.htmlExtensionsSet.contains(ext)) { + Parser p = document.getParserObject(); + boolean mimesupported = false; + if (p instanceof htmlParser) + mimesupported = ((htmlParser)p).supportedMimeTypes().contains(document.dc_format()); + + if (mimesupported) // STORE IMAGE AND METADATA Transactions.store(vector, true, crawlProfile.snapshotLoadImage(), crawlProfile.snapshotReplaceold(), proxy, acceptLanguage); - } } // STORE TO SOLR diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index e621e22f4..7fda7067c 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -335,29 +335,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri if (allAttr || contains(CollectionSchema.audiolinkscount_i)) add(doc, CollectionSchema.audiolinkscount_i, md.laudio()); if (allAttr || contains(CollectionSchema.videolinkscount_i)) add(doc, CollectionSchema.videolinkscount_i, md.lvideo()); if (allAttr || contains(CollectionSchema.applinkscount_i)) add(doc, CollectionSchema.applinkscount_i, md.lapp()); - if (allAttr || contains(CollectionSchema.text_t)) { - // construct the text from other metadata parts. 
- // This is necessary here since that is used to search the link when no other data (parsed text body) is available - StringBuilder sb = new StringBuilder(120); - // accText(sb, md.dc_title()); // default search field via getQueryFields(), not needed for snippet (always displayed) - // accText(sb, md.dc_creator()); // author is in Default ranking/getQueryFields - // accText(sb, md.dc_publisher()); // has it's own metadata field publisher_t (not part of default queryfields) and mostly N/A - // accText(sb, md.snippet()); // above added to description_txt, default search field via getQueryFields(), description_txt incl. in snippet calculation - accText(sb, md.url().toTokens()); - // accText(sb, keywords); // default search field via getQueryFields(), keywords not incl. in snippet calculation - add(doc, CollectionSchema.text_t, sb.toString()); - } return doc; } - private static void accText(final StringBuilder sb, String text) { - if (text == null || text.length() == 0) return; - if (sb.length() != 0) sb.append(' '); - text = text.trim(); - if (!text.isEmpty() && text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.'); - } - public static class Subgraph { public final ArrayList[] urlProtocols, urlStubs, urlAnchorTexts; @SuppressWarnings("unchecked") @@ -541,11 +522,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size()); int c = 0; - final Object parser = document.getParserObject(); + final Object scraper = document.getScraperObject(); boolean containsCanonical = false; DigestURL canonical = null; - if (parser instanceof ContentScraper) { - final ContentScraper html = (ContentScraper) parser; + if (scraper instanceof ContentScraper) { + final ContentScraper html = (ContentScraper) scraper; List images = html.getImages(); // header tags @@ -885,9 +866,9 @@ public class CollectionConfiguration extends SchemaConfiguration implements 
Seri } } - if (parser instanceof DCEntry) { + if (scraper instanceof DCEntry) { // the document was created with a surrogate parsing; overwrite all md: -entries to Solr - DCEntry dcentry = (DCEntry) parser; + DCEntry dcentry = (DCEntry) scraper; for (Map.Entry entry: dcentry.getMap().entrySet()) { String tag = entry.getKey(); if (!tag.startsWith("md:") || tag.length() < 4) continue;