From 4304e07e6fc7be21293dac9af601c1e5a241119f Mon Sep 17 00:00:00 2001 From: Michael Christen Date: Sun, 15 Jan 2023 01:20:12 +0100 Subject: [PATCH] crawl profile adoption to new tag valency attribute --- source/net/yacy/crawler/CrawlSwitchboard.java | 55 ++-- .../net/yacy/crawler/RecrawlBusyThread.java | 4 +- .../net/yacy/crawler/data/CrawlProfile.java | 169 ++++++----- .../net/yacy/crawler/retrieval/Response.java | 11 +- source/net/yacy/document/AbstractParser.java | 71 ++--- source/net/yacy/document/Parser.java | 130 ++++---- source/net/yacy/document/TextParser.java | 78 +++-- .../document/importer/MediawikiImporter.java | 205 ++++++------- .../parser/AbstractCompressorParser.java | 282 ++++++++++-------- .../net/yacy/document/parser/bzipParser.java | 139 ++++----- .../net/yacy/document/parser/gzipParser.java | 214 ++++++------- .../net/yacy/document/parser/htmlParser.java | 154 +++++++--- .../yacy/document/parser/sevenzipParser.java | 27 +- .../net/yacy/document/parser/tarParser.java | 280 ++++++++--------- .../net/yacy/document/parser/zipParser.java | 6 +- source/net/yacy/htroot/Crawler_p.java | 1 + source/net/yacy/htroot/QuickCrawlLink_p.java | 3 +- .../net/yacy/repository/LoaderDispatcher.java | 11 +- source/net/yacy/search/Switchboard.java | 6 +- .../net/yacy/search/index/DocumentIndex.java | 21 +- 20 files changed, 1024 insertions(+), 843 deletions(-) diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java index bbcbc4018..aca7b95cd 100644 --- a/source/net/yacy/crawler/CrawlSwitchboard.java +++ b/source/net/yacy/crawler/CrawlSwitchboard.java @@ -51,6 +51,7 @@ import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.CrawlQueues; import net.yacy.crawler.data.NoticedURL.StackType; import net.yacy.crawler.retrieval.Request; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.RowHandleSet; @@ -60,7 +61,7 @@ import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; public final class CrawlSwitchboard { - + public static final String CRAWL_PROFILE_AUTOCRAWL_DEEP = "autocrawlDeep"; public static final String CRAWL_PROFILE_AUTOCRAWL_SHALLOW = "autocrawlShallow"; public static final String CRAWL_PROFILE_RECRAWL_JOB = "recrawlJob"; @@ -75,7 +76,7 @@ public final class CrawlSwitchboard { public static Set DEFAULT_PROFILES = new HashSet(); static { - DEFAULT_PROFILES.add(CRAWL_PROFILE_AUTOCRAWL_DEEP); + DEFAULT_PROFILES.add(CRAWL_PROFILE_AUTOCRAWL_DEEP); DEFAULT_PROFILES.add(CRAWL_PROFILE_AUTOCRAWL_SHALLOW); DEFAULT_PROFILES.add(CRAWL_PROFILE_RECRAWL_JOB); DEFAULT_PROFILES.add(CRAWL_PROFILE_PROXY); @@ -93,11 +94,11 @@ public final class CrawlSwitchboard { // Default time cycle in minutes before an indexed URL by a given crawl profile can be accepted for recrawl */ - /** - * The default recrawl time cycle in minutes for recrawl jobs. The recrawl date - * limit can be set up by the recrawl job selection query, but a default limit - * prevent unwanted overload on targets) - */ + /** + * The default recrawl time cycle in minutes for recrawl jobs. 
The recrawl date + * limit can be set up by the recrawl job selection query, but a default limit + * prevent unwanted overload on targets) + */ public static final long CRAWL_PROFILE_RECRAWL_JOB_RECRAWL_CYCLE = 60L; // on hour public static final long CRAWL_PROFILE_PROXY_RECRAWL_CYCLE = 60L * 24L; // one day public static final long CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE = 60L * 24L * 30L; // 30 days @@ -139,7 +140,7 @@ public final class CrawlSwitchboard { try { p = new CrawlProfile(this.profilesActiveCrawls.get(handle)); } catch (final IOException | SpaceExceededException | RuntimeException e ) { - ConcurrentLog.warn("CrawlProfiles", "Could not load profile " + handle, e); + ConcurrentLog.warn("CrawlProfiles", "Could not load profile " + handle, e); p = null; } if ( p == null ) { @@ -275,16 +276,15 @@ public final class CrawlSwitchboard { public RowHandleSet getURLHashes(final byte[] profileKey) { return this.profilesActiveCrawlsCounter.get(ASCII.String(profileKey)); } - - + private void initActiveCrawlProfiles() { - final Switchboard sb = Switchboard.getSwitchboard(); - - // generate new default entry for deep auto crawl - this.defaultAutocrawlDeepProfile = - new CrawlProfile( - CRAWL_PROFILE_AUTOCRAWL_DEEP, - CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch + final Switchboard sb = Switchboard.getSwitchboard(); + + // generate new default entry for deep auto crawl + this.defaultAutocrawlDeepProfile = + new CrawlProfile( + CRAWL_PROFILE_AUTOCRAWL_DEEP, + CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch @@ -308,12 +308,13 @@ public final class CrawlSwitchboard { CacheStrategy.NOCACHE, "robot_" + CRAWL_PROFILE_AUTOCRAWL_DEEP, ClientIdentification.yacyInternetCrawlerAgentName, + TagValency.EVAL, null, null, 0); - this.profilesActiveCrawls.put( - UTF8.getBytes(this.defaultAutocrawlDeepProfile.handle()), - this.defaultAutocrawlDeepProfile); - // generate new default entry for shallow auto crawl + this.profilesActiveCrawls.put( + UTF8.getBytes(this.defaultAutocrawlDeepProfile.handle()), + this.defaultAutocrawlDeepProfile); + // generate new default entry for shallow auto crawl this.defaultAutocrawlShallowProfile = new CrawlProfile( CRAWL_PROFILE_AUTOCRAWL_SHALLOW, @@ -341,6 +342,7 @@ public final class CrawlSwitchboard { CacheStrategy.NOCACHE, "robot_" + CRAWL_PROFILE_AUTOCRAWL_SHALLOW, ClientIdentification.yacyInternetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -364,7 +366,7 @@ public final class CrawlSwitchboard { true, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, - false, true, true, false, // crawlingQ, followFrames, obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow, + false, true, true, false, // crawlingQ, followFrames, obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow, sb.getConfigBool(SwitchboardConstants.PROXY_INDEXING_LOCAL_TEXT, true), sb.getConfigBool(SwitchboardConstants.PROXY_INDEXING_LOCAL_MEDIA, true), true, @@ -373,6 +375,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFFRESH, "robot_" + CRAWL_PROFILE_PROXY, ClientIdentification.yacyProxyAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -405,6 +408,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFFRESH, "robot_" + CRAWL_PROFILE_REMOTE, ClientIdentification.yacyInternetCrawlerAgentName, + TagValency.EVAL, null, null, 0); 
this.profilesActiveCrawls.put( @@ -437,6 +441,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, ClientIdentification.yacyIntranetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -469,6 +474,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, ClientIdentification.yacyIntranetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -509,6 +515,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_GREEDY_LEARNING_TEXT, ClientIdentification.browserAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -541,6 +548,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, ClientIdentification.yacyIntranetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -573,6 +581,7 @@ public final class CrawlSwitchboard { CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, ClientIdentification.yacyIntranetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -605,6 +614,7 @@ public final class CrawlSwitchboard { CacheStrategy.NOCACHE, "robot_" + CRAWL_PROFILE_SURROGATE, ClientIdentification.yacyIntranetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put( @@ -640,6 +650,7 @@ public final class CrawlSwitchboard { CacheStrategy.NOCACHE, collection, ClientIdentification.yacyIntranetCrawlerAgentName, + TagValency.EVAL, null, null, 0); this.profilesActiveCrawls.put(UTF8.getBytes(genericPushProfile.handle()), genericPushProfile); diff --git a/source/net/yacy/crawler/RecrawlBusyThread.java b/source/net/yacy/crawler/RecrawlBusyThread.java index 7c3571705..af32db0cf 100644 --- a/source/net/yacy/crawler/RecrawlBusyThread.java +++ b/source/net/yacy/crawler/RecrawlBusyThread.java @@ -43,6 +43,7 @@ import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.NoticedURL; import net.yacy.crawler.retrieval.Request; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.workflow.AbstractBusyThread; import net.yacy.search.Switchboard; import net.yacy.search.schema.CollectionSchema; @@ -355,7 +356,8 @@ public class RecrawlBusyThread extends AbstractBusyThread { true, true, true, false, // crawlingQ, followFrames, obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow, true, true, true, false, -1, false, true, CrawlProfile.MATCH_NEVER_STRING, CacheStrategy.IFFRESH, "robot_" + CrawlSwitchboard.CRAWL_PROFILE_RECRAWL_JOB, - ClientIdentification.yacyInternetCrawlerAgentName, null, null, 0); + ClientIdentification.yacyInternetCrawlerAgentName, + TagValency.EVAL, null, null, 0); return profile; } diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java index 72d7bff9a..7acb8bd2c 100644 --- a/source/net/yacy/crawler/data/CrawlProfile.java +++ b/source/net/yacy/crawler/data/CrawlProfile.java @@ -55,6 +55,7 @@ import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.CrawlSwitchboard; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.data.word.Word; import net.yacy.search.query.QueryParams; import net.yacy.search.schema.CollectionSchema; @@ -69,19 +70,19 @@ public class CrawlProfile extends 
ConcurrentHashMap implements M /** Regular expression pattern matching everything */ public static final String MATCH_ALL_STRING = ".*"; - + /** Regular expression pattern matching nothing */ public static final String MATCH_NEVER_STRING = ""; - + /** Empty Solr query */ public static final String SOLR_EMPTY_QUERY = ""; - + /** Match all Solr query */ public static final String SOLR_MATCH_ALL_QUERY = AbstractSolrConnector.CATCHALL_QUERY; - + /** Regular expression matching everything */ public static final Pattern MATCH_ALL_PATTERN = Pattern.compile(MATCH_ALL_STRING); - + /** Regular expression matching nothing */ public static final Pattern MATCH_NEVER_PATTERN = Pattern.compile(MATCH_NEVER_STRING); @@ -126,14 +127,15 @@ public class CrawlProfile extends ConcurrentHashMap implements M INDEX_TEXT ("indexText", false, CrawlAttribute.BOOLEAN, "Index Text"), INDEX_MEDIA ("indexMedia", false, CrawlAttribute.BOOLEAN, "Index Media"), COLLECTIONS ("collections", false, CrawlAttribute.STRING, "Collections (comma-separated list)"), - IGNORE_DIV_CLASS_NAME ("ignore_class_name", false, CrawlAttribute.STRING, "Ignore DIV Class names"), + DEFAULT_VALENCY ("default_valency", false, CrawlAttribute.STRING, "default tag valency"), + VALENCY_SWITCH_TAG_NAME ("valency_switch_tag_name", false, CrawlAttribute.STRING, "DIV Class names when default valency shall be switched"), SCRAPER ("scraper", false, CrawlAttribute.STRING, "Declaration for Vocabulary Scraper"), TIMEZONEOFFSET ("timezoneOffset", true, CrawlAttribute.INTEGER, "Time Zone of Crawl Start Agent"); - + public static final int BOOLEAN = 0; public static final int INTEGER = 1; public static final int STRING = 2; - + public final String key, label; public final boolean readonly; public final int type; @@ -143,39 +145,39 @@ public class CrawlProfile extends ConcurrentHashMap implements M this.type = type; this.label = label; } - + @Override public String toString() { return this.key; } } - - + private Pattern crawlerurlmustmatch = null, crawlerurlmustnotmatch = null; - + /** Pattern on the URL a document must match to allow adding its embedded links to the crawl stack */ private Pattern crawlerOriginUrlMustMatch = null; - + /** Pattern on the URL a document must not match to allow adding its embedded links to the crawl stack */ private Pattern crawlerOriginUrlMustNotMatch = null; - + private Pattern crawleripmustmatch = null, crawleripmustnotmatch = null; private Pattern crawlernodepthlimitmatch = null; private Pattern indexurlmustmatch = null, indexurlmustnotmatch = null; private Pattern indexcontentmustmatch = null, indexcontentmustnotmatch = null; - + /** Pattern on the media type documents must match before being indexed * @see CollectionSchema#content_type */ private Pattern indexMediaTypeMustMatch = null; - + /** Pattern on the media type documents must not match before being indexed * @see CollectionSchema#content_type */ private Pattern indexMediaTypeMustNotMatch = null; - + private Pattern snapshotsMustnotmatch = null; private final Map doms; - private final Set ignore_class_name; + private final TagValency defaultValency; + private final Set valencySwitchTagNames; private final VocabularyScraper scraper; /** @@ -238,7 +240,8 @@ public class CrawlProfile extends ConcurrentHashMap implements M final CacheStrategy cacheStrategy, final String collections, final String userAgentName, - final Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset) { 
super(40); @@ -252,40 +255,42 @@ public class CrawlProfile extends ConcurrentHashMap implements M put(CrawlAttribute.NAME.key, name); put(CrawlAttribute.AGENT_NAME.key, userAgentName); put(CrawlAttribute.CRAWLER_ALWAYS_CHECK_MEDIA_TYPE.key, true); - put(CrawlAttribute.CRAWLER_URL_MUSTMATCH.key, (crawlerUrlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerUrlMustMatch); - put(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch); - put(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key, (crawlerUrlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerUrlMustMatch); - put(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch); - put(CrawlAttribute.CRAWLER_IP_MUSTMATCH.key, (crawlerIpMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerIpMustMatch); - put(CrawlAttribute.CRAWLER_IP_MUSTNOTMATCH.key, (crawlerIpMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerIpMustNotMatch); - put(CrawlAttribute.CRAWLER_COUNTRY_MUSTMATCH.key, (crawlerCountryMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerCountryMustMatch); + put(CrawlAttribute.CRAWLER_URL_MUSTMATCH.key, (crawlerUrlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerUrlMustMatch); + put(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch); + put(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key, (crawlerUrlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerUrlMustMatch); + put(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch); + put(CrawlAttribute.CRAWLER_IP_MUSTMATCH.key, (crawlerIpMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerIpMustMatch); + put(CrawlAttribute.CRAWLER_IP_MUSTNOTMATCH.key, (crawlerIpMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerIpMustNotMatch); + put(CrawlAttribute.CRAWLER_COUNTRY_MUSTMATCH.key, (crawlerCountryMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerCountryMustMatch); put(CrawlAttribute.CRAWLER_URL_NODEPTHLIMITMATCH.key, (crawlerNoDepthLimitMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerNoDepthLimitMatch); - put(CrawlAttribute.INDEXING_URL_MUSTMATCH.key, (indexUrlMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexUrlMustMatch); - put(CrawlAttribute.INDEXING_URL_MUSTNOTMATCH.key, (indexUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexUrlMustNotMatch); - put(CrawlAttribute.INDEXING_CONTENT_MUSTMATCH.key, (indexContentMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexContentMustMatch); + put(CrawlAttribute.INDEXING_URL_MUSTMATCH.key, (indexUrlMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexUrlMustMatch); + put(CrawlAttribute.INDEXING_URL_MUSTNOTMATCH.key, (indexUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexUrlMustNotMatch); + put(CrawlAttribute.INDEXING_CONTENT_MUSTMATCH.key, (indexContentMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexContentMustMatch); put(CrawlAttribute.INDEXING_CONTENT_MUSTNOTMATCH.key, (indexContentMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexContentMustNotMatch); - put(CrawlAttribute.DEPTH.key, depth); - put(CrawlAttribute.DIRECT_DOC_BY_URL.key, directDocByURL); - put(CrawlAttribute.RECRAWL_IF_OLDER.key, recrawlIfOlder == null ? 
Long.MAX_VALUE : recrawlIfOlder.getTime()); - put(CrawlAttribute.DOM_MAX_PAGES.key, domMaxPages); - put(CrawlAttribute.CRAWLING_Q.key, crawlingQ); // crawling of urls with '?' - put(CrawlAttribute.FOLLOW_FRAMES.key, followFrames); // load pages contained in frames or ifames - put(CrawlAttribute.OBEY_HTML_ROBOTS_NOINDEX.key, obeyHtmlRobotsNoindex); // if false, then a meta robots tag containing 'noindex' is ignored + put(CrawlAttribute.DEPTH.key, depth); + put(CrawlAttribute.DIRECT_DOC_BY_URL.key, directDocByURL); + put(CrawlAttribute.RECRAWL_IF_OLDER.key, recrawlIfOlder == null ? Long.MAX_VALUE : recrawlIfOlder.getTime()); + put(CrawlAttribute.DOM_MAX_PAGES.key, domMaxPages); + put(CrawlAttribute.CRAWLING_Q.key, crawlingQ); // crawling of urls with '?' + put(CrawlAttribute.FOLLOW_FRAMES.key, followFrames); // load pages contained in frames or ifames + put(CrawlAttribute.OBEY_HTML_ROBOTS_NOINDEX.key, obeyHtmlRobotsNoindex); // if false, then a meta robots tag containing 'noindex' is ignored put(CrawlAttribute.OBEY_HTML_ROBOTS_NOFOLLOW.key, obeyHtmlRobotsNofollow); - put(CrawlAttribute.INDEX_TEXT.key, indexText); - put(CrawlAttribute.INDEX_MEDIA.key, indexMedia); - put(CrawlAttribute.STORE_HTCACHE.key, storeHTCache); - put(CrawlAttribute.REMOTE_INDEXING.key, remoteIndexing); - put(CrawlAttribute.SNAPSHOTS_MAXDEPTH.key, snapshotsMaxDepth); - put(CrawlAttribute.SNAPSHOTS_LOADIMAGE.key, snapshotsLoadImage); - put(CrawlAttribute.SNAPSHOTS_REPLACEOLD.key, snapshotsReplaceOld); - put(CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key, snapshotsMustnotmatch); - put(CrawlAttribute.CACHE_STRAGEGY.key, cacheStrategy.toString()); - put(CrawlAttribute.COLLECTIONS.key, CommonPattern.SPACE.matcher(collections.trim()).replaceAll("")); + put(CrawlAttribute.INDEX_TEXT.key, indexText); + put(CrawlAttribute.INDEX_MEDIA.key, indexMedia); + put(CrawlAttribute.STORE_HTCACHE.key, storeHTCache); + put(CrawlAttribute.REMOTE_INDEXING.key, remoteIndexing); + put(CrawlAttribute.SNAPSHOTS_MAXDEPTH.key, snapshotsMaxDepth); + put(CrawlAttribute.SNAPSHOTS_LOADIMAGE.key, snapshotsLoadImage); + put(CrawlAttribute.SNAPSHOTS_REPLACEOLD.key, snapshotsReplaceOld); + put(CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key, snapshotsMustnotmatch); + put(CrawlAttribute.CACHE_STRAGEGY.key, cacheStrategy.toString()); + put(CrawlAttribute.COLLECTIONS.key, CommonPattern.SPACE.matcher(collections.trim()).replaceAll("")); // we transform the ignore_class_name and scraper information into a JSON Array - this.ignore_class_name = ignore_class_name == null ? new HashSet() : ignore_class_name; - String jsonString = new JSONArray(ignore_class_name).toString(); - put(CrawlAttribute.IGNORE_DIV_CLASS_NAME.key, jsonString); + this.defaultValency = defaultValency; + this.valencySwitchTagNames = valencySwitchTagNames == null ? new HashSet() : valencySwitchTagNames; + String jsonString = new JSONArray(valencySwitchTagNames).toString(); + put(CrawlAttribute.DEFAULT_VALENCY.key, defaultValency.name()); + put(CrawlAttribute.VALENCY_SWITCH_TAG_NAME.key, jsonString); this.scraper = scraper == null ? new VocabularyScraper() : scraper; jsonString = this.scraper.toString(); assert jsonString != null && jsonString.length() > 0 && jsonString.charAt(0) == '{' : "jsonString = " + jsonString; @@ -305,9 +310,11 @@ public class CrawlProfile extends ConcurrentHashMap implements M super(ext == null ? 
1 : ext.size()); if (ext != null) putAll(ext); this.doms = new ConcurrentHashMap(); - String jsonString = ext.get(CrawlAttribute.IGNORE_DIV_CLASS_NAME.key); + String defaultValency = ext.get(CrawlAttribute.DEFAULT_VALENCY.key); + this.defaultValency = defaultValency == null || defaultValency.length() == 0 ? TagValency.EVAL : TagValency.valueOf(defaultValency); + String jsonString = ext.get(CrawlAttribute.VALENCY_SWITCH_TAG_NAME.key); JSONArray a; - if(jsonString == null) { + if (jsonString == null) { a = new JSONArray(); } else { try { @@ -317,9 +324,9 @@ public class CrawlProfile extends ConcurrentHashMap implements M a = new JSONArray(); } } - this.ignore_class_name = new HashSet(); + this.valencySwitchTagNames = new HashSet(); for (int i = 0; i < a.length(); i++) try { - this.ignore_class_name.add(a.getString(i)); + this.valencySwitchTagNames.add(a.getString(i)); } catch (JSONException e) {} jsonString = ext.get(CrawlAttribute.SCRAPER.key); if (jsonString == null || jsonString.length() == 0) { @@ -336,14 +343,18 @@ public class CrawlProfile extends ConcurrentHashMap implements M } } - public Set ignoreDivClassName() { - return this.ignore_class_name; + public TagValency defaultValency() { + return this.defaultValency; + } + + public Set valencySwitchTagNames() { + return this.valencySwitchTagNames; } public VocabularyScraper scraper() { return this.scraper; } - + public void domInc(final String domain) { if (domain == null) return; // may be correct for file system crawls final AtomicInteger dp = this.doms.get(domain); @@ -427,7 +438,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M //if (r == null) return null; return r; } - + private Map cmap = null; /** @@ -440,7 +451,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M this.cmap = collectionParser(r); return this.cmap; } - + public static Map collectionParser(String collectionString) { if (collectionString == null || collectionString.length() == 0) return new HashMap(); String[] cs = CommonPattern.COMMA.split(collectionString); @@ -470,7 +481,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M final String r = get(CrawlAttribute.COLLECTIONS.key); return r == null || r.length() == 0 || "user".equals(r) ? name() : r; } - + /** * Gets the regex which must be matched by URLs in order to be crawled. * @return regex which must be matched @@ -484,7 +495,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.crawlerurlmustmatch; } - + /** * Render the urlMustMatchPattern as a String of limited size, suffixing it with * "..." when it is truncated. 
Used to prevent unnecessary growth of the logs, @@ -516,7 +527,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.crawlerurlmustnotmatch; } - + /** * Get the pattern on the URL a document must match to allow adding its embedded links to the crawl stack * @@ -538,7 +549,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.crawlerOriginUrlMustMatch; } - + /** * Get the pattern on the URL a document must not match to allow adding its embedded links to the crawl stack * @@ -601,7 +612,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (list.length == 1 && list.length == 0) list = new String[0]; return list; } - + /** * If the regex matches with the url, then there is no depth limit on the crawl (it overrides depth == 0) * @return regex which must be matched @@ -643,7 +654,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.indexurlmustnotmatch; } - + /** * Gets the regex which must be matched by URLs in order to be indexed. * @return regex which must be matched @@ -671,7 +682,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.indexcontentmustnotmatch; } - + /** * Get the Pattern on media type that documents must match in order to be indexed * @@ -693,7 +704,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.indexMediaTypeMustMatch; } - + /** * Get the Pattern on media type that documents must not match in order to be indexed * @@ -715,9 +726,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } return this.indexMediaTypeMustNotMatch; } - - - + /** * Gets depth of crawl job (or height of the tree which will be * created by the crawler). @@ -743,7 +752,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } - + /** * @return true when the crawler must always cross check the eventual URL file * extension against the actual Media Type, even when file extension is @@ -772,7 +781,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M public void setCacheStrategy(final CacheStrategy newStrategy) { put(CrawlAttribute.CACHE_STRAGEGY.key, newStrategy.toString()); } - + /** * Gets the minimum date that an entry must have to be re-crawled. 
* @return time in ms representing a date @@ -847,13 +856,13 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } - + public boolean remoteIndexing() { final String r = get(CrawlAttribute.REMOTE_INDEXING.key); if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } - + public int snapshotMaxdepth() { final String r = get(CrawlAttribute.SNAPSHOTS_MAXDEPTH.key); if (r == null) return -1; @@ -866,7 +875,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M return -1; } } - + public boolean snapshotLoadImage() { final String r = get(CrawlAttribute.SNAPSHOTS_LOADIMAGE.key); if (r == null) return false; @@ -878,7 +887,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (r == null) return false; return (r.equals(Boolean.TRUE.toString())); } - + public Pattern snapshotsMustnotmatch() { if (this.snapshotsMustnotmatch == null) { final String r = get(CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key); @@ -887,7 +896,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M } catch (final PatternSyntaxException e) { this.snapshotsMustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN; } } return this.snapshotsMustnotmatch; - } + } public int timezoneOffset() { final String timezoneOffset = get(CrawlAttribute.TIMEZONEOFFSET.key); @@ -898,7 +907,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M return 0; } } - + /** * get a recrawl date for a given age in minutes * @param oldTimeMinutes @@ -946,7 +955,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M if ("http".equals(protocol) || "https".equals(protocol)) protocol = "https?+"; return new StringBuilder(host.length() + 20).append(protocol).append("://(www.)?").append(Pattern.quote(host.toLowerCase(Locale.ROOT))).append(url.getPath()).append(".*").toString(); } - + public boolean isPushCrawlProfile() { return this.name().startsWith(CrawlProfile.CRAWL_PROFILE_PUSH_STUB); } @@ -1008,7 +1017,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", this.handle()); prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", deleteButton); prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton_handle", this.handle()); - + int i = 0; if (active && this.domMaxPages() > 0 && this.domMaxPages() != Integer.MAX_VALUE) { String item; @@ -1021,7 +1030,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M prop.put(CRAWL_PROFILE_PREFIX+count+"_crawlingDomFilterContent", i); } - + public static void main(String[] args) { // test to convert the key set from set to string and back Set a = new HashSet<>(); diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index 42a1558c7..acf7c6fd0 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -48,6 +48,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.search.Switchboard; public class Response { @@ -853,7 +854,7 @@ public class Response { // 4) proxy-load (initiator is "------------") // 5) local prefetch/crawling (initiator is own seedHash) // 6) local fetching for global crawling (other known or unknown initiator) - // 7) local surrogates processing (can not be known here 
: crawl profile is required) + // 7) local surrogates processing (can not be known here : crawl profile is required) EventOrigin processCase = EventOrigin.UNKNOWN; // FIXME the equals seems to be incorrect: String.equals(boolean) if (initiator() == null || initiator().length == 0 || ASCII.String(initiator()).equals("------------")) { @@ -873,9 +874,13 @@ public class Response { final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.getContentType()); if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url()); try { - return TextParser.parseSource(url(), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? StandardCharsets.UTF_8.name() : this.responseHeader.getCharacterEncoding(), new HashSet(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content); + return TextParser.parseSource( + url(), this.responseHeader == null ? null : this.responseHeader.getContentType(), + this.responseHeader == null ? StandardCharsets.UTF_8.name() : this.responseHeader.getCharacterEncoding(), + TagValency.EVAL, new HashSet(), + new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content); } catch(Parser.Failure e) { - throw e; + throw e; } catch (final Exception e) { return null; } diff --git a/source/net/yacy/document/AbstractParser.java b/source/net/yacy/document/AbstractParser.java index 893687497..f8132be1e 100644 --- a/source/net/yacy/document/AbstractParser.java +++ b/source/net/yacy/document/AbstractParser.java @@ -32,6 +32,7 @@ import java.util.Set; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.document.parser.html.TagValency; public abstract class AbstractParser implements Parser { @@ -41,20 +42,20 @@ public abstract class AbstractParser implements Parser { protected final Set SUPPORTED_MIME_TYPES = new LinkedHashSet(); protected final Set SUPPORTED_EXTENSIONS = new HashSet(); private final String name; - + /** * initialize a parser with a name * @param name */ public AbstractParser(final String name) { - this.name = name; - } + this.name = name; + } /* * The following abstract implementations create a circular call which would cause an endless loop when called. * They are both here because one of them must be overridden by the implementing class. */ - + @Override public Document[] parse( DigestURL url, @@ -64,7 +65,7 @@ public abstract class AbstractParser implements Parser { int timezoneOffset, InputStream source ) throws Parser.Failure, InterruptedException { - return parse(url, mimeType, charset, new HashSet(), scraper, timezoneOffset, source); + return parse(url, mimeType, charset, TagValency.EVAL, new HashSet(), scraper, timezoneOffset, source); } @Override @@ -72,15 +73,15 @@ public abstract class AbstractParser implements Parser { DigestURL url, String mimeType, String charset, - Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, VocabularyScraper scraper, int timezoneOffset, InputStream source ) throws Parser.Failure, InterruptedException { - return parse(url, mimeType, charset, scraper, timezoneOffset, source); + return parse(url, mimeType, charset, scraper, timezoneOffset, source); } - - + /* * The following abstract implementations create a circular call which would cause an endless loop when called. * They are both here because one of them must be overridden by the implementing class. 
@@ -88,32 +89,33 @@ public abstract class AbstractParser implements Parser { @Override public Document[] parseWithLimits( - final DigestURL location, - final String mimeType, - final String charset, - final VocabularyScraper scraper, - final int timezoneOffset, - final InputStream source, - final int maxLinks, - final long maxBytes) throws UnsupportedOperationException, Failure, InterruptedException { - return parseWithLimits(location, mimeType, charset, new HashSet(), scraper, timezoneOffset, source, maxLinks, maxBytes); + final DigestURL location, + final String mimeType, + final String charset, + final VocabularyScraper scraper, + final int timezoneOffset, + final InputStream source, + final int maxLinks, + final long maxBytes) throws UnsupportedOperationException, Failure, InterruptedException { + return parseWithLimits(location, mimeType, charset, TagValency.EVAL, new HashSet(), scraper, timezoneOffset, source, maxLinks, maxBytes); } - + @Override public Document[] parseWithLimits( - DigestURL location, - String mimeType, - String charset, - final Set ignore_class_name, - VocabularyScraper scraper, - int timezoneOffset, - InputStream source, - int maxLinks, - long maxBytes) - throws Failure, InterruptedException, UnsupportedOperationException { - return parseWithLimits(location, mimeType, charset, scraper, timezoneOffset, source, maxLinks, maxBytes); + DigestURL location, + String mimeType, + String charset, + final TagValency defaultValency, + final Set valencySwitchTagNames, + VocabularyScraper scraper, + int timezoneOffset, + InputStream source, + int maxLinks, + long maxBytes) + throws Failure, InterruptedException, UnsupportedOperationException { + return parseWithLimits(location, mimeType, charset, scraper, timezoneOffset, source, maxLinks, maxBytes); } - + /** * return the name of the parser */ @@ -164,12 +166,11 @@ public abstract class AbstractParser implements Parser { if (t != null) c.add(t); return c; } - + @Override public boolean isParseWithLimitsSupported() { - /* Please override on subclasses when parseWithLimits is supported */ - return false; + /* Please override on subclasses when parseWithLimits is supported */ + return false; } - } diff --git a/source/net/yacy/document/Parser.java b/source/net/yacy/document/Parser.java index fda309be9..f2940a02c 100644 --- a/source/net/yacy/document/Parser.java +++ b/source/net/yacy/document/Parser.java @@ -28,6 +28,7 @@ import java.util.Set; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.document.parser.html.TagValency; public interface Parser { @@ -63,72 +64,87 @@ public interface Parser { int timezoneOffset, InputStream source ) throws Parser.Failure, InterruptedException; - + public Document[] parse( DigestURL url, String mimeType, String charset, - Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, VocabularyScraper scraper, int timezoneOffset, InputStream source ) throws Parser.Failure, InterruptedException; - + + /** + * Parse an input stream, eventually terminating processing when a total of + * maxLinks URLS (anchors, images links, media links...) have been reached, + * or when maxBytes content bytes have been processed, thus potentially + * resulting in partially parsed documents (with + * {@link Document#isPartiallyParsed()} returning true). 
Some parser + * implementations will not support parsing within maxLinks or maxBytes + * limits : make sure to check this by calling fist + * {@link #isParseWithLimitsSupported()}, or a UnsupportedOperationException + * could be thrown. + * + * @param url + * the URL of the source + * @param mimeType + * the mime type of the source, if known + * @param charset + * the charset name of the source, if known + * @param scraper + * an entity scraper to detect facets from text annotation + * context + * @param timezoneOffset + * the local time zone offset + * @param source + * a input stream + * @param maxLinks + * the maximum total number of links to parse and add to the + * result documents + * @param maxBytes + * the maximum number of content bytes to process + * @return a list of documents that result from parsing the source, with + * empty or null text. + * @throws Parser.Failure + * when the parser processing failed + * @throws InterruptedException + * when the processing was interrupted before termination + * @throws UnsupportedOperationException + * when the parser implementation doesn't support parsing within + * limits + */ + public Document[] parseWithLimits( + DigestURL url, + String mimeType, + String charset, + VocabularyScraper scraper, + int timezoneOffset, + InputStream source, + int maxLinks, + long maxBytes) + throws Parser.Failure, InterruptedException, UnsupportedOperationException; + + + public Document[] parseWithLimits( + final DigestURL location, + final String mimeType, + final String documentCharset, + final TagValency defaultValency, + final Set valencySwitchTagNames, + final VocabularyScraper vocscraper, + final int timezoneOffset, + final InputStream sourceStream, + final int maxLinks, + final long maxBytes) + throws Parser.Failure, InterruptedException, UnsupportedOperationException; + /** - * Parse an input stream, eventually terminating processing when a total of - * maxLinks URLS (anchors, images links, media links...) have been reached, - * or when maxBytes content bytes have been processed, thus potentially - * resulting in partially parsed documents (with - * {@link Document#isPartiallyParsed()} returning true). Some parser - * implementations will not support parsing within maxLinks or maxBytes - * limits : make sure to check this by calling fist - * {@link #isParseWithLimitsSupported()}, or a UnsupportedOperationException - * could be thrown. - * - * @param url - * the URL of the source - * @param mimeType - * the mime type of the source, if known - * @param charset - * the charset name of the source, if known - * @param scraper - * an entity scraper to detect facets from text annotation - * context - * @param timezoneOffset - * the local time zone offset - * @param source - * a input stream - * @param maxLinks - * the maximum total number of links to parse and add to the - * result documents - * @param maxBytes - * the maximum number of content bytes to process - * @return a list of documents that result from parsing the source, with - * empty or null text. 
- * @throws Parser.Failure - * when the parser processing failed - * @throws InterruptedException - * when the processing was interrupted before termination - * @throws UnsupportedOperationException - * when the parser implementation doesn't support parsing within - * limits - */ - public Document[] parseWithLimits(DigestURL url, String mimeType, String charset, - VocabularyScraper scraper, - int timezoneOffset, InputStream source, int maxLinks, long maxBytes) - throws Parser.Failure, InterruptedException, UnsupportedOperationException; - - - public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String documentCharset, - final Set ignore_class_name, final VocabularyScraper vocscraper, - final int timezoneOffset, final InputStream sourceStream, final int maxLinks, final long maxBytes) - throws Parser.Failure, InterruptedException, UnsupportedOperationException; - - /** - * @return true when the parser implementation supports the - * parseWithLimits() operation. - */ - public boolean isParseWithLimitsSupported(); + * @return true when the parser implementation supports the + * parseWithLimits() operation. + */ + public boolean isParseWithLimitsSupported(); // methods to that shall make it possible to put Parser objects into a hashtable diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java index 83327adcc..6fefa60c8 100644 --- a/source/net/yacy/document/TextParser.java +++ b/source/net/yacy/document/TextParser.java @@ -51,6 +51,7 @@ import net.yacy.document.parser.docParser; import net.yacy.document.parser.genericParser; import net.yacy.document.parser.gzipParser; import net.yacy.document.parser.gzipParser.GZIPOpeningStreamException; +import net.yacy.document.parser.html.TagValency; import net.yacy.document.parser.htmlParser; import net.yacy.document.parser.linkScraperParser; import net.yacy.document.parser.mmParser; @@ -184,7 +185,8 @@ public final class TextParser { final DigestURL location, final String mimeType, final String charset, - final Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final int depth, @@ -201,7 +203,7 @@ public final class TextParser { throw new Parser.Failure(errorMsg, location); } sourceStream = new BufferedInputStream(new FileInputStream(sourceFile)); - docs = parseSource(location, mimeType, charset, ignore_class_name, scraper, timezoneOffset, depth, sourceFile.length(), sourceStream); + docs = parseSource(location, mimeType, charset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, depth, sourceFile.length(), sourceStream); } catch (final Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; if (e instanceof Parser.Failure) throw (Parser.Failure) e; @@ -218,7 +220,8 @@ public final class TextParser { final DigestURL location, String mimeType, final String charset, - final Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final int depth, @@ -236,7 +239,7 @@ public final class TextParser { } assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true); - final Document[] docs = parseSource(location, mimeType, idioms, charset, ignore_class_name, scraper, timezoneOffset, depth, content, Integer.MAX_VALUE, Long.MAX_VALUE); + final Document[] docs = parseSource(location, mimeType, idioms, charset, defaultValency, 
valencySwitchTagNames, scraper, timezoneOffset, depth, content, Integer.MAX_VALUE, Long.MAX_VALUE); return docs; } @@ -248,7 +251,8 @@ public final class TextParser { final DigestURL location, String mimeType, final String charset, - final Set ignoreClassNames, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final int depth, @@ -261,14 +265,15 @@ public final class TextParser { final Set idioms = new HashSet<>(); idioms.add(TextParser.genericIdiom); - return parseSource(location, mimeType, idioms, charset, ignoreClassNames, scraper, timezoneOffset, depth, content, Integer.MAX_VALUE, Long.MAX_VALUE); + return parseSource(location, mimeType, idioms, charset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, depth, content, Integer.MAX_VALUE, Long.MAX_VALUE); } private static Document[] parseSource( final DigestURL location, String mimeType, final String charset, - final Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final int depth, @@ -330,7 +335,7 @@ public final class TextParser { CloseShieldInputStream nonCloseInputStream = new CloseShieldInputStream(markableStream); try { - return parseSource(location, mimeType, parser, charset, ignore_class_name, scraper, timezoneOffset, + return parseSource(location, mimeType, parser, charset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, nonCloseInputStream, maxLinks, maxBytes); } catch (final Parser.Failure e) { /* Try to reset the marked stream. If the failed parser has consumed too many bytes : @@ -378,11 +383,11 @@ public final class TextParser { int maxBytesToRead = -1; if(maxBytes < Integer.MAX_VALUE) { /* Load at most maxBytes + 1 : - - to let parsers not supporting Parser.parseWithLimits detect the maxBytes size is exceeded and end with a Parser.Failure - - but let parsers supporting Parser.parseWithLimits perform partial parsing of maxBytes content */ + - to let parsers not supporting Parser.parseWithLimits detect the maxBytes size is exceeded and end with a Parser.Failure + - but let parsers supporting Parser.parseWithLimits perform partial parsing of maxBytes content */ maxBytesToRead = (int)maxBytes + 1; } - if(contentLength >= 0 && contentLength < maxBytesToRead) { + if (contentLength >= 0 && contentLength < maxBytesToRead) { maxBytesToRead = (int)contentLength; } @@ -392,16 +397,23 @@ public final class TextParser { } catch (final IOException e) { throw new Parser.Failure(e.getMessage(), location); } - final Document[] docs = parseSource(location, mimeType, idioms, charset, ignore_class_name, scraper, timezoneOffset, depth, b, maxLinks, maxBytes); + final Document[] docs = parseSource(location, mimeType, idioms, charset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, depth, b, maxLinks, maxBytes); return docs; } - public static Document[] parseSource(final DigestURL location, String mimeType, final String charset, - final Set ignore_class_name, - final VocabularyScraper scraper, final int timezoneOffset, final int depth, final long contentLength, + public static Document[] parseSource( + final DigestURL location, + String mimeType, + final String charset, + final TagValency defaultValency, + final Set valencySwitchTagNames, + final VocabularyScraper scraper, + final int timezoneOffset, + final int depth, + final long contentLength, final InputStream sourceStream) throws Parser.Failure { - return 
parseSource(location, mimeType, charset, ignore_class_name, scraper, timezoneOffset, depth, contentLength, sourceStream, + return parseSource(location, mimeType, charset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, depth, contentLength, sourceStream, Integer.MAX_VALUE, Long.MAX_VALUE); } @@ -424,10 +436,19 @@ public final class TextParser { * @return a list of documents that result from parsing the source, with empty or null text. * @throws Parser.Failure when the parser processing failed */ - public static Document[] parseWithLimits(final DigestURL location, String mimeType, final String charset, final Set ignoreClassNames, - final int timezoneOffset, final int depth, final long contentLength, final InputStream sourceStream, int maxLinks, + public static Document[] parseWithLimits( + final DigestURL location, + String mimeType, + final String charset, + final TagValency defaultValency, + final Set valencySwitchTagNames, + final int timezoneOffset, + final int depth, + final long contentLength, + final InputStream sourceStream, + int maxLinks, long maxBytes) throws Parser.Failure{ - return parseSource(location, mimeType, charset, ignoreClassNames, new VocabularyScraper(), timezoneOffset, depth, contentLength, + return parseSource(location, mimeType, charset, defaultValency, valencySwitchTagNames, new VocabularyScraper(), timezoneOffset, depth, contentLength, sourceStream, maxLinks, maxBytes); } @@ -449,10 +470,11 @@ public final class TextParser { * @return a list of documents that result from parsing the source, with empty or null text. * @throws Parser.Failure when the parser processing failed */ - public static Document[] parseWithLimits(final DigestURL location, String mimeType, final String charset, + public static Document[] parseWithLimits( + final DigestURL location, String mimeType, final String charset, final int timezoneOffset, final int depth, final long contentLength, final InputStream sourceStream, int maxLinks, long maxBytes) throws Parser.Failure{ - return parseSource(location, mimeType, charset, new HashSet(), new VocabularyScraper(), timezoneOffset, depth, contentLength, + return parseSource(location, mimeType, charset, TagValency.EVAL, new HashSet(), new VocabularyScraper(), timezoneOffset, depth, contentLength, sourceStream, maxLinks, maxBytes); } @@ -475,7 +497,8 @@ public final class TextParser { final String mimeType, final Parser parser, final String charset, - final Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final InputStream sourceStream, @@ -491,11 +514,11 @@ public final class TextParser { try { final Document[] docs; if(parser.isParseWithLimitsSupported()) { - docs = parser.parseWithLimits(location, mimeType, documentCharset, ignore_class_name, scraper, timezoneOffset, sourceStream, maxLinks, maxBytes); + docs = parser.parseWithLimits(location, mimeType, documentCharset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, sourceStream, maxLinks, maxBytes); } else { /* Parser do not support partial parsing within limits : let's control it here*/ final InputStream limitedSource = new StrictLimitInputStream(sourceStream, maxBytes); - docs = parser.parse(location, mimeType, documentCharset, ignore_class_name, scraper, timezoneOffset, limitedSource); + docs = parser.parse(location, mimeType, documentCharset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, limitedSource); } return docs; } catch(final 
Parser.Failure e) { @@ -524,7 +547,8 @@ public final class TextParser { final String mimeType, final Set parsers, final String charset, - final Set ignore_class_name, + final TagValency defaultValency, + final Set valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final int depth, @@ -552,13 +576,13 @@ public final class TextParser { } try { if(parser.isParseWithLimitsSupported()) { - docs = parser.parseWithLimits(location, mimeType, documentCharset, ignore_class_name, scraper, timezoneOffset, bis, maxLinks, maxBytes); + docs = parser.parseWithLimits(location, mimeType, documentCharset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, bis, maxLinks, maxBytes); } else { /* Partial parsing is not supported by this parser : check content length now */ if(sourceArray.length > maxBytes) { throw new Parser.Failure("Content size is over maximum size of " + maxBytes + "", location); } - docs = parser.parse(location, mimeType, documentCharset, ignore_class_name, scraper, timezoneOffset, bis); + docs = parser.parse(location, mimeType, documentCharset, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, bis); } } catch (final Parser.Failure e) { if(parser instanceof gzipParser && e.getCause() instanceof GZIPOpeningStreamException && diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java index d6dbb5b83..b3d75776d 100644 --- a/source/net/yacy/document/importer/MediawikiImporter.java +++ b/source/net/yacy/document/importer/MediawikiImporter.java @@ -68,6 +68,7 @@ import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; import net.yacy.document.content.SurrogateReader; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.util.NamePrefixThreadFactory; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; @@ -101,11 +102,11 @@ public class MediawikiImporter extends Thread implements Importer { public MediawikiImporter(final MultiProtocolURL sourcefile, final File targetdir) { - super("MediawikiImporter(" + sourcefile != null ? sourcefile.toNormalform(true) : "null sourcefile" +")"); - this.sourcefile = sourcefile; - this.docsize = sourcefile.length(); - this.approxdocs = (int) (this.docsize * docspermbinxmlbz2 / 1024L / 1024L); - this.targetdir = targetdir; + super("MediawikiImporter(" + sourcefile != null ? 
sourcefile.toNormalform(true) : "null sourcefile" +")"); + this.sourcefile = sourcefile; + this.docsize = sourcefile.length(); + this.approxdocs = (int) (this.docsize * docspermbinxmlbz2 / 1024L / 1024L); + this.targetdir = targetdir; this.count = 0; this.start = 0; this.hostport = null; @@ -154,7 +155,7 @@ public class MediawikiImporter extends Thread implements Importer { } @SuppressWarnings("resource") - @Override + @Override public void run() { this.start = System.currentTimeMillis(); final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1); @@ -179,8 +180,8 @@ public class MediawikiImporter extends Thread implements Importer { boolean page = false, text = false; String title = null; final BlockingQueue in = new ArrayBlockingQueue(threads * 10); - final ExecutorService service = Executors.newCachedThreadPool( - new NamePrefixThreadFactory(MediawikiImporter.class.getSimpleName() + ".convertConsumer")); + final ExecutorService service = Executors.newCachedThreadPool( + new NamePrefixThreadFactory(MediawikiImporter.class.getSimpleName() + ".convertConsumer")); final convertConsumer[] consumers = new convertConsumer[threads]; final Future[] consumerResults = (Future[]) Array.newInstance(Future.class, threads); for (int i = 0; i < threads; i++) { @@ -276,23 +277,23 @@ public class MediawikiImporter extends Thread implements Importer { consumerResults[i].get(10000, TimeUnit.MILLISECONDS); } } catch (final Exception e) { - this.errorMessage = e.getMessage(); + this.errorMessage = e.getMessage(); ConcurrentLog.logException(e); } finally { out.put(poison); // output thread condition (for file.close) writerResult.get(10000, TimeUnit.MILLISECONDS); } } catch (final Exception e) { - this.errorMessage = e.getMessage(); + this.errorMessage = e.getMessage(); ConcurrentLog.logException(e); } finally { - if(reader != null) { + if(reader != null) { try { - reader.close(); - } catch (IOException e) { - ConcurrentLog.warn("WIKITRANSLATION", "Could not close dump reader : " + e.getMessage()); - } - } + reader.close(); + } catch (IOException e) { + ConcurrentLog.warn("WIKITRANSLATION", "Could not close dump reader : " + e.getMessage()); + } + } try { out.put(poison); // out keeps output file open until poisened, to close file if exception happend in this block } catch (InterruptedException ex) { } @@ -310,7 +311,7 @@ public class MediawikiImporter extends Thread implements Importer { File mediawikixml; public indexMaker(final File mediawikixml) { - super("MediawikiImporter.indexMaker " + mediawikixml != null ? mediawikixml.getName() : ""); + super("MediawikiImporter.indexMaker " + mediawikixml != null ? 
mediawikixml.getName() : ""); this.mediawikixml = mediawikixml; } @@ -337,8 +338,8 @@ public class MediawikiImporter extends Thread implements Importer { final PositionAwareReader in = new PositionAwareReader(dumpFile); final indexProducer producer = new indexProducer(100, idxFromMediawikiXML(dumpFile)); final wikiConsumer consumer = new wikiConsumer(100, producer); - final ExecutorService service = Executors.newCachedThreadPool( - new NamePrefixThreadFactory(MediawikiImporter.class.getSimpleName() + ".createIndex")); + final ExecutorService service = Executors.newCachedThreadPool( + new NamePrefixThreadFactory(MediawikiImporter.class.getSimpleName() + ".createIndex")); final Future producerResult = service.submit(consumer); final Future consumerResult = service.submit(producer); service.shutdown(); @@ -535,14 +536,14 @@ public class MediawikiImporter extends Thread implements Importer { } public void genDocument() throws Parser.Failure { try { - this.url = new AnchorURL(this.urlStub + this.title); - final Document[] parsed = TextParser.parseSource(this.url, "text/html", StandardCharsets.UTF_8.name(), new HashSet(), new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html)); - this.document = Document.mergeDocuments(this.url, "text/html", parsed); - // the wiki parser is not able to find the proper title in the source text, so it must be set here - this.document.setTitle(this.title); - } catch (final MalformedURLException e1) { - ConcurrentLog.logException(e1); - } + this.url = new AnchorURL(this.urlStub + this.title); + final Document[] parsed = TextParser.parseSource(this.url, "text/html", StandardCharsets.UTF_8.name(), TagValency.EVAL, new HashSet(), new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html)); + this.document = Document.mergeDocuments(this.url, "text/html", parsed); + // the wiki parser is not able to find the proper title in the source text, so it must be set here + this.document.setTitle(this.title); + } catch (final MalformedURLException e1) { + ConcurrentLog.logException(e1); + } } public void writeXML(final OutputStreamWriter os) throws IOException { this.document.writeXML(os); @@ -676,9 +677,9 @@ public class MediawikiImporter extends Thread implements Importer { } catch (final Parser.Failure e) { ConcurrentLog.logException(e); } catch (final IOException e) { - // TODO Auto-generated catch block + // TODO Auto-generated catch block ConcurrentLog.logException(e); - } + } } } catch (final InterruptedException e) { ConcurrentLog.logException(e); @@ -772,78 +773,78 @@ public class MediawikiImporter extends Thread implements Importer { } - public static void main(final String[] s) { - if (s.length == 0) { - System.out.println("usage:"); - System.out.println(" -index "); - System.out.println(" -read "); - System.out.println(" -find <wikipedia-dump>"); - System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir>"); - ConcurrentLog.shutdown(); - return; - } - - try { - // example: - // java -Xmx2000m -cp classes:lib/bzip2.jar - // de.anomic.tools.mediawikiIndex -convert - // DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 - // DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/ - - if (s[0].equals("-convert")) { - if(s.length < 3) { - System.out.println("usage:"); - System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir>"); - ConcurrentLog.shutdown(); - return; - } - final File targetdir = new File(s[2]); - try { - final MediawikiImporter mi = new MediawikiImporter(new MultiProtocolURL(s[1]), targetdir); - mi.start(); - mi.join(); - } 
catch (final InterruptedException e) { - ConcurrentLog.logException(e); - } catch (MalformedURLException e) { - ConcurrentLog.logException(e); - } - } - - if (s[0].equals("-index")) { - try { - createIndex(new File(s[1])); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } - } - - if (s[0].equals("-read")) { - final long start = Integer.parseInt(s[1]); - final int len = Integer.parseInt(s[2]); - System.out.println(UTF8.String(read(new File(s[3]), start, len))); - } - - if (s[0].equals("-find")) { - try { - final wikisourcerecord w = find(s[1], new File(s[2] + ".idx.xml")); - if (w == null) { - ConcurrentLog.info("WIKITRANSLATION", "not found"); - } else { - System.out.println(UTF8.String(read(new File(s[2]), w.start, (int) (w.end - w.start)))); - } - } catch (final IOException e) { - ConcurrentLog.logException(e); - } - - } - } finally { - try { - HTTPClient.closeConnectionManager(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - ConcurrentLog.shutdown(); - } - } + public static void main(final String[] s) { + if (s.length == 0) { + System.out.println("usage:"); + System.out.println(" -index <wikipedia-dump>"); + System.out.println(" -read <start> <len> <idx-file>"); + System.out.println(" -find <title> <wikipedia-dump>"); + System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir>"); + ConcurrentLog.shutdown(); + return; + } + + try { + // example: + // java -Xmx2000m -cp classes:lib/bzip2.jar + // de.anomic.tools.mediawikiIndex -convert + // DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 + // DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/ + + if (s[0].equals("-convert")) { + if(s.length < 3) { + System.out.println("usage:"); + System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir>"); + ConcurrentLog.shutdown(); + return; + } + final File targetdir = new File(s[2]); + try { + final MediawikiImporter mi = new MediawikiImporter(new MultiProtocolURL(s[1]), targetdir); + mi.start(); + mi.join(); + } catch (final InterruptedException e) { + ConcurrentLog.logException(e); + } catch (MalformedURLException e) { + ConcurrentLog.logException(e); + } + } + + if (s[0].equals("-index")) { + try { + createIndex(new File(s[1])); + } catch (final IOException e) { + ConcurrentLog.logException(e); + } + } + + if (s[0].equals("-read")) { + final long start = Integer.parseInt(s[1]); + final int len = Integer.parseInt(s[2]); + System.out.println(UTF8.String(read(new File(s[3]), start, len))); + } + + if (s[0].equals("-find")) { + try { + final wikisourcerecord w = find(s[1], new File(s[2] + ".idx.xml")); + if (w == null) { + ConcurrentLog.info("WIKITRANSLATION", "not found"); + } else { + System.out.println(UTF8.String(read(new File(s[2]), w.start, (int) (w.end - w.start)))); + } + } catch (final IOException e) { + ConcurrentLog.logException(e); + } + + } + } finally { + try { + HTTPClient.closeConnectionManager(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + ConcurrentLog.shutdown(); + } + } } diff --git a/source/net/yacy/document/parser/AbstractCompressorParser.java b/source/net/yacy/document/parser/AbstractCompressorParser.java index 753b894a4..09f385c05 100644 --- a/source/net/yacy/document/parser/AbstractCompressorParser.java +++ b/source/net/yacy/document/parser/AbstractCompressorParser.java @@ -37,6 +37,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import 
net.yacy.document.parser.html.TagValency; /** * Base class for parsing compressed files relying on Apache commons-compress @@ -44,25 +45,25 @@ import net.yacy.document.VocabularyScraper; */ public abstract class AbstractCompressorParser extends AbstractParser implements Parser { - /** Crawl depth applied when parsing internal compressed content */ - protected static final int DEFAULT_DEPTH = 999; - - /** - * @param name the human readable name of the parser - */ - public AbstractCompressorParser(final String name) { - super(name); - } - - /** - * @param source an open input stream on a compressed source - * @return a sub class of CompressorInputStream capable of uncompressing the source - * on the fly - * @throws IOException when an error occurred when trying to open the compressed - * stream - */ - protected abstract CompressorInputStream createDecompressStream(final InputStream source) throws IOException; - + /** Crawl depth applied when parsing internal compressed content */ + protected static final int DEFAULT_DEPTH = 999; + + /** + * @param name the human readable name of the parser + */ + public AbstractCompressorParser(final String name) { + super(name); + } + + /** + * @param source an open input stream on a compressed source + * @return a sub class of CompressorInputStream capable of uncompressing the source + * on the fly + * @throws IOException when an error occurred when trying to open the compressed + * stream + */ + protected abstract CompressorInputStream createDecompressStream(final InputStream source) throws IOException; + /** * Maps the given name of a compressed file to the name that the * file should have after uncompression. For example, for "file.txt.xz", "file.txt" is returned. @@ -72,116 +73,137 @@ public abstract class AbstractCompressorParser extends AbstractParser implements */ protected abstract String getUncompressedFilename(final String filename); - @Override - public Document[] parse(final DigestURL location, final String mimeType, final String charset, - final Set<String> ignoreClassNames, final VocabularyScraper scraper, final int timezoneOffset, - final InputStream source) throws Parser.Failure, InterruptedException { - - return parseWithLimits(location, mimeType, charset, scraper, timezoneOffset, source, Integer.MAX_VALUE, - Long.MAX_VALUE); - } - - @Override - public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String charset, - final Set<String> ignoreClassNames, final VocabularyScraper scraper, final int timezoneOffset, - final InputStream source, final int maxLinks, final long maxBytes) throws Parser.Failure { - Document maindoc; - final CompressorInputStream compressedInStream; - try { - compressedInStream = createDecompressStream(source); - } catch (final IOException | RuntimeException e) { - throw new Parser.Failure("Unexpected error while parsing compressed file. 
" + e.getMessage(), location); - } - - try { - // create maindoc for this archive, register with supplied url & mime - maindoc = AbstractCompressorParser.createMainDocument(location, mimeType, charset, this); - - final Document[] docs = this.parseCompressedInputStream(location, null, ignoreClassNames, timezoneOffset, - AbstractCompressorParser.DEFAULT_DEPTH, compressedInStream, maxLinks, maxBytes); - if (docs != null) { - maindoc.addSubDocuments(docs); - if (docs.length > 0 && docs[0].isPartiallyParsed()) { - maindoc.setPartiallyParsed(true); - } - } - } catch (final Parser.Failure e) { - throw e; - } catch (final IOException | RuntimeException e) { - throw new Parser.Failure("Unexpected error while parsing compressed file. " + e.getMessage(), location); - } - return new Document[] { maindoc }; - } - - /** - * Create the main parsed document for the compressed document at the given URL - * and Media type - * - * @param location the parsed resource URL - * @param mimeType the media type of the resource - * @param charset the charset name if known - * @param parser an instance of CompressorParser that is registered as the - * parser origin of the document - * @return a Document instance - */ - protected static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, - final AbstractCompressorParser parser) { - final String filename = location.getFileName(); - return new Document(location, mimeType, charset, parser, null, null, - AbstractParser - .singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title - null, null, null, null, 0.0d, 0.0d, (Object) null, null, null, null, false, new Date()); - } - - /** - * Parse content in an open stream uncompressing on the fly a compressed - * resource. - * - * @param location the URL of the compressed resource - * @param charset the charset name if known - * @param ignoreClassNames an eventual set of CSS class names whose matching - * html elements content should be ignored - * @param timezoneOffset the local time zone offset - * @param compressedInStream an open stream uncompressing on the fly the - * compressed content - * @param maxLinks the maximum total number of links to parse and add - * to the result documents - * @param maxBytes the maximum number of content bytes to process - * @return a list of documents that result from parsing the source, with empty - * or null text. 
- * @throws Parser.Failure when the parser processing failed - */ - protected Document[] parseCompressedInputStream(final DigestURL location, final String charset, - final Set<String> ignoreClassNames, final int timezoneOffset, final int depth, - final CompressorInputStream compressedInStream, final int maxLinks, final long maxBytes) throws Failure { - final String compressedFileName = location.getFileName(); - final String contentfilename = getUncompressedFilename(compressedFileName); - final String mime = TextParser.mimeOf(MultiProtocolURL.getFileExtension(contentfilename)); - try { - /* - * Use the uncompressed file name for sub parsers to not unnecessarily use again - * this same uncompressing parser - */ - final String locationPath = location.getPath(); - final String contentPath = locationPath.substring(0, locationPath.length() - compressedFileName.length()) - + contentfilename; - final DigestURL contentLocation = new DigestURL(location.getProtocol(), location.getHost(), - location.getPort(), contentPath); - - /* - * Rely on the supporting parsers to respect the maxLinks and maxBytes limits on - * compressed content - */ - return TextParser.parseWithLimits(contentLocation, mime, charset, ignoreClassNames, timezoneOffset, depth, - -1, compressedInStream, maxLinks, maxBytes); - } catch (final MalformedURLException e) { - throw new Parser.Failure("Unexpected error while parsing compressed file. " + e.getMessage(), location); - } - } - - @Override - public boolean isParseWithLimitsSupported() { - return true; - } + @Override + public Document[] parse( + final DigestURL location, + final String mimeType, + final String charset, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, + final VocabularyScraper scraper, + final int timezoneOffset, + final InputStream source) throws Parser.Failure, InterruptedException { + + return parseWithLimits(location, mimeType, charset, scraper, timezoneOffset, source, Integer.MAX_VALUE, + Long.MAX_VALUE); + } + + @Override + public Document[] parseWithLimits( + final DigestURL location, + final String mimeType, + final String charset, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, + final VocabularyScraper scraper, + final int timezoneOffset, + final InputStream source, + final int maxLinks, + final long maxBytes) throws Parser.Failure { + Document maindoc; + final CompressorInputStream compressedInStream; + try { + compressedInStream = createDecompressStream(source); + } catch (final IOException | RuntimeException e) { + throw new Parser.Failure("Unexpected error while parsing compressed file. " + e.getMessage(), location); + } + + try { + // create maindoc for this archive, register with supplied url & mime + maindoc = AbstractCompressorParser.createMainDocument(location, mimeType, charset, this); + + final Document[] docs = this.parseCompressedInputStream(location, null, defaultValency, valencySwitchTagNames, timezoneOffset, + AbstractCompressorParser.DEFAULT_DEPTH, compressedInStream, maxLinks, maxBytes); + if (docs != null) { + maindoc.addSubDocuments(docs); + if (docs.length > 0 && docs[0].isPartiallyParsed()) { + maindoc.setPartiallyParsed(true); + } + } + } catch (final Parser.Failure e) { + throw e; + } catch (final IOException | RuntimeException e) { + throw new Parser.Failure("Unexpected error while parsing compressed file. 
" + e.getMessage(), location); + } + return new Document[] { maindoc }; + } + + /** + * Create the main parsed document for the compressed document at the given URL + * and Media type + * + * @param location the parsed resource URL + * @param mimeType the media type of the resource + * @param charset the charset name if known + * @param parser an instance of CompressorParser that is registered as the + * parser origin of the document + * @return a Document instance + */ + protected static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, + final AbstractCompressorParser parser) { + final String filename = location.getFileName(); + return new Document(location, mimeType, charset, parser, null, null, + AbstractParser + .singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title + null, null, null, null, 0.0d, 0.0d, (Object) null, null, null, null, false, new Date()); + } + + /** + * Parse content in an open stream uncompressing on the fly a compressed + * resource. + * + * @param location the URL of the compressed resource + * @param charset the charset name if known + * @param ignoreClassNames an eventual set of CSS class names whose matching + * html elements content should be ignored + * @param timezoneOffset the local time zone offset + * @param compressedInStream an open stream uncompressing on the fly the + * compressed content + * @param maxLinks the maximum total number of links to parse and add + * to the result documents + * @param maxBytes the maximum number of content bytes to process + * @return a list of documents that result from parsing the source, with empty + * or null text. + * @throws Parser.Failure when the parser processing failed + */ + protected Document[] parseCompressedInputStream( + final DigestURL location, + final String charset, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, + final int timezoneOffset, final int depth, + final CompressorInputStream compressedInStream, + final int maxLinks, + final long maxBytes) throws Failure { + final String compressedFileName = location.getFileName(); + final String contentfilename = getUncompressedFilename(compressedFileName); + final String mime = TextParser.mimeOf(MultiProtocolURL.getFileExtension(contentfilename)); + try { + /* + * Use the uncompressed file name for sub parsers to not unnecessarily use again + * this same uncompressing parser + */ + final String locationPath = location.getPath(); + final String contentPath = locationPath.substring(0, locationPath.length() - compressedFileName.length()) + + contentfilename; + final DigestURL contentLocation = new DigestURL(location.getProtocol(), location.getHost(), + location.getPort(), contentPath); + + /* + * Rely on the supporting parsers to respect the maxLinks and maxBytes limits on + * compressed content + */ + return TextParser.parseWithLimits( + contentLocation, mime, charset, defaultValency, valencySwitchTagNames, timezoneOffset, depth, + -1, compressedInStream, maxLinks, maxBytes); + } catch (final MalformedURLException e) { + throw new Parser.Failure("Unexpected error while parsing compressed file. 
" + e.getMessage(), location); + } + } + + @Override + public boolean isParseWithLimitsSupported() { + return true; + } } diff --git a/source/net/yacy/document/parser/bzipParser.java b/source/net/yacy/document/parser/bzipParser.java index 15a63a41b..5eaed9a1d 100644 --- a/source/net/yacy/document/parser/bzipParser.java +++ b/source/net/yacy/document/parser/bzipParser.java @@ -45,6 +45,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.util.FileUtils; /** @@ -52,7 +53,7 @@ import net.yacy.kelondro.util.FileUtils; * Unzips and parses the content and adds it to the created main document */ public class bzipParser extends AbstractParser implements Parser { - + public bzipParser() { super("Bzip 2 UNIX Compressed File Parser"); this.SUPPORTED_EXTENSIONS.add("bz2"); @@ -70,7 +71,8 @@ public class bzipParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, final String charset, - Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) @@ -99,25 +101,25 @@ public class bzipParser extends AbstractParser implements Parser { out = null; } catch(Exception e) { - if (tempFile != null) { - FileUtils.deletedelete(tempFile); - } - throw new Parser.Failure("Unexpected error while parsing bzip file. " + e.getMessage(), location); + if (tempFile != null) { + FileUtils.deletedelete(tempFile); + } + throw new Parser.Failure("Unexpected error while parsing bzip file. " + e.getMessage(), location); } finally { - if(zippedContent != null) { - try { - zippedContent.close(); - } catch (IOException ignored) { - log.warn("Could not close bzip input stream"); - } - } - if(out != null) { - try { - out.close(); - } catch (IOException e) { - throw new Parser.Failure("Unexpected error while parsing bzip file. " + e.getMessage(), location); - } - } + if(zippedContent != null) { + try { + zippedContent.close(); + } catch (IOException ignored) { + log.warn("Could not close bzip input stream"); + } + } + if(out != null) { + try { + out.close(); + } catch (IOException e) { + throw new Parser.Failure("Unexpected error while parsing bzip file. 
" + e.getMessage(), location); + } + } } try { // create maindoc for this bzip container, register with supplied url & mime @@ -125,7 +127,7 @@ public class bzipParser extends AbstractParser implements Parser { // creating a new parser class to parse the unzipped content final String contentfilename = BZip2Utils.getUncompressedFilename(location.getFileName()); final String mime = TextParser.mimeOf(MultiProtocolURL.getFileExtension(contentfilename)); - final Document[] docs = TextParser.parseSource(location, mime, null, ignore_class_name, scraper, timezoneOffset, 999, tempFile); + final Document[] docs = TextParser.parseSource(location, mime, null, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, 999, tempFile); if (docs != null) maindoc.addSubDocuments(docs); } catch (final Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; @@ -140,7 +142,7 @@ public class bzipParser extends AbstractParser implements Parser { @Override public boolean isParseWithLimitsSupported() { - return true; + return true; } /** @@ -151,9 +153,9 @@ public class bzipParser extends AbstractParser implements Parser { * @param parser instance of bzipParser that is registered as the parser origin of the document * @return a Document instance */ - public static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, final bzipParser parser) { - final String filename = location.getFileName(); - Document maindoc = new Document( + public static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, final bzipParser parser) { + final String filename = location.getFileName(); + Document maindoc = new Document( location, mimeType, charset, @@ -172,49 +174,48 @@ public class bzipParser extends AbstractParser implements Parser { null, false, new Date()); - return maindoc; - } - - /** - * Parse content in an open stream uncompressing on the fly a bzipped resource. - * @param location the URL of the bzipped resource - * @param charset the charset name if known - * @param timezoneOffset the local time zone offset - * @param compressedInStream an open stream uncompressing on the fly the compressed content - * @param maxLinks - * the maximum total number of links to parse and add to the - * result documents - * @param maxBytes - * the maximum number of content bytes to process - * @return a list of documents that result from parsing the source, with - * empty or null text. - * @throws Parser.Failure - * when the parser processing failed - */ - public Document[] parseCompressedInputStream(final DigestURL location, final String charset, final int timezoneOffset, final int depth, - final InputStream compressedInStream, final int maxLinks, final long maxBytes) throws Failure { + return maindoc; + } + + /** + * Parse content in an open stream uncompressing on the fly a bzipped resource. + * @param location the URL of the bzipped resource + * @param charset the charset name if known + * @param timezoneOffset the local time zone offset + * @param compressedInStream an open stream uncompressing on the fly the compressed content + * @param maxLinks + * the maximum total number of links to parse and add to the + * result documents + * @param maxBytes + * the maximum number of content bytes to process + * @return a list of documents that result from parsing the source, with + * empty or null text. 
+ * @throws Parser.Failure + * when the parser processing failed + */ + public Document[] parseCompressedInputStream(final DigestURL location, final String charset, final int timezoneOffset, final int depth, + final InputStream compressedInStream, final int maxLinks, final long maxBytes) throws Failure { // creating a new parser class to parse the unzipped content - final String compressedFileName = location.getFileName(); + final String compressedFileName = location.getFileName(); final String contentfilename = BZip2Utils.getUncompressedFilename(compressedFileName); final String mime = TextParser.mimeOf(MultiProtocolURL.getFileExtension(contentfilename)); try { - /* Use the uncompressed file name for sub parsers to not unnecessarily use again the gzipparser */ - final String locationPath = location.getPath(); - final String contentPath = locationPath.substring(0, locationPath.length() - compressedFileName.length()) + contentfilename; - final DigestURL contentLocation = new DigestURL(location.getProtocol(), location.getHost(), location.getPort(), contentPath); - - /* Rely on the supporting parsers to respect the maxLinks and maxBytes limits on compressed content */ - return TextParser.parseWithLimits(contentLocation, mime, charset, timezoneOffset, depth, -1, compressedInStream, maxLinks, maxBytes); - } catch (MalformedURLException e) { - throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location); - } - } - - + /* Use the uncompressed file name for sub parsers to not unnecessarily use again the gzipparser */ + final String locationPath = location.getPath(); + final String contentPath = locationPath.substring(0, locationPath.length() - compressedFileName.length()) + contentfilename; + final DigestURL contentLocation = new DigestURL(location.getProtocol(), location.getHost(), location.getPort(), contentPath); + + /* Rely on the supporting parsers to respect the maxLinks and maxBytes limits on compressed content */ + return TextParser.parseWithLimits(contentLocation, mime, charset, timezoneOffset, depth, -1, compressedInStream, maxLinks, maxBytes); + } catch (MalformedURLException e) { + throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location); + } + } + @Override public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, - final int timezoneOffset, final InputStream source, final int maxLinks, final long maxBytes) - throws Parser.Failure { + final int timezoneOffset, final InputStream source, final int maxLinks, final long maxBytes) + throws Parser.Failure { Document maindoc = null; BZip2CompressorInputStream zippedContent = null; try { @@ -222,23 +223,23 @@ public class bzipParser extends AbstractParser implements Parser { zippedContent = new BZip2CompressorInputStream(source); } catch(Exception e) { - throw new Parser.Failure("Unexpected error while parsing bzip file. " + e.getMessage(), location); + throw new Parser.Failure("Unexpected error while parsing bzip file. 
" + e.getMessage(), location); } - + try { // create maindoc for this bzip container, register with supplied url & mime maindoc = createMainDocument(location, mimeType, charset, this); // creating a new parser class to parse the unzipped content final Document[] docs = parseCompressedInputStream(location, null, timezoneOffset, 999, zippedContent, maxLinks, maxBytes); if (docs != null) { - maindoc.addSubDocuments(docs); - if(docs.length > 0 && docs[0].isPartiallyParsed()) { - maindoc.setPartiallyParsed(true); - } + maindoc.addSubDocuments(docs); + if(docs.length > 0 && docs[0].isPartiallyParsed()) { + maindoc.setPartiallyParsed(true); + } } } catch (final Exception e) { if (e instanceof Parser.Failure) { - throw (Parser.Failure) e; + throw (Parser.Failure) e; } throw new Parser.Failure("Unexpected error while parsing bzip file. " + e.getMessage(),location); diff --git a/source/net/yacy/document/parser/gzipParser.java b/source/net/yacy/document/parser/gzipParser.java index d81d6d43a..dc4b58ae6 100644 --- a/source/net/yacy/document/parser/gzipParser.java +++ b/source/net/yacy/document/parser/gzipParser.java @@ -45,6 +45,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.util.FileUtils; /** @@ -52,8 +53,8 @@ import net.yacy.kelondro.util.FileUtils; * Unzips and parses the content and adds it to the created main document */ public class gzipParser extends AbstractParser implements Parser { - - private static final int DEFAULT_DEPTH = 999; + + private static final int DEFAULT_DEPTH = 999; public gzipParser() { super("GNU Zip Compressed Archive Parser"); @@ -72,7 +73,8 @@ public class gzipParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, final String charset, - Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) throws Parser.Failure, InterruptedException { @@ -84,10 +86,10 @@ public class gzipParser extends AbstractParser implements Parser { try { zippedContent = new GZIPInputStream(source); } catch(IOException e) { - /* Use a GZIPOpeningStreamException to signal the caller the error occurred directly on stream opening - * and eventually apply special error handling */ - throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location, - new GZIPOpeningStreamException()); + /* Use a GZIPOpeningStreamException to signal the caller the error occurred directly on stream opening + * and eventually apply special error handling */ + throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location, + new GZIPOpeningStreamException()); } try { int read = 0; @@ -103,32 +105,32 @@ public class gzipParser extends AbstractParser implements Parser { out.write(data, 0, read); } } catch(Exception e) { - if (tempFile != null) { - FileUtils.deletedelete(tempFile); - } - throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location); + if (tempFile != null) { + FileUtils.deletedelete(tempFile); + } + throw new Parser.Failure("Unexpected error while parsing gzip file. 
" + e.getMessage(), location); } finally { - if(zippedContent != null) { - try { - zippedContent.close(); - } catch (IOException ignored) { - log.warn("Could not close gzip input stream"); - } - } - if(out != null) { - try { - out.close(); - } catch (IOException e) { - throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location); - } - } + if(zippedContent != null) { + try { + zippedContent.close(); + } catch (IOException ignored) { + log.warn("Could not close gzip input stream"); + } + } + if(out != null) { + try { + out.close(); + } catch (IOException e) { + throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location); + } + } } try { maindoc = createMainDocument(location, mimeType, charset, this); // creating a new parser class to parse the unzipped content final String contentfilename = GzipUtils.getUncompressedFilename(location.getFileName()); final String mime = TextParser.mimeOf(MultiProtocolURL.getFileExtension(contentfilename)); - Document[] docs = TextParser.parseSource(location, mime, null, ignore_class_name, scraper, timezoneOffset, DEFAULT_DEPTH, tempFile); + Document[] docs = TextParser.parseSource(location, mime, null, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, DEFAULT_DEPTH, tempFile); if (docs != null) maindoc.addSubDocuments(docs); } catch (final Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; @@ -149,96 +151,96 @@ public class gzipParser extends AbstractParser implements Parser { * @param an instance of gzipParser that is registered as the parser origin of the document * @return a Document instance */ - public static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, final gzipParser parser) { - final String filename = location.getFileName(); - Document maindoc = new Document( - location, - mimeType, - charset, - parser, - null, - null, - AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title - null, - null, - null, - null, - 0.0d, 0.0d, - (Object) null, - null, - null, - null, - false, - new Date()); - return maindoc; - } - - /** - * Parse content in an open stream uncompressing on the fly a gzipped resource. - * @param location the URL of the gzipped resource - * @param charset the charset name if known - * @param timezoneOffset the local time zone offset - * @param compressedInStream an open stream uncompressing on the fly the compressed content - * @param maxLinks - * the maximum total number of links to parse and add to the - * result documents - * @param maxBytes - * the maximum number of content bytes to process - * @return a list of documents that result from parsing the source, with - * empty or null text. - * @throws Parser.Failure - * when the parser processing failed - */ - public Document[] parseCompressedInputStream(final DigestURL location, final String charset, final int timezoneOffset, final int depth, - final InputStream compressedInStream, final int maxLinks, final long maxBytes) throws Failure { + public static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, final gzipParser parser) { + final String filename = location.getFileName(); + Document maindoc = new Document( + location, + mimeType, + charset, + parser, + null, + null, + AbstractParser.singleList(filename.isEmpty() ? 
location.toTokens() : MultiProtocolURL.unescape(filename)), // title + null, + null, + null, + null, + 0.0d, 0.0d, + (Object) null, + null, + null, + null, + false, + new Date()); + return maindoc; + } + + /** + * Parse content in an open stream uncompressing on the fly a gzipped resource. + * @param location the URL of the gzipped resource + * @param charset the charset name if known + * @param timezoneOffset the local time zone offset + * @param compressedInStream an open stream uncompressing on the fly the compressed content + * @param maxLinks + * the maximum total number of links to parse and add to the + * result documents + * @param maxBytes + * the maximum number of content bytes to process + * @return a list of documents that result from parsing the source, with + * empty or null text. + * @throws Parser.Failure + * when the parser processing failed + */ + public Document[] parseCompressedInputStream(final DigestURL location, final String charset, final int timezoneOffset, final int depth, + final InputStream compressedInStream, final int maxLinks, final long maxBytes) throws Failure { // creating a new parser class to parse the unzipped content - final String compressedFileName = location.getFileName(); + final String compressedFileName = location.getFileName(); final String contentfilename = GzipUtils.getUncompressedFilename(compressedFileName); final String mime = TextParser.mimeOf(MultiProtocolURL.getFileExtension(contentfilename)); try { - /* Use the uncompressed file name for sub parsers to not unnecessarily use again the gzipparser */ - final String locationPath = location.getPath(); - final String contentPath = locationPath.substring(0, locationPath.length() - compressedFileName.length()) + contentfilename; - final DigestURL contentLocation = new DigestURL(location.getProtocol(), location.getHost(), location.getPort(), contentPath); - - /* Rely on the supporting parsers to respect the maxLinks and maxBytes limits on compressed content */ - return TextParser.parseWithLimits(contentLocation, mime, charset, timezoneOffset, depth, -1, compressedInStream, maxLinks, maxBytes); - } catch (MalformedURLException e) { - throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location); - } - } - + /* Use the uncompressed file name for sub parsers to not unnecessarily use again the gzipparser */ + final String locationPath = location.getPath(); + final String contentPath = locationPath.substring(0, locationPath.length() - compressedFileName.length()) + contentfilename; + final DigestURL contentLocation = new DigestURL(location.getProtocol(), location.getHost(), location.getPort(), contentPath); + + /* Rely on the supporting parsers to respect the maxLinks and maxBytes limits on compressed content */ + return TextParser.parseWithLimits(contentLocation, mime, charset, timezoneOffset, depth, -1, compressedInStream, maxLinks, maxBytes); + } catch (MalformedURLException e) { + throw new Parser.Failure("Unexpected error while parsing gzip file. 
" + e.getMessage(), location); + } + } + @Override public boolean isParseWithLimitsSupported() { - return true; + return true; } - + @Override public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, - final int timezoneOffset, final InputStream source, final int maxLinks, final long maxBytes) - throws Parser.Failure { + final int timezoneOffset, final InputStream source, final int maxLinks, final long maxBytes) + throws Parser.Failure { Document maindoc = null; GZIPInputStream zippedContent = null; try { - /* Only use in-memory stream here (no temporary file) : the parsers - * matching compressed content are expected to handle properly the maxBytes limit and terminate - * before an eventual OutOfMemory occurs */ + /* Only use in-memory stream here (no temporary file) : the parsers + * matching compressed content are expected to handle properly the maxBytes limit and terminate + * before an eventual OutOfMemory occurs */ zippedContent = new GZIPInputStream(source); } catch(IOException e) { - /* Use a GZIPOpeningStreamException to signal the caller the error occurred directly on stream opening - * and eventually apply special error handling */ - throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location, - new GZIPOpeningStreamException()); + /* Use a GZIPOpeningStreamException to signal the caller the error occurred directly on stream opening + * and eventually apply special error handling */ + throw new Parser.Failure("Unexpected error while parsing gzip file. " + e.getMessage(), location, + new GZIPOpeningStreamException()); } try { maindoc = createMainDocument(location, mimeType, charset, this); - + Document[] docs = parseCompressedInputStream(location, charset, timezoneOffset, DEFAULT_DEPTH, zippedContent, maxLinks, maxBytes); if (docs != null) { - maindoc.addSubDocuments(docs); - if(docs.length > 0 && docs[0].isPartiallyParsed()) { - maindoc.setPartiallyParsed(true); - } + maindoc.addSubDocuments(docs); + if(docs.length > 0 && docs[0].isPartiallyParsed()) { + maindoc.setPartiallyParsed(true); + } } } catch (final Exception e) { throw new Parser.Failure("Unexpected error while parsing gzip file. 
" + e.getMessage(),location); @@ -251,15 +253,15 @@ public class gzipParser extends AbstractParser implements Parser { */ public class GZIPOpeningStreamException extends Exception { - /** The serialization ID */ - private static final long serialVersionUID = 2824038185373304636L; + /** The serialization ID */ + private static final long serialVersionUID = 2824038185373304636L; + + public GZIPOpeningStreamException() { + super(); + } - public GZIPOpeningStreamException() { - super(); - } - - public GZIPOpeningStreamException(final String message) { - super(message); - } + public GZIPOpeningStreamException(final String message) { + super(message); + } } } diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index a9e5e50a0..960957490 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -63,7 +63,7 @@ import net.yacy.document.parser.html.TransformerWriter; public class htmlParser extends AbstractParser implements Parser { - /** The default maximum number of links (other than a, area, and canonical and stylesheet links) to add to a parsed document */ + /** The default maximum number of links (other than a, area, and canonical and stylesheet links) to add to a parsed document */ private static final int DEFAULT_MAX_LINKS = 10000; public htmlParser() { @@ -108,42 +108,93 @@ public class htmlParser extends AbstractParser implements Parser { final int timezoneOffset, final InputStream sourceStream) throws Parser.Failure, InterruptedException { - return parseWithLimits(location, mimeType, documentCharset, new HashSet<String>(), vocscraper, timezoneOffset, sourceStream, Integer.MAX_VALUE, DEFAULT_MAX_LINKS, Long.MAX_VALUE); + return parseWithLimits( + location, + mimeType, + documentCharset, + TagValency.EVAL, + new HashSet<String>(), + vocscraper, + timezoneOffset, + sourceStream, + Integer.MAX_VALUE, + DEFAULT_MAX_LINKS, + Long.MAX_VALUE); } - + @Override public Document[] parse( final DigestURL location, final String mimeType, final String documentCharset, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper vocscraper, final int timezoneOffset, final InputStream sourceStream) throws Parser.Failure, InterruptedException { - return parseWithLimits(location, mimeType, documentCharset, ignore_class_name, vocscraper, timezoneOffset, sourceStream, Integer.MAX_VALUE, DEFAULT_MAX_LINKS, Long.MAX_VALUE); + return parseWithLimits( + location, mimeType, + documentCharset, + defaultValency, + valencySwitchTagNames, + vocscraper, + timezoneOffset, + sourceStream, + Integer.MAX_VALUE, + DEFAULT_MAX_LINKS, + Long.MAX_VALUE); } @Override public boolean isParseWithLimitsSupported() { - return true; + return true; } @Override - public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String documentCharset, - final Set<String> ignore_class_name, final VocabularyScraper vocscraper, - final int timezoneOffset, final InputStream sourceStream, final int maxLinks, final long maxBytes) - throws Failure { - return parseWithLimits(location, mimeType, documentCharset, ignore_class_name, vocscraper, timezoneOffset, sourceStream, maxLinks, maxLinks, maxBytes); + public Document[] parseWithLimits( + final DigestURL location, + final String mimeType, + final String documentCharset, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, + final VocabularyScraper 
vocscraper, + final int timezoneOffset, + final InputStream sourceStream, + final int maxLinks, + final long maxBytes) + throws Failure { + return parseWithLimits( + location, + mimeType, + documentCharset, + defaultValency, + valencySwitchTagNames, + vocscraper, + timezoneOffset, + sourceStream, + maxLinks, + maxLinks, + maxBytes); } - private Document[] parseWithLimits(final DigestURL location, final String mimeType, final String documentCharset, final Set<String> ignore_class_name, final VocabularyScraper vocscraper, - final int timezoneOffset, final InputStream sourceStream, final int maxAnchors, final int maxLinks, final long maxBytes) - throws Failure { + private Document[] parseWithLimits( + final DigestURL location, + final String mimeType, + final String documentCharset, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, + final VocabularyScraper vocscraper, + final int timezoneOffset, + final InputStream sourceStream, + final int maxAnchors, + final int maxLinks, + final long maxBytes) + throws Failure { try { // first get a document from the parsed html Charset[] detectedcharsetcontainer = new Charset[]{null}; - ContentScraper scraper = parseToScraper(location, documentCharset, ignore_class_name, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxAnchors, maxLinks, maxBytes); + ContentScraper scraper = parseToScraper(location, documentCharset, defaultValency, valencySwitchTagNames, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxAnchors, maxLinks, maxBytes); // parseToScraper also detects/corrects/sets charset from html content tag final Document document = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraper); Document documentSnapshot = null; @@ -152,10 +203,10 @@ public class htmlParser extends AbstractParser implements Parser { // and create a sub-document for snapshot page (which will be merged by loader) // TODO: as a crawl request removes anchor part from original url getRef() is never successful - considere other handling as removeRef() in crawler if (location.getRef() != null && location.getRef().startsWith("!")) { - documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, ignore_class_name, vocscraper, timezoneOffset, maxAnchors, maxLinks, maxBytes); + documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, defaultValency, valencySwitchTagNames, vocscraper, timezoneOffset, maxAnchors, maxLinks, maxBytes); } else { // head tag fragment only allowed on url without anchor hashfragment, but there are discussions that existence of hashfragment anchor takes preference (means allow both) if (scraper.getMetas().containsKey("fragment") && scraper.getMetas().get("fragment").equals("!")) { - documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, ignore_class_name, vocscraper, timezoneOffset, maxAnchors, maxLinks, maxBytes); + documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, defaultValency, valencySwitchTagNames, vocscraper, timezoneOffset, maxAnchors, maxLinks, maxBytes); } } } catch (Exception ex1) { // ignore any exception for any issue with snapshot @@ -221,7 +272,16 @@ public class htmlParser extends AbstractParser implements Parser { return ppd; } - public static ContentScraper parseToScraper(final DigestURL location, final String documentCharset, final Set<String> ignore_class_name, final VocabularyScraper vocabularyScraper, final int timezoneOffset, final String input, 
final int maxAnchors, final int maxLinks) throws IOException { + public static ContentScraper parseToScraper( + final DigestURL location, + final String documentCharset, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, + final VocabularyScraper vocabularyScraper, + final int timezoneOffset, + final String input, + final int maxAnchors, + final int maxLinks) throws IOException { Charset[] detectedcharsetcontainer = new Charset[]{null}; InputStream sourceStream; try { @@ -231,7 +291,7 @@ public class htmlParser extends AbstractParser implements Parser { } ContentScraper scraper; // for this static methode no need to init local this.scraperObject try { - scraper = parseToScraper(location, documentCharset, ignore_class_name, vocabularyScraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxAnchors, maxLinks, Long.MAX_VALUE); + scraper = parseToScraper(location, documentCharset, defaultValency, valencySwitchTagNames, vocabularyScraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxAnchors, maxLinks, Long.MAX_VALUE); } catch (Failure e) { throw new IOException(e.getMessage()); } @@ -256,7 +316,8 @@ public class htmlParser extends AbstractParser implements Parser { public static ContentScraper parseToScraper( final DigestURL location, final String documentCharset, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper vocabularyScraper, final Charset[] detectedcharsetcontainer, final int timezoneOffset, @@ -264,7 +325,7 @@ public class htmlParser extends AbstractParser implements Parser { final int maxAnchors, final int maxLinks, final long maxBytes) throws Parser.Failure, IOException { - + // make a scraper String charset = null; @@ -280,8 +341,8 @@ public class htmlParser extends AbstractParser implements Parser { htmlFilter = new ScraperInputStream( sourceStream, documentCharset, - ignore_class_name, - TagValency.EVAL, + valencySwitchTagNames, + defaultValency, vocabularyScraper, location, false, @@ -325,26 +386,26 @@ public class htmlParser extends AbstractParser implements Parser { location, maxAnchors, maxLinks, - ignore_class_name, + valencySwitchTagNames, TagValency.EVAL, vocabularyScraper, timezoneOffset); final TransformerWriter writer = new TransformerWriter(null, null, scraper, false, Math.max(64, Math.min(4096, sourceStream.available()))); try { - final long maxChars = (long)(maxBytes * detectedcharsetcontainer[0].newDecoder().averageCharsPerByte()); - final Reader sourceReader = new InputStreamReader(sourceStream, detectedcharsetcontainer[0]); - final long copiedChars = IOUtils.copyLarge(sourceReader, writer, 0, maxChars); + final long maxChars = (long)(maxBytes * detectedcharsetcontainer[0].newDecoder().averageCharsPerByte()); + final Reader sourceReader = new InputStreamReader(sourceStream, detectedcharsetcontainer[0]); + final long copiedChars = IOUtils.copyLarge(sourceReader, writer, 0, maxChars); if(copiedChars > maxChars) { - /* maxChars limit has been exceeded : do not fail here as we want to use the partially obtained results. */ - scraper.setContentSizeLimitExceeded(true); + /* maxChars limit has been exceeded : do not fail here as we want to use the partially obtained results. */ + scraper.setContentSizeLimitExceeded(true); } else if(copiedChars == maxChars) { - /* Exactly maxChars limit reached : let's check if more to read remain. 
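
The byte-to-character budget logic here is easy to misread, so the following standalone sketch shows the same pattern in isolation with plain java.io, commons-io and made-up sample data: derive a character budget from the byte budget, copy at most that many characters, then peek once to distinguish "exactly at the limit" from "truncated".

    import java.io.IOException;
    import java.io.Reader;
    import java.io.StringReader;
    import java.io.StringWriter;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.io.IOUtils;

    public class CharBudgetDemo {
        public static void main(final String[] args) throws IOException {
            final long maxBytes = 16;
            // derive a character budget from the byte budget, as parseToScraper does
            final long maxChars = (long) (maxBytes * StandardCharsets.UTF_8.newDecoder().averageCharsPerByte());

            final Reader source = new StringReader("0123456789abcdefghij"); // 20 chars of sample data
            final StringWriter sink = new StringWriter();

            final long copied = IOUtils.copyLarge(source, sink, 0, maxChars);
            boolean truncated = false;
            if (copied > maxChars) {
                truncated = true; // defensive branch, mirrors parseToScraper
            } else if (copied == maxChars && source.read() >= 0) {
                truncated = true; // budget exactly consumed and more input remains
            }
            System.out.println("copied=" + copied + ", truncated=" + truncated);
        }
    }
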
*/ - if(sourceReader.read() >= 0) { - scraper.setContentSizeLimitExceeded(true); - } + /* Exactly maxChars limit reached : let's check if more to read remain. */ + if(sourceReader.read() >= 0) { + scraper.setContentSizeLimitExceeded(true); + } } } catch (final IOException e) { - throw new Parser.Failure("IO error:" + e.getMessage(), location); + throw new Parser.Failure("IO error:" + e.getMessage(), location); } finally { writer.flush(); //sourceStream.close(); keep open for multipe parsing (close done by caller) @@ -456,9 +517,10 @@ public class htmlParser extends AbstractParser implements Parser { * @return document as result of parsed snapshot or null if not exist or on any other issue with snapshot */ private Document parseAlternativeSnapshot( - final DigestURL location, final String mimeType, final String documentCharset, - final Set<String> ignore_class_name, final VocabularyScraper vocscraper, - final int timezoneOffset, final int maxAnchors, final int maxLinks, final long maxBytes) { + final DigestURL location, final String mimeType, final String documentCharset, + final TagValency defaultValency, final Set<String> valencySwitchTagNames, + final VocabularyScraper vocscraper, + final int timezoneOffset, final int maxAnchors, final int maxLinks, final long maxBytes) { Document documentSnapshot = null; try { // construct url for case (1) with anchor @@ -476,17 +538,17 @@ public class htmlParser extends AbstractParser implements Parser { Charset[] detectedcharsetcontainer = new Charset[]{null}; InputStream snapshotStream = null; try { - snapshotStream = locationSnapshot.getInputStream(ClientIdentification.yacyInternetCrawlerAgent); - ContentScraper scraperSnapshot = parseToScraper(location, documentCharset, ignore_class_name, vocscraper, detectedcharsetcontainer, timezoneOffset, snapshotStream, maxAnchors, maxLinks, maxBytes); + snapshotStream = locationSnapshot.getInputStream(ClientIdentification.yacyInternetCrawlerAgent); + ContentScraper scraperSnapshot = parseToScraper(location, documentCharset, defaultValency, valencySwitchTagNames, vocscraper, detectedcharsetcontainer, timezoneOffset, snapshotStream, maxAnchors, maxLinks, maxBytes); documentSnapshot = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraperSnapshot); } finally { - if(snapshotStream != null) { - try { - snapshotStream.close(); - } catch(IOException e) { - AbstractParser.log.warn("Could not close snapshot stream : " + e.getMessage()); - } - } + if(snapshotStream != null) { + try { + snapshotStream.close(); + } catch(IOException e) { + AbstractParser.log.warn("Could not close snapshot stream : " + e.getMessage()); + } + } } AbstractParser.log.info("parse snapshot "+locationSnapshot.toString() + " additional to " + location.toString()); } catch (IOException | Failure ex) { } diff --git a/source/net/yacy/document/parser/sevenzipParser.java b/source/net/yacy/document/parser/sevenzipParser.java index 116cc9288..6b3f3bbb8 100644 --- a/source/net/yacy/document/parser/sevenzipParser.java +++ b/source/net/yacy/document/parser/sevenzipParser.java @@ -44,6 +44,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.util.FileUtils; import SevenZip.ArchiveExtractCallback; import SevenZip.IInStream; @@ -63,7 +64,8 @@ public class sevenzipParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, 
final String charset, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final int timezoneOffset, final IInStream source) throws Parser.Failure, InterruptedException { @@ -94,7 +96,7 @@ public class sevenzipParser extends AbstractParser implements Parser { } catch (final IOException e) { throw new Parser.Failure("error opening 7zip archive: " + e.getMessage(), location); } - final SZParserExtractCallback aec = new SZParserExtractCallback(AbstractParser.log, archive, doc, location.getFile(), ignore_class_name, timezoneOffset); + final SZParserExtractCallback aec = new SZParserExtractCallback(AbstractParser.log, archive, doc, location.getFile(), defaultValency, valencySwitchTagNames, timezoneOffset); AbstractParser.log.fine("processing archive contents..."); try { archive.Extract(null, -1, 0, aec); @@ -116,10 +118,11 @@ public class sevenzipParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, final String charset, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final int timezoneOffset, final byte[] source) throws Parser.Failure, InterruptedException { - return parse(location, mimeType, charset, ignore_class_name, timezoneOffset, new ByteArrayIInStream(source)); + return parse(location, mimeType, charset, defaultValency, valencySwitchTagNames, timezoneOffset, new ByteArrayIInStream(source)); } @Override @@ -127,14 +130,15 @@ public class sevenzipParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, final String charset, - Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) throws Parser.Failure, InterruptedException { try { final ByteArrayOutputStream cfos = new ByteArrayOutputStream(); FileUtils.copy(source, cfos); - return new Document[]{parse(location, mimeType, charset, ignore_class_name, timezoneOffset, cfos.toByteArray())}; + return new Document[]{parse(location, mimeType, charset, defaultValency, valencySwitchTagNames, timezoneOffset, cfos.toByteArray())}; } catch (final IOException e) { throw new Parser.Failure("error processing 7zip archive: " + e.getMessage(), location); } @@ -148,7 +152,8 @@ public class sevenzipParser extends AbstractParser implements Parser { private ByteArrayOutputStream cfos = null; private final Document doc; private final String prefix; - private Set<String> ignore_class_name; + private final TagValency defaultValency; + private Set<String> valencySwitchTagNames; private final int timezoneOffset; public SZParserExtractCallback( @@ -156,13 +161,15 @@ public class sevenzipParser extends AbstractParser implements Parser { final IInArchive handler, final Document doc, final String prefix, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final int timezoneOffset) { super.Init(handler); this.log = logger; this.doc = doc; this.prefix = prefix; - this.ignore_class_name = ignore_class_name; + this.defaultValency = defaultValency; + this.valencySwitchTagNames = valencySwitchTagNames; this.timezoneOffset = timezoneOffset; } @@ -205,7 +212,7 @@ public class sevenzipParser extends AbstractParser implements Parser { // below for reversion of the effects final AnchorURL url = AnchorURL.newAnchor(this.doc.dc_source(), this.prefix + "/" + 
super.filePath); final String mime = TextParser.mimeOf(super.filePath.substring(super.filePath.lastIndexOf('.') + 1)); - theDocs = TextParser.parseSource(url, mime, null, this.ignore_class_name, new VocabularyScraper(), timezoneOffset, this.doc.getDepth() + 1, this.cfos.toByteArray()); + theDocs = TextParser.parseSource(url, mime, null,this.defaultValency, this.valencySwitchTagNames, new VocabularyScraper(), timezoneOffset, this.doc.getDepth() + 1, this.cfos.toByteArray()); this.doc.addSubDocuments(theDocs); } diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java index f1b7059c2..f0122c3cb 100644 --- a/source/net/yacy/document/parser/tarParser.java +++ b/source/net/yacy/document/parser/tarParser.java @@ -45,6 +45,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.util.FileUtils; // this is a new implementation of this parser idiom using multiple documents as result set @@ -70,7 +71,8 @@ public class tarParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, final String charset, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException { @@ -104,17 +106,17 @@ public class tarParser extends AbstractParser implements Parser { try { tmp = FileUtils.createTempFile(this.getClass(), name); FileUtils.copy(tis, tmp, entry.getSize()); - /* - * Create an appropriate sub location to prevent unwanted fallback to the tarparser on resources included in the archive. - * We use the tar file name as the parent sub path. Example : http://host/archive.tar/name. - * Indeed if we create a sub location with a '#' separator such as http://host/archive.tar#name, the - * extension of the URL is still ".tar", thus incorrectly making the tar parser - * as a possible parser for the sub resource. - */ +/* + * Create an appropriate sub location to prevent unwanted fallback to the tarparser on resources included in the archive. + * We use the tar file name as the parent sub path. Example : http://host/archive.tar/name. + * Indeed if we create a sub location with a '#' separator such as http://host/archive.tar#name, the + * extension of the URL is still ".tar", thus incorrectly making the tar parser + * as a possible parser for the sub resource. 
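
To make the intended URL layout concrete, a small sketch of the entry URL derivation, mirroring the createParentTarURL(...) helper defined further down and the DigestURL(parent, name) call used here; the archive URL and entry name are example values, and imports are assumed to follow the surrounding file.

    // Map the entry "doc/readme.html" of http://host/archive.tar to
    // http://host/archive.tar/doc/readme.html, so that sub parser selection
    // sees ".html" instead of ".tar".
    static DigestURL entryLocation(final DigestURL tarUrl, final String entryName) throws MalformedURLException {
        String base = tarUrl.toNormalform(false);
        if (!base.endsWith("/")) {
            base += "/"; // same normalization as createParentTarURL
        }
        final DigestURL parentTarURL = new DigestURL(base);
        return new DigestURL(parentTarURL, entryName);
    }
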
+ */ final DigestURL subLocation = new DigestURL(parentTarURL, name); - final Document[] subDocs = TextParser.parseSource(subLocation, mime, null, ignore_class_name, scraper, timezoneOffset, 999, tmp); + final Document[] subDocs = TextParser.parseSource(subLocation, mime, null, defaultValency, valencySwitchTagNames, scraper, timezoneOffset,999, tmp); if (subDocs == null) { - continue; + continue; } maindoc.addSubDocuments(subDocs); } catch (final Parser.Failure e) { @@ -130,146 +132,146 @@ public class tarParser extends AbstractParser implements Parser { return new Document[]{maindoc}; } - @Override - public boolean isParseWithLimitsSupported() { - return true; - } +@Override +public boolean isParseWithLimitsSupported() { +return true; +} - @Override - public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String charset, - final VocabularyScraper scraper, final int timezoneOffset, final InputStream source, final int maxLinks, - final long maxBytes) throws Failure, InterruptedException, UnsupportedOperationException { +@Override +public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String charset, +final VocabularyScraper scraper, final int timezoneOffset, final InputStream source, final int maxLinks, +final long maxBytes) throws Failure, InterruptedException, UnsupportedOperationException { - final DigestURL parentTarURL = createParentTarURL(location); +final DigestURL parentTarURL = createParentTarURL(location); - final TarArchiveInputStream tis = new TarArchiveInputStream(source); +final TarArchiveInputStream tis = new TarArchiveInputStream(source); - // create maindoc for this tar container - final Document maindoc = createMainDocument(location, mimeType, charset, this); +// create maindoc for this tar container +final Document maindoc = createMainDocument(location, mimeType, charset, this); - // loop through the elements in the tar file and parse every single file inside - TarArchiveEntry entry; - int totalProcessedLinks = 0; - while (true) { - try { - entry = tis.getNextTarEntry(); - if (entry == null) { - break; - } +// loop through the elements in the tar file and parse every single file inside +TarArchiveEntry entry; +int totalProcessedLinks = 0; +while (true) { +try { +entry = tis.getNextTarEntry(); +if (entry == null) { +break; +} - /* - * We are here sure at least one entry has still to be processed : let's check - * now the bytes limit as sub parsers applied on eventual previous entries may - * not support partial parsing and would have thrown a Parser.Failure instead of - * marking the document as partially parsed. - */ - if (tis.getBytesRead() >= maxBytes) { - maindoc.setPartiallyParsed(true); - break; - } +/* + * We are here sure at least one entry has still to be processed : let's check + * now the bytes limit as sub parsers applied on eventual previous entries may + * not support partial parsing and would have thrown a Parser.Failure instead of + * marking the document as partially parsed. + */ +if (tis.getBytesRead() >= maxBytes) { +maindoc.setPartiallyParsed(true); +break; +} - if (entry.isDirectory() || entry.getSize() <= 0) { - continue; - } - final String name = entry.getName(); - final int idx = name.lastIndexOf('.'); - final String mime = TextParser.mimeOf((idx > -1) ? 
name.substring(idx + 1) : ""); - try { - /* - * Rely on the supporting parsers to respect the maxLinks and maxBytes limits on - * compressed content - */ +if (entry.isDirectory() || entry.getSize() <= 0) { +continue; +} +final String name = entry.getName(); +final int idx = name.lastIndexOf('.'); +final String mime = TextParser.mimeOf((idx > -1) ? name.substring(idx + 1) : ""); +try { +/* + * Rely on the supporting parsers to respect the maxLinks and maxBytes limits on + * compressed content + */ - /* - * Create an appropriate sub location to prevent unwanted fallback to the - * tarparser on resources included in the archive. We use the tar file name as - * the parent sub path. Example : http://host/archive.tar/name. Indeed if we - * create a sub location with a '#' separator such as - * http://host/archive.tar#name, the extension of the URL is still ".tar", thus - * incorrectly making the tar parser as a possible parser for the sub resource. - */ - final DigestURL subLocation = new DigestURL(parentTarURL, name); - final Document[] subDocs = TextParser.parseWithLimits(subLocation, mime, null, timezoneOffset, 999, - entry.getSize(), tis, maxLinks - totalProcessedLinks, maxBytes - tis.getBytesRead()); +/* + * Create an appropriate sub location to prevent unwanted fallback to the + * tarparser on resources included in the archive. We use the tar file name as + * the parent sub path. Example : http://host/archive.tar/name. Indeed if we + * create a sub location with a '#' separator such as + * http://host/archive.tar#name, the extension of the URL is still ".tar", thus + * incorrectly making the tar parser as a possible parser for the sub resource. + */ +final DigestURL subLocation = new DigestURL(parentTarURL, name); +final Document[] subDocs = TextParser.parseWithLimits(subLocation, mime, null, timezoneOffset, 999, +entry.getSize(), tis, maxLinks - totalProcessedLinks, maxBytes - tis.getBytesRead()); - /* - * If the parser(s) did not consume all bytes in the entry, these ones will be - * skipped by the next call to getNextTarEntry() - */ - if (subDocs == null) { - continue; - } - maindoc.addSubDocuments(subDocs); - for (Document subDoc : subDocs) { - if (subDoc.getAnchors() != null) { - totalProcessedLinks += subDoc.getAnchors().size(); - } - } - /* - * Check if a limit has been exceeded (we are sure to pass here when maxLinks - * has been exceeded as this limit require parser support for partial parsing to - * be detected) - */ - if (subDocs[0].isPartiallyParsed()) { - maindoc.setPartiallyParsed(true); - break; - } - } catch (final Parser.Failure e) { - AbstractParser.log.warn("tar parser entry " + name + ": " + e.getMessage()); - } - } catch (final IOException e) { - AbstractParser.log.warn("tar parser:" + e.getMessage()); - break; - } - } - return new Document[] { maindoc }; - } +/* + * If the parser(s) did not consume all bytes in the entry, these ones will be + * skipped by the next call to getNextTarEntry() + */ +if (subDocs == null) { +continue; +} +maindoc.addSubDocuments(subDocs); +for (Document subDoc : subDocs) { +if (subDoc.getAnchors() != null) { +totalProcessedLinks += subDoc.getAnchors().size(); +} +} +/* + * Check if a limit has been exceeded (we are sure to pass here when maxLinks + * has been exceeded as this limit require parser support for partial parsing to + * be detected) + */ +if (subDocs[0].isPartiallyParsed()) { +maindoc.setPartiallyParsed(true); +break; +} +} catch (final Parser.Failure e) { +AbstractParser.log.warn("tar parser entry " + name + ": " + 
e.getMessage()); +} +} catch (final IOException e) { +AbstractParser.log.warn("tar parser:" + e.getMessage()); +break; +} +} +return new Document[] { maindoc }; +} - /** - * Generate a parent URL to use for generating sub URLs on tar archive entries. - * - * @param tarURL - * the URL of the tar archive - * @return an URL ending with a "/" suitable as a base URL for archive entries - */ - private DigestURL createParentTarURL(final DigestURL tarURL) { - String locationStr = tarURL.toNormalform(false); - if (!locationStr.endsWith("/")) { - locationStr += "/"; - } - DigestURL parentTarURL; - try { - parentTarURL = new DigestURL(locationStr); - } catch (MalformedURLException e1) { - /* This should not happen */ - parentTarURL = tarURL; - } - return parentTarURL; - } +/** + * Generate a parent URL to use for generating sub URLs on tar archive entries. + * + * @param tarURL + * the URL of the tar archive + * @return an URL ending with a "/" suitable as a base URL for archive entries + */ +private DigestURL createParentTarURL(final DigestURL tarURL) { +String locationStr = tarURL.toNormalform(false); +if (!locationStr.endsWith("/")) { +locationStr += "/"; +} +DigestURL parentTarURL; +try { +parentTarURL = new DigestURL(locationStr); +} catch (MalformedURLException e1) { +/* This should not happen */ +parentTarURL = tarURL; +} +return parentTarURL; +} - /** - * Create the main resulting parsed document for a tar container - * - * @param location - * the parsed resource URL - * @param mimeType - * the media type of the resource - * @param charset - * the charset name if known - * @param parser - * instance of tarParser that is registered as the parser origin of - * the document - * @return a Document instance - */ - public static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, - final tarParser parser) { - final String filename = location.getFileName(); - final Document maindoc = new Document(location, mimeType, charset, parser, null, null, - AbstractParser - .singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title - null, null, null, null, 0.0d, 0.0d, (Object) null, null, null, null, false, new Date()); - return maindoc; - } +/** + * Create the main resulting parsed document for a tar container + * + * @param location + * the parsed resource URL + * @param mimeType + * the media type of the resource + * @param charset + * the charset name if known + * @param parser + * instance of tarParser that is registered as the parser origin of + * the document + * @return a Document instance + */ +public static Document createMainDocument(final DigestURL location, final String mimeType, final String charset, +final tarParser parser) { +final String filename = location.getFileName(); +final Document maindoc = new Document(location, mimeType, charset, parser, null, null, +AbstractParser +.singleList(filename.isEmpty() ? 
location.toTokens() : MultiProtocolURL.unescape(filename)), // title +null, null, null, null, 0.0d, 0.0d, (Object) null, null, null, null, false, new Date()); +return maindoc; +} public final static boolean isTar(File f) { if (!f.exists() || f.length() < 0x105) return false; diff --git a/source/net/yacy/document/parser/zipParser.java b/source/net/yacy/document/parser/zipParser.java index 7d47131a5..c994f096f 100644 --- a/source/net/yacy/document/parser/zipParser.java +++ b/source/net/yacy/document/parser/zipParser.java @@ -39,6 +39,7 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; @@ -72,7 +73,8 @@ public class zipParser extends AbstractParser implements Parser { final DigestURL location, final String mimeType, final String charset, - final Set<String> ignore_class_name, + final TagValency defaultValency, + final Set<String> valencySwitchTagNames, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) @@ -121,7 +123,7 @@ public class zipParser extends AbstractParser implements Parser { FileUtils.copy(zis, tmp, entry.getSize()); final DigestURL virtualURL = DigestURL.newURL(location, "#" + name); //this.log.logInfo("ZIP file parser: " + virtualURL.toNormalform(false, false)); - final Document[] docs = TextParser.parseSource(virtualURL, mime, null, ignore_class_name, scraper, timezoneOffset, 999, tmp); + final Document[] docs = TextParser.parseSource(virtualURL, mime, null, defaultValency, valencySwitchTagNames, scraper, timezoneOffset, 999, tmp); if (docs == null) continue; maindoc.addSubDocuments(docs); } catch (final Parser.Failure e) { diff --git a/source/net/yacy/htroot/Crawler_p.java b/source/net/yacy/htroot/Crawler_p.java index 66750684a..44b944fe8 100644 --- a/source/net/yacy/htroot/Crawler_p.java +++ b/source/net/yacy/htroot/Crawler_p.java @@ -626,6 +626,7 @@ public class Crawler_p { cachePolicy, collection, agentName, + TagValency.EVAL, ignoreclassname, new VocabularyScraper(vocabulary_scraper), timezoneOffset); diff --git a/source/net/yacy/htroot/QuickCrawlLink_p.java b/source/net/yacy/htroot/QuickCrawlLink_p.java index adf0497d1..4230b69ea 100644 --- a/source/net/yacy/htroot/QuickCrawlLink_p.java +++ b/source/net/yacy/htroot/QuickCrawlLink_p.java @@ -43,6 +43,7 @@ import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.retrieval.Request; +import net.yacy.document.parser.html.TagValency; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; @@ -161,7 +162,7 @@ public class QuickCrawlLink_p { CacheStrategy.IFFRESH, collection, ClientIdentification.yacyIntranetCrawlerAgentName, - null, null, + TagValency.EVAL, null, null, timezoneOffset); sb.crawler.putActive(pe.handle().getBytes(), pe); } catch (final Exception e) { diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 0fa5e537a..1d1439482 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -709,7 +709,16 @@ public final class LoaderDispatcher { final String supportError = TextParser.supports(url, responseHeader.getContentType()); if (supportError != null) throw new 
IOException("no parser support: " + supportError); try { - documents = TextParser.parseSource(url, responseHeader.getContentType(), responseHeader.getCharacterEncoding(), response.profile().ignoreDivClassName(), response.profile().scraper(), timezoneOffset, response.depth(), response.getContent()); + documents = TextParser.parseSource( + url, + responseHeader.getContentType(), + responseHeader.getCharacterEncoding(), + response.profile().defaultValency(), + response.profile().valencySwitchTagNames(), + response.profile().scraper(), + timezoneOffset, + response.depth(), + response.getContent()); if (documents == null) throw new IOException("document == null"); } catch (final Exception e) { throw new IOException("parser error: " + e.getMessage()); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index a7a971ce5..34ebfcc94 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2945,7 +2945,8 @@ public final class Switchboard extends serverSwitch { documents = TextParser.genericParseSource(new AnchorURL(response.url()), response.getMimeType(), response.getCharacterEncoding(), - response.profile().ignoreDivClassName(), + response.profile().defaultValency(), + response.profile().valencySwitchTagNames(), response.profile().scraper(), response.profile().timezoneOffset(), response.depth(), @@ -2963,7 +2964,8 @@ public final class Switchboard extends serverSwitch { new AnchorURL(response.url()), response.getMimeType(), response.getCharacterEncoding(), - response.profile().ignoreDivClassName(), + response.profile().defaultValency(), + response.profile().valencySwitchTagNames(), response.profile().scraper(), response.profile().timezoneOffset(), response.depth(), diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index e7334a91b..db0619381 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -45,6 +45,7 @@ import net.yacy.document.Document; import net.yacy.document.LibraryProvider; import net.yacy.document.TextParser; import net.yacy.document.VocabularyScraper; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.WebgraphConfiguration; @@ -162,24 +163,24 @@ public class DocumentIndex extends Segment { } InputStream sourceStream = null; try { - sourceStream = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent); - documents = TextParser.parseSource(url, null, null, new HashSet<String>(), new VocabularyScraper(), timezoneOffset, 0, length, sourceStream); + sourceStream = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent); + documents = TextParser.parseSource(url, null, null, TagValency.EVAL, new HashSet<String>(), new VocabularyScraper(), timezoneOffset, 0, length, sourceStream); } catch (final Exception e ) { throw new IOException("cannot parse " + url.toNormalform(false) + ": " + e.getMessage()); } finally { - if(sourceStream != null) { - try { - sourceStream.close(); - } catch(IOException e) { - ConcurrentLog.warn("DocumentIndex", "Could not close source stream : " + e.getMessage()); - } - } + if(sourceStream != null) { + try { + sourceStream.close(); + } catch(IOException e) { + ConcurrentLog.warn("DocumentIndex", "Could not close source stream : " + e.getMessage()); + } + } } //Document document = 
Document.mergeDocuments(url, null, documents); final SolrInputDocument[] rows = new SolrInputDocument[documents.length]; int c = 0; for ( final Document document : documents ) { - if (document == null) continue; + if (document == null) continue; final Condenser condenser = new Condenser(document, null, true, true, LibraryProvider.dymLib, true, true, 0); rows[c++] = super.storeDocument(
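
Editorial note on the recurring call-site pattern in this patch: every TextParser.parseSource caller now passes a default TagValency together with the set of tag or class names that switch that valency, replacing the former ignore_class_name set. The sketch below shows one such caller, modelled on the DocumentIndex change above (TagValency.EVAL plus an empty switch set); the helper method, its name, its enclosing class and its simplified exception handling are hypothetical, and only the argument order of the byte[] overload is taken from the calls visible in the patch.

import java.util.HashSet;
import java.util.Set;

import net.yacy.cora.document.id.DigestURL;
import net.yacy.document.Document;
import net.yacy.document.TextParser;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.TagValency;

// Hypothetical helper (sketch only, to be placed in some utility class): parse raw
// document bytes with the new valency-aware signature, evaluating all tags and
// switching none of them.
public static Document[] parseWithDefaultValency(final DigestURL url, final String mime,
        final byte[] content, final int timezoneOffset) throws Exception {
    final Set<String> valencySwitchTagNames = new HashSet<String>(); // no tag/class names flip the valency
    return TextParser.parseSource(
            url, mime, null,            // location, media type, charset (charset unknown here)
            TagValency.EVAL,            // default valency: evaluate every tag
            valencySwitchTagNames,      // names that switch to the opposite valency (empty)
            new VocabularyScraper(),    // no vocabulary scraping
            timezoneOffset,
            0,                          // crawl depth of this document
            content);                   // raw document bytes
}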