diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index e52b2aa1a..7f36b52a5 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -67,7 +67,9 @@ import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.kelondro.util.ISO639;
 
-
+/**
+ * A content scraper supporting HTML tags.
+ */
 public class ContentScraper extends AbstractScraper implements Scraper {
 
     private final static int MAX_TAGSIZE = 1024 * 1024;
@@ -220,7 +222,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * evaluation scores: count appearance of specific attributes
      */
     private final Evaluation evaluationScores;
-
+
+    /** Set to true when a limit on the size of scraped content has been exceeded */
+    private boolean contentSizeLimitExceeded;
+
     /**
      * scrape a document
      * @param root the document root url
@@ -271,6 +276,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         this.canonical = null;
         this.publisher = null;
         this.breadcrumbs = 0;
+        this.contentSizeLimitExceeded = false;
     }
 
     @Override
@@ -608,8 +614,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return iconRels;
     }
 
+    /**
+     * Process a singleton tag (one with no end tag, or whose possible end tag is not processed)
+     * @param tag the tag to parse. Must not be null.
+     */
     @Override
-    public void scrapeTag0(Tag tag) {
+    public void scrapeTag0(final Tag tag) {
         checkOpts(tag);
         if (tag.name.equalsIgnoreCase("img")) {
             final String src = tag.opts.getProperty("src", EMPTY_STRING);
@@ -768,8 +778,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         this.fireScrapeTag0(tag.name, tag.opts);
     }
 
+    /**
+     * Process a paired tag (one that has both a start and an end tag)
+     * @param tag the tag to process. Must not be null.
+     */
     @Override
-    public void scrapeTag1(Tag tag) {
+    public void scrapeTag1(final Tag tag) {
         checkOpts(tag);
         // System.out.println("ScrapeTag1: tag.tagname=" + tag.tagname + ", opts=" + tag.opts.toString() + ", text=" + UTF8.String(text));
         if (tag.name.equalsIgnoreCase("a") && tag.content.length() < 2048) {
@@ -1079,7 +1093,21 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public Map<DigestURL, IconEntry> getIcons() {
         return this.icons;
     }
-
+
+    /**
+     * @return true when a limit on the size of scraped content has been exceeded
+     */
+    public boolean isContentSizeLimitExceeded() {
+        return this.contentSizeLimitExceeded;
+    }
+
+    /**
+     * @param contentSizeLimitExceeded true when a limit on the size of scraped content has been exceeded
+     */
+    public void setContentSizeLimitExceeded(final boolean contentSizeLimitExceeded) {
+        this.contentSizeLimitExceeded = contentSizeLimitExceeded;
+    }
+
     /*
     DC in html example:
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index 77798ed5c..ab4a7fa25 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -41,6 +41,8 @@ import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.util.CommonPattern;
+import net.yacy.cora.util.StreamLimitException;
+import net.yacy.cora.util.StrictLimitInputStream;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Parser;
@@ -56,7 +58,8 @@ import com.ibm.icu.text.CharsetDetector;
 
 public class htmlParser extends AbstractParser implements Parser {
 
-    private static final int maxLinks = 10000;
+    /** The default maximum number of links to add to a parsed document */
+    private static final int DEFAULT_MAX_LINKS = 10000;
 
     public htmlParser() {
         super("Streaming HTML Parser");
@@ -100,10 +103,22 @@ public class htmlParser extends AbstractParser implements Parser {
             final int timezoneOffset,
             final InputStream sourceStream) throws Parser.Failure, InterruptedException {
 
+        return parseWithLimits(location, mimeType, documentCharset, vocscraper, timezoneOffset, sourceStream, DEFAULT_MAX_LINKS, Long.MAX_VALUE);
+    }
+
+    @Override
+    public boolean isParseWithLimitsSupported() {
+        return true;
+    }
+
+    @Override
+    public Document[] parseWithLimits(final DigestURL location, final String mimeType, final String documentCharset, final VocabularyScraper vocscraper,
+            final int timezoneOffset, final InputStream sourceStream, final int maxLinks, final long maxBytes)
+            throws Failure {
         try {
             // first get a document from the parsed html
             Charset[] detectedcharsetcontainer = new Charset[]{null};
-            ContentScraper scraper = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks);
+            ContentScraper scraper = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks, maxBytes);
             // parseToScraper also detects/corrects/sets charset from html content tag
             final Document document = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraper);
             Document documentSnapshot = null;
@@ -112,10 +127,10 @@ public class htmlParser extends AbstractParser implements Parser {
             // and create a sub-document for snapshot page (which will be merged by loader)
             // TODO: as a crawl request removes the anchor part from the original url, getRef() is never successful - consider other handling such as removeRef() in the crawler
             if (location.getRef() != null && location.getRef().startsWith("!")) {
-                documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, vocscraper, timezoneOffset);
+                documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, vocscraper, timezoneOffset, maxLinks, maxBytes);
             } else { // head tag fragment only allowed on url without anchor hashfragment, but there are discussions that existence of hashfragment anchor takes preference (means allow both)
                 if (scraper.getMetas().containsKey("fragment") && scraper.getMetas().get("fragment").equals("!")) {
-                    documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, vocscraper, timezoneOffset);
+                    documentSnapshot = parseAlternativeSnapshot(location, mimeType, documentCharset, vocscraper, timezoneOffset, maxLinks, maxBytes);
                 }
             }
         } catch (Exception ex1) { // ignore any exception for any issue with snapshot
@@ -127,6 +142,8 @@ public class htmlParser extends AbstractParser implements Parser {
             throw new Parser.Failure("IOException in htmlParser: " + e.getMessage(), location);
         }
     }
+
+
 
     /**
      * the transformScraper method transforms a scraper object into a document object
@@ -173,6 +190,7 @@ public class htmlParser extends AbstractParser implements Parser {
                 scraper.getDate());
         ppd.setScraperObject(scraper);
         ppd.setIcons(scraper.getIcons());
+        ppd.setPartiallyParsed(scraper.isContentSizeLimitExceeded());
         return ppd;
     }
 
@@ -187,21 +205,40 @@ public class htmlParser extends AbstractParser implements Parser {
         }
         ContentScraper scraper; // for this static method no need to init local this.scraperObject
         try {
-            scraper = parseToScraper(location, documentCharset, vocabularyScraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks);
+            scraper = parseToScraper(location, documentCharset, vocabularyScraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks, Long.MAX_VALUE);
         } catch (Failure e) {
             throw new IOException(e.getMessage());
         }
         return scraper;
     }
 
+    /**
+     * Parse the resource at location and return the resulting ContentScraper
+     * @param location the URL of the resource to parse
+     * @param documentCharset the document charset name, if known
+     * @param vocabularyScraper a vocabulary scraper
+     * @param detectedcharsetcontainer a mutable array of Charsets: filled with the charset detected when parsing
+     * @param timezoneOffset the local time zone offset
+     * @param sourceStream an open stream on the resource to parse
+     * @param maxLinks the maximum number of links to store in the scraper
+     * @param maxBytes the maximum number of content bytes to process
+     * @return a scraper containing the parsed information
+     * @throws Parser.Failure when an error occurred while parsing
+     * @throws IOException when a read/write error occurred while trying to detect the charset
+     */
     public static ContentScraper parseToScraper(
             final DigestURL location,
             final String documentCharset,
             final VocabularyScraper vocabularyScraper,
-            Charset[] detectedcharsetcontainer,
+            final Charset[] detectedcharsetcontainer,
             final int timezoneOffset,
             InputStream sourceStream,
-            final int maxLinks) throws Parser.Failure, IOException {
+            final int maxLinks,
+            final long maxBytes) throws Parser.Failure, IOException {
+
+        if(maxBytes >= 0 && maxBytes < Long.MAX_VALUE) {
+            sourceStream = new StrictLimitInputStream(sourceStream, maxBytes);
+        }
 
         // make a scraper
         String charset = null;
@@ -254,8 +291,17 @@ public class htmlParser extends AbstractParser implements Parser {
         final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false, Math.max(64, Math.min(4096, sourceStream.available())));
         try {
             FileUtils.copy(sourceStream, writer, detectedcharsetcontainer[0]);
+        } catch(StreamLimitException e) {
+            /* The maxBytes limit has been reached: do not fail here, as we want to use the partially obtained results. */
+            scraper.setContentSizeLimitExceeded(true);
         } catch (final IOException e) {
-            throw new Parser.Failure("IO error:" + e.getMessage(), location);
+            /* A StreamLimitException may itself be wrapped in an IOException by an InputStreamReader */
+            if(e.getCause() instanceof StreamLimitException) {
+                /* The maxBytes limit has been reached: do not fail here, as we want to use the partially obtained results. */
+                scraper.setContentSizeLimitExceeded(true);
+            } else {
+                throw new Parser.Failure("IO error:" + e.getMessage(), location);
+            }
         } finally {
             writer.flush();
             //sourceStream.close(); keep open for multiple parsing (close done by caller)
@@ -361,10 +407,12 @@ public class htmlParser extends AbstractParser implements Parser {
      * @param documentCharset
      * @param vocscraper
      * @param timezoneOffset
+     * @param maxLinks the maximum number of links to store in the document
+     * @param maxBytes the maximum number of content bytes to process
      * @return the document resulting from the parsed snapshot, or null if it does not exist or on any other issue with the snapshot
      */
     private Document parseAlternativeSnapshot(final DigestURL location, final String mimeType, final String documentCharset,
-            final VocabularyScraper vocscraper, final int timezoneOffset) {
+            final VocabularyScraper vocscraper, final int timezoneOffset, final int maxLinks, final long maxBytes) {
         Document documentSnapshot = null;
         try {
             // construct url for case (1) with anchor
@@ -383,7 +431,7 @@ public class htmlParser extends AbstractParser implements Parser {
             InputStream snapshotStream = null;
             try {
                 snapshotStream = locationSnapshot.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
-                ContentScraper scraperSnapshot = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, snapshotStream, maxLinks);
+                ContentScraper scraperSnapshot = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, snapshotStream, maxLinks, maxBytes);
                 documentSnapshot = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraperSnapshot);
             } finally {
                 if(snapshotStream != null) {
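
For reference, a minimal usage sketch (not part of the patch) of the new parseWithLimits API. The example URL, the limit values, the no-arg VocabularyScraper constructor and the Document.isPartiallyParsed() getter (assumed to be the counterpart of the setPartiallyParsed() setter used above) are illustrative assumptions; the parser calls follow the signatures introduced in this diff.

    import java.io.InputStream;

    import net.yacy.cora.document.id.DigestURL;
    import net.yacy.cora.protocol.ClientIdentification;
    import net.yacy.document.Document;
    import net.yacy.document.VocabularyScraper;
    import net.yacy.document.parser.htmlParser;

    public class ParseWithLimitsSketch {
        public static void main(final String[] args) throws Exception {
            final htmlParser parser = new htmlParser();
            /* Illustrative limits: at most 1000 links and 1 MiB of content */
            final int maxLinks = 1000;
            final long maxBytes = 1024L * 1024L;
            final DigestURL location = new DigestURL("http://example.com/page.html");
            try (final InputStream source = location.getInputStream(ClientIdentification.yacyInternetCrawlerAgent)) {
                if (parser.isParseWithLimitsSupported()) {
                    final Document[] docs = parser.parseWithLimits(location, "text/html", null,
                            new VocabularyScraper(), 0, source, maxLinks, maxBytes);
                    /* isPartiallyParsed() is assumed as the getter matching setPartiallyParsed() */
                    if (docs != null && docs.length > 0 && docs[0].isPartiallyParsed()) {
                        System.out.println("A limit was reached: the parsed document is partial but usable");
                    }
                }
            }
        }
    }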