Set a limit on the CharBuffer object size to guard against bad or overly large content
13 years ago · b7bb84c0bb
parent c602eaaf46
commit b7bb84c0bb
8 changed files with 188 additions and 172 deletions
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@ -59,7 +59,8 @@ import net.yacy.kelondro.util.MemoryControl;

 public class ContentScraper extends AbstractScraper implements Scraper {
 	private static final String EMPTY_STRING = new String();
-	
+	public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
+
    private final char degree = '\u00B0';
    private final char[] minuteCharsHTML = "&#039;".toCharArray();

@ -166,7 +167,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        this.bold = new ClusteredScoreMap<String>();
        this.italic = new ClusteredScoreMap<String>();
        this.li = new ArrayList<String>();
-        this.content = new CharBuffer(1024);
+        this.content = new CharBuffer(MAX_DOCSIZE, 1024);
        this.htmlFilterEventListeners = new EventListenerList();
        this.lon = 0.0f;
        this.lat = 0.0f;
--- a/source/net/yacy/document/parser/html/ContentTransformer.java
+++ b/source/net/yacy/document/parser/html/ContentTransformer.java
@ -37,7 +37,7 @@ import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.logging.Log;

 public class ContentTransformer extends AbstractTransformer implements Transformer {
-    
+
    // statics: for initialization of the HTMLFilterAbstractTransformer
    private static final TreeSet<String> linkTags0 = new TreeSet<String>(ASCII.insensitiveASCIIComparator);
    private static final TreeSet<String> linkTags1 = new TreeSet<String>(ASCII.insensitiveASCIIComparator);
@ -82,7 +82,7 @@ public class ContentTransformer extends AbstractTransformer implements Transform
    }

    private static char[] genBlueLetters(int length) {
-            final CharBuffer bb = new CharBuffer(" <FONT COLOR=#0000FF>".toCharArray());
+            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, " <FONT COLOR=#0000FF>".toCharArray());
            length = length / 2;
            if (length > 10) length = 7;
            while (length-- > 0) {
@ -106,7 +106,7 @@ public class ContentTransformer extends AbstractTransformer implements Transform
        }
        return false;
    }
-    
+
    @Override
    public char[] transformText(final char[] text) {
        if (this.bluelist != null) {
--- a/source/net/yacy/document/parser/html/TransformerWriter.java
+++ b/source/net/yacy/document/parser/html/TransformerWriter.java
@ -96,7 +96,7 @@ public final class TransformerWriter extends Writer {
        this.outStream     = outStream;
        this.scraper       = scraper;
        this.transformer   = transformer;
-        this.buffer        = new CharBuffer(initialBufferSize);
+        this.buffer        = new CharBuffer(ContentScraper.MAX_DOCSIZE, initialBufferSize);
        this.filterTag     = null;
        this.filterOpts    = null;
        this.filterCont    = null;
@ -114,7 +114,7 @@ public final class TransformerWriter extends Writer {
    }

    public static char[] genTag0raw(final String tagname, final boolean opening, final char[] tagopts) {
-            final CharBuffer bb = new CharBuffer(tagname.length() + tagopts.length + 3);
+            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, tagname.length() + tagopts.length + 3);
            bb.append('<');
            if (!opening) {
                bb.append('/');
@ -136,7 +136,7 @@ public final class TransformerWriter extends Writer {
    }

    public static char[] genTag1raw(final String tagname, final char[] tagopts, final char[] text) {
-            final CharBuffer bb = new CharBuffer(2 * tagname.length() + tagopts.length + text.length + 5);
+            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, 2 * tagname.length() + tagopts.length + text.length + 5);
            bb.append('<').append(tagname);
            if (tagopts.length > 0) {
 //              if (tagopts[0] == (byte) 32)
@ -157,7 +157,7 @@ public final class TransformerWriter extends Writer {

    public static char[] genTag0(final String tagname, final Properties tagopts, final char quotechar) {
            final char[] tagoptsx = (tagopts.isEmpty()) ? null : genOpts(tagopts, quotechar);
-            final CharBuffer bb = new CharBuffer(tagname.length() + ((tagoptsx == null) ? 0 : (tagoptsx.length + 1)) + tagname.length() + 2);
+            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, tagname.length() + ((tagoptsx == null) ? 0 : (tagoptsx.length + 1)) + tagname.length() + 2);
            bb.append('<').append(tagname);
            if (tagoptsx != null) {
                bb.appendSpace();
@ -175,7 +175,7 @@ public final class TransformerWriter extends Writer {

    public static char[] genTag1(final String tagname, final Properties tagopts, final char[] text, final char quotechar) {
            final char[] gt0 = genTag0(tagname, tagopts, quotechar);
-            final CharBuffer cb = new CharBuffer(gt0, gt0.length + text.length + tagname.length() + 3);
+            final CharBuffer cb = new CharBuffer(ContentScraper.MAX_DOCSIZE, gt0, gt0.length + text.length + tagname.length() + 3);
            cb.append(text).append('<').append('/').append(tagname).append('>');
            final char[] result = cb.getChars();
            try {
@ -189,7 +189,7 @@ public final class TransformerWriter extends Writer {
    // a helper method for pretty-printing of properties for html tags
    public static char[] genOpts(final Properties prop, final char quotechar) {
            final Enumeration<?> e = prop.propertyNames();
-            final CharBuffer bb = new CharBuffer(prop.size() * 40);
+            final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, prop.size() * 40);
            String key;
            while (e.hasMoreElements()) {
                key = (String) e.nextElement();
@ -225,7 +225,7 @@ public final class TransformerWriter extends Writer {
            if (opening) {
                if ((this.scraper != null) && (this.scraper.isTag0(tag))) {
                    // this single tag is collected at once here
-                    final CharBuffer charBuffer = new CharBuffer(content);
+                    final CharBuffer charBuffer = new CharBuffer(ContentScraper.MAX_DOCSIZE, content);
                    this.scraper.scrapeTag0(tag, charBuffer.propParser());
                    try {
                        charBuffer.close();
@ -236,7 +236,7 @@ public final class TransformerWriter extends Writer {
                }
                if ((this.transformer != null) && (this.transformer.isTag0(tag))) {
                    // this single tag is collected at once here
-                    final CharBuffer scb = new CharBuffer(content);
+                    final CharBuffer scb = new CharBuffer(ContentScraper.MAX_DOCSIZE, content);
                    try {
                        return this.transformer.transformTag0(tag, scb.propParser(), quotechar);
                    } finally {
@ -250,14 +250,14 @@ public final class TransformerWriter extends Writer {
                           ((this.transformer != null) && (this.transformer.isTag1(tag)))) {
                    // ok, start collecting
                    this.filterTag = tag;
-                    final CharBuffer scb = new CharBuffer(content);
+                    final CharBuffer scb = new CharBuffer(ContentScraper.MAX_DOCSIZE, content);
                    this.filterOpts = scb.propParser();
                    try {
                        scb.close();
                    } catch (final IOException e) {
                        Log.logException(e);
                    }
-                    if (this.filterCont == null) this.filterCont = new CharBuffer(Math.max(100, content.length)); else this.filterCont.reset();
+                    if (this.filterCont == null) this.filterCont = new CharBuffer(ContentScraper.MAX_DOCSIZE, Math.max(100, content.length)); else this.filterCont.reset();
                    return new char[0];
                } else {
                     // we ignore that thing and return it again
--- a/source/net/yacy/document/parser/odtParser.java
+++ b/source/net/yacy/document/parser/odtParser.java
@ -1,4 +1,4 @@
-//odtParser.java 
+//odtParser.java
 //------------------------
 //part of YaCy
 //(C) by Michael Peter Christen; mc@yacy.net
@ -50,71 +50,73 @@ import net.yacy.kelondro.util.FileUtils;

 public class odtParser extends AbstractParser implements Parser {

-    public odtParser() {        
+    public final static int MAX_DOCSIZE = 200 * 1024 * 1024;
+
+    public odtParser() {
        super("OASIS OpenDocument V2 Text Document Parser");
-        SUPPORTED_EXTENSIONS.add("odt");
-        SUPPORTED_EXTENSIONS.add("ods");
-        SUPPORTED_EXTENSIONS.add("odp");
-        SUPPORTED_EXTENSIONS.add("odg");
-        SUPPORTED_EXTENSIONS.add("odc");
-        SUPPORTED_EXTENSIONS.add("odf");
-        SUPPORTED_EXTENSIONS.add("odb");
-        SUPPORTED_EXTENSIONS.add("odi");
-        SUPPORTED_EXTENSIONS.add("odm");
-        SUPPORTED_EXTENSIONS.add("ott");
-        SUPPORTED_EXTENSIONS.add("ots");
-        SUPPORTED_EXTENSIONS.add("otp");
-        SUPPORTED_EXTENSIONS.add("otg");
-        SUPPORTED_EXTENSIONS.add("sxw"); // Star Office Writer file format
-        SUPPORTED_EXTENSIONS.add("sxc"); // Star Office Calc file format
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.spreadsheet");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.presentation");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.graphics");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.chart");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.formula");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.database");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.image");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text-master");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text-template");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.spreadsheet-template");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.presentation-template");
-        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.graphics-template");
-        SUPPORTED_MIME_TYPES.add("application/x-vnd.oasis.opendocument.text");
-        SUPPORTED_MIME_TYPES.add("application/OOo-calc");
-        SUPPORTED_MIME_TYPES.add("application/OOo-writer");
+        this.SUPPORTED_EXTENSIONS.add("odt");
+        this.SUPPORTED_EXTENSIONS.add("ods");
+        this.SUPPORTED_EXTENSIONS.add("odp");
+        this.SUPPORTED_EXTENSIONS.add("odg");
+        this.SUPPORTED_EXTENSIONS.add("odc");
+        this.SUPPORTED_EXTENSIONS.add("odf");
+        this.SUPPORTED_EXTENSIONS.add("odb");
+        this.SUPPORTED_EXTENSIONS.add("odi");
+        this.SUPPORTED_EXTENSIONS.add("odm");
+        this.SUPPORTED_EXTENSIONS.add("ott");
+        this.SUPPORTED_EXTENSIONS.add("ots");
+        this.SUPPORTED_EXTENSIONS.add("otp");
+        this.SUPPORTED_EXTENSIONS.add("otg");
+        this.SUPPORTED_EXTENSIONS.add("sxw"); // Star Office Writer file format
+        this.SUPPORTED_EXTENSIONS.add("sxc"); // Star Office Calc file format
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.spreadsheet");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.presentation");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.graphics");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.chart");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.formula");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.database");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.image");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text-master");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text-template");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.spreadsheet-template");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.presentation-template");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.graphics-template");
+        this.SUPPORTED_MIME_TYPES.add("application/x-vnd.oasis.opendocument.text");
+        this.SUPPORTED_MIME_TYPES.add("application/OOo-calc");
+        this.SUPPORTED_MIME_TYPES.add("application/OOo-writer");
    }
-    
+
    private Document[] parse(final MultiProtocolURI location, final String mimeType,
            final String charset, final File dest)
            throws Parser.Failure, InterruptedException {
-        
+
        CharBuffer writer = null;
-        try {          
+        try {
            String docDescription = null;
            String docKeywordStr  = null;
            String docShortTitle  = null;
            String docLongTitle   = null;
            String docAuthor      = null;
            String docLanguage    = null;
-            
+
            // opening the file as zip file
            final ZipFile zipFile = new ZipFile(dest);
            final Enumeration<? extends ZipEntry> zipEnum = zipFile.entries();
            final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
-            
+
            // looping through all containing files
            while (zipEnum.hasMoreElements()) {
-                
+
                // getting the next zip file entry
                final ZipEntry zipEntry= zipEnum.nextElement();
                final String entryName = zipEntry.getName();
-                
+
                // content.xml contains the document content in xml format
                if (entryName.equals("content.xml")) {
-                    
+
                    // create a writer for output
-                    writer = new CharBuffer((int)zipEntry.getSize());
+                    writer = new CharBuffer(MAX_DOCSIZE, (int)zipEntry.getSize());
                    try {
 	                    // extract data
 	                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
@ -142,22 +144,22 @@ public class odtParser extends AbstractParser implements Parser {
                    docLanguage    = metaData.getLanguage();
                }
            }
-            
+
            // make the languages set
            final Set<String> languages = new HashSet<String>(1);
            if (docLanguage != null) languages.add(docLanguage);
-            
+
            // if there is no title availabe we generate one
            if (docLongTitle == null) {
                if (docShortTitle != null) {
                    docLongTitle = docShortTitle;
-                } 
-            }            
-         
+                }
+            }
+
            // split the keywords
            String[] docKeywords = null;
            if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,");
-            
+
            // create the parser document
            Document[] docs = null;
            final byte[] contentBytes = UTF8.getBytes(writer.toString());
@ -173,41 +175,42 @@ public class odtParser extends AbstractParser implements Parser {
                    "",
                    null,
                    docDescription,
-                    0.0f, 0.0f, 
+                    0.0f, 0.0f,
                    contentBytes,
                    null,
                    null,
                    null,
                    false)};
            return docs;
-        } catch (final Exception e) {            
+        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof Parser.Failure) throw (Parser.Failure) e;
-            
+
            // close the writer
            if (writer != null) try { writer.close(); } catch (final Exception ex) {/* ignore this */}
-            
-            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location); 
+
+            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location);
        }
    }
-    
+
+    @Override
    public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws Parser.Failure, InterruptedException {
        File dest = null;
        try {
            // creating a tempfile
            dest = File.createTempFile("OpenDocument", ".odt");
            dest.deleteOnExit();
-            
+
            // copying the stream into a file
            FileUtils.copy(source, dest);
-            
+
            // parsing the content
            return parse(location, mimeType, charset, dest);
        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof Parser.Failure) throw (Parser.Failure) e;
-            
-            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location); 
+
+            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location);
        } finally {
            if (dest != null) FileUtils.deletedelete(dest);
        }
--- a/source/net/yacy/document/parser/ooxmlParser.java
+++ b/source/net/yacy/document/parser/ooxmlParser.java
@ -1,4 +1,4 @@
-//odtParser.java 
+//odtParser.java
 //------------------------
 //part of YaCy
 //(C) by Michael Peter Christen; mc@yacy.net
@ -53,61 +53,61 @@ import net.yacy.kelondro.util.FileUtils;

 public class ooxmlParser extends AbstractParser implements Parser {

-    public ooxmlParser() {        
-        super("Open Office XML Document Parser"); 
-        SUPPORTED_EXTENSIONS.add("docx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-        SUPPORTED_EXTENSIONS.add("dotx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.template");
-        SUPPORTED_EXTENSIONS.add("potx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.template");
-        SUPPORTED_EXTENSIONS.add("ppsx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.slideshow");
-        SUPPORTED_EXTENSIONS.add("pptx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.presentation");
-        SUPPORTED_EXTENSIONS.add("xlsx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-        SUPPORTED_EXTENSIONS.add("xltx");
-        SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.template");
+    public ooxmlParser() {
+        super("Open Office XML Document Parser");
+        this.SUPPORTED_EXTENSIONS.add("docx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        this.SUPPORTED_EXTENSIONS.add("dotx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.template");
+        this.SUPPORTED_EXTENSIONS.add("potx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.template");
+        this.SUPPORTED_EXTENSIONS.add("ppsx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.slideshow");
+        this.SUPPORTED_EXTENSIONS.add("pptx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.presentation");
+        this.SUPPORTED_EXTENSIONS.add("xlsx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+        this.SUPPORTED_EXTENSIONS.add("xltx");
+        this.SUPPORTED_MIME_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.template");
    }
-    
+
    private Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final File dest) throws Parser.Failure, InterruptedException {
-        
+
        CharBuffer writer = null;
-        try {          
+        try {
            String docDescription = null;
            String docKeywordStr  = null;
            String docShortTitle  = null;
            String docLongTitle   = null;
            String docAuthor      = null;
            String docLanguage    = null;
-            
+
            // opening the file as zip file
            final ZipFile zipFile= new ZipFile(dest);
            final Enumeration<? extends ZipEntry> zipEnum = zipFile.entries();
            final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
-            
+
            // looping through all containing files
            while (zipEnum.hasMoreElements()) {
-                
+
                // get next zip file entry
                final ZipEntry zipEntry= zipEnum.nextElement();
                final String entryName = zipEntry.getName();
-                
+
                // content.xml contains the document content in xml format
                if (entryName.equals("word/document.xml")
                	|| entryName.startsWith("ppt/slides/slide")
                	|| entryName.startsWith("xl/worksheets/sheet")) {
-                    
+
                    // create a writer for output
-                    writer = new CharBuffer((int)zipEntry.getSize());
+                    writer = new CharBuffer(odtParser.MAX_DOCSIZE, (int)zipEntry.getSize());
                    try {
 	                    // extract data
 	                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
 	                    try {
 		                    final SAXParser saxParser = saxParserFactory.newSAXParser();
 		                    saxParser.parse(zipFileEntryStream, new ODContentHandler(writer));
-		                
+
 		                    // close readers and writers
 	                    } finally {
 	                    	zipFileEntryStream.close();
@ -129,21 +129,21 @@ public class ooxmlParser extends AbstractParser implements Parser {
                    docLanguage    = metaData.getLanguage();
                }
            }
-            
+
            // make the languages set
            final Set<String> languages = new HashSet<String>(1);
            if (docLanguage != null && docLanguage.length() == 0)
        	languages.add(docLanguage);
-            
+
            // if there is no title availabe we generate one
            if ((docLongTitle == null || docLongTitle.length() == 0) && (docShortTitle != null)) {
                    docLongTitle = docShortTitle;
-            }            
-         
+            }
+
            // split the keywords
            String[] docKeywords = null;
            if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,");
-            
+
            // create the parser document
            Document[] docs = null;
            final byte[] contentBytes = UTF8.getBytes(writer.toString());
@ -159,44 +159,45 @@ public class ooxmlParser extends AbstractParser implements Parser {
                    "",
                    null,
                    docDescription,
-                    0.0f, 0.0f, 
+                    0.0f, 0.0f,
                    contentBytes,
                    null,
                    null,
                    null,
                    false)};
            return docs;
-        } catch (final Exception e) {            
+        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof Parser.Failure) throw (Parser.Failure) e;
-            
+
            // close the writer
            if (writer != null) try {
                writer.close();
            } catch (final Exception ex) {/* ignore this */}

            Log.logException(e);
-            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location); 
+            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location);
        }
    }
-    
+
+    @Override
    public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws Parser.Failure, InterruptedException {
        File dest = null;
        try {
            // creating a tempfile
            dest = File.createTempFile("OpenDocument", ".odt");
            dest.deleteOnExit();
-            
+
            // copying the stream into a file
            FileUtils.copy(source, dest);
-            
+
            // parsing the content
            return parse(location, mimeType, charset, dest);
        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof Parser.Failure) throw (Parser.Failure) e;
-            
-            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location); 
+
+            throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(),location);
        } finally {
            if (dest != null) FileUtils.deletedelete(dest);
        }
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@ -67,6 +67,7 @@ public class pdfParser extends AbstractParser implements Parser {
        this.SUPPORTED_MIME_TYPES.add("text/x-pdf");
    }

+    @Override
    public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws Parser.Failure, InterruptedException {

        // check memory for parser
@ -125,12 +126,13 @@ public class pdfParser extends AbstractParser implements Parser {
        if (docTitle == null || docTitle.length() == 0) {
            docTitle = MultiProtocolURI.unescape(location.getFileName());
        }
-        final CharBuffer writer = new CharBuffer();
+        final CharBuffer writer = new CharBuffer(odtParser.MAX_DOCSIZE);
        try {
            // create a writer for output
            final PDFTextStripper  stripper = new PDFTextStripper();
            // we start the pdf parsing in a separate thread to ensure that it can be terminated
            final Thread t = new Thread() {
+                @Override
                public void run() {
                    try {
                        stripper.writeText(pdfDoc, writer); // may throw a NPE
--- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
@ -188,7 +188,7 @@ public class URIMetadataRow implements URIMetadata {
            final String dc_publisher,
            final float lat,
            final float lon) {
-        final CharBuffer s = new CharBuffer(360);
+        final CharBuffer s = new CharBuffer(20000, 360);
        s.append(url.toNormalform(false, true)).appendLF();
        s.append(dc_title).appendLF();
        if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
@ -381,23 +381,23 @@ public class URIMetadataRow implements URIMetadata {
    public boolean matches(final Pattern matcher) {
        return this.metadata().matches(matcher);
    }
-    
+
    public DigestURI url() {
        return this.metadata().url();
    }
-    
+
    public String  dc_title()  {
        return this.metadata().dc_title();
    }
-    
+
    public String  dc_creator() {
        return this.metadata().dc_creator();
    }
-    
+
    public String  dc_publisher() {
        return this.metadata().dc_publisher();
    }
-    
+
    public String  dc_subject()   {
        return this.metadata().dc_subject();
    }
@ -409,7 +409,7 @@ public class URIMetadataRow implements URIMetadata {
    public float lon() {
        return this.metadata().lon();
    }
-    
+
    private Components metadata() {
        // avoid double computation of metadata elements
        if (this.comp != null) return this.comp;
--- a/source/net/yacy/kelondro/io/CharBuffer.java
+++ b/source/net/yacy/kelondro/io/CharBuffer.java
@ -1,4 +1,4 @@
-// serverCharBuffer.java 
+// serverCharBuffer.java
 // ---------------------------
 // (C) by Michael Peter Christen; mc@yacy.net
 // first published on http://www.anomic.de
@ -34,42 +34,46 @@ import java.util.Properties;
 import net.yacy.cora.document.UTF8;

 public final class CharBuffer extends Writer {
-    
+
    public static final char singlequote = '\'';
    public static final char doublequote = '"';
    public static final char equal       = '=';
-    
+
    private char[] buffer;
    private int offset;
    private int length;
+    private final int maximumLength;

-    
-    public CharBuffer() {
+    public CharBuffer(final int maximumLength) {
        this.buffer = new char[10];
        this.length = 0;
        this.offset = 0;
+        this.maximumLength = maximumLength;
    }
-    
-    public CharBuffer(final int initLength) {
+
+    public CharBuffer(final int maximumLength, final int initLength) {
        this.buffer = new char[initLength];
        this.length = 0;
        this.offset = 0;
-    }        
-    
-    public CharBuffer(final char[] bb) {
+        this.maximumLength = maximumLength;
+    }
+
+    public CharBuffer(final int maximumLength, final char[] bb) {
        this.buffer = bb;
        this.length = bb.length;
        this.offset = 0;
+        this.maximumLength = maximumLength;
    }

-    public CharBuffer(final char[] bb, final int initLength) {
+    public CharBuffer(final int maximumLength, final char[] bb, final int initLength) {
        this.buffer = new char[initLength];
        System.arraycopy(bb, 0, this.buffer, 0, bb.length);
        this.length = bb.length;
        this.offset = 0;
+        this.maximumLength = maximumLength;
    }
-    
-    public CharBuffer(final char[] bb, final int of, final int le) {
+
+    public CharBuffer(final int maximumLength, final char[] bb, final int of, final int le) {
        if (of * 2 > bb.length) {
            this.buffer = new char[le];
            System.arraycopy(bb, of, this.buffer, 0, le);
@ -80,17 +84,20 @@ public final class CharBuffer extends Writer {
            this.length = le;
            this.offset = of;
        }
+        this.maximumLength = maximumLength;
    }

    public CharBuffer(final CharBuffer bb) {
        this.buffer = bb.buffer;
        this.length = bb.length;
        this.offset = bb.offset;
+        this.maximumLength = bb.maximumLength;
    }

    public CharBuffer(final File f) throws IOException {
        // initially fill the buffer with the content of a file
        if (f.length() > Integer.MAX_VALUE) throw new IOException("file is too large for buffering");
+        this.maximumLength = Integer.MAX_VALUE;

        this.length = 0;
        this.buffer = new char[(int) f.length()*2];
@ -117,7 +124,7 @@ public final class CharBuffer extends Writer {
        this.length = 0;
        this.offset = 0;
    }
-    
+
    public int length() {
        return this.length;
    }
@ -135,19 +142,21 @@ public final class CharBuffer extends Writer {
    /**
     * Writes a single character that is given as an int.
     * The argument is narrowed to {@code char} and handed to {@link #write(char)}.
     *
     * @param b the character to append, as an int
     */
    public void write(final int b) {
        final char narrowed = (char) b;
        write(narrowed);
    }
-    
+
    /**
     * Appends a single char behind the stored content.
     * The char is silently dropped when the backing array is already longer
     * than {@code maximumLength}; the check looks at the size of the array,
     * not at the number of chars stored in it.
     *
     * @param b the char to append
     */
    public void write(final char b) {
+        if (this.buffer.length > this.maximumLength) return;
        // the next write position would fall outside the array: pass the required size to grow()
        if (this.offset + this.length + 1 > this.buffer.length) grow(this.offset + this.length + 1);
        this.buffer[this.offset + this.length++] = b;
    }
-    
+
    /**
     * Appends the complete content of the given array.
     *
     * @param bb the chars to append; all {@code bb.length} elements are written
     */
    @Override
    public void write(final char[] bb) {
        final int count = bb.length;
        write(bb, 0, count);
    }
-    
+
    @Override
    public void write(final char[] bb, final int of, final int le) {
+        if (this.buffer.length > this.maximumLength) return;
        if (this.offset + this.length + le > this.buffer.length) grow(this.offset + this.length + le);
        System.arraycopy(bb, of, this.buffer, this.offset + this.length, le);
        this.length += le;
@ -156,7 +165,7 @@ public final class CharBuffer extends Writer {
    // single-char constants; CR and LF are spelled by their numeric codes 13 and 10
    private static final char SPACE = ' ';
    private static final char CR = (char) 13;
    private static final char LF = (char) 10;
-    
+
    public CharBuffer appendSpace() {
        write(SPACE);
        return this;
@ -189,29 +198,29 @@ public final class CharBuffer extends Writer {

    /**
     * Appends one char by delegating to {@link #write(char)}.
     *
     * @param c the char to append
     * @return this buffer, so that calls can be chained
     */
    @Override
    public CharBuffer append(final char c) {
-        write(c);     
+        write(c);
        return this;
    }
-    
+
    /**
     * Appends all chars of the given string.
     *
     * @param s the string to append
     * @return this buffer, so that calls can be chained
     */
    public CharBuffer append(final String s) {
        // the string is copied into a temporary array first, which is then passed to write(char[])
        final char[] temp = new char[s.length()];
        s.getChars(0, temp.length, temp, 0);
        write(temp);
        return this;
-    }    
-    
+    }
+
    /**
     * Appends a section of a string to this buffer.
     * The section is copied into a temporary array which is then written.
     *
     * @param s the string to take the chars from
     * @param off index of the first char of {@code s} to append
     * @param len number of chars to append
     * @return this buffer, so that calls can be chained
     */
    public CharBuffer append(final String s, final int off, final int len) {
        final int end = off + len;
        final char[] section = new char[len];
        s.getChars(off, end, section, 0);
        write(section);
        return this;
    }
-    
+
    /**
     * Appends the stored content of another buffer.
     * The chars are read from the other buffer's backing array, starting at
     * its offset and covering its length.
     *
     * @param bb the buffer whose content is appended
     * @return this buffer, so that calls can be chained
     */
    public CharBuffer append(final CharBuffer bb) {
        final char[] source = bb.buffer;
        write(source, bb.offset, bb.length);
        return this;
    }
-    
+
    public char charAt(final int pos) {
        if (pos < 0) throw new IndexOutOfBoundsException();
        if (pos > this.length) throw new IndexOutOfBoundsException();
@ -227,7 +236,7 @@ public final class CharBuffer extends Writer {
            System.arraycopy(this.buffer, this.offset + pos + 1, this.buffer, this.offset + pos, this.length - pos - 1);
        }
    }
-    
+
    /**
     * Searches for a char, beginning at position 0.
     *
     * @param b the char to look for
     * @return whatever the two-argument {@code indexOf} overload returns for a start position of 0
     */
    public int indexOf(final char b) {
        final int from = 0;
        return indexOf(b, from);
    }
@ -247,18 +256,18 @@ public final class CharBuffer extends Writer {
        loop: for (int i = start; i <= this.length - bs.length; i++) {
            // first test only first char
            if (this.buffer[this.offset + i] != bs[0]) continue loop;
-            
+
            // then test all remaining char
            for (int j = 1; j < bs.length; j++) {
                if (this.buffer[this.offset + i + j] != bs[j]) continue loop;
            }
-            
+
            // found hit
            return i;
        }
        return -1;
    }
-    
+
    /**
     * Searches a char array for a char, beginning at index 0.
     *
     * @param b the array to search in
     * @param c the char to look for
     * @return the result of the three-argument {@code indexOf} overload called with offset 0
     */
    public static int indexOf(final char[] b, final char c) {
        final int firstIndex = 0;
        return indexOf(b, firstIndex, c);
    }
@ -267,7 +276,7 @@ public final class CharBuffer extends Writer {
        for (int i = offset; i < b.length; i++) if (b[i] == c) return i;
        return -1;
    }
-    
+
    /**
     * Searches a char array for a char sequence, beginning at index 0.
     *
     * @param b the array to search in
     * @param s the sequence to look for
     * @return the result of the three-argument {@code indexOf} overload called with start 0
     */
    public static int indexOf(final char[] b, final char[] s) {
        final int firstIndex = 0;
        return indexOf(b, firstIndex, s);
    }
@ -277,12 +286,12 @@ public final class CharBuffer extends Writer {
        loop: for (int i = start; i <= b.length - bs.length; i++) {
            // first test only first char
            if (b[i] != bs[0]) continue loop;
-            
+
            // then test all remaining char
            for (int j = 1; j < bs.length; j++) {
                if (b[i + j] != bs[j]) continue loop;
            }
-            
+
            // found hit
            return i;
        }
@ -301,7 +310,7 @@ public final class CharBuffer extends Writer {
        }
        return true;
    }
-    
+
    /**
     * Returns the chars of this buffer from position 0 on.
     *
     * @return the result of {@code getChars(0)}
     */
    public char[] getChars() {
        final int from = 0;
        return getChars(from);
    }
@ -318,7 +327,7 @@ public final class CharBuffer extends Writer {
        System.arraycopy(this.buffer, this.offset + start, tmp, 0, end - start);
        return tmp;
    }
-    
+
    /**
     * Converts the stored content to bytes.
     * The chars between offset and offset + length are turned into a String,
     * which is then passed to {@code UTF8.getBytes}.
     *
     * @return the bytes produced by {@code UTF8.getBytes} for the stored content
     */
    public byte[] getBytes() {
        final String content = new String(this.buffer, this.offset, this.length);
        return UTF8.getBytes(content);
    }
@ -363,7 +372,7 @@ public final class CharBuffer extends Writer {
        }
        return true;
    }
-    
+
    public int whitespaceStart(final boolean includeNonLetterBytes) {
        // returns number of whitespace char at the beginning of text
        if (includeNonLetterBytes) {
@ -377,7 +386,7 @@ public final class CharBuffer extends Writer {
        }
        return this.length;
    }
-    
+
    public int whitespaceEnd(final boolean includeNonLetterBytes) {
        // returns position of whitespace at the end of text
        if (includeNonLetterBytes) {
@ -391,8 +400,8 @@ public final class CharBuffer extends Writer {
        }
        return 0;
    }
-    
-    
+
+
    @Override
    public String toString() {
        return new String(this.buffer, this.offset, this.length);
@ -453,11 +462,11 @@ public final class CharBuffer extends Writer {
        }
        return p;
    }
-    
+
    /**
     * Compares a pattern against the beginning of a char array.
     *
     * @param buffer the array to look into
     * @param pattern the chars expected at index 0 of {@code buffer}
     * @return the result of the three-argument {@code equals} overload called with offset 0
     */
    public static boolean equals(final char[] buffer, final char[] pattern) {
        final int start = 0;
        return equals(buffer, start, pattern);
    }
-    
+
    public static boolean equals(final char[] buffer, final int offset, final char[] pattern) {
        // compares two char arrays: true, if pattern appears completely at offset position
        if (buffer.length < offset + pattern.length) return false;
@ -468,20 +477,20 @@ public final class CharBuffer extends Writer {
    /**
     * Empties the buffer by setting length and offset back to zero.
     * The backing array itself is neither replaced nor cleared.
     */
    public void reset() {
        this.length = 0;
        this.offset = 0;
-    }        
-    
-    public void reset(final int newSize) {  
+    }
+
+    public void reset(final int newSize) {
        // first swap the backing array for one of newSize chars, then drop the content
        this.resize(newSize);
        this.reset();
-    }         
-     
+    }
+
    /**
     * Replaces the backing array by a new one with {@code newSize} elements.
     * As many chars as fit are copied from the beginning of the old array.
     * The {@code length} and {@code offset} fields are not changed here, and
     * {@code maximumLength} is not consulted.
     *
     * @param newSize size of the new backing array
     * @throws IllegalArgumentException if {@code newSize} is negative
     */
    public void resize(final int newSize) {
        if(newSize < 0) throw new IllegalArgumentException("Illegal array size: " + newSize);
        final char[] v = new char[newSize];
        // copy min(old size, new size) chars, starting at index 0 of both arrays
        // NOTE(review): when newSize < offset + length the fields point past the new array - confirm callers
        System.arraycopy(this.buffer,0,v,0,newSize > this.buffer.length ? this.buffer.length : newSize);
-        this.buffer = v;          
+        this.buffer = v;
    }
-    
+
    public char toCharArray()[] {
        final char[] newbuf = new char[this.length];
        System.arraycopy(this.buffer, 0, newbuf, 0, this.length);
@ -490,12 +499,12 @@ public final class CharBuffer extends Writer {

    /**
     * Drops the reference to the backing array.
     * Length and offset are left unchanged; methods that dereference the
     * array afterwards (for example {@code write(char)}) will fail on the null reference.
     *
     * @throws IOException declared by the signature, not thrown by this body
     */
    @Override
    public void close() throws IOException {
-    	this.buffer = null; // assist with garbage collection 
+    	this.buffer = null; // assist with garbage collection
    }

    /**
     * Empty implementation: the body contains no statements.
     *
     * @throws IOException declared by the signature, not thrown by this body
     */
    @Override
    public void flush() throws IOException {
-        // TODO Auto-generated method stub        
-    }    
-        
+        // TODO Auto-generated method stub
+    }
+
 }