added pull request from als plus an NPE fix

14 years ago · 9cd469e6d6
parent 7146dffcc8 39898cb94a
commit 9cd469e6d6
15 changed files with 248 additions and 194 deletions
--- a/.classpath
+++ b/.classpath
@ -1,17 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <classpath>
-	<classpathentry excluding="env/|htdocsdefault/|proxymsg/|yacy/|env/|yacy/user/|yacy/user/|yacy/ui/|processing/domaingraph/applet/|processing/domaingraph/|api/|api/bookmarks/posts/|api/bookmarks/|api/util/|api/bookmarks/xbel/|api/bookmarks/tags/|api/ymarks/" kind="src" path="htroot"/>
-	<classpathentry kind="src" path="test"/>
-	<classpathentry excluding="user/|user/|ui/" kind="src" path="htroot/yacy"/>
-	<classpathentry kind="src" path="htroot/env"/>
 	<classpathentry kind="src" path="source"/>
-	<classpathentry kind="src" path="htroot/yacy/ui"/>
-	<classpathentry excluding="bookmarks/posts/|bookmarks/|util/|bookmarks/xbel/|bookmarks/tags/|ymarks/" kind="src" path="htroot/api"/>
-	<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
-	<classpathentry excluding="posts/|xbel/|tags/" kind="src" path="htroot/api/bookmarks"/>
-	<classpathentry kind="src" path="htroot/api/bookmarks/xbel"/>
-	<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
-	<classpathentry kind="src" path="htroot/api/ymarks"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 	<classpathentry kind="lib" path="lib/commons-logging-1.1.1.jar"/>
 	<classpathentry kind="lib" path="lib/servlet-api.jar"/>
--- a/build.properties
+++ b/build.properties
@ -7,7 +7,7 @@ releaseVersion=1.0
 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 releaseFileParentDir=yacy
-releaseNr=$Revision: 8134 $
+releaseNr=$Revision: 8135 $
 privateKeyFile=private.key

 # defining some file/directory access rights
--- a/source/de/anomic/data/ymark/YMarkAutoTagger.java
+++ b/source/de/anomic/data/ymark/YMarkAutoTagger.java
@ -27,6 +27,9 @@ import de.anomic.crawler.retrieval.Response;

 public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandler {

+	private static final String EMPTY_STRING = new String();
+	
+	
 	public final static String SPACE = " ";
 	public final static String POISON = "";
 	public final static HashSet<String> stopwords = new HashSet<String>(Arrays.asList(".", "!", "?", "nbsp", "uuml", "ouml", "auml", "amp", "quot", "laquo", "raquo", 
@ -83,19 +86,22 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
 		final TreeSet<YMarkTag> topwords = new TreeSet<YMarkTag>();
 		StringBuilder token; 

-		if(document != null) {
+		if(document == null) {
+			return EMPTY_STRING;
+		}

-			//get words from document
-			final Map<String, Word> words = new Condenser(document, true, true, LibraryProvider.dymLib).words();
-			
-			// generate potential tags from document title, description and subject
-			final int bufferSize = document.dc_title().length() + document.dc_description().length() + document.dc_subject(' ').length() + 32;
-			final StringBuilder buffer = new StringBuilder(bufferSize);
-			final StringBuilder pwords = new StringBuilder(1000);
-			buffer.append(document.dc_title().toLowerCase());
-			buffer.append(document.dc_description().toLowerCase());
-			buffer.append(document.dc_subject(' ').toLowerCase());
-			final Enumeration<StringBuilder> tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(buffer.toString())), LibraryProvider.dymLib);
+		//get words from document
+		final Map<String, Word> words = new Condenser(document, true, true, LibraryProvider.dymLib).words();
+		
+		// generate potential tags from document title, description and subject
+		final int bufferSize = document.dc_title().length() + document.dc_description().length() + document.dc_subject(' ').length() + 32;
+		final StringBuilder buffer = new StringBuilder(bufferSize);
+		final StringBuilder pwords = new StringBuilder(1000);
+		buffer.append(document.dc_title().toLowerCase());
+		buffer.append(document.dc_description().toLowerCase());
+		buffer.append(document.dc_subject(' ').toLowerCase());
+		final WordTokenizer tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(buffer.toString())), LibraryProvider.dymLib);
+		try {
 			int score = 0;
 			
 			// get phrases
@ -163,44 +169,49 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
 				return document.getFileExtension();
 			}
 			return clean;
+		} finally {
+			tokens.close();
 		}
-		return new String();		
 	}	
 	
 	private static TreeMap<String, YMarkTag> getPhrases(final Document document, final int size) {
 		final TreeMap<String, YMarkTag> phrases = new TreeMap<String, YMarkTag>();
 		final StringBuilder phrase = new StringBuilder(128);
-		final Enumeration<StringBuilder> tokens = new WordTokenizer(document.getText(), LibraryProvider.dymLib);
-		StringBuilder token;
-		int count = 0;
-		
-		// loop through text
-		while(tokens.hasMoreElements()) {				
-
-			token = tokens.nextElement();			
-			if(stopwords.contains(token.toString()) || isDigitSpace(token.toString()))
-				continue;			
+		final WordTokenizer tokens = new WordTokenizer(document.getText(), LibraryProvider.dymLib);
+		try {
+			StringBuilder token;
+			int count = 0;
 			
-			// if we have a full phrase, delete the first token
-			count++;
-			if(count > size)
-				phrase.delete(0, phrase.indexOf(SPACE)+1);
+			// loop through text
+			while(tokens.hasMoreElements()) {				
+	
+				token = tokens.nextElement();			
+				if(stopwords.contains(token.toString()) || isDigitSpace(token.toString()))
+					continue;			
+				
+				// if we have a full phrase, delete the first token
+				count++;
+				if(count > size)
+					phrase.delete(0, phrase.indexOf(SPACE)+1);
+				
+				// append new token
+				if(phrase.length() > 1)
+					phrase.append(SPACE);						
+				phrase.append(token);
+	
+				if(count >= size) {	// make sure we really have a phrase
+					if(phrases.containsKey(phrase.toString())) {
+						phrases.get(phrase.toString()).inc();
+					} else {
+						phrases.put(phrase.toString(), new YMarkTag(phrase.toString()));
+					}
+				}		
+			}
 			
-			// append new token
-			if(phrase.length() > 1)
-				phrase.append(SPACE);						
-			phrase.append(token);
-
-			if(count >= size) {	// make sure we really have a phrase
-				if(phrases.containsKey(phrase.toString())) {
-					phrases.get(phrase.toString()).inc();
-				} else {
-					phrases.put(phrase.toString(), new YMarkTag(phrase.toString()));
-				}
-			}		
+			return phrases;
+		} finally {
+			tokens.close();
 		}
-		
-		return phrases;
 	}

 	public static String autoTag(final String url, final LoaderDispatcher loader, final int max, final TreeMap<String, YMarkTag> tags) {
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@ -235,21 +235,24 @@ public final class Condenser {
        if (text == null) return;
        String word;
        Word wprop;
-        WordTokenizer wordenum;
-        wordenum = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(text)), meaningLib);
-        int pip = 0;
-        while (wordenum.hasMoreElements()) {
-            word = (wordenum.nextElement().toString()).toLowerCase(Locale.ENGLISH);
-            if (useForLanguageIdentification) this.languageIdentificator.add(word);
-            if (word.length() < 2) continue;
-            wprop = this.words.get(word);
-            if (wprop == null) wprop = new Word(0, pip, phrase);
-            if (wprop.flags == null) wprop.flags = flagstemplate.clone();
-            wprop.flags.set(flagpos, true);
-            this.words.put(word, wprop);
-            pip++;
-            this.RESULT_NUMB_WORDS++;
-            this.RESULT_DIFF_WORDS++;
+        WordTokenizer wordenum = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(text)), meaningLib);
+        try {
+	        int pip = 0;
+	        while (wordenum.hasMoreElements()) {
+	            word = (wordenum.nextElement().toString()).toLowerCase(Locale.ENGLISH);
+	            if (useForLanguageIdentification) this.languageIdentificator.add(word);
+	            if (word.length() < 2) continue;
+	            wprop = this.words.get(word);
+	            if (wprop == null) wprop = new Word(0, pip, phrase);
+	            if (wprop.flags == null) wprop.flags = flagstemplate.clone();
+	            wprop.flags.set(flagpos, true);
+	            this.words.put(word, wprop);
+	            pip++;
+	            this.RESULT_NUMB_WORDS++;
+	            this.RESULT_DIFF_WORDS++;
+	        }
+        } finally {
+        	wordenum.close();
        }
    }

@ -296,45 +299,49 @@ public final class Condenser {

        // read source
        final WordTokenizer wordenum = new WordTokenizer(is, meaningLib);
-        while (wordenum.hasMoreElements()) {
-            word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);
-            if (this.languageIdentificator != null) this.languageIdentificator.add(word);
-            if (word.length() < wordminsize) continue;
-
-            // distinguish punctuation and words
-            wordlen = word.length();
-            if (wordlen == 1 && SentenceReader.punctuation(word.charAt(0))) {
-                // store sentence
-                currsentwords.clear();
-                wordInSentenceCounter = 1;
-            } else {
-                // check index.of detection
-                if (last_last && comb_indexof && word.equals("modified")) {
-                    this.RESULT_FLAGS.set(flag_cat_indexof, true);
-                    wordenum.pre(true); // parse lines as they come with CRLF
-                }
-                if (last_index && (wordminsize > 2 || word.equals("of"))) comb_indexof = true;
-                last_last = word.equals("last");
-                last_index = word.equals("index");
-
-                // store word
-                allwordcounter++;
-                currsentwords.add(word);
-                wsp = this.words.get(word);
-                if (wsp != null) {
-                    // word already exists
-                    wordHandle = wsp.posInText;
-                    wsp.inc();
-                } else {
-                    // word does not yet exist, create new word entry
-                    wordHandle = wordHandleCount++;
-                    wsp = new Word(wordHandle, wordInSentenceCounter, sentences.size() + 100);
-                    wsp.flags = this.RESULT_FLAGS.clone();
-                    this.words.put(word, wsp);
-                }
-                // we now have the unique handle of the word, put it into the sentence:
-                wordInSentenceCounter++;
-            }
+        try {
+	        while (wordenum.hasMoreElements()) {
+	            word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);
+	            if (this.languageIdentificator != null) this.languageIdentificator.add(word);
+	            if (word.length() < wordminsize) continue;
+	
+	            // distinguish punctuation and words
+	            wordlen = word.length();
+	            if (wordlen == 1 && SentenceReader.punctuation(word.charAt(0))) {
+	                // store sentence
+	                currsentwords.clear();
+	                wordInSentenceCounter = 1;
+	            } else {
+	                // check index.of detection
+	                if (last_last && comb_indexof && word.equals("modified")) {
+	                    this.RESULT_FLAGS.set(flag_cat_indexof, true);
+	                    wordenum.pre(true); // parse lines as they come with CRLF
+	                }
+	                if (last_index && (wordminsize > 2 || word.equals("of"))) comb_indexof = true;
+	                last_last = word.equals("last");
+	                last_index = word.equals("index");
+	
+	                // store word
+	                allwordcounter++;
+	                currsentwords.add(word);
+	                wsp = this.words.get(word);
+	                if (wsp != null) {
+	                    // word already exists
+	                    wordHandle = wsp.posInText;
+	                    wsp.inc();
+	                } else {
+	                    // word does not yet exist, create new word entry
+	                    wordHandle = wordHandleCount++;
+	                    wsp = new Word(wordHandle, wordInSentenceCounter, sentences.size() + 100);
+	                    wsp.flags = this.RESULT_FLAGS.clone();
+	                    this.words.put(word, wsp);
+	                }
+	                // we now have the unique handle of the word, put it into the sentence:
+	                wordInSentenceCounter++;
+	            }
+	        }
+        } finally {
+        	wordenum.close();
        }

        if (pseudostemming) {
--- a/source/net/yacy/document/SentenceReader.java
+++ b/source/net/yacy/document/SentenceReader.java
@ -141,4 +141,12 @@ public class SentenceReader implements Iterator<StringBuilder> {
    public void remove() {
        throw new UnsupportedOperationException();
    }
+    
+    public void close() {
+    	try {
+    		raf.close();
+    	} catch(IOException ioe) {
+    		// Best-effort close: an IOException thrown by raf.close() is deliberately swallowed so that close() itself never throws.
+    	}
+    }
 }
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@ -280,14 +280,21 @@ public final class TextParser {
        final HashMap<Parser, Parser.Failure> failedParser = new HashMap<Parser, Parser.Failure>();
        if (MemoryControl.request(sourceArray.length * 6, false)) {
            for (final Parser parser: parsers) {
+            	ByteArrayInputStream bis = new ByteArrayInputStream(sourceArray);
                try {
-                    docs = parser.parse(location, mimeType, documentCharset, new ByteArrayInputStream(sourceArray));
+                    docs = parser.parse(location, mimeType, documentCharset, bis);
                } catch (final Parser.Failure e) {
                    failedParser.put(parser, e);
                    //log.logWarning("tried parser '" + parser.getName() + "' to parse " + location.toNormalform(true, false) + " but failed: " + e.getMessage(), e);
                } catch (final Exception e) {
                    failedParser.put(parser, new Parser.Failure(e.getMessage(), location));
                    //log.logWarning("tried parser '" + parser.getName() + "' to parse " + location.toNormalform(true, false) + " but failed: " + e.getMessage(), e);
+                } finally {
+                	try {
+                		bis.close();
+                	} catch(IOException ioe) {
+                		// Ignore.
+                	}
                }
                if (docs != null) break;
            }
--- a/source/net/yacy/document/WordTokenizer.java
+++ b/source/net/yacy/document/WordTokenizer.java
@ -79,6 +79,10 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
        if (this.meaningLib != null) WordCache.learn(r);
        return r;
    }
+    
+    public void close() {
+    	e.close();
+    }

    private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
        // returns an enumeration of StringBuilder Objects
@ -145,6 +149,9 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
            return r;
        }

+        public void close() {
+        	e.close();
+        }
    }

    public static StringBuilder trim(final StringBuilder sb) {
@ -172,23 +179,27 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
     */
    public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib) {
        final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
-        final Enumeration<StringBuilder> words = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), meaningLib);
-        int pos = 0;
-        StringBuilder word;
-        byte[] hash;
-        Integer oldpos;
-        while (words.hasMoreElements()) {
-            word = words.nextElement();
-            hash = Word.word2hash(word);
-
-            // don't overwrite old values, that leads to too far word distances
-            oldpos = map.put(hash, LargeNumberCache.valueOf(pos));
-            if (oldpos != null) {
-                map.put(hash, oldpos);
-            }
-
-            pos += word.length() + 1;
+        final WordTokenizer words = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), meaningLib);
+        try {
+	        int pos = 0;
+	        StringBuilder word;
+	        byte[] hash;
+	        Integer oldpos;
+	        while (words.hasMoreElements()) {
+	            word = words.nextElement();
+	            hash = Word.word2hash(word);
+	
+	            // don't overwrite old values, that leads to too far word distances
+	            oldpos = map.put(hash, LargeNumberCache.valueOf(pos));
+	            if (oldpos != null) {
+	                map.put(hash, oldpos);
+	            }
+	
+	            pos += word.length() + 1;
+	        }
+	        return map;
+        } finally {
+        	words.close();
        }
-        return map;
    }
 }
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@ -58,7 +58,8 @@ import net.yacy.kelondro.util.MemoryControl;


 public class ContentScraper extends AbstractScraper implements Scraper {
-
+	private static final String EMPTY_STRING = ""; // shared empty default; a literal (not new String()) stays interned, like the "" literals it replaces
+	
    private final char degree = '\u00B0';
    private final char[] minuteCharsHTML = "&#039;".toCharArray();

@ -159,7 +160,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        this.iframes = new HashSet<MultiProtocolURI>();
        this.metas = new HashMap<String, String>();
        this.script = new HashSet<MultiProtocolURI>();
-        this.title = "";
+        this.title = EMPTY_STRING;
        this.headlines = new ArrayList[6];
        for (int i = 0; i < this.headlines.length; i++) this.headlines[i] = new ArrayList<String>();
        this.bold = new ClusteredScoreMap<String>();
@ -318,14 +319,14 @@ public class ContentScraper extends AbstractScraper implements Scraper {

    public void scrapeTag0(final String tagname, final Properties tagopts) {
        if (tagname.equalsIgnoreCase("img")) {
-            final String src = tagopts.getProperty("src", "");
+            final String src = tagopts.getProperty("src", EMPTY_STRING);
            try {
                final int width = Integer.parseInt(tagopts.getProperty("width", "-1"));
                final int height = Integer.parseInt(tagopts.getProperty("height", "-1"));
                if (src.length() > 0) {
                    final MultiProtocolURI url = absolutePath(src);
                    if (url != null) {
-                        final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", ""), width, height, -1);
+                        final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", EMPTY_STRING), width, height, -1);
                        addImage(this.images, ie);
                    }
                }
@ -333,47 +334,47 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            this.evaluationScores.match(Element.imgpath, src);
        } else if(tagname.equalsIgnoreCase("base")) {
            try {
-                this.root = new MultiProtocolURI(tagopts.getProperty("href", ""));
+                this.root = new MultiProtocolURI(tagopts.getProperty("href", EMPTY_STRING));
            } catch (final MalformedURLException e) {}
        } else if (tagname.equalsIgnoreCase("frame")) {
-            final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", ""));
+            final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", EMPTY_STRING));
            mergeAnchors(src, tagopts /* with property "name" */);
            this.frames.add(src);
            this.evaluationScores.match(Element.framepath, src.toNormalform(true, false));
        } else if (tagname.equalsIgnoreCase("body")) {
-            final String c = tagopts.getProperty("class", "");
+            final String c = tagopts.getProperty("class", EMPTY_STRING);
            this.evaluationScores.match(Element.bodyclass, c);
        } else if (tagname.equalsIgnoreCase("div")) {
-            final String id = tagopts.getProperty("id", "");
+            final String id = tagopts.getProperty("id", EMPTY_STRING);
            this.evaluationScores.match(Element.divid, id);
        } else if (tagname.equalsIgnoreCase("meta")) {
-            String name = tagopts.getProperty("name", "");
-            final String content = tagopts.getProperty("content","");
+            String name = tagopts.getProperty("name", EMPTY_STRING);
+            final String content = tagopts.getProperty("content", EMPTY_STRING);
            if (name.length() > 0) {
                this.metas.put(name.toLowerCase(), CharacterCoding.html2unicode(content));
                if (name.equals("generator")) {
                    this.evaluationScores.match(Element.metagenerator, content);
                }
            } else {
-                name = tagopts.getProperty("http-equiv", "");
+                name = tagopts.getProperty("http-equiv", EMPTY_STRING);
                if (name.length() > 0) {
                    this.metas.put(name.toLowerCase(), CharacterCoding.html2unicode(content));
                }
            }
        } else if (tagname.equalsIgnoreCase("area")) {
-            final String areatitle = cleanLine(tagopts.getProperty("title",""));
-            //String alt   = tagopts.getProperty("alt","");
-            final String href  = tagopts.getProperty("href", "");
+            final String areatitle = cleanLine(tagopts.getProperty("title",EMPTY_STRING));
+            //String alt   = tagopts.getProperty("alt",EMPTY_STRING);
+            final String href  = tagopts.getProperty("href", EMPTY_STRING);
            tagopts.put("nme", areatitle);
            if (href.length() > 0) mergeAnchors(absolutePath(href), tagopts);
        } else if (tagname.equalsIgnoreCase("link")) {
-            final String href = tagopts.getProperty("href", "");
+            final String href = tagopts.getProperty("href", EMPTY_STRING);
            final MultiProtocolURI newLink = absolutePath(href);

            if (newLink != null) {
-                final String rel = tagopts.getProperty("rel", "");
-                final String linktitle = tagopts.getProperty("title", "");
-                final String type = tagopts.getProperty("type", "");
+                final String rel = tagopts.getProperty("rel", EMPTY_STRING);
+                final String linktitle = tagopts.getProperty("title", EMPTY_STRING);
+                final String type = tagopts.getProperty("type", EMPTY_STRING);

                if (rel.equalsIgnoreCase("shortcut icon")) {
                    final ImageEntry ie = new ImageEntry(newLink, linktitle, -1, -1, -1);
@ -394,11 +395,11 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                }
            }
        } else if(tagname.equalsIgnoreCase("embed")) {
-            mergeAnchors(absolutePath(tagopts.getProperty("src", "")), tagopts /* with property "name" */);
+            mergeAnchors(absolutePath(tagopts.getProperty("src", EMPTY_STRING)), tagopts /* with property "name" */);
        } else if(tagname.equalsIgnoreCase("param")) {
-            final String name = tagopts.getProperty("name", "");
+            final String name = tagopts.getProperty("name", EMPTY_STRING);
            if (name.equalsIgnoreCase("movie")) {
-                mergeAnchors(absolutePath(tagopts.getProperty("value", "")), tagopts /* with property "name" */);
+                mergeAnchors(absolutePath(tagopts.getProperty("value", EMPTY_STRING)), tagopts /* with property "name" */);
            }
        }

@ -409,12 +410,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    public void scrapeTag1(final String tagname, final Properties tagopts, final char[] text) {
        // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text));
        if (tagname.equalsIgnoreCase("a") && text.length < 2048) {
-            final String href = tagopts.getProperty("href", "");
+            final String href = tagopts.getProperty("href", EMPTY_STRING);
            MultiProtocolURI url;
            if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
                final String f = url.getFile();
                final int p = f.lastIndexOf('.');
-                final String type = (p < 0) ? "" : f.substring(p + 1);
+                final String type = (p < 0) ? EMPTY_STRING : f.substring(p + 1);
                if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg") || type.equals("tiff") || type.equals("tif")) {
                    // special handling of such urls: put them to the image urls
                    final ImageEntry ie = new ImageEntry(url, recursiveParse(text), -1, -1, -1);
@ -461,12 +462,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            h = recursiveParse(text);
            if (h.length() > 0) this.li.add(h);
        } else if (tagname.equalsIgnoreCase("iframe")) {
-            final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", ""));
+            final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", EMPTY_STRING));
            mergeAnchors(src, tagopts /* with property "name" */);
            this.iframes.add(src);
            this.evaluationScores.match(Element.iframepath, src.toNormalform(true, false));
        } else if (tagname.equalsIgnoreCase("script")) {
-            final String src = tagopts.getProperty("src", "");
+            final String src = tagopts.getProperty("src", EMPTY_STRING);
            if (src.length() > 0) {
                this.script.add(absolutePath(src));
                this.evaluationScores.match(Element.scriptpath, src);
@ -507,7 +508,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    }

    private final static String cleanLine(final String s) {
-        if (!MemoryControl.request(s.length() * 2, false)) return "";
+        if (!MemoryControl.request(s.length() * 2, false)) return EMPTY_STRING;
        final StringBuilder sb = new StringBuilder(s.length());
        char l = ' ';
        char c;
@ -683,27 +684,27 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    public String getDescription() {
        String s = this.metas.get("description");
        if (s == null) s = this.metas.get("dc.description");
-        if (s == null) return "";
+        if (s == null) return EMPTY_STRING;
        return s;
    }

    public String getContentType() {
        final String s = this.metas.get("content-type");
-        if (s == null) return "";
+        if (s == null) return EMPTY_STRING;
        return s;
    }

    public String getAuthor() {
        String s = this.metas.get("author");
        if (s == null) s = this.metas.get("dc.creator");
-        if (s == null) return "";
+        if (s == null) return EMPTY_STRING;
        return s;
    }

    public String getPublisher() {
        String s = this.metas.get("copyright");
        if (s == null) s = this.metas.get("dc.publisher");
-        if (s == null) return "";
+        if (s == null) return EMPTY_STRING;
        return s;
    }

@ -732,7 +733,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    public String[] getKeywords() {
        String s = this.metas.get("keywords");
        if (s == null) s = this.metas.get("dc.description");
-        if (s == null) s = "";
+        if (s == null) s = EMPTY_STRING;
        if (s.length() == 0) {
            return MultiProtocolURI.splitpattern.split(getTitle().toLowerCase());
        }
@ -756,13 +757,13 @@ public class ContentScraper extends AbstractScraper implements Scraper {

    public String getRefreshPath() {
        String s = this.metas.get("refresh");
-        if (s == null) return "";
+        if (s == null) return EMPTY_STRING;

        final int pos = s.indexOf(';');
-        if (pos < 0) return "";
+        if (pos < 0) return EMPTY_STRING;
        s = s.substring(pos + 1);
        if (s.toLowerCase().startsWith("url=")) return s.substring(4).trim();
-        return "";
+        return EMPTY_STRING;
    }

    // parse location
--- a/source/net/yacy/document/parser/html/TransformerWriter.java
+++ b/source/net/yacy/document/parser/html/TransformerWriter.java
@ -81,11 +81,22 @@ public final class TransformerWriter extends Writer {
            final Scraper scraper,
            final Transformer transformer,
            final boolean passbyIfBinarySuspect
+    ) {
+    	this(outStream, charSet, scraper, transformer, passbyIfBinarySuspect, 1024);
+    }
+
+    public TransformerWriter(
+            final OutputStream outStream,
+            final Charset charSet,
+            final Scraper scraper,
+            final Transformer transformer,
+            final boolean passbyIfBinarySuspect,
+            final int initialBufferSize
    ) {
        this.outStream     = outStream;
        this.scraper       = scraper;
        this.transformer   = transformer;
-        this.buffer        = new CharBuffer(1024);
+        this.buffer        = new CharBuffer(initialBufferSize);
        this.filterTag     = null;
        this.filterOpts    = null;
        this.filterCont    = null;
@ -540,6 +551,7 @@ public final class TransformerWriter extends Writer {
                final char[] filtered = filterSentence(this.buffer.getChars(), quotechar);
                if (this.out != null) this.out.write(filtered);
            }
+            this.buffer.close();
            this.buffer = null;
        }
        final char[] finalized = filterFinalize(quotechar);
@ -550,6 +562,7 @@ public final class TransformerWriter extends Writer {
        }
        this.filterTag = null;
        this.filterOpts = null;
+        if (this.filterCont != null) this.filterCont.close();
        this.filterCont = null;
 //      if (scraper != null) {scraper.close(); scraper = null;}
 //      if (transformer != null) {transformer.close(); transformer = null;}
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@ -196,7 +196,7 @@ public class htmlParser extends AbstractParser implements Parser {

        // parsing the content
        final ContentScraper scraper = new ContentScraper(location);
-        final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false);
+        final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false, sourceStream.available());
        try {
            FileUtils.copy(sourceStream, writer, c);
        } catch (final IOException e) {
--- a/source/net/yacy/document/parser/odtParser.java
+++ b/source/net/yacy/document/parser/odtParser.java
@ -114,17 +114,20 @@ public class odtParser extends AbstractParser implements Parser {
                if (entryName.equals("content.xml")) {
                    
                    // create a writer for output
-                    writer = new CharBuffer();
-                    
-                    // extract data
-                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
-                    final SAXParser saxParser = saxParserFactory.newSAXParser();
-                    saxParser.parse(zipFileEntryStream, new ODContentHandler(writer));
-                
-                    // close readers and writers
-                    zipFileEntryStream.close();
-                    writer.close();
-                    
+                    writer = new CharBuffer((int)zipEntry.getSize());
+                    try {
+	                    // extract data
+	                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
+	                    try {
+		                    final SAXParser saxParser = saxParserFactory.newSAXParser();
+		                    saxParser.parse(zipFileEntryStream, new ODContentHandler(writer));
+	                    } finally {
+		                    // close readers and writers
+		                    zipFileEntryStream.close();
+	                    }
+                    } finally {
+                    	writer.close();
+                    }
                } else if (entryName.equals("meta.xml")) {
                    //  meta.xml contains metadata about the document
                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
--- a/source/net/yacy/document/parser/ooxmlParser.java
+++ b/source/net/yacy/document/parser/ooxmlParser.java
@ -100,17 +100,21 @@ public class ooxmlParser extends AbstractParser implements Parser {
                	|| entryName.startsWith("xl/worksheets/sheet")) {
                    
                    // create a writer for output
-                    writer = new CharBuffer();
-                    
-                    // extract data
-                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
-                    final SAXParser saxParser = saxParserFactory.newSAXParser();
-                    saxParser.parse(zipFileEntryStream, new ODContentHandler(writer));
-                
-                    // close readers and writers
-                    zipFileEntryStream.close();
-                    writer.close();
-                    
+                    writer = new CharBuffer((int)zipEntry.getSize());
+                    try {
+	                    // extract data
+	                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
+	                    try {
+		                    final SAXParser saxParser = saxParserFactory.newSAXParser();
+		                    saxParser.parse(zipFileEntryStream, new ODContentHandler(writer));
+		                
+		                    // close readers and writers
+	                    } finally {
+	                    	zipFileEntryStream.close();
+	                    }
+                    } finally {
+                    	writer.close();
+                    }
                } else if (entryName.equals("docProps/core.xml")) {
                    //  meta.xml contains metadata about the document
                    final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
--- a/source/net/yacy/kelondro/blob/ArrayStack.java
+++ b/source/net/yacy/kelondro/blob/ArrayStack.java
@ -898,8 +898,7 @@ public class ArrayStack implements BLOB {
                if (!i1.hasNext()) {
                    if (i2.hasNext()) {
                        HeapWriter.delete(f1);
-                        if (f2.renameTo(newFile))
-                            return newFile;
+                        if (f2.renameTo(newFile)) return newFile;
                        return f2;
                    }
                    HeapWriter.delete(f1);
@ -907,8 +906,7 @@ public class ArrayStack implements BLOB {
                    return null;
                } else if (!i2.hasNext()) {
                    HeapWriter.delete(f2);
-                    if (f1.renameTo(newFile))
-                        return newFile;
+                    if (f1.renameTo(newFile)) return newFile;
                    return f1;
                }
                assert i1.hasNext();
--- a/source/net/yacy/kelondro/index/RowCollection.java
+++ b/source/net/yacy/kelondro/index/RowCollection.java
@ -48,6 +48,8 @@ import net.yacy.kelondro.util.kelondroException;

 public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>, Cloneable {

+	private static final byte[] EMPTY_CACHE = new byte[0];
+	
    public  static final long growfactorLarge100 = 140L;
    public  static final long growfactorSmall100 = 120L;
    private static final int isortlimit = 20;
@ -77,7 +79,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
        this.rowdef = rowdef;
        this.sortBound = 0;
        this.lastTimeWrote = System.currentTimeMillis();
-        this.chunkcache = new byte[0];
+       	this.chunkcache = EMPTY_CACHE;
        this.chunkcount = 0;
    }

--- a/source/net/yacy/kelondro/io/CharBuffer.java
+++ b/source/net/yacy/kelondro/io/CharBuffer.java
@ -122,9 +122,9 @@ public final class CharBuffer extends Writer {
        return length;
    }

-    private void grow() {
-        int newsize = buffer.length * 2 + 1;
-        if (newsize < 32) newsize = 32;
+    private void grow(int minSize) {
+        int newsize = buffer.length + 1024;
+        if (newsize < minSize) newsize = minSize+1;
        char[] tmp = new char[newsize];
        System.arraycopy(buffer, offset, tmp, 0, length);
        buffer = tmp;
@ -136,7 +136,7 @@ public final class CharBuffer extends Writer {
    }
    
    public void write(final char b) {
-        if (offset + length + 1 > buffer.length) grow();
+        if (offset + length + 1 > buffer.length) grow(offset + length + 1);
        buffer[offset + length++] = b;
    }
    
@ -145,7 +145,7 @@ public final class CharBuffer extends Writer {
    }
    
    public void write(final char[] bb, final int of, final int le) {
-        while (offset + length + le > buffer.length) grow();
+        if (offset + length + le > buffer.length) grow(offset + length + le);
        System.arraycopy(bb, of, buffer, offset + length, le);
        length += le;
    }
@ -476,7 +476,7 @@ public final class CharBuffer extends Writer {
    }

    public void close() throws IOException {
-        // TODO Auto-generated method stub        
+    	buffer = null; // assist with garbage collection 
    }

    public void flush() throws IOException {