diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java
index 1b83dc28b..159ca48e0 100644
--- a/htroot/CacheAdmin_p.java
+++ b/htroot/CacheAdmin_p.java
@@ -121,8 +121,9 @@ public class CacheAdmin_p {
                     else {
                         htmlFilterContentScraper scraper = new htmlFilterContentScraper(url);
                         OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
-                        plasmaParserDocument document = switchboard.parser.transformScraper(url, "text/html", scraper);
                         serverFileUtils.copy(file, os);
+                        os.flush();
+                        plasmaParserDocument document = switchboard.parser.transformScraper(url, "text/html", scraper);
                         info += "<b>HEADLINE:</b><br>" + scraper.getHeadline() + "<br><br>";
                         info += "<b>HREF:</b><br>" + formatAnchor(document.getHyperlinks()) + "<br>";
                         info += "<b>MEDIA:</b><br>" + formatAnchor(document.getMedialinks()) + "<br>";
@@ -130,7 +131,7 @@ public class CacheAdmin_p {
                         info += "<b>TEXT:</b><br><span class=\"small\">" + new String(scraper.getText()) + "</span><br>";
                         info += "<b>LINES:</b><br><span class=\"small\">";
                         String[] sentences = document.getSentences();
-                        for (int i = 0; i < sentences.length; i++) info += sentences + "<br>";
+                        for (int i = 0; i < sentences.length; i++) info += sentences[i] + "<br>";
                         info += "</span><br>";
                     }
                 } catch (Exception e) {
diff --git a/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java b/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java
index a91936ea9..591ca5881 100644
--- a/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java
@@ -1,434 +1 @@
-// htmlFilterAbstractScraper.java 
-// ---------------------------
-// (C) by Michael Peter Christen; mc@anomic.de
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-// last major change: 18.02.2004
-//
-// You agree that the Author(s) is (are) not responsible for cost,
-// loss of data or any harm that may be caused by usage of this softare or
-// this documentation. The usage of this software is on your own risk. The
-// installation and usage (starting/running) of this software may allow other
-// people or application to access your computer and any attached devices and
-// is highly dependent on the configuration of the software which must be
-// done by the user of the software;the author(s) is (are) also
-// not responsible for proper configuration and usage of the software, even
-// if provoked by documentation provided together with the software.
-//
-// THE SOFTWARE THAT FOLLOWS AS ART OF PROGRAMMING BELOW THIS SECTION
-// IS PUBLISHED UNDER THE GPL AS DOCUMENTED IN THE FILE gpl.txt ASIDE THIS
-// FILE AND AS IN http://www.gnu.org/licenses/gpl.txt
-// ANY CHANGES TO THIS FILE ACCORDING TO THE GPL CAN BE DONE TO THE
-// LINES THAT FOLLOWS THIS COPYRIGHT NOTICE HERE, BUT CHANGES MUST NOT
-// BE DONE ABOVE OR INSIDE THE COPYRIGHT NOTICE. A RE-DISTRIBUTION
-// MUST CONTAIN THE INTACT AND UNCHANGED COPYRIGHT NOTICE.
-// CONTRIBUTIONS AND CHANGES TO THE PROGRAM CODE SHOULD BE MARKED AS SUCH.
-
-package de.anomic.htmlFilter;
-
-import java.util.HashSet;
-import java.util.HashMap;
-import java.util.Properties;
-
-import de.anomic.server.serverByteBuffer;
-
-public abstract class htmlFilterAbstractScraper implements htmlFilterScraper {
-
-    public static final byte lb = (byte) '<';
-    public static final byte rb = (byte) '>';
-    public static final byte sl = (byte) '/';
- 
-    private HashSet      tags0;
-    private HashSet      tags1;
-
-    // define a translation table for html character codings
-    private static HashMap trans = new HashMap(300);
-    static {
-        trans.put("&quot;", "\""); //Anf&uuml;hrungszeichen oben
-        trans.put("&amp;", "&"); //Ampersand-Zeichen, kaufm&auml;nnisches Und
-        trans.put("&lt;", "<"); //&ouml;ffnende spitze Klammer
-        trans.put("&gt;", ">"); //schlie&szlig;ende spitze Klammer
-        trans.put("&nbsp;", " "); //Erzwungenes Leerzeichen
-        trans.put("&iexcl;", "!"); //umgekehrtes Ausrufezeichen
-        trans.put("&cent;", " cent "); //Cent-Zeichen
-        trans.put("&pound;", " pound "); //Pfund-Zeichen
-        trans.put("&curren;", " currency "); //W&auml;hrungs-Zeichen
-        trans.put("&yen;", " yen "); //Yen-Zeichen
-        trans.put("&brvbar;", " "); //durchbrochener Strich
-        trans.put("&sect;", " paragraph "); //Paragraph-Zeichen
-        trans.put("&uml;", " "); //P&uuml;nktchen oben
-        trans.put("&copy;", " copyright "); //Copyright-Zeichen
-        trans.put("&ordf;", " "); //Ordinal-Zeichen weiblich
-        trans.put("&laquo;", " "); //angewinkelte Anf&uuml;hrungszeichen links
-        trans.put("&not;", " not "); //Verneinungs-Zeichen
-        trans.put("&shy;", "-"); //kurzer Trennstrich
-        trans.put("&reg;", " trademark "); //Registriermarke-Zeichen
-        trans.put("&macr;", " "); //&Uuml;berstrich
-        trans.put("&deg;", " degree "); //Grad-Zeichen
-        trans.put("&plusmn;", " +/- "); //Plusminus-Zeichen
-        trans.put("&sup2;", " square "); //Hoch-2-Zeichen
-        trans.put("&sup3;", " 3 "); //Hoch-3-Zeichen
-        trans.put("&acute;", " "); //Acute-Zeichen
-        trans.put("&micro;", " micro "); //Mikro-Zeichen
-        trans.put("&para;", " paragraph "); //Absatz-Zeichen
-        trans.put("&middot;", " "); //Mittelpunkt
-        trans.put("&cedil;", " "); //H&auml;kchen unten
-        trans.put("&sup1;", " "); //Hoch-1-Zeichen
-        trans.put("&ordm;", " degree "); //Ordinal-Zeichen m&auml;nnlich
-        trans.put("&raquo;", " "); //angewinkelte Anf&uuml;hrungszeichen rechts
-        trans.put("&frac14;", " quarter "); //ein Viertel
-        trans.put("&frac12;", " half "); //ein Halb
-        trans.put("&frac34;", " 3/4 "); //drei Viertel
-        trans.put("&iquest;", "?"); //umgekehrtes Fragezeichen
-        trans.put("&Agrave;", "A"); //A mit Accent grave
-        trans.put("&Aacute;", "A"); //A mit Accent acute
-        trans.put("&Acirc;", "A"); //A mit Circumflex
-        trans.put("&Atilde;", "A"); //A mit Tilde
-        trans.put("&Auml;", "Ae"); //A Umlaut
-        trans.put("&Aring;", "A"); //A mit Ring
-        trans.put("&AElig;", "A"); //A mit legiertem E
-        trans.put("&Ccedil;", "C"); //C mit H&auml;kchen
-        trans.put("&Egrave;", "E"); //E mit Accent grave
-        trans.put("&Eacute;", "E"); //E mit Accent acute
-        trans.put("&Ecirc;", "E"); //E mit Circumflex
-        trans.put("&Euml;", "E"); //E Umlaut
-        trans.put("&Igrave;", "I"); //I mit Accent grave
-        trans.put("&Iacute;", "I"); //I mit Accent acute
-        trans.put("&Icirc;", "I"); //I mit Circumflex
-        trans.put("&Iuml;", "I"); //I Umlaut
-        trans.put("&ETH;", "D"); //Eth (isl&auml;ndisch)
-        trans.put("&Ntilde;", "N"); //N mit Tilde
-        trans.put("&Ograve;", "O"); //O mit Accent grave
-        trans.put("&Oacute;", "O"); //O mit Accent acute
-        trans.put("&Ocirc;", "O"); //O mit Circumflex
-        trans.put("&Otilde;", "O"); //O mit Tilde
-        trans.put("&Ouml;", "Oe"); //O Umlaut
-        trans.put("&times;", " times "); //Mal-Zeichen
-        trans.put("&Oslash;", "O"); //O mit Schr&auml;gstrich
-        trans.put("&Ugrave;", "U"); //U mit Accent grave
-        trans.put("&Uacute;", "U"); //U mit Accent acute
-        trans.put("&Ucirc;", "U"); //U mit Circumflex
-        trans.put("&Uuml;", "Ue"); //U Umlaut
-        trans.put("&Yacute;", "Y"); //Y mit Accent acute
-        trans.put("&THORN;", "P"); //THORN (isl&auml;ndisch)
-        trans.put("&szlig;", "ss"); //scharfes S
-        trans.put("&agrave;", "a"); //a mit Accent grave
-        trans.put("&aacute;", "a"); //a mit Accent acute
-        trans.put("&acirc;", "a"); //a mit Circumflex
-        trans.put("&atilde;", "a"); //a mit Tilde
-        trans.put("&auml;", "ae"); //a Umlaut
-        trans.put("&aring;", "a"); //a mit Ring
-        trans.put("&aelig;", "a"); //a mit legiertem e
-        trans.put("&ccedil;", "c"); //c mit H&auml;kchen
-        trans.put("&egrave;", "e"); //e mit Accent grave
-        trans.put("&eacute;", "e"); //e mit Accent acute
-        trans.put("&ecirc;", "e"); //e mit Circumflex
-        trans.put("&euml;", "e"); //e Umlaut
-        trans.put("&igrave;", "i"); //i mit Accent grave
-        trans.put("&iacute;", "i"); //i mit Accent acute
-        trans.put("&icirc;", "i"); //i mit Circumflex
-        trans.put("&iuml;", "i"); //i Umlaut
-        trans.put("&eth;", "d"); //eth (isl&auml;ndisch)
-        trans.put("&ntilde;", "n"); //n mit Tilde
-        trans.put("&ograve;", "o"); //o mit Accent grave
-        trans.put("&oacute;", "o"); //o mit Accent acute
-        trans.put("&ocirc;", "o"); //o mit Circumflex
-        trans.put("&otilde;", "o"); //o mit Tilde
-        trans.put("&ouml;", "oe"); //o Umlaut
-        trans.put("&divide;", "%"); //Divisions-Zeichen
-        trans.put("&oslash;", "o"); //o mit Schr&auml;gstrich
-        trans.put("&ugrave;", "u"); //u mit Accent grave
-        trans.put("&uacute;", "u"); //u mit Accent acute
-        trans.put("&ucirc;", "u"); //u mit Circumflex
-        trans.put("&uuml;", "ue"); //u Umlaut
-        trans.put("&yacute;", "y"); //y mit Accent acute
-        trans.put("&thorn;", "p"); //thorn (isl&auml;ndisch)
-        trans.put("&yuml;", "y"); //y Umlaut
-        trans.put("&Alpha;", " Alpha "); //Alpha gro&szlig;
-        trans.put("&alpha;", " alpha "); //alpha klein
-        trans.put("&Beta;", " Beta "); //Beta gro&szlig;
-        trans.put("&beta;", " beta "); //beta klein
-        trans.put("&Gamma;", " Gamma "); //Gamma gro&szlig;
-        trans.put("&gamma;", " gamma "); //gamma klein
-        trans.put("&Delta;", " Delta "); //Delta gro&szlig;
-        trans.put("&delta;", " delta "); //delta klein
-        trans.put("&Epsilon;", " Epsilon "); //Epsilon gro&szlig;
-        trans.put("&epsilon;", " epsilon "); //epsilon klein
-        trans.put("&Zeta;", " Zeta "); //Zeta gro&szlig;
-        trans.put("&zeta;", " zeta "); //zeta klein
-        trans.put("&Eta;", " Eta "); //Eta gro&szlig;
-        trans.put("&eta;", " eta "); //eta klein
-        trans.put("&Theta;", " Theta "); //Theta gro&szlig;
-        trans.put("&theta;", " theta "); //theta klein
-        trans.put("&Iota;", " Iota "); //Iota gro&szlig;
-        trans.put("&iota;", " iota "); //iota klein
-        trans.put("&Kappa;", " Kappa "); //Kappa gro&szlig;
-        trans.put("&kappa;", " kappa "); //kappa klein
-        trans.put("&Lambda;", " Lambda "); //Lambda gro&szlig;
-        trans.put("&lambda;", " lambda "); //lambda klein
-        trans.put("&Mu;", " Mu "); //Mu gro&szlig;
-        trans.put("&mu;", " mu "); //mu klein
-        trans.put("&Nu;", " Nu "); //Nu gro&szlig;
-        trans.put("&nu;", " nu "); //nu klein
-        trans.put("&Xi;", " Xi "); //Xi gro&szlig;
-        trans.put("&xi;", " xi "); //xi klein
-        trans.put("&Omicron;", " Omicron "); //Omicron gro&szlig;
-        trans.put("&omicron;", " omicron "); //omicron klein
-        trans.put("&Pi;", " Pi "); //Pi gro&szlig;
-        trans.put("&pi;", " pi "); //pi klein
-        trans.put("&Rho;", " Rho "); //Rho gro&szlig;
-        trans.put("&rho;", " rho "); //rho klein
-        trans.put("&Sigma;", " Sigma "); //Sigma gro&szlig;
-        trans.put("&sigmaf;", " sigma "); //sigmaf klein
-        trans.put("&sigma;", " sigma "); //sigma klein
-        trans.put("&Tau;", " Tau "); //Tau gro&szlig;
-        trans.put("&tau;", " tau "); //tau klein
-        trans.put("&Upsilon;", " Ypsilon "); //Upsilon gro&szlig;
-        trans.put("&upsilon;", " ypsilon "); //upsilon klein
-        trans.put("&Phi;", " Phi "); //Phi gro&szlig;
-        trans.put("&phi;", " phi "); //phi klein
-        trans.put("&Chi;", " Chi "); //Chi gro&szlig;
-        trans.put("&chi;", " chi "); //chi klein
-        trans.put("&Psi;", " Psi "); //Psi gro&szlig;
-        trans.put("&psi;", " psi "); //psi klein
-        trans.put("&Omega;", " Omega "); //Omega gro&szlig;
-        trans.put("&omega;", " omega "); //omega klein
-        trans.put("&thetasym;", " theta "); //theta Symbol
-        trans.put("&upsih;", " ypsilon "); //upsilon mit Haken
-        trans.put("&piv;", " pi "); //pi Symbol
-        trans.put("&forall;", " for all "); //f&uuml;r alle
-        trans.put("&part;", " part of "); //teilweise
-        trans.put("&exist;", " exists "); //existiert
-        trans.put("&empty;", " null "); //leer
-        trans.put("&nabla;", " nabla "); //nabla
-        trans.put("&isin;", " element of "); //Element von
-        trans.put("&notin;", " not element of "); //kein Element von
-        trans.put("&ni;", " contains "); //enth&auml;lt als Element
-        trans.put("&prod;", " product "); //Produkt
-        trans.put("&sum;", " sum "); //Summe
-        trans.put("&minus;", " minus "); //minus
-        trans.put("&lowast;", " times "); //Asterisk
-        trans.put("&radic;", " sqare root "); //Quadratwurzel
-        trans.put("&prop;", " proportional to "); //proportional zu
-        trans.put("&infin;", " unlimited "); //unendlich
-        trans.put("&ang;", " angle "); //Winkel
-        trans.put("&and;", " and "); //und
-        trans.put("&or;", " or "); //oder
-        trans.put("&cap;", " "); //Schnittpunkt
-        trans.put("&cup;", " unity "); //Einheit
-        trans.put("&int;", " integral "); //Integral
-        trans.put("&there4;", " cause "); //deshalb
-        trans.put("&sim;", " similar to "); //&auml;hnlich wie
-        trans.put("&cong;", " equal "); //ann&auml;hernd gleich
-        trans.put("&asymp;", " equal "); //beinahe gleich
-        trans.put("&ne;", " not equal "); //ungleich
-        trans.put("&equiv;", " identical "); //identisch mit
-        trans.put("&le;", " smaller or equal than "); //kleiner gleich
-        trans.put("&ge;", " greater or equal than "); //gr&ouml;&szlig;er gleich
-        trans.put("&sub;", " subset of "); //Untermenge von
-        trans.put("&sup;", " superset of "); //Obermenge von
-        trans.put("&nsub;", " not subset of "); //keine Untermenge von
-        trans.put("&sube;", ""); //Untermenge von oder gleich mit
-        trans.put("&supe;", ""); //Obermenge von oder gleich mit
-        trans.put("&oplus;", ""); //Direktsumme
-        trans.put("&otimes;", ""); //Vektorprodukt
-        trans.put("&perp;", ""); //senkrecht zu
-        trans.put("&sdot;", ""); //Punkt-Operator
-        trans.put("&loz;", ""); //Raute
-        trans.put("&lceil;", ""); //links oben
-        trans.put("&rceil;", ""); //rechts oben
-        trans.put("&lfloor;", ""); //links unten
-        trans.put("&rfloor;", ""); //rechts unten
-        trans.put("&lang;", ""); //spitze Klammer links
-        trans.put("&rang;", ""); //spitze Klammer rechts
-        trans.put("&larr;", ""); //Pfeil links
-        trans.put("&uarr;", ""); //Pfeil oben
-        trans.put("&rarr;", ""); //Pfeil rechts
-        trans.put("&darr;", ""); //Pfeil unten
-        trans.put("&harr;", ""); //Pfeil links/rechts
-        trans.put("&crarr;", ""); //Pfeil unten-Knick-links
-        trans.put("&lArr;", ""); //Doppelpfeil links
-        trans.put("&uArr;", ""); //Doppelpfeil oben
-        trans.put("&rArr;", ""); //Doppelpfeil rechts
-        trans.put("&dArr;", ""); //Doppelpfeil unten
-        trans.put("&hArr;", ""); //Doppelpfeil links/rechts
-        trans.put("&bull;", ""); //Bullet-Zeichen
-        trans.put("&hellip;", ""); //Horizontale Ellipse
-        trans.put("&prime;", ""); //Minutenzeichen
-        trans.put("&oline;", ""); //&Uuml;berstrich
-        trans.put("&frasl;", ""); //Bruchstrich
-        trans.put("&weierp;", ""); //Weierstrass p
-        trans.put("&image;", ""); //Zeichen f&uuml;r &quot;imagin&auml;r&quot;
-        trans.put("&real;", ""); //Zeichen f&uuml;r &quot;real&quot;
-        trans.put("&trade;", ""); //Trademark-Zeichen
-        trans.put("&euro;", ""); //Euro-Zeichen
-        trans.put("&alefsym;", ""); //Alef-Symbol
-        trans.put("&spades;", ""); //Pik-Zeichen
-        trans.put("&clubs;", ""); //Kreuz-Zeichen
-        trans.put("&hearts;", ""); //Herz-Zeichen
-        trans.put("&diams;", ""); //Karo-Zeichen
-        trans.put("&ensp;", ""); //Leerzeichen Breite n
-        trans.put("&emsp;", ""); //Leerzeichen Breite m
-        trans.put("&thinsp;", ""); //Schmales Leerzeichen
-        trans.put("&zwnj;", ""); //null breiter Nichtverbinder
-        trans.put("&zwj;", ""); //null breiter Verbinder
-        trans.put("&lrm;", ""); //links-nach-rechts-Zeichen
-        trans.put("&rlm;", ""); //rechts-nach-links-Zeichen
-        trans.put("&ndash;", ""); //Gedankenstrich Breite n
-        trans.put("&mdash;", ""); //Gedankenstrich Breite m
-        trans.put("&lsquo;", ""); //einfaches Anf&uuml;hrungszeichen links
-        trans.put("&rsquo;", ""); //einfaches Anf&uuml;hrungszeichen rechts
-        trans.put("&sbquo;", ""); //einfaches low-9-Zeichen
-        trans.put("&ldquo;", ""); //doppeltes Anf&uuml;hrungszeichen links
-        trans.put("&rdquo;", ""); //doppeltes Anf&uuml;hrungszeichen rechts
-        trans.put("&bdquo;", ""); //doppeltes low-9-Zeichen rechts
-        trans.put("&dagger;", ""); //Kreuz
-        trans.put("&Dagger;", ""); //Doppelkreuz
-        trans.put("&permil;", ""); //zu tausend
-        trans.put("&lsaquo;", ""); //angewinkeltes einzelnes Anf.zeichen links
-        trans.put("&rsaquo;", ""); //angewinkeltes einzelnes Anf.zeichen rechts
-    }
-
-
-    public htmlFilterAbstractScraper(HashSet tags0, HashSet tags1) {
-	this.tags0  = tags0;
-	this.tags1  = tags1;
-    }
-
-    public boolean isTag0(String tag) {
-	return (tags0 != null) && (tags0.contains(tag));
-    }
-
-    public boolean isTag1(String tag) {
-	return (tags1 != null) && (tags1.contains(tag));
-    }
-
-    //the 'missing' method that shall be implemented:
-    public abstract void scrapeText(byte[] text);
-
-    // the other methods must take into account to construct the return value correctly
-    public void scrapeTag0(String tagname, Properties tagopts) {
-    }
-
-    public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {
-    }
-
-    // string conversions
-    private static String code_iso8859s(int c) {
-	switch ((int) c & 0xff) {
-        
-        // german umlaute and ligaturen
-	case 0xc4: return "AE"; case 0xd6: return "OE"; case 0xdc: return "UE";
-	case 0xe4: return "ae"; case 0xf6: return "oe"; case 0xfc: return "ue";
-        case 0xdf: return "ss";
-        
-        // accent on letters; i.e. french characters
-        case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc5: return  "A";
-        case 0xc6: return  "AE";
-        case 0xc7: return  "C";
-        case 0xc8: case 0xc9: case 0xca: return  "E";
-        case 0xcc: case 0xcd: case 0xce: case 0xcf: return  "I";
-        case 0xd0: return  "D";
-        case 0xd1: return  "N";
-        case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd8: return  "O";
-        case 0xd7: return  "x";
-        case 0xd9: case 0xda: case 0xdb: return  "U";
-        case 0xdd: return  "Y";
-        case 0xde: return  "p";
-        
-        case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe5: return  "a";
-        case 0xe6: return  "ae";
-        case 0xe7: return  "c";
-        case 0xe8: case 0xe9: case 0xea: return  "e";
-        case 0xec: case 0xed: case 0xee: case 0xef: return  "i";
-        case 0xf0: return  "d";
-        case 0xf1: return  "n";
-        case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf8: return  "o";
-        case 0xf7: return  "%";
-        case 0xf9: case 0xfa: case 0xfb: return  "u";
-        case 0xfd: case 0xff: return  "y";
-        case 0xfe: return  "p";
-        
-	// special characters
-        case 0xa4: return " euro ";
-	default: return null;
-	}
-    }
-
-    public static serverByteBuffer convertUmlaute(serverByteBuffer bb) {
-	serverByteBuffer t = new serverByteBuffer(bb.length() + 20);
-        int b0, b1, b2;
-        String z;
-        int i = 0;
-        while (i < bb.length()) {
-            b0 = bb.byteAt(i) & 0xff;
-            // check utf-8 encoding
-            if (b0 < 128) {
-                t.append(b0);
-                i++;
-            } else {
-                b1 = bb.byteAt(i + 1) & 0xff;
-                if ((b0 > 0xbf) && (b0 < 0xe0)) {
-                    z = code_iso8859s(((b0 & 0x1f) << 0x6) | (b1 & 0x3f));
-                    i += 2;
-                } else {
-                    b2 = bb.byteAt(i + 2) & 0xff;
-                    z = code_iso8859s(((b0 & 0xf) << 0xc) | ((b1 & 0x3f) << 0x6) | (b2 & 0x3f));
-                    i += 3;
-                }
-                if (z == null) t.append(b0); else t.append(z);
-            }
-	}
-	return t;
-    }
-
-    private static byte[] transscript(byte[] code) {
-        String t = (String) trans.get(new String(code));
-        if (t == null) return new byte[0]; else return t.getBytes();
-    }
-
-    protected static serverByteBuffer transscriptAll(serverByteBuffer bb) {
-	int p0 = 0, p1;
-        byte[] t;
-        while ((p0 = bb.indexOf((byte) '&', p0)) >= 0) {
-	    p1 = bb.indexOf((byte) ';', p0);
-	    if (p1 >= 0) {
-                t = transscript(bb.getBytes(p0, p1 + 1));
-		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length() + p0 - p1 + t.length).append(t).append(bb.getBytes(p1 + 1));
-	    } else {
-		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length()).append(bb.getBytes(p0 + 1));
-            }
-	}
-        t = null;
-	return bb;
-    }
-
-    protected static serverByteBuffer stripAllTags(serverByteBuffer bb) {
-	int p0 = 0, p1;
-	while ((p0 = bb.indexOf(lb, p0)) >= 0) {
-	    p1 = bb.indexOf(rb, p0);
-	    if (p1 >= 0) {
-		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length() + p0 - p1 + 1).trim().append((byte) 32).append(new serverByteBuffer(bb.getBytes(p1 + 1)).trim());
-	    } else {
-		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length()).trim().append(new serverByteBuffer(bb.getBytes(p0 + 1)).trim());
-	    }
-	}
-	return bb.trim();
-    }
-
-    public static serverByteBuffer stripAll(serverByteBuffer bb) {
-	//return stripAllTags(s);
-	 return convertUmlaute(transscriptAll(stripAllTags(bb)));
-    }
-
-    public void close() {
-        // free resources
-        tags0 = null;
-        tags1 = null;
-    }
-    
-    public void finalize() {
-        close();
-    }
-        
-}
+// htmlFilterAbstractScraper.java // ---------------------------// (C) by Michael Peter Christen; mc@anomic.de// first published on http://www.anomic.de// Frankfurt, Germany, 2004// last major change: 18.02.2004//// You agree that the Author(s) is (are) not responsible for cost,// loss of data or any harm that may be caused by usage of this softare or// this documentation. The usage of this software is on your own risk. The// installation and usage (starting/running) of this software may allow other// people or application to access your computer and any attached devices and// is highly dependent on the configuration of the software which must be// done by the user of the software;the author(s) is (are) also// not responsible for proper configuration and usage of the software, even// if provoked by documentation provided together with the software.//// THE SOFTWARE THAT FOLLOWS AS ART OF PROGRAMMING BELOW THIS SECTION// IS PUBLISHED UNDER THE GPL AS DOCUMENTED IN THE FILE gpl.txt ASIDE THIS// FILE AND AS IN http://www.gnu.org/licenses/gpl.txt// ANY CHANGES TO THIS FILE ACCORDING TO THE GPL CAN BE DONE TO THE// LINES THAT FOLLOWS THIS COPYRIGHT NOTICE HERE, BUT CHANGES MUST NOT// BE DONE ABOVE OR INSIDE THE COPYRIGHT NOTICE. A RE-DISTRIBUTION// MUST CONTAIN THE INTACT AND UNCHANGED COPYRIGHT NOTICE.// CONTRIBUTIONS AND CHANGES TO THE PROGRAM CODE SHOULD BE MARKED AS SUCH.
package de.anomic.htmlFilter;import java.util.HashSet;import java.util.HashMap;import java.util.Properties;import de.anomic.server.serverByteBuffer;public abstract class htmlFilterAbstractScraper implements htmlFilterScraper {    public static final byte lb = (byte) '<';    public static final byte rb = (byte) '>';    public static final byte sl = (byte) '/';     private HashSet      tags0;    private HashSet      tags1;    // define a translation table for html character codings    private static HashMap trans = new HashMap(300);    static {        trans.put("&quot;", "\""); //Anf&uuml;hrungszeichen oben        trans.put("&amp;", "&"); //Ampersand-Zeichen, kaufm&auml;nnisches Und        trans.put("&lt;", "<"); //&ouml;ffnende spitze Klammer        trans.put("&gt;", ">"); //schlie&szlig;ende spitze Klammer        trans.put("&nbsp;", " "); //Erzwungenes Leerzeichen        trans.put("&iexcl;", "!"); //umgekehrtes Ausrufezeichen        trans.put("&cent;", " cent "); //Cent-Zeichen        trans.put("&pound;", " pound "); //Pfund-Zeichen        trans.put("&curren;", " currency "); //W&auml;hrungs-Zeichen        trans.put("&yen;", " yen "); //Yen-Zeichen        trans.put("&brvbar;", " "); //durchbrochener Strich        trans.put("&sect;", " paragraph "); //Paragraph-Zeichen        trans.put("&uml;", " "); //P&uuml;nktchen oben        trans.put("&copy;", " copyright "); //Copyright-Zeichen        trans.put("&ordf;", " "); //Ordinal-Zeichen weiblich        trans.put("&laquo;", " "); //angewinkelte Anf&uuml;hrungszeichen links        trans.put("&not;", " not "); //Verneinungs-Zeichen        trans.put("&shy;", "-"); //kurzer Trennstrich        trans.put("&reg;", " trademark "); //Registriermarke-Zeichen        trans.put("&macr;", " "); //&Uuml;berstrich        trans.put("&deg;", " degree "); //Grad-Zeichen        trans.put("&plusmn;", " +/- "); //Plusminus-Zeichen        trans.put("&sup2;", " square "); //Hoch-2-Zeichen        
trans.put("&sup3;", " 3 "); //Hoch-3-Zeichen        trans.put("&acute;", " "); //Acute-Zeichen        trans.put("&micro;", " micro "); //Mikro-Zeichen        trans.put("&para;", " paragraph "); //Absatz-Zeichen        trans.put("&middot;", " "); //Mittelpunkt        trans.put("&cedil;", " "); //H&auml;kchen unten        trans.put("&sup1;", " "); //Hoch-1-Zeichen        trans.put("&ordm;", " degree "); //Ordinal-Zeichen m&auml;nnlich        trans.put("&raquo;", " "); //angewinkelte Anf&uuml;hrungszeichen rechts        trans.put("&frac14;", " quarter "); //ein Viertel        trans.put("&frac12;", " half "); //ein Halb        trans.put("&frac34;", " 3/4 "); //drei Viertel        trans.put("&iquest;", "?"); //umgekehrtes Fragezeichen        trans.put("&Agrave;", "A"); //A mit Accent grave        trans.put("&Aacute;", "A"); //A mit Accent acute        trans.put("&Acirc;", "A"); //A mit Circumflex        trans.put("&Atilde;", "A"); //A mit Tilde        trans.put("&Auml;", "Ae"); //A Umlaut        trans.put("&Aring;", "A"); //A mit Ring        trans.put("&AElig;", "A"); //A mit legiertem E        trans.put("&Ccedil;", "C"); //C mit H&auml;kchen        trans.put("&Egrave;", "E"); //E mit Accent grave        trans.put("&Eacute;", "E"); //E mit Accent acute        trans.put("&Ecirc;", "E"); //E mit Circumflex        trans.put("&Euml;", "E"); //E Umlaut        trans.put("&Igrave;", "I"); //I mit Accent grave        trans.put("&Iacute;", "I"); //I mit Accent acute        trans.put("&Icirc;", "I"); //I mit Circumflex        trans.put("&Iuml;", "I"); //I Umlaut        trans.put("&ETH;", "D"); //Eth (isl&auml;ndisch)        trans.put("&Ntilde;", "N"); //N mit Tilde        trans.put("&Ograve;", "O"); //O mit Accent grave        trans.put("&Oacute;", "O"); //O mit Accent acute        trans.put("&Ocirc;", "O"); //O mit Circumflex        trans.put("&Otilde;", "O"); //O mit Tilde        trans.put("&Ouml;", "Oe"); //O Umlaut        
trans.put("&times;", " times "); //Mal-Zeichen        trans.put("&Oslash;", "O"); //O mit Schr&auml;gstrich        trans.put("&Ugrave;", "U"); //U mit Accent grave        trans.put("&Uacute;", "U"); //U mit Accent acute        trans.put("&Ucirc;", "U"); //U mit Circumflex        trans.put("&Uuml;", "Ue"); //U Umlaut        trans.put("&Yacute;", "Y"); //Y mit Accent acute        trans.put("&THORN;", "P"); //THORN (isl&auml;ndisch)        trans.put("&szlig;", "ss"); //scharfes S        trans.put("&agrave;", "a"); //a mit Accent grave        trans.put("&aacute;", "a"); //a mit Accent acute        trans.put("&acirc;", "a"); //a mit Circumflex        trans.put("&atilde;", "a"); //a mit Tilde        trans.put("&auml;", "ae"); //a Umlaut        trans.put("&aring;", "a"); //a mit Ring        trans.put("&aelig;", "a"); //a mit legiertem e        trans.put("&ccedil;", "c"); //c mit H&auml;kchen        trans.put("&egrave;", "e"); //e mit Accent grave        trans.put("&eacute;", "e"); //e mit Accent acute        trans.put("&ecirc;", "e"); //e mit Circumflex        trans.put("&euml;", "e"); //e Umlaut        trans.put("&igrave;", "i"); //i mit Accent grave        trans.put("&iacute;", "i"); //i mit Accent acute        trans.put("&icirc;", "i"); //i mit Circumflex        trans.put("&iuml;", "i"); //i Umlaut        trans.put("&eth;", "d"); //eth (isl&auml;ndisch)        trans.put("&ntilde;", "n"); //n mit Tilde        trans.put("&ograve;", "o"); //o mit Accent grave        trans.put("&oacute;", "o"); //o mit Accent acute        trans.put("&ocirc;", "o"); //o mit Circumflex        trans.put("&otilde;", "o"); //o mit Tilde        trans.put("&ouml;", "oe"); //o Umlaut        trans.put("&divide;", "%"); //Divisions-Zeichen        trans.put("&oslash;", "o"); //o mit Schr&auml;gstrich        trans.put("&ugrave;", "u"); //u mit Accent grave        trans.put("&uacute;", "u"); //u mit Accent acute        trans.put("&ucirc;", "u"); //u mit Circumflex   
     trans.put("&uuml;", "ue"); //u Umlaut        trans.put("&yacute;", "y"); //y mit Accent acute        trans.put("&thorn;", "p"); //thorn (isl&auml;ndisch)        trans.put("&yuml;", "y"); //y Umlaut        trans.put("&Alpha;", " Alpha "); //Alpha gro&szlig;        trans.put("&alpha;", " alpha "); //alpha klein        trans.put("&Beta;", " Beta "); //Beta gro&szlig;        trans.put("&beta;", " beta "); //beta klein        trans.put("&Gamma;", " Gamma "); //Gamma gro&szlig;        trans.put("&gamma;", " gamma "); //gamma klein        trans.put("&Delta;", " Delta "); //Delta gro&szlig;        trans.put("&delta;", " delta "); //delta klein        trans.put("&Epsilon;", " Epsilon "); //Epsilon gro&szlig;        trans.put("&epsilon;", " epsilon "); //epsilon klein        trans.put("&Zeta;", " Zeta "); //Zeta gro&szlig;        trans.put("&zeta;", " zeta "); //zeta klein        trans.put("&Eta;", " Eta "); //Eta gro&szlig;        trans.put("&eta;", " eta "); //eta klein        trans.put("&Theta;", " Theta "); //Theta gro&szlig;        trans.put("&theta;", " theta "); //theta klein        trans.put("&Iota;", " Iota "); //Iota gro&szlig;        trans.put("&iota;", " iota "); //iota klein        trans.put("&Kappa;", " Kappa "); //Kappa gro&szlig;        trans.put("&kappa;", " kappa "); //kappa klein        trans.put("&Lambda;", " Lambda "); //Lambda gro&szlig;        trans.put("&lambda;", " lambda "); //lambda klein        trans.put("&Mu;", " Mu "); //Mu gro&szlig;        trans.put("&mu;", " mu "); //mu klein        trans.put("&Nu;", " Nu "); //Nu gro&szlig;        trans.put("&nu;", " nu "); //nu klein        trans.put("&Xi;", " Xi "); //Xi gro&szlig;        trans.put("&xi;", " xi "); //xi klein        trans.put("&Omicron;", " Omicron "); //Omicron gro&szlig;        trans.put("&omicron;", " omicron "); //omicron klein        trans.put("&Pi;", " Pi "); //Pi gro&szlig;        trans.put("&pi;", " pi "); //pi klein        
trans.put("&Rho;", " Rho "); //Rho gro&szlig;        trans.put("&rho;", " rho "); //rho klein        trans.put("&Sigma;", " Sigma "); //Sigma gro&szlig;        trans.put("&sigmaf;", " sigma "); //sigmaf klein        trans.put("&sigma;", " sigma "); //sigma klein        trans.put("&Tau;", " Tau "); //Tau gro&szlig;        trans.put("&tau;", " tau "); //tau klein        trans.put("&Upsilon;", " Ypsilon "); //Upsilon gro&szlig;        trans.put("&upsilon;", " ypsilon "); //upsilon klein        trans.put("&Phi;", " Phi "); //Phi gro&szlig;        trans.put("&phi;", " phi "); //phi klein        trans.put("&Chi;", " Chi "); //Chi gro&szlig;        trans.put("&chi;", " chi "); //chi klein        trans.put("&Psi;", " Psi "); //Psi gro&szlig;        trans.put("&psi;", " psi "); //psi klein        trans.put("&Omega;", " Omega "); //Omega gro&szlig;        trans.put("&omega;", " omega "); //omega klein        trans.put("&thetasym;", " theta "); //theta Symbol        trans.put("&upsih;", " ypsilon "); //upsilon mit Haken        trans.put("&piv;", " pi "); //pi Symbol        trans.put("&forall;", " for all "); //f&uuml;r alle        trans.put("&part;", " part of "); //teilweise        trans.put("&exist;", " exists "); //existiert        trans.put("&empty;", " null "); //leer        trans.put("&nabla;", " nabla "); //nabla        trans.put("&isin;", " element of "); //Element von        trans.put("&notin;", " not element of "); //kein Element von        trans.put("&ni;", " contains "); //enth&auml;lt als Element        trans.put("&prod;", " product "); //Produkt        trans.put("&sum;", " sum "); //Summe        trans.put("&minus;", " minus "); //minus        trans.put("&lowast;", " times "); //Asterisk        trans.put("&radic;", " sqare root "); //Quadratwurzel        trans.put("&prop;", " proportional to "); //proportional zu        trans.put("&infin;", " unlimited "); //unendlich        trans.put("&ang;", " angle "); //Winkel        
trans.put("&and;", " and "); //und        trans.put("&or;", " or "); //oder        trans.put("&cap;", " "); //Schnittpunkt        trans.put("&cup;", " unity "); //Einheit        trans.put("&int;", " integral "); //Integral        trans.put("&there4;", " cause "); //deshalb        trans.put("&sim;", " similar to "); //&auml;hnlich wie        trans.put("&cong;", " equal "); //ann&auml;hernd gleich        trans.put("&asymp;", " equal "); //beinahe gleich        trans.put("&ne;", " not equal "); //ungleich        trans.put("&equiv;", " identical "); //identisch mit        trans.put("&le;", " smaller or equal than "); //kleiner gleich        trans.put("&ge;", " greater or equal than "); //gr&ouml;&szlig;er gleich        trans.put("&sub;", " subset of "); //Untermenge von        trans.put("&sup;", " superset of "); //Obermenge von        trans.put("&nsub;", " not subset of "); //keine Untermenge von        trans.put("&sube;", ""); //Untermenge von oder gleich mit        trans.put("&supe;", ""); //Obermenge von oder gleich mit        trans.put("&oplus;", ""); //Direktsumme        trans.put("&otimes;", ""); //Vektorprodukt        trans.put("&perp;", ""); //senkrecht zu        trans.put("&sdot;", ""); //Punkt-Operator        trans.put("&loz;", ""); //Raute        trans.put("&lceil;", ""); //links oben        trans.put("&rceil;", ""); //rechts oben        trans.put("&lfloor;", ""); //links unten        trans.put("&rfloor;", ""); //rechts unten        trans.put("&lang;", ""); //spitze Klammer links        trans.put("&rang;", ""); //spitze Klammer rechts        trans.put("&larr;", ""); //Pfeil links        trans.put("&uarr;", ""); //Pfeil oben        trans.put("&rarr;", ""); //Pfeil rechts        trans.put("&darr;", ""); //Pfeil unten        trans.put("&harr;", ""); //Pfeil links/rechts        trans.put("&crarr;", ""); //Pfeil unten-Knick-links        trans.put("&lArr;", ""); //Doppelpfeil links        trans.put("&uArr;", ""); //Doppelpfeil 
oben        trans.put("&rArr;", ""); //Doppelpfeil rechts        trans.put("&dArr;", ""); //Doppelpfeil unten        trans.put("&hArr;", ""); //Doppelpfeil links/rechts        trans.put("&bull;", ""); //Bullet-Zeichen        trans.put("&hellip;", ""); //Horizontale Ellipse        trans.put("&prime;", ""); //Minutenzeichen        trans.put("&oline;", ""); //&Uuml;berstrich        trans.put("&frasl;", ""); //Bruchstrich        trans.put("&weierp;", ""); //Weierstrass p        trans.put("&image;", ""); //Zeichen f&uuml;r &quot;imagin&auml;r&quot;        trans.put("&real;", ""); //Zeichen f&uuml;r &quot;real&quot;        trans.put("&trade;", ""); //Trademark-Zeichen        trans.put("&euro;", ""); //Euro-Zeichen        trans.put("&alefsym;", ""); //Alef-Symbol        trans.put("&spades;", ""); //Pik-Zeichen        trans.put("&clubs;", ""); //Kreuz-Zeichen        trans.put("&hearts;", ""); //Herz-Zeichen        trans.put("&diams;", ""); //Karo-Zeichen        trans.put("&ensp;", ""); //Leerzeichen Breite n        trans.put("&emsp;", ""); //Leerzeichen Breite m        trans.put("&thinsp;", ""); //Schmales Leerzeichen        trans.put("&zwnj;", ""); //null breiter Nichtverbinder        trans.put("&zwj;", ""); //null breiter Verbinder        trans.put("&lrm;", ""); //links-nach-rechts-Zeichen        trans.put("&rlm;", ""); //rechts-nach-links-Zeichen        trans.put("&ndash;", ""); //Gedankenstrich Breite n        trans.put("&mdash;", ""); //Gedankenstrich Breite m        trans.put("&lsquo;", ""); //einfaches Anf&uuml;hrungszeichen links        trans.put("&rsquo;", ""); //einfaches Anf&uuml;hrungszeichen rechts        trans.put("&sbquo;", ""); //einfaches low-9-Zeichen        trans.put("&ldquo;", ""); //doppeltes Anf&uuml;hrungszeichen links        trans.put("&rdquo;", ""); //doppeltes Anf&uuml;hrungszeichen rechts        trans.put("&bdquo;", ""); //doppeltes low-9-Zeichen rechts        trans.put("&dagger;", ""); //Kreuz        
trans.put("&Dagger;", ""); //Doppelkreuz        trans.put("&permil;", ""); //zu tausend        trans.put("&lsaquo;", ""); //angewinkeltes einzelnes Anf.zeichen links        trans.put("&rsaquo;", ""); //angewinkeltes einzelnes Anf.zeichen rechts    }    public htmlFilterAbstractScraper(HashSet tags0, HashSet tags1) {	this.tags0  = tags0;	this.tags1  = tags1;    }    public boolean isTag0(String tag) {	return (tags0 != null) && (tags0.contains(tag));    }    public boolean isTag1(String tag) {	return (tags1 != null) && (tags1.contains(tag));    }    //the 'missing' method that shall be implemented:    public abstract void scrapeText(byte[] text);    // the other methods must take into account to construct the return value correctly    public void scrapeTag0(String tagname, Properties tagopts) {    }    public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {    }    // string conversions    private static String code_iso8859s(int c) {	switch ((int) c & 0xff) {                // german umlaute and ligaturen	case 0xc4: return "AE"; case 0xd6: return "OE"; case 0xdc: return "UE";	case 0xe4: return "ae"; case 0xf6: return "oe"; case 0xfc: return "ue";        case 0xdf: return "ss";                // accent on letters; i.e. 
french characters        case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc5: return  "A";        case 0xc6: return  "AE";        case 0xc7: return  "C";        case 0xc8: case 0xc9: case 0xca: return  "E";        case 0xcc: case 0xcd: case 0xce: case 0xcf: return  "I";        case 0xd0: return  "D";        case 0xd1: return  "N";        case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd8: return  "O";        case 0xd7: return  "x";        case 0xd9: case 0xda: case 0xdb: return  "U";        case 0xdd: return  "Y";        case 0xde: return  "p";                case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe5: return  "a";        case 0xe6: return  "ae";        case 0xe7: return  "c";        case 0xe8: case 0xe9: case 0xea: return  "e";        case 0xec: case 0xed: case 0xee: case 0xef: return  "i";        case 0xf0: return  "d";        case 0xf1: return  "n";        case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf8: return  "o";        case 0xf7: return  "%";        case 0xf9: case 0xfa: case 0xfb: return  "u";        case 0xfd: case 0xff: return  "y";        case 0xfe: return  "p";        	// special characters        case 0xa4: return " euro ";	default: return null;	}    }    public static serverByteBuffer convertUmlaute(serverByteBuffer bb) {	serverByteBuffer t = new serverByteBuffer(bb.length() + 20);        int b0, b1, b2;        String z;        int i = 0;        while (i < bb.length()) {            b0 = bb.byteAt(i) & 0xff;            // check utf-8 encoding            if ((b0 < 128) || (i + 1 == bb.length())) {                t.append(b0);                i++;            } else {                b1 = bb.byteAt(i + 1) & 0xff;                if (b1 > 0x3f) {                    z = code_iso8859s(b0);                    i++;                } else if ((b0 > 0xbf) && (b0 < 0xe0)) {                    z = code_iso8859s(((b0 & 0x1f) << 0x6) | (b1 & 0x3f));                    i += 2;                } 
else {                    if (i + 2 >= bb.length()) {                        z = null;                        i++;                    } else {                        b2 = bb.byteAt(i + 2) & 0xff;                        if (b2 > 0x3f) {                            z = code_iso8859s(b0);                            i++;                        } else {                            z = code_iso8859s(((b0 & 0xf) << 0xc) | ((b1 & 0x3f) << 0x6) | (b2 & 0x3f));                            i += 3;                        }                    }                }                if (z == null) t.append(b0); else t.append(z);            }	}	return t;    }    private static byte[] transscript(byte[] code) {        String t = (String) trans.get(new String(code));        if (t == null) return new byte[0]; else return t.getBytes();    }    protected static serverByteBuffer transscriptAll(serverByteBuffer bb) {	int p0 = 0, p1;        byte[] t;        while ((p0 = bb.indexOf((byte) '&', p0)) >= 0) {	    p1 = bb.indexOf((byte) ';', p0);	    if (p1 >= 0) {                t = transscript(bb.getBytes(p0, p1 + 1));		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length() + p0 - p1 + t.length).append(t).append(bb.getBytes(p1 + 1));	    } else {		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length()).append(bb.getBytes(p0 + 1));            }	}        t = null;	return bb;    }    protected static serverByteBuffer stripAllTags(serverByteBuffer bb) {	int p0 = 0, p1;	while ((p0 = bb.indexOf(lb, p0)) >= 0) {	    p1 = bb.indexOf(rb, p0);	    if (p1 >= 0) {		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length() + p0 - p1 + 1).trim().append((byte) 32).append(new serverByteBuffer(bb.getBytes(p1 + 1)).trim());	    } else {		bb = new serverByteBuffer(bb.getBytes(0, p0), bb.length()).trim().append(new serverByteBuffer(bb.getBytes(p0 + 1)).trim());	    }	}	return bb.trim();    }    public static serverByteBuffer stripAll(serverByteBuffer bb) {	
//return stripAllTags(s);	 return convertUmlaute(transscriptAll(stripAllTags(bb)));    }    public void close() {        // free resources        tags0 = null;        tags1 = null;    }        public void finalize() {        close();    }    }
\ No newline at end of file
diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index a4751847d..3fb292c10 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -41,6 +41,7 @@
 package de.anomic.htmlFilter;
 
 import java.net.URL;
+import java.net.MalformedURLException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -178,4 +179,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
 	System.out.println("TEXT    :" + new String(text.getBytes()));
     }
 
+    /** Smoke test: scrapes one German sentence containing an umlaut and prints the text the scraper collected. */
+    public static void main(String[] args) {
+        String test = "Nokia k\u00fcrzt bei Forschung und Entwicklung"; // u-umlaut as a Unicode escape: independent of the source-file encoding
+        try {
+            htmlFilterContentScraper scraper = new htmlFilterContentScraper(new URL("http://localhost"));
+            scraper.scrapeText(test.getBytes());
+            System.out.println(new String(scraper.getText()));
+        } catch (MalformedURLException e) { e.printStackTrace(); } // report the failure instead of silently swallowing it
+    }
+    
 }
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index ab2c8811c..2ad248f2a 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -149,8 +149,8 @@ public final class plasmaParser {
      * @see #initMediaExt(String)
      */
     static {
-		initMediaExt(extString2extList("swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
-			"sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj"));
+        initMediaExt(extString2extList("swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
+        "sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj"));
         
         /* ===================================================
          * initializing the parser object pool
@@ -383,21 +383,21 @@ public final class plasmaParser {
     
     private static void loadEnabledParserList() {
         // loading a list of availabe parser from file
-    	Properties prop = new Properties();
+        Properties prop = new Properties();
         BufferedInputStream bufferedIn = null;
-    	try {
-    	    prop.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("yacy.parser"))));
-    	} catch (IOException e) {
-    	    System.err.println("ERROR: yacy.parser not found in settings path");
-    	} finally {
+        try {
+            prop.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("yacy.parser"))));
+        } catch (IOException e) {
+            System.err.println("ERROR: yacy.parser not found in settings path");
+        } finally {
             if (bufferedIn != null) try{ bufferedIn.close(); }catch(Exception e){}
         }
-
+        
         // enable them ...
         setEnabledParserList(prop.keySet());
-	}
-
-	private static void loadAvailableParserList() {
+    }
+    
+    private static void loadAvailableParserList() {
         try {
             plasmaParser.availableParserList.clear();
             
@@ -405,24 +405,24 @@ public final class plasmaParser {
             String javaClassPath = System.getProperty("java.class.path");
             
             // getting the current package name
-			String plasmaParserPkgName = plasmaParser.class.getPackage().getName() + ".parser";
+            String plasmaParserPkgName = plasmaParser.class.getPackage().getName() + ".parser";
             serverLog.logInfo("PARSER","Searching for additional content parsers in package " + plasmaParserPkgName);
- 
+            
             // getting an uri to the parser subpackage
-	        String packageURI = plasmaParser.class.getResource("/"+plasmaParserPkgName.replace('.','/')).toString();
-			serverLog.logDebug("PARSER", "Parser directory is " + packageURI);           
- 
+            String packageURI = plasmaParser.class.getResource("/"+plasmaParserPkgName.replace('.','/')).toString();
+            serverLog.logDebug("PARSER", "Parser directory is " + packageURI);
+            
             // open the parser directory
-	        File parserDir = new File(new URI(packageURI));
+            File parserDir = new File(new URI(packageURI));
             if ((parserDir == null) || (!parserDir.exists()) || (!parserDir.isDirectory())) return;
             
-            /* 
-             * loop through all subdirectories and test if we can 
+            /*
+             * loop through all subdirectories and test if we can
              * find an additional parser class
              */
             File[] parserDirectories = parserDir.listFiles(parserDirectoryFilter);
             if (parserDirectories == null) return;
-			for (int parserDirNr=0; parserDirNr< parserDirectories.length; parserDirNr++) {
+            for (int parserDirNr=0; parserDirNr< parserDirectories.length; parserDirNr++) {
                 File currentDir = parserDirectories[parserDirNr];
                 serverLog.logDebug("PARSER", "Searching in directory " + currentDir.toString());
                 String[] parserClasses = currentDir.list(parserFileNameFilter);
@@ -432,7 +432,7 @@ public final class plasmaParser {
                     serverLog.logDebug("PARSER", "Testing parser class " + parserClasses[parserNr]);
                     String className = parserClasses[parserNr].substring(0,parserClasses[parserNr].indexOf(".class"));
                     String fullClassName = plasmaParserPkgName + "." + currentDir.getName() + "." + className;
-	                try {
+                    try {
                         // trying to load the parser class by its name
                         Class parserClass = Class.forName(fullClassName);
                         Object theParser = parserClass.newInstance();
@@ -446,7 +446,7 @@ public final class plasmaParser {
                                     throw new ParserException("Missing dependency detected: '" + neededLibx[libxId] + "'.");
                                 }
                             }
-                        }                        
+                        }
                         
                         // loading the list of mime-types that are supported by this parser class
                         Hashtable supportedMimeTypes = ((Parser)theParser).getSupportedMimeTypes();
@@ -456,31 +456,31 @@ public final class plasmaParser {
                             availableParserList.put(mimeType,fullClassName);
                             serverLog.logInfo("PARSER", "Found functional parser for mimeType '" + mimeType + "'.");
                         }
-                            
-	                } catch (Exception e) { /* we can ignore this for the moment */ 
+                        
+                    } catch (Exception e) { /* we can ignore this for the moment */
                         serverLog.logWarning("PARSER", "Parser '" + className + "' doesn't work correctly and will be ignored.\n [" + e.getClass().getName() + "]: " + e.getMessage());
-                    } catch (Error e) { /* we can ignore this for the moment */ 
+                    } catch (Error e) { /* we can ignore this for the moment */
                         serverLog.logWarning("PARSER", "Parser '" + className + "' doesn't work correctly and will be ignored.\n [" + e.getClass().getName() + "]: " + e.getMessage());
                     }
                 }
-			}
+            }
             
         } catch (Exception e) {
             serverLog.logError("PARSER", "Unable to determine all installed parsers. " + e.getMessage());
-        }		
-	}
-
-	public void close() {        
+        }
+    }
+    
+    public void close() {
         // clearing the parser list
         synchronized (this.enabledParserList) {
-	        this.enabledParserList.clear();
-		}
+            this.enabledParserList.clear();
+        }
         
         // closing the parser object pool
-        try {        
-	        this.theParserPool.close();
-        } catch (Exception e) { }        
-    }    
+        try {
+            this.theParserPool.close();
+        } catch (Exception e) { }
+    }
     
     public plasmaParserDocument parseSource(URL location, String mimeType, byte[] source) {
         
@@ -498,7 +498,6 @@ public final class plasmaParser {
                 // ... otherwise we make a html scraper and transformer
                 htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);
                 OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);
-    
                 hfos.write(source);
                 hfos.close();
                 return transformScraper(location, mimeType, scraper);
@@ -660,22 +659,24 @@ public final class plasmaParser {
         return v;
     }
     
-    public static void main(String[] args) {		
-	//javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
-	//java -cp source:lib/commons-collections.jar:lib/commons-pool-1.2.jar de.anomic.plasma.plasmaParser bug.html bug.out
-		try {
-			File in = new File(args[0]);
-			File out = new File(args[1]);
-			plasmaParser theParser = new plasmaParser();
+    public static void main(String[] args) {
+        //javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
+        //java -cp source:lib/commons-collections.jar:lib/commons-pool-1.2.jar de.anomic.plasma.plasmaParser bug.html bug.out
+        try {
+            File in = new File(args[0]);
+            //File out = new File(args[1]);
+            plasmaParser theParser = new plasmaParser();
             theParser.initRealtimeParsableMimeTypes("application/xhtml+xml,text/html,text/plain");
             theParser.initParseableMimeTypes("application/atom+xml,application/gzip,application/java-archive,application/msword,application/octet-stream,application/pdf,application/rdf+xml,application/rss+xml,application/rtf,application/x-gzip,application/x-tar,application/xml,application/zip,text/rss,text/rtf,text/xml,application/x-bzip2");
-			FileInputStream theInput = new FileInputStream(in);
-			ByteArrayOutputStream theOutput = new ByteArrayOutputStream();
-			serverFileUtils.copy(theInput, theOutput);
-			plasmaParserDocument document = theParser.parseSource(new URL("http://brain/~theli/test.pdf"), null, theOutput.toByteArray());
-			//plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "application/pdf", theOutput.toByteArray());
-			byte[] theText = document.getText();
-			serverFileUtils.write(theText, out);
+            FileInputStream theInput = new FileInputStream(in);
+            ByteArrayOutputStream theOutput = new ByteArrayOutputStream();
+            serverFileUtils.copy(theInput, theOutput);
+            plasmaParserDocument document = theParser.parseSource(new URL("http://brain/~theli/test.pdf"), null, theOutput.toByteArray());
+            //plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "application/pdf", theOutput.toByteArray());
+            //byte[] theText = document.getText();
+            //serverFileUtils.write(theText, out);
+            String[] sentences = document.getSentences();
+            for (int i = 0; i < sentences.length; i++) System.out.println("line " + i + ":" + sentences[i]);
         } catch (Exception e) {
             e.printStackTrace();
         }
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index a51785167..d3bdbfdf3 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -105,6 +105,7 @@ public class plasmaSnippetCache {
     }
     
     public result retrieve(URL url, Set queryhashes, boolean fetchOnline) {
+        // debugging aid: "0OQUNU3JSs05" is the query hash of the word 'heise'
         if (queryhashes.size() == 0) {
             //System.out.println("found no queryhashes for url retrieve " + url);
             return new result(null, SOURCE_ERROR, "no query hashes given");
@@ -250,7 +251,7 @@ public class plasmaSnippetCache {
         } catch (IOException e) {}
         
         if (header == null) {
-            String filename = url.getFile();
+            String filename = cacheManager.getCachePath(url).getName();
             int p = filename.lastIndexOf('.');
             if ((p < 0) ||
                 ((p >= 0) && (plasmaParser.supportedFileExtContains(filename.substring(p + 1))))) {
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 537fdc58c..1c4ec2b46 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -371,6 +371,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             deployThread("99_indexcachemigration", "index cache migration", "migration of index cache data structures 0.37 -> 0.38",
             new serverInstantThread(classicCache, "oneStepMigration", "size"), 30000);
         }
+        
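+        // test routine for snippet fetch -- NOTE(review): this calls snippetCache.retrieve(..., true) unconditionally at this point; looks like temporary debug code, confirm and remove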
+        // url = /www.heise.de/mobil/newsticker/meldung/mail/54980
+        Set query = new HashSet(); query.add("0OQUNU3JSs05"); // 'heise'
+        //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/mobil/newsticker/meldung/mail/54980"), query, true);
+        plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/security/news/foren/go.shtml?read=1&msg_id=7301419&forum_id=72721"), query, true);
     }
     
     private static String ppRamString(int bytes) {
diff --git a/source/de/anomic/server/serverFileUtils.java b/source/de/anomic/server/serverFileUtils.java
index 158fd147a..cd19d0d72 100644
--- a/source/de/anomic/server/serverFileUtils.java
+++ b/source/de/anomic/server/serverFileUtils.java
@@ -90,12 +90,12 @@ public final class serverFileUtils {
         FileInputStream fis = null;
         FileOutputStream fos = null;
         try {
-	        fis = new FileInputStream(source);
-	        fos = new FileOutputStream(dest);
-	        copy(fis, fos);
+            fis = new FileInputStream(source);
+            fos = new FileOutputStream(dest);
+            copy(fis, fos);
         } finally {
             if (fis != null) try {fis.close();} catch (Exception e) {}
-            if (fos != null) try {fos.close();} catch (Exception e) {}            
+            if (fos != null) try {fos.close();} catch (Exception e) {}
         }
     }
 
@@ -107,16 +107,16 @@ public final class serverFileUtils {
     }
     
     public static byte[] read(File source) throws IOException {
-		byte[] buffer = new byte[(int) source.length()];
-		InputStream fis = null;
-		try {
-			fis = new FileInputStream(source);
-			int p = 0, c;
-			while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c;
-		} finally {
+        byte[] buffer = new byte[(int) source.length()]; // buffer sized to the file length; the long length is narrowed to int
+        InputStream fis = null; // stays null if the open below throws, so the finally block can skip close()
+        try {
+            fis = new FileInputStream(source);
+            int p = 0, c; // p = bytes stored so far, c = bytes returned by the last read
+            while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c; // stop when read() yields 0 (buffer full) or -1 (end of stream)
+        } finally {
             if (fis != null) try { fis.close(); } catch (Exception e) {}
-		}
-		return buffer;
+        }
+        return buffer; // NOTE(review): if fewer bytes were read than source.length(), the tail of the array stays zero-filled
     }
     
     public static byte[] readAndZip(File source) throws IOException {
diff --git a/source/yacy.java b/source/yacy.java
index 2af5914c4..69cdaed4b 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -148,6 +148,10 @@ public final class yacy {
             
             plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
             
+            // hard-coded, forced, temporary value migration: both settings below are set unconditionally
+            sb.setConfig("htTemplatePath", "htroot/env/templates");
+            sb.setConfig("parseableExt", "html,htm,txt,php,shtml,asp");
+            
             // if we are running an SVN version, we try to detect the used svn revision now ...
             if (vString.equals("@" + "REPL_VERSION" + "@")) {
                 Properties buildProp = new Properties();
@@ -188,9 +192,6 @@ public final class yacy {
             if (timeout < 60000) timeout = 60000;
             int maxSessions   = Integer.parseInt(sb.getConfig("httpdMaxSessions", "100"));
             
-            // hardcoded, forced, temporary value-migration
-            sb.setConfig("htTemplatePath", "htroot/env/templates");
-            
             // create some directories
             File htRootPath = new File(sb.getRootPath(), sb.getConfig("htRootPath", "htroot"));
             File htDocsPath = new File(sb.getRootPath(), sb.getConfig("htDocsPath", "DATA/HTDOCS"));
diff --git a/yacy.init b/yacy.init
index 74b8dbb76..c0b620b21 100644
--- a/yacy.init
+++ b/yacy.init
@@ -100,7 +100,7 @@ parseableMimeTypes=
 # this is important to recognize <a href> - tags as not-html reference
 # These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
 mediaExt=ace,arj,asf,avi,bin,bz2,css,deb,doc,dmg,gif,gz,hqx,img,iso,jar,jpe,jpg,jpeg,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,sit,swf,sxc,sxd,sxi,sxw,tar,tgz,torrent,wmv,xcf,xls,zip
-parseableExt=html,htm,txt
+parseableExt=html,htm,txt,php,shtml,asp
 
 # Promotion Strings
 # These strings appear in the Web Mask of the YACY search client