@@ -4,8 +4,6 @@
 // first published on http://www.anomic.de
 // Frankfurt, Germany, 2004
 //
-// Contains contributions by Marc Nause [MN]
-//
 // $LastChangedDate$
 // $LastChangedRevision$
 // $LastChangedBy$
@@ -41,6 +39,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 import javax.swing.event.EventListenerList;
@@ -55,8 +54,8 @@ import net.yacy.kelondro.util.ISO639;
 public class ContentScraper extends AbstractScraper implements Scraper {
     // statics: for initialization of the HTMLFilterAbstractScraper
-    private static final HashSet<String> linkTags0 = new HashSet<String>(9, 0.99f);
-    private static final HashSet<String> linkTags1 = new HashSet<String>(7, 0.99f);
+    private static final Set<String> linkTags0 = new HashSet<String>(9, 0.99f);
+    private static final Set<String> linkTags1 = new HashSet<String>(7, 0.99f);
     // all these tags must be given in lowercase, because the tags from the files are compared in lowercase
     static {
@@ -79,10 +78,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     }
     // class variables: collectors for links
-    private HashMap<MultiProtocolURI, String> rss;
-    private HashMap<MultiProtocolURI, String> anchors;
-    private HashMap<MultiProtocolURI, ImageEntry> images; // urlhash/image relation
-    private final HashMap<String, String> metas;
+    private Map<MultiProtocolURI, String> rss;
+    private Map<MultiProtocolURI, String> anchors;
+    private Map<MultiProtocolURI, ImageEntry> images; // urlhash/image relation
+    private final Map<String, String> metas;
     private String title;
     //private String headline;
     private List<String>[] headlines;
@@ -153,8 +152,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         if (b.length() != 0) content.append(b).append(32);
     }
-    private static final int find(final String s, final String m, int start) {
-        int p = s.indexOf(m, start);
+    private static final int find(final String s, final String m, final int start) {
+        final int p = s.indexOf(m, start);
         return (p < 0) ? Integer.MAX_VALUE : p;
     }
@@ -185,14 +184,13 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                     // addImage(images, ie);
                 }
             } catch (final NumberFormatException e) {}
-        }
-        if (tagname.equalsIgnoreCase("base")) try {
-            root = new MultiProtocolURI(tagopts.getProperty("href", ""));
-        } catch (final MalformedURLException e) {}
-        if (tagname.equalsIgnoreCase("frame")) {
+        } else if (tagname.equalsIgnoreCase("base")) {
+            try {
+                root = new MultiProtocolURI(tagopts.getProperty("href", ""));
+            } catch (final MalformedURLException e) {}
+        } else if (tagname.equalsIgnoreCase("frame")) {
             anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name", ""));
-        }
-        if (tagname.equalsIgnoreCase("meta")) {
+        } else if (tagname.equalsIgnoreCase("meta")) {
             String name = tagopts.getProperty("name", "");
             if (name.length() > 0) {
                 metas.put(name.toLowerCase(), CharacterCoding.html2unicode(tagopts.getProperty("content", "")));
@@ -202,14 +200,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                     metas.put(name.toLowerCase(), CharacterCoding.html2unicode(tagopts.getProperty("content", "")));
                 }
             }
-        }
-        if (tagname.equalsIgnoreCase("area")) {
+        } else if (tagname.equalsIgnoreCase("area")) {
             final String areatitle = cleanLine(tagopts.getProperty("title", ""));
             //String alt = tagopts.getProperty("alt", "");
             final String href = tagopts.getProperty("href", "");
             if (href.length() > 0) anchors.put(absolutePath(href), areatitle);
-        }
-        if (tagname.equalsIgnoreCase("link")) {
+        } else if (tagname.equalsIgnoreCase("link")) {
             final MultiProtocolURI newLink = absolutePath(tagopts.getProperty("href", ""));
             if (newLink != null) {
@@ -227,18 +223,14 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                     anchors.put(newLink, linktitle);
                 }
             }
-        }
-        //start contrib [MN]
-        if (tagname.equalsIgnoreCase("embed")) {
+        } else if (tagname.equalsIgnoreCase("embed")) {
             anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name", ""));
-        }
-        if (tagname.equalsIgnoreCase("param")) {
+        } else if (tagname.equalsIgnoreCase("param")) {
             final String name = tagopts.getProperty("name", "");
             if (name.equalsIgnoreCase("movie")) {
                 anchors.put(absolutePath(tagopts.getProperty("value", "")), name);
             }
         }
-        //end contrib [MN]
         // fire event
         fireScrapeTag0(tagname, tagopts);
@@ -262,24 +254,20 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                 }
             }
         }
-        String h;
+        final String h;
         if ((tagname.equalsIgnoreCase("h1")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[0].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("h2")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("h2")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[1].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("h3")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("h3")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[2].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("h4")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("h4")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[3].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) {
             title = recursiveParse(text);
         }
@@ -287,7 +275,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         fireScrapeTag1(tagname, tagopts, text);
     }
-    private String recursiveParse(char[] inlineHtml) {
+    private String recursiveParse(final char[] inlineHtml) {
         if (inlineHtml.length < 14) return cleanLine(super.stripAll(inlineHtml));
         // start a new scraper to parse links inside this text
@@ -307,11 +295,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return cleanLine(super.stripAll(scraper.content.getChars()));
     }
-    private final static String cleanLine(String s) {
-        StringBuilder sb = new StringBuilder(s.length());
-        char c, l = ' ';
-        for (int i = 0; i < s.length(); i++) {
-            c = s.charAt(i);
+    private final static String cleanLine(final String s) {
+        final StringBuilder sb = new StringBuilder(s.length());
+        char l = ' ';
+        for (char c : s.toCharArray()) {
             if (c < ' ') c = ' ';
             if (c == ' ') {
                 if (l != ' ') sb.append(c);
@@ -358,9 +345,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public String[] getHeadlines(final int i) {
         assert ((i >= 1) && (i <= 4));
-        final String[] s = new String[headlines[i - 1].size()];
-        for (int j = 0; j < headlines[i - 1].size(); j++) s[j] = headlines[i - 1].get(j);
-        return s;
+        return headlines[i - 1].toArray(new String[headlines[i - 1].size()]);
     }
     public byte[] getText() {
@@ -389,7 +374,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * get all images
      * @return a map of <urlhash, ImageEntry>
      */
-    public HashMap<MultiProtocolURI, ImageEntry> getImages() {
+    public Map<MultiProtocolURI, ImageEntry> getImages() {
         // this returns a String(absolute url)/htmlFilterImageEntry - relation
         return images;
     }
@@ -448,13 +433,13 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return s;
     }
-    public HashSet<String> getContentLanguages() {
+    public Set<String> getContentLanguages() {
         // i.e. <meta name="DC.language" content="en" scheme="DCTERMS.RFC3066">
         // or <meta http-equiv="content-language" content="en">
         String s = metas.get("content-language");
         if (s == null) s = metas.get("dc.language");
         if (s == null) return null;
-        HashSet<String> hs = new HashSet<String>();
+        Set<String> hs = new HashSet<String>();
         String[] cl = s.split(" |,");
         int p;
         for (int i = 0; i < cl.length; i++) {
@@ -579,7 +564,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return scraper;
     }
-    public static void addAllImages(final HashMap<MultiProtocolURI, ImageEntry> a, final HashMap<MultiProtocolURI, ImageEntry> b) {
+    public static void addAllImages(final Map<MultiProtocolURI, ImageEntry> a, final Map<MultiProtocolURI, ImageEntry> b) {
         final Iterator<Map.Entry<MultiProtocolURI, ImageEntry>> i = b.entrySet().iterator();
         Map.Entry<MultiProtocolURI, ImageEntry> ie;
         while (i.hasNext()) {
@@ -588,7 +573,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         }
     }
-    public static void addImage(final HashMap<MultiProtocolURI, ImageEntry> a, final ImageEntry ie) {
+    public static void addImage(final Map<MultiProtocolURI, ImageEntry> a, final ImageEntry ie) {
         if (a.containsKey(ie.url())) {
             // in case of a collision, take that image that has the better image size tags
             if ((ie.height() > 0) && (ie.width() > 0)) a.put(ie.url(), ie);