From 3d95981f7d211b7935d9cc84bb29ffcb6ec51bf8 Mon Sep 17 00:00:00 2001
From: low012 <low012@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Mon, 27 Dec 2010 17:07:21 +0000
Subject: [PATCH] Code cleanup (interface types, final locals, for-each loops, else-if chains); fix split("+") regex in ViewFile

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7396 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/ViewFile.java                          | 33 ++++---
 source/de/anomic/crawler/ResultImages.java    | 11 +--
 source/net/yacy/document/Document.java        | 52 +++++++-----
 source/net/yacy/document/content/DCEntry.java |  6 +-
 .../document/content/SurrogateReader.java     |  6 +-
 source/net/yacy/document/content/dao/Dao.java |  6 +-
 .../content/dao/DatabaseConnection.java       |  6 +-
 .../yacy/document/content/dao/ImportDump.java |  6 +-
 .../yacy/document/content/dao/PhpBB3Dao.java  |  6 +-
 .../yacy/document/language/Identificator.java |  6 +-
 .../language/LanguageFilenameFilter.java      |  6 +-
 .../document/language/LanguageStatistics.java |  6 +-
 .../language/LanguageStatisticsHolder.java    |  6 +-
 .../document/parser/html/AbstractScraper.java | 14 ++-
 .../document/parser/html/ContentScraper.java  | 85 ++++++++-----------
 .../document/parser/images/bmpParser.java     |  6 +-
 .../parser/images/genericImageParser.java     |  6 +-
 .../document/parser/images/icoParser.java     |  6 +-
 18 files changed, 132 insertions(+), 141 deletions(-)
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index ab48a4ab2..a274c43bb 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -5,7 +5,9 @@
 //first published on http://www.anomic.de
 //Frankfurt, Germany, 2004
 
-//last major change: 12.07.2004
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 
 //This program is free software; you can redistribute it and/or modify
 //it under the terms of the GNU General Public License as published by
@@ -31,7 +33,6 @@ import java.net.MalformedURLException;
 import java.net.URLDecoder;
 import java.util.Collection;
 import java.util.Enumeration;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 
@@ -85,7 +86,6 @@ public class ViewFile {
             return prop;
         }
         
-        
         final int display = post.getInt("display", 1);
         
         // get segment
@@ -209,7 +209,7 @@ public class ViewFile {
             
         } else if (viewMode.equals("iframeCache")) {
             prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
-            String ext = url.getFileExtension();
+            final String ext = url.getFileExtension();
             if ("jpg.jpeg.png.gif".indexOf(ext) >= 0) {
                 prop.put("viewMode_png", 1);
                 prop.put("viewMode_png_url", url.toNormalform(false, true));
@@ -259,7 +259,7 @@ public class ViewFile {
                 if (sentences != null) {
                     
                     // Search word highlighting
-                    for (StringBuilder s: sentences) {
+                    for (final StringBuilder s: sentences) {
                         sentence = s.toString();
                         if (sentence.trim().length() > 0) {
                             prop.put("viewMode_sentences_" + i + "_nr", i + 1);
@@ -282,9 +282,9 @@ public class ViewFile {
                 if (sentences != null) {
                     
                     // Search word highlighting
-                    for (StringBuilder s: sentences) {
+                    for (final StringBuilder s: sentences) {
                         sentence = s.toString();
-                        Enumeration<String> tokens = Condenser.wordTokenizer(sentence, "UTF-8", LibraryProvider.dymLib);
+                        final Enumeration<String> tokens = Condenser.wordTokenizer(sentence, "UTF-8", LibraryProvider.dymLib);
                         while (tokens.hasMoreElements()) {
                             token = tokens.nextElement();
                             if (token.length() > 0) {
@@ -307,7 +307,7 @@ public class ViewFile {
                 i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
                 dark = (i % 2 == 0);
                 
-                final HashMap<MultiProtocolURI, ImageEntry> ts = document.getImages();
+                final Map<MultiProtocolURI, ImageEntry> ts = document.getImages();
                 final Iterator<ImageEntry> tsi = ts.values().iterator();
                 ImageEntry entry;
                 while (tsi.hasNext()) {
@@ -353,7 +353,7 @@ public class ViewFile {
             words = URLDecoder.decode(words, "UTF-8");
             if (words.indexOf(' ') >= 0) return words.split(" ");
             if (words.indexOf(',') >= 0) return words.split(",");
-            if (words.indexOf('+') >= 0) return words.split("+");
+            if (words.indexOf('+') >= 0) return words.split("\\+");
             w = new String[1];
             w[0] = words;
         } catch (final UnsupportedEncodingException e) {}
@@ -362,24 +362,23 @@ public class ViewFile {
     
     private static final String markup(final String[] wordArray, String message) {
         message = CharacterCoding.unicode2html(message, true);
-        if (wordArray != null)
-            for (int j = 0; j < wordArray.length; j++) {
-                final String currentWord = wordArray[j].trim();
+        if (wordArray != null) {
+            int j = 0;
+            for (String currentWord : wordArray) {
+                currentWord = currentWord.trim();
                 // TODO: replace upper-/lowercase words as well
                 message = message.replaceAll(currentWord,
-                                "<span class=\"" + HIGHLIGHT_CSS + ((j % MAX_HIGHLIGHTS) + 1) + "\">" +
+                                "<span class=\"" + HIGHLIGHT_CSS + ((j++ % MAX_HIGHLIGHTS) + 1) + "\">" +
                                 currentWord + 
                                 "</span>");
             }
+        }
         return message;
     }
     
     private static int putMediaInfo(final serverObjects prop, final String[] wordArray, int c, final Map<MultiProtocolURI, String> media, final String name, boolean dark) {
-        final Iterator<Map.Entry<MultiProtocolURI, String>> mi = media.entrySet().iterator();
-        Map.Entry<MultiProtocolURI, String> entry;
         int i = 0;
-        while (mi.hasNext()) {
-            entry = mi.next();
+        for (Map.Entry<MultiProtocolURI, String> entry : media.entrySet()) {
             prop.put("viewMode_links_" + c + "_nr", c);
             prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0));
             prop.putHTML("viewMode_links_" + c + "_type", name);
diff --git a/source/de/anomic/crawler/ResultImages.java b/source/de/anomic/crawler/ResultImages.java
index 352c961ea..04994366c 100755
--- a/source/de/anomic/crawler/ResultImages.java
+++ b/source/de/anomic/crawler/ResultImages.java
@@ -1,4 +1,4 @@
-// plasmaCrawlResultImages.java
+// ResultImages.java
 // (C) 2008 by by Detlef Reichl; detlef!reichl()gmx!org and Michael Peter Christen; mc@yacy.net
 // first published 13.04.2008 on http://yacy.net
 //
@@ -26,9 +26,10 @@
 
 package de.anomic.crawler;
 
-import java.util.HashMap;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ConcurrentMap;
 
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Document;
@@ -49,13 +50,13 @@ public class ResultImages {
     // we also check all links for a double-check so we don't get the same image more than once in any queue
     // image links may appear double here even if the pages where the image links are embedded already are checked for double-occurrence:
     // the same images may be linked from different pages
-    private static final ConcurrentHashMap<MultiProtocolURI, Long> doubleCheck = new ConcurrentHashMap<MultiProtocolURI, Long>(); // (url, time) when the url appeared first
+    private static final ConcurrentMap<MultiProtocolURI, Long> doubleCheck = new ConcurrentHashMap<MultiProtocolURI, Long>(); // (url, time) when the url appeared first
     
     public static void registerImages(final DigestURI source, final Document document, final boolean privateEntry) {
         if (document == null) return;
         if (source == null) return;
         
-        final HashMap<MultiProtocolURI, ImageEntry> images = document.getImages();
+        final Map<MultiProtocolURI, ImageEntry> images = document.getImages();
         for (final ImageEntry image: images.values()) {
             // do a double-check; attention: this can be time-consuming since this possibly needs a DNS-lookup
             if (doubleCheck.containsKey(image.url())) continue;
@@ -78,7 +79,7 @@ public class ResultImages {
                 } else {
                     ratio = (float) image.height() / (float) image.width();
                 }
-                if (ratio < 1.0f || ratio > 2.0f) good = false;
+                good = !(ratio < 1.0f || ratio > 2.0f);
             }
             if (good) {
                 if (privateEntry) {
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index d6efdd64f..b6399216a 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -1,11 +1,13 @@
-//plasmaParserDocument.java 
+//Document.java 
 //------------------------
 //part of YaCy
 //(C) by Michael Peter Christen; mc@yacy.net
 //first published on http://www.anomic.de
 //Frankfurt, Germany, 2005
 //
-//last major change: 24.04.2005
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 //This program is free software; you can redistribute it and/or modify
 //it under the terms of the GNU General Public License as published by
@@ -32,7 +34,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
+import java.io.Writer;
 import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -69,7 +73,7 @@ public class Document {
     private Object text;                        // the clear text, all that is visible
     private final Map<MultiProtocolURI, String> anchors; // all links embedded as clickeable entities (anchor tags)
     private final Map<MultiProtocolURI, String> rss; // all embedded rss feeds
-    private final HashMap<MultiProtocolURI, ImageEntry> images; // all visible pictures in document
+    private final Map<MultiProtocolURI, ImageEntry> images; // all visible pictures in document
     // the anchors and images - Maps are URL-to-EntityDescription mappings.
     // The EntityDescription appear either as visible text in anchors or as alternative
     // text in image tags.
@@ -87,7 +91,7 @@ public class Document {
                     final Object text,
                     final Map<MultiProtocolURI, String> anchors,
                     final Map<MultiProtocolURI, String> rss,
-                    final HashMap<MultiProtocolURI, ImageEntry> images,
+                    final Map<MultiProtocolURI, ImageEntry> images,
                     boolean indexingDenied) {
         this.source = location;
         this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
@@ -294,7 +298,7 @@ dc_rights
         if (this.text == null) return null;
         final Condenser.sentencesFromInputStreamEnum e = Condenser.sentencesFromInputStream(getText());
         e.pre(pre);
-        ArrayList<StringBuilder> sentences = new ArrayList<StringBuilder>();
+        List<StringBuilder> sentences = new ArrayList<StringBuilder>();
         while (e.hasNext()) {
             sentences.add(e.next());
         }
@@ -336,7 +340,7 @@ dc_rights
         return this.videolinks;
     }
     
-    public HashMap<MultiProtocolURI, ImageEntry> getImages() {
+    public Map<MultiProtocolURI, ImageEntry> getImages() {
         // returns all links enbedded as pictures (visible in document)
         // this resturns a htmlFilterImageEntry collection
         if (!resorted) resortLinks();
@@ -368,7 +372,7 @@ dc_rights
         audiolinks = new HashMap<MultiProtocolURI, String>();
         applinks   = new HashMap<MultiProtocolURI, String>();
         emaillinks = new HashMap<String, String>();
-        final HashMap<MultiProtocolURI, ImageEntry> collectedImages = new HashMap<MultiProtocolURI, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
+        final Map<MultiProtocolURI, ImageEntry> collectedImages = new HashMap<MultiProtocolURI, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
         Map.Entry<MultiProtocolURI, String> entry;
         while (i.hasNext()) {
             entry = i.next();
@@ -425,7 +429,7 @@ dc_rights
     public static Map<MultiProtocolURI, String> allSubpaths(final Collection<?> links) {
         // links is either a Set of Strings (urls) or a Set of
         // htmlFilterImageEntries
-        final HashSet<String> h = new HashSet<String>();
+        final Set<String> h = new HashSet<String>();
         Iterator<?> i = links.iterator();
         Object o;
         MultiProtocolURI url;
@@ -457,7 +461,7 @@ dc_rights
             } catch (final MalformedURLException e) { }
         // now convert the strings to yacyURLs
         i = h.iterator();
-        final HashMap<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
+        final Map<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
         while (i.hasNext()) {
             u = (String) i.next();
             try {
@@ -473,7 +477,7 @@ dc_rights
         // links is either a Set of Strings (with urls) or
         // htmlFilterImageEntries
         // we find all links that are part of a reference inside a url
-        final HashMap<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
+        final Map<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
         final Iterator<?> i = links.iterator();
         Object o;
         MultiProtocolURI url;
@@ -567,7 +571,7 @@ dc_rights
         return this.indexingDenied;
     }
     
-    public void writeXML(OutputStreamWriter os, Date date) throws IOException {
+    public void writeXML(final Writer os, final Date date) throws IOException {
         os.write("<record>\n");
         String title = this.dc_title();
         if (title != null && title.length() > 0) os.write("<dc:title><![CDATA[" + title + "]]></dc:title>\n");
@@ -593,11 +597,11 @@ dc_rights
         os.write("</record>\n");
     }
     
+    @Override
     public String toString() {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        OutputStreamWriter osw;
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         try {
-            osw = new OutputStreamWriter(baos, "UTF-8");
+            final Writer osw = new OutputStreamWriter(baos, "UTF-8");
             writeXML(osw, new Date());
             osw.close();
             return new String(baos.toByteArray(), "UTF-8");
@@ -631,7 +635,9 @@ dc_rights
      * @param docs
      * @return
      */
-    public static Document mergeDocuments(final MultiProtocolURI location, final String globalMime, Document[] docs) {
+    public static Document mergeDocuments(final MultiProtocolURI location,
+            final String globalMime, final Document[] docs)
+    {
         if (docs == null || docs.length == 0) return null;
         if (docs.length == 1) return docs[0];
         
@@ -646,7 +652,7 @@ dc_rights
 
         final Map<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
         final Map<MultiProtocolURI, String> rss = new HashMap<MultiProtocolURI, String>();
-        final HashMap<MultiProtocolURI, ImageEntry> images = new HashMap<MultiProtocolURI, ImageEntry>();
+        final Map<MultiProtocolURI, ImageEntry> images = new HashMap<MultiProtocolURI, ImageEntry>();
         
         for (Document doc: docs) {
             
@@ -706,15 +712,17 @@ dc_rights
                 false);
     }
     
-    public static Map<MultiProtocolURI, String> getHyperlinks(Document[] documents) {
-        Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
-        for (Document d: documents) result.putAll(d.getHyperlinks());
+    public static Map<MultiProtocolURI, String> getHyperlinks(final Document[] documents) {
+        final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
+        for (final Document d: documents) {
+            result.putAll(d.getHyperlinks());
+        }
         return result;
     }
     
-    public static Map<MultiProtocolURI, String> getImagelinks(Document[] documents) {
-        Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
-        for (Document d: documents) {
+    public static Map<MultiProtocolURI, String> getImagelinks(final Document[] documents) {
+        final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
+        for (final Document d: documents) {
             for (ImageEntry imageReference : d.getImages().values()) {
                 result.put(imageReference.url(), imageReference.alt());
             }
diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java
index f18d51b0b..039a111de 100644
--- a/source/net/yacy/document/content/DCEntry.java
+++ b/source/net/yacy/document/content/DCEntry.java
@@ -2,9 +2,9 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 15.04.2009 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/content/SurrogateReader.java b/source/net/yacy/document/content/SurrogateReader.java
index 6578eeea9..ab8fbd7be 100644
--- a/source/net/yacy/document/content/SurrogateReader.java
+++ b/source/net/yacy/document/content/SurrogateReader.java
@@ -2,9 +2,9 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 15.04.2009 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/content/dao/Dao.java b/source/net/yacy/document/content/dao/Dao.java
index 1455f14fc..62e43a1eb 100644
--- a/source/net/yacy/document/content/dao/Dao.java
+++ b/source/net/yacy/document/content/dao/Dao.java
@@ -2,9 +2,9 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 25.05.2009 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/content/dao/DatabaseConnection.java b/source/net/yacy/document/content/dao/DatabaseConnection.java
index 6187d9c43..6cfadca26 100644
--- a/source/net/yacy/document/content/dao/DatabaseConnection.java
+++ b/source/net/yacy/document/content/dao/DatabaseConnection.java
@@ -2,9 +2,9 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 11.06.2009 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/content/dao/ImportDump.java b/source/net/yacy/document/content/dao/ImportDump.java
index 59dc8c5e3..71a4a402a 100644
--- a/source/net/yacy/document/content/dao/ImportDump.java
+++ b/source/net/yacy/document/content/dao/ImportDump.java
@@ -2,9 +2,9 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 26.05.2009 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/content/dao/PhpBB3Dao.java b/source/net/yacy/document/content/dao/PhpBB3Dao.java
index e728ce3da..191e9a52e 100644
--- a/source/net/yacy/document/content/dao/PhpBB3Dao.java
+++ b/source/net/yacy/document/content/dao/PhpBB3Dao.java
@@ -2,9 +2,9 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 26.05.2009 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/language/Identificator.java b/source/net/yacy/document/language/Identificator.java
index 7ee072fdc..3ac9cfd63 100644
--- a/source/net/yacy/document/language/Identificator.java
+++ b/source/net/yacy/document/language/Identificator.java
@@ -4,9 +4,9 @@
 // first published on http://www.yacy.net
 // Braunschweig, Germany, 2008
 //
-// $LastChangedDate: 2008-05-23 23:00:00 +0200 (Fr, 23 Mai 2008) $
-// $LastChangedRevision: 4824 $
-// $LastChangedBy: low012 $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
diff --git a/source/net/yacy/document/language/LanguageFilenameFilter.java b/source/net/yacy/document/language/LanguageFilenameFilter.java
index 6ded9040b..c6b67da93 100644
--- a/source/net/yacy/document/language/LanguageFilenameFilter.java
+++ b/source/net/yacy/document/language/LanguageFilenameFilter.java
@@ -4,9 +4,9 @@
 // first published on http://www.yacy.net
 // Braunschweig, Germany, 2008
 //
-// $LastChangedDate: 2008-05-18 23:00:00 +0200 (Di, 18 Mai 2008) $
-// $LastChangedRevision: 4824 $
-// $LastChangedBy: low012 $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
diff --git a/source/net/yacy/document/language/LanguageStatistics.java b/source/net/yacy/document/language/LanguageStatistics.java
index 76ba979d4..70115d87e 100644
--- a/source/net/yacy/document/language/LanguageStatistics.java
+++ b/source/net/yacy/document/language/LanguageStatistics.java
@@ -4,9 +4,9 @@
 // first published on http://www.yacy.net
 // Braunschweig, Germany, 2008
 //
-// $LastChangedDate: 2008-05-18 23:00:00 +0200 (Di, 18 Mai 2008) $
-// $LastChangedRevision: 4824 $
-// $LastChangedBy: low012 $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
diff --git a/source/net/yacy/document/language/LanguageStatisticsHolder.java b/source/net/yacy/document/language/LanguageStatisticsHolder.java
index 0581e4896..1c384aed4 100644
--- a/source/net/yacy/document/language/LanguageStatisticsHolder.java
+++ b/source/net/yacy/document/language/LanguageStatisticsHolder.java
@@ -4,9 +4,9 @@
 // first published on http://www.yacy.net
 // Braunschweig, Germany, 2008
 //
-// $LastChangedDate: 2008-05-23 23:00:00 +0200 (Fr, 23 Mai 2008) $
-// $LastChangedRevision: 4824 $
-// $LastChangedBy: low012 $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
diff --git a/source/net/yacy/document/parser/html/AbstractScraper.java b/source/net/yacy/document/parser/html/AbstractScraper.java
index cc55b0b6b..cf51c841d 100644
--- a/source/net/yacy/document/parser/html/AbstractScraper.java
+++ b/source/net/yacy/document/parser/html/AbstractScraper.java
@@ -29,8 +29,8 @@
 
 package net.yacy.document.parser.html;
 
-import java.util.HashSet;
 import java.util.Properties;
+import java.util.Set;
 
 public abstract class AbstractScraper implements Scraper {
 
@@ -38,15 +38,15 @@ public abstract class AbstractScraper implements Scraper {
     public static final char rb = '>';
     public static final char sl = '/';
  
-    private HashSet<String> tags0;
-    private HashSet<String> tags1;
+    private Set<String> tags0;
+    private Set<String> tags1;
 
     /**
      * create a scraper. the tag sets must contain tags in lowercase!
      * @param tags0
      * @param tags1
      */
-    public AbstractScraper(final HashSet<String> tags0, final HashSet<String> tags1) {
+    public AbstractScraper(final Set<String> tags0, final Set<String> tags1) {
         this.tags0  = tags0;
         this.tags1  = tags1;
     }
@@ -68,11 +68,9 @@ public abstract class AbstractScraper implements Scraper {
     public abstract void scrapeTag1(String tagname, Properties tagopts, char[] text);
 
     protected static String stripAllTags(final char[] s) {
-        StringBuilder r = new StringBuilder(s.length);
+        final StringBuilder r = new StringBuilder(s.length);
         int bc = 0;
-        char c;
-        for (int p = 0; p < s.length; p++) {
-            c = s[p];
+        for (final char c : s) {
             if (c == lb) {
                 bc++;
                 r.append(' ');
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 7ac57b21b..715d16552 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -4,8 +4,6 @@
 // first published on http://www.anomic.de
 // Frankfurt, Germany, 2004
 //
-// Contains contributions by Marc Nause [MN]
-//
 // $LastChangedDate$
 // $LastChangedRevision$
 // $LastChangedBy$
@@ -41,6 +39,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 
 import javax.swing.event.EventListenerList;
 
@@ -55,8 +54,8 @@ import net.yacy.kelondro.util.ISO639;
 public class ContentScraper extends AbstractScraper implements Scraper {
 
     // statics: for initialization of the HTMLFilterAbstractScraper
-    private static final HashSet<String> linkTags0 = new HashSet<String>(9,0.99f);
-    private static final HashSet<String> linkTags1 = new HashSet<String>(7,0.99f);
+    private static final Set<String> linkTags0 = new HashSet<String>(9,0.99f);
+    private static final Set<String> linkTags1 = new HashSet<String>(7,0.99f);
 
     // all these tags must be given in lowercase, because the tags from the files are compared in lowercase
     static {
@@ -79,10 +78,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     }
 
     // class variables: collectors for links
-    private HashMap<MultiProtocolURI, String> rss;
-    private HashMap<MultiProtocolURI, String> anchors;
-    private HashMap<MultiProtocolURI, ImageEntry> images; // urlhash/image relation
-    private final HashMap<String, String> metas;
+    private Map<MultiProtocolURI, String> rss;
+    private Map<MultiProtocolURI, String> anchors;
+    private Map<MultiProtocolURI, ImageEntry> images; // urlhash/image relation
+    private final Map<String, String> metas;
     private String title;
     //private String headline;
     private List<String>[] headlines;
@@ -153,8 +152,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         if (b.length() != 0) content.append(b).append(32);
     }
 
-    private static final int find(final String s, final String m, int start) {
-        int p = s.indexOf(m, start);
+    private static final int find(final String s, final String m, final int start) {
+        final int p = s.indexOf(m, start);
         return (p < 0) ? Integer.MAX_VALUE : p;
     }
     
@@ -185,14 +184,13 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 //                    addImage(images, ie);
                 }
             } catch (final NumberFormatException e) {}
-        }
-        if (tagname.equalsIgnoreCase("base")) try {
-            root = new MultiProtocolURI(tagopts.getProperty("href", ""));
-        } catch (final MalformedURLException e) {}
-        if (tagname.equalsIgnoreCase("frame")) {
+        } else if(tagname.equalsIgnoreCase("base")) {
+            try {
+                root = new MultiProtocolURI(tagopts.getProperty("href", ""));
+            } catch (final MalformedURLException e) {}
+        } else if (tagname.equalsIgnoreCase("frame")) {
             anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
-        }
-        if (tagname.equalsIgnoreCase("meta")) {
+        } else if (tagname.equalsIgnoreCase("meta")) {
             String name = tagopts.getProperty("name", "");
             if (name.length() > 0) {
                 metas.put(name.toLowerCase(), CharacterCoding.html2unicode(tagopts.getProperty("content","")));
@@ -202,14 +200,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                     metas.put(name.toLowerCase(), CharacterCoding.html2unicode(tagopts.getProperty("content","")));
                 }
             }
-        }
-        if (tagname.equalsIgnoreCase("area")) {
+        } else if (tagname.equalsIgnoreCase("area")) {
             final String areatitle = cleanLine(tagopts.getProperty("title",""));
             //String alt   = tagopts.getProperty("alt","");
             final String href  = tagopts.getProperty("href", "");
             if (href.length() > 0) anchors.put(absolutePath(href), areatitle);
-        }
-        if (tagname.equalsIgnoreCase("link")) {
+        } else if (tagname.equalsIgnoreCase("link")) {
             final MultiProtocolURI newLink = absolutePath(tagopts.getProperty("href", ""));
 
             if (newLink != null) {
@@ -227,18 +223,14 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                     anchors.put(newLink, linktitle);
                 }
             }
-        }
-        //start contrib [MN]
-        if (tagname.equalsIgnoreCase("embed")) {
+        } else if(tagname.equalsIgnoreCase("embed")) {
             anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
-        }
-        if (tagname.equalsIgnoreCase("param")) {
+        } else if(tagname.equalsIgnoreCase("param")) {
             final String name = tagopts.getProperty("name", "");
             if (name.equalsIgnoreCase("movie")) {
                 anchors.put(absolutePath(tagopts.getProperty("value", "")),name);
             }
         }
-        //end contrib [MN]
 
         // fire event
         fireScrapeTag0(tagname, tagopts);
@@ -262,24 +254,20 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                 }
             }
         }
-        String h;
+        final String h;
         if ((tagname.equalsIgnoreCase("h1")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[0].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("h2")) && (text.length < 1024)) {
+        } else if((tagname.equalsIgnoreCase("h2")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[1].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("h3")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("h3")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[2].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("h4")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("h4")) && (text.length < 1024)) {
             h = recursiveParse(text);
             if (h.length() > 0) headlines[3].add(h);
-        }
-        if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) {
+        } else if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) {
             title = recursiveParse(text);
         }
 
@@ -287,7 +275,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         fireScrapeTag1(tagname, tagopts, text);
     }
 
-    private String recursiveParse(char[] inlineHtml) {
+    private String recursiveParse(final char[] inlineHtml) {
         if (inlineHtml.length < 14) return cleanLine(super.stripAll(inlineHtml));
         
         // start a new scraper to parse links inside this text
@@ -307,11 +295,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return cleanLine(super.stripAll(scraper.content.getChars()));
     }
     
-    private final static String cleanLine(String s) {
-        StringBuilder sb = new StringBuilder(s.length());
-        char c, l = ' ';
-        for (int i = 0; i < s.length(); i++) {
-            c = s.charAt(i);
+    private final static String cleanLine(final String s) {
+        final StringBuilder sb = new StringBuilder(s.length());
+        char l = ' ';
+        for (char c : s.toCharArray()) {
             if (c < ' ') c = ' ';
             if (c == ' ') {
                 if (l != ' ') sb.append(c);
@@ -358,9 +345,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     
     public String[] getHeadlines(final int i) {
         assert ((i >= 1) && (i <= 4));
-        final String[] s = new String[headlines[i - 1].size()];
-        for (int j = 0; j < headlines[i - 1].size(); j++) s[j] = headlines[i - 1].get(j);
-        return s;
+        return headlines[i - 1].toArray(new String[headlines[i - 1].size()]); // size by this level's list, not headlines.length, to avoid null padding
     }
     
     public byte[] getText() {
@@ -389,7 +374,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * get all images
      * @return a map of <urlhash, ImageEntry>
      */
-    public HashMap<MultiProtocolURI, ImageEntry> getImages() {
+    public Map<MultiProtocolURI, ImageEntry> getImages() {
         // this resturns a String(absolute url)/htmlFilterImageEntry - relation
         return images;
     }
@@ -448,13 +433,13 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return s;
     }
     
-    public HashSet<String> getContentLanguages() {
+    public Set<String> getContentLanguages() {
         // i.e. <meta name="DC.language" content="en" scheme="DCTERMS.RFC3066">
         // or <meta http-equiv="content-language" content="en">
         String s = metas.get("content-language");
         if (s == null) s = metas.get("dc.language");
         if (s == null) return null;
-        HashSet<String> hs = new HashSet<String>();
+        Set<String> hs = new HashSet<String>();
         String[] cl = s.split(" |,");
         int p;
         for (int i = 0; i < cl.length; i++) {
@@ -579,7 +564,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         return scraper;
     }
     
-    public static void addAllImages(final HashMap<MultiProtocolURI, ImageEntry> a, final HashMap<MultiProtocolURI, ImageEntry> b) {
+    public static void addAllImages(final Map<MultiProtocolURI, ImageEntry> a, final Map<MultiProtocolURI, ImageEntry> b) {
         final Iterator<Map.Entry<MultiProtocolURI, ImageEntry>> i = b.entrySet().iterator();
         Map.Entry<MultiProtocolURI, ImageEntry> ie;
         while (i.hasNext()) {
@@ -588,7 +573,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         }
     }
     
-    public static void addImage(final HashMap<MultiProtocolURI, ImageEntry> a, final ImageEntry ie) {
+    public static void addImage(final Map<MultiProtocolURI, ImageEntry> a, final ImageEntry ie) {
         if (a.containsKey(ie.url())) {
             // in case of a collision, take that image that has the better image size tags
             if ((ie.height() > 0) && (ie.width() > 0)) a.put(ie.url(), ie);
diff --git a/source/net/yacy/document/parser/images/bmpParser.java b/source/net/yacy/document/parser/images/bmpParser.java
index b98d04cfb..57a9e63d2 100644
--- a/source/net/yacy/document/parser/images/bmpParser.java
+++ b/source/net/yacy/document/parser/images/bmpParser.java
@@ -2,9 +2,9 @@
 // (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 15.07.2007 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index c4116d6d6..d41910cfb 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -4,9 +4,9 @@
 //
 // This is a part of YaCy, a peer-to-peer based web search engine
 //
-// $LastChangedDate: 2009-10-11 02:12:19 +0200 (So, 11 Okt 2009) $
-// $LastChangedRevision: 6398 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 // 
diff --git a/source/net/yacy/document/parser/images/icoParser.java b/source/net/yacy/document/parser/images/icoParser.java
index 65bf7337d..27d7b79c1 100644
--- a/source/net/yacy/document/parser/images/icoParser.java
+++ b/source/net/yacy/document/parser/images/icoParser.java
@@ -2,9 +2,9 @@
 // (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 15.07.2007 on http://yacy.net
 //
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedDate$
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 //