- enhanced html parser: recognized much more details in the content

- added more properties to solr index - refactoring - more constants in switchboard - fix for some NPEs - recognition of more images - removed synchronization in HandleMap (obviously not necessary?) - added a nolocal configuration to remove excessive DNS lookups (works only on allip - default off). Indexes produced with this setting are all flagged with 'local' and are (on purpose) not usable for freeworld because they will be rejected as being local. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7672 6c8d7289-2bf4-0310-a012-ef5d649a1542
14 years ago · b77b8cac0c
parent bc84d2bc9d
commit b77b8cac0c
50 changed files with 609 additions and 276 deletions
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@ -98,6 +98,13 @@ filesize.max.other = 8589934591
 network.unit.definition = defaults/yacy.network.freeworld.unit
 #network.unit.definition = defaults/yacy.network.intranet.unit

+# distinguish intranet/internet IPs:
+# if this setting is set to true, then only URL hashes with the 'intranet' flag are created, even if the
+# URL is on the internet. This can be done to enhance the crawling speed dramatically since the DNS lookup
+# that checks whether a host is on the internet or in an intranet can be omitted.
+# This option is only valid if the network.unit.domain property is set to 'any'
+network.unit.domain.nocheck = false
+
 # Update process properties
 # The update server location is given in the network.unit.definition,
 # but the settings for update processing and cycles are individual.
--- a/htroot/ConfigNetwork_p.java
+++ b/htroot/ConfigNetwork_p.java
@ -190,7 +190,7 @@ public class ConfigNetwork_p {
        prop.putHTML("network.unit.definition", sb.getConfig("network.unit.definition", ""));
        prop.putHTML("network.unit.name", sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""));
        prop.putHTML("network.unit.description", sb.getConfig("network.unit.description", ""));
-        prop.putHTML("network.unit.domain", sb.getConfig("network.unit.domain", ""));
+        prop.putHTML("network.unit.domain", sb.getConfig(SwitchboardConstants.NETWORK_DOMAIN, ""));
        prop.putHTML("network.unit.dht", sb.getConfig("network.unit.dht", ""));
        networkBootstrapLocations.remove(sb.getConfig("network.unit.definition", ""));
        int c = 0;
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@ -34,6 +34,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@ -414,7 +415,7 @@ public class Crawler_p {
                            writer.close();
                            
                            // get links and generate filter
-                            final Map<MultiProtocolURI, String> hyperlinks = scraper.getAnchors();
+                            final Map<MultiProtocolURI, Properties> hyperlinks = scraper.getAnchors();
                            if (fullDomain && newcrawlingdepth > 0) newcrawlingMustMatch = siteFilter(hyperlinks.keySet());
                            
                            final DigestURI crawlURL = new DigestURI("file://" + crawlingFile.toString());
@ -492,7 +493,7 @@ public class Crawler_p {
                        // String description = scraper.getDescription();
                        
                        // get links and generate filter
-                        final Map<MultiProtocolURI, String> hyperlinks = scraper.getAnchors();
+                        final Map<MultiProtocolURI, Properties> hyperlinks = scraper.getAnchors();
                        if (fullDomain && newcrawlingdepth > 0) newcrawlingMustMatch = siteFilter(hyperlinks.keySet());

                        // put links onto crawl queue
@ -515,10 +516,10 @@ public class Crawler_p {
                                cachePolicy);
                        sb.crawler.putActive(profile.handle().getBytes(), profile);
                        sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
-                        final Iterator<Map.Entry<MultiProtocolURI, String>> linkiterator = hyperlinks.entrySet().iterator();
+                        final Iterator<Map.Entry<MultiProtocolURI, Properties>> linkiterator = hyperlinks.entrySet().iterator();
                        DigestURI nexturl;
                        while (linkiterator.hasNext()) {
-                            final Map.Entry<MultiProtocolURI, String> e = linkiterator.next();
+                            final Map.Entry<MultiProtocolURI, Properties> e = linkiterator.next();
                            if (e.getKey() == null) continue;
                            nexturl = new DigestURI(e.getKey());
                            // remove the url from the database to be prepared to crawl them again
@ -530,7 +531,7 @@ public class Crawler_p {
                                    sb.peers.mySeed().hash.getBytes(), 
                                    nexturl, 
                                    null, 
-                                    e.getValue(), 
+                                    e.getValue().getProperty("name", ""), 
                                    new Date(),
                                    profile.handle(),
                                    0,
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@ -35,6 +35,7 @@ import java.net.UnknownHostException;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Properties;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ConcurrentHashMap;

@ -231,7 +232,7 @@ public final class CrawlStacker {
            }
        }
    }
-    public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final Map<MultiProtocolURI, String> hyperlinks, boolean replace) {
+    public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final Map<MultiProtocolURI, Properties> hyperlinks, boolean replace) {
        new Thread() {
            public void run() {
                enqueueEntries(initiator, profileHandle, hyperlinks, true);
@ -239,8 +240,8 @@ public final class CrawlStacker {
        }.start();
    }

-    public void enqueueEntries(byte[] initiator, String profileHandle, Map<MultiProtocolURI, String> hyperlinks, boolean replace) {
-        for (Map.Entry<MultiProtocolURI, String> e: hyperlinks.entrySet()) {
+    public void enqueueEntries(byte[] initiator, String profileHandle, Map<MultiProtocolURI, Properties> hyperlinks, boolean replace) {
+        for (Map.Entry<MultiProtocolURI, Properties> e: hyperlinks.entrySet()) {
            if (e.getKey() == null) continue;
            
            // delete old entry, if exists to force a re-load of the url (thats wanted here)
@ -272,7 +273,7 @@ public final class CrawlStacker {
                        initiator, 
                        url, 
                        null, 
-                        e.getValue(), 
+                        e.getValue().getProperty("name", ""), 
                        new Date(),
                        profileHandle,
                        0,
--- a/source/de/anomic/crawler/RobotsTxt.java
+++ b/source/de/anomic/crawler/RobotsTxt.java
@ -305,7 +305,7 @@ public class RobotsTxt {
            if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress.");
            
            // sending the get request
-            robotsTxt = client.GETbytes(robotsURL.toString());
+            robotsTxt = client.GETbytes(robotsURL);
            // statistics:
            if (robotsTxt != null) {
            	ByteCount.addAccountCount(ByteCount.CRAWLER, robotsTxt.length);
--- a/source/de/anomic/crawler/ZURL.java
+++ b/source/de/anomic/crawler/ZURL.java
@ -53,7 +53,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
    private static final int EcoFSBufferSize = 2000;
    private static final int maxStackSize    = 1000;
    
-    public final static Row rowdef = new Row(
+    private final static Row rowdef = new Row(
            "String urlhash-"   + Word.commonHashLength + ", " + // the url's hash
            "String executor-"  + Word.commonHashLength + ", " + // the crawling executor
            "Cardinal workdate-8 {b256}, " +                           // the time when the url was last time tried to load
@ -64,8 +64,8 @@ public class ZURL implements Iterable<ZURL.Entry> {
    );

    // the class object
-    protected Index urlIndex;
-    protected final ConcurrentLinkedQueue<byte[]> stack;
+    private Index urlIndex;
+    private final ConcurrentLinkedQueue<byte[]> stack;
    
    public ZURL(
    		final File cachePath,
--- a/source/de/anomic/crawler/retrieval/HTTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java
@ -125,7 +125,7 @@ public final class HTTPLoader {
        client.setTimout(socketTimeout);
        client.setHeader(requestHeader.entrySet());
            // send request
-        	final byte[] responseBody = client.GETbytes(url.toString(), maxFileSize);
+        	final byte[] responseBody = client.GETbytes(url, maxFileSize);
        	final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
        	final int code = client.getHttpResponse().getStatusLine().getStatusCode();

@ -241,7 +241,7 @@ public final class HTTPLoader {
        final HTTPClient client = new HTTPClient();
        client.setTimout(20000);
        client.setHeader(requestHeader.entrySet());
-        	final byte[] responseBody = client.GETbytes(request.url().toString(), Long.MAX_VALUE);
+        	final byte[] responseBody = client.GETbytes(request.url(), Long.MAX_VALUE);
        	final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
        	final int code = client.getHttpResponse().getStatusLine().getStatusCode();
            // FIXME: 30*-handling (bottom) is never reached
--- a/source/de/anomic/data/BookmarkHelper.java
+++ b/source/de/anomic/data/BookmarkHelper.java
@ -36,6 +36,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.Map.Entry;
@ -130,7 +131,7 @@ public class BookmarkHelper {
            
        int importCount = 0;
        
-        Map<MultiProtocolURI, String> links = new HashMap<MultiProtocolURI, String>();
+        Map<MultiProtocolURI, Properties> links = new HashMap<MultiProtocolURI, Properties>();
        String title;
        MultiProtocolURI url;
        Bookmark bm;
@ -144,9 +145,9 @@ public class BookmarkHelper {
            writer.close();
            links = scraper.getAnchors();           
        } catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ e.getMessage());}
-        for (final Entry<MultiProtocolURI, String> link: links.entrySet()) {
+        for (final Entry<MultiProtocolURI, Properties> link: links.entrySet()) {
            url = link.getKey();
-            title = link.getValue();
+            title = link.getValue().getProperty("name", "");
            Log.logInfo("BOOKMARKS", "links.get(url)");
            if ("".equals(title)) {//cannot be displayed
                title = url.toString();
--- a/source/de/anomic/http/client/Cache.java
+++ b/source/de/anomic/http/client/Cache.java
@ -140,6 +140,15 @@ public final class Cache {
        if (responseHeader == null) throw new IOException("Cache.store of url " + url.toString() + " not possible: responseHeader == null");
        if (file == null) throw new IOException("Cache.store of url " + url.toString() + " not possible: file == null");
        log.logInfo("storing content of url " + url.toString() + ", " + file.length + " bytes");
+
+        // store the file
+        try {
+            fileDB.insert(url.hash(), file);
+        } catch (UnsupportedEncodingException e) {
+            throw new IOException("Cache.store: cannot write to fileDB (1): " + e.getMessage());
+        } catch (IOException e) {
+            throw new IOException("Cache.store: cannot write to fileDB (2): " + e.getMessage());
+        }
        
        // store the response header into the header database
        final HashMap<String, String> hm = new HashMap<String, String>();
@ -154,15 +163,6 @@ public final class Cache {
        } catch (Exception e) {
            throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
        }
-        
-        // store the file
-        try {
-            fileDB.insert(url.hash(), file);
-        } catch (UnsupportedEncodingException e) {
-            throw new IOException("Cache.store: cannot write to fileDB (1): " + e.getMessage());
-        } catch (IOException e) {
-            throw new IOException("Cache.store: cannot write to fileDB (2): " + e.getMessage());
-        }
        if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
    }
    
@ -173,8 +173,11 @@ public final class Cache {
     */
    public static boolean has(final DigestURI url) {
        boolean headerExists;
-        headerExists = responseHeaderDB.containsKey(url.hash());
-        boolean fileExists = fileDB.containsKey(url.hash());
+        boolean fileExists;
+        //synchronized (responseHeaderDB) {
+            headerExists = responseHeaderDB.containsKey(url.hash());
+            fileExists = fileDB.containsKey(url.hash());
+        //}
        if (headerExists && fileExists) return true;
        if (!headerExists && !fileExists) return false;
        // if not both is there then we do a clean-up
--- a/source/de/anomic/search/Segment.java
+++ b/source/de/anomic/search/Segment.java
@ -342,8 +342,8 @@ public class Segment {
                Response.docType(document.dc_format()), // doctype
                condenser.RESULT_FLAGS,                    // flags
                UTF8.getBytes(language),                   // language
-                document.inboundLinks(),                   // inbound links
-                document.outboundLinks(),                  // outbound links
+                document.inboundLinkCount(),                   // inbound links
+                document.outboundLinkCount(),                  // outbound links
                document.getAudiolinks().size(),           // laudio
                document.getImages().size(),               // limage
                document.getVideolinks().size(),           // lvideo
@ -363,8 +363,8 @@ public class Segment {
                condenser,                                    // document condenser
                language,                                     // document language
                Response.docType(document.dc_format()),       // document type
-                document.inboundLinks(),                      // inbound links
-                document.outboundLinks(),                     // outbound links
+                document.inboundLinkCount(),                      // inbound links
+                document.outboundLinkCount(),                     // outbound links
                searchEvent,                                  // a search event that can have results directly
                sourceName                                    // the name of the source where the index was created
        );
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@ -574,6 +574,9 @@ public final class Switchboard extends serverSwitch {
                isGlobalMode(),
                this.domainList); // Intranet and Global mode may be both true!
        
+        // possibly switch off localIP check
+        Domains.setNoLocalCheck(this.isAllIPMode()); 
+        
        // check status of account configuration: when local url crawling is allowed, it is not allowed
        // that an automatic authorization of localhost is done, because in this case crawls from local
        // addresses are blocked to prevent attack szenarios where remote pages contain links to localhost
@ -828,7 +831,7 @@ public final class Switchboard extends serverSwitch {
            setConfig(plasmaSwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
        }
        */
-        MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig("network.unit.domain", "global"));
+        MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global"));

    }
    
@ -941,11 +944,13 @@ public final class Switchboard extends serverSwitch {
                    this.crawler,
                    this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
                    this.peers,
-                    "local.any".indexOf(getConfig("network.unit.domain", "global")) >= 0,
-                    "global.any".indexOf(getConfig("network.unit.domain", "global")) >= 0,
+                    "local.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0,
+                    "global.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0,
                    this.domainList);

        }
+        Domains.setNoLocalCheck(this.isAllIPMode()); // possibly switch off localIP check
+        
        // start up crawl jobs
        continueCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
        continueCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
@ -1022,13 +1027,25 @@ public final class Switchboard extends serverSwitch {
    }

    public boolean isIntranetMode() {
-        return "local.any".indexOf(getConfig("network.unit.domain", "global")) >= 0;
+        return "local.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0;
    }
    
    public boolean isGlobalMode() {
-        return "global.any".indexOf(getConfig("network.unit.domain", "global")) >= 0;
+        return "global.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0;
    }    
    
+    /**
+     * Tells whether the configured network domain selects the all-IP mode.
+     * The check is a substring test against the literal "any", so the empty string
+     * and every fragment of "any" match as well, not only the exact value.
+     * @return true if the configured domain value occurs within "any"
+     */
+    public boolean isAllIPMode() {
+        final String domain = getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global");
+        return "any".indexOf(domain) >= 0;
+    }
+    
+    /**
+     * In nocheck mode the isLocal property is not checked, which omits the DNS lookup.
+     * This mode is only available together with the all-IP mode.
+     * @return true if all-IP mode is active and the nocheck option is switched on (defaults to false)
+     */
+    public boolean isIPNoCheckMode() {
+        if (!isAllIPMode()) return false;
+        return getConfigBool(SwitchboardConstants.NETWORK_DOMAIN_NOCHECK, false);
+    }
+    
    public boolean isRobinsonMode() {
        // we are in robinson mode, if we do not exchange index by dht distribution
        // we need to take care that search requests and remote indexing requests go only
@ -1893,9 +1910,13 @@ public final class Switchboard extends serverSwitch {
            for (Document doc: in.documents) {
                try {
                    String id = UTF8.String(new DigestURI(doc.dc_identifier(), null).hash());
-                    assert id.equals(UTF8.String(in.queueEntry.url().hash()));
+                    String iquh = UTF8.String(in.queueEntry.url().hash());
+                    if (!id.equals(iquh)) {
+                        log.logWarning("doc=" + id + ":" + doc.dc_identifier() + ", query=" + iquh  + ":" + in.queueEntry.url());
+                        // in case that this happens it appears that the doc id is the right one
+                    }
                    try {
-                        this.solrConnector.add(id, doc);
+                        this.solrConnector.add(id, in.queueEntry.getResponseHeader(), doc);
                    } catch (IOException e) {
                        Log.logWarning("SOLR", "failed to send " + in.queueEntry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
                    }
@ -1951,9 +1972,7 @@ public final class Switchboard extends serverSwitch {
            assert in.queueEntry != null;
            assert in.documents != null;
            assert in.queueEntry != null;
-            final Integer[] ioLinks = webStructure.generateCitationReference(in.queueEntry.url(), in.documents[i], (in.condenser == null) ? null : in.condenser[i], in.queueEntry.lastModified()); // [outlinksSame, outlinksOther]
-            in.documents[i].setInboundLinks(ioLinks[0].intValue());
-            in.documents[i].setOutboundLinks(ioLinks[1].intValue());
+            webStructure.generateCitationReference(in.queueEntry.url(), in.documents[i], (in.condenser == null) ? null : in.condenser[i], in.queueEntry.lastModified()); // [outlinksSame, outlinksOther]
        }
        return in;
    }
@ -2621,7 +2640,7 @@ public final class Switchboard extends serverSwitch {
                        yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
                    } else {
                        ssc++;
-                        final byte[] content = client.GETbytes(url.toString());
+                        final byte[] content = client.GETbytes(url);
                        enu = FileUtils.strings(content);
                        lc = 0;
                        while (enu.hasNext()) {
@ -2746,7 +2765,7 @@ public final class Switchboard extends serverSwitch {
        client.setHeader(reqHeader.entrySet());
        try {
            // sending request
-            final Map<String, String> result = FileUtils.table(client.GETbytes(url.toString()));
+            final Map<String, String> result = FileUtils.table(client.GETbytes(url));
            return (result == null) ? new HashMap<String, String>() : result;
        } catch (final Exception e) {
            Log.logException(e);
--- a/source/de/anomic/search/SwitchboardConstants.java
+++ b/source/de/anomic/search/SwitchboardConstants.java
@ -395,6 +395,8 @@ public final class SwitchboardConstants {
     * 
     */
    public static final String NETWORK_NAME = "network.unit.name";
+    public static final String NETWORK_DOMAIN = "network.unit.domain";
+    public static final String NETWORK_DOMAIN_NOCHECK = "network.unit.domain.nocheck";
    public static final String NETWORK_WHITELIST = "network.unit.access.whitelist";
    public static final String NETWORK_BLACKLIST = "network.unit.access.blacklist";
    
--- a/source/de/anomic/yacy/graphics/WebStructureGraph.java
+++ b/source/de/anomic/yacy/graphics/WebStructureGraph.java
@ -128,39 +128,31 @@ public class WebStructureGraph {
        }
    }
    
-    public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(final DigestURI url, final Document document, final Condenser condenser, final Date docDate) {
+    public void generateCitationReference(final DigestURI url, final Document document, final Condenser condenser, final Date docDate) {
        // generate citation reference
        final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
        final Iterator<MultiProtocolURI> it = hl.keySet().iterator();
        final HashSet<MultiProtocolURI> globalRefURLs = new HashSet<MultiProtocolURI>();
        final String refhost = url.getHost();
        MultiProtocolURI u;
-        int GCount = 0;
-        int LCount = 0;
        while (it.hasNext()) {
            u = it.next();
            if (u == null) continue;
-            if (refhost != null && u.getHost() != null && u.getHost().equals(refhost)) {
-                // this is a local link
-                LCount++;
-            } else {
+            if (refhost != null && u.getHost() != null && !u.getHost().equals(refhost)) {
                // this is a global link
-                GCount++;
                globalRefURLs.add(u);
            }
        }
-        
+        leanrefObject lro = new leanrefObject(url, globalRefURLs);
        if (globalRefURLs.size() > 0) try {
            if (this.publicRefDNSResolvingWorker.isAlive()) {
-                this.publicRefDNSResolvingQueue.put(new leanrefObject(url, globalRefURLs));
+                this.publicRefDNSResolvingQueue.put(lro);
            } else {
-                this.learnrefs(new leanrefObject(url, globalRefURLs));
+                this.learnrefs(lro);
            }
        } catch (InterruptedException e) {
-            this.learnrefs(new leanrefObject(url, globalRefURLs));
+            this.learnrefs(lro);
        }
-        
-        return new Integer[] {Integer.valueOf(LCount), Integer.valueOf(GCount)};
    }
    
    public void learnrefs(final leanrefObject lro) {
--- a/source/de/anomic/yacy/yacyRelease.java
+++ b/source/de/anomic/yacy/yacyRelease.java
@ -39,6 +39,7 @@ import java.security.SignatureException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.SortedSet;
 import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
@ -239,7 +240,7 @@ public final class yacyRelease extends yacyVersion {
        }
        
        // analyze links in scraper resource, and find link to latest release in it
-        final Map<MultiProtocolURI, String> anchors = scraper.getAnchors(); // a url (String) / name (String) relation
+        final Map<MultiProtocolURI, Properties> anchors = scraper.getAnchors(); // a url (String) / name (String) relation
        final TreeSet<yacyRelease> mainReleases = new TreeSet<yacyRelease>();
        final TreeSet<yacyRelease> devReleases = new TreeSet<yacyRelease>();
        for (MultiProtocolURI url : anchors.keySet()) {
--- a/source/de/anomic/yacy/yacySeed.java
+++ b/source/de/anomic/yacy/yacySeed.java
@ -71,6 +71,7 @@ import net.yacy.kelondro.order.Digest;
 import net.yacy.kelondro.util.MapTools;
 import net.yacy.kelondro.util.OS;

+import de.anomic.search.Switchboard;
 import de.anomic.tools.bitfield;
 import de.anomic.tools.crypt;
 import de.anomic.yacy.dht.FlatWordPartitionScheme;
@ -844,8 +845,9 @@ public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yac
        if (ipString.length() > 0 && ipString.length() < 8) return ipString + " -> IP is too short: ";
        InetAddress ip = Domains.dnsResolve(ipString);
        if (ip == null) return ipString + " -> IP is not proper"; //this does not work with staticIP
-        if (ipString.equals("localhost") || ipString.startsWith("127.") || ipString.startsWith("0:0:0:0:0:0:0:1")) return ipString + " - IP for localhost rejected";
-        return null;
+        if (Switchboard.getSwitchboard().isAllIPMode()) return null;
+        boolean islocal = Domains.isLocal(ip);
+        return (!islocal && Switchboard.getSwitchboard().isGlobalMode() || (islocal && Switchboard.getSwitchboard().isIntranetMode())) ? null : ipString + " - IP for localhost rejected";
    }

    @Override
--- a/source/de/anomic/yacy/yacySeedDB.java
+++ b/source/de/anomic/yacy/yacySeedDB.java
@ -892,7 +892,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
        byte[] content = null;
        try {
            // send request
-        	content = client.GETbytes(seedURL.toString());
+        	content = client.GETbytes(seedURL);
        } catch (final Exception e) {
        	throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
        }
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@ -1142,7 +1142,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
                client.setTimout(timeout);
                client.setUserAgent(userAgent);
                client.setHost(this.getHost());
-                return new ByteArrayInputStream(client.GETbytes(this.toNormalform(false, false)));
+                return new ByteArrayInputStream(client.GETbytes(this));
        }
        
        return null;
@ -1163,7 +1163,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
                client.setTimout(timeout);
                client.setUserAgent(userAgent);
                client.setHost(this.getHost());
-                return client.GETbytes(this.toNormalform(false, false));
+                return client.GETbytes(this);
        }
        
        return null;
--- a/source/net/yacy/cora/protocol/Domains.java
+++ b/source/net/yacy/cora/protocol/Domains.java
@ -426,7 +426,8 @@ public class Domains {
        // the id=7 is used to flag local addresses
    }
    
-    private static KeyList globalHosts;
+    private static KeyList globalHosts = null;
+    private static boolean noLocalCheck = false;

    public static void init(File globalHostsnameCache) {
        if (globalHostsnameCache == null) {
@ -437,6 +438,10 @@ public class Domains {
            globalHosts = null;
        }
    }
+    
+    /**
+     * Sets the flag that short-cuts the local-address tests of this class.
+     * While the flag is set, isLocalhost and isLocal return true without inspecting their argument.
+     * @param v true to switch the local checks off, false to run them as usual
+     */
+    public static void setNoLocalCheck(boolean v) {
+        Domains.noLocalCheck = v;
+    }

    public static void close() {
        if (globalHosts != null) try {globalHosts.close();} catch (IOException e) {}
@ -532,10 +537,12 @@ public class Domains {
            ip = NAME_CACHE_HIT.get(host);
            if (ip != null) {
                //System.out.println("DNSLOOKUP-CACHE-HIT(SYNC) " + host);
+                LOOKUP_SYNC.remove(host);
                return ip;
            }
            if (NAME_CACHE_MISS.containsKey(host)) {
                //System.out.println("DNSLOOKUP-CACHE-MISS(SYNC) " + host);
+                LOOKUP_SYNC.remove(host);
                return null;
            }
            
@ -563,14 +570,13 @@ public class Domains {
                }
            }
            LOOKUP_SYNC.remove(host);
-            
            return ip;
        }
    }
    
    private final static Pattern dotPattern = Pattern.compile("\\.");
    
-    private static final InetAddress parseInetAddress(String ip) {
+    public static final InetAddress parseInetAddress(String ip) {
        if (ip == null || ip.length() < 8) return null;
        if (ip.equals("0:0:0:0:0:0:0:1%0")) ip = "127.0.0.1"; 
        final String[] ips = dotPattern.split(ip);
@ -776,7 +782,8 @@ public class Domains {
    }
    
    public static boolean isLocalhost(final String host) {
-        return ("127.0.0.1".equals(host) ||
+        return (noLocalCheck ||
+                "127.0.0.1".equals(host) ||
                "localhost".equals(host) ||
                host.startsWith("0:0:0:0:0:0:0:1")
                );
@ -787,7 +794,8 @@ public class Domains {
    }
    
    private static boolean isLocal(final String host, boolean recursive) {
-        if (host == null || host.length() == 0) return true;
+        
+        if (noLocalCheck || host == null || host.length() == 0) return true;

        // FIXME IPv4 only
        // check local ip addresses
@ -802,11 +810,13 @@ public class Domains {
        // check dns lookup: may be a local address even if the domain name looks global
        if (!recursive) return false;
        final InetAddress a = dnsResolve(host);
-        boolean localp = a == null || a.isAnyLocalAddress() || a.isLinkLocalAddress() || a.isLoopbackAddress() || a.isSiteLocalAddress() || isLocal(a.getHostAddress(), false);
-        return localp;
+        return isLocal(a);
    }
    
-    
+    /**
+     * Classifies a resolved address as local or not.
+     * @param a the address to test; null counts as local
+     * @return true if the local check is switched off, if a is null, if a is an any-local,
+     *         link-local, loopback or site-local address, or if the non-recursive host check
+     *         on its textual address reports local
+     */
+    public static boolean isLocal(InetAddress a) {
+        if (noLocalCheck || a == null) return true;
+        if (a.isAnyLocalAddress() || a.isLinkLocalAddress()) return true;
+        if (a.isLoopbackAddress() || a.isSiteLocalAddress()) return true;
+        return isLocal(a.getHostAddress(), false);
+    }
    
    public static void main(final String[] args) {
        /*
--- a/source/net/yacy/cora/protocol/ResponseHeader.java
+++ b/source/net/yacy/cora/protocol/ResponseHeader.java
@ -69,7 +69,7 @@ public class ResponseHeader extends HeaderFramework {
    
    public Date lastModified() {
        Date d = headerDate(LAST_MODIFIED);
-        if (d == null) return new Date(); else return d;
+        return (d == null) ? date() : d; // no Last-Modified header value: use date() instead
    }
    
    public long age() {
--- a/source/net/yacy/cora/protocol/http/HTTPClient.java
+++ b/source/net/yacy/cora/protocol/http/HTTPClient.java
@ -267,7 +267,10 @@ public class HTTPClient {
     * @throws IOException 
     */
    public byte[] GETbytes(final String uri) throws IOException {
-    	return GETbytes(uri, Long.MAX_VALUE);
+        return GETbytes(uri, Long.MAX_VALUE);
+    }
+    public byte[] GETbytes(final MultiProtocolURI url) throws IOException {
+        return GETbytes(url, Long.MAX_VALUE);
    }
    
    /**
@ -279,12 +282,15 @@ public class HTTPClient {
     * @throws IOException 
     */
    public byte[] GETbytes(final String uri, long maxBytes) throws IOException {
-        final MultiProtocolURI url = new MultiProtocolURI(uri);
+        return GETbytes(new MultiProtocolURI(uri), maxBytes);
+    }
+    
+    public byte[] GETbytes(final MultiProtocolURI url, long maxBytes) throws IOException {
        boolean localhost = url.getHost().equals("localhost");
        String urix = url.toNormalform(true, false, !localhost, false);
-    	final HttpGet httpGet = new HttpGet(urix);
-    	if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
-    	return getContentBytes(httpGet, maxBytes);
+        final HttpGet httpGet = new HttpGet(urix);
+        if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
+        return getContentBytes(httpGet, maxBytes);
    }
    
    /**
--- a/source/net/yacy/cora/protocol/http/HTTPConnector.java
+++ b/source/net/yacy/cora/protocol/http/HTTPConnector.java
@ -85,7 +85,7 @@ public class HTTPConnector {
 		client.setHost(vhost);
 		byte[] b;
 		try {
-		    b = client.POSTbytes(url.toNormalform(true, false, true, false), post, usegzip);
+		    b = client.POSTbytes(url, url.getHost(), post, usegzip);
 		} finally {
 		    client.finish();
 		}
--- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
@ -25,10 +25,20 @@
 package net.yacy.cora.services.federated.solr;


+import java.net.InetAddress;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
 import net.yacy.cora.document.UTF8;
+import net.yacy.cora.protocol.Domains;
+import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.document.Document;
+import net.yacy.document.parser.html.ContentScraper;
+import net.yacy.document.parser.html.ImageEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
-
+import net.yacy.cora.document.MultiProtocolURI;
 import org.apache.solr.common.SolrInputDocument;

 public enum SolrScheme {
@ -37,21 +47,21 @@ public enum SolrScheme {
    DublinCore;

    
-    public SolrInputDocument yacy2solr(String id, Document document) {
-        if (this == SolrCell) return yacy2solrSolrCell(id, document);
+    public SolrInputDocument yacy2solr(String id, ResponseHeader header, Document document) {
+        if (this == SolrCell) return yacy2solrSolrCell(id, header, document);
        return null;
    }
    
-    public static SolrInputDocument yacy2solrSolrCell(String id, Document yacydoc) {
+    public static SolrInputDocument yacy2solrSolrCell(String id, ResponseHeader header, Document yacydoc) {
        // we user the SolrCell design as index scheme
        SolrInputDocument solrdoc = new SolrInputDocument();
        DigestURI digestURI = new DigestURI(yacydoc.dc_source());
        solrdoc.addField("id", id);
        solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
+        InetAddress address = Domains.dnsResolve(digestURI.getHost());
+        if (address != null) solrdoc.addField("attr_ip", address.getHostAddress());
+        if (digestURI.getHost() != null) solrdoc.addField("attr_host", digestURI.getHost());
        /*
-         *
-    private final MultiProtocolURI source;      // the source url
-    private final String mimeType;              // mimeType as taken from http header
    private final String charset;               // the charset of the document
    private final List<String> keywords;        // most resources provide a keyword field
    private       StringBuilder title;          // a document title, taken from title or h1 tag; shall appear as headline of search result
@ -73,14 +83,149 @@ public enum SolrScheme {
    private int inboundLinks, outboundLinks; // counters for inbound and outbound links, are counted after calling notifyWebStructure
    private Set<String> languages;
    private boolean indexingDenied;
-    private float lon, lat;
         */
        solrdoc.addField("title", yacydoc.dc_title());
        solrdoc.addField("author", yacydoc.dc_creator());
        solrdoc.addField("description", yacydoc.dc_description());
        solrdoc.addField("content_type", yacydoc.dc_format());
-        solrdoc.addField("subject", yacydoc.dc_subject(' '));
-        solrdoc.addField("text", UTF8.String(yacydoc.getTextBytes()));
+        solrdoc.addField("last_modified", header.lastModified());
+        solrdoc.addField("keywords", yacydoc.dc_subject(' '));
+        String content = UTF8.String(yacydoc.getTextBytes());
+        solrdoc.addField("attr_text", content);
+        int contentwc = content.split(" ").length;
+        solrdoc.addField("wordcount_i", contentwc);
+
+        // path elements of link
+        String path = digestURI.getPath();
+        if (path != null) {
+            String[] paths = path.split("/");
+            if (paths.length > 0) solrdoc.addField("attr_paths", paths);
+        }
+        
+        // list all links
+        Map<MultiProtocolURI, Properties> alllinks = yacydoc.getAnchors();        
+        int c = 0;
+        String[] inboundlinks = new String[yacydoc.inboundLinkCount()];
+        solrdoc.addField("inboundlinkscount_i", inboundlinks.length);
+        for (MultiProtocolURI url: yacydoc.inboundLinks()) {
+            Properties p = alllinks.get(url);
+            String name = p.getProperty("name", "");
+            String rel = p.getProperty("rel", "");
+            inboundlinks[c++] =
+                "<a href=\"" + url.toNormalform(false, false) + "\"" +
+                ((rel.toLowerCase().equals("nofollow")) ? " rel=\"nofollow\"" : "") +
+                ">" +
+                ((name.length() > 0) ? name : "") + "</a>";
+        }
+        solrdoc.addField("attr_inboundlinks", inboundlinks);
+        // outbound links: rendered as anchor tags, the same way as the inbound links
+        c = 0;
+        String[] outboundlinks = new String[yacydoc.outboundLinkCount()];
+        solrdoc.addField("outboundlinkscount_i", outboundlinks.length);
+        for (MultiProtocolURI url: yacydoc.outboundLinks()) {
+            Properties p = alllinks.get(url);
+            String name = p.getProperty("name", "");
+            String rel = p.getProperty("rel", "");
+            outboundlinks[c++] =
+                "<a href=\"" + url.toNormalform(false, false) + "\"" +
+                ((rel.toLowerCase().equals("nofollow")) ? " rel=\"nofollow\"" : "") +
+                ">" + name + "</a>";
+        }
+        solrdoc.addField("attr_outboundlinks", outboundlinks); // store the rendered tags built above, not the raw URL objects
+        
+        // charset
+        solrdoc.addField("attr_charset", yacydoc.getCharset());
+
+        // coordinates
+        if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) {
+            solrdoc.addField("lon_coordinate", yacydoc.lon());
+            solrdoc.addField("lat_coordinate", yacydoc.lat());
+        }
+        solrdoc.addField("attr_httpstatus", "200");
+        Object parser = yacydoc.getParserObject();
+        if (parser instanceof ContentScraper) {
+            ContentScraper html = (ContentScraper) parser;
+            
+            // header tags
+            int h = 0;
+            int f = 1;
+            for (int i = 1; i <= 6; i++) {
+                String[] hs = html.getHeadlines(i);
+                h = h | (hs.length > 0 ? f : 0);
+                f = f * 2;
+                solrdoc.addField("attr_h" + i, hs);
+            }
+            solrdoc.addField("htags_i", h);
+
+            // meta tags
+            Map<String, String> metas = html.getMetas();
+            String robots = metas.get("robots");
+            if (robots != null) solrdoc.addField("attr_meta_robots", robots);
+            String generator = metas.get("generator");
+            if (generator != null) solrdoc.addField("attr_meta_generator", generator);
+            
+            // bold, italic and list items
+            String[] bold = html.getBold();
+            if (bold.length > 0) solrdoc.addField("attr_bold", bold);
+            String[] italic = html.getItalic();
+            if (italic.length > 0) solrdoc.addField("attr_italic", italic); // guard on italic itself, not on bold
+            String[] li = html.getLi();
+            solrdoc.addField("licount_i", li.length);
+            if (li.length > 0) solrdoc.addField("attr_li", li);
+            
+            // images
+            Collection<ImageEntry> imagesc = html.getImages().values();
+            String[] images = new String[imagesc.size()];
+            c = 0;
+            for (ImageEntry ie: imagesc) images[c++] = ie.toString();
+            solrdoc.addField("imagescount_i", images.length);
+            if (images.length > 0) solrdoc.addField("attr_images", images);
+
+            // style sheets
+            Map<MultiProtocolURI, String> csss = html.getCSS();
+            String[] css = new String[csss.size()];
+            c = 0;
+            for (Map.Entry<MultiProtocolURI, String> entry: csss.entrySet()) {
+                css[c++] =
+                    "<link rel=\"stylesheet\" type=\"text/css\" media=\"" + entry.getValue() + "\"" +
+                    " href=\""+ entry.getKey().toNormalform(false, false, false, false) + "\" />";
+            }
+            solrdoc.addField("csscount_i", css.length);
+            if (css.length > 0) solrdoc.addField("attr_css", css);
+            
+            // Scripts
+            Set<MultiProtocolURI> scriptss = html.getScript();
+            String[] scripts = new String[scriptss.size()];
+            c = 0;
+            for (MultiProtocolURI url: scriptss) {
+                scripts[c++] = url.toNormalform(false, false, false, false);
+            }
+            solrdoc.addField("scriptscount_i", scripts.length);
+            if (scripts.length > 0) solrdoc.addField("attr_scripts", scripts);
+            
+            // Frames
+            Set<MultiProtocolURI> framess = html.getFrames();
+            String[] frames = new String[framess.size()];
+            c = 0;
+            for (MultiProtocolURI entry: framess) {
+                frames[c++] = entry.toNormalform(false, false, false, false);
+            }
+            solrdoc.addField("framesscount_i", frames.length);
+            if (frames.length > 0) solrdoc.addField("attr_frames", frames);
+            
+            // IFrames: count and normalized URLs, stored like the frames above
+            Set<MultiProtocolURI> iframess = html.getFrames(); // NOTE(review): same accessor as the frames section, so these fields mirror the frame values - confirm whether a dedicated iframe accessor was intended
+            String[] iframes = new String[iframess.size()];
+            c = 0;
+            for (MultiProtocolURI entry: iframess) {
+                iframes[c++] = entry.toNormalform(false, false, false, false);
+            }
+            solrdoc.addField("iframesscount_i", iframes.length);
+            if (iframes.length > 0) solrdoc.addField("attr_iframes", iframes);
+            
+            // flash embedded
+            solrdoc.addField("flash_b", html.containsFlash());
+        }
        return solrdoc;
    }
    
@ -88,11 +233,7 @@ public enum SolrScheme {
    /*
     * standard solr scheme

-   <field name="id" type="string" indexed="true" stored="true" required="true" /> 
-   <field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
   <field name="name" type="textgen" indexed="true" stored="true"/>
-   <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
-   <field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/>
   <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
   <field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
   <field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
@ -100,7 +241,6 @@ public enum SolrScheme {
   <field name="weight" type="float" indexed="true" stored="true"/>
   <field name="price"  type="float" indexed="true" stored="true"/>
   <field name="popularity" type="int" indexed="true" stored="true" />
-   <field name="inStock" type="boolean" indexed="true" stored="true" />

   <!-- Common metadata fields, named specifically to match up with
     SolrCell metadata when parsing rich documents such as Word, PDF.
@ -118,13 +258,5 @@ public enum SolrScheme {
   <field name="last_modified" type="date" indexed="true" stored="true"/>
   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>

-   <!-- catchall field, containing all other searchable text fields (implemented
-        via copyField further on in this schema  -->
-   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
-
-   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
-        leading wildcard queries. -->
-   <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
-
     */
 }
--- a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
@ -41,6 +41,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;

+import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.document.Document;
 import net.yacy.kelondro.logging.Log;

@ -187,12 +188,12 @@ public class SolrSingleConnector {
    }
    */
    
-    public void add(String id, Document doc) throws IOException {
-        add(id, doc, this.scheme);
+    public void add(String id, ResponseHeader header, Document doc) throws IOException {
+        add(id, header, doc, this.scheme);
    }
    
-    public void add(String id, Document doc, SolrScheme tempScheme) throws IOException {
-        SolrInputDocument solrdoc = tempScheme.yacy2solr(id, doc);
+    public void add(String id, ResponseHeader header, Document doc, SolrScheme tempScheme) throws IOException {
+        SolrInputDocument solrdoc = tempScheme.yacy2solr(id, header, doc);
        int thisrrc = this.transmissionRoundRobinCounter;
        int nextrrc = thisrrc++;
        if (nextrrc >= transmissionQueueCount) nextrrc = 0;
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@ -47,6 +47,7 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.TreeSet;

@ -72,33 +73,36 @@ public class Document {
    private final List<String>  sections;       // if present: more titles/headlines appearing in the document
    private final StringBuilder description;    // an abstract, if present: short content description
    private Object text;                        // the clear text, all that is visible
-    private final Map<MultiProtocolURI, String> anchors; // all links embedded as clickeable entities (anchor tags)
+    private final Map<MultiProtocolURI, Properties> anchors; // all links embedded as clickeable entities (anchor tags)
    private final Map<MultiProtocolURI, String> rss; // all embedded rss feeds
    private final Map<MultiProtocolURI, ImageEntry> images; // all visible pictures in document
    // the anchors and images - Maps are URL-to-EntityDescription mappings.
    // The EntityDescription appear either as visible text in anchors or as alternative
    // text in image tags.
-    private Map<MultiProtocolURI, String> hyperlinks, audiolinks, videolinks, applinks;
+    private Map<MultiProtocolURI, String> hyperlinks, audiolinks, videolinks, applinks, inboundlinks, outboundlinks;
    private Map<String, String> emaillinks;
    private MultiProtocolURI favicon;
    private boolean resorted;
-    private int inboundLinks, outboundLinks; // counters for inbound and outbound links, are counted after calling notifyWebStructure
    private Set<String> languages;
    private boolean indexingDenied;
    private float lon, lat;
+    private Object parserObject; // the source object that was used to create the Document

-    public Document(final MultiProtocolURI location, final String mimeType, final String charset, final Set<String> languages,
+    public Document(final MultiProtocolURI location, final String mimeType, final String charset,
+                    final Object parserObject,
+                    final Set<String> languages,
                    final String[] keywords, final String title, final String author, final String publisher,
                    final String[] sections, final String abstrct,
                    final float lon, final float lat,
                    final Object text,
-                    final Map<MultiProtocolURI, String> anchors,
+                    final Map<MultiProtocolURI, Properties> anchors,
                    final Map<MultiProtocolURI, String> rss,
                    final Map<MultiProtocolURI, ImageEntry> images,
                    boolean indexingDenied) {
        this.source = location;
        this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
        this.charset = charset;
+        this.parserObject = parserObject;
        this.keywords = (keywords == null) ? new LinkedList<String>() : Arrays.asList(keywords);
        this.title = (title == null) ? new StringBuilder(0) : new StringBuilder(title);
        this.creator = (author == null) ? new StringBuilder(0) : new StringBuilder(author);
@ -106,7 +110,7 @@ public class Document {
        this.description = (abstrct == null) ? new StringBuilder(0) : new StringBuilder(abstrct);
        this.lon = lon;
        this.lat = lat;
-        this.anchors = (anchors == null) ? new HashMap<MultiProtocolURI, String>(0) : anchors;
+        this.anchors = (anchors == null) ? new HashMap<MultiProtocolURI, Properties>(0) : anchors;
        this.rss = (rss == null) ? new HashMap<MultiProtocolURI, String>(0) : rss;
        this.images =  (images == null) ? new HashMap<MultiProtocolURI, ImageEntry>() : images;
        this.publisher = publisher;
@ -116,19 +120,15 @@ public class Document {
        this.applinks = null;
        this.emaillinks = null;
        this.resorted = false;
-        this.inboundLinks = -1;
-        this.outboundLinks = -1;
+        this.inboundlinks = null;
+        this.outboundlinks = null;
        this.languages = languages;
        this.indexingDenied = indexingDenied;
        this.text = text == null ? new ByteArrayOutputStream() : text;
    }
    
-    public void setInboundLinks(int il) {
-        this.inboundLinks = il;
-    }
-    
-    public void setOutboundLinks(int ol) {
-        this.outboundLinks = ol;
+    public Object getParserObject() {
+        return this.parserObject;
    }
    
    /**
@ -179,8 +179,8 @@ dc_rights
    public String dc_creator() {
        return (creator == null) ? "" : creator.toString();
    }
-    
-    public String dc_subject(final char separator) {
+
+    public String[] dc_subject() {
        // sort out doubles and empty words
        final TreeSet<String> hs = new TreeSet<String>();
        String s;
@ -189,11 +189,18 @@ dc_rights
            s = (this.keywords.get(i)).trim();
            if (s.length() > 0) hs.add(s.toLowerCase());
        }
-        if (hs.isEmpty()) return "";
+        String[] t = new String[hs.size()];
+        int i = 0;
+        for (String u: hs) t[i++] = u;
+        return t;
+    }
+    
+    public String dc_subject(final char separator) {
+        String[] t = dc_subject();
+        if (t.length == 0) return "";
        // generate a new list
-        final StringBuilder sb = new StringBuilder(this.keywords.size() * 6);
-        final Iterator<String> i = hs.iterator();
-        while (i.hasNext()) sb.append(i.next()).append(separator);
+        final StringBuilder sb = new StringBuilder(t.length * 8);
+        for (String s: t) sb.append(s).append(separator);
        return sb.substring(0, sb.length() - 1);
    }
    
@ -314,7 +321,7 @@ dc_rights
        return this.keywords;
    }
    
-    public Map<MultiProtocolURI, String> getAnchors() {
+    public Map<MultiProtocolURI, Properties> getAnchors() {
        // returns all links embedded as anchors (clickeable entities)
        // this is a url(String)/text(String) map
        return anchors;
@ -371,72 +378,79 @@ dc_rights
        return this.lat;
    }
    
-    private synchronized void resortLinks() {
+    private void resortLinks() {
        if (this.resorted) return;
-        
-        // extract hyperlinks, medialinks and emaillinks from anchorlinks
-        MultiProtocolURI url;
-        String u;
-        int extpos, qpos;
-        String ext = null;
-        final Iterator<Map.Entry<MultiProtocolURI, String>> i = anchors.entrySet().iterator();
-        hyperlinks = new HashMap<MultiProtocolURI, String>();
-        videolinks = new HashMap<MultiProtocolURI, String>();
-        audiolinks = new HashMap<MultiProtocolURI, String>();
-        applinks   = new HashMap<MultiProtocolURI, String>();
-        emaillinks = new HashMap<String, String>();
-        final Map<MultiProtocolURI, ImageEntry> collectedImages = new HashMap<MultiProtocolURI, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
-        Map.Entry<MultiProtocolURI, String> entry;
-        while (i.hasNext()) {
-            entry = i.next();
-            url = entry.getKey();
-            if (url == null) continue;
-            u = url.toNormalform(true, false);
-            if (u.startsWith("mailto:")) {
-                emaillinks.put(u.substring(7), entry.getValue());
-            } else {
-                extpos = u.lastIndexOf('.');
-                if (extpos > 0) {
-                    if (((qpos = u.indexOf('?')) >= 0) && (qpos > extpos)) {
-                        ext = u.substring(extpos + 1, qpos).toLowerCase();
-                    } else {
-                        ext = u.substring(extpos + 1).toLowerCase();
-                    }
-                    if (Classification.isMediaExtension(ext)) {
-                        // this is not a normal anchor, its a media link
-                        if (Classification.isImageExtension(ext)) {
-                            ContentScraper.addImage(collectedImages, new ImageEntry(url, entry.getValue(), -1, -1, -1));
+        synchronized (this) {
+            if (this.resorted) return; // second check under the lock; NOTE(review): 'resorted' is declared as a plain boolean, so the unsynchronized check before this block has no visibility guarantee - confirm
+            // extract hyperlinks, medialinks and emaillinks from anchorlinks
+            MultiProtocolURI url;
+            String u;
+            int extpos, qpos;
+            String ext = null;
+            String thishost = this.source.getHost(); // host of this document; anchors on the same host are counted as inbound below
+            this.inboundlinks = new HashMap<MultiProtocolURI, String>();
+            this.outboundlinks = new HashMap<MultiProtocolURI, String>();
+            this.hyperlinks = new HashMap<MultiProtocolURI, String>();
+            this.videolinks = new HashMap<MultiProtocolURI, String>();
+            this.audiolinks = new HashMap<MultiProtocolURI, String>();
+            this.applinks   = new HashMap<MultiProtocolURI, String>();
+            this.emaillinks = new HashMap<String, String>();
+            final Map<MultiProtocolURI, ImageEntry> collectedImages = new HashMap<MultiProtocolURI, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
+            for (Map.Entry<MultiProtocolURI, ImageEntry> entry: collectedImages.entrySet()) { // NOTE(review): collectedImages was created empty on the previous line, so this loop body never runs - confirm which image map was meant
+                if (entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image");
+            }
+            for (Map.Entry<MultiProtocolURI, Properties> entry: anchors.entrySet()) {
+                url = entry.getKey();
+                if (url == null) continue;
+                if (url.getHost() != null && thishost != null && url.getHost().equals(thishost)) this.inboundlinks.put(url, "anchor"); else this.outboundlinks.put(url, "anchor");
+                u = url.toNormalform(true, false);
+                String name = entry.getValue().getProperty("name", "");
+                if (u.startsWith("mailto:")) {
+                    emaillinks.put(u.substring(7), name);
+                } else {
+                    extpos = u.lastIndexOf('.');
+                    if (extpos > 0) {
+                        if (((qpos = u.indexOf('?')) >= 0) && (qpos > extpos)) {
+                            ext = u.substring(extpos + 1, qpos).toLowerCase();
+                        } else {
+                            ext = u.substring(extpos + 1).toLowerCase();
+                        }
+                        if (Classification.isMediaExtension(ext)) {
+                            // this is not a normal anchor, its a media link
+                            if (Classification.isImageExtension(ext)) {
+                                ContentScraper.addImage(collectedImages, new ImageEntry(url, name, -1, -1, -1));
+                            }
+                            else if (Classification.isAudioExtension(ext)) audiolinks.put(url, name);
+                            else if (Classification.isVideoExtension(ext)) videolinks.put(url, name);
+                            else if (Classification.isApplicationExtension(ext)) applinks.put(url, name);
                        }
-                        else if (Classification.isAudioExtension(ext)) audiolinks.put(url, entry.getValue());
-                        else if (Classification.isVideoExtension(ext)) videolinks.put(url, entry.getValue());
-                        else if (Classification.isApplicationExtension(ext)) applinks.put(url, entry.getValue());
                    }
+                    // in any case we consider this as a link and let the parser decide if that link can be followed
+                    hyperlinks.put(url, name);
                }
-                // in any case we consider this as a link and let the parser decide if that link can be followed
-                hyperlinks.put(url, entry.getValue());
            }
+            
+            // add image links that we collected from the anchors to the image map
+            ContentScraper.addAllImages(images, collectedImages);
+           
+            // expand the hyperlinks:
+            // we add artificial hyperlinks to the hyperlink set
+            // that can be calculated from given hyperlinks and imagelinks
+            
+            hyperlinks.putAll(allReflinks(images.values()));
+            hyperlinks.putAll(allReflinks(audiolinks.keySet()));
+            hyperlinks.putAll(allReflinks(videolinks.keySet()));
+            hyperlinks.putAll(allReflinks(applinks.keySet()));
+            /*
+            hyperlinks.putAll(allSubpaths(hyperlinks.keySet()));
+            hyperlinks.putAll(allSubpaths(images.values()));
+            hyperlinks.putAll(allSubpaths(audiolinks.keySet()));
+            hyperlinks.putAll(allSubpaths(videolinks.keySet()));
+            hyperlinks.putAll(allSubpaths(applinks.keySet()));
+             */        
+            // don't do this again
+            this.resorted = true;
        }
-        
-        // add image links that we collected from the anchors to the image map
-        ContentScraper.addAllImages(images, collectedImages);
-       
-        // expand the hyperlinks:
-        // we add artificial hyperlinks to the hyperlink set
-        // that can be calculated from given hyperlinks and imagelinks
-        
-        hyperlinks.putAll(allReflinks(images.values()));
-        hyperlinks.putAll(allReflinks(audiolinks.keySet()));
-        hyperlinks.putAll(allReflinks(videolinks.keySet()));
-        hyperlinks.putAll(allReflinks(applinks.keySet()));
-        /*
-        hyperlinks.putAll(allSubpaths(hyperlinks.keySet()));
-        hyperlinks.putAll(allSubpaths(images.values()));
-        hyperlinks.putAll(allSubpaths(audiolinks.keySet()));
-        hyperlinks.putAll(allSubpaths(videolinks.keySet()));
-        hyperlinks.putAll(allSubpaths(applinks.keySet()));
-         */        
-        // don't do this again
-        this.resorted = true;
    }
    
    public static Map<MultiProtocolURI, String> allSubpaths(final Collection<?> links) {
@ -573,12 +587,24 @@ dc_rights
    	this.favicon = faviconURL;
    }
    
-    public int inboundLinks() {
-        return (this.inboundLinks < 0) ? 0 : this.inboundLinks;
+    public int inboundLinkCount() {
+        if (this.inboundlinks == null) resortLinks();
+        return (this.inboundlinks == null) ? 0 : this.inboundlinks.size();
+    }
+    
+    public int outboundLinkCount() {
+        if (this.outboundlinks == null) resortLinks();
+        return (this.outboundlinks == null) ? 0 : this.outboundlinks.size();
    }
    
-    public int outboundLinks() {
-        return (this.outboundLinks < 0) ? 0 : this.outboundLinks;
+    public Set<MultiProtocolURI> inboundLinks() {
+        if (this.inboundlinks == null) resortLinks();
+        return (this.inboundlinks == null) ? null : this.inboundlinks.keySet();
+    }
+    
+    public Set<MultiProtocolURI> outboundLinks() {
+        if (this.outboundlinks == null) resortLinks();
+        return (this.outboundlinks == null) ? null : this.outboundlinks.keySet();
    }
    
    public boolean indexingDenied() {
@ -608,7 +634,7 @@ dc_rights
        String language = this.dc_language();
        if (language != null && language.length() > 0) os.write("<dc:language>" + this.dc_language() + "</dc:language>\n");
        os.write("<dc:date>" + ISO8601Formatter.FORMATTER.format(date) + "</dc:date>\n");
-        if (this.lon != 0.0f && this.lat != 0.0f) os.write("<geo:long>" + this.lon +"</geo:long><geo:lat>" + this.lat + "</geo:lat>\n");
+        if (this.lon != 0.0f && this.lat != 0.0f) os.write("<geo:Point><geo:long>" + this.lon +"</geo:long><geo:lat>" + this.lat + "</geo:lat></geo:Point>\n");
        os.write("</record>\n");
    }
    
@ -665,7 +691,7 @@ dc_rights
        final StringBuilder      description   = new StringBuilder(80);
        final LinkedList<String> sectionTitles = new LinkedList<String>();

-        final Map<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
+        final Map<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
        final Map<MultiProtocolURI, String> rss = new HashMap<MultiProtocolURI, String>();
        final Map<MultiProtocolURI, ImageEntry> images = new HashMap<MultiProtocolURI, ImageEntry>();
        float lon = 0.0f, lat = 0.0f;
@ -716,6 +742,7 @@ dc_rights
                globalMime,
                null,
                null,
+                null,
                subjects.toString().split(" |,"),
                title.toString(),
                authors.toString(),
--- a/source/net/yacy/document/content/DCEntry.java
+++ b/source/net/yacy/document/content/DCEntry.java
@ -267,6 +267,7 @@ public class DCEntry extends TreeMap<String, String> {
            getIdentifier(true),
            "text/html",
            "UTF-8",
+            this,
            languages,
            getSubject(),
            getTitle(),
--- a/source/net/yacy/document/parser/csvParser.java
+++ b/source/net/yacy/document/parser/csvParser.java
@ -64,6 +64,7 @@ public class csvParser extends AbstractParser implements Parser {
                    location,
                    mimeType,
                    charset,
+                    this,
                    null,
                    null,
                    concatRow(table.get(0)),
--- a/source/net/yacy/document/parser/docParser.java
+++ b/source/net/yacy/document/parser/docParser.java
@ -88,6 +88,7 @@ public class docParser extends AbstractParser implements Parser {
                  location,
                  mimeType,
                  "UTF-8",
+                  this,
                  null,
                  null,
                  title,
--- a/source/net/yacy/document/parser/genericParser.java
+++ b/source/net/yacy/document/parser/genericParser.java
@ -50,6 +50,7 @@ public class genericParser extends AbstractParser implements Parser {
                location,
                mimeType,
                charset,
+                this,
                null,
                null,
                location.getFileName().length() == 0 ? location.toTokens() : MultiProtocolURI.unescape(location.getFileName()), // title
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@ -70,6 +70,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        linkTags0.add("meta");
        linkTags0.add("area");
        linkTags0.add("link");
+        linkTags0.add("script");
        linkTags0.add("embed");     //added by [MN]
        linkTags0.add("param");     //added by [MN]

@ -78,17 +79,27 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        linkTags1.add("h2");
        linkTags1.add("h3");
        linkTags1.add("h4");
+        linkTags1.add("h5");
+        linkTags1.add("h6");
        linkTags1.add("title");
+        linkTags1.add("b");
+        linkTags1.add("strong");
+        linkTags1.add("i");
+        linkTags1.add("li");
+        linkTags1.add("iframe");
+        //<iframe src="../../../index.htm" name="SELFHTML_in_a_box" width="90%" height="400">
    }

    // class variables: collectors for links
-    private Map<MultiProtocolURI, String> rss;
-    private Map<MultiProtocolURI, String> anchors;
+    private Map<MultiProtocolURI, Properties> anchors;
+    private Map<MultiProtocolURI, String> rss, css;
+    private Set<MultiProtocolURI> script, frames, iframes;
    private Map<MultiProtocolURI, ImageEntry> images; // urlhash/image relation
    private final Map<String, String> metas;
    private String title;
    //private String headline;
    private List<String>[] headlines;
+    private List<String> bold, italic, li;
    private CharBuffer content;
    private final EventListenerList htmlFilterEventListeners;
    private float lon, lat;
@ -110,12 +121,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        super(linkTags0, linkTags1);
        this.root = root;
        this.rss = new HashMap<MultiProtocolURI, String>();
-        this.anchors = new HashMap<MultiProtocolURI, String>();
+        this.css = new HashMap<MultiProtocolURI, String>();
+        this.anchors = new HashMap<MultiProtocolURI, Properties>();
        this.images = new HashMap<MultiProtocolURI, ImageEntry>();
+        this.frames = new HashSet<MultiProtocolURI>();
+        this.iframes = new HashSet<MultiProtocolURI>();
        this.metas = new HashMap<String, String>();
+        this.script = new HashSet<MultiProtocolURI>();
        this.title = "";
-        this.headlines = new ArrayList[4];
-        for (int i = 0; i < 4; i++) headlines[i] = new ArrayList<String>();
+        this.headlines = new ArrayList[6];
+        for (int i = 0; i < this.headlines.length; i++) headlines[i] = new ArrayList<String>();
+        this.bold = new ArrayList<String>();
+        this.italic = new ArrayList<String>();
+        this.li = new ArrayList<String>();
        this.content = new CharBuffer(1024);
        this.htmlFilterEventListeners = new EventListenerList();
        this.lon = 0.0f;
@ -202,7 +220,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            s = p + 1;
            try {
                url = new MultiProtocolURI(u);
-                anchors.put(url, u);
+                anchors.put(url, new Properties());
                continue;
            } catch (MalformedURLException e) {}
        }
@ -228,26 +246,24 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            try {
                final int width = Integer.parseInt(tagopts.getProperty("width", "-1"));
                final int height = Integer.parseInt(tagopts.getProperty("height", "-1"));
-                if (width > 15 && height > 15) {
-                    final float ratio = (float) Math.min(width, height) / Math.max(width, height);
-                    if (ratio > 0.4) {
-                        final MultiProtocolURI url = absolutePath(tagopts.getProperty("src", ""));
-                        final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", ""), width, height, -1);
-                        addImage(images, ie);
-                    }
-// i think that real pictures have witdth & height tags - thq
-//                } else if (width < 0 && height < 0) { // add or to ignore !?
-//                    final yacyURL url = absolutePath(tagopts.getProperty("src", ""));
-//                    final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt", ""), width, height);
-//                    addImage(images, ie);
-                }
+                //if (width > 15 && height > 15) {
+                    final MultiProtocolURI url = absolutePath(tagopts.getProperty("src", ""));
+                    final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", ""), width, height, -1);
+                    addImage(images, ie);
+                //}
            } catch (final NumberFormatException e) {}
        } else if(tagname.equalsIgnoreCase("base")) {
            try {
                root = new MultiProtocolURI(tagopts.getProperty("href", ""));
            } catch (final MalformedURLException e) {}
        } else if (tagname.equalsIgnoreCase("frame")) {
-            anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
+            anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts /* with property "name" */);
+            frames.add(absolutePath(tagopts.getProperty("src", "")));
+        } else if (tagname.equalsIgnoreCase("iframe")) {
+            anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts /* with property "name" */);
+            iframes.add(absolutePath(tagopts.getProperty("src", "")));
+        } else if (tagname.equalsIgnoreCase("script")) {
+            script.add(absolutePath(tagopts.getProperty("src", "")));
        } else if (tagname.equalsIgnoreCase("meta")) {
            String name = tagopts.getProperty("name", "");
            if (name.length() > 0) {
@ -262,7 +278,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            final String areatitle = cleanLine(tagopts.getProperty("title",""));
            //String alt   = tagopts.getProperty("alt","");
            final String href  = tagopts.getProperty("href", "");
-            if (href.length() > 0) anchors.put(absolutePath(href), areatitle);
+            Properties p = new Properties(); p.put("name", areatitle);
+            if (href.length() > 0) anchors.put(absolutePath(href), p);
        } else if (tagname.equalsIgnoreCase("link")) {
            final MultiProtocolURI newLink = absolutePath(tagopts.getProperty("href", ""));

@ -277,16 +294,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                    this.favicon = newLink;
                } else if (rel.equalsIgnoreCase("alternate") && type.equalsIgnoreCase("application/rss+xml")) {
                    rss.put(newLink, linktitle);
+                } else if (rel.equalsIgnoreCase("stylesheet") && type.equalsIgnoreCase("text/css")) {
+                    css.put(newLink, rel);
                } else if (!rel.equalsIgnoreCase("stylesheet") && !rel.equalsIgnoreCase("alternate stylesheet")) {
-                    anchors.put(newLink, linktitle);
+                    Properties p = new Properties(); p.put("name", linktitle);
+                    anchors.put(newLink, p);
                }
            }
        } else if(tagname.equalsIgnoreCase("embed")) {
-            anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
+            anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts /* with property "name" */);
        } else if(tagname.equalsIgnoreCase("param")) {
            final String name = tagopts.getProperty("name", "");
            if (name.equalsIgnoreCase("movie")) {
-                anchors.put(absolutePath(tagopts.getProperty("value", "")),name);
+                anchors.put(absolutePath(tagopts.getProperty("value", "")), tagopts /* with property "name" */);
            }
        }

@ -308,7 +328,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                    final ImageEntry ie = new ImageEntry(url, recursiveParse(text), -1, -1, -1);
                    addImage(images, ie);
                } else {
-                    anchors.put(url, recursiveParse(text));
+                    tagopts.put("name", recursiveParse(text));
+                    anchors.put(url, tagopts);
                }
            }
        }
@ -325,8 +346,26 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        } else if ((tagname.equalsIgnoreCase("h4")) && (text.length < 1024)) {
            h = recursiveParse(text);
            if (h.length() > 0) headlines[3].add(h);
+        } else if ((tagname.equalsIgnoreCase("h5")) && (text.length < 1024)) {
+            h = recursiveParse(text);
+            if (h.length() > 0) headlines[4].add(h);
+        } else if ((tagname.equalsIgnoreCase("h6")) && (text.length < 1024)) {
+            h = recursiveParse(text);
+            if (h.length() > 0) headlines[5].add(h);
        } else if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) {
            title = recursiveParse(text);
+        } else if ((tagname.equalsIgnoreCase("b")) && (text.length < 1024)) {
+            h = recursiveParse(text);
+            if (h.length() > 0) bold.add(h);
+        } else if ((tagname.equalsIgnoreCase("strong")) && (text.length < 1024)) {
+            h = recursiveParse(text);
+            if (h.length() > 0) bold.add(h);
+        } else if ((tagname.equalsIgnoreCase("i")) && (text.length < 1024)) {
+            h = recursiveParse(text);
+            if (h.length() > 0) italic.add(h);
+        } else if ((tagname.equalsIgnoreCase("li")) && (text.length < 1024)) {
+            h = recursiveParse(text);
+            if (h.length() > 0) li.add(h);
        }

        // fire event
@ -389,8 +428,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        }
        
        // otherwise take any headline
-        for (int i = 0; i < 4; i++) {
-            if (!headlines[i].isEmpty()) return headlines[i].get(0);
+        for (int i = 0; i < this.headlines.length; i++) {
+            if (!this.headlines[i].isEmpty()) return this.headlines[i].get(0);
        }
        
        // take description tag
@ -402,8 +441,31 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    }
    
    public String[] getHeadlines(final int i) {
-        assert ((i >= 1) && (i <= 4));
-        return headlines[i - 1].toArray(new String[headlines.length]);
+        assert ((i >= 1) && (i <= this.headlines.length));
+        return this.headlines[i - 1].toArray(new String[this.headlines[i - 1].size()]);
+    }
+    
+    public String[] getBold() {
+        return this.bold.toArray(new String[0]); // all collected bold texts as a fresh array; toArray sizes it
+    }
+    
+    public String[] getItalic() {
+        return this.italic.toArray(new String[0]); // all collected italic texts as a fresh array; toArray sizes it
+    }
+    
+    public String[] getLi() {
+        return this.li.toArray(new String[0]); // all collected list-item texts as a fresh array; toArray sizes it
+    }
+    
+    /** @return true if at least one collected anchor url has the file extension "swf", false otherwise */
+    public boolean containsFlash() {
+        // read-only scan: this.anchors must not be re-initialized here, otherwise all scraped links are lost
+        for (MultiProtocolURI url: this.anchors.keySet()) {
+            final String ext = url.getFileExtension();
+            if (ext == null) continue;
+            if (ext.equals("swf")) return true;
+        }
+        return false;
    }
    
    public byte[] getText() {
@ -415,7 +477,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        }
    }

-    public Map<MultiProtocolURI, String> getAnchors() {
+    public Map<MultiProtocolURI, Properties> getAnchors() {
        // returns a url (String) / name (String) relation
        return anchors;
    }
@ -425,6 +487,25 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        return rss;
    }

+    public Map<MultiProtocolURI, String> getCSS() {
+        // returns a stylesheet url / rel string relation, filled from 'link' tags with rel "stylesheet" and type "text/css"
+        return css;
+    }
+
+    public Set<MultiProtocolURI> getFrames() {
+        // returns the set of urls taken from the 'src' attribute of 'frame' tags
+        return frames;
+    }
+
+    public Set<MultiProtocolURI> getIFrames() {
+        // returns the set of urls taken from the 'src' attribute of 'iframe' tags
+        return iframes;
+    }
+
+    public Set<MultiProtocolURI> getScript() {
+        return script; // the set of urls taken from the 'src' attribute of 'script' tags
+    }
+
    /**
     * get all images
     * @return a map of <urlhash, ImageEntry>
--- a/source/net/yacy/document/parser/html/ImageEntry.java
+++ b/source/net/yacy/document/parser/html/ImageEntry.java
@ -65,7 +65,11 @@ public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry

    @Override
    public String toString() {
-        return "{" + url.toString() + ", " + alt + ", " + width + "/" + height + "}";
+        return "<img url=\"" + url.toNormalform(false, false, false, false) + "\"" +
+               (alt != null && alt.length() > 0 ? " alt=\"" + alt + "\"" : "") +
+               (width >= 0 ? " width=\"" + width + "\"" : "") +
+               (height >= 0 ? " height=\"" + height + "\"" : "") +
+               ">";
    }

    @Override
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@ -162,9 +162,15 @@ public class htmlParser extends AbstractParser implements Parser {
    }

    private static Document[] transformScraper(final MultiProtocolURI location, final String mimeType, final String charSet, final ContentScraper scraper) {
-        final String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length];
+        final String[] sections = new String[
+                 scraper.getHeadlines(1).length +
+                 scraper.getHeadlines(2).length +
+                 scraper.getHeadlines(3).length +
+                 scraper.getHeadlines(4).length +
+                 scraper.getHeadlines(5).length +
+                 scraper.getHeadlines(6).length];
        int p = 0;
-        for (int i = 1; i <= 4; i++) {
+        for (int i = 1; i <= 6; i++) {
            for (final String headline : scraper.getHeadlines(i)) {
                sections[p++] = headline;
            }
@ -173,6 +179,7 @@ public class htmlParser extends AbstractParser implements Parser {
                location,
                mimeType,
                charSet,
+                scraper,
                scraper.getContentLanguages(),
                scraper.getKeywords(),
                scraper.getTitle(),
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@ -36,6 +36,7 @@ import java.io.IOException;
 import java.io.InputStream;import java.net.MalformedURLException;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Properties;
 import java.util.Iterator;
 import java.util.Set;

@ -180,7 +181,7 @@ public class genericImageParser extends AbstractParser implements Parser {
        }        
        
        final HashSet<String> languages = new HashSet<String>();
-        final HashMap<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
+        final HashMap<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
        final HashMap<MultiProtocolURI, ImageEntry> images  = new HashMap<MultiProtocolURI, ImageEntry>();
        // add this image to the map of images
        String infoString = ii.info.toString();
@ -192,6 +193,7 @@ public class genericImageParser extends AbstractParser implements Parser {
             location,
             mimeType,
             "UTF-8",
+             this,
             languages,
             keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords
             title, // title
--- a/source/net/yacy/document/parser/mmParser.java
+++ b/source/net/yacy/document/parser/mmParser.java
@ -88,6 +88,7 @@ public class mmParser extends AbstractParser implements Parser {
            location,
            mimeType,
            "UTF-8",
+            this,
            null,
            null,
            rootElementText,
--- a/source/net/yacy/document/parser/odtParser.java
+++ b/source/net/yacy/document/parser/odtParser.java
@ -162,6 +162,7 @@ public class odtParser extends AbstractParser implements Parser {
                    location,
                    mimeType,
                    "UTF-8",
+                    this,
                    languages,
                    docKeywords,
                    docLongTitle,
--- a/source/net/yacy/document/parser/ooxmlParser.java
+++ b/source/net/yacy/document/parser/ooxmlParser.java
@ -147,6 +147,7 @@ public class ooxmlParser extends AbstractParser implements Parser {
                    location,
                    mimeType,
                    "UTF-8",
+                    this,
                    languages,
                    docKeywords,
                    docLongTitle,
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@ -165,6 +165,7 @@ public class pdfParser extends AbstractParser implements Parser {
                location,
                mimeType,
                "UTF-8",
+                this,
                null,
                docKeywords,
                docTitle,
--- a/source/net/yacy/document/parser/pptParser.java
+++ b/source/net/yacy/document/parser/pptParser.java
@ -86,6 +86,7 @@ public class pptParser extends AbstractParser implements Parser {
                    location,
                    mimeType,
                    "UTF-8",
+                    this,
                    null,
                    null,
                    title,
--- a/source/net/yacy/document/parser/psParser.java
+++ b/source/net/yacy/document/parser/psParser.java
@ -102,6 +102,7 @@ public class psParser extends AbstractParser implements Parser {
                    location, // url
                    mimeType, // mime
                    "UTF-8",  // charset
+                    this,
                    null,     // languages
                    null,     // keywords
                    null,     // title
--- a/source/net/yacy/document/parser/rssParser.java
+++ b/source/net/yacy/document/parser/rssParser.java
@ -33,6 +33,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;

 import net.yacy.cora.document.MultiProtocolURI;
@ -72,18 +73,21 @@ public class rssParser extends AbstractParser implements Parser {
        final List<Document> docs = new ArrayList<Document>();
        MultiProtocolURI uri;
        Set<String> languages;
-        Map<MultiProtocolURI, String> anchors;
+        Map<MultiProtocolURI, Properties> anchors;
        Document doc;
        for (final Hit item: feed) try {
            uri = new MultiProtocolURI(item.getLink());
            languages = new HashSet<String>();
            languages.add(item.getLanguage());
-            anchors = new HashMap<MultiProtocolURI, String>();
-            anchors.put(uri, item.getTitle());
+            anchors = new HashMap<MultiProtocolURI, Properties>();
+            Properties p = new Properties();
+            p.put("name", item.getTitle());
+            anchors.put(uri, p);
            doc = new Document(
                    uri,
                    TextParser.mimeOf(url),
                    charset,
+                    this,
                    languages,
                    item.getSubject(),
                    item.getTitle(),
--- a/source/net/yacy/document/parser/rtfParser.java
+++ b/source/net/yacy/document/parser/rtfParser.java
@ -67,6 +67,7 @@ public class rtfParser extends AbstractParser implements Parser {
                    location,
                    mimeType,
                    "UTF-8",
+                    this,
                    null,
                    null,
                    ((bodyText.length() > 80)? bodyText.substring(0, 80):bodyText.trim()).
--- a/source/net/yacy/document/parser/sevenzipParser.java
+++ b/source/net/yacy/document/parser/sevenzipParser.java
@ -60,6 +60,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
                location,
                mimeType,
                charset,
+                this,
                null,
                null,
                null,
--- a/source/net/yacy/document/parser/sidAudioParser.java
+++ b/source/net/yacy/document/parser/sidAudioParser.java
@ -82,6 +82,7 @@ public class sidAudioParser extends AbstractParser implements Parser {
                        location,
                        mimeType,
                        "UTF-8",
+                        this,
                        null,
                        null,
                        header.get("name"),
--- a/source/net/yacy/document/parser/sitemapParser.java
+++ b/source/net/yacy/document/parser/sitemapParser.java
@ -87,6 +87,7 @@ public class sitemapParser extends AbstractParser implements Parser {
                    uri,
                    TextParser.mimeOf(url),
                    charset,
+                    this,
                    null,
                    null,
                    "",
--- a/source/net/yacy/document/parser/swfParser.java
+++ b/source/net/yacy/document/parser/swfParser.java
@ -31,6 +31,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Properties;

 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
@ -80,7 +81,7 @@ public class swfParser extends AbstractParser implements Parser {
            final String[] sections =  null;
            final String abstrct = null;
            //TreeSet images = null;
-            final Map<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
+            final Map<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
            int urls = 0;
            int urlStart = -1;
            int urlEnd = 0;
@ -97,7 +98,9 @@ public class swfParser extends AbstractParser implements Parser {
                urlEnd = contents.indexOf(linebreak,urlStart);
                url = contents.substring(urlStart,urlEnd);
                urlnr = Integer.toString(++urls).toString();
-                anchors.put(new MultiProtocolURI(url), urlnr);
+                Properties p = new Properties();
+                p.put("name", urlnr);
+                anchors.put(new MultiProtocolURI(url), p);
                contents = contents.substring(0,urlStart)+contents.substring(urlEnd);
            }

@ -106,6 +109,7 @@ public class swfParser extends AbstractParser implements Parser {
                    location,     // url of the source document
                    mimeType,     // the documents mime type
                    "UTF-8",      // charset of the document text
+                    this,
                    null,
                    null,          //keywords
                      ((contents.length() > 80)? contents.substring(0, 80):contents.trim()).
--- a/source/net/yacy/document/parser/torrentParser.java
+++ b/source/net/yacy/document/parser/torrentParser.java
@ -98,6 +98,7 @@ public class torrentParser extends AbstractParser implements Parser {
                    location,
                    mimeType,
                    charset,
+                    this,
                    null,
                    null,
                    title, // title
--- a/source/net/yacy/document/parser/vcfParser.java
+++ b/source/net/yacy/document/parser/vcfParser.java
@ -34,6 +34,7 @@ import java.net.MalformedURLException;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.Properties;

 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
@ -66,7 +67,7 @@ public class vcfParser extends AbstractParser implements Parser {
            final StringBuilder parsedTitle = new StringBuilder();
            final StringBuilder parsedDataText = new StringBuilder();
            final HashMap<String, String> parsedData = new HashMap<String, String>();
-            final HashMap<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
+            final HashMap<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
            final LinkedList<String> parsedNames = new LinkedList<String>();
            
            boolean useLastLine = false;
@ -174,7 +175,9 @@ public class vcfParser extends AbstractParser implements Parser {
                    } else if (key.toUpperCase().startsWith("URL")) {
                        try {
                            final MultiProtocolURI newURL = new MultiProtocolURI(value);
-                            anchors.put(newURL, newURL.toString());   
+                            Properties p = new Properties();
+                            p.put("name", newURL.toString());
+                            anchors.put(newURL, p);   
                            //parsedData.put(key,value);
                        } catch (final MalformedURLException ex) {/* ignore this */}                                                
                    } else if (
@ -205,6 +208,7 @@ public class vcfParser extends AbstractParser implements Parser {
                    url,                        // url of the source document
                    mimeType,                   // the documents mime type
                    null,                       // charset
+                    this,
                    null,                       // set of languages
                    null,                       // a list of extracted keywords
                    parsedTitle.toString(),     // a long document title
--- a/source/net/yacy/document/parser/vsdParser.java
+++ b/source/net/yacy/document/parser/vsdParser.java
@ -106,6 +106,7 @@ public class vsdParser extends AbstractParser implements Parser {
                    location,     // url of the source document
                    mimeType,     // the documents mime type
                    "UTF-8",      // charset of the document text
+                    this,
                    null,         // language
                    keywords,
                    title,
--- a/source/net/yacy/document/parser/xlsParser.java
+++ b/source/net/yacy/document/parser/xlsParser.java
@ -116,6 +116,7 @@ public class xlsParser extends AbstractParser implements Parser {
                        location,
                        mimeType,
                        "UTF-8",
+                        this,
                        null,
                        null,
                        location.getFile(),
--- a/source/net/yacy/kelondro/blob/ArrayStack.java
+++ b/source/net/yacy/kelondro/blob/ArrayStack.java
@ -216,7 +216,7 @@ public class ArrayStack implements BLOB {
    
    public long mem() {
        long m = 0;
-        for (blobItem b: this.blobs) m += b.blob.mem();
+        if (this.blobs != null) for (blobItem b: this.blobs) m += b.blob.mem();
        return m;
    }
    
--- a/source/net/yacy/kelondro/index/HandleMap.java
+++ b/source/net/yacy/kelondro/index/HandleMap.java
@ -184,20 +184,20 @@ public final class HandleMap implements Iterable<Row.Entry> {
        index.clear();
    }
    
-    public final synchronized byte[] smallestKey() {
+    public final byte[] smallestKey() {
        return index.smallestKey();
    }
    
-    public final synchronized byte[] largestKey() {
+    public final byte[] largestKey() {
        return index.largestKey();
    }
    
-    public final synchronized boolean has(final byte[] key) {
+    public final boolean has(final byte[] key) {
        assert (key != null);
        return index.has(key);
    }
    
-    public final synchronized long get(final byte[] key) {
+    public final long get(final byte[] key) {
        assert (key != null);
        final Row.Entry indexentry = index.get(key);
        if (indexentry == null) return -1;
@ -212,10 +212,10 @@ public final class HandleMap implements Iterable<Row.Entry> {
     * @throws IOException
     * @throws RowSpaceExceededException
     */
-    public final synchronized long put(final byte[] key, final long l) throws RowSpaceExceededException {
+    public final long put(final byte[] key, final long l) throws RowSpaceExceededException {
        assert l >= 0 : "l = " + l;
        assert (key != null);
-        final Row.Entry newentry = index.row().newEntry();
+        final Row.Entry newentry = this.rowdef.newEntry();
        newentry.setCol(0, key);
        newentry.setCol(1, l);
        final Row.Entry oldentry = index.replace(newentry);
@ -223,7 +223,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
        return oldentry.getColLong(1);
    }
    
-    public final synchronized void putUnique(final byte[] key, final long l) throws RowSpaceExceededException {
+    public final void putUnique(final byte[] key, final long l) throws RowSpaceExceededException {
        assert l >= 0 : "l = " + l;
        assert (key != null);
        final Row.Entry newentry = this.rowdef.newEntry();
@ -232,39 +232,41 @@ public final class HandleMap implements Iterable<Row.Entry> {
        index.addUnique(newentry);
    }
    
-    public final synchronized long add(final byte[] key, final long a) throws RowSpaceExceededException {
+    public final long add(final byte[] key, final long a) throws RowSpaceExceededException {
        assert key != null;
        assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue
-
-        final Row.Entry indexentry = index.get(key);
-        if (indexentry == null) {
-            final Row.Entry newentry = this.rowdef.newEntry();
-            newentry.setCol(0, key);
-            newentry.setCol(1, a);
-            index.addUnique(newentry);
-            return 1;
+        synchronized (index) {
+            final Row.Entry indexentry = index.get(key);
+            if (indexentry == null) {
+                final Row.Entry newentry = this.rowdef.newEntry();
+                newentry.setCol(0, key);
+                newentry.setCol(1, a);
+                index.addUnique(newentry);
+                return 1;
+            }
+            final long i = indexentry.getColLong(1) + a;
+            indexentry.setCol(1, i);
+            index.put(indexentry);
+            return i;
        }
-        final long i = indexentry.getColLong(1) + a;
-        indexentry.setCol(1, i);
-        index.put(indexentry);
-        return i;
    }
    
-    public final synchronized long inc(final byte[] key) throws RowSpaceExceededException {
+    public final long inc(final byte[] key) throws RowSpaceExceededException {
        return add(key, 1);
    }
    
-    public final synchronized long dec(final byte[] key) throws RowSpaceExceededException {
+    public final long dec(final byte[] key) throws RowSpaceExceededException {
        return add(key, -1);
    }
    
-    public final synchronized ArrayList<long[]> removeDoubles() throws RowSpaceExceededException {
+    public final ArrayList<long[]> removeDoubles() throws RowSpaceExceededException {
        final ArrayList<long[]> report = new ArrayList<long[]>();
        long[] is;
        int c;
        long l;
        final int initialSize = this.size();
-        for (final RowCollection rowset: index.removeDoubles()) {
+        ArrayList<RowCollection> rd = index.removeDoubles();
+        for (final RowCollection rowset: rd) {
            is = new long[rowset.size()];
            c = 0;
            for (Row.Entry e: rowset) {
@ -277,7 +279,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
        return report;
    }
    
-    public final synchronized ArrayList<byte[]> top(int count) {
+    public final ArrayList<byte[]> top(int count) {
        List<Row.Entry> list0 = index.top(count);
        ArrayList<byte[]> list = new ArrayList<byte[]>();
        for (Row.Entry entry: list0) {
@ -288,41 +290,44 @@ public final class HandleMap implements Iterable<Row.Entry> {
    
    public final synchronized long remove(final byte[] key) {
        assert (key != null);
-        final boolean exist = index.has(key);
-        if (!exist) return -1;
-        final int s = index.size();
-        final long m = index.mem();
-        final Row.Entry indexentry = index.remove(key);
-        assert (indexentry != null);
-        assert index.size() < s : "s = " + s + ", index.size() = " + index.size();
-        assert index.mem() <= m : "m = " + m + ", index.mem() = " + index.mem();
+        final Row.Entry indexentry;
+        synchronized (index) {
+            final boolean exist = index.has(key);
+            if (!exist) return -1;
+            final int s = index.size();
+            final long m = index.mem();
+            indexentry = index.remove(key);
+            assert (indexentry != null);
+            assert index.size() < s : "s = " + s + ", index.size() = " + index.size();
+            assert index.mem() <= m : "m = " + m + ", index.mem() = " + index.mem();
+        }
        if (indexentry == null) return -1;
        return indexentry.getColLong(1);
    }

-    public final synchronized long removeone() {
+    public final long removeone() {
        final Row.Entry indexentry = index.removeOne();
        if (indexentry == null) return -1;
        return indexentry.getColLong(1);
    }
    
-    public final synchronized int size() {
+    public final int size() {
        return index.size();
    }
    
-    public final synchronized boolean isEmpty() {
+    public final boolean isEmpty() {
        return index.isEmpty();
    }
    
-    public final synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
+    public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
        return index.keys(up, firstKey);
    }

-    public final synchronized CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) {
+    public final CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) {
        return index.rows(up, firstKey);
    }
    
-    public final synchronized void close() {
+    public final void close() {
        index.close();
        index = null;
    }