- added fast site-operator

- refactored merge into BLOBArray

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5770 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · 7ba078daa1
parent b4126432bc
commit 7ba078daa1
9 changed files with 220 additions and 150 deletions
--- a/htroot/api/webstructure.java
+++ b/htroot/api/webstructure.java
@ -51,7 +51,8 @@ public class webstructure {
                    about = null;
                }
            }
-            if (about != null) {
+            if (url != null && about != null) {
+                
                plasmaWebStructure.structureEntry sentry = sb.webStructure.references(about);
                if (sentry != null) {
                    reference(prop, 0, sentry, sb.webStructure);
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@ -87,6 +87,7 @@ public final class search {
        final String  prefer = post.get("prefer", "");
        final String  contentdom = post.get("contentdom", "text");
        final String  filter = post.get("filter", ".*");
+        String  sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null;
        String  language = post.get("language", "");
        if (!iso639.exists(language)) {
            // take language from the user agent
@ -180,7 +181,29 @@ public final class search {
        plasmaSearchEvent theSearch = null;
        if ((query.length() == 0) && (abstractSet != null)) {
            // this is _not_ a normal search, only a request for index abstracts
-            theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(Base64Order.enhancedComparator), null, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), language, false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client, false);
+            theQuery = new plasmaSearchQuery(
+                    null,
+                    abstractSet,
+                    new TreeSet<String>(Base64Order.enhancedComparator),
+                    null,
+                    rankingProfile,
+                    maxdist,
+                    prefer,
+                    plasmaSearchQuery.contentdomParser(contentdom),
+                    language,
+                    false,
+                    count,
+                    0,
+                    filter,
+                    plasmaSearchQuery.SEARCHDOM_LOCAL,
+                    null,
+                    -1,
+                    null,
+                    false,
+                    sitehash, 
+                    yacyURL.TLD_any_zone_filter,
+                    client,
+                    false);
            theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
            yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");

@ -207,7 +230,30 @@ public final class search {
            
        } else {
            // retrieve index containers from search request
-            theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, null, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), language, false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client, false);
+            theQuery = new plasmaSearchQuery(
+                    null, 
+                    queryhashes, 
+                    excludehashes, 
+                    null, 
+                    rankingProfile, 
+                    maxdist, 
+                    prefer, 
+                    plasmaSearchQuery.
+                    contentdomParser(contentdom), 
+                    language, 
+                    false, 
+                    count, 
+                    0, 
+                    filter, 
+                    plasmaSearchQuery.SEARCHDOM_LOCAL, 
+                    null, 
+                    -1, 
+                    constraint, 
+                    false,
+                    sitehash, 
+                    yacyURL.TLD_any_zone_filter,
+                    client, 
+                    false);
            theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
            yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
            RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes), ""));
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@ -270,13 +270,19 @@ public class yacysearch {
                    }
                }
            }
+            if (post.containsKey("tenant")) {
+                final String tenant = post.get("tenant");
+                if (urlmask == null) urlmask = ".*" + tenant + ".*"; else urlmask = ".*" + tenant + urlmask;
+            }
            int site = querystring.indexOf("site:");
+            String sitehash = null;
            if (site >= 0) {
                int ftb = querystring.indexOf(' ', site);
                if (ftb == -1) ftb = querystring.length();
                String domain = querystring.substring(site + 5, ftb);
                query[0].remove("site:" + domain.toLowerCase());
                while(domain.startsWith(".")) domain = domain.substring(1);
+                sitehash = yacyURL.domhash(domain);
                if (domain.indexOf(".") < 0) domain = "\\." + domain; // is tld
                if (domain.length() > 0) {
                    if (urlmask == null) {
@ -286,10 +292,6 @@ public class yacysearch {
                    }
                }
            }
-            if (post.containsKey("tenant")) {
-                final String tenant = post.get("tenant");
-                if (urlmask == null) urlmask = ".*" + tenant + ".*"; else urlmask = ".*" + tenant + urlmask;
-            }
            if (urlmask == null || urlmask.length() == 0) urlmask = originalUrlMask; //if no urlmask was given
           
            // read the language from the language-restrict option 'lr'
@ -385,6 +387,7 @@ public class yacysearch {
                    20,
                    constraint,
                    true,
+                    sitehash,
                    yacyURL.TLD_any_zone_filter,
                    client,
                    authenticated);
--- a/source/de/anomic/kelondro/blob/BLOBArray.java
+++ b/source/de/anomic/kelondro/blob/BLOBArray.java
@ -37,12 +37,16 @@ import java.util.List;
 import java.util.TreeMap;
 import java.util.concurrent.CopyOnWriteArrayList;

+import de.anomic.kelondro.index.Row;
 import de.anomic.kelondro.order.ByteOrder;
 import de.anomic.kelondro.order.CloneableIterator;
 import de.anomic.kelondro.order.NaturalOrder;
 import de.anomic.kelondro.order.MergeIterator;
+import de.anomic.kelondro.text.ReferenceContainer;
+import de.anomic.kelondro.text.ReferenceContainerCache.blobFileEntries;
 import de.anomic.kelondro.util.DateFormatter;
 import de.anomic.kelondro.util.FileUtils;
+import de.anomic.kelondro.util.Log;

 public class BLOBArray implements BLOB {

@ -533,6 +537,125 @@ public class BLOBArray implements BLOB {
        blobs = null;
    }
    
+    /**
+     * Merges the two given BLOB files and mounts the merge result into this array.
+     * The actual file work is delegated to mergeWorker; the resulting file, if any,
+     * is handed to mountBLOB.
+     *
+     * @param f1 first file to merge
+     * @param f2 second file to merge
+     * @param payloadrow row definition used to read the containers of both files
+     * @param newFile target file for the merged content
+     * @return the file that was mounted, or null if mergeWorker produced no file
+     * @throws IOException if merging or mounting fails
+     */
+    public File mergeMount(File f1, File f2, Row payloadrow, File newFile) throws IOException {
+        final String firstName = f1.getName();
+        final String secondName = f2.getName();
+        Log.logInfo("BLOBArray", "merging " + firstName + " with " + secondName);
+        final File merged = mergeWorker(f1, f2, payloadrow, newFile);
+        if (merged != null) {
+            // only an existing result can be mounted and reported
+            mountBLOB(merged);
+            Log.logInfo("BLOBArray", "merged " + firstName + " with " + secondName + " into " + merged);
+        }
+        return merged;
+    }
+    
+    /**
+     * Produces one file out of the two given BLOB files.
+     * <ul>
+     * <li>both files empty: both are deleted, null is returned</li>
+     * <li>exactly one file empty: the empty one is deleted and the other one is renamed
+     *     to newFile; if the rename fails the non-empty file itself is returned</li>
+     * <li>both files have entries: they are merged into newFile and deleted afterwards</li>
+     * </ul>
+     *
+     * @param f1 first file to merge
+     * @param f2 second file to merge
+     * @param payloadrow row definition used to read the containers of both files
+     * @param newFile target file for the merged content
+     * @return the file that now holds the data, or null if there is none
+     *         (both inputs empty, or closing the writer failed)
+     * @throws IOException if reading the inputs or writing the merge fails
+     */
+    private File mergeWorker(File f1, File f2, Row payloadrow, File newFile) throws IOException {
+        CloneableIterator<ReferenceContainer> first = new blobFileEntries(f1, payloadrow);
+        CloneableIterator<ReferenceContainer> second = new blobFileEntries(f2, payloadrow);
+        final boolean firstHasEntries = first.hasNext();
+        final boolean secondHasEntries = second.hasNext();
+
+        if (!firstHasEntries && !secondHasEntries) {
+            // nothing to keep at all
+            FileUtils.deletedelete(f1);
+            FileUtils.deletedelete(f2);
+            return null;
+        }
+        if (!firstHasEntries) {
+            // only the second file carries data: take it over as the result
+            FileUtils.deletedelete(f1);
+            return f2.renameTo(newFile) ? newFile : f2;
+        }
+        if (!secondHasEntries) {
+            // only the first file carries data: take it over as the result
+            FileUtils.deletedelete(f2);
+            return f1.renameTo(newFile) ? newFile : f1;
+        }
+
+        // both files carry data: write a merged file
+        assert first.hasNext();
+        assert second.hasNext();
+        // NOTE(review): HeapWriter presumably writes to the .tmp file and moves it to newFile on close - confirm
+        File tmpFile = new File(newFile.getParentFile(), newFile.getName() + ".tmp");
+        HeapWriter writer = new HeapWriter(tmpFile, newFile, this.keylength(), this.ordering());
+        merge(first, second, this.ordering(), writer);
+        try {
+            writer.close(true);
+        } catch (IOException e) {
+            // closing failed: remove whatever was written and report that there is no result
+            FileUtils.deletedelete(tmpFile);
+            FileUtils.deletedelete(newFile);
+            e.printStackTrace();
+            return null;
+        }
+        // the merged file is complete, the source files are obsolete now
+        FileUtils.deletedelete(f1);
+        FileUtils.deletedelete(f2);
+        return newFile;
+    }
+    
+    /**
+     * Merges the containers of two iterators into the given writer.
+     * Both iterators are expected to deliver their containers in strictly ascending
+     * order of the word hash with respect to the given ordering (checked by assertions)
+     * and to have at least one element each. Containers with equal word hash are
+     * combined with ReferenceContainer.merge and written once; all other containers
+     * are written unchanged, so the output is ordered as well.
+     *
+     * Every container fetched from an iterator is written exactly once. In particular
+     * the container that is still pending when the other side runs empty, and the last
+     * container of the longer side, are flushed too.
+     *
+     * @param i1 first source of containers, must have a next element
+     * @param i2 second source of containers, must have a next element
+     * @param ordering order of the word hashes in both sources
+     * @param writer target for the merged containers
+     * @throws IOException if writing to the writer fails
+     */
+    private static void merge(CloneableIterator<ReferenceContainer> i1, CloneableIterator<ReferenceContainer> i2, ByteOrder ordering, HeapWriter writer) throws IOException {
+        assert i1.hasNext();
+        assert i2.hasNext();
+        ReferenceContainer c1, c2, c1o, c2o;
+        c1 = i1.next();
+        c2 = i2.next();
+        int e;
+        // invariant: c1 and c2 hold the next container of their side that has not been
+        // written yet; a side is set to null once it has nothing pending any more
+        while (c1 != null && c2 != null) {
+            e = ordering.compare(c1.getWordHash().getBytes(), c2.getWordHash().getBytes());
+            if (e < 0) {
+                // c1 is smaller: write it and advance the first side
+                writer.add(c1.getWordHash().getBytes(), c1.exportCollection());
+                c1o = c1;
+                c1 = i1.hasNext() ? i1.next() : null;
+                assert c1 == null || ordering.compare(c1.getWordHash().getBytes(), c1o.getWordHash().getBytes()) > 0;
+                continue;
+            }
+            if (e > 0) {
+                // c2 is smaller: write it and advance the second side
+                writer.add(c2.getWordHash().getBytes(), c2.exportCollection());
+                c2o = c2;
+                c2 = i2.hasNext() ? i2.next() : null;
+                assert c2 == null || ordering.compare(c2.getWordHash().getBytes(), c2o.getWordHash().getBytes()) > 0;
+                continue;
+            }
+            assert e == 0;
+            // same word hash on both sides: merge the entries and advance both sides
+            writer.add(c1.getWordHash().getBytes(), (c1.merge(c2)).exportCollection());
+            c1 = i1.hasNext() ? i1.next() : null;
+            c2 = i2.hasNext() ? i2.next() : null;
+        }
+        // catch up remaining entries; at most one side can still have something pending
+        assert c1 == null || c2 == null;
+        while (c1 != null) {
+            writer.add(c1.getWordHash().getBytes(), c1.exportCollection());
+            c1o = c1;
+            c1 = i1.hasNext() ? i1.next() : null;
+            assert c1 == null || ordering.compare(c1.getWordHash().getBytes(), c1o.getWordHash().getBytes()) > 0;
+        }
+        while (c2 != null) {
+            writer.add(c2.getWordHash().getBytes(), c2.exportCollection());
+            c2o = c2;
+            c2 = i2.hasNext() ? i2.next() : null;
+            assert c2 == null || ordering.compare(c2.getWordHash().getBytes(), c2o.getWordHash().getBytes()) > 0;
+        }
+        // finished with writing
+    }
+

    public static void main(final String[] args) {
        final File f = new File("/Users/admin/blobarraytest");
--- a/source/de/anomic/kelondro/text/IODispatcher.java
+++ b/source/de/anomic/kelondro/text/IODispatcher.java
@ -1,4 +1,4 @@
-// ReferenceContainerArray.java
+// IODispatcher.java
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 20.03.2009 on http://yacy.net
 //
@ -29,15 +29,9 @@ import java.io.IOException;
 import java.util.concurrent.ArrayBlockingQueue;

 import de.anomic.kelondro.blob.BLOBArray;
-import de.anomic.kelondro.blob.HeapWriter;
 import de.anomic.kelondro.index.Row;
-import de.anomic.kelondro.order.ByteOrder;
-import de.anomic.kelondro.order.CloneableIterator;
-import de.anomic.kelondro.text.ReferenceContainerCache.blobFileEntries;
-import de.anomic.kelondro.util.FileUtils;

 /**
- * merger class for files from ReferenceContainerArray.
 * this is a concurrent merger that can merge single files that are queued for merging.
 * when several ReferenceContainerArray classes host their ReferenceContainer file arrays,
 * they may share a single ReferenceContainerMerger object which does the sharing for all
@ -104,7 +98,7 @@ public class IODispatcher extends Thread {
    public synchronized void merge(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) {
        if (mergeQueue == null || !this.isAlive()) {
            try {
-                mergeMount(f1, f2, array, payloadrow, newFile);
+                array.mergeMount(f1, f2, payloadrow, newFile);
            } catch (IOException e) {
                e.printStackTrace();
            }
@ -116,7 +110,7 @@ public class IODispatcher extends Thread {
            } catch (InterruptedException e) {
                e.printStackTrace();
                try {
-                    mergeMount(f1, f2, array, payloadrow, newFile);
+                    array.mergeMount(f1, f2, payloadrow, newFile);
                } catch (IOException ee) {
                    ee.printStackTrace();
                }
@ -189,134 +183,12 @@ public class IODispatcher extends Thread {

        public File merge() {
            try {
-                return mergeMount(f1, f2, array, payloadrow, newFile);
+                return array.mergeMount(f1, f2, payloadrow, newFile);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return null;
        }
    }
-    
-    public static File mergeMount(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) throws IOException {
-        System.out.println("*** DEBUG mergeOldest: vvvvvvvvv array has " + array.entries() + " entries vvvvvvvvv");
-        System.out.println("*** DEBUG mergeOldest: unmounted " + f1.getName());
-        System.out.println("*** DEBUG mergeOldest: unmounted " + f2.getName());
-        File resultFile = mergeWorker(f1, f2, array, payloadrow, newFile);
-        if (resultFile == null) return null;
-        array.mountBLOB(resultFile);
-        System.out.println("*** DEBUG mergeOldest:   mounted " + newFile.getName());
-        System.out.println("*** DEBUG mergeOldest: ^^^^^^^^^^^ array has " + array.entries() + " entries ^^^^^^^^^^^");
-        return resultFile;
-    }
-    
-    private static File mergeWorker(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) throws IOException {
-        // iterate both files and write a new one
-        
-        CloneableIterator<ReferenceContainer> i1 = new blobFileEntries(f1, payloadrow);
-        CloneableIterator<ReferenceContainer> i2 = new blobFileEntries(f2, payloadrow);
-        if (!i1.hasNext()) {
-            if (i2.hasNext()) {
-                FileUtils.deletedelete(f1);
-                if (f2.renameTo(newFile)) return newFile;
-                return f2;
-            } else {
-                FileUtils.deletedelete(f1);
-                FileUtils.deletedelete(f2);
-                return null;
-            }
-        } else if (!i2.hasNext()) {
-            FileUtils.deletedelete(f2);
-            if (f1.renameTo(newFile)) return newFile;
-            return f1;
-        }
-        assert i1.hasNext();
-        assert i2.hasNext();
-        File tmpFile = new File(newFile.getParentFile(), newFile.getName() + ".tmp");
-        HeapWriter writer = new HeapWriter(tmpFile, newFile, array.keylength(), array.ordering());
-        merge(i1, i2, array.ordering(), writer);
-        try {
-            writer.close(true);
-            // we don't need the old files any more
-            FileUtils.deletedelete(f1);
-            FileUtils.deletedelete(f2);
-            return newFile;
-        } catch (IOException e) {
-            FileUtils.deletedelete(tmpFile);
-            FileUtils.deletedelete(newFile);
-            e.printStackTrace();
-            return null;
-        }
-    }
-    
-    private static void merge(CloneableIterator<ReferenceContainer> i1, CloneableIterator<ReferenceContainer> i2, ByteOrder ordering, HeapWriter writer) throws IOException {
-        assert i1.hasNext();
-        assert i2.hasNext();
-        ReferenceContainer c1, c2, c1o, c2o;
-        c1 = i1.next();
-        c2 = i2.next();
-        int e;
-        while (true) {
-            assert c1 != null;
-            assert c2 != null;
-            e = ordering.compare(c1.getWordHash().getBytes(), c2.getWordHash().getBytes());
-            if (e < 0) {
-                writer.add(c1.getWordHash().getBytes(), c1.exportCollection());
-                if (i1.hasNext()) {
-                    c1o = c1;
-                    c1 = i1.next();
-                    assert ordering.compare(c1.getWordHash().getBytes(), c1o.getWordHash().getBytes()) > 0;
-                    continue;
-                }
-                break;
-            }
-            if (e > 0) {
-                writer.add(c2.getWordHash().getBytes(), c2.exportCollection());
-                if (i2.hasNext()) {
-                    c2o = c2;
-                    c2 = i2.next();
-                    assert ordering.compare(c2.getWordHash().getBytes(), c2o.getWordHash().getBytes()) > 0;
-                    continue;
-                }
-                break;
-            }
-            assert e == 0;
-            // merge the entries
-            writer.add(c1.getWordHash().getBytes(), (c1.merge(c2)).exportCollection());
-            if (i1.hasNext() && i2.hasNext()) {
-                c1 = i1.next();
-                c2 = i2.next();
-                continue;
-            }
-            if (i1.hasNext()) c1 = i1.next();
-            if (i2.hasNext()) c2 = i2.next();
-            break;
-           
-        }
-        // catch up remaining entries
-        assert !(i1.hasNext() && i2.hasNext());
-        while (i1.hasNext()) {
-            //System.out.println("FLUSH REMAINING 1: " + c1.getWordHash());
-            writer.add(c1.getWordHash().getBytes(), c1.exportCollection());
-            if (i1.hasNext()) {
-                c1o = c1;
-                c1 = i1.next();
-                assert ordering.compare(c1.getWordHash().getBytes(), c1o.getWordHash().getBytes()) > 0;
-                continue;
-            }
-            break;
-        }
-        while (i2.hasNext()) {
-            //System.out.println("FLUSH REMAINING 2: " + c2.getWordHash());
-            writer.add(c2.getWordHash().getBytes(), c2.exportCollection());
-            if (i2.hasNext()) {
-                c2o = c2;
-                c2 = i2.next();
-                assert ordering.compare(c2.getWordHash().getBytes(), c2o.getWordHash().getBytes()) > 0;
-                continue;
-            }
-            break;
-        }
-        // finished with writing
-    }

 }
--- a/source/de/anomic/plasma/plasmaSearchQuery.java
+++ b/source/de/anomic/plasma/plasmaSearchQuery.java
@ -70,7 +70,8 @@ public final class plasmaSearchQuery {
    public boolean allofconstraint;
    public boolean onlineSnippetFetch;
    public plasmaSearchRankingProfile ranking;
-    public String host;
+    public String host; // this is the client host that starts the query, not a site operator
+    public String sitehash; // this is a domain hash, 6 bytes long or null
    public yacySeed remotepeer;
    public Long handle;
    // values that are set after a search:
@ -109,6 +110,7 @@ public final class plasmaSearchQuery {
        this.allofconstraint = false;
        this.onlineSnippetFetch = false;
        this.host = null;
+        this.sitehash = null;
        this.remotepeer = null;
        this.handle = Long.valueOf(System.currentTimeMillis());
        this.specialRights = false;
@ -125,6 +127,7 @@ public final class plasmaSearchQuery {
        final int lines, final int offset, final String urlMask,
        final int domType, final String domGroupName, final int domMaxTargets,
        final Bitfield constraint, final boolean allofconstraint,
+        final String site,
        final int domainzone,
        final String host,
        final boolean specialRights) {
@ -146,6 +149,7 @@ public final class plasmaSearchQuery {
 		this.domMaxTargets = domMaxTargets;
 		this.constraint = constraint;
 		this.allofconstraint = allofconstraint;
+		this.sitehash = site; assert site == null || site.length() == 6;
 		this.onlineSnippetFetch = onlineSnippetFetch;
 		this.host = host;
        this.remotepeer = null;
--- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java
@ -74,7 +74,11 @@ public final class plasmaSearchRankingProcess {
    private HashMap<String, ReferenceContainer>[] localSearchContainerMaps;
    private final int[] domZones;
    
-    public plasmaSearchRankingProcess(final plasmaWordIndex wordIndex, final plasmaSearchQuery query, final int maxentries, final int concurrency) {
+    public plasmaSearchRankingProcess(
+            final plasmaWordIndex wordIndex,
+            final plasmaSearchQuery query,
+            final int maxentries,
+            final int concurrency) {
        // we collect the urlhashes and construct a list with urlEntry objects
        // attention: if minEntries is too high, this method will not terminate within the maxTime
        // sortorder: 0 = hash, 1 = url, 2 = ranking
@ -183,6 +187,11 @@ public final class plasmaSearchRankingProcess {
                continue;
            }
            
+            // check site constraints
+            if (query.sitehash != null && !iEntry.urlHash().substring(6).equals(query.sitehash)) {
+                // filter out all domains that do not match with the site constraint
+            }
+            
            // count domZones
            /*
            indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0); // this eats up a lot of time!!!
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@ -579,13 +579,15 @@ public final class plasmaWordIndex {
        // search for the set of hashes and return a map of of wordhash:indexContainer containing the seach result

        // retrieve entities that belong to the hashes
-        HashMap<String, ReferenceContainer> inclusionContainers = (queryHashes.size() == 0) ? new HashMap<String, ReferenceContainer>(0) : getContainers(
-                        queryHashes,
-                        urlselection);
+        HashMap<String, ReferenceContainer> inclusionContainers =
+            (queryHashes.size() == 0) ?
+                    new HashMap<String, ReferenceContainer>(0) :
+                    getContainers(queryHashes, urlselection);
        if ((inclusionContainers.size() != 0) && (inclusionContainers.size() < queryHashes.size())) inclusionContainers = new HashMap<String, ReferenceContainer>(0); // prevent that only a subset is returned
-        final HashMap<String, ReferenceContainer> exclusionContainers = (inclusionContainers.size() == 0) ? new HashMap<String, ReferenceContainer>(0) : getContainers(
-                excludeHashes,
-                urlselection);
+        final HashMap<String, ReferenceContainer> exclusionContainers =
+            (inclusionContainers.size() == 0) ?
+                    new HashMap<String, ReferenceContainer>(0) :
+                    getContainers(excludeHashes, urlselection);
        return new HashMap[]{inclusionContainers, exclusionContainers};
    }

--- a/source/de/anomic/yacy/yacyURL.java
+++ b/source/de/anomic/yacy/yacyURL.java
@ -42,9 +42,7 @@ import de.anomic.tools.Punycode;
 import de.anomic.tools.Punycode.PunycodeException;

 public class yacyURL implements Serializable {
-    /**
-     * generated with svn4751 on 2008-05-01
-     */
+    
    private static final long serialVersionUID = -1173233022912141884L;
    public  static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter
    private static final Pattern backPathPattern = Pattern.compile("(/[^/]+(?<!/\\.{1,2})/)[.]{2}(?=/|$)|/\\.(?=/)|/(?=/)");
@ -56,6 +54,18 @@ public class yacyURL implements Serializable {
    private String protocol, host, userInfo, path, quest, ref, hash;
    private int port;
    
+    /**
+     * Computes the domain part of the URL hash for a host.
+     * The host is turned into a URL string by prepending "http://" unless it already
+     * starts with it; the result is the hash of that URL from character index 6 onward.
+     *
+     * @param host a host name or a URL starting with "http://"; may be null
+     * @return the hash part described above, or null if host is null or does not form a valid URL
+     */
+    public static String domhash(String host) {
+        if (host == null) return null; // nothing to hash; avoids a NullPointerException below
+        final String urlstring = host.startsWith("http://") ? host : "http://" + host;
+        try {
+            // a successfully constructed url is never null, so no further null check is needed
+            return new yacyURL(urlstring, null).hash().substring(6);
+        } catch (MalformedURLException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+    
    public yacyURL(final String url, final String hash) throws MalformedURLException {
        if (url == null) throw new MalformedURLException("url string is null");