diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java
index ee5697c61..dfa781255 100644
--- a/source/de/anomic/data/URLAnalysis.java
+++ b/source/de/anomic/data/URLAnalysis.java
@@ -50,9 +50,12 @@ import java.util.regex.Pattern;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 
+import de.anomic.kelondro.index.HandleSet;
 import de.anomic.kelondro.index.IntegerHandleIndex;
 import de.anomic.kelondro.order.Base64Order;
 import de.anomic.kelondro.text.IndexCollection;
+import de.anomic.kelondro.text.MetadataRepository;
+import de.anomic.kelondro.text.MetadataRowContainer;
 import de.anomic.kelondro.text.ReferenceRow;
 import de.anomic.kelondro.util.MemoryControl;
 import de.anomic.yacy.yacyURL;
@@ -395,29 +398,75 @@ public class URLAnalysis {
                     ReferenceRow.urlEntryRow);
             System.out.println("COLLECTION INDEX REFERENCE COLLECTION starting dump of statistics");
             idx.dump(new File(statisticPath));
-            System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished dump");
+            System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished dump, wrote " + idx.size() + " entries to " + statisticPath);
         } catch (IOException e) {
             e.printStackTrace();
         }
     }
 
+    public static int diffurlcol(String metadataPath, String statisticFile, String diffFile) throws IOException {
+        System.out.println("COLLECTION INDEX DIFF URL-COL startup");
+        IntegerHandleIndex idx = new IntegerHandleIndex(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(statisticFile));
+        MetadataRepository mr = new MetadataRepository(new File(metadataPath));
+        HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, 100);
+        System.out.println("COLLECTION INDEX DIFF URL-COL loaded dump, starting diff");
+        byte[] refhash;
+        Iterator<byte[]> i = mr.iterator();
+        long start = System.currentTimeMillis();
+        long update = start - 7000; // print the first progress message after 3 seconds
+        int c = 0;
+        while (i.hasNext()) {
+            refhash = i.next();
+            if (idx.get(refhash) == -1) {
+                // the key exists as urlhash in the URL database, but not in the collection as referenced urlhash
+                hs.put(refhash);
+            }
+            c++;
+            if (System.currentTimeMillis() - update > 10000) {
+                System.out.println("COLLECTION INDEX DIFF URL-COL running, checked " + c + ", found " + hs.size() + " missing references so far, " + (((System.currentTimeMillis() - start) * (mr.size() - c) / c) / 60000) + " minutes remaining");
+                update = System.currentTimeMillis();
+            }
+        }
+        mr.close();
+        System.out.println("COLLECTION INDEX DIFF URL-COL finished diff, starting dump to " + diffFile);
+        c = hs.dump(new File(diffFile));
+        System.out.println("COLLECTION INDEX DIFF URL-COL finished dump, wrote " + c + " references that occur in the URL-DB, but not in the collection-dump");
+        return c;
+    }
+
     public static void main(String[] args) {
-        // example: java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -stat DATA/EXPORT/urls1.txt.gz
-        if (args[0].equals("-stat") && args.length >= 2) {
-            for (int i = 1; i < args.length; i++) genstat(args[i]);
+        if (args[0].equals("-stat") && args.length >= 2) {
+            // generate statistics about common words in a file, store to <file>.stat
+            // example:
+            // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -stat DATA/EXPORT/urls1.txt.gz
+            for (int i = 1; i < args.length; i++) genstat(args[i]);
         } else if (args[0].equals("-host") && args.length >= 2) {
+            // generate a file <file>.host containing only the hosts of the urls
             for (int i = 1; i < args.length; i++) genhost(args[i]);
         } else if (args[0].equals("-sort") && args.length >= 2) {
+            // generate file <file>.x.sort with sorted lists and split the file in smaller pieces
             for (int i = 1; i < args.length; i++) sortsplit(args[i]);
         } else if (args[0].equals("-incollection") && args.length >= 2) {
+            // generate a dump of all referenced URL hashes from a given RICOLLECTION
             // example:
             // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -incollection DATA/INDEX/freeworld/TEXT/RICOLLECTION used.dump
             incollection(args[1], args[2]);
+        } else if (args[0].equals("-diffurlcol") && args.length >= 3) {
+            // make a diff-file that contains hashes from the url database that do not occur in the collection reference dump
+            // example:
+            // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT used.dump diffurlcol.dump
+            try {
+                diffurlcol(args[1], args[2], args[3]);
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
         } else {
             System.out.println("usage:");
-            System.out.println("-stat  generate a statistics about common words in file, store to <file>.stat");
-            System.out.println("-host  generate a file <file>.host containing only the hosts of the urls");
-            System.out.println("-sort  generate file <file>.x.sort with sorted lists and split the file in smaller pieces");
+            System.out.println("-stat          generate statistics about common words in a file, store to <file>.stat");
+            System.out.println("-host          generate a file <file>.host containing only the hosts of the urls");
+            System.out.println("-sort          generate file <file>.x.sort with sorted lists and split the file in smaller pieces");
+            System.out.println("-incollection  generate a dump of all referenced URL hashes from a RICOLLECTION");
+            System.out.println("-diffurlcol    find URL hashes that occur in the URL-DB but not in a collection reference dump");
         }
     }
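Taken together, the two new modes form a pipeline: -incollection dumps every URL hash that is referenced in the RICOLLECTION, and -diffurlcol compares that dump against the URL metadata database. A possible session, reusing the example paths from the comments above:

    java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -incollection DATA/INDEX/freeworld/TEXT/RICOLLECTION used.dump
    java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT used.dump diffurlcol.dump

The resulting diffurlcol.dump holds the hashes that exist in the URL-DB but are not referenced by any collection entry, i.e. candidates for a later cleanup step.
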
diff --git a/source/de/anomic/kelondro/index/HandleSet.java b/source/de/anomic/kelondro/index/HandleSet.java
new file mode 100644
index 000000000..3bcf74676
--- /dev/null
+++ b/source/de/anomic/kelondro/index/HandleSet.java
@@ -0,0 +1,158 @@
+// HandleSet.java
+// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 10.03.2009 on http://www.anomic.de
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+package de.anomic.kelondro.index;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import de.anomic.kelondro.order.ByteOrder;
+import de.anomic.kelondro.order.CloneableIterator;
+
+public class HandleSet implements Iterable<byte[]> {
+
+    private final Row rowdef;
+    private ObjectIndex index;
+
+    public HandleSet(final int keylength, final ByteOrder objectOrder, final int space) {
+        this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key")}, objectOrder, 0);
+        this.index = new ObjectIndexCache(rowdef, space);
+    }
+
+    /**
+     * initialize a HandleSet with the content of a dump
+     * @param keylength
+     * @param objectOrder
+     * @param file
+     * @throws IOException
+     */
+    public HandleSet(final int keylength, final ByteOrder objectOrder, final File file) throws IOException {
+        this(keylength, objectOrder, (int) (file.length() / keylength)); // each dump record carries only the key
+        // read the index dump and fill the index
+        InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
+        byte[] a = new byte[keylength];
+        int c;
+        while (true) {
+            c = is.read(a);
+            if (c <= 0) break;
+            this.index.addUnique(this.rowdef.newEntry(a));
+        }
+        is.close();
+        assert this.index.size() == file.length() / keylength;
+    }
+
+    /**
+     * write a dump of the set to a file. All entries are written in order
+     * which makes it possible to read them again in a fast way
+     * @param file
+     * @return the number of written entries
+     * @throws IOException
+     */
+    public int dump(File file) throws IOException {
+        // we must use an iterator from the combined index, because we need the entries sorted
+        // otherwise we could just write the byte[] from the in-memory kelondroRowSet which would make
+        // everything much faster, but this is not an option here.
+        Iterator<Row.Entry> i = this.index.rows(true, null);
+        OutputStream os = new BufferedOutputStream(new FileOutputStream(file), 1024 * 1024);
+        int c = 0;
+        while (i.hasNext()) {
+            os.write(i.next().bytes());
+            c++;
+        }
+        os.flush();
+        os.close();
+        return c;
+    }
+
+    public Row row() {
+        return index.row();
+    }
+
+    public void clear() throws IOException {
+        this.index.clear();
+    }
+
+    public synchronized boolean has(final byte[] key) {
+        assert (key != null);
+        return index.has(key);
+    }
+
+    public synchronized boolean put(final byte[] key) throws IOException {
+        assert (key != null);
+        final Row.Entry newentry = index.row().newEntry();
+        newentry.setCol(0, key);
+        final Row.Entry oldentry = index.put(newentry);
+        return oldentry == null; // true if the key was not in the set before
+    }
+
+    public synchronized void putUnique(final byte[] key) throws IOException {
+        assert (key != null);
+        final Row.Entry newentry = this.rowdef.newEntry();
+        newentry.setCol(0, key);
+        index.addUnique(newentry);
+    }
+
+    public synchronized boolean remove(final byte[] key) throws IOException {
+        assert (key != null);
+        final Row.Entry indexentry = index.remove(key);
+        return indexentry != null; // true if the key was present
+    }
+
+    public synchronized byte[] removeone() throws IOException {
+        final Row.Entry indexentry = index.removeOne();
+        if (indexentry == null) return null;
+        return indexentry.getColBytes(0); // return the removed key
+    }
+
+    public synchronized int size() {
+        return index.size();
+    }
+
+    public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
+        try {
+            return index.keys(up, firstKey);
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    public Iterator<byte[]> iterator() {
+        return keys(true, null);
+    }
+
+    public synchronized void close() {
+        index.close();
+        index = null;
+    }
+}
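As an illustration of the new class, a minimal round-trip might look as follows (a sketch, not part of the patch; the 12-byte key length, the file name and the use of Base64Order.enhancedCoder are assumptions chosen to match the key format used elsewhere in this patch):

    // fill a HandleSet, dump it sorted, and re-load the dump
    HandleSet set = new HandleSet(12, Base64Order.enhancedCoder, 100);
    set.putUnique("AAAAAAAAAAAA".getBytes());
    set.putUnique("BBBBBBBBBBBB".getBytes());
    int written = set.dump(new File("handles.dump")); // entries are written in key order
    set.close();
    HandleSet reloaded = new HandleSet(12, Base64Order.enhancedCoder, new File("handles.dump"));
    assert reloaded.size() == written;

Because dump() writes the keys in sorted order, the file constructor can rebuild the set with plain sequential addUnique() calls, which is what keeps the re-load fast.
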
diff --git a/source/de/anomic/kelondro/index/IntegerHandleIndex.java b/source/de/anomic/kelondro/index/IntegerHandleIndex.java
index f02e1db47..76c576f2d 100644
--- a/source/de/anomic/kelondro/index/IntegerHandleIndex.java
+++ b/source/de/anomic/kelondro/index/IntegerHandleIndex.java
@@ -66,7 +66,7 @@ public class IntegerHandleIndex {
         this(keylength, objectOrder, (int) (file.length() / (keylength + 8)));
         // read the index dump and fill the index
         InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
-        byte[] a = new byte[keylength + 8];
+        byte[] a = new byte[keylength + 4];
         int c;
         while (true) {
             c = is.read(a);
@@ -74,7 +74,7 @@ public class IntegerHandleIndex {
             this.index.addUnique(this.rowdef.newEntry(a));
         }
         is.close();
-        assert this.index.size() == file.length() / (keylength + 8);
+        assert this.index.size() == file.length() / (keylength + 4);
     }
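The corrected buffer size matches the on-disk record layout of an IntegerHandleIndex dump: each record is the key followed by a 4-byte int handle, not an 8-byte long. For example, with a 12-byte key (an assumed value, for illustration only):

    int keylength = 12;
    int recordsize = keylength + 4;                        // 16 bytes per dumped entry
    long records = new File("used.dump").length() / recordsize;

The space estimate in the unchanged context line above still divides by (keylength + 8); that merely undersizes the initial index, which then grows as entries are added.
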
diff --git a/source/de/anomic/kelondro/text/MetadataRepository.java b/source/de/anomic/kelondro/text/MetadataRepository.java
index 6a80c0674..cf2c26c9f 100644
--- a/source/de/anomic/kelondro/text/MetadataRepository.java
+++ b/source/de/anomic/kelondro/text/MetadataRepository.java
@@ -52,18 +52,20 @@ import de.anomic.kelondro.util.ScoreCluster;
 import de.anomic.kelondro.util.Log;
 import de.anomic.yacy.yacyURL;
 
-public final class MetadataRepository {
+public final class MetadataRepository implements Iterable<byte[]> {
 
     // class objects
-    ObjectIndex urlIndexFile;
-    private Export exportthread = null; // will have a export thread assigned if exporter is running
-    private File location = null;
-    ArrayList<hostStat> statsDump = null;
+    private ObjectIndex urlIndexFile;
+    private Export exportthread; // will have an export thread assigned if exporter is running
+    private File location;
+    private ArrayList<hostStat> statsDump;
 
-    public MetadataRepository(final File indexSecondaryPath) {
-        super();
-        this.location = new File(indexSecondaryPath, "TEXT");
-        urlIndexFile = new Cache(new SplitTable(this.location, "urls", MetadataRowContainer.rowdef, false));
+    public MetadataRepository(final File path) {
+        this.location = path;
+        this.urlIndexFile = new Cache(new SplitTable(this.location, "urls", MetadataRowContainer.rowdef, false));
+        this.exportthread = null; // will have an export thread assigned if exporter is running
+        this.statsDump = null;
     }
 
     public void clearCache() {
@@ -151,6 +153,19 @@ public final class MetadataRepository {
         return urlIndexFile.has(urlHash.getBytes());
     }
 
+    public CloneableIterator<byte[]> keys(boolean up, byte[] firstKey) {
+        try {
+            return this.urlIndexFile.keys(up, firstKey);
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    public Iterator<byte[]> iterator() {
+        return keys(true, null);
+    }
+
     public CloneableIterator<MetadataRowContainer> entries() throws IOException {
         // enumerates entry elements
         return new kiter();
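Implementing Iterable<byte[]> is what allows diffurlcol() above to walk the whole URL database with a plain iterator. A usage sketch (the path is an example):

    MetadataRepository repository = new MetadataRepository(new File("DATA/INDEX/freeworld/TEXT"));
    for (byte[] urlhash : repository) {
        // urlhash is the key of one URL entry, delivered in ascending key order
    }
    repository.close();
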
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 7ffb01918..f7f08ac2d 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -45,7 +45,6 @@ import de.anomic.kelondro.index.RowCollection;
 import de.anomic.kelondro.order.Base64Order;
 import de.anomic.kelondro.order.ByteOrder;
 import de.anomic.kelondro.order.CloneableIterator;
-import de.anomic.kelondro.order.MergeIterator;
 import de.anomic.kelondro.order.Order;
 import de.anomic.kelondro.order.RotateIterator;
 import de.anomic.kelondro.text.Index;
@@ -96,19 +95,19 @@ public final class plasmaWordIndex implements Index {
     public static final long CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE = 60L * 24L * 30L;
 
-    private final IndexCache indexCache;
-    private final IndexCollection collections; // new database structure to replace AssortmentCluster and FileCluster
-    private final Log log;
-    public MetadataRepository referenceURL;
-    public final yacySeedDB seedDB;
-    private final File primaryRoot, secondaryRoot;
-    public IndexingStack queuePreStack;
-    public CrawlProfile profilesActiveCrawls, profilesPassiveCrawls;
-    public CrawlProfile.entry defaultProxyProfile;
-    public CrawlProfile.entry defaultRemoteProfile;
-    public CrawlProfile.entry defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
-    public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
-    private final File queuesRoot;
+    private final IndexCache       indexCache;
+    private final IndexCollection  collections; // new database structure to replace AssortmentCluster and FileCluster
+    private final Log              log;
+    public MetadataRepository      referenceURL;
+    public final yacySeedDB        seedDB;
+    private final File             primaryRoot, secondaryRoot;
+    public IndexingStack           queuePreStack;
+    public CrawlProfile            profilesActiveCrawls, profilesPassiveCrawls;
+    public CrawlProfile.entry      defaultProxyProfile;
+    public CrawlProfile.entry      defaultRemoteProfile;
+    public CrawlProfile.entry      defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
+    public CrawlProfile.entry      defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
+    private final File             queuesRoot;
 
     public plasmaWordIndex(
             final String networkName,
@@ -171,7 +170,7 @@ public final class plasmaWordIndex implements Index {
                 useCommons);
 
         // create LURL-db
-        referenceURL = new MetadataRepository(this.secondaryRoot);
+        referenceURL = new MetadataRepository(new File(this.secondaryRoot, "TEXT"));
 
         // make crawl profiles database and default profiles
         this.queuesRoot = new File(this.primaryRoot, "QUEUES");
diff --git a/source/yacy.java b/source/yacy.java
index 6dc8aeaa9..6c327f4b2 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -667,10 +667,10 @@ public final class yacy {
         log.logInfo("STARTING URL CLEANUP");
 
         // db containing all currently loades urls
-        final MetadataRepository currentUrlDB = new MetadataRepository(new File(indexSecondaryRoot, networkName));
+        final MetadataRepository currentUrlDB = new MetadataRepository(new File(new File(indexSecondaryRoot, networkName), "TEXT"));
 
         // db used to hold all neede urls
-        final MetadataRepository minimizedUrlDB = new MetadataRepository(new File(indexRoot2, networkName));
+        final MetadataRepository minimizedUrlDB = new MetadataRepository(new File(new File(indexRoot2, networkName), "TEXT"));
 
         final int cacheMem = (int)(MemoryControl.max() - MemoryControl.total());
         if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
@@ -849,7 +849,7 @@ public final class yacy {
         final File root = homePath;
         final File indexroot = new File(root, "DATA/INDEX");
         try {Log.configureLogging(homePath, new File(homePath, "DATA/LOG/yacy.logging"));} catch (final Exception e) {}
-        final MetadataRepository currentUrlDB = new MetadataRepository(new File(indexroot, networkName));
+        final MetadataRepository currentUrlDB = new MetadataRepository(new File(new File(indexroot, networkName), "TEXT"));
         currentUrlDB.deadlinkCleaner(null);
         currentUrlDB.close();
     }
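The constructor change explains the wrapped File constructions in plasmaWordIndex and yacy: MetadataRepository no longer appends the "TEXT" segment itself, so every caller now builds the complete index path, following this pattern:

    // before: new MetadataRepository(new File(indexSecondaryRoot, networkName)) -- "TEXT" was appended internally
    // after:  the caller supplies the full index directory
    MetadataRepository db = new MetadataRepository(new File(new File(indexSecondaryRoot, networkName), "TEXT"));

This is also what makes the standalone -diffurlcol mode possible: URLAnalysis can point the repository at any TEXT directory directly, such as DATA/INDEX/freeworld/TEXT.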