From 8b0aea6910cdf7e1b88353ea4ab66ee1f1f88238 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 11 Jun 2007 21:51:56 +0000 Subject: [PATCH] fixed automatic deletion of too many referenced hosts in web structure git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3866 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/plasma/plasmaWebStructure.java | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/source/de/anomic/plasma/plasmaWebStructure.java b/source/de/anomic/plasma/plasmaWebStructure.java index 3380f1e8e..063fca5db 100644 --- a/source/de/anomic/plasma/plasmaWebStructure.java +++ b/source/de/anomic/plasma/plasmaWebStructure.java @@ -47,7 +47,7 @@ public class plasmaWebStructure { public static int maxCRLDump = 500000; public static int maxCRGDump = 200000; - public static int maxref = 10; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) + public static int maxref = 100; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) private StringBuffer crg; // global citation references private serverLog log; @@ -260,21 +260,27 @@ public class plasmaWebStructure { // check if the maxref is exceeded if (refs.size() > maxref) { - // shrink the references: the entry with the smallest number of references is removed - int minrefcount = Integer.MAX_VALUE; - String minrefkey = null; - Iterator i = refs.entrySet().iterator(); - Map.Entry entry; - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - if (((Integer) entry.getValue()).intValue() < minrefcount) { - minrefcount = ((Integer) entry.getValue()).intValue(); - minrefkey = (String) entry.getKey(); - } - } - // remove the smallest - if (minrefkey != null) refs.remove(minrefkey); - } + int shrink = refs.size() - (maxref * 9 / 10); + delloop: while (shrink > 0) { + // shrink the references: the entry with the smallest number of references is removed + int minrefcount = Integer.MAX_VALUE; + String minrefkey = null; + Iterator i = refs.entrySet().iterator(); + Map.Entry entry; + findloop: while (i.hasNext()) { + entry = (Map.Entry) i.next(); + if (((Integer) entry.getValue()).intValue() < minrefcount) { + minrefcount = ((Integer) entry.getValue()).intValue(); + minrefkey = (String) entry.getKey(); + } + if (minrefcount == 1) break findloop; + } + // remove the smallest + if (minrefkey == null) break delloop; + refs.remove(minrefkey); + shrink--; + } + } // store the map back to the structure structure.put(domhash + "," + url.getHost(), map2refstr(refs));