diff --git a/source/de/anomic/plasma/plasmaWebStructure.java b/source/de/anomic/plasma/plasmaWebStructure.java index 063fca5db..e6c502fd8 100644 --- a/source/de/anomic/plasma/plasmaWebStructure.java +++ b/source/de/anomic/plasma/plasmaWebStructure.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.HashMap; import java.util.TreeMap; import java.util.SortedMap; +import java.util.TreeSet; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.net.URL; @@ -47,8 +48,9 @@ public class plasmaWebStructure { public static int maxCRLDump = 500000; public static int maxCRGDump = 200000; - public static int maxref = 100; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) - + public static int maxref = 200; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) + public static int maxhosts = 4000; // maximum number of hosts in web structure map + private StringBuffer crg; // global citation references private serverLog log; private File rankingPath, structureFile; @@ -63,8 +65,31 @@ public class plasmaWebStructure { this.crg = new StringBuffer(maxCRGDump); this.structure = new TreeMap(); this.structureFile = structureFile; + + // load web structure Map loadedStructure = serverFileUtils.loadHashMap(this.structureFile); if (loadedStructure != null) this.structure.putAll(loadedStructure); + + // delete outdated entries in case the structure is too big + if (this.structure.size() > maxhosts) { + // fill a set with last-modified - dates of the structure + TreeSet delset = new TreeSet(); + Map.Entry entry; + Iterator i = this.structure.entrySet().iterator(); + String key, value; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + key = (String) entry.getKey(); + value = (String) entry.getValue(); + delset.add(value.substring(0, 8) + key); + } + int delcount = this.structure.size() - (maxhosts * 9 / 10); + i = delset.iterator(); + while ((delcount > 0) && (i.hasNext())) { + this.structure.remove(((String) i.next()).substring(8)); + delcount--; + } + } } public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(URL url, String baseurlhash, Date docDate, plasmaParserDocument document, plasmaCondenser condenser) {