From c3b55455fc76db04d7dc0619de05aedc9d1a91a2 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 29 Jan 2015 02:45:32 +0100 Subject: [PATCH] enhanced initialization speed of vocabularies by using better normalization and by removal of unused data structures --- .../net/yacy/cora/lod/vocabulary/Tagging.java | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 507cf8289..52d026894 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -42,7 +42,6 @@ import net.yacy.cora.geo.Locations; import net.yacy.cora.storage.Files; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; -import net.yacy.search.Switchboard; public class Tagging { @@ -53,7 +52,6 @@ public class Tagging { private final Map synonym2term; private final Map term2synonym; private final Map term2objectlink; - private final Map> synonym2synonyms; private File propFile; private boolean isFacet; // true if the vocabulary shall generate a navigation facet @@ -97,7 +95,6 @@ public class Tagging { this.synonym2term = new ConcurrentHashMap(); this.term2synonym = new ConcurrentHashMap(); this.term2objectlink = new ConcurrentHashMap(); - this.synonym2synonyms = new ConcurrentHashMap>(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + name; this.objectspace = null; @@ -127,7 +124,6 @@ public class Tagging { this.synonym2term.clear(); this.term2synonym.clear(); this.term2objectlink.clear(); - this.synonym2synonyms.clear(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + this.navigatorName; @@ -160,9 +156,6 @@ public class Tagging { this.term2synonym.put(term, synonym); if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); synonyms.add(synonym); - for (String s: synonyms) { - this.synonym2synonyms.put(s, synonyms); - } } } else { // @@ -200,7 +193,6 @@ public class Tagging { this.synonym2term.clear(); this.term2synonym.clear(); this.term2objectlink.clear(); - this.synonym2synonyms.clear(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + this.navigatorName; this.objectspace = null; @@ -262,9 +254,6 @@ public class Tagging { this.term2synonym.put(term, synonym); if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); synonyms.add(synonym); - for (String s: synonyms) { - this.synonym2synonyms.put(s, synonyms); - } } } catch (final InterruptedException e) { } @@ -488,19 +477,15 @@ public class Tagging { return this.objectspace; } - private final static Pattern PATTERN_SPACEPLUS = Pattern.compile(" \\+"); - private final static Pattern PATTERN_SPACESLASH= Pattern.compile(" /"); - private final static Pattern PATTERN_PLUS = Pattern.compile("\\+"); - private final static Pattern PATTERN_SLASH = Pattern.compile("/"); + private final static Pattern PATTERN_SPACESLASHPLUS = Pattern.compile(" (/|\\+)"); + private final static Pattern PATTERN_SLASHPLUS = Pattern.compile("/|\\+"); private final static Pattern PATTERN_SPACESPACE = Pattern.compile(" "); private final String normalizeKey(String k) { k = k.trim(); // remove symbols that are bad in a query attribute - k = PATTERN_SPACEPLUS.matcher(k).replaceAll(", "); - k = PATTERN_SPACESLASH.matcher(k).replaceAll(", "); - k = PATTERN_PLUS.matcher(k).replaceAll(","); - k = PATTERN_SLASH.matcher(k).replaceAll(","); + k = PATTERN_SPACESLASHPLUS.matcher(k).replaceAll(", "); + k = PATTERN_SLASHPLUS.matcher(k).replaceAll(","); k = PATTERN_SPACESPACE.matcher(k).replaceAll(" "); return k; }