From a1f922b34a84f22b72aa3db13605ee070c1890ae Mon Sep 17 00:00:00 2001 From: luccioman Date: Mon, 5 Dec 2016 10:57:37 +0100 Subject: [PATCH] Reduced locations vocabulary memory footprint. Reduced this vocabulary memory usage : - by using only one map term2entries instead of two maps having the same key set - by generating the location object links on the fly using the GeoLocation data instead of storing many duplicates of string prefix "http://www.openstreetmap.org/?lat=" Measurements with VisualVM and GeoNames 0 enabled (cities with a population > 1000) : - AutotaggingLibrary retained size : - initial : 309 718 763 bytes - after refactoring : 159 224 641 bytes --- .../lod/vocabulary/LocationTaggingEntry.java | 53 +++++++++++++ .../lod/vocabulary/SynonymTaggingEntry.java | 54 +++++++++++++ .../net/yacy/cora/lod/vocabulary/Tagging.java | 77 ++++++++++++------- .../cora/lod/vocabulary/TaggingEntry.java | 39 ++++++++++ .../TaggingEntryWithObjectLink.java | 50 ++++++++++++ 5 files changed, 246 insertions(+), 27 deletions(-) create mode 100644 source/net/yacy/cora/lod/vocabulary/LocationTaggingEntry.java create mode 100644 source/net/yacy/cora/lod/vocabulary/SynonymTaggingEntry.java create mode 100644 source/net/yacy/cora/lod/vocabulary/TaggingEntry.java create mode 100644 source/net/yacy/cora/lod/vocabulary/TaggingEntryWithObjectLink.java diff --git a/source/net/yacy/cora/lod/vocabulary/LocationTaggingEntry.java b/source/net/yacy/cora/lod/vocabulary/LocationTaggingEntry.java new file mode 100644 index 000000000..1ad3d0b74 --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/LocationTaggingEntry.java @@ -0,0 +1,53 @@ +// LocationTaggingEntry.java +// Copyright 2016 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software 
Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.lod.vocabulary; + +import net.yacy.cora.geo.GeoLocation; + +/** + * Entry with a synonym and a location for a term in the {@link Tagging} class. + */ +class LocationTaggingEntry extends SynonymTaggingEntry { + + /** Geographical location of the object */ + private GeoLocation location; + + /** + * + * @param synonym term synonym + * @param location geographical location of the object. Must not be null. + * @throws IllegalArgumentException when a parameter is null + */ + public LocationTaggingEntry(String synonym, GeoLocation location) { + super(synonym); + if(location == null) { + throw new IllegalArgumentException("location must not be null"); + } + this.location = location; + } + + @Override + public String getObjectLink() { + return "http://www.openstreetmap.org/?lat=" + location.lat() + "&lon=" + location.lon() + "&zoom=16"; + } + +} diff --git a/source/net/yacy/cora/lod/vocabulary/SynonymTaggingEntry.java b/source/net/yacy/cora/lod/vocabulary/SynonymTaggingEntry.java new file mode 100644 index 000000000..50637fb1f --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/SynonymTaggingEntry.java @@ -0,0 +1,54 @@ +// SynonymTaggingEntry.java +// Copyright 2016 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the 
GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.lod.vocabulary; + +/** + * Synonym entry for a term in the {@link Tagging} class + */ +class SynonymTaggingEntry implements TaggingEntry { + + /** Term synonym */ + protected String synonym; + + + /** + * @param synonym a term synonym + * @throws IllegalArgumentException when synonym is null + */ + public SynonymTaggingEntry(String synonym) { + if(synonym == null) { + throw new IllegalArgumentException("synonym must not be null"); + } + this.synonym = synonym; + } + + @Override + public String getSynonym() { + return synonym; + } + + @Override + public String getObjectLink() { + return null; + } + +} diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index b6afab028..e385de395 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -50,8 +50,10 @@ public class Tagging { private final String navigatorName; private final Map synonym2term; - private final Map term2synonym; - private final Map term2objectlink; + + /** Terms associated to TaggingEntry instances each having a synonym and an eventual object link */ + private final Map term2entries; + private File propFile; private boolean isFacet; // true if the vocabulary shall generate a navigation facet @@ -93,8 +95,7 @@ public class Tagging { public 
Tagging(String name) { this.navigatorName = name; this.synonym2term = new ConcurrentHashMap(); - this.term2synonym = new ConcurrentHashMap(); - this.term2objectlink = new ConcurrentHashMap(); + this.term2entries= new ConcurrentHashMap(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + name; this.objectspace = null; @@ -122,8 +123,7 @@ public class Tagging { this.objectspace = objectspace; if (propFile == null) { this.synonym2term.clear(); - this.term2synonym.clear(); - this.term2objectlink.clear(); + this.term2entries.clear(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + this.navigatorName; @@ -134,8 +134,12 @@ public class Tagging { term = normalizeKey(e.getKey()); v = normalizeTerm(e.getKey()); this.synonym2term.put(v, term); - this.term2synonym.put(term, v); - if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); + if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) { + this.term2entries.put(term, new TaggingEntryWithObjectLink(v, e.getValue().getObjectlink())); + } else { + this.term2entries.put(term, new SynonymTaggingEntry(v)); + } + continue vocloop; } term = normalizeKey(e.getKey()); @@ -149,12 +153,15 @@ public class Tagging { if (synonym.isEmpty()) continue tagloop; synonyms.add(synonym); this.synonym2term.put(synonym, term); - this.term2synonym.put(term, synonym); + this.term2entries.put(term, new SynonymTaggingEntry(synonym)); } String synonym = normalizeTerm(term); this.synonym2term.put(synonym, term); - this.term2synonym.put(term, synonym); - if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); + if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) { + this.term2entries.put(term, new TaggingEntryWithObjectLink(synonym, e.getValue().getObjectlink())); 
+ } else { + this.term2entries.put(term, new SynonymTaggingEntry(synonym)); + } synonyms.add(synonym); } } else { @@ -179,11 +186,12 @@ public class Tagging { for (String loc: locNames) { String syn = normalizeTerm(loc); this.synonym2term.put(syn, loc); - this.term2synonym.put(loc, syn); geo = location.find(loc, true); if (!geo.isEmpty()) { g = geo.iterator().next(); - this.term2objectlink.put(loc, "http://www.openstreetmap.org/?lat=" + g.lat() + "&lon=" + g.lon() + "&zoom=16"); + this.term2entries.put(loc, new LocationTaggingEntry(syn, g)); + } else { + this.term2entries.put(loc, new SynonymTaggingEntry(syn)); } } } @@ -191,8 +199,7 @@ public class Tagging { private void init() throws IOException { if (this.propFile == null) return; this.synonym2term.clear(); - this.term2synonym.clear(); - this.term2objectlink.clear(); + this.term2entries.clear(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + this.navigatorName; this.objectspace = null; @@ -231,8 +238,11 @@ public class Tagging { term = normalizeKey(pl[0]); v = normalizeTerm(pl[0]); this.synonym2term.put(v, term); - this.term2synonym.put(term, v); - if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); + if (pl[2] != null && pl[2].length() > 0) { + this.term2entries.put(term, new TaggingEntryWithObjectLink(v, pl[2])); + } else { + this.term2entries.put(term, new SynonymTaggingEntry(v)); + } continue vocloop; } term = normalizeKey(pl[0]); @@ -247,12 +257,15 @@ public class Tagging { if (synonym.isEmpty()) continue tagloop; synonyms.add(synonym); this.synonym2term.put(synonym, term); - this.term2synonym.put(term, synonym); + this.term2entries.put(term, new SynonymTaggingEntry(synonym)); } String synonym = normalizeTerm(term); this.synonym2term.put(synonym, term); - this.term2synonym.put(term, synonym); - if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); + if (pl[2] != null && pl[2].length() > 0) { + this.term2entries.put(term, new 
TaggingEntryWithObjectLink(synonym, pl[2])); + } else { + this.term2entries.put(term, new SynonymTaggingEntry(synonym)); + } synonyms.add(synonym); } } catch (final InterruptedException e) { @@ -270,7 +283,7 @@ public class Tagging { } public int size() { - return this.term2objectlink.size(); + return this.term2entries.size(); } public void put(String term, String synonyms, String objectlink) throws IOException { @@ -375,13 +388,15 @@ public class Tagging { private Map> reconstructionSets() { Map> r = new TreeMap>(); - for (Map.Entry e: this.term2synonym.entrySet()) { + for (Map.Entry e: this.term2entries.entrySet()) { Set s = r.get(e.getKey()); if (s == null) { s = new TreeSet(); r.put(e.getKey(), s); } - if (e.getValue() != null && e.getValue().length() != 0) s.add(e.getValue()); + if (e.getValue() != null && e.getValue().getSynonym() != null && e.getValue().getSynonym().length() != 0) { + s.add(e.getValue().getSynonym()); + } } for (Map.Entry e: this.synonym2term.entrySet()) { Set s = r.get(e.getValue()); @@ -398,14 +413,22 @@ public class Tagging { Map> r = reconstructionSets(); Map map = new TreeMap(); for (Map.Entry> e: r.entrySet()) { - String objectlink = this.term2objectlink.get(e.getKey()); - map.put(e.getKey(), new SOTuple(e.getValue().toArray(new String[e.getValue().size()]), objectlink == null ? "" : objectlink)); + TaggingEntry entry = this.term2entries.get(e.getKey()); + String objectLink = null; + if(entry != null) { + objectLink = entry.getObjectLink(); + } + map.put(e.getKey(), new SOTuple(e.getValue().toArray(new String[e.getValue().size()]), objectLink == null ? 
"" : objectLink)); } return map; } public String getObjectlink(String term) { - return this.term2objectlink.get(term); + TaggingEntry entry = this.term2entries.get(term); + if(entry != null) { + return entry.getObjectLink(); + } + return null; } public Map list() { @@ -526,7 +549,7 @@ public class Tagging { @Override public String toString() { - return this.term2synonym.toString(); + return this.term2entries.toString(); } private final static Pattern PATTERN_AE = Pattern.compile("\u00E4"); // german umlaute hack for better matching diff --git a/source/net/yacy/cora/lod/vocabulary/TaggingEntry.java b/source/net/yacy/cora/lod/vocabulary/TaggingEntry.java new file mode 100644 index 000000000..36d1a207b --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/TaggingEntry.java @@ -0,0 +1,39 @@ +// TaggingEntry.java +// Copyright 2016 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.lod.vocabulary; + +/** + * Data entry for a term in the {@link Tagging} class + */ +interface TaggingEntry { + + /** + * @return the term synonym + */ + public String getSynonym(); + + /** + * @return the term eventual object link + */ + public String getObjectLink(); + +} diff --git a/source/net/yacy/cora/lod/vocabulary/TaggingEntryWithObjectLink.java b/source/net/yacy/cora/lod/vocabulary/TaggingEntryWithObjectLink.java new file mode 100644 index 000000000..63d75ac57 --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/TaggingEntryWithObjectLink.java @@ -0,0 +1,50 @@ +// TaggingEntryWithObjectLink.java +// Copyright 2016 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.lod.vocabulary; + +/** + * Entry with a synonym and an object link for a term in the {@link Tagging} class. 
+ */ +class TaggingEntryWithObjectLink extends SynonymTaggingEntry { + + /** URL related to object corresponding to the term */ + private String objectLink; + + /** + * @param synonym a term synonym + * @param objectLink URL related to object corresponding to the term + * @throws IllegalArgumentException when a parameter is null + */ + public TaggingEntryWithObjectLink(String synonym, String objectLink) { + super(synonym); + if(objectLink == null) { + throw new IllegalArgumentException("object link must not be null"); + } + this.objectLink = objectLink; + } + + @Override + public String getObjectLink() { + return objectLink; + } + +}