Reduced locations vocabulary memory footprint.

Reduced the memory usage of this vocabulary:
 - by using a single map, term2entries, instead of two maps having the
same key set
 - by generating the location object links on the fly from the
GeoLocation data instead of storing many duplicates of the string prefix
"http://www.openstreetmap.org/?lat="
 
Measurements with VisualVM and GeoNames 0 enabled (cities with a
population > 1000) :
 - AutotaggingLibrary retained size :
  - initial : 309 718 763 bytes
  - after refactoring : 159 224 641 bytes
pull/97/head
luccioman 8 years ago
parent 9c06e752e4
commit a1f922b34a

@ -0,0 +1,53 @@
// LocationTaggingEntry.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.lod.vocabulary;
import net.yacy.cora.geo.GeoLocation;
/**
 * Entry with a synonym and a location for a term in the {@link Tagging} class.
 * The object link is not stored: it is rebuilt from the location coordinates
 * on every call to {@link #getObjectLink()}.
 */
class LocationTaggingEntry extends SynonymTaggingEntry {

    /** Geographical location of the object. Never null once constructed. */
    private final GeoLocation location;

    /**
     * @param synonym term synonym. Must not be null.
     * @param location geographical location of the object. Must not be null.
     * @throws IllegalArgumentException when a parameter is null
     */
    public LocationTaggingEntry(String synonym, GeoLocation location) {
        super(synonym); // the parent constructor rejects a null synonym
        if (location == null) {
            throw new IllegalArgumentException("location must not be null");
        }
        this.location = location;
    }

    /**
     * @return an OpenStreetMap URL whose lat and lon parameters are taken from
     *         the location of this entry, with the zoom parameter set to 16
     */
    @Override
    public String getObjectLink() {
        return "http://www.openstreetmap.org/?lat=" + this.location.lat() + "&lon=" + this.location.lon() + "&zoom=16";
    }
}

@ -0,0 +1,54 @@
// SynonymTaggingEntry.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.lod.vocabulary;
/**
 * Synonym entry for a term in the {@link Tagging} class.
 * This base entry carries no object link: {@link #getObjectLink()} returns
 * null unless a subclass overrides it.
 */
class SynonymTaggingEntry implements TaggingEntry {

    /** Term synonym. Never null once constructed. */
    protected final String synonym;

    /**
     * @param synonym a term synonym. Must not be null.
     * @throws IllegalArgumentException when synonym is null
     */
    public SynonymTaggingEntry(String synonym) {
        if (synonym == null) {
            throw new IllegalArgumentException("synonym must not be null");
        }
        this.synonym = synonym;
    }

    /**
     * @return the term synonym given at construction time
     */
    @Override
    public String getSynonym() {
        return this.synonym;
    }

    /**
     * @return always null, as this kind of entry holds no object link
     */
    @Override
    public String getObjectLink() {
        return null;
    }
}

@ -50,8 +50,10 @@ public class Tagging {
private final String navigatorName;
private final Map<String, String> synonym2term;
private final Map<String, String> term2synonym;
private final Map<String, String> term2objectlink;
/** Terms associated with TaggingEntry instances, each having a synonym and an optional object link */
private final Map<String, TaggingEntry> term2entries;
private File propFile;
private boolean isFacet; // true if the vocabulary shall generate a navigation facet
@ -93,8 +95,7 @@ public class Tagging {
public Tagging(String name) {
this.navigatorName = name;
this.synonym2term = new ConcurrentHashMap<String, String>();
this.term2synonym = new ConcurrentHashMap<String, String>();
this.term2objectlink = new ConcurrentHashMap<String, String>();
this.term2entries= new ConcurrentHashMap<String, TaggingEntry>();
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + name;
this.objectspace = null;
@ -122,8 +123,7 @@ public class Tagging {
this.objectspace = objectspace;
if (propFile == null) {
this.synonym2term.clear();
this.term2synonym.clear();
this.term2objectlink.clear();
this.term2entries.clear();
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + this.navigatorName;
@ -134,8 +134,12 @@ public class Tagging {
term = normalizeKey(e.getKey());
v = normalizeTerm(e.getKey());
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) {
this.term2entries.put(term, new TaggingEntryWithObjectLink(v, e.getValue().getObjectlink()));
} else {
this.term2entries.put(term, new SynonymTaggingEntry(v));
}
continue vocloop;
}
term = normalizeKey(e.getKey());
@ -149,12 +153,15 @@ public class Tagging {
if (synonym.isEmpty()) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
this.term2entries.put(term, new SynonymTaggingEntry(synonym));
}
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) {
this.term2entries.put(term, new TaggingEntryWithObjectLink(synonym, e.getValue().getObjectlink()));
} else {
this.term2entries.put(term, new SynonymTaggingEntry(synonym));
}
synonyms.add(synonym);
}
} else {
@ -179,11 +186,12 @@ public class Tagging {
for (String loc: locNames) {
String syn = normalizeTerm(loc);
this.synonym2term.put(syn, loc);
this.term2synonym.put(loc, syn);
geo = location.find(loc, true);
if (!geo.isEmpty()) {
g = geo.iterator().next();
this.term2objectlink.put(loc, "http://www.openstreetmap.org/?lat=" + g.lat() + "&lon=" + g.lon() + "&zoom=16");
this.term2entries.put(loc, new LocationTaggingEntry(syn, g));
} else {
this.term2entries.put(loc, new SynonymTaggingEntry(syn));
}
}
}
@ -191,8 +199,7 @@ public class Tagging {
private void init() throws IOException {
if (this.propFile == null) return;
this.synonym2term.clear();
this.term2synonym.clear();
this.term2objectlink.clear();
this.term2entries.clear();
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + this.navigatorName;
this.objectspace = null;
@ -231,8 +238,11 @@ public class Tagging {
term = normalizeKey(pl[0]);
v = normalizeTerm(pl[0]);
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
if (pl[2] != null && pl[2].length() > 0) {
this.term2entries.put(term, new TaggingEntryWithObjectLink(v, pl[2]));
} else {
this.term2entries.put(term, new SynonymTaggingEntry(v));
}
continue vocloop;
}
term = normalizeKey(pl[0]);
@ -247,12 +257,15 @@ public class Tagging {
if (synonym.isEmpty()) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
this.term2entries.put(term, new SynonymTaggingEntry(synonym));
}
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
if (pl[2] != null && pl[2].length() > 0) {
this.term2entries.put(term, new TaggingEntryWithObjectLink(synonym, pl[2]));
} else {
this.term2entries.put(term, new SynonymTaggingEntry(synonym));
}
synonyms.add(synonym);
}
} catch (final InterruptedException e) {
@ -270,7 +283,7 @@ public class Tagging {
}
public int size() {
return this.term2objectlink.size();
return this.term2entries.size();
}
public void put(String term, String synonyms, String objectlink) throws IOException {
@ -375,13 +388,15 @@ public class Tagging {
private Map<String, Set<String>> reconstructionSets() {
Map<String, Set<String>> r = new TreeMap<String, Set<String>>();
for (Map.Entry<String, String> e: this.term2synonym.entrySet()) {
for (Map.Entry<String, TaggingEntry> e: this.term2entries.entrySet()) {
Set<String> s = r.get(e.getKey());
if (s == null) {
s = new TreeSet<String>();
r.put(e.getKey(), s);
}
if (e.getValue() != null && e.getValue().length() != 0) s.add(e.getValue());
if (e.getValue() != null && e.getValue().getSynonym() != null && e.getValue().getSynonym().length() != 0) {
s.add(e.getValue().getSynonym());
}
}
for (Map.Entry<String, String> e: this.synonym2term.entrySet()) {
Set<String> s = r.get(e.getValue());
@ -398,14 +413,22 @@ public class Tagging {
Map<String, Set<String>> r = reconstructionSets();
Map<String, SOTuple> map = new TreeMap<String, SOTuple>();
for (Map.Entry<String, Set<String>> e: r.entrySet()) {
String objectlink = this.term2objectlink.get(e.getKey());
map.put(e.getKey(), new SOTuple(e.getValue().toArray(new String[e.getValue().size()]), objectlink == null ? "" : objectlink));
TaggingEntry entry = this.term2entries.get(e.getKey());
String objectLink = null;
if(entry != null) {
objectLink = entry.getObjectLink();
}
map.put(e.getKey(), new SOTuple(e.getValue().toArray(new String[e.getValue().size()]), objectLink == null ? "" : objectLink));
}
return map;
}
public String getObjectlink(String term) {
return this.term2objectlink.get(term);
TaggingEntry entry = this.term2entries.get(term);
if(entry != null) {
return entry.getObjectLink();
}
return null;
}
public Map<String, SOTuple> list() {
@ -526,7 +549,7 @@ public class Tagging {
@Override
public String toString() {
return this.term2synonym.toString();
return this.term2entries.toString();
}
private final static Pattern PATTERN_AE = Pattern.compile("\u00E4"); // german umlaute hack for better matching

@ -0,0 +1,39 @@
// TaggingEntry.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.lod.vocabulary;
/**
* Data entry for a term in the {@link Tagging} class
*/
interface TaggingEntry {
/**
* @return the term synonym
*/
public String getSynonym();
/**
* @return the term eventual object link
*/
public String getObjectLink();
}

@ -0,0 +1,50 @@
// TaggingEntryWithObjectLink.java
// Copyright 2016 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.lod.vocabulary;
/**
 * Entry with a synonym and an explicitly stored object link for a term in the
 * {@link Tagging} class.
 */
class TaggingEntryWithObjectLink extends SynonymTaggingEntry {

    /** URL related to the object corresponding to the term. Never null once constructed. */
    private final String objectLink;

    /**
     * @param synonym a term synonym. Must not be null.
     * @param objectLink URL related to the object corresponding to the term. Must not be null.
     * @throws IllegalArgumentException when a parameter is null
     */
    public TaggingEntryWithObjectLink(String synonym, String objectLink) {
        super(synonym); // the parent constructor rejects a null synonym
        if (objectLink == null) {
            throw new IllegalArgumentException("object link must not be null");
        }
        this.objectLink = objectLink;
    }

    /**
     * @return the object link given at construction time
     */
    @Override
    public String getObjectLink() {
        return this.objectLink;
    }
}
Loading…
Cancel
Save