- use only names which consist of at least two parts

- remove words that appear in derewo from the locations
pull/1/head
Michael Peter Christen 13 years ago
parent 9264d8b4af
commit cc9ad7198a

@ -68,7 +68,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
@ -98,7 +98,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
@ -111,7 +111,7 @@ public class DictionaryLoader_p {
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1);
@ -141,7 +141,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1ActionActivated", 1);
}

@ -2,12 +2,14 @@
package net.yacy.cora.lod;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
@ -112,13 +114,13 @@ public class JenaTripleStore {
public static void saveFile(String filename, Model model) {
Log.logInfo("TRIPLESTORE", "Saving triplestore with " + model.size() + " triples to " + filename);
FileOutputStream fout;
OutputStream fout;
try {
fout = new FileOutputStream(filename);
fout = new BufferedOutputStream(new FileOutputStream(filename));
model.write(fout);
fout.close();
Log.logInfo("TRIPLESTORE", "Saved triplestore with " + model.size() + " triples to " + filename);
} catch (Exception e) {
// TODO Auto-generated catch block
Log.logWarning("TRIPLESTORE", "Saving to " + filename+" failed");
}
}

@ -95,7 +95,7 @@ public class Tagging {
vocloop: for (Map.Entry<String, SOTuple> e: table.entrySet()) {
if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) {
term = normalizeKey(e.getKey());
v = normalizeWord(e.getKey());
v = normalizeTerm(e.getKey());
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
@ -108,13 +108,13 @@ public class Tagging {
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeWord(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeWord(term);
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
@ -402,7 +402,7 @@ public class Tagging {
}
if (pl[1] == null) {
term = normalizeKey(pl[0]);
v = normalizeWord(pl[0]);
v = normalizeTerm(pl[0]);
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
@ -416,13 +416,13 @@ public class Tagging {
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeWord(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeWord(term);
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
@ -532,7 +532,7 @@ public class Tagging {
private final static Pattern PATTERN_UE = Pattern.compile("\u00FC");
private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF");
public static final String normalizeWord(String word) {
public static final String normalizeTerm(String word) {
word = word.trim().toLowerCase();
word = PATTERN_AE.matcher(word).replaceAll("ae");
word = PATTERN_OE.matcher(word).replaceAll("oe");

@ -137,7 +137,7 @@ public class Autotagging {
public Tagging.Metatag getTagFromTerm(String term) {
if (this.vocabularies.isEmpty()) return null;
Tagging.Metatag tag;
term = Tagging.normalizeWord(term);
term = Tagging.normalizeTerm(term);
for (Map.Entry<String, Tagging> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatagFromSynonym(term);
if (tag != null) return tag;

@ -135,11 +135,11 @@ public class LibraryProvider {
if ( geo0.exists() ) {
geo0.renameTo(Dictionary.GEODB0.fileDisabled());
}
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false));
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, dymLib));
return;
}
if ( geo0.exists() ) {
geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false));
geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, dymLib));
return;
}
}
@ -147,7 +147,7 @@ public class LibraryProvider {
public static void integrateGeonames() {
final File geon = Dictionary.GEON0.file();
if ( geon.exists() ) {
geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon));
geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib));
return;
}
}
@ -219,7 +219,7 @@ public class LibraryProvider {
Resource resource = i.next();
String subject = resource.toString();
// prepare a propert term from the subject uri
// prepare a proper term from the subject uri
int p = subject.lastIndexOf('/');
if (p < 0) continue;
String term = subject.substring(p + 1);
@ -228,9 +228,10 @@ public class LibraryProvider {
if (p >= 0) term = term.substring(0, p);
term = term.replaceAll("_", " ").trim();
if (term.length() == 0) continue;
if (term.indexOf(' ') < 0) continue; // accept only names that have at least two parts
// store the term into the vocabulary map
map.put(term, new SOTuple("", subject));
map.put(term, new SOTuple("", Tagging.normalizeTerm(subject)));
}
try {
Log.logInfo("LibraryProvider", "adding vocabulary to autotagging");

@ -40,6 +40,7 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import net.yacy.document.StringBuilderComparator;
import net.yacy.document.WordCache;
import net.yacy.kelondro.logging.Log;
public class GeonamesLocation implements Locations
@ -73,7 +74,7 @@ public class GeonamesLocation implements Locations
private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final File file;
public GeonamesLocation(final File file) {
public GeonamesLocation(final File file, WordCache dymLib) {
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file;
@ -117,6 +118,7 @@ public class GeonamesLocation implements Locations
c.setPopulation((int) Long.parseLong(fields[14]));
this.id2loc.put(id, c);
for ( final StringBuilder name : locnames ) {
if (dymLib != null && dymLib.contains(name)) continue;
List<Integer> locs = this.name2ids.get(name);
if ( locs == null ) {
locs = new ArrayList<Integer>(1);

@ -40,6 +40,7 @@ import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import net.yacy.document.StringBuilderComparator;
import net.yacy.document.WordCache;
import net.yacy.kelondro.logging.Log;
/**
@ -59,7 +60,7 @@ public class OpenGeoDBLocation implements Locations
private final Map<String, Integer> zip2id;
private final File file;
public OpenGeoDBLocation(final File file, final boolean lonlat) {
public OpenGeoDBLocation(final File file, WordCache dymLib) {
this.file = file;
this.id2loc = new HashMap<Integer, GeoLocation>();
@ -99,13 +100,8 @@ public class OpenGeoDBLocation implements Locations
line = line.substring(18 + 7);
v = line.split(",");
v = line.split(",");
if ( lonlat ) {
lon = Float.parseFloat(v[2]);
lat = Float.parseFloat(v[3]);
} else {
lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]);
}
this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon));
}
if ( line.startsWith("geodb_textdata ") ) {
@ -119,6 +115,7 @@ public class OpenGeoDBLocation implements Locations
id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]);
if (h.length() < 2) continue;
if (dymLib != null && dymLib.contains(new StringBuilder(h))) continue;
List<Integer> l = this.name2ids.get(new StringBuilder(h));
if ( l == null ) {
l = new ArrayList<Integer>(1);

Loading…
Cancel
Save