- use only names which consist of at least two parts

- remove words that occur in derewo from locations
pull/1/head
Michael Peter Christen 13 years ago
parent 9264d8b4af
commit cc9ad7198a

@ -68,7 +68,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
@ -98,7 +98,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
@ -111,7 +111,7 @@ public class DictionaryLoader_p {
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1);
@ -141,7 +141,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1ActionActivated", 1);
}
@ -229,7 +229,7 @@ public class DictionaryLoader_p {
LibraryProvider.activatePND();
prop.put("pnd0ActionActivated", 1);
}
// check status again
for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) {
prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0);

@ -2,12 +2,14 @@
package net.yacy.cora.lod;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
@ -38,7 +40,7 @@ public class JenaTripleStore {
public static Model model = ModelFactory.createDefaultModel();
static {
init(model);
}
private final static void init(Model model) {
model.setNsPrefix(YaCyMetadata.PREFIX, YaCyMetadata.NAMESPACE);
@ -48,7 +50,7 @@ public class JenaTripleStore {
model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd");
model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE);
}
/**
 * Reports the size of the shared model as returned by {@code Model.size()}.
 *
 * @return the current size of the shared model
 */
public static long size() {
    final long tripleCount = JenaTripleStore.model.size();
    return tripleCount;
}
@ -81,7 +83,7 @@ public class JenaTripleStore {
InputStream is = FileManager.get().open(fileNameOrUri);
LoadNTriples(is);
}
public static void LoadNTriples(InputStream is) throws IOException {
Model tmp = ModelFactory.createDefaultModel();
if (is != null) {
@ -105,20 +107,20 @@ public class JenaTripleStore {
model = model.union(tmp);
}
}
public static void saveFile(String filename) {
/**
 * Writes the shared model to the given file by delegating to
 * {@link #saveFile(String, Model)}.
 *
 * @param filename path of the file to write
 */
public static void saveFile(String filename) {
    JenaTripleStore.saveFile(filename, JenaTripleStore.model);
}
/**
 * Writes the given model to a file through a buffered stream.
 * <p>
 * Failures are not propagated to the caller: a warning is logged together
 * with the causing exception. The output stream is released in every case,
 * including when writing the model fails half-way.
 *
 * @param filename path of the file to write
 * @param model    the model to serialize
 */
public static void saveFile(String filename, Model model) {
    Log.logInfo("TRIPLESTORE", "Saving triplestore with " + model.size() + " triples to " + filename);
    OutputStream fout = null;
    try {
        fout = new BufferedOutputStream(new FileOutputStream(filename));
        model.write(fout);
        // close inside the try so that a failing flush/close is reported as a failed save
        fout.close();
        Log.logInfo("TRIPLESTORE", "Saved triplestore with " + model.size() + " triples to " + filename);
    } catch (Exception e) {
        Log.logWarning("TRIPLESTORE", "Saving to " + filename+" failed");
        // keep the cause visible instead of dropping it silently
        Log.logException(e);
    } finally {
        // release the file handle when model.write() threw; closing an
        // already closed stream has no effect
        if (fout != null) {
            try {
                fout.close();
            } catch (IOException ignored) {
                // the failure (if any) was already reported above
            }
        }
    }
}
@ -145,13 +147,13 @@ public class JenaTripleStore {
Property pr = model.getProperty(predicate);
JenaTripleStore.model.removeAll(r, pr, (Resource) null);
}
/**
 * Adds a triple to the private store of the given user.
 * Does nothing when no private storage exists or the user has no store.
 *
 * @param subject   subject of the triple
 * @param predicate predicate of the triple
 * @param object    object of the triple
 * @param username  key of the private store to write to
 */
public static void addTriple(String subject, String predicate, String object, String username) {
    if (privatestorage == null || !privatestorage.containsKey(username)) {
        return;
    }
    addTriple(subject, predicate, object, privatestorage.get(username));
}
/**
 * Adds a triple to the shared model.
 *
 * @param subject   subject of the triple
 * @param predicate predicate of the triple
 * @param object    object of the triple
 */
public static void addTriple(String subject, String predicate, String object) {
    addTriple(subject, predicate, object, JenaTripleStore.model);
}
@ -162,43 +164,43 @@ public class JenaTripleStore {
r.addProperty(pr, object);
Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object);
}
/**
 * Looks up the first object stored for a subject/predicate pair.
 *
 * @param subject   subject to look up
 * @param predicate predicate to look up
 * @return string form of the first matching object, or an empty string if there is none
 */
public static String getObject(final String subject, final String predicate) {
    Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ");
    final Iterator<RDFNode> objects = JenaTripleStore.getObjects(subject, predicate);
    return objects.hasNext() ? objects.next().toString() : "";
}
public static Iterator<RDFNode> getObjects(final String subject, final String predicate) {
/**
 * Iterates over the objects stored for a subject/predicate pair.
 *
 * @param subject   subject to look up; {@code null} is passed on as a {@code null} resource
 * @param predicate predicate to look up
 * @return iterator over the matching objects
 */
public static Iterator<RDFNode> getObjects(final String subject, final String predicate) {
    Resource resource = null;
    if (subject != null) {
        resource = JenaTripleStore.getResource(subject);
    }
    return getObjects(resource, predicate);
}
public static String getPrivateObject(final String subject, final String predicate, final String username) {
/**
 * Looks up the first object stored for a subject/predicate pair in the
 * private store of the given user.
 *
 * @param subject   subject to look up
 * @param predicate predicate to look up
 * @param username  key of the private store to read from
 * @return string form of the first matching object, or an empty string if
 *         there is no match or no private store for the user
 */
public static String getPrivateObject(final String subject, final String predicate, final String username) {
    Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ("+username+")");
    Iterator<RDFNode> ni = JenaTripleStore.getPrivateObjects(subject, predicate, username);
    // getPrivateObjects returns null when the user has no private store
    if (ni == null || !ni.hasNext()) return "";
    return ni.next().toString();
}
/**
 * Iterates over the objects stored for a subject/predicate pair in the
 * private store of the given user.
 *
 * @param subject   subject to look up
 * @param predicate predicate to look up
 * @param username  key of the private store to read from
 * @return iterator over the matching objects, or {@code null} when no
 *         private storage exists or the user has no store
 */
private static Iterator<RDFNode> getPrivateObjects(final String subject, final String predicate, final String username) {
    if (privatestorage == null || !privatestorage.containsKey(username)) {
        return null;
    }
    return getObjects(privatestorage.get(username).getResource(subject), predicate, privatestorage.get(username));
}
/**
 * Iterates over the objects stored in the shared model for a resource/predicate pair.
 *
 * @param r         subject resource
 * @param predicate predicate to look up
 * @return iterator over the matching objects
 */
public static Iterator<RDFNode> getObjects(final Resource r, final String predicate) {
    return getObjects(r, predicate, JenaTripleStore.model);
}
private static Iterator<RDFNode> getObjects(final Resource r, final String predicate, final Model model) {
final Property pr = model.getProperty(predicate);
final StmtIterator iter = model.listStatements(r, pr, (Resource) null);
final StmtIterator iter = model.listStatements(r, pr, (Resource) null);
return new Iterator<RDFNode>() {
@Override
public boolean hasNext() {
@ -214,15 +216,15 @@ public class JenaTripleStore {
}
};
}
/**
 * Iterates over the subjects of the shared model for the given predicate.
 *
 * @param predicate predicate to look up
 * @return iterator over the matching subject resources
 */
public static Iterator<Resource> getSubjects(final String predicate) {
    return getSubjects(predicate, JenaTripleStore.model);
}
private static Iterator<Resource> getSubjects(final String predicate, final Model model) {
final Property pr = model.getProperty(predicate);
final ResIterator iter = model.listSubjectsWithProperty(pr);
final ResIterator iter = model.listSubjectsWithProperty(pr);
return new Iterator<Resource>() {
@Override
public boolean hasNext() {
@ -246,7 +248,7 @@ public class JenaTripleStore {
m.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE);
return m;
}
public static String getMetadataByURLHash(byte[] urlhash) {
String subject = YaCyMetadata.hashURI(urlhash);
Model model = JenaTripleStore.getSubmodelBySubject(subject);
@ -254,13 +256,13 @@ public class JenaTripleStore {
model.write(baos, "RDF/XML-ABBREV");
return UTF8.String(baos.toByteArray());
}
public static void initPrivateStores() {
Switchboard switchboard = Switchboard.getSwitchboard();
Log.logInfo("TRIPLESTORE", "Init private stores");
if (privatestorage == null) privatestorage = new ConcurrentHashMap<String, Model>();
if (privatestorage != null) privatestorage.clear();
@ -274,42 +276,42 @@ public class JenaTripleStore {
String username = e.getUserName();
File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath()));
File currentuserfile = new File(triplestore, "private_store_"+username+".rdf");
Log.logInfo("TRIPLESTORE", "Init " + username + " from "+currentuserfile.getAbsolutePath());
Model tmp = ModelFactory.createDefaultModel();
init (tmp);
init (tmp);
if (currentuserfile.exists()) {
Log.logInfo("TRIPLESTORE", "Loading from " + currentuserfile.getAbsolutePath());
InputStream is = FileManager.get().open(currentuserfile.getAbsolutePath());
if (is != null) {
// read the RDF/XML file
tmp.read(is, null);
Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + currentuserfile.getAbsolutePath());
} else {
throw new IOException("cannot read " + currentuserfile.getAbsolutePath());
}
}
if (tmp != null) {
privatestorage.put(username, tmp);
}
}
}
catch (Exception anyex) {
Log.logException(anyex);
}
@ -317,19 +319,19 @@ public class JenaTripleStore {
}
/**
 * Writes every private user store to its own file
 * {@code private_store_<username>.rdf} inside the triplestore directory.
 * The directory is taken from the {@code triplestore} configuration value,
 * with {@code DATA/TRIPLESTORE} below the data path as the default.
 * Returns right after logging when no private storage exists.
 *
 * @param switchboard source of the configuration value and the data path
 */
public static void savePrivateStores(Switchboard switchboard) {
    Log.logInfo("TRIPLESTORE", "Saving user triplestores");
    if (privatestorage != null) {
        for (final Entry<String, Model> userStore : privatestorage.entrySet()) {
            final String defaultPath = new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath();
            final File storeDir = new File(switchboard.getConfig("triplestore", defaultPath));
            final File userFile = new File(storeDir, "private_store_"+userStore.getKey()+".rdf");
            saveFile(userFile.getAbsolutePath(), userStore.getValue());
        }
    }
}

@ -95,7 +95,7 @@ public class Tagging {
vocloop: for (Map.Entry<String, SOTuple> e: table.entrySet()) {
if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) {
term = normalizeKey(e.getKey());
v = normalizeWord(e.getKey());
v = normalizeTerm(e.getKey());
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
@ -108,13 +108,13 @@ public class Tagging {
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeWord(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeWord(term);
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
@ -402,7 +402,7 @@ public class Tagging {
}
if (pl[1] == null) {
term = normalizeKey(pl[0]);
v = normalizeWord(pl[0]);
v = normalizeTerm(pl[0]);
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
@ -416,13 +416,13 @@ public class Tagging {
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeWord(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeWord(term);
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
@ -532,7 +532,7 @@ public class Tagging {
private final static Pattern PATTERN_UE = Pattern.compile("\u00FC");
private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF");
public static final String normalizeWord(String word) {
public static final String normalizeTerm(String word) {
word = word.trim().toLowerCase();
word = PATTERN_AE.matcher(word).replaceAll("ae");
word = PATTERN_OE.matcher(word).replaceAll("oe");

@ -137,7 +137,7 @@ public class Autotagging {
public Tagging.Metatag getTagFromTerm(String term) {
if (this.vocabularies.isEmpty()) return null;
Tagging.Metatag tag;
term = Tagging.normalizeWord(term);
term = Tagging.normalizeTerm(term);
for (Map.Entry<String, Tagging> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatagFromSynonym(term);
if (tag != null) return tag;

@ -135,11 +135,11 @@ public class LibraryProvider {
if ( geo0.exists() ) {
geo0.renameTo(Dictionary.GEODB0.fileDisabled());
}
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false));
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, dymLib));
return;
}
if ( geo0.exists() ) {
geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false));
geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, dymLib));
return;
}
}
@ -147,7 +147,7 @@ public class LibraryProvider {
/**
 * Activates the Geonames localization under the GEON0 nickname when the
 * GEON0 dictionary file exists; does nothing otherwise.
 */
public static void integrateGeonames() {
final File geon = Dictionary.GEON0.file();
if ( geon.exists() ) {
geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon));
geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib));
return;
}
}
@ -219,7 +219,7 @@ public class LibraryProvider {
Resource resource = i.next();
String subject = resource.toString();
// prepare a propert term from the subject uri
// prepare a proper term from the subject uri
int p = subject.lastIndexOf('/');
if (p < 0) continue;
String term = subject.substring(p + 1);
@ -228,9 +228,10 @@ public class LibraryProvider {
if (p >= 0) term = term.substring(0, p);
term = term.replaceAll("_", " ").trim();
if (term.length() == 0) continue;
if (term.indexOf(' ') < 0) continue; // accept only names that have at least two parts
// store the term into the vocabulary map
map.put(term, new SOTuple("", subject));
map.put(term, new SOTuple("", Tagging.normalizeTerm(subject)));
}
try {
Log.logInfo("LibraryProvider", "adding vocabulary to autotagging");

@ -40,6 +40,7 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import net.yacy.document.StringBuilderComparator;
import net.yacy.document.WordCache;
import net.yacy.kelondro.logging.Log;
public class GeonamesLocation implements Locations
@ -73,7 +74,7 @@ public class GeonamesLocation implements Locations
private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final File file;
public GeonamesLocation(final File file) {
public GeonamesLocation(final File file, WordCache dymLib) {
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file;
@ -117,6 +118,7 @@ public class GeonamesLocation implements Locations
c.setPopulation((int) Long.parseLong(fields[14]));
this.id2loc.put(id, c);
for ( final StringBuilder name : locnames ) {
if (dymLib != null && dymLib.contains(name)) continue;
List<Integer> locs = this.name2ids.get(name);
if ( locs == null ) {
locs = new ArrayList<Integer>(1);

@ -40,6 +40,7 @@ import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import net.yacy.document.StringBuilderComparator;
import net.yacy.document.WordCache;
import net.yacy.kelondro.logging.Log;
/**
@ -59,7 +60,7 @@ public class OpenGeoDBLocation implements Locations
private final Map<String, Integer> zip2id;
private final File file;
public OpenGeoDBLocation(final File file, final boolean lonlat) {
public OpenGeoDBLocation(final File file, WordCache dymLib) {
this.file = file;
this.id2loc = new HashMap<Integer, GeoLocation>();
@ -99,13 +100,8 @@ public class OpenGeoDBLocation implements Locations
line = line.substring(18 + 7);
v = line.split(",");
v = line.split(",");
if ( lonlat ) {
lon = Float.parseFloat(v[2]);
lat = Float.parseFloat(v[3]);
} else {
lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]);
}
lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]);
this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon));
}
if ( line.startsWith("geodb_textdata ") ) {
@ -119,6 +115,7 @@ public class OpenGeoDBLocation implements Locations
id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]);
if (h.length() < 2) continue;
if (dymLib != null && dymLib.contains(new StringBuilder(h))) continue;
List<Integer> l = this.name2ids.get(new StringBuilder(h));
if ( l == null ) {
l = new ArrayList<Integer>(1);

Loading…
Cancel
Save