diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java index 13b37667f..ddf8f50be 100644 --- a/htroot/DictionaryLoader_p.java +++ b/htroot/DictionaryLoader_p.java @@ -68,7 +68,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0); prop.put("geon0ActionLoaded", 1); @@ -98,7 +98,7 @@ public class DictionaryLoader_p { if (post.containsKey("geon0Activate")) { LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0ActionActivated", 1); } @@ -111,7 +111,7 @@ public class DictionaryLoader_p { final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file()); LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0); prop.put("geo1ActionLoaded", 1); @@ -141,7 +141,7 @@ public class DictionaryLoader_p { if (post.containsKey("geo1Activate")) { LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1ActionActivated", 1); } @@ -229,7 +229,7 @@ public class DictionaryLoader_p { LibraryProvider.activatePND(); prop.put("pnd0ActionActivated", 1); } - + // check status again for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) { prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0); diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index a1e1405be..d0cea5944 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -2,12 +2,14 @@ package net.yacy.cora.lod; +import java.io.BufferedOutputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.util.Iterator; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; @@ -38,7 +40,7 @@ public class JenaTripleStore { public static Model model = ModelFactory.createDefaultModel(); static { init(model); - + } private final static void init(Model model) { model.setNsPrefix(YaCyMetadata.PREFIX, YaCyMetadata.NAMESPACE); @@ -48,7 +50,7 @@ public class JenaTripleStore { model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd"); model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE); } - + public static long size() { return model.size(); } @@ -81,7 +83,7 @@ public class JenaTripleStore { InputStream is = FileManager.get().open(fileNameOrUri); LoadNTriples(is); } - + public static void LoadNTriples(InputStream is) throws IOException { Model tmp = ModelFactory.createDefaultModel(); if (is != null) { @@ -105,20 +107,20 @@ public class JenaTripleStore { model = model.union(tmp); } } - - public static void saveFile(String filename) { + + public static void saveFile(String filename) { saveFile(filename, model); } public static void saveFile(String filename, Model model) { Log.logInfo("TRIPLESTORE", "Saving triplestore with " + model.size() + " triples to " + filename); - FileOutputStream fout; + OutputStream fout; try { - fout = new FileOutputStream(filename); + fout = new BufferedOutputStream(new FileOutputStream(filename)); model.write(fout); + fout.close(); Log.logInfo("TRIPLESTORE", "Saved triplestore with " + model.size() + " triples to " + filename); } catch (Exception e) { - // TODO Auto-generated catch block Log.logWarning("TRIPLESTORE", "Saving to " + filename+" failed"); } } @@ -145,13 +147,13 @@ public class JenaTripleStore { Property pr = model.getProperty(predicate); JenaTripleStore.model.removeAll(r, pr, (Resource) null); } - + public static void addTriple(String subject, String predicate, String object, String username) { if (privatestorage != null && privatestorage.containsKey(username)) { addTriple (subject, predicate, object, privatestorage.get(username)); } } - + public static void addTriple(String subject, String predicate, String object) { addTriple (subject, predicate, object, model); } @@ -162,43 +164,43 @@ public class JenaTripleStore { r.addProperty(pr, object); Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object); } - + public static String getObject(final String subject, final String predicate) { Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... "); - + Iterator ni = JenaTripleStore.getObjects(subject, predicate); if (!ni.hasNext()) return ""; return ni.next().toString(); } - public static Iterator getObjects(final String subject, final String predicate) { + public static Iterator getObjects(final String subject, final String predicate) { final Resource r = subject == null ? null : JenaTripleStore.getResource(subject); return getObjects(r, predicate); } - - public static String getPrivateObject(final String subject, final String predicate, final String username) { + + public static String getPrivateObject(final String subject, final String predicate, final String username) { Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ("+username+")"); - + Iterator ni = JenaTripleStore.getPrivateObjects(subject, predicate, username); if (!ni.hasNext()) return ""; return ni.next().toString(); } - + private static Iterator getPrivateObjects(final String subject, final String predicate, final String username) { if (privatestorage != null && privatestorage.containsKey(username)) { return getObjects(privatestorage.get(username).getResource(subject), predicate, privatestorage.get(username)); } return null; } - + public static Iterator getObjects(final Resource r, final String predicate) { return getObjects(r, predicate, model); } private static Iterator getObjects(final Resource r, final String predicate, final Model model) { final Property pr = model.getProperty(predicate); - final StmtIterator iter = model.listStatements(r, pr, (Resource) null); - + final StmtIterator iter = model.listStatements(r, pr, (Resource) null); + return new Iterator() { @Override public boolean hasNext() { @@ -214,15 +216,15 @@ public class JenaTripleStore { } }; } - + public static Iterator getSubjects(final String predicate) { return getSubjects(predicate, model); } private static Iterator getSubjects(final String predicate, final Model model) { final Property pr = model.getProperty(predicate); - final ResIterator iter = model.listSubjectsWithProperty(pr); - + final ResIterator iter = model.listSubjectsWithProperty(pr); + return new Iterator() { @Override public boolean hasNext() { @@ -246,7 +248,7 @@ public class JenaTripleStore { m.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE); return m; } - + public static String getMetadataByURLHash(byte[] urlhash) { String subject = YaCyMetadata.hashURI(urlhash); Model model = JenaTripleStore.getSubmodelBySubject(subject); @@ -254,13 +256,13 @@ public class JenaTripleStore { model.write(baos, "RDF/XML-ABBREV"); return UTF8.String(baos.toByteArray()); } - + public static void initPrivateStores() { - + Switchboard switchboard = Switchboard.getSwitchboard(); Log.logInfo("TRIPLESTORE", "Init private stores"); - + if (privatestorage == null) privatestorage = new ConcurrentHashMap(); if (privatestorage != null) privatestorage.clear(); @@ -274,42 +276,42 @@ public class JenaTripleStore { String username = e.getUserName(); File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath())); - + File currentuserfile = new File(triplestore, "private_store_"+username+".rdf"); - + Log.logInfo("TRIPLESTORE", "Init " + username + " from "+currentuserfile.getAbsolutePath()); - + Model tmp = ModelFactory.createDefaultModel(); - - init (tmp); - + + init (tmp); + if (currentuserfile.exists()) { - - + + Log.logInfo("TRIPLESTORE", "Loading from " + currentuserfile.getAbsolutePath()); InputStream is = FileManager.get().open(currentuserfile.getAbsolutePath()); if (is != null) { // read the RDF/XML file tmp.read(is, null); Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + currentuserfile.getAbsolutePath()); - - + + } else { throw new IOException("cannot read " + currentuserfile.getAbsolutePath()); } } - + if (tmp != null) { - + privatestorage.put(username, tmp); - + } } } catch (Exception anyex) { - + Log.logException(anyex); } @@ -317,19 +319,19 @@ public class JenaTripleStore { } public static void savePrivateStores(Switchboard switchboard) { - + Log.logInfo("TRIPLESTORE", "Saving user triplestores"); if (privatestorage == null) return; for (Entry s : privatestorage.entrySet()) { - + File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath())); - + File currentuserfile = new File(triplestore, "private_store_"+s.getKey()+".rdf"); saveFile (currentuserfile.getAbsolutePath(), s.getValue()); - + } } diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index f00a0eedf..4a93288cc 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -95,7 +95,7 @@ public class Tagging { vocloop: for (Map.Entry e: table.entrySet()) { if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) { term = normalizeKey(e.getKey()); - v = normalizeWord(e.getKey()); + v = normalizeTerm(e.getKey()); this.synonym2term.put(v, term); this.term2synonym.put(term, v); if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); @@ -108,13 +108,13 @@ public class Tagging { tagloop: for (String synonym: tags) { if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); - synonym = normalizeWord(synonym); + synonym = normalizeTerm(synonym); if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); } - String synonym = normalizeWord(term); + String synonym = normalizeTerm(term); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); @@ -402,7 +402,7 @@ public class Tagging { } if (pl[1] == null) { term = normalizeKey(pl[0]); - v = normalizeWord(pl[0]); + v = normalizeTerm(pl[0]); this.synonym2term.put(v, term); this.term2synonym.put(term, v); if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); @@ -416,13 +416,13 @@ public class Tagging { tagloop: for (String synonym: tags) { if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); - synonym = normalizeWord(synonym); + synonym = normalizeTerm(synonym); if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); } - String synonym = normalizeWord(term); + String synonym = normalizeTerm(term); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); @@ -532,7 +532,7 @@ public class Tagging { private final static Pattern PATTERN_UE = Pattern.compile("\u00FC"); private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF"); - public static final String normalizeWord(String word) { + public static final String normalizeTerm(String word) { word = word.trim().toLowerCase(); word = PATTERN_AE.matcher(word).replaceAll("ae"); word = PATTERN_OE.matcher(word).replaceAll("oe"); diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java index eb834c887..5e01093bc 100644 --- a/source/net/yacy/document/Autotagging.java +++ b/source/net/yacy/document/Autotagging.java @@ -137,7 +137,7 @@ public class Autotagging { public Tagging.Metatag getTagFromTerm(String term) { if (this.vocabularies.isEmpty()) return null; Tagging.Metatag tag; - term = Tagging.normalizeWord(term); + term = Tagging.normalizeTerm(term); for (Map.Entry v: this.vocabularies.entrySet()) { tag = v.getValue().getMetatagFromSynonym(term); if (tag != null) return tag; diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index 92a392946..e89e76bc9 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -135,11 +135,11 @@ public class LibraryProvider { if ( geo0.exists() ) { geo0.renameTo(Dictionary.GEODB0.fileDisabled()); } - geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false)); + geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, dymLib)); return; } if ( geo0.exists() ) { - geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false)); + geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, dymLib)); return; } } @@ -147,7 +147,7 @@ public class LibraryProvider { public static void integrateGeonames() { final File geon = Dictionary.GEON0.file(); if ( geon.exists() ) { - geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon)); + geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib)); return; } } @@ -219,7 +219,7 @@ public class LibraryProvider { Resource resource = i.next(); String subject = resource.toString(); - // prepare a propert term from the subject uri + // prepare a proper term from the subject uri int p = subject.lastIndexOf('/'); if (p < 0) continue; String term = subject.substring(p + 1); @@ -228,9 +228,10 @@ public class LibraryProvider { if (p >= 0) term = term.substring(0, p); term = term.replaceAll("_", " ").trim(); if (term.length() == 0) continue; + if (term.indexOf(' ') < 0) continue; // accept only names that have at least two parts // store the term into the vocabulary map - map.put(term, new SOTuple("", subject)); + map.put(term, new SOTuple("", Tagging.normalizeTerm(subject))); } try { Log.logInfo("LibraryProvider", "adding vocabulary to autotagging"); diff --git a/source/net/yacy/document/geolocation/GeonamesLocation.java b/source/net/yacy/document/geolocation/GeonamesLocation.java index e98989977..799bba5e6 100644 --- a/source/net/yacy/document/geolocation/GeonamesLocation.java +++ b/source/net/yacy/document/geolocation/GeonamesLocation.java @@ -40,6 +40,7 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import net.yacy.document.StringBuilderComparator; +import net.yacy.document.WordCache; import net.yacy.kelondro.logging.Log; public class GeonamesLocation implements Locations @@ -73,7 +74,7 @@ public class GeonamesLocation implements Locations private final TreeMap> name2ids; private final File file; - public GeonamesLocation(final File file) { + public GeonamesLocation(final File file, WordCache dymLib) { // this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/ this.file = file; @@ -117,6 +118,7 @@ public class GeonamesLocation implements Locations c.setPopulation((int) Long.parseLong(fields[14])); this.id2loc.put(id, c); for ( final StringBuilder name : locnames ) { + if (dymLib != null && dymLib.contains(name)) continue; List locs = this.name2ids.get(name); if ( locs == null ) { locs = new ArrayList(1); diff --git a/source/net/yacy/document/geolocation/OpenGeoDBLocation.java b/source/net/yacy/document/geolocation/OpenGeoDBLocation.java index 968150a4f..98897fa2e 100644 --- a/source/net/yacy/document/geolocation/OpenGeoDBLocation.java +++ b/source/net/yacy/document/geolocation/OpenGeoDBLocation.java @@ -40,6 +40,7 @@ import java.util.TreeSet; import java.util.zip.GZIPInputStream; import net.yacy.document.StringBuilderComparator; +import net.yacy.document.WordCache; import net.yacy.kelondro.logging.Log; /** @@ -59,7 +60,7 @@ public class OpenGeoDBLocation implements Locations private final Map zip2id; private final File file; - public OpenGeoDBLocation(final File file, final boolean lonlat) { + public OpenGeoDBLocation(final File file, WordCache dymLib) { this.file = file; this.id2loc = new HashMap(); @@ -99,13 +100,8 @@ public class OpenGeoDBLocation implements Locations line = line.substring(18 + 7); v = line.split(","); v = line.split(","); - if ( lonlat ) { - lon = Float.parseFloat(v[2]); - lat = Float.parseFloat(v[3]); - } else { - lat = Float.parseFloat(v[2]); - lon = Float.parseFloat(v[3]); - } + lat = Float.parseFloat(v[2]); + lon = Float.parseFloat(v[3]); this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon)); } if ( line.startsWith("geodb_textdata ") ) { @@ -119,6 +115,7 @@ public class OpenGeoDBLocation implements Locations id = Integer.parseInt(v[0]); h = removeQuotes(v[2]); if (h.length() < 2) continue; + if (dymLib != null && dymLib.contains(new StringBuilder(h))) continue; List l = this.name2ids.get(new StringBuilder(h)); if ( l == null ) { l = new ArrayList(1);