diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java
index 4350d5655..9568816e2 100644
--- a/htroot/DictionaryLoader_p.java
+++ b/htroot/DictionaryLoader_p.java
@@ -57,7 +57,9 @@ public class DictionaryLoader_p {
prop.put(dictionary.nickname + "ActionDeactivated", 0);
}
- if (post == null) return prop;
+ if (post == null) {
+ return prop;
+ }
// GEON0
if (post.containsKey("geon0Load")) {
@@ -67,6 +69,7 @@ public class DictionaryLoader_p {
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
+ LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
} catch (final MalformedURLException e) {
@@ -96,6 +99,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
+ LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
@@ -108,6 +112,7 @@ public class DictionaryLoader_p {
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
+ LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1);
} catch (final MalformedURLException e) {
@@ -137,6 +142,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
+ LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
prop.put("geo1ActionActivated", 1);
}
diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java
new file mode 100644
index 000000000..724101d17
--- /dev/null
+++ b/source/net/yacy/document/Autotagging.java
@@ -0,0 +1,300 @@
+/**
+ * Autotagging
+ * Copyright 2012 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+ * first published 07.01.2012 on http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.document;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import net.yacy.document.geolocalization.Localization;
+import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.util.FileUtils;
+
+/**
+ * Autotagging provides a set of tag/print-name properties which can be used to
+ * - create tags from texts automatically
+ * - create navigation entries for given tags
+ */
+public class Autotagging {
+
+    /** dummy value: allTags is a map which is only used as a concurrent set of its keys */
+    static final Object PRESENT = new Object();
+
+    /** extension of the file names which are loaded as vocabularies by reload() */
+    private static final String VOCABULARY_EXTENSION = ".vocabulary";
+
+    final char prefixChar;
+    final File autotaggingPath;
+    final Map<String, Vocabulary> vocabularies; // vocabulary name -> vocabulary
+    final Map<String, Object> allTags; // tag -> PRESENT for the tags of the stored vocabularies
+
+    /**
+     * create the tag tables and load the vocabulary files from the given path
+     * @param autotaggingPath directory with vocabulary files; if null or not existent, no file is loaded
+     * @param prefixChar the first character of a metatag string
+     */
+    public Autotagging(final File autotaggingPath, char prefixChar) {
+        this.vocabularies = new ConcurrentHashMap<String, Vocabulary>();
+        this.autotaggingPath = autotaggingPath;
+        this.prefixChar = prefixChar;
+        this.allTags = new ConcurrentHashMap<String, Object>();
+        reload();
+    }
+
+    /**
+     * scan the input directory and load all tag tables (again)
+     * a tag table is a file with the extension .vocabulary where
+     * each line has the form key:value, key=value or key(tab)value
+     * the key is the print name of a tag (the visible name, shown in a navigator)
+     * the value is a comma-separated list of tags for that print name
+     * the lower-case form of the key is always a tag for the key as well
+     * lines without a separator are allowed (the line is then tag and print name)
+     * empty lines and lines which start with '#' are ignored
+     */
+    public void reload() {
+        this.vocabularies.clear();
+        this.allTags.clear();
+        if (this.autotaggingPath == null || !this.autotaggingPath.exists()) {
+            return;
+        }
+        final String[] files = this.autotaggingPath.list();
+        if (files == null) {
+            // File.list() returns null if the path is not a directory or if an I/O error occurs
+            return;
+        }
+        for (final String f: files) {
+            if (!f.endsWith(VOCABULARY_EXTENSION)) {
+                continue;
+            }
+            try {
+                final File ff = new File(this.autotaggingPath, f);
+                final String fileName = ff.getName();
+                // the vocabulary name is the file name without the extension
+                final String vocName = fileName.substring(0, fileName.length() - VOCABULARY_EXTENSION.length());
+                final Vocabulary voc = new Vocabulary(vocName, ff);
+                this.vocabularies.put(vocName, voc);
+                registerTags(voc);
+            } catch (final IOException e) {
+                Log.logException(e);
+            }
+        }
+    }
+
+    /*
+    public void addDidYouMean(WordCache wordCache) {
+
+    }
+    */
+
+    /**
+     * store a vocabulary with the name "Locale" which is made from the location names of a
+     * localization; a vocabulary that was stored with that name before is replaced
+     * @param localization the source of the location names
+     */
+    public void addLocalization(Localization localization) {
+        final Vocabulary voc = new Vocabulary("Locale", localization);
+        this.vocabularies.put("Locale", voc);
+        registerTags(voc);
+    }
+
+    /**
+     * add all tags of a vocabulary to the set of all known tags
+     * @param voc the vocabulary
+     */
+    private void registerTags(final Vocabulary voc) {
+        for (final String t: voc.tags()) {
+            this.allTags.put(t, PRESENT);
+        }
+    }
+
+    /**
+     * produce a set of tags for a given text.
+     * The set shall contain the names of the tags with a prefix character at the front.
+     * The tagging is not implemented yet, therefore the returned set is always empty.
+     * @param text the text where tags shall be found
+     * @return a new, empty set
+     */
+    public Set<String> tags(String text) {
+        return new HashSet<String>();
+    }
+
+    /**
+     * A Vocabulary is a named pair of mappings: from tags to print names and from print names to tags.
+     */
+    public static class Vocabulary {
+
+        final String navigatorName;
+        final Map<String, String> tag2print, print2tag;
+
+        /**
+         * create an empty vocabulary
+         * @param name the name of the vocabulary
+         */
+        public Vocabulary(String name) {
+            this.navigatorName = name;
+            this.tag2print = new ConcurrentHashMap<String, String>();
+            this.print2tag = new ConcurrentHashMap<String, String>();
+        }
+
+        /**
+         * create a vocabulary from the lines of a file; the line format is described at Autotagging.reload()
+         * @param name the name of the vocabulary
+         * @param propFile the file which is read with FileUtils.getListArray
+         */
+        public Vocabulary(String name, File propFile) throws IOException {
+            this(name);
+            final ArrayList<String> list = FileUtils.getListArray(propFile);
+            for (String line: list) {
+                line = line.trim();
+                if (line.length() == 0 || line.charAt(0) == '#') {
+                    continue; // empty line or comment
+                }
+                // the separator is the first ':'; without a ':' the first '=' and then the first tab is used
+                int p = line.indexOf(':');
+                if (p < 0) {
+                    p = line.indexOf('=');
+                }
+                if (p < 0) {
+                    p = line.indexOf('\t');
+                }
+                if (p < 0) {
+                    // no separator: the line is tag and print name at once (not converted to lower case)
+                    this.tag2print.put(line, line);
+                    this.print2tag.put(line, line);
+                    continue;
+                }
+                final String k = line.substring(0, p).trim();
+                for (String t: line.substring(p + 1).split(",")) {
+                    t = t.trim().toLowerCase();
+                    if (t.length() == 0) {
+                        continue;
+                    }
+                    this.tag2print.put(t, k);
+                    this.print2tag.put(k, t);
+                }
+                // the lower-case key is a tag too; this replaces the print2tag entries for k from the loop above
+                this.tag2print.put(k.toLowerCase(), k);
+                this.print2tag.put(k, k.toLowerCase());
+            }
+        }
+
+        /**
+         * create a vocabulary from location names: each name is a print name, its lower-case form is the tag
+         * @param name the name of the vocabulary
+         * @param localization the source of the location names
+         */
+        public Vocabulary(String name, Localization localization) {
+            this(name);
+            final Set<String> locNames = localization.locationNames();
+            for (final String loc: locNames) {
+                this.tag2print.put(loc.toLowerCase(), loc);
+                this.print2tag.put(loc, loc.toLowerCase());
+            }
+        }
+
+        public String getName() {
+            return this.navigatorName;
+        }
+
+        /** @return the print name for a tag or null if the tag is not in this vocabulary */
+        public String getPrint(final String tag) {
+            return this.tag2print.get(tag);
+        }
+
+        /** @return the tag that is stored for a print name or null if the print name is not in this vocabulary */
+        public String getTag(final String print) {
+            return this.print2tag.get(print);
+        }
+
+        /** @return the tags of this vocabulary as key set view of the tag-to-print mapping */
+        public Set<String> tags() {
+            return this.tag2print.keySet();
+        }
+
+        @Override
+        public String toString() {
+            return this.print2tag.toString();
+        }
+    }
+
+    /**
+     * A Metatag is a pair of a vocabulary name and a print name. Its string form is
+     * prefix character + vocabulary name + ':' + print name, with '_' in place of each blank.
+     */
+    public class Metatag {
+        private final String vocName;
+        private final String print;
+
+        public Metatag(String vocName, String print) {
+            this.vocName = vocName;
+            this.print = print;
+        }
+
+        /**
+         * parse the string form of a metatag; the first ':' ends the vocabulary name
+         * the input is only checked with assertions and '_' is not translated back to a blank
+         * @param metatag a string which starts with the prefix character
+         */
+        public Metatag(String metatag) {
+            assert metatag.charAt(0) == Autotagging.this.prefixChar;
+            final int p = metatag.indexOf(':');
+            assert p > 0;
+            this.vocName = metatag.substring(1, p);
+            this.print = metatag.substring(p + 1);
+        }
+
+        public String getVocabularyName() {
+            return this.vocName;
+        }
+
+        public String getPrintName() {
+            return this.print;
+        }
+
+        public String getMetatag() {
+            // plain character replacement; a regular expression is not needed for a single blank
+            return Autotagging.this.prefixChar + this.vocName + ":" + this.print.replace(' ', '_');
+        }
+    }
+
+    public Metatag metatag(String vocName, String print) {
+        return new Metatag(vocName, print);
+    }
+
+    public Metatag metatag(String metatag) {
+        return new Metatag(metatag);
+    }
+
+    /**
+     * print the vocabularies which are found in DATA/DICTIONARIES
+     */
+    public static void main(String[] args) {
+        final Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
+        for (final Map.Entry<String, Vocabulary> entry: a.vocabularies.entrySet()) {
+            System.out.println(entry);
+        }
+    }
+
+}
diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java
index de27f3f0f..e9b4aca6c 100644
--- a/source/net/yacy/document/LibraryProvider.java
+++ b/source/net/yacy/document/LibraryProvider.java
@@ -48,12 +48,15 @@ import net.yacy.kelondro.util.FileUtils;
public class LibraryProvider
{
- private static final String path_to_source_dictionaries = "source";
- private static final String path_to_did_you_mean_dictionaries = "didyoumean";
+ public static final char tagPrefix = '$';
+ public static final String path_to_source_dictionaries = "source";
+ public static final String path_to_did_you_mean_dictionaries = "didyoumean";
+ public static final String path_to_autotagging_dictionaries = "autotagging";
public static final String disabledExtension = ".disabled";
public static WordCache dymLib = new WordCache(null);
+ public static Autotagging autotagging = new Autotagging(null, tagPrefix);
public static OverarchingLocalization geoLoc = new OverarchingLocalization();
private static File dictSource = null;
private static File dictRoot = null;
@@ -91,7 +94,7 @@ public class LibraryProvider
* initialize the LibraryProvider as static class. This assigns default paths, and initializes the
* dictionary classes Additionally, if default dictionaries are given in the source path, they are
* translated into the input format inside the DATA/DICTIONARIES directory
- *
+ *
* @param pathToSource
* @param pathToDICTIONARIES
*/
@@ -107,6 +110,8 @@ public class LibraryProvider
initDidYouMean();
integrateOpenGeoDB();
integrateGeonames();
+ initAutotagging(tagPrefix);
+ autotagging.addLocalization(geoLoc);
}
public static void integrateOpenGeoDB() {
@@ -141,6 +146,14 @@ public class LibraryProvider
dymLib = new WordCache(dymDict);
}
+    public static void initAutotagging(char prefix) {
+        // the vocabulary files are stored in a subdirectory of the dictionary root
+        final File vocabularyDir = new File(dictRoot, path_to_autotagging_dictionaries);
+        // mkdirs() changes nothing and returns false if the path exists already
+        vocabularyDir.mkdirs();
+        autotagging = new Autotagging(vocabularyDir, prefix);
+    }
+
public static void removeDeReWo() {
final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
final File derewoInput = LibraryProvider.Dictionary.DRW0.file();
diff --git a/source/net/yacy/document/geolocalization/GeonamesLocalization.java b/source/net/yacy/document/geolocalization/GeonamesLocalization.java
index 735816983..f23ea4f54 100644
--- a/source/net/yacy/document/geolocalization/GeonamesLocalization.java
+++ b/source/net/yacy/document/geolocalization/GeonamesLocalization.java
@@ -165,6 +165,20 @@ public class GeonamesLocalization implements Localization
return a;
}
+    /**
+     * produce a set of location names
+     * @return a new set with the string form of every key of the name index
+     */
+    @Override
+    public Set<String> locationNames() {
+        final Set<StringBuilder> names = this.name2ids.keySet();
+        final Set<String> locations = new HashSet<String>();
+        for (final StringBuilder name: names) {
+            locations.add(name.toString());
+        }
+        return locations;
+    }
+
@Override
public Set recommend(final String s) {
final Set a = new HashSet();
diff --git a/source/net/yacy/document/geolocalization/Localization.java b/source/net/yacy/document/geolocalization/Localization.java
index 130cee536..3008c4d70 100644
--- a/source/net/yacy/document/geolocalization/Localization.java
+++ b/source/net/yacy/document/geolocalization/Localization.java
@@ -47,12 +47,19 @@ public interface Localization {
*/
public TreeSet find(String anyname, boolean locationexact);
+    /**
+     * produce a set of location names
+     * @return the location names as a set of strings
+     */
+    public Set<String> locationNames();
+
/**
* recommend a set of names according to a given name
* @param s a possibly partially matching name
* @return a set of names that match with the given name using the local dictionary of names
*/
public Set recommend(String s);
+
/**
* recommend a set of names according to a given name
* @param s a possibly partially matching name
@@ -70,6 +77,7 @@ public interface Localization {
* hashCode that must be used to distinguish localization services in hash sets
* @return the hash code, may be derived from the nickname
*/
+ @Override
public int hashCode();
/**
@@ -77,5 +85,6 @@ public interface Localization {
* @param other
* @return true if both objects are localization services and have the same nickname
*/
+ @Override
public boolean equals(Object other);
}
diff --git a/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java b/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java
index fbf2aeadb..f0f891e8e 100644
--- a/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java
+++ b/source/net/yacy/document/geolocalization/OpenGeoDBLocalization.java
@@ -195,7 +195,7 @@ public class OpenGeoDBLocalization implements Localization
/**
* check database tables against occurrences of this entity the anyname - String may be one of: - name of
* a town, villa, region etc - zip code - telephone prefix - kfz sign
- *
+ *
* @param anyname
* @return
*/
@@ -241,9 +241,23 @@ public class OpenGeoDBLocalization implements Localization
return a;
}
+    /**
+     * produce a set of location names
+     * @return a new set with the string form of every key of the name index
+     */
+    @Override
+    public Set<String> locationNames() {
+        final Set<StringBuilder> names = this.name2ids.keySet();
+        final Set<String> locations = new HashSet<String>();
+        for (final StringBuilder name: names) {
+            locations.add(name.toString());
+        }
+        return locations;
+    }
+
/**
* read the dictionary and construct a set of recommendations to a given string
- *
+ *
* @param s input value that is used to match recommendations
* @return a set that contains all words that start with the input value
*/
diff --git a/source/net/yacy/document/geolocalization/OverarchingLocalization.java b/source/net/yacy/document/geolocalization/OverarchingLocalization.java
index fbf704fe9..93a8183ba 100644
--- a/source/net/yacy/document/geolocalization/OverarchingLocalization.java
+++ b/source/net/yacy/document/geolocalization/OverarchingLocalization.java
@@ -56,6 +56,11 @@ public class OverarchingLocalization implements Localization {
this.services.remove(nickname);
}
+ /**
+ * the number of locations that this localization stores
+ * @return the number of locations
+ */
+ @Override
public int locations() {
int locations = 0;
for (final Localization service: this.services.values()) {
@@ -65,8 +70,12 @@ public class OverarchingLocalization implements Localization {
}
/**
- * find (a set of) locations
+ * find a location by name
+ * @param anyname - a name of a location
+ * @param locationexact - if true, then only exact matched with the location are returned. if false also partially matching names
+ * @return a set of locations, ordered by population (if this information is given)
*/
+ @Override
public TreeSet find(final String anyname, final boolean locationexact) {
final TreeSet locations = new TreeSet();
for (final Localization service: this.services.values()) {
@@ -76,36 +85,80 @@ public class OverarchingLocalization implements Localization {
}
/**
- * recommend location names
+ * produce a set of location names
+ * @return a set of names
+ */
+    @Override
+    public Set<String> locationNames() {
+        final Set<String> locations = new HashSet<String>(); // union of the names of all services
+        for (final Localization service: this.services.values()) {
+            locations.addAll(service.locationNames());
+        }
+        return locations;
+    }
+
+ /**
+ * recommend a set of names according to a given name
+ * @param s a possibly partially matching name
+ * @return a set of names that match with the given name using the local dictionary of names
*/
+ @Override
public Set recommend(final String s) {
final Set recommendations = new HashSet();
- if (s.length() == 0) return recommendations;
+ if (s.length() == 0) {
+ return recommendations;
+ }
for (final Localization service: this.services.values()) {
recommendations.addAll(service.recommend(s));
}
return recommendations;
}
+ /**
+ * recommend a set of names according to a given name
+ * @param s a possibly partially matching name
+ * @return a set of names that match with the given name using the local dictionary of names
+ */
+ @Override
public Set recommend(final StringBuilder s) {
final Set recommendations = new HashSet();
- if (s.length() == 0) return recommendations;
+ if (s.length() == 0) {
+ return recommendations;
+ }
for (final Localization service: this.services.values()) {
recommendations.addAll(service.recommend(s));
}
return recommendations;
}
+ /**
+ * return an nickname of the localization service
+ * @return the nickname
+ */
+ @Override
public String nickname() {
return "oa";
}
+ /**
+ * hashCode that must be used to distinguish localization services in hash sets
+ * @return the hash code, may be derived from the nickname
+ */
+ @Override
public int hashCode() {
return nickname().hashCode();
}
+ /**
+ * compare localization services; to be used for hash sets with localization services
+ * @param other
+ * @return true if both objects are localization services and have the same nickname
+ */
+ @Override
public boolean equals(final Object other) {
- if (!(other instanceof Localization)) return false;
+ if (!(other instanceof Localization)) {
+ return false;
+ }
return nickname().equals(((Localization) other).nickname());
}