diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 33268dfa7..65734d6c8 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -480,7 +480,7 @@ public class yacysearch { if (p > 0) { String k = vocabulary.substring(0, p); String v = vocabulary.substring(p + 1); - metatags.add(LibraryProvider.autotagging.metatag(LibraryProvider.autotagging.prefixChar + k + ":" + v)); + metatags.add(LibraryProvider.autotagging.metatag(k, v)); } } diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 9216e1af7..f00a0eedf 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -493,14 +493,14 @@ public class Tagging { return this.propFile; } - public Metatag getMetatagFromSynonym(char prefix, final String word) { + public Metatag getMetatagFromSynonym(final String word) { String printname = this.synonym2term.get(word); if (printname == null) return null; - return new Metatag(prefix, printname); + return new Metatag(printname); } - public Metatag getMetatagFromTerm(char prefix, final String word) { - return new Metatag(prefix, word); + public Metatag getMetatagFromTerm(final String word) { + return new Metatag(word); } public Set getSynonyms(String term) { @@ -543,9 +543,7 @@ public class Tagging { public class Metatag { private final String object; - private final char prefix; - public Metatag(char prefix, String object) { - this.prefix = prefix; + public Metatag(String object) { this.object = object; } @@ -563,7 +561,7 @@ public class Tagging { @Override public String toString() { - return this.prefix + Tagging.this.navigatorName + ":" + encodePrintname(this.object); + return Tagging.this.navigatorName + ":" + encodePrintname(this.object); } @Override @@ -589,12 +587,12 @@ public class Tagging { return PATTERN_UL.matcher(maskname).replaceAll(" "); } - public static String cleanTagFromAutotagging(char prefix, final String tagString) { + public static String cleanTagFromAutotagging(final String tagString) { if (tagString == null || tagString.length() == 0) return ""; String[] tags = PATTERN_SP.split(tagString); StringBuilder sb = new StringBuilder(tagString.length()); for (String tag : tags) { - if (tag.length() > 0 && tag.charAt(0) != prefix) { + if (tag.length() > 0) { sb.append(tag).append(' '); } } diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java index cd682899b..eb834c887 100644 --- a/source/net/yacy/document/Autotagging.java +++ b/source/net/yacy/document/Autotagging.java @@ -20,18 +20,14 @@ package net.yacy.document; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.util.Collection; -import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.document.UTF8; import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.document.WordCache.Dictionary; import net.yacy.document.geolocation.Locations; import net.yacy.kelondro.logging.Log; @@ -44,7 +40,6 @@ public class Autotagging { private final static Object PRESENT = new Object(); - public final char prefixChar; private final File autotaggingPath; private final Map vocabularies; // mapping from vocabulary name to the tagging vocabulary private final Map allTags; @@ -58,10 +53,9 @@ public class Autotagging { * properties without values are allowed (the value is then set to the key) * also the value can be used as a tag */ - public Autotagging(final File autotaggingPath, char prefixChar) { + public Autotagging(final File autotaggingPath) { this.vocabularies = new ConcurrentHashMap(); this.autotaggingPath = autotaggingPath; - this.prefixChar = prefixChar; this.allTags = new ConcurrentHashMap(); if (this.autotaggingPath == null || !this.autotaggingPath.exists()) { return; @@ -114,16 +108,6 @@ public class Autotagging { } } - public void addDictionaries(Map dictionaries) { - for (Map.Entry entry: dictionaries.entrySet()) { - Tagging voc = new Tagging(entry.getKey(), entry.getValue()); - this.vocabularies.put(entry.getKey(), voc); - for (String t: voc.tags()) { - this.allTags.put(t, PRESENT); - } - } - } - public void addPlaces(Locations locations) { if (locations.size() == 0) return; // otherwise we get a navigation that does nothing Tagging voc = new Tagging("Locations", locations); @@ -137,28 +121,10 @@ public class Autotagging { } } - /** - * produce a set of tags for a given text. - * The set contains the names of the tags with a prefix character at the front - * @param text - * @return - */ - public Set getPrintTagsFromText(String text) { - Set as = new HashSet(); - if (this.vocabularies.isEmpty()) return as; - final WordTokenizer tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(text)), LibraryProvider.dymLib); - String tag; - while (tokens.hasMoreElements()) { - tag = getTagFromTerm(tokens.nextElement().toString()).toString(); - if (tag != null) as.add(tag); - } - return as; - } - public int size() { return this.vocabularies.size(); } - + /** * maximum number of compound tags (number of words in one tag) * @return @@ -173,39 +139,15 @@ public class Autotagging { Tagging.Metatag tag; term = Tagging.normalizeWord(term); for (Map.Entry v: this.vocabularies.entrySet()) { - tag = v.getValue().getMetatagFromSynonym(this.prefixChar, term); + tag = v.getValue().getMetatagFromSynonym(term); if (tag != null) return tag; } return null; } - public static boolean metatagAppearIn(final Tagging.Metatag metatag, final String[] tags) { - String tag = metatag.toString(); - for (String s: tags) { - if (tag.equals(s)) return true; - } - return false; - } - - public Tagging.Metatag metatag(String metatag) { - int p = metatag.indexOf(':'); - if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag); - String vocName = metatag.substring(1, p); + public Tagging.Metatag metatag(String vocName, String term) { Tagging tagging = this.vocabularies.get(vocName); - return tagging.getMetatagFromTerm(this.prefixChar, Tagging.decodeMaskname(metatag.substring(p + 1))); - } - - public String cleanTagFromAutotagging(String tagString) { - return Tagging.cleanTagFromAutotagging(this.prefixChar, tagString); - } - - public static void main(String[] args) { - Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$'); - for (Map.Entry entry: a.vocabularies.entrySet()) { - System.out.println(entry); - } - Set tags = a.getPrintTagsFromText("In die Tueren und Fluchttueren muessen noch Schloesser eingebaut werden"); - System.out.println(tags); + return tagging.getMetatagFromTerm(Tagging.decodeMaskname(term)); } } diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index bf2210bb0..9bfecb957 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -227,10 +227,6 @@ dc_rights String objectspace = vocabulary.getObjectspace(); StringBuilder sb = new StringBuilder(e.getValue().size() * 20); for (Tagging.Metatag s: e.getValue()) { - String t = s.toString(); - if (!this.keywords.contains(t)) { - this.keywords.add(t); - } sb.append(',').append(s.getObject()); String objectlink = vocabulary.getObjectlink(s.getObject()); if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) { diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index 949000f6a..92a392946 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -57,7 +57,6 @@ import com.hp.hpl.jena.rdf.model.Resource; public class LibraryProvider { - public static final char tagPrefix = '$'; public static final String path_to_source_dictionaries = "source"; public static final String path_to_did_you_mean_dictionaries = "didyoumean"; public static final String path_to_autotagging_dictionaries = "autotagging"; @@ -116,7 +115,7 @@ public class LibraryProvider { dictRoot = rootPath; // initialize libraries - initAutotagging(tagPrefix); + initAutotagging(); activateDeReWo(); initDidYouMean(); integrateOpenGeoDB(); @@ -161,12 +160,12 @@ public class LibraryProvider { dymLib = new WordCache(dymDict); } - public static void initAutotagging(char prefix) { + public static void initAutotagging() { final File autotaggingPath = new File(dictRoot, path_to_autotagging_dictionaries); if ( !autotaggingPath.exists() ) { autotaggingPath.mkdirs(); } - autotagging = new Autotagging(autotaggingPath, prefix); + autotagging = new Autotagging(autotaggingPath); } public static void activateDeReWo() { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index aefff4930..b46feb0b7 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -37,7 +37,7 @@ import java.util.regex.Pattern; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; -import net.yacy.document.LibraryProvider; +import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.index.Row; @@ -230,7 +230,7 @@ public class URIMetadataRow implements URIMetadata { String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = ""; String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = ""; String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = ""; - tags = LibraryProvider.autotagging.cleanTagFromAutotagging(tags); + tags = Tagging.cleanTagFromAutotagging(tags); String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = ""; String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0"; String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0"; @@ -313,7 +313,7 @@ public class URIMetadataRow implements URIMetadata { assert (s.toString().indexOf(0) < 0); s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator())); assert (s.toString().indexOf(0) < 0); - s.append(",tags=").append(crypt.simpleEncode(LibraryProvider.autotagging.cleanTagFromAutotagging(metadata.dc_subject()))); + s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(metadata.dc_subject()))); assert (s.toString().indexOf(0) < 0); s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher())); assert (s.toString().indexOf(0) < 0); diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index a041e7b62..4ea5a9545 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -716,22 +716,6 @@ public final class RWIProcess extends Thread } } - // check vocabulary constraint - /* - final String tags = page.dc_subject(); - final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject()); - if (this.query.metatags != null && this.query.metatags.size() > 0) { - // all metatags must appear in the tags list - for (Tagging.Metatag metatag: this.query.metatags) { - if (!Autotagging.metatagAppearIn(metatag, taglist)) { - this.sortout++; - //Log.logInfo("RWIProcess", "sorted out " + page.url()); - continue takeloop; - } - } - } - */ - // evaluate information of metadata for navigation // author navigation: if ( pageauthor != null && pageauthor.length() > 0 ) { @@ -787,24 +771,6 @@ public final class RWIProcess extends Thread this.filetypeNavigator.inc(fileext); } - // vocabulary navigation - /* - tagharvest: for (String tag: taglist) { - if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest; - try { - Tagging.Metatag metatag = LibraryProvider.autotagging.metatag(tag); - ScoreMap voc = this.vocabularyNavigator.get(metatag.getVocabularyName()); - if (voc == null) { - voc = new ConcurrentScoreMap(); - this.vocabularyNavigator.put(metatag.getVocabularyName(), voc); - } - voc.inc(metatag.getObject()); - } catch (RuntimeException e) { - // tag may not be well-formed - } - } - */ - // accept url return page; }