Strip autotagging-generated tags from URL metadata tag strings.

Autotagging.java
  Adds cleanTagFromAutotagging(String tagString):
    - returns "" when tagString is null or empty;
    - splits tagString with PATTERN_SP;
    - drops empty tokens and every token whose first character equals
      this.prefixChar;
    - returns the remaining tokens joined by a single space, or "" when
      nothing remains.

URIMetadataRow.java
  - imports net.yacy.document.LibraryProvider;
  - passes the decoded "tags" property through
    LibraryProvider.autotagging.cleanTagFromAutotagging when a row is built
    from properties;
  - passes metadata.dc_subject() through the same method before it is
    encoded into the ",tags=" field.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line boundaries were re-derived from the hunk
headers (all four hunk counts match). Leading whitespace is assumed to be
4-space Java indentation and should be confirmed against the repository;
if context whitespace differs, apply with `git apply --ignore-whitespace`.
The raw `Map.Entry` in the first hunk's context is reproduced as found;
verify it against the target file.

diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java
index e772e0c0c..a3036e72f 100644
--- a/source/net/yacy/document/Autotagging.java
+++ b/source/net/yacy/document/Autotagging.java
@@ -319,6 +319,19 @@ public class Autotagging {
         return false;
     }
 
+    public String cleanTagFromAutotagging(final String tagString) {
+        if (tagString == null || tagString.length() == 0) return "";
+        String[] tags = PATTERN_SP.split(tagString);
+        StringBuilder sb = new StringBuilder(tagString.length());
+        for (String tag : tags) {
+            if (tag.length() > 0 && tag.charAt(0) != this.prefixChar) {
+                sb.append(tag).append(' ');
+            }
+        }
+        if (sb.length() == 0) return "";
+        return sb.substring(0, sb.length() - 1);
+    }
+
     public static void main(String[] args) {
         Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
         for (Map.Entry entry: a.vocabularies.entrySet()) {
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
index 8819aa07e..756586c52 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java
@@ -36,6 +36,7 @@ import java.util.regex.Pattern;
 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.UTF8;
+import net.yacy.document.LibraryProvider;
 import net.yacy.kelondro.data.word.WordReferenceRow;
 import net.yacy.kelondro.data.word.WordReferenceVars;
 import net.yacy.kelondro.index.Row;
@@ -222,6 +223,7 @@ public class URIMetadataRow implements URIMetadata {
         String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
         String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
         String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
+        tags = LibraryProvider.autotagging.cleanTagFromAutotagging(tags);
         String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
         String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
         String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
@@ -304,7 +306,7 @@ public class URIMetadataRow implements URIMetadata {
         assert (s.toString().indexOf(0) < 0);
         s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
         assert (s.toString().indexOf(0) < 0);
-        s.append(",tags=").append(crypt.simpleEncode(metadata.dc_subject()));
+        s.append(",tags=").append(crypt.simpleEncode(LibraryProvider.autotagging.cleanTagFromAutotagging(metadata.dc_subject())));
         assert (s.toString().indexOf(0) < 0);
         s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
         assert (s.toString().indexOf(0) < 0);