Removed the old navigation practice using subject tags in favor of triplestore-tags
pull/1/head
Michael Peter Christen 13 years ago
parent 96f6a5869f
commit 9264d8b4af

@ -480,7 +480,7 @@ public class yacysearch {
if (p > 0) {
String k = vocabulary.substring(0, p);
String v = vocabulary.substring(p + 1);
metatags.add(LibraryProvider.autotagging.metatag(LibraryProvider.autotagging.prefixChar + k + ":" + v));
metatags.add(LibraryProvider.autotagging.metatag(k, v));
}
}

@ -493,14 +493,14 @@ public class Tagging {
return this.propFile;
}
public Metatag getMetatagFromSynonym(char prefix, final String word) {
public Metatag getMetatagFromSynonym(final String word) {
String printname = this.synonym2term.get(word);
if (printname == null) return null;
return new Metatag(prefix, printname);
return new Metatag(printname);
}
public Metatag getMetatagFromTerm(char prefix, final String word) {
return new Metatag(prefix, word);
public Metatag getMetatagFromTerm(final String word) {
return new Metatag(word);
}
public Set<String> getSynonyms(String term) {
@ -543,9 +543,7 @@ public class Tagging {
public class Metatag {
private final String object;
private final char prefix;
public Metatag(char prefix, String object) {
this.prefix = prefix;
public Metatag(String object) {
this.object = object;
}
@ -563,7 +561,7 @@ public class Tagging {
@Override
public String toString() {
return this.prefix + Tagging.this.navigatorName + ":" + encodePrintname(this.object);
return Tagging.this.navigatorName + ":" + encodePrintname(this.object);
}
@Override
@ -589,12 +587,12 @@ public class Tagging {
return PATTERN_UL.matcher(maskname).replaceAll(" ");
}
public static String cleanTagFromAutotagging(char prefix, final String tagString) {
public static String cleanTagFromAutotagging(final String tagString) {
if (tagString == null || tagString.length() == 0) return "";
String[] tags = PATTERN_SP.split(tagString);
StringBuilder sb = new StringBuilder(tagString.length());
for (String tag : tags) {
if (tag.length() > 0 && tag.charAt(0) != prefix) {
if (tag.length() > 0) {
sb.append(tag).append(' ');
}
}

@ -20,18 +20,14 @@
package net.yacy.document;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.document.WordCache.Dictionary;
import net.yacy.document.geolocation.Locations;
import net.yacy.kelondro.logging.Log;
@ -44,7 +40,6 @@ public class Autotagging {
private final static Object PRESENT = new Object();
public final char prefixChar;
private final File autotaggingPath;
private final Map<String, Tagging> vocabularies; // mapping from vocabulary name to the tagging vocabulary
private final Map<String, Object> allTags;
@ -58,10 +53,9 @@ public class Autotagging {
* properties without values are allowed (the value is then set to the key)
* also the value can be used as a tag
*/
public Autotagging(final File autotaggingPath, char prefixChar) {
public Autotagging(final File autotaggingPath) {
this.vocabularies = new ConcurrentHashMap<String, Tagging>();
this.autotaggingPath = autotaggingPath;
this.prefixChar = prefixChar;
this.allTags = new ConcurrentHashMap<String, Object>();
if (this.autotaggingPath == null || !this.autotaggingPath.exists()) {
return;
@ -114,16 +108,6 @@ public class Autotagging {
}
}
/**
 * Registers one tagging vocabulary per supplied dictionary.
 * For every map entry a {@code Tagging} is built from the entry's key and value,
 * stored in {@code vocabularies} under that key, and each tag reported by the
 * new vocabulary is recorded in {@code allTags}.
 *
 * @param dictionaries mapping from vocabulary name to the dictionary backing it
 */
public void addDictionaries(Map<String, Dictionary> dictionaries) {
    for (final Map.Entry<String, Dictionary> named : dictionaries.entrySet()) {
        final String vocabularyName = named.getKey();
        final Tagging vocabulary = new Tagging(vocabularyName, named.getValue());
        this.vocabularies.put(vocabularyName, vocabulary);
        // allTags is used as a set: the value is only a presence marker
        for (final String tagName : vocabulary.tags()) {
            this.allTags.put(tagName, PRESENT);
        }
    }
}
public void addPlaces(Locations locations) {
if (locations.size() == 0) return; // otherwise we get a navigation that does nothing
Tagging voc = new Tagging("Locations", locations);
@ -137,28 +121,10 @@ public class Autotagging {
}
}
/**
 * Produce a set of tags for a given text.
 * The text is split into tokens and every token is looked up with
 * {@code getTagFromTerm}; the string form of each metatag found is collected.
 * Tokens without a matching metatag are skipped.
 *
 * @param text the text to scan for known terms
 * @return the string forms of all metatags found; empty if no vocabularies
 *         are loaded or nothing matched
 */
public Set<String> getPrintTagsFromText(String text) {
    final Set<String> as = new HashSet<String>();
    if (this.vocabularies.isEmpty()) return as;
    final WordTokenizer tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(text)), LibraryProvider.dymLib);
    while (tokens.hasMoreElements()) {
        // The lookup result must be null-checked BEFORE toString() is called on it:
        // the original code's own null check implies a miss yields null, and
        // dereferencing first would throw a NullPointerException on the first
        // unmatched token. Only toString() is needed, so Object suffices here.
        final Object metatag = getTagFromTerm(tokens.nextElement().toString());
        if (metatag != null) as.add(metatag.toString());
    }
    return as;
}
/**
 * @return the number of entries currently held in {@code vocabularies}
 */
public int size() {
    final int vocabularyCount = this.vocabularies.size();
    return vocabularyCount;
}
/**
* maximum number of compound tags (number of words in one tag)
* @return
@ -173,39 +139,15 @@ public class Autotagging {
Tagging.Metatag tag;
term = Tagging.normalizeWord(term);
for (Map.Entry<String, Tagging> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatagFromSynonym(this.prefixChar, term);
tag = v.getValue().getMetatagFromSynonym(term);
if (tag != null) return tag;
}
return null;
}
/**
 * Checks whether the string form of a metatag occurs in a list of tags.
 *
 * @param metatag the metatag whose {@code toString()} value is searched for
 * @param tags the candidate tag strings
 * @return true if any element of {@code tags} equals the metatag's string form
 */
public static boolean metatagAppearIn(final Tagging.Metatag metatag, final String[] tags) {
    final String wanted = metatag.toString();
    for (int i = 0; i < tags.length; i++) {
        if (wanted.equals(tags[i])) {
            return true;
        }
    }
    return false;
}
public Tagging.Metatag metatag(String metatag) {
int p = metatag.indexOf(':');
if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag);
String vocName = metatag.substring(1, p);
public Tagging.Metatag metatag(String vocName, String term) {
Tagging tagging = this.vocabularies.get(vocName);
return tagging.getMetatagFromTerm(this.prefixChar, Tagging.decodeMaskname(metatag.substring(p + 1)));
}
/**
 * Instance-level shortcut for {@code Tagging.cleanTagFromAutotagging},
 * passing this instance's {@code prefixChar} along with the tag string.
 *
 * @param tagString the tag string to clean
 * @return whatever {@code Tagging.cleanTagFromAutotagging} returns for it
 */
public String cleanTagFromAutotagging(String tagString) {
    final String cleaned = Tagging.cleanTagFromAutotagging(this.prefixChar, tagString);
    return cleaned;
}
public static void main(String[] args) {
Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
for (Map.Entry<String, Tagging> entry: a.vocabularies.entrySet()) {
System.out.println(entry);
}
Set<String> tags = a.getPrintTagsFromText("In die Tueren und Fluchttueren muessen noch Schloesser eingebaut werden");
System.out.println(tags);
return tagging.getMetatagFromTerm(Tagging.decodeMaskname(term));
}
}

@ -227,10 +227,6 @@ dc_rights
String objectspace = vocabulary.getObjectspace();
StringBuilder sb = new StringBuilder(e.getValue().size() * 20);
for (Tagging.Metatag s: e.getValue()) {
String t = s.toString();
if (!this.keywords.contains(t)) {
this.keywords.add(t);
}
sb.append(',').append(s.getObject());
String objectlink = vocabulary.getObjectlink(s.getObject());
if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) {

@ -57,7 +57,6 @@ import com.hp.hpl.jena.rdf.model.Resource;
public class LibraryProvider {
public static final char tagPrefix = '$';
public static final String path_to_source_dictionaries = "source";
public static final String path_to_did_you_mean_dictionaries = "didyoumean";
public static final String path_to_autotagging_dictionaries = "autotagging";
@ -116,7 +115,7 @@ public class LibraryProvider {
dictRoot = rootPath;
// initialize libraries
initAutotagging(tagPrefix);
initAutotagging();
activateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
@ -161,12 +160,12 @@ public class LibraryProvider {
dymLib = new WordCache(dymDict);
}
public static void initAutotagging(char prefix) {
public static void initAutotagging() {
final File autotaggingPath = new File(dictRoot, path_to_autotagging_dictionaries);
if ( !autotaggingPath.exists() ) {
autotaggingPath.mkdirs();
}
autotagging = new Autotagging(autotaggingPath, prefix);
autotagging = new Autotagging(autotaggingPath);
}
public static void activateDeReWo() {

@ -37,7 +37,7 @@ import java.util.regex.Pattern;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.document.LibraryProvider;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.Row;
@ -230,7 +230,7 @@ public class URIMetadataRow implements URIMetadata {
String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
tags = LibraryProvider.autotagging.cleanTagFromAutotagging(tags);
tags = Tagging.cleanTagFromAutotagging(tags);
String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
@ -313,7 +313,7 @@ public class URIMetadataRow implements URIMetadata {
assert (s.toString().indexOf(0) < 0);
s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
assert (s.toString().indexOf(0) < 0);
s.append(",tags=").append(crypt.simpleEncode(LibraryProvider.autotagging.cleanTagFromAutotagging(metadata.dc_subject())));
s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(metadata.dc_subject())));
assert (s.toString().indexOf(0) < 0);
s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
assert (s.toString().indexOf(0) < 0);

@ -716,22 +716,6 @@ public final class RWIProcess extends Thread
}
}
// check vocabulary constraint
/*
final String tags = page.dc_subject();
final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject());
if (this.query.metatags != null && this.query.metatags.size() > 0) {
// all metatags must appear in the tags list
for (Tagging.Metatag metatag: this.query.metatags) {
if (!Autotagging.metatagAppearIn(metatag, taglist)) {
this.sortout++;
//Log.logInfo("RWIProcess", "sorted out " + page.url());
continue takeloop;
}
}
}
*/
// evaluate information of metadata for navigation
// author navigation:
if ( pageauthor != null && pageauthor.length() > 0 ) {
@ -787,24 +771,6 @@ public final class RWIProcess extends Thread
this.filetypeNavigator.inc(fileext);
}
// vocabulary navigation
/*
tagharvest: for (String tag: taglist) {
if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest;
try {
Tagging.Metatag metatag = LibraryProvider.autotagging.metatag(tag);
ScoreMap<String> voc = this.vocabularyNavigator.get(metatag.getVocabularyName());
if (voc == null) {
voc = new ConcurrentScoreMap<String>();
this.vocabularyNavigator.put(metatag.getVocabularyName(), voc);
}
voc.inc(metatag.getObject());
} catch (RuntimeException e) {
// tag may not be well-formed
}
}
*/
// accept url
return page;
}

Loading…
Cancel
Save