added a vocabulary editor

pull/1/head
Michael Peter Christen 13 years ago
parent ae34205a6d
commit eca38c53e7

@ -0,0 +1,82 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Vocabulary Administration</title>
#%env/templates/metas.template%#
</head>
<body id="Vocabulary_p">
#%env/templates/header.template%#
#%env/templates/submenuSemantic.template%#
<h2>Vocabulary Administration</h2>
<p>
Vocabularies can be used to produce a search navigation. A vocabulary must be created before content is indexed.
The vocabulary is used to annotate the indexed content with a reference to the object that is denoted by the term of the vocabulary.
The object can be denoted by a url stub that, combined with the term, becomes the url for the object.
</p>
<form action="Vocabulary_p.html" method="get" accept-charset="UTF-8">
<fieldset><legend>Vocabulary Selection</legend>
<dl>
<dt>Select Vocabulary</dt>
<dd>
<select name="vocabulary" onchange='this.form.submit()'>
#{vocabularyset}#
<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>
#{/vocabularyset}#
</select>
</dd>
<dt></dt>
<dd><input type="submit" name="view" value="View" /></dd>
</dl>
</fieldset>
</form>
#(edit)#::
<form action="Vocabulary_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Vocabulary Editor</legend>
<dl>
<dt>File</dt><dd>#(editable)#[automatically generated, not stored, cannot be edited]::#[file]##(/editable)#</dd>
<dt>Name</dt><dd>#[name]#</dd>
<dt>Namespace</dt><dd>#[namespace]#</dd>
<dt>Predicate</dt><dd>#[predicate]#</dd>
<dt>Prefix</dt><dd>#[prefix]#</dd>
<dt>Objectspace</dt><dd>#(editable)##[objectspacepredicate]#::<input type="text" name="objectspace" value="#[objectspace]#" size="78" maxlength="128" /><br/>if set, this uses the predicate <a href="#[objectspacepredicate]#">#[objectspacepredicate]#</a> for generated objects#(/editable)#</dd>
</dl>
<p>This produces the following triples in the <a href="/GetRDF.xml?global=">triplestore</a> if a term or synonym matches in a document:</p>
<dl>
<dt>Triple #1</dt><dd>#[triple1]#</dd>
<dt>Triple #2</dt><dd>#[triple2]#</dd>
<dt>more Triples for linking into objectspace</dt><dd>#[tripleN]#</dd>
</dl>
<table class="sortable" border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td>Modify</td>
<td>Delete</td>
<td>Literal</td>
<td>Synonyms</td>
</tr>
#{terms}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td align="center">#(editable)#&nbsp;::<input type="checkbox" name="modify_#[term]#" id="modify_#[term]#" value="checked" disabled="disabled"/>#(/editable)#</td>
<td align="center">#(editable)#&nbsp;::<input type="checkbox" name="delete_#[term]#" id="delete_#[term]#" value="checked" onchange='this.form.submit()'/>#(/editable)#</td>
<td align="left">#[term]#</td>
<td align="left">#(editable)##[synonyms]#::<input type="text" name="synonyms_#[term]#" value="#[synonyms]#" size="80" maxlength="1024" onclick="document.getElementById('modify_#[term]#').checked='checked'; document.getElementById('modify_#[term]#').disabled=''"/>#(/editable)#</td>
</tr>
#{/terms}#
#(editable)#::
<tr class="TableCellDark">
<td align="center" colspan="2"><input type="checkbox" name="modify_new" id="modify_new" value="checked" disabled="disabled"/><i>add</i></td>
<td align="left"><input type="text" name="newterm" id="newterm" value="" size="24" maxlength="128" onclick="document.getElementById('modify_new').checked='checked'; document.getElementById('modify_new').disabled=''"/></td>
<td align="left"><input type="text" name="newsynonyms" id="newsynonyms" value="" size="80" maxlength="1024"/></td>
</tr>
#(/editable)#
</table>
</fieldset>
<input type="hidden" name="vocabulary" value="#[name]#" />
<input type="submit" name="set" value="Set" />
</form>
#(/edit)#
#%env/templates/footer.template%#
</body>
</html>

@ -0,0 +1,117 @@
/**
* Vocabulary_p
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 07.05.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import net.yacy.cora.lod.vocabulary.DCTerms;
import net.yacy.cora.lod.vocabulary.Owl;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind Vocabulary_p.html: lists all known vocabularies, applies edits
 * submitted from the editor form to the selected vocabulary and renders the
 * selected vocabulary for viewing/editing.
 */
public class Vocabulary_p {

    /**
     * Handles one request to the vocabulary editor page.
     *
     * @param header the request header (not used here)
     * @param post   the submitted parameters, or null if the page was called without any
     * @param env    the server environment (not used here)
     * @return the template properties for Vocabulary_p.html
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        Collection<Tagging> vocs = LibraryProvider.autotagging.getVocabularies();

        String vocabularyName = (post == null) ? null : post.get("vocabulary", null);
        Tagging vocabulary = vocabularyName == null ? null : LibraryProvider.autotagging.getVocabulary(vocabularyName);

        // fill the vocabulary selection box
        int count = 0;
        for (Tagging v: vocs) {
            // escaped like edit_name below, because the name ends up inside an attribute value
            prop.putHTML("vocabularyset_" + count + "_name", v.getName());
            prop.put("vocabularyset_" + count + "_selected", (vocabularyName != null && vocabularyName.equals(v.getName())) ? 1 : 0);
            count++;
        }
        prop.put("vocabularyset", count);

        // Apply submitted edits. Vocabularies without a backing file are generated
        // automatically and are shown read-only, so edits are accepted only for
        // file-backed vocabularies.
        if (post != null && vocabulary != null && isEditable(vocabulary)) {
            try {
                // check if objectspace was set; a plain view request carries no such parameter
                String objectspace = post.get("objectspace", null);
                if (objectspace != null) {
                    vocabulary.setObjectspace(objectspace);
                }

                // check if a term was added
                if (post.get("modify_new", "").equals("checked") && post.get("newterm", "").length() > 0) {
                    vocabulary.put(post.get("newterm", ""), post.get("newsynonyms", ""));
                }

                // check if a term was modified
                for (Map.Entry<String, String> e : post.entrySet()) {
                    if (e.getKey().startsWith("modify_") && e.getValue().equals("checked")) {
                        String term = e.getKey().substring(7);
                        // Every term row of the form submits a "synonyms_<term>" field. The
                        // "modify_new" checkbox of the add-row has none (it uses "newsynonyms"),
                        // so requiring the field keeps the add-row from creating a term "new".
                        String synonyms = post.get("synonyms_" + term, null);
                        if (synonyms == null) {
                            continue;
                        }
                        vocabulary.put(term, synonyms);
                    }
                }

                // check if a term shall be deleted
                for (Map.Entry<String, String> e : post.entrySet()) {
                    if (e.getKey().startsWith("delete_") && e.getValue().equals("checked")) {
                        String term = e.getKey().substring(7);
                        vocabulary.delete(term);
                    }
                }
            } catch (IOException e) {
                Log.logException(e);
            }
        }

        if (vocabulary == null) {
            prop.put("edit", 0);
        } else {
            prop.put("edit", 1);
            // evaluated after the edits above so the page reflects the current state of the file
            boolean editable = isEditable(vocabulary);
            String yacyurl = YaCyMetadata.hashURI("[hash]".getBytes());
            prop.put("edit_editable", editable ? 1 : 0);
            prop.putHTML("edit_editable_file", editable ? vocabulary.getFile().getAbsolutePath() : "");
            prop.putHTML("edit_name", vocabulary.getName());
            prop.putHTML("edit_namespace", vocabulary.getNamespace());
            prop.putHTML("edit_predicate", vocabulary.getPredicate());
            prop.putHTML("edit_prefix", Tagging.DEFAULT_PREFIX);
            prop.putHTML("edit_editable_objectspace", vocabulary.getObjectspace() == null ? "" : vocabulary.getObjectspace());
            prop.putHTML("edit_editable_objectspacepredicate", DCTerms.references.getPredicate());
            prop.putHTML("edit_triple1", "<" + yacyurl + "> <" + vocabulary.getPredicate() + "> \"[discovered-tags-commaseparated]\"");
            prop.putHTML("edit_triple2", "<" + yacyurl + "> <" + Owl.SameAs.getPredicate() + "> <[document-url]>");
            prop.putHTML("edit_tripleN", vocabulary.getObjectspace() == null ? "none - missing objectspace" : "<" + yacyurl + "> <" + DCTerms.references.getPredicate() + "> \"" + vocabulary.getObjectspace() + "[discovered-tag]\"");

            // one table row per term, alternating light/dark
            int c = 0;
            boolean dark = false;
            for (Map.Entry<String, String> entry: vocabulary.list().entrySet()) {
                prop.put("edit_terms_" + c + "_editable", editable ? 1 : 0);
                prop.put("edit_terms_" + c + "_dark", dark ? 1 : 0); dark = !dark;
                prop.putHTML("edit_terms_" + c + "_term", entry.getKey());
                prop.putHTML("edit_terms_" + c + "_editable_term", entry.getKey());
                prop.putHTML("edit_terms_" + c + "_editable_synonyms", entry.getValue());
                c++;
            }
            prop.put("edit_terms", c);
        }

        // return rewrite properties
        return prop;
    }

    /**
     * A vocabulary can be edited only if it is stored in an existing file.
     */
    private static boolean isEditable(final Tagging vocabulary) {
        return vocabulary.getFile() != null && vocabulary.getFile().exists();
    }
}

@ -30,7 +30,7 @@
<li><a href="/Crawler_p.html" class="MenuItemLink lock">Creation Monitor</a></li>
<li><a href="/IndexControlRWIs_p.html" class="MenuItemLink lock">Index Administration</a></li>
<li><a href="/Blacklist_p.html" class="MenuItemLink lock">Filter &amp; Blacklists</a></li>
<li><a href="/DictionaryLoader_p.html" class="MenuItemLink lock">Content Semantic</a></li>
<li><a href="/Vocabulary_p.html" class="MenuItemLink lock">Content Semantic</a></li>
</ul>
</li>
<li class="menugroup" id="menugroupMonitoring">

@ -1,10 +1,11 @@
<div class="SubMenu">
<h3>Semantic</h3>
<h3>Content Semantic</h3>
</div>
<div class="SubMenu">
<div class="SubMenugroup">
<h3>Automated Annotation</h3>
<ul class="SubMenu">
<li><a href="/Vocabulary_p.html" class="MenuItemLink lock">Auto-Annotation Vocabulary Editor</a></li>
<li><a href="/DictionaryLoader_p.html" class="MenuItemLink lock">Dictionary Loader</a></li>
</ul>
</div>

@ -20,11 +20,16 @@
package net.yacy.cora.lod.vocabulary;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
@ -42,6 +47,7 @@ public class Tagging {
private final Map<String, String> synonym2term;
private final Map<String, String> term2synonym;
private final Map<String, Set<String>> synonym2synonyms;
private File propFile;
private String predicate, namespace, objectspace;
@ -53,11 +59,197 @@ public class Tagging {
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + name;
this.objectspace = null;
this.propFile = null;
}
public Tagging(String name, File propFile) throws IOException {
public Tagging(String name, File propFile) throws IOException {
this(name);
BlockingQueue<String> list = Files.concurentLineReader(propFile, 1000);
this.propFile = propFile;
init(name);
}
/**
 * Placeholder for updating the synonyms of a single term.
 * The body is empty, so calling this method currently has no effect.
 *
 * @param term     the term to update (ignored)
 * @param synonyms the new synonyms of the term (ignored)
 */
public void updateTerm(String term, String[] synonyms) {
}
/**
 * Names the scratch file used while the vocabulary file is rewritten:
 * the absolute path of the vocabulary file with ".tmp" appended.
 */
private File tmpFile() {
    final String scratchPath = this.propFile.getAbsolutePath() + ".tmp";
    return new File(scratchPath);
}
/**
 * Adds a term, or replaces the synonyms of an existing term, in the vocabulary
 * file and reloads the vocabulary from the rewritten file.
 *
 * @param term     the term to write
 * @param synonyms the synonyms of the term as they shall appear in the file; null or empty for none
 * @throws IOException if the vocabulary has no file or the file cannot be rewritten
 */
public void put(String term, String synonyms) throws IOException {
    String entry = term + (synonyms == null || synonyms.length() == 0 ? "" : ":" + synonyms) + "\n";
    rewritePropFile(this.objectspace, term, entry, true);
}

/**
 * Removes a term from the vocabulary file and reloads the vocabulary from the
 * rewritten file.
 *
 * @param term the term to remove
 * @throws IOException if the vocabulary has no file or the file cannot be rewritten
 */
public void delete(String term) throws IOException {
    rewritePropFile(this.objectspace, term, null, false);
}

/**
 * Changes the objectspace of the vocabulary, stores it in the vocabulary file
 * and reloads the vocabulary from the rewritten file.
 * Nothing happens if os is null or does not differ from the current objectspace.
 *
 * @param os the new objectspace; an empty string removes the objectspace
 * @throws IOException if the vocabulary has no file or the file cannot be rewritten
 */
public void setObjectspace(String os) throws IOException {
    if (os == null) return;
    // null and "" both mean "no objectspace": neither one produces an "#objectspace:" line,
    // so switching between them must not trigger a rewrite of the file
    String current = this.objectspace == null ? "" : this.objectspace;
    if (current.equals(os)) return;
    rewritePropFile(os, null, null, false);
}

/**
 * Rewrites the vocabulary file through a temporary file and re-initializes the
 * vocabulary from the result. The original file is replaced only after the
 * temporary file has been written completely.
 *
 * @param os              the objectspace to write into the file header; null or empty for none
 * @param term            the term whose line is replaced or dropped; null to copy all lines
 * @param replacement     the complete line (with line break) written in place of the term's line;
 *                        null to drop the term's line
 * @param appendIfMissing if true, the replacement is appended when no line of the file matched the term
 * @throws IOException if the vocabulary has no file, reading or writing fails, the thread is
 *                     interrupted while reading, or the original file cannot be replaced
 */
private void rewritePropFile(String os, String term, String replacement, boolean appendIfMissing) throws IOException {
    if (this.propFile == null) {
        throw new IOException("vocabulary '" + this.navigatorName + "' is not stored in a file and cannot be modified");
    }
    File tmp = tmpFile();
    // open the reader first: if that fails, no writer is left open
    BlockingQueue<String> list = Files.concurentLineReader(this.propFile, 1000);
    BufferedWriter w = new BufferedWriter(new FileWriter(tmp));
    boolean complete = false;
    try {
        if (this.namespace != null && !this.namespace.equals(DEFAULT_NAMESPACE)) w.write("#namespace:" + this.namespace + "\n");
        if (os != null && os.length() > 0) w.write("#objectspace:" + os + "\n");
        boolean matched = false;
        String line;
        while ((line = list.take()) != Files.POISON_LINE) {
            String[] pl = parseLine(line);
            if (pl == null) {
                continue;
            }
            if (term != null && pl[0].equals(term)) {
                matched = true;
                if (replacement != null) w.write(replacement);
            } else {
                w.write(pl[0] + (pl[1] == null ? "" : ":" + pl[1]) + "\n");
            }
        }
        if (appendIfMissing && !matched && replacement != null) {
            w.write(replacement);
        }
        w.close(); // closed inside the try so that a failing flush is reported
        complete = true;
    } catch (InterruptedException e) {
        // the temporary file is incomplete; keep the original file and report the interruption
        Thread.currentThread().interrupt();
        throw new IOException("interrupted while rewriting " + this.propFile.getAbsolutePath(), e);
    } finally {
        if (!complete) {
            try {
                w.close();
            } catch (IOException ignored) {
                // the failure that brought us here is already being propagated
            }
            tmp.delete();
        }
    }
    // delete before rename: renameTo may refuse to overwrite an existing file
    if (this.propFile.exists() && !this.propFile.delete()) {
        tmp.delete();
        throw new IOException("cannot replace " + this.propFile.getAbsolutePath());
    }
    if (!tmp.renameTo(this.propFile)) {
        throw new IOException("cannot rename " + tmp.getAbsolutePath() + " to " + this.propFile.getAbsolutePath() + "; the vocabulary is kept in the temporary file");
    }
    init(this.navigatorName);
}
/**
 * Rebuilds, from the in-memory lookup maps, a sorted map from each term to
 * the sorted set of its synonyms.
 * Every key of term2synonym gets a set; its value is added when it is neither
 * null nor empty. Every key of synonym2term is added to the set of the term
 * it maps to.
 *
 * @return a new map from term to the set of its synonyms
 */
public Map<String, Set<String>> reconstructionSets() {
    final Map<String, Set<String>> sets = new TreeMap<String, Set<String>>();

    // direction term -> synonym
    for (final Map.Entry<String, String> pair : this.term2synonym.entrySet()) {
        final String term = pair.getKey();
        if (!sets.containsKey(term)) {
            sets.put(term, new TreeSet<String>());
        }
        final String synonym = pair.getValue();
        if (synonym == null || synonym.length() == 0) {
            continue;
        }
        sets.get(term).add(synonym);
    }

    // direction synonym -> term
    for (final Map.Entry<String, String> pair : this.synonym2term.entrySet()) {
        final String term = pair.getValue();
        if (!sets.containsKey(term)) {
            sets.put(term, new TreeSet<String>());
        }
        sets.get(term).add(pair.getKey());
    }
    return sets;
}
/**
 * Rebuilds a sorted map from each term to its synonyms, joined with commas.
 * A term without any synonym is mapped to the empty string.
 *
 * @return a new map from term to the comma-separated list of its synonyms
 */
public Map<String, String> reconstructionLists() {
    Map<String, Set<String>> r = reconstructionSets();
    Map<String, String> map = new TreeMap<String, String>();
    for (Map.Entry<String, Set<String>> e: r.entrySet()) {
        StringBuilder sb = new StringBuilder(e.getValue().size() * 10);
        // Put the separator before every element but the first. The set may be empty,
        // so cutting a leading comma off afterwards with substring(1) would throw.
        boolean first = true;
        for (String s: e.getValue()) {
            if (!first) sb.append(',');
            sb.append(s);
            first = false;
        }
        map.put(e.getKey(), sb.toString());
    }
    return map;
}
/**
 * Lists the terms of the vocabulary together with their synonyms.
 * For a vocabulary without a file the list is reconstructed from the in-memory
 * maps; otherwise it is read from the file, in file order, mapping every term
 * to the text following its separator (empty string if there is none).
 *
 * @return a map from term to synonyms; empty if the file cannot be opened,
 *         possibly incomplete if the thread is interrupted while reading
 */
public Map<String, String> list() {
    if (this.propFile == null) {
        // create a virtual map for automatically generated vocabularies
        return reconstructionLists();
    }
    Map<String, String> map = new LinkedHashMap<String, String>();
    BlockingQueue<String> list;
    try {
        list = Files.concurentLineReader(this.propFile, 1000);
    } catch (IOException e1) {
        // the file cannot be read: report an empty vocabulary
        return map;
    }
    String line;
    try {
        while ((line = list.take()) != Files.POISON_LINE) {
            String[] pl = parseLine(line);
            if (pl == null) {
                continue;
            }
            map.put(pl[0], pl[1] == null ? "" : pl[1]);
        }
    } catch (InterruptedException e) {
        // return what was read so far, but keep the interrupt visible to the caller
        Thread.currentThread().interrupt();
    }
    return map;
}
/**
 * Splits one line of a vocabulary file into its term and its synonym part.
 * Surrounding whitespace and everything from the first '#' on are removed first.
 * The rest is cut at the first ':', or if there is none at the first '=',
 * or if there is none at the first tab.
 *
 * @param line the raw line
 * @return null if nothing remains after removing comment and whitespace;
 *         otherwise a two-element array {term, synonyms} where synonyms is
 *         null if the line contains no separator
 */
private final static String[] parseLine(String line) {
    String content = line.trim();
    final int commentStart = content.indexOf('#');
    if (commentStart >= 0) {
        content = content.substring(0, commentStart).trim();
    }
    if (content.length() == 0) {
        return null;
    }

    // separators in order of precedence
    final char[] separators = {':', '=', '\t'};
    for (int i = 0; i < separators.length; i++) {
        final int cut = content.indexOf(separators[i]);
        if (cut >= 0) {
            return new String[]{content.substring(0, cut), content.substring(cut + 1)};
        }
    }
    return new String[]{content, null};
}
public void init(String name) throws IOException {
if (this.propFile == null) return;
this.synonym2term.clear();
this.term2synonym.clear();
this.synonym2synonyms.clear();
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + name;
this.objectspace = null;
BlockingQueue<String> list = Files.concurentLineReader(this.propFile, 1000);
String term, v;
String[] tags;
int p;
@ -70,34 +262,28 @@ public class Tagging {
String comment = line.substring(p + 1).trim();
if (comment.startsWith("namespace:")) {
this.namespace = comment.substring(10).trim();
if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#")) this.namespace += "#";
if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
this.predicate = this.namespace + name;
}
if (comment.startsWith("objectspace:")) {
this.objectspace = comment.substring(12).trim();
if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#")) this.objectspace += "#";
if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
}
line = line.substring(0, p).trim();
}
if (line.length() == 0) {
String[] pl = parseLine(line);
if (pl == null) {
continue vocloop;
}
p = line.indexOf(':');
if (p < 0) {
p = line.indexOf('=');
}
if (p < 0) {
p = line.indexOf('\t');
}
if (p < 0) {
term = normalizeKey(line);
v = normalizeWord(line);
if (pl[1] == null) {
term = normalizeKey(pl[0]);
v = normalizeWord(pl[0]);
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
continue vocloop;
}
term = normalizeKey(line.substring(0, p));
v = line.substring(p + 1);
term = normalizeKey(pl[0]);
v = pl[1];
tags = v.split(",");
Set<String> synonyms = new HashSet<String>();
synonyms.add(term);
@ -150,6 +336,10 @@ public class Tagging {
return this.predicate;
}
/**
 * @return the namespace of this vocabulary: the default namespace unless the
 *         vocabulary file declares another one in a "namespace:" comment
 */
public String getNamespace() {
return this.namespace;
}
/**
 * @return the objectspace of this vocabulary, or null if none is set
 */
public String getObjectspace() {
return this.objectspace;
}
@ -172,6 +362,10 @@ public class Tagging {
return this.navigatorName;
}
/**
 * @return the file this vocabulary is stored in, or null if the vocabulary
 *         was created without a file
 */
public File getFile() {
return this.propFile;
}
public Metatag getMetatagFromSynonym(char prefix, final String word) {
String printname = this.synonym2term.get(word);
if (printname == null) return null;

Loading…
Cancel
Save