- added loading of the dbpedia pnd triplestore in the dictionary loader

- renamed the dictionary loader to knowledge loader
- some refactoring in the library provider method names
pull/1/head
Michael Peter Christen 13 years ago
parent 6d17686258
commit a0f1decd82

@ -1,13 +1,13 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Dictionary Loader</title>
<title>YaCy '#[clientname]#': Knowledge Loader</title>
#%env/templates/metas.template%#
</head>
<body id="DictionaryLoader">
#%env/templates/header.template%#
#%env/templates/submenuSemantic.template%#
<h2>Dictionary Loader</h2>
<h2>Knowledge Loader</h2>
<p>
YaCy can use external libraries to enable or enhance some functions. These libraries are not
@ -15,6 +15,50 @@
You can download additional files here.
</p>
<form action="DictionaryLoader_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<legend>Persons</legend>
Person dictionaries will help YaCy to identify person names in input documents. As a result, a person Navigator is presented.
<h4><a href="http://wiki.dbpedia.org/Downloads37#pnd">PND (Personennamendatei) identifiers from dbPedia - wikipedia extraction</a></h4>
<p>This file provides 150000 person names extracted from wikipedia articles in which a PND from the Deutsche Nationalbibliothek is mentioned. It is not the 'raw' PND from d-dnb.de, which is much larger but not publicly available.</p><!--http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2-->
<dl>
<dt><label>Download from</label></dt>
<dd>#[pnd0URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[pnd0Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(pnd0Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::deactivated#(/pnd0Status)#</dd>
<dt>Action</dt>
<dd>#(pnd0Status)#
<input type="submit" name="pnd0Load" value="Load" />::
<input type="submit" name="pnd0Deactivate" value="Deactivate" />
<input type="submit" name="pnd0Remove" value="Remove" />::
<input type="submit" name="pnd0Activate" value="Activate" />
<input type="submit" name="pnd0Remove" value="Remove" />
#(/pnd0Status)#</dd>
#(pnd0ActionLoaded)#::
<dt>Result</dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/pnd0ActionLoaded)#
#(pnd0ActionRemoved)#::
<dt>Result</dt><dd><div class="commit">deactivated and removed dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/pnd0ActionRemoved)#
#(pnd0ActionDeactivated)#::
<dt>Result</dt><dd><div class="commit">deactivated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot deactivate dictionary file: #[error]#</div></dd>
#(/pnd0ActionDeactivated)#
#(pnd0ActionActivated)#::
<dt>Result</dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/pnd0ActionActivated)#
</dl>
</fieldset>
</form>
<form action="DictionaryLoader_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<legend>Geolocalization</legend>

@ -68,7 +68,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
@ -86,19 +86,19 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON0.nickname);
prop.put("geon0ActionRemoved", 1);
}
if (post.containsKey("geon0Deactivate")) {
LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON0.nickname);
prop.put("geon0ActionDeactivated", 1);
}
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
@ -110,8 +110,8 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1);
@ -129,19 +129,19 @@ public class DictionaryLoader_p {
if (post.containsKey("geo1Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
prop.put("geo1ActionRemoved", 1);
}
if (post.containsKey("geo1Deactivate")) {
LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
prop.put("geo1ActionDeactivated", 1);
}
if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1ActionActivated", 1);
}
@ -153,7 +153,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file());
LibraryProvider.integrateDeReWo();
LibraryProvider.activateDeReWo();
LibraryProvider.initDidYouMean();
prop.put("drw0Status", LibraryProvider.Dictionary.DRW0.file().exists() ? 1 : 0);
prop.put("drw0ActionLoaded", 1);
@ -169,7 +169,7 @@ public class DictionaryLoader_p {
}
if (post.containsKey("drw0Remove")) {
LibraryProvider.removeDeReWo();
LibraryProvider.deactivateDeReWo();
LibraryProvider.initDidYouMean();
FileUtils.deletedelete(LibraryProvider.Dictionary.DRW0.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.DRW0.fileDisabled());
@ -177,7 +177,7 @@ public class DictionaryLoader_p {
}
if (post.containsKey("drw0Deactivate")) {
LibraryProvider.removeDeReWo();
LibraryProvider.deactivateDeReWo();
LibraryProvider.initDidYouMean();
LibraryProvider.Dictionary.DRW0.file().renameTo(LibraryProvider.Dictionary.DRW0.fileDisabled());
prop.put("drw0ActionDeactivated", 1);
@ -185,11 +185,51 @@ public class DictionaryLoader_p {
if (post.containsKey("drw0Activate")) {
LibraryProvider.Dictionary.DRW0.fileDisabled().renameTo(LibraryProvider.Dictionary.DRW0.file());
LibraryProvider.integrateDeReWo();
LibraryProvider.activateDeReWo();
LibraryProvider.initDidYouMean();
prop.put("drw0ActionActivated", 1);
}
// PND0
if (post.containsKey("pnd0Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file());
LibraryProvider.activatePND();
prop.put("pnd0Status", LibraryProvider.Dictionary.PND0.file().exists() ? 1 : 0);
prop.put("pnd0ActionLoaded", 1);
} catch (final MalformedURLException e) {
Log.logException(e);
prop.put("pnd0ActionLoaded", 2);
prop.put("pnd0ActionLoaded_error", e.getMessage());
} catch (final IOException e) {
Log.logException(e);
prop.put("pnd0ActionLoaded", 2);
prop.put("pnd0ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("pnd0Remove")) {
LibraryProvider.deactivatePND();
FileUtils.deletedelete(LibraryProvider.Dictionary.PND0.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.PND0.fileDisabled());
prop.put("pnd0ActionRemoved", 1);
}
if (post.containsKey("pnd0Deactivate")) {
LibraryProvider.deactivatePND();
LibraryProvider.Dictionary.PND0.file().renameTo(LibraryProvider.Dictionary.PND0.fileDisabled());
prop.put("pnd0ActionDeactivated", 1);
}
if (post.containsKey("pnd0Activate")) {
LibraryProvider.Dictionary.PND0.fileDisabled().renameTo(LibraryProvider.Dictionary.PND0.file());
LibraryProvider.activatePND();
prop.put("pnd0ActionActivated", 1);
}
// check status again
for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) {
prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0);

@ -6,7 +6,7 @@
<h3>Automated Annotation</h3>
<ul class="SubMenu">
<li><a href="/Vocabulary_p.html" class="MenuItemLink lock">Auto-Annotation Vocabulary Editor</a></li>
<li><a href="/DictionaryLoader_p.html" class="MenuItemLink lock">Dictionary Loader</a></li>
<li><a href="/DictionaryLoader_p.html" class="MenuItemLink lock">Knowledge Loader</a></li>
</ul>
</div>

@ -44,12 +44,15 @@ public class JenaTripleStore {
model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd");
model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE);
}
/**
 * Reports the size of the shared triple store model.
 *
 * @return the value returned by {@code model.size()}
 */
public static long size() {
    final long tripleCount = model.size();
    return tripleCount;
}
public static ConcurrentHashMap<String, Model> privatestorage = null;
public static String file;
public static void load(String filename) throws IOException {
if (filename.endsWith(".nt")) LoadNTriples(filename);
else loadRDF(filename);
@ -70,16 +73,20 @@ public class JenaTripleStore {
}
/**
 * Loads N-Triples from the given file name or URI: the source is opened through
 * {@code FileManager.get().open(...)} and the resulting stream is handed to
 * {@link #LoadNTriples(InputStream)}.
 *
 * @param fileNameOrUri file name or URI of the N-Triples source
 * @throws IOException propagated from the stream-based overload
 */
public static void LoadNTriples(String fileNameOrUri) throws IOException {
Model tmp = ModelFactory.createDefaultModel();
Log.logInfo("TRIPLESTORE", "Loading N-Triples from " + fileNameOrUri);
Log.logInfo("TRIPLESTORE", "Loading N-Triples from " + fileNameOrUri);
// NOTE(review): the stream opened here is not closed in this method —
// confirm whether the reader or the caller is expected to close it.
InputStream is = FileManager.get().open(fileNameOrUri);
LoadNTriples(is);
}
/**
 * Reads N-Triples from an input stream into a temporary model, logs the number
 * of triples read and replaces the shared {@code model} by its union with the
 * temporary model.
 *
 * @param is the stream to read from
 * @throws IOException if {@code is} is {@code null}
 */
public static void LoadNTriples(InputStream is) throws IOException {
Model tmp = ModelFactory.createDefaultModel();
if (is != null) {
// parse the stream using the "N-TRIPLE" syntax name, without a base URI
tmp.read(is, null, "N-TRIPLE");
Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + fileNameOrUri);
Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples");
// merge the freshly read triples into the shared model
model = model.union(tmp);
//model.write(System.out, "TURTLE");
} else {
throw new IOException("cannot read " + fileNameOrUri);
throw new IOException("cannot read input stream");
}
}
@ -174,7 +181,7 @@ public class JenaTripleStore {
}
/**
 * Removes statements with the given subject and predicate from the shared model.
 * The object position is always passed as {@code null}; presumably this acts as
 * a match-any wildcard in the model's removeAll — confirm against the Jena API.
 *
 * @param subject   subject URI; in the null-checked variant a {@code null}
 *                  subject is passed on as {@code null} instead of being resolved
 * @param predicate predicate URI, resolved through {@code getProperty}
 */
public static void deleteObjects(String subject, String predicate) {
Resource r = getResource(subject);
Resource r = subject == null ? null : getResource(subject);
Property pr = getProperty(predicate);
JenaTripleStore.model.removeAll(r, pr, (Resource) null);
}

@ -40,6 +40,8 @@ import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.storage.Files;
import net.yacy.document.geolocalization.GeonamesLocation;
import net.yacy.document.geolocalization.OpenGeoDBLocation;
import net.yacy.document.geolocalization.OverarchingLocation;
@ -67,7 +69,8 @@ public class LibraryProvider {
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ),
GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ),
GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ),
DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" );
DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ),
PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" );
public String nickname, url, filename;
@ -106,10 +109,11 @@ public class LibraryProvider {
dictRoot = rootPath;
// initialize libraries
integrateDeReWo();
activateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
integrateGeonames();
activatePND();
initAutotagging(tagPrefix);
Set<String> allTags = new HashSet<String>() ;
allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity
@ -125,11 +129,11 @@ public class LibraryProvider {
if ( geo0.exists() ) {
geo0.renameTo(Dictionary.GEODB0.fileDisabled());
}
geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false));
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false));
return;
}
if ( geo0.exists() ) {
geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false));
geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false));
return;
}
}
@ -137,7 +141,7 @@ public class LibraryProvider {
/**
 * Registers the Geonames dictionary with {@code geoLoc} under the GEON0
 * nickname if the GEON0 dictionary file exists; otherwise does nothing.
 */
public static void integrateGeonames() {
final File geon = Dictionary.GEON0.file();
if ( geon.exists() ) {
// wrap the file in a GeonamesLocation and register it under the dictionary nickname
geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon));
geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon));
return;
}
}
@ -158,14 +162,7 @@ public class LibraryProvider {
autotagging = new Autotagging(autotaggingPath, prefix);
}
public static void removeDeReWo() {
final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
final File derewoInput = LibraryProvider.Dictionary.DRW0.file();
final File derewoOutput = new File(dymDict, derewoInput.getName() + ".words");
FileUtils.deletedelete(derewoOutput);
}
public static void integrateDeReWo() {
public static void activateDeReWo() {
// translate input files (once..)
final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
if ( !dymDict.exists() ) {
@ -184,6 +181,37 @@ public class LibraryProvider {
}
}
/**
 * Deactivates the DeReWo dictionary by deleting the file named
 * {@code <DRW0 file name>.words} from the did-you-mean dictionary directory
 * below {@code dictRoot}.
 */
public static void deactivateDeReWo() {
    final File didYouMeanDir = new File(dictRoot, path_to_did_you_mean_dictionaries);
    final String wordsFileName = LibraryProvider.Dictionary.DRW0.file().getName() + ".words";
    FileUtils.deletedelete(new File(didYouMeanDir, wordsFileName));
}
/**
 * Activates the PND person dictionary: if the PND0 dictionary file exists, it
 * is opened with {@code Files.read} and its N-Triples are loaded into the
 * {@link JenaTripleStore}. The input stream is always closed afterwards, also
 * when loading fails. I/O errors are logged and not propagated.
 */
public static void activatePND() {
    // NOTE(review): this directory is not used by the PND import below; its
    // creation is kept only to preserve the existing side effect (it looks
    // like a leftover from activateDeReWo) — confirm whether it can go.
    final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
    if ( !dymDict.exists() ) {
        dymDict.mkdirs();
    }
    // read the pnd file and store it into the triplestore
    final File dictInput = LibraryProvider.Dictionary.PND0.file();
    if ( dictInput.exists() ) {
        java.io.InputStream pndStream = null;
        try {
            pndStream = Files.read(dictInput);
            JenaTripleStore.LoadNTriples(pndStream);
        } catch ( final IOException e ) {
            Log.logException(e);
        } finally {
            // release the file handle regardless of whether loading succeeded
            if ( pndStream != null ) {
                try {
                    pndStream.close();
                } catch ( final IOException closeError ) {
                    Log.logException(closeError);
                }
            }
        }
    }
    // read the triplestore and generate a vocabulary
}
/**
 * Deactivates the PND dictionary by asking the triple store to delete the
 * objects of the {@code individualisedPnd} predicate, passing {@code null}
 * as the subject.
 */
public static void deactivatePND() {
    // remove the PND Triples from the triplestore
    final String pndPredicate = "http://dbpedia.org/ontology/individualisedPnd";
    JenaTripleStore.deleteObjects(null, pndPredicate);
}
/*
private static ArrayList<String> loadList(final File file, String comment, boolean toLowerCase) {
final ArrayList<String> list = new ArrayList<String>();

@ -44,7 +44,7 @@ public class OverarchingLocation implements Locations {
* @param nickname the nickname of the service
* @param service the service
*/
public void addLocalization(final String nickname, final Locations service) {
public void activateLocalization(final String nickname, final Locations service) {
// store the service in this.services under its nickname
this.services.put(nickname, service);
}
@ -52,7 +52,7 @@ public class OverarchingLocation implements Locations {
* remove a localization service
* @param nickname
*/
public void removeLocalization(final String nickname) {
public void deactivateLocalization(final String nickname) {
// drop the entry stored under this nickname from this.services
this.services.remove(nickname);
}

Loading…
Cancel
Save