Merge branch 'master' of git://gitorious.org/yacy/rc1.git

pull/1/head
reger 13 years ago
commit b89a69ae2e

@ -691,7 +691,7 @@ crawlPause.localsearch=50
crawlPause.remotesearch=10
# Some configuration values for the crawler
crawler.clientTimeout=9000
crawler.clientTimeout=30000
# http crawler specific settings; size in bytes
crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8

@ -1,11 +1,6 @@
import java.util.Iterator;
import net.yacy.yacy;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.Interaction;
import net.yacy.search.Switchboard;
import de.anomic.data.BookmarkHelper;
import de.anomic.data.UserDB;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -13,17 +8,15 @@ public final class DemoServlet {
public static serverObjects respond(final RequestHeader header,
final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
final Switchboard sb = Switchboard.getSwitchboard();
prop.put("temperature", "-10°C");
// Display currently logged on user
prop.put("username", Interaction.GetLoggedOnUser(header));
//Generate Userlist
int numUsers = 0;
for (String user : Interaction.GetUsers()) {
@ -31,36 +24,36 @@ public final class DemoServlet {
numUsers++;
}
prop.put("users", numUsers);
if (post != null) {
if (post.containsKey("submit")) {
prop.put("temperature", post.get("textthing"));
String filename= post.get("textthing");
int counter = 0;
while (counter < 10) {
prop.put("localimg_"+counter+"_path","/"+filename);
prop.put("localimg_"+counter+"_checked", "2");
counter++;
}
prop.put("localimg", counter);
prop.put("temperature", yacy.homedir+"/DATA/HTDOCS/"+filename);
}
}
// return rewrite properties
return prop;
}

@ -1,5 +1,4 @@
import net.yacy.cora.protocol.RequestHeader;
import de.anomic.data.BookmarkHelper;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -9,9 +8,9 @@ public final class DemoServletInteraction {
final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
prop.put("temperature", "-10°C");
// return rewrite properties
return prop;
}

@ -1,6 +1,4 @@
import net.yacy.yacy;
import net.yacy.cora.protocol.RequestHeader;
import de.anomic.data.BookmarkHelper;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -10,37 +8,37 @@ public final class DemoServletRDF {
final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
// prop.put("temperature", "-10°C");
if (post != null) {
if (post.containsKey("submit")) {
prop.put("temperature", post.get("textthing"));
String filename= post.get("textthing");
// prop.put("imglink", filename+".jpg");
int counter = 0;
while (counter < 10) {
prop.put("localimg_"+counter+"_path","/"+filename);
prop.put("localimg_"+counter+"_checked", "2");
counter++;
}
prop.put("localimg", counter);
// prop.put("temperature",yacy.homedir+"/DATA/HTDOCS/"+filename);
}
}
// return rewrite properties
return prop;
}

@ -65,9 +65,11 @@
Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.
<h4>GeoNames</h4>
<p>With this file it is possible to find cities with a population > 1000 all over the world.</p>
<p>With this file it is possible to find cities all over the world.</p>
<dl>
<dt>Content</dt>
<dd>cities with a population > 1000 all over the world</dd>
<dt><label>Download from</label></dt>
<dd>#[geon0URL]#</dd>
<dt><label>Storage location</label></dt>
@ -99,6 +101,74 @@
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon0ActionActivated)#
</dl>
<dl>
<dt>Content</dt>
<dd>cities with a population > 5000 all over the world</dd>
<dt><label>Download from</label></dt>
<dd>#[geon1URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[geon1Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(geon1Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::deactivated#(/geon1Status)#</dd>
<dt>Action</dt>
<dd>#(geon1Status)#
<input type="submit" name="geon1Load" value="Load" />::
<input type="submit" name="geon1Deactivate" value="Deactivate" />
<input type="submit" name="geon1Remove" value="Remove" />::
<input type="submit" name="geon1Activate" value="Activate" />
<input type="submit" name="geon1Remove" value="Remove" />
#(/geon1Status)#</dd>
#(geon1ActionLoaded)#::
<dt>Result</dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/geon1ActionLoaded)#
#(geon1ActionRemoved)#::
<dt>Result</dt><dd><div class="commit">deactivated and removed dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/geon1ActionRemoved)#
#(geon1ActionDeactivated)#::
<dt>Result</dt><dd><div class="commit">deactivated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot deactivate dictionary file: #[error]#</div></dd>
#(/geon1ActionDeactivated)#
#(geon1ActionActivated)#::
<dt>Result</dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon1ActionActivated)#
</dl>
<dl>
<dt>Content</dt>
<dd>cities with a population > 100000 all over the world (the set is reduced to cities > 100000)</dd>
<dt><label>Download from</label></dt>
<dd>#[geon2URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[geon2Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(geon2Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::deactivated#(/geon2Status)#</dd>
<dt>Action</dt>
<dd>#(geon2Status)#
<input type="submit" name="geon2Load" value="Load" />::
<input type="submit" name="geon2Deactivate" value="Deactivate" />
<input type="submit" name="geon2Remove" value="Remove" />::
<input type="submit" name="geon2Activate" value="Activate" />
<input type="submit" name="geon2Remove" value="Remove" />
#(/geon2Status)#</dd>
#(geon2ActionLoaded)#::
<dt>Result</dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/geon2ActionLoaded)#
#(geon2ActionRemoved)#::
<dt>Result</dt><dd><div class="commit">deactivated and removed dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/geon2ActionRemoved)#
#(geon2ActionDeactivated)#::
<dt>Result</dt><dd><div class="commit">deactivated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot deactivate dictionary file: #[error]#</div></dd>
#(/geon2ActionDeactivated)#
#(geon2ActionActivated)#::
<dt>Result</dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt>Result</dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon2ActionActivated)#
</dl>
<h4>OpenGeoDB</h4>
<p>With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.</p>

@ -24,8 +24,8 @@ import java.net.MalformedURLException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.LibraryProvider;
import net.yacy.document.geolocalization.GeonamesLocation;
import net.yacy.document.geolocalization.OpenGeoDBLocation;
import net.yacy.document.geolocation.GeonamesLocation;
import net.yacy.document.geolocation.OpenGeoDBLocation;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
@ -68,7 +68,7 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
@ -98,11 +98,95 @@ public class DictionaryLoader_p {
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file()));
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon0ActionActivated", 1);
}
// GEON1
if (post.containsKey("geon1Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon1Status", LibraryProvider.Dictionary.GEON1.file().exists() ? 1 : 0);
prop.put("geon1ActionLoaded", 1);
} catch (final MalformedURLException e) {
Log.logException(e);
prop.put("geon1ActionLoaded", 2);
prop.put("geon1ActionLoaded_error", e.getMessage());
} catch (final IOException e) {
Log.logException(e);
prop.put("geon1ActionLoaded", 2);
prop.put("geon1ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("geon1Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON1.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname);
prop.put("geon1ActionRemoved", 1);
}
if (post.containsKey("geon1Deactivate")) {
LibraryProvider.Dictionary.GEON1.file().renameTo(LibraryProvider.Dictionary.GEON1.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON1.nickname);
prop.put("geon1ActionDeactivated", 1);
}
if (post.containsKey("geon1Activate")) {
LibraryProvider.Dictionary.GEON1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON1.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon1ActionActivated", 1);
}
// GEON2
if (post.containsKey("geon2Load")) {
// load from the net
try {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon2Status", LibraryProvider.Dictionary.GEON2.file().exists() ? 1 : 0);
prop.put("geon2ActionLoaded", 1);
} catch (final MalformedURLException e) {
Log.logException(e);
prop.put("geon2ActionLoaded", 2);
prop.put("geon2ActionLoaded_error", e.getMessage());
} catch (final IOException e) {
Log.logException(e);
prop.put("geon2ActionLoaded", 2);
prop.put("geon2ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("geon2Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON2.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname);
prop.put("geon2ActionRemoved", 1);
}
if (post.containsKey("geon2Deactivate")) {
LibraryProvider.Dictionary.GEON2.file().renameTo(LibraryProvider.Dictionary.GEON2.fileDisabled());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON2.nickname);
prop.put("geon2ActionDeactivated", 1);
}
if (post.containsKey("geon2Activate")) {
LibraryProvider.Dictionary.GEON2.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON2.file());
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geon2ActionActivated", 1);
}
// GEO1
if (post.containsKey("geo1Load")) {
// load from the net
@ -110,8 +194,8 @@ public class DictionaryLoader_p {
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
final byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1);
@ -141,7 +225,7 @@ public class DictionaryLoader_p {
if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false));
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null));
LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc);
prop.put("geo1ActionActivated", 1);
}
@ -229,7 +313,7 @@ public class DictionaryLoader_p {
LibraryProvider.activatePND();
prop.put("pnd0ActionActivated", 1);
}
// check status again
for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) {
prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0);

@ -40,6 +40,7 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Condenser;
@ -54,6 +55,9 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import com.hp.hpl.jena.rdf.model.Model;
import de.anomic.crawler.Cache;
import de.anomic.crawler.retrieval.Response;
import de.anomic.server.serverObjects;
@ -344,7 +348,8 @@ public class ViewFile {
prop.putNum("error_size", size);
prop.put("error_mimeTypeAvailable", (response.getMimeType() == null) ? "0" : "1");
prop.put("error_mimeTypeAvailable_mimeType", response.getMimeType());
prop.putXML("error_triples", JenaTripleStore.getMetadataByURLHash(url.hash()));
Model model = JenaTripleStore.getSubmodelBySubject(YaCyMetadata.hashURI(url.hash()));
prop.putXML("error_triples", JenaTripleStore.getRDFByModel(model));
return prop;
}

@ -3,8 +3,76 @@
<head>
<title>YaCy '#[clientname]#': Federated Index</title>
#%env/templates/metas.template%#
<script type="text/javascript">
//<![CDATA[
// Takes the objectspace stub currently entered in the 'searchform' form
// and starts an index search for it.
function xmlhttpPost() {
  var objectspace = document.getElementById('searchform').discoverobjectspace.value;
  search(objectspace);
}
// Queries the local index for URLs that match the given objectspace stub and
// hands the JSON answer to updatepage() once the request has completed.
//   query: text entered by the user; it is URL-encoded here before sending
function search(query) {
  // keep the request object local so that overlapping searches cannot
  // overwrite each other's state
  var request = null;
  if (window.XMLHttpRequest) { // Mozilla/Safari and other current browsers
    request = new window.XMLHttpRequest();
  } else if (window.ActiveXObject) { // old IE
    request = new window.ActiveXObject("Microsoft.XMLHTTP");
  }
  if (request == null) return; // no AJAX support available: nothing to do

  // encode the user input so that characters such as '&' or spaces cannot
  // break or extend the query string
  var term = encodeURIComponent(query);
  request.open('GET', "yacysearch.json?verify=false&resource=local&maximumRecords=100&nav=none&query=" + term + "+inurl:" + term, true);
  request.onreadystatechange = function() {
    // readyState 4 = request finished; only a 200 answer carries a result set
    if (request.readyState == 4 && request.status == 200) {
      updatepage(request.responseText);
    }
  };
  request.send(null);
}
// Renders the answer of a yacysearch.json request as a table of result links
// inside the 'searchresults' element.
//   str: response text of the request; expected to describe an object with a
//        'channels' array whose first entry carries 'totalResults' and 'items'
function updatepage(str) {
  // makes a value safe for use in HTML text and in quoted attribute values
  function escapeHtml(text) {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  // optional debug element: show the raw answer as text, not as markup
  var raw = document.getElementById("raw");
  if (raw != null) raw.innerHTML = escapeHtml(str);

  // NOTE(review): eval() executes whatever the server returns; if the
  // yacysearch.json output is strict JSON this should become JSON.parse(str)
  var rsp = eval("(" + str + ")");
  var firstChannel = (rsp && rsp.channels) ? rsp.channels[0] : null;
  var items = (firstChannel && firstChannel.items) ? firstChannel.items : [];

  // totalResults may contain several thousands separators ("1,234,567");
  // all of them must be removed before the value can be used as a number
  var totalResults = firstChannel ? parseInt(String(firstChannel.totalResults).replace(/[,.]/g, ""), 10) : 0;

  var html = "";
  if (totalResults > 0 && items.length > 0) {
    html += "<table class=\"networkTable\" border=\"0\" cellpadding=\"2\" cellspacing=\"1\" width=\"99%\">";
    html += "<tr class=\"TableHeader\" valign=\"bottom\">";
    html += "<td>URL from index (total results = " + totalResults + ")<\/td><\/tr>";
    for (var i = 0; i < items.length; i++) {
      // links come from crawled content, so they are escaped before insertion
      var link = escapeHtml(items[i].link);
      // a click copies the shown link into the objectspace field of the form;
      // the text is read back from the cell so no link text ends up in script code
      html += "<tr class=\"TableCellLight\"><td align=\"left\" onclick=\"document.getElementById('searchform').discoverobjectspace.value=(this.textContent||this.innerText);\">" + link + "<\/td><\/tr>";
    }
    html += "<\/table>";
  }
  document.getElementById("searchresults").innerHTML = html;
}
//]]>
</script>
</head>
<body id="Vocabulary_p" onload="document.getElementById('newterm').focus()">
<body id="Vocabulary_p">
#(edit)#::
<div id="api">
<a href="Vocabulary_p.xml" id="apilink"><img src="/env/grafics/api.png" width="60" height="40" alt="API"/></a>
<script type="text/javascript">
//<![CDATA[
document.getElementById('apilink').setAttribute('href', 'Vocabulary_p.xml?' + window.location.search.substring(1));
//]]>
</script>
<span>The information that is presented on this page can also be retrieved as XML.
Click the API icon to see the RDF Ontology definition for this vocabulary.
To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de/wiki/index.php/Dev:API">API wiki page</a>.</span>
</div>
#(/edit)#
#%env/templates/header.template%#
#%env/templates/submenuSemantic.template%#
<h2>Vocabulary Administration</h2>
@ -31,14 +99,17 @@
</form>
#(create)#::
<form action="Vocabulary_p.html" method="get" accept-charset="UTF-8">
<!--<form action="Vocabulary_p.html" id="searchform" method="get" accept-charset="UTF-8" onkeyup="xmlhttpPost(); return false;">-->
<form action="Vocabulary_p.html" id="searchform" method="get" accept-charset="UTF-8" >
<fieldset><legend>Vocabulary Production</legend>
It is possible to produce a vocabulary out of the existing search index. This is done using a given 'objectspace' which you can enter as a URL Stub.
This stub is used to find all matching URLs. If the remaining path from the matching URLs then denotes a single file, the file name is used as vocabulary term.
This works best with wikis. Try to use a wiki url as objectspace path.
<dl>
<dt>Vocabulary Name</dt><dd><input type="text" name="discovername" value="" size="16" maxlength="128" /></dd>
<dt>Objectspace</dt><dd><input type="text" name="discoverobjectspace" value="http://" size="78" maxlength="128" /></dd>
<dt>Objectspace</dt><dd><input type="text" name="discoverobjectspace" value="http://" size="78" maxlength="128" />
<div id="searchresults"></div></dd>
<dt>Discover Terms from</dt><dd><input type="radio" name="discovermethod" value="path" checked="checked" />object link file name&nbsp;&nbsp;<input type="radio" name="discovermethod" value="title" />object page title&nbsp;&nbsp;<input type="radio" name="discovermethod" value="titlesplitted" />object page title (splitted)&nbsp;&nbsp;<input type="radio" name="discovermethod" value="author" />object page author</dd>
<dt></dt><dd><input type="submit" name="create" value="Create" /></dd>
</dl>
</fieldset>
@ -51,6 +122,7 @@
<dl>
<dt>Vocabulary Name</dt><dd>#[name]#</dd>
<dt>File</dt><dd>#(editable)#[automatically generated, not stored, cannot be edited]::#[file]##(/editable)#</dd>
<dt>Size</dt><dd>#[size]#</dd>
<dt>Namespace</dt><dd>#[namespace]#</dd>
<dt>Predicate</dt><dd>#[predicate]#</dd>
<dt>Prefix</dt><dd>#[prefix]#</dd>
@ -73,10 +145,10 @@
#{terms}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td align="center">#(editable)#&nbsp;::<input type="checkbox" name="modify_#[term]#" id="modify_#[term]#" value="checked" disabled="disabled"/>#(/editable)#</td>
<td align="center">#(editable)#&nbsp;::<input type="checkbox" name="delete_#[term]#" id="delete_#[term]#" value="checked" onchange="this.form.submit()"/>#(/editable)#</td>
<td align="center">#(editable)#&nbsp;::<input type="checkbox" name="delete_#[term]#" id="delete_#[term]#" value="checked" />#(/editable)#</td>
<td align="left">#[term]#</td>
<td align="left">#(editable)##[synonyms]#::<input type="text" name="synonyms_#[term]#" value="#[synonyms]#" size="40" maxlength="1024" onkeyup="document.getElementById('modify_#[term]#').checked='checked'; document.getElementById('modify_#[term]#').disabled=''"/>#(/editable)#</td>
<td align="left">#(editable)##[objectlink]#::<input type="text" name="objectlink_#[term]#" value="#[objectlink]#" size="60" maxlength="1024" onkeyup="document.getElementById('modify_#[term]#').checked='checked'; document.getElementById('modify_#[term]#').disabled=''"/>#(/editable)#</td>
<td align="left">#(editable)##[synonyms]#::<input type="text" name="synonyms_#[term]#" value="#[synonyms]#" size="40" maxlength="1024" onkeyup="document.getElementById('modify_#[term]#').checked='checked'; document.getElementById('modify_#[term]#').disabled=''; document.getElementById('delete_#[term]#').disabled='disabled';"/>#(/editable)#</td>
<td align="left">#(editable)##[objectlink]#::<input type="text" name="objectlink_#[term]#" value="#[objectlink]#" size="60" maxlength="1024" onkeyup="document.getElementById('modify_#[term]#').checked='checked'; document.getElementById('modify_#[term]#').disabled=''; document.getElementById('delete_#[term]#').disabled='disabled';"/>#(/editable)#</td>
</tr>
#{/terms}#
#(editable)#::

@ -35,6 +35,7 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
@ -50,44 +51,71 @@ public class Vocabulary_p {
Collection<Tagging> vocs = LibraryProvider.autotagging.getVocabularies();
String vocabularyName = (post == null) ? null : post.get("vocabulary", null);
String discovername = (post == null) ? null : post.get("discovername", null);
Tagging vocabulary = vocabularyName == null ? null : LibraryProvider.autotagging.getVocabulary(vocabularyName);
if (vocabulary == null) vocabularyName = null;
int count = 0;
for (Tagging v: vocs) {
prop.put("vocabularyset_" + count + "_name", v.getName());
prop.put("vocabularyset_" + count + "_selected", (vocabularyName != null && vocabularyName.equals(v.getName())) ? 1 : 0);
count++;
}
prop.put("vocabularyset", count);
if (post != null) {
try {
if (vocabulary == null) {
// create a vocabulary
String discovername = post.get("discovername", "");
if (discovername.length() > 0) {
if (discovername != null && discovername.length() > 0) {
String discoverobjectspace = post.get("discoverobjectspace", "");
MultiProtocolURI discoveruri = null;
if (discoverobjectspace.length() > 0) try {discoveruri = new MultiProtocolURI(discoverobjectspace);} catch (MalformedURLException e) {}
if (discoveruri == null) discoverobjectspace = "";
Map<String, Tagging.SOTuple> table = new TreeMap<String, Tagging.SOTuple>();
File propFile = LibraryProvider.autotagging.getVocabularyFile(discovername);
boolean discoverFromPath = post.get("discovermethod", "").equals("path");
boolean discoverFromTitle = post.get("discovermethod", "").equals("title");
boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted");
boolean discoverFromAuthor = post.get("discovermethod", "").equals("author");
if (discoveruri != null) {
String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
Segment segment = sb.indexSegments.segment(segmentName);
Iterator<DigestURI> ui = segment.urlSelector(discoveruri);
String t;
while (ui.hasNext()) {
DigestURI u = ui.next();
String u0 = u.toNormalform(true, false);
String t = u0.substring(discoverobjectspace.length());
if (t.indexOf('/') >= 0) continue;
int p = t.indexOf('.');
if (p >= 0) t = t.substring(0, p);
while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1);
while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1);
if (p >= 0) t = t.substring(p + 1);
t = "";
if (discoverFromPath) {
t = u0.substring(discoverobjectspace.length());
if (t.indexOf('/') >= 0) continue;
int p = t.indexOf('.');
if (p >= 0) t = t.substring(0, p);
while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1);
while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1);
if (p >= 0) t = t.substring(p + 1);
}
if (discoverFromTitle || discoverFromTitleSplitted) {
URIMetadataRow m = segment.urlMetadata().load(u.hash());
if (m != null) t = m.dc_title();
if (t.endsWith(".jpg") || t.endsWith(".gif")) continue;
}
if (discoverFromAuthor) {
URIMetadataRow m = segment.urlMetadata().load(u.hash());
if (m != null) t = m.dc_creator();
}
t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim();
if (t.length() == 0) continue;
table.put(t, new Tagging.SOTuple("", u0));
if (discoverFromTitleSplitted) {
String[] ts = t.split(" ");
for (String s: ts) {
if (s.length() == 0) continue;
if (s.endsWith(".jpg") || s.endsWith(".gif")) continue;
table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
}
} else if (discoverFromAuthor) {
String[] ts = t.split(";"); // author names are often separated by ';'
for (String s: ts) {
if (s.length() == 0) continue;
int p = s.indexOf(','); // check if there is a reversed method to mention the name
if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim();
table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
}
} else {
table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0));
}
}
}
Tagging newvoc = new Tagging(discovername, propFile, discoverobjectspace, table);
@ -143,6 +171,14 @@ public class Vocabulary_p {
}
}
int count = 0;
for (Tagging v: vocs) {
prop.put("vocabularyset_" + count + "_name", v.getName());
prop.put("vocabularyset_" + count + "_selected", ((vocabularyName != null && vocabularyName.equals(v.getName())) || (discovername != null && discovername.equals(v.getName()))) ? 1 : 0);
count++;
}
prop.put("vocabularyset", count);
prop.put("create", vocabularyName == null ? 1 : 0);
if (vocabulary == null) {
@ -154,27 +190,45 @@ public class Vocabulary_p {
prop.put("edit_editable", editable ? 1 : 0);
prop.putHTML("edit_editable_file", editable ? vocabulary.getFile().getAbsolutePath() : "");
prop.putHTML("edit_name", vocabulary.getName());
prop.putXML("edit_namexml", vocabulary.getName());
prop.putHTML("edit_namespace", vocabulary.getNamespace());
prop.put("edit_size", vocabulary.size());
prop.putHTML("edit_predicate", vocabulary.getPredicate());
prop.putHTML("edit_prefix", Tagging.DEFAULT_PREFIX);
prop.putHTML("edit_editable_objectspace", vocabulary.getObjectspace() == null ? "" : vocabulary.getObjectspace());
prop.putHTML("edit_editable_objectspacepredicate", DCTerms.references.getPredicate());
prop.putHTML("edit_triple1", "<" + yacyurl + "> <" + vocabulary.getPredicate() + "> \"[discovered-tags-commaseparated]\"");
prop.putHTML("edit_triple2", "<" + yacyurl + "> <" + Owl.SameAs.getPredicate() + "> <[document-url]>");
prop.putHTML("edit_tripleN", vocabulary.getObjectspace() == null ? "none - missing objectspace" : "<" + yacyurl + "> <" + DCTerms.references.getPredicate() + "> \"[reference-link]#[tag]\" .");
prop.putXML("edit_triple1", "<" + yacyurl + "> <" + vocabulary.getPredicate() + "> \"[discovered-tags-commaseparated]\"");
prop.putXML("edit_triple2", "<" + yacyurl + "> <" + Owl.SameAs.getPredicate() + "> <[document-url]>");
prop.putXML("edit_tripleN", vocabulary.getObjectspace() == null ? "none - missing objectspace" : "<" + yacyurl + "> <" + DCTerms.references.getPredicate() + "> \"[object-link]#[tag]\" .");
int c = 0;
boolean dark = false;
for (Map.Entry<String, SOTuple> entry: vocabulary.list().entrySet()) {
int osl = vocabulary.getObjectspace() == null ? 0 : vocabulary.getObjectspace().length();
Map<String, SOTuple> list = vocabulary.list();
prop.put("edit_size", list.size());
for (Map.Entry<String, SOTuple> entry: list.entrySet()) {
prop.put("edit_terms_" + c + "_editable", editable ? 1 : 0);
prop.put("edit_terms_" + c + "_dark", dark ? 1 : 0); dark = !dark;
prop.putXML("edit_terms_" + c + "_label", osl > entry.getValue().getObjectlink().length() ? entry.getKey() : entry.getValue().getObjectlink().substring(osl));
prop.putHTML("edit_terms_" + c + "_term", entry.getKey());
prop.putXML("edit_terms_" + c + "_termxml", entry.getKey());
prop.putHTML("edit_terms_" + c + "_editable_term", entry.getKey());
prop.putHTML("edit_terms_" + c + "_editable_synonyms", entry.getValue().getSynonymsCSV());
prop.putHTML("edit_terms_" + c + "_editable_objectlink", entry.getValue().getObjectlink());
String synonymss = entry.getValue().getSynonymsCSV();
prop.putHTML("edit_terms_" + c + "_editable_synonyms", synonymss);
if (synonymss.length() > 0) {
String[] synonymsa = entry.getValue().getSynonymsList();
for (int i = 0; i < synonymsa.length; i++) {
prop.put("edit_terms_" + c + "_synonyms_" + i + "_altLabel", synonymsa[i]);
}
prop.put("edit_terms_" + c + "_synonyms", synonymsa.length);
} else {
prop.put("edit_terms_" + c + "_synonyms", 0);
}
prop.putXML("edit_terms_" + c + "_editable_objectlink", entry.getValue().getObjectlink());
c++;
if (c > 3000) break;
}
prop.put("edit_terms", c);
}
// return rewrite properties

@ -0,0 +1,25 @@
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
#(edit)#::
<owl:Ontology rdf:about="#(editable)##[objectspace]#::#[objectspace]##(/editable)#">
<dc:title>#[namexml]#</dc:title>
</owl:Ontology>
#{terms}#
<owl:Class rdf:about="#(editable)##[objectlink]#::#[objectlink]##(/editable)#">
<rdfs:label>#[label]#</rdfs:label>
<skos:Concept>
<skos:prefLabel>#[termxml]#</skos:prefLabel>
#{synonyms}#
<skos:altLabel>#[altLabel]#</skos:altLabel>
#{/synonyms}#
</skos:Concept>
</owl:Class>
#{/terms}#
#(/edit)#
</rdf:RDF>

@ -102,7 +102,7 @@ public class getpageinfo {
}
if (scraper != null) {
// put the document title
prop.putXML("title", scraper.dc_title());
prop.putXML("title", removelinebreaks(scraper.dc_title()));
// put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
@ -119,7 +119,7 @@ public class getpageinfo {
}
prop.put("tags", count);
// put description
prop.putXML("desc", scraper.dc_description());
prop.putXML("desc", removelinebreaks(scraper.dc_description()));
// put language
final Set<String> languages = scraper.getContentLanguages();
prop.putXML("lang", (languages == null) ? "unknown" : languages.iterator().next());
@ -185,7 +185,14 @@ public class getpageinfo {
return prop;
}
private static String checkOAI(final String url) {
/**
 * Strips every carriage-return and line-feed character from a string so the
 * value ends up on a single line of the generated output.
 *
 * @param dc_title the text to clean (used for title and description); may be {@code null}
 * @return the text without CR/LF characters, or an empty string if the input is {@code null}
 */
private static String removelinebreaks(String dc_title) {
    if (dc_title == null) {
        return "";
    }
    // Removing "\r" and "\n" individually already eliminates every "\r\n" pair,
    // so no separate pass for the two-character sequence is needed.
    return dc_title.replace("\r", "").replace("\n", "");
}
private static String checkOAI(final String url) {
final DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
try {

@ -0,0 +1,3 @@
{
"result": "#[taglinks]#"
}

@ -25,24 +25,15 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.ByteArrayOutputStream;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.Map;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.protocol.RequestHeader.FileType;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -50,6 +41,10 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -139,9 +134,25 @@ public class yacydoc {
prop.put("yacy_citations", sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation().count(entry.hash()));
prop.put("yacy_inbound", entry.llocal());
prop.put("yacy_outbound", entry.lother());
// extract the submodel from the triplestore
prop.putXML("triples", JenaTripleStore.getMetadataByURLHash(entry.hash()));
Model model = JenaTripleStore.getSubmodelBySubject(YaCyMetadata.hashURI(entry.hash()));
String rdf = JenaTripleStore.getRDFByModel(model);
prop.putXML("triples", rdf);
prop.put("rdf", header.fileType() == FileType.XML ? rdf : "");
String references = "";
Iterator<RDFNode> t = JenaTripleStore.getObjects("http://yacy.net/url#"+urlhash, "http://purl.org/dc/terms/references");
while (t.hasNext()) {
RDFNode r = t.next();
references += r.toString()+",";
}
Log.logInfo ("TRIPLESTORE", references);
prop.put("taglinks", references);
// return rewrite properties
return prop;

@ -9,4 +9,5 @@
<link media="screen" type="text/css" href="/currentyacypeer/jquery/css/jquery.tagsinput.css" rel="stylesheet" />
<script src="/currentyacypeer/jquery/js/jquery.tagsinput.min.js" type="text/javascript"></script>
<script src="/currentyacypeer/jquery/js/jquery.rdfquery.core-1.0.js" type="text/javascript"></script>
<!-- END jQuery header -->

@ -32,7 +32,6 @@ package interaction;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.Interaction;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.server.serverObjects;
@ -99,55 +98,37 @@ public class Triple {
String s = "";
String p = "";
String o = "";
String result = "";
Boolean global = false;
if(post != null){
if (post != null) {
if(post.containsKey("s")){
s = post.get("s");
}
if(post.containsKey("sp")){
s = post.get("sp") + "#" + s;
}
if(post.containsKey("p")){
p = post.get("p");
}
if(post.containsKey("pp")){
p = post.get("pp") + "#" + p;
}
s = post.get("s", "");
p = post.get("p", "");
o = post.get("o", "");
if(post.containsKey("o")){
o = post.get("o");
}
if (post.containsKey("sp")) s = post.get("sp") + "#" + s;
if (post.containsKey("pp")) p = post.get("pp") + "#" + p;
global = post.containsKey("global");
if (post.containsKey("load")) {
if (global) {
result = JenaTripleStore.getObject(s, p);
} else {
result = JenaTripleStore.getPrivateObject(s, p, username);
}
} else {
if (global) {
JenaTripleStore.addTriple(s, p, o);
} else {
JenaTripleStore.addTriple(s, p, o, username);
}
}
}
if (post.containsKey("load")) {
if (global) {
o = JenaTripleStore.getObject(s, p);
} else {
o = JenaTripleStore.getPrivateObject(s, p, username);
}
} else {
if (global) {
JenaTripleStore.addTriple(s, p, o);
} else {
JenaTripleStore.addTriple(s, p, o, username);
}
}
prop.put("result", o);
prop.put("result", result);
return prop;
}

@ -0,0 +1,41 @@
<span id="sci_doc_#[hash]#" class="sci_doc" style="display: none;">
<img id="sci_doc_#[hash]#_img" src="/currentyacypeer/interaction_elements/document.png" width="16px" height="16px" alt="0" onclick="/* $('#sci_doc_#[hash]#_box').toggle();*/ return false">&nbsp; <span style="font-size: x-small; position: absolute; margin-top: 4px;"><span id="sci_doc_#[hash]#_title"></span></span>
</span>
<style type="text/css" >
/* Appearance of the per-document indicator element (class sci_doc). */
.sci_doc {
/* font-family instead of the font shorthand: the shorthand is invalid without a size and gets dropped */
font-family: arial,helvetica,sans-serif;
font-size: 10px;
background: #ffffff;
border:0px;
width: 150px;
height: 20px;
z-index:99998;
color: #5a346e;
}
</style>
<div id="sci_doc_#[hash]#_box" class="sci_panel" style="top: 50px; display: none;">
<script type="text/javascript" charset="utf-8">
// The element is rendered with display:none; reveal it once this script runs.
document.getElementById('sci_doc_#[hash]#').style.display = "";
var metajson = getMetadata('#[url]#');
// Fall back to an empty title when the metadata carries none.
var title = (metajson && metajson.item && metajson.item.title) ? String(metajson.item.title) : "";
// Only mark the title as shortened when it really was truncated.
if (title.length > 20) title = title.substring(0, 20) + "...";
// textContent instead of innerHTML: the title is external data (presumably taken from
// the loaded page - confirm against getMetadata) and must not be parsed as markup.
document.getElementById('sci_doc_#[hash]#_title').textContent = title;
</script>
</div>

@ -0,0 +1,29 @@
package interaction_elements;
import java.util.Collection;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.LibraryProvider;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind the Document_part template: it copies the request
 * arguments "hash", "url" and "action" into the template properties.
 */
public class Document_part {

    /**
     * Builds the template properties for one request.
     *
     * @param header the request header (not used here)
     * @param post   the request arguments; treated as empty when {@code null}
     * @param env    the server environment (not used here)
     * @return properties holding "hash", "url" and "action", each defaulting to an empty string
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();

        // Other servlets in this code base check post for null before reading it;
        // substitute an empty argument set so the defaults below apply.
        final serverObjects args = (post == null) ? new serverObjects() : post;

        prop.put("hash", args.get("hash", ""));
        prop.put("url", args.get("url", ""));
        prop.put("action", args.get("action", ""));

        return prop;
    }
}

@ -25,12 +25,12 @@ color: #5a346e;
font: arial,helvetica,sans-serif;
font-size: 10px;
position: fixed;
right: 10px;
right: 200px;
top: 0;
background: #ffffff;
border:0px;
width: 65px;
width: 200px;
height: 20px;
padding: 5px 5px 5px 5px;
@ -51,7 +51,7 @@ top: 0;
background: #ffffff;
border:0px;
width: 30px;
width: 100px;
height: 20px;
padding: 5px 5px 5px 5px;
@ -178,12 +178,16 @@ z-index:99999;
<div id="sidebar-logo" class="sci_left" style="">
<img src="/currentyacypeer/env/grafics/yacy.png" height="20px" width="36px"/>
<!--#include virtual="/currentyacypeer/interaction_elements/Tag_part.html?hash=#[urlhash]#" -->
<img src="/currentyacypeer/env/grafics/yacy.png" height="20px" width="36px"/>
<img src="/currentyacypeer/env/grafics/empty.gif" height="20px" width="20px"/>
<!--#include virtual="/currentyacypeer/interaction_elements/Tag_part.html?action=#[action]#&hash=#[urlhash]#&url=#[url]#" -->
</div>
<div id="sidebar-tags" class="sci_right" style="">
<div id="sidebar-document" class="sci_right" style="">
<img src="/currentyacypeer/env/grafics/empty.gif" height="20px" width="20px"/>
<!--#include virtual="/currentyacypeer/interaction_elements/Document_part.html?action=#[action]#&hash=#[urlhash]#&url=#[url]#" -->
</div>

@ -1,24 +1,23 @@
package interaction_elements;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind the OverlayInteraction template: it reports whether the
 * overlay is enabled and forwards the "url", "urlhash" and "action"
 * request arguments to the template.
 */
public class OverlayInteraction {

    /**
     * Builds the template properties for one request.
     *
     * @param header the request header (not used here)
     * @param post   the request arguments; treated as empty when {@code null}
     * @param env    the server environment; supplies the
     *               "interaction.overlayinteraction.enabled" setting (default {@code false})
     * @return properties "enabled" ("1" or "0"), "enabled_url", "enabled_urlhash" and "enabled_action"
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();

        // Other servlets in this code base check post for null before reading it;
        // substitute an empty argument set so the defaults below apply.
        final serverObjects args = (post == null) ? new serverObjects() : post;

        prop.put("enabled", env.getConfigBool("interaction.overlayinteraction.enabled", false) ? "1" : "0");
        prop.put("enabled_url", args.get("url", ""));
        prop.put("enabled_urlhash", args.get("urlhash", ""));
        prop.put("enabled_action", args.get("action", ""));

        return prop;
    }
}

@ -6,37 +6,16 @@
<style type="text/css" >
.sci_panel {
.sci_tag {
font: arial,helvetica,sans-serif;
font-size: 10px;
position: fixed;
right: 0;
background: #ffffff;
border:1px solid #5a346e;
width: 210px;
height: auto;
padding: 30px 110px 30px 30px;
z-index:99998;
color: #5a346e;
}
.sci_tags {
font: arial,helvetica,sans-serif;
font-size: 10px;
position: fixed;
right: 100px;
top: 0;
background: #ffffff;
border:0px;
width: 65px;
height: 20px;
padding: 5px 5px 5px 5px;
z-index:99998;
@ -49,6 +28,8 @@ color: #5a346e;
<div id="sci_tags_#[hash]#_box" class="sci_panel" style="top: 50px; display: none;">
<div id="rdfcontent_#[hash]#" class="rdfcontent_#[hash]#" style="display:none;"></div>
<input type="text" id="tags_#[hash]#" name="tags_#[hash]#" class="bm_input" size="80" />
<script type="text/javascript" charset="utf-8">
@ -59,11 +40,48 @@ color: #5a346e;
var existingtags = "";
var resultstring = "";
if (vocabularies_string != "") {
$.ajaxSetup({async: false});
$.getJSON('/currentyacypeer/api/yacydoc.htm?urlhash=#[hash]#', function(data) {
resultstring = data.result;
});
}
vocs.forEach (function(voc) {
var currenttags = triple_get_prefix ('http://yacy.net/url', '#[hash]#', 'http://yacy.net/autotagging', voc);
var finaltags = "";
currenttags.split(',').forEach (function(tag) {
var link = "";
resultstring.split(',').forEach (function(hyperlink) {
var h_url = hyperlink.substring (0, hyperlink.indexOf('#'));
var h_tag = hyperlink.substring (hyperlink.indexOf('#')+1);
if (h_tag == tag) link = h_url;
});
if (link != "") {
finaltags = finaltags + '<a href="'+ link +'" target="_blank">' + tag + '</a>&nbsp;';
}
else {
finaltags = finaltags + tag + '&nbsp';
}
});
if (existingtags != "" && currenttags != "") existingtags += " - ";
if (currenttags != "") existingtags += voc+":"+ currenttags;
if (currenttags != "") existingtags += voc+":"+ finaltags;
});

@ -4,36 +4,33 @@ package interaction_elements;
import java.util.Collection;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.LibraryProvider;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind the Tag_part template: it forwards the "hash", "url" and
 * "action" request arguments and publishes the names of all autotagging
 * vocabularies as a comma-separated list.
 */
public class Tag_part {

    /**
     * Builds the template properties for one request.
     *
     * @param header the request header (not used here)
     * @param post   the request arguments; treated as empty when {@code null}
     * @param env    the server environment (not used here)
     * @return properties "hash", "url", "action" and "vocabularies"; the latter lists every
     *         vocabulary name followed by the fixed entry "manual"
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();

        // Other servlets in this code base check post for null before reading it;
        // substitute an empty argument set so the defaults below apply.
        final serverObjects args = (post == null) ? new serverObjects() : post;

        prop.put("hash", args.get("hash", ""));
        prop.put("url", args.get("url", ""));
        prop.put("action", args.get("action", ""));

        // Each vocabulary name is followed by a comma; "manual" always closes the list.
        final StringBuilder vocabularies = new StringBuilder();
        final Collection<Tagging> vocs = LibraryProvider.autotagging.getVocabularies();
        for (final Tagging v : vocs) {
            vocabularies.append(v.getName()).append(',');
        }
        vocabularies.append("manual");

        prop.put("vocabularies", vocabularies.toString());

        return prop;
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

@ -1,9 +1,10 @@
function newload (name, div) {
$.ajaxSetup({async: false});
$.get(name, function(data) {
$('#'+div).html(data);
$.get(name, function(data) {
document.getElementById(div).innerHTML = data;
});
}

@ -2,7 +2,7 @@
function getMetadata (url) {
var res = {"item": ""};
var res = {"item": {"title": "no title"}};
$.ajaxSetup({async: false});

File diff suppressed because it is too large Load Diff

@ -55,7 +55,7 @@ import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.Parser;
import net.yacy.document.geolocalization.GeoLocation;
import net.yacy.document.geolocation.GeoLocation;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -480,7 +480,7 @@ public class yacysearch {
if (p > 0) {
String k = vocabulary.substring(0, p);
String v = vocabulary.substring(p + 1);
metatags.add(LibraryProvider.autotagging.metatag(LibraryProvider.autotagging.prefixChar + k + ":" + v));
metatags.add(LibraryProvider.autotagging.metatag(k, v));
}
}

@ -28,7 +28,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.opensearch.SRURSSConnector;
import net.yacy.document.LibraryProvider;
import net.yacy.document.geolocalization.GeoLocation;
import net.yacy.document.geolocation.GeoLocation;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.server.serverCore;

@ -67,7 +67,7 @@ public final class HTTPLoader {
this.log = theLog;
// refreshing timeout value
this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 10000);
this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000);
}
public Response load(final Request entry, final int maxFileSize, final boolean checkBlacklist) throws IOException {

@ -1423,9 +1423,15 @@ public final class HTTPDFileHandler {
private static void doURLProxy(final serverObjects args, final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream out) throws IOException {
final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
URL proxyurl = null;
String action = "";
if(conProp != null && conProp.containsKey("ARGS")) {
final String strARGS = (String) conProp.get("ARGS");
String strARGS = (String) conProp.get("ARGS");
if(strARGS.startsWith("action=")) {
int detectnextargument = strARGS.indexOf("&");
action = strARGS.substring (7, detectnextargument);
strARGS = strARGS.substring(detectnextargument+1);
}
if(strARGS.startsWith("url=")) {
final String strUrl = strARGS.substring(4); // strip url=
@ -1461,6 +1467,9 @@ public final class HTTPDFileHandler {
requestHeader.remove("Authorization");
requestHeader.remove("Connection");
requestHeader.put(HeaderFramework.HOST, proxyurl.getHost());
// temporarily add argument to header to pass it on to augmented browsing
requestHeader.put("YACYACTION", action);
final ByteArrayOutputStream o = new ByteArrayOutputStream();
HTTPDProxyHandler.doGet(prop, requestHeader, o);
@ -1494,9 +1503,9 @@ public final class HTTPDFileHandler {
// rewrite location header
location = outgoingHeader.get("Location");
if (location.startsWith("http")) {
location = "/proxy.html?url=" + location;
location = "/proxy.html?action="+action+"&url=" + location;
} else {
location = "/proxy.html?url=http://" + proxyurl.getHost() + "/" + location;
location = "/proxy.html?action="+action+"&url=http://" + proxyurl.getHost() + "/" + location;
}
outgoingHeader.put("Location", location);
}

@ -602,7 +602,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
}
// setting other connection properties
prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, session.userAddress.isAnyLocalAddress() ? "localhost" : session.userAddress.getHostAddress());
prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, session.userAddress.isAnyLocalAddress() || session.userAddress.isLinkLocalAddress() || session.userAddress.isLoopbackAddress() ? "localhost" : session.userAddress.getHostAddress());
prop.put(HeaderFramework.CONNECTION_PROP_METHOD, HeaderFramework.METHOD_CONNECT);
prop.put(HeaderFramework.CONNECTION_PROP_PATH, "/");
prop.put(HeaderFramework.CONNECTION_PROP_EXT, "");

@ -61,7 +61,7 @@ public class serverSwitch
// configuration management
private final File configFile;
private final String configComment;
private final File dataPath;
public final File dataPath;
public final File appPath;
protected boolean firstInit;
protected Log log;
@ -167,7 +167,7 @@ public class serverSwitch
/**
* get my public IP, either set statically or figure out dynamic
* @return
* @return
*/
public String myPublicIP() {
// if a static IP was configured, we have to return it here ...
@ -191,7 +191,7 @@ public class serverSwitch
/**
* add whole map of key-value pairs to config
* @param otherConfigs
* @param otherConfigs
*/
public void setConfig(final Map<String, String> otherConfigs) {
final Iterator<Map.Entry<String, String>> i = otherConfigs.entrySet().iterator();
@ -228,7 +228,7 @@ public class serverSwitch
/**
* Gets a configuration parameter from the properties.
*
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value of the parameter or default value
@ -246,7 +246,7 @@ public class serverSwitch
/**
* Gets a configuration parameter from the properties.
*
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value of the parameter or default value
@ -261,7 +261,7 @@ public class serverSwitch
/**
* Gets a configuration parameter from the properties.
*
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value of the parameter or default value
@ -276,7 +276,7 @@ public class serverSwitch
/**
* Gets a configuration parameter from the properties.
*
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value of the parameter or default value
@ -291,7 +291,7 @@ public class serverSwitch
/**
* Gets a configuration parameter from the properties.
*
*
* @param key name of the configuration parameter
* @param dflt default value which will be used in case parameter can not be found or if it is invalid
* @return value of the parameter or default value
@ -302,7 +302,7 @@ public class serverSwitch
/**
* Create a File instance for a configuration setting specifying a path.
*
*
* @param key config key
* @param dflt default path value, that is used when there is no value <code>key</code> in the
* configuration.
@ -311,19 +311,19 @@ public class serverSwitch
* the relative path setting.
*/
public File getDataPath(final String key, final String dflt) {
return getFileByPath(key, dflt, dataPath);
return getFileByPath(key, dflt, this.dataPath);
}
/**
* return file at path from config entry "key", or fallback to default dflt
* @param key
* @param dflt
* @return
* @return
*/
public File getAppPath(final String key, final String dflt) {
return getFileByPath(key, dflt, appPath);
return getFileByPath(key, dflt, this.appPath);
}
private File getFileByPath(String key, String dflt, File prefix) {
final String path = getConfig(key, dflt).replace('\\', '/');
final File f = new File(path);
@ -345,7 +345,7 @@ public class serverSwitch
/**
* Gets configuration parameters which have been removed during initialization.
*
*
* @return contains parameter name as key and parameter value as value
*/
public ConcurrentMap<String, String> getRemoved() {
@ -613,7 +613,7 @@ public class serverSwitch
/**
* Retrieve text data (e. g. config file) from file file may be an url or a filename with path relative to
* rootPath parameter
*
*
* @param file url or filename
* @param rootPath searchpath for file
* @param file file to use when remote fetching fails (null if unused)
@ -665,7 +665,7 @@ public class serverSwitch
/**
* Generates a random password.
*
*
* @return random password which is 20 characters long.
*/
public String genRandomPassword() {
@ -674,7 +674,7 @@ public class serverSwitch
/**
* Generates a random password of a given length.
*
*
* @param length length of password
* @return password of given length
*/

@ -2,12 +2,14 @@
package net.yacy.cora.lod;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
@ -38,7 +40,7 @@ public class JenaTripleStore {
public static Model model = ModelFactory.createDefaultModel();
static {
init(model);
}
private final static void init(Model model) {
model.setNsPrefix(YaCyMetadata.PREFIX, YaCyMetadata.NAMESPACE);
@ -48,7 +50,7 @@ public class JenaTripleStore {
model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd");
model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE);
}
public static long size() {
return model.size();
}
@ -81,7 +83,7 @@ public class JenaTripleStore {
InputStream is = FileManager.get().open(fileNameOrUri);
LoadNTriples(is);
}
public static void LoadNTriples(InputStream is) throws IOException {
Model tmp = ModelFactory.createDefaultModel();
if (is != null) {
@ -105,20 +107,33 @@ public class JenaTripleStore {
model = model.union(tmp);
}
}
public static void saveFile(String filename) {
public static void saveFile(String filename) {
saveFile(filename, model);
}
public static void saveFile(String filename, Model model) {
File f = new File(filename);
File ftmp = new File(filename + "." + System.currentTimeMillis());
if (model.size() == 0 && !f.exists()) {
// we don't store zero-size models if they did not exist before
Log.logInfo("TRIPLESTORE", "NOT saving triplestore with " + model.size() + " triples to " + filename);
return;
}
Log.logInfo("TRIPLESTORE", "Saving triplestore with " + model.size() + " triples to " + filename);
FileOutputStream fout;
OutputStream fout;
try {
fout = new FileOutputStream(filename);
fout = new BufferedOutputStream(new FileOutputStream(ftmp));
model.write(fout);
fout.close();
// if something went wrong until here, the original file is not overwritten
// since we are happy here, we can remove the old file and replace it with the new one
f.delete();
if (!f.exists()) {
ftmp.renameTo(f);
}
Log.logInfo("TRIPLESTORE", "Saved triplestore with " + model.size() + " triples to " + filename);
} catch (Exception e) {
// TODO Auto-generated catch block
Log.logWarning("TRIPLESTORE", "Saving to " + filename+" failed");
}
}
@ -145,13 +160,13 @@ public class JenaTripleStore {
Property pr = model.getProperty(predicate);
JenaTripleStore.model.removeAll(r, pr, (Resource) null);
}
/**
 * Adds a triple to the private store of the given user. Nothing happens
 * when no private stores exist or the user has no store.
 *
 * @param subject   subject of the triple
 * @param predicate predicate of the triple
 * @param object    object of the triple
 * @param username  name of the user whose private store receives the triple
 */
public static void addTriple(String subject, String predicate, String object, String username) {
    if (privatestorage == null || !privatestorage.containsKey(username)) {
        return;
    }
    addTriple(subject, predicate, object, privatestorage.get(username));
}
public static void addTriple(String subject, String predicate, String object) {
addTriple (subject, predicate, object, model);
}
@ -162,43 +177,43 @@ public class JenaTripleStore {
r.addProperty(pr, object);
Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object);
}
public static String getObject(final String subject, final String predicate) {
Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ");
Iterator<RDFNode> ni = JenaTripleStore.getObjects(subject, predicate);
if (!ni.hasNext()) return "";
return ni.next().toString();
String object = "";
if (ni.hasNext()) object = ni.next().toString();
Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " - " + object);
return object;
}
public static Iterator<RDFNode> getObjects(final String subject, final String predicate) {
public static Iterator<RDFNode> getObjects(final String subject, final String predicate) {
final Resource r = subject == null ? null : JenaTripleStore.getResource(subject);
return getObjects(r, predicate);
}
public static String getPrivateObject(final String subject, final String predicate, final String username) {
Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ("+username+")");
public static String getPrivateObject(final String subject, final String predicate, final String username) {
Iterator<RDFNode> ni = JenaTripleStore.getPrivateObjects(subject, predicate, username);
if (!ni.hasNext()) return "";
return ni.next().toString();
String object = "";
if (ni.hasNext()) object = ni.next().toString();
Log.logInfo("TRIPLESTORE", "GET (" + username + ") " + subject + " - " + predicate + " - " + object);
return object;
}
/**
 * Looks up the objects stored for a subject/predicate pair in the private
 * store of the given user.
 *
 * @param subject   subject URI to look up
 * @param predicate predicate URI to look up
 * @param username  name of the user whose private store is queried
 * @return an iterator over the matching objects; an empty iterator (never {@code null})
 *         when no private stores exist or the user has no store
 */
private static Iterator<RDFNode> getPrivateObjects(final String subject, final String predicate, final String username) {
    // a single lookup avoids a window between containsKey() and get()
    final Model userModel = (privatestorage == null) ? null : privatestorage.get(username);
    if (userModel == null) {
        // getPrivateObject calls hasNext() on the result unconditionally, so null is not an option
        return java.util.Collections.<RDFNode>emptyList().iterator();
    }
    return getObjects(userModel.getResource(subject), predicate, userModel);
}
public static Iterator<RDFNode> getObjects(final Resource r, final String predicate) {
return getObjects(r, predicate, model);
}
private static Iterator<RDFNode> getObjects(final Resource r, final String predicate, final Model model) {
final Property pr = model.getProperty(predicate);
final StmtIterator iter = model.listStatements(r, pr, (Resource) null);
final StmtIterator iter = model.listStatements(r, pr, (Resource) null);
return new Iterator<RDFNode>() {
@Override
public boolean hasNext() {
@ -214,15 +229,15 @@ public class JenaTripleStore {
}
};
}
public static Iterator<Resource> getSubjects(final String predicate) {
return getSubjects(predicate, model);
}
private static Iterator<Resource> getSubjects(final String predicate, final Model model) {
final Property pr = model.getProperty(predicate);
final ResIterator iter = model.listSubjectsWithProperty(pr);
final ResIterator iter = model.listSubjectsWithProperty(pr);
return new Iterator<Resource>() {
@Override
public boolean hasNext() {
@ -246,91 +261,72 @@ public class JenaTripleStore {
m.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE);
return m;
}
public static String getMetadataByURLHash(byte[] urlhash) {
String subject = YaCyMetadata.hashURI(urlhash);
Model model = JenaTripleStore.getSubmodelBySubject(subject);
/**
 * Serializes a model in the "RDF/XML-ABBREV" syntax.
 *
 * @param model the model to write out
 * @return the serialized model, with the written bytes converted via {@code UTF8.String}
 */
public static String getRDFByModel(Model model) {
    final ByteArrayOutputStream rdfBuffer = new ByteArrayOutputStream();
    model.write(rdfBuffer, "RDF/XML-ABBREV");
    final byte[] serialized = rdfBuffer.toByteArray();
    return UTF8.String(serialized);
}
public static void initPrivateStores() {
Switchboard switchboard = Switchboard.getSwitchboard();
Log.logInfo("TRIPLESTORE", "Init private stores");
if (privatestorage == null) privatestorage = new ConcurrentHashMap<String, Model>();
if (privatestorage != null) privatestorage.clear();
try {
Iterator<de.anomic.data.UserDB.Entry> it = switchboard.userDB.iterator(true);
while (it.hasNext()) {
de.anomic.data.UserDB.Entry e = it.next();
String username = e.getUserName();
File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath()));
File currentuserfile = new File(triplestore, "private_store_"+username+".rdf");
Log.logInfo("TRIPLESTORE", "Init " + username + " from "+currentuserfile.getAbsolutePath());
Model tmp = ModelFactory.createDefaultModel();
init (tmp);
init (tmp);
if (currentuserfile.exists()) {
Log.logInfo("TRIPLESTORE", "Loading from " + currentuserfile.getAbsolutePath());
InputStream is = FileManager.get().open(currentuserfile.getAbsolutePath());
if (is != null) {
// read the RDF/XML file
tmp.read(is, null);
Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + currentuserfile.getAbsolutePath());
} else {
throw new IOException("cannot read " + currentuserfile.getAbsolutePath());
}
}
if (tmp != null) {
privatestorage.put(username, tmp);
}
}
}
catch (Exception anyex) {
} catch (Exception anyex) {
Log.logException(anyex);
}
}
public static void savePrivateStores(Switchboard switchboard) {
public static void savePrivateStores() {
Switchboard switchboard = Switchboard.getSwitchboard();
Log.logInfo("TRIPLESTORE", "Saving user triplestores");
if (privatestorage == null) return;
for (Entry<String, Model> s : privatestorage.entrySet()) {
File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath()));
File currentuserfile = new File(triplestore, "private_store_"+s.getKey()+".rdf");
saveFile (currentuserfile.getAbsolutePath(), s.getValue());
}
}
private static long lastModelSizeStored = -1;
/**
 * Saves the shared model to "local.rdf" inside the configured triplestore
 * directory, but only when its size differs from the size recorded at the
 * previous save, and then saves all private user stores.
 */
public static void saveAll() {
    Switchboard sb = Switchboard.getSwitchboard();
    File triplestore = new File(sb.getConfig("triplestore", new File(sb.dataPath, "DATA/TRIPLESTORE").getAbsolutePath()));
    // Read the size once, before saving: if triples are added while the file is
    // being written, the recorded size stays behind and the next call saves again.
    final long currentSize = model.size();
    // NOTE(review): a size comparison cannot detect changes that keep the triple count
    // constant (e.g. one triple removed and another added) - confirm this is acceptable.
    if (currentSize != lastModelSizeStored) {
        JenaTripleStore.saveFile(new File(triplestore, "local.rdf").getAbsolutePath());
        lastModelSizeStored = currentSize;
    }
    JenaTripleStore.savePrivateStores();
}
}

@ -36,7 +36,8 @@ import java.util.regex.Pattern;
import net.yacy.cora.storage.Files;
import net.yacy.document.WordCache.Dictionary;
import net.yacy.document.geolocalization.Locations;
import net.yacy.document.geolocation.GeoLocation;
import net.yacy.document.geolocation.Locations;
public class Tagging {
@ -52,6 +53,39 @@ public class Tagging {
private String predicate, namespace, objectspace;
/**
 * helper class: Synonym and Objectlink tuple.
 * Synonyms are kept as one comma-separated string; both values are immutable.
 */
public static class SOTuple {
    private final String synonyms;
    private final String objectlink;

    /**
     * @param synonyms   the synonyms as a comma-separated string; stored as given
     * @param objectlink the object link; stored as given
     */
    public SOTuple(String synonyms, String objectlink) {
        this.synonyms = synonyms;
        this.objectlink = objectlink;
    }

    /**
     * @param synonyms   the synonyms; joined with ',' into one string. An empty array
     *                   yields an empty synonym string.
     * @param objectlink the object link; stored as given
     */
    public SOTuple(String[] synonyms, String objectlink) {
        StringBuilder sb = new StringBuilder(synonyms.length * 10);
        // separator goes before every element but the first, so an empty array is safe
        for (int i = 0; i < synonyms.length; i++) {
            if (i > 0) sb.append(',');
            sb.append(synonyms[i]);
        }
        this.synonyms = sb.toString();
        this.objectlink = objectlink;
    }

    /**
     * @return the synonyms as a comma-separated string, exactly as stored
     */
    public String getSynonymsCSV() {
        return this.synonyms;
    }

    /**
     * @return the synonyms split at ','; an empty array when there are no synonyms
     *         (stored string is {@code null} or empty)
     */
    public String[] getSynonymsList() {
        if (this.synonyms == null || this.synonyms.length() == 0) {
            return new String[0];
        }
        return this.synonyms.split(",");
    }

    /**
     * @return the object link, exactly as stored
     */
    public String getObjectlink() {
        return this.objectlink;
    }
}
public Tagging(String name) {
this.navigatorName = name;
this.synonym2term = new ConcurrentHashMap<String, String>();
@ -95,7 +129,7 @@ public class Tagging {
vocloop: for (Map.Entry<String, SOTuple> e: table.entrySet()) {
if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) {
term = normalizeKey(e.getKey());
v = normalizeWord(e.getKey());
v = normalizeTerm(e.getKey());
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
@ -108,13 +142,13 @@ public class Tagging {
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeWord(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeWord(term);
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink());
@ -129,49 +163,115 @@ public class Tagging {
for (Map.Entry<String, SOTuple> e: table.entrySet()) {
String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV();
String o = e.getValue() == null ? "" : e.getValue().getObjectlink();
w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n");
w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue().getSynonymsCSV()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n");
}
w.close();
init();
}
}
/**
* helper class: Synonym and Objectlink tuple
*/
public static class SOTuple {
private final String synonyms;
private final String objectlink;
public SOTuple(String synonyms, String objectlink) {
this.synonyms = synonyms;
this.objectlink = objectlink;
}
public SOTuple(String[] synonyms, String objectlink) {
StringBuilder sb = new StringBuilder(synonyms.length * 10);
for (String s: synonyms) sb.append(',').append(s);
this.synonyms = sb.substring(1);
this.objectlink = objectlink;
/**
 * Creates a vocabulary from a location service.
 * Every location name becomes a term whose synonym is the normalized name.
 * If the service finds at least one geolocation for a name, the first one
 * is stored as an OpenStreetMap link for that term.
 * @param name the navigator name of the vocabulary
 * @param location the location service that provides names and coordinates
 */
public Tagging(String name, Locations location) {
    this(name);
    for (final String locationName : location.locationNames()) {
        final String normalized = normalizeTerm(locationName);
        this.synonym2term.put(normalized, locationName);
        this.term2synonym.put(locationName, normalized);

        final TreeSet<GeoLocation> matches = location.find(locationName, true);
        if (matches.isEmpty()) {
            continue;
        }
        // only the first match is used to build the object link
        final GeoLocation first = matches.iterator().next();
        this.term2objectlink.put(locationName, "http://www.openstreetmap.org/?lat=" + first.lat() + "&lon=" + first.lon() + "&zoom=16");
    }
}
public String getSynonymsCSV() {
return this.synonyms;
/**
 * Creates a vocabulary from a dictionary.
 * Every word becomes a term whose synonym is the lower-case form of the word.
 * @param name the navigator name of the vocabulary
 * @param dictionary the dictionary that provides the words
 */
public Tagging(String name, Dictionary dictionary) {
    this(name);
    for (final StringBuilder entry : dictionary.getWords()) {
        final String word = entry.toString();
        final String lowered = word.toLowerCase();
        this.synonym2term.put(lowered, word);
        this.term2synonym.put(word, lowered);
    }
}
public String[] getSynonymsList() {
return this.synonyms.split(",");
}
public void init() throws IOException {
if (this.propFile == null) return;
this.synonym2term.clear();
this.term2synonym.clear();
this.term2objectlink.clear();
this.synonym2synonyms.clear();
this.namespace = DEFAULT_NAMESPACE;
this.predicate = this.namespace + this.navigatorName;
this.objectspace = null;
public String getObjectlink() {
return this.objectlink;
BlockingQueue<String> list = Files.concurentLineReader(this.propFile, 1000);
String term, v;
String[] tags;
int p;
String line;
try {
vocloop: while ((line = list.take()) != Files.POISON_LINE) {
line = line.trim();
p = line.indexOf('#');
if (p >= 0) {
String comment = line.substring(p + 1).trim();
if (comment.startsWith("namespace:")) {
this.namespace = comment.substring(10).trim();
if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
this.predicate = this.namespace + this.navigatorName;
continue vocloop;
}
if (comment.startsWith("objectspace:")) {
this.objectspace = comment.substring(12).trim();
if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
continue vocloop;
}
}
String[] pl = parseLine(line);
if (pl == null) {
continue vocloop;
}
if (pl[1] == null) {
term = normalizeKey(pl[0]);
v = normalizeTerm(pl[0]);
this.synonym2term.put(v, term);
this.term2synonym.put(term, v);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
continue vocloop;
}
term = normalizeKey(pl[0]);
v = pl[1];
tags = v.split(",");
Set<String> synonyms = new HashSet<String>();
synonyms.add(term);
tagloop: for (String synonym: tags) {
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
synonym = normalizeTerm(synonym);
if (synonym.length() == 0) continue tagloop;
synonyms.add(synonym);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
}
String synonym = normalizeTerm(term);
this.synonym2term.put(synonym, term);
this.term2synonym.put(term, synonym);
if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
synonyms.add(synonym);
for (String s: synonyms) {
this.synonym2synonyms.put(s, synonyms);
}
}
} catch (InterruptedException e) {
}
}
public void updateTerm(String term, String[] synonyms) {
public int size() {
return this.term2objectlink.size();
}
private File tmpFile() {
@ -363,98 +463,6 @@ public class Tagging {
return new String[]{line.substring(0, p), line.substring(p + 1), c};
}
/**
 * (Re-)loads the vocabulary from the property file.
 * Does nothing if no property file is set. Otherwise all term/synonym/objectlink
 * mappings are cleared, namespace and predicate are reset to their defaults and
 * the file is processed line by line:
 * <ul>
 * <li>a line whose part after '#' starts with "namespace:" sets the namespace
 *     (and the predicate derived from it)</li>
 * <li>a line whose part after '#' starts with "objectspace:" sets the objectspace</li>
 * <li>every other line is handed to parseLine; lines that cannot be parsed are skipped</li>
 * </ul>
 * A non-empty namespace or objectspace that does not end with '/' or '#' gets a '#' appended.
 * If the thread is interrupted while waiting for lines, loading stops with what
 * was read so far and the interrupt status of the thread is restored.
 * @throws IOException declared for the line reader
 */
public void init() throws IOException {
    if (this.propFile == null) return;
    this.synonym2term.clear();
    this.term2synonym.clear();
    this.term2objectlink.clear();
    this.synonym2synonyms.clear();
    this.namespace = DEFAULT_NAMESPACE;
    this.predicate = this.namespace + this.navigatorName;
    this.objectspace = null;
    BlockingQueue<String> list = Files.concurentLineReader(this.propFile, 1000);
    String term, v;
    String[] tags;
    int p;
    String line;
    try {
        // the reader signals the end of the file with the POISON_LINE sentinel object,
        // therefore the comparison is done by identity
        vocloop: while ((line = list.take()) != Files.POISON_LINE) {
            line = line.trim();
            p = line.indexOf('#');
            if (p >= 0) {
                String comment = line.substring(p + 1).trim();
                if (comment.startsWith("namespace:")) {
                    this.namespace = comment.substring(10).trim();
                    if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#";
                    this.predicate = this.namespace + this.navigatorName;
                    continue vocloop;
                }
                if (comment.startsWith("objectspace:")) {
                    this.objectspace = comment.substring(12).trim();
                    if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#";
                    continue vocloop;
                }
            }
            // pl[0] = term, pl[1] = synonym list (or null), pl[2] = object link
            String[] pl = parseLine(line);
            if (pl == null) {
                continue vocloop;
            }
            if (pl[1] == null) {
                // term without synonyms: the normalized term is its only synonym
                term = normalizeKey(pl[0]);
                v = normalizeWord(pl[0]);
                this.synonym2term.put(v, term);
                this.term2synonym.put(term, v);
                if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
                continue vocloop;
            }
            term = normalizeKey(pl[0]);
            v = pl[1];
            tags = v.split(",");
            Set<String> synonyms = new HashSet<String>();
            synonyms.add(term);
            tagloop: for (String synonym: tags) {
                if (synonym.length() == 0) continue tagloop;
                // both the raw and the normalized form are collected as synonyms
                synonyms.add(synonym);
                synonym = normalizeWord(synonym);
                if (synonym.length() == 0) continue tagloop;
                synonyms.add(synonym);
                this.synonym2term.put(synonym, term);
                this.term2synonym.put(term, synonym);
            }
            String synonym = normalizeWord(term);
            this.synonym2term.put(synonym, term);
            this.term2synonym.put(term, synonym);
            if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]);
            synonyms.add(synonym);
            // every member of the synonym group points to the complete group
            for (String s: synonyms) {
                this.synonym2synonyms.put(s, synonyms);
            }
        }
    } catch (InterruptedException e) {
        // keep what was loaded so far, but do not hide the interruption from the caller
        Thread.currentThread().interrupt();
    }
}
/**
 * Creates a vocabulary from a location service.
 * Every location name becomes a term whose synonym is the lower-case name.
 * @param name the navigator name of the vocabulary
 * @param localization the location service that provides the names
 */
public Tagging(String name, Locations localization) {
    this(name);
    for (final String locationName : localization.locationNames()) {
        final String lowered = locationName.toLowerCase();
        this.synonym2term.put(lowered, locationName);
        this.term2synonym.put(locationName, lowered);
    }
}
/**
 * Creates a vocabulary from a dictionary.
 * Every word becomes a term whose synonym is the lower-case form of the word.
 * @param name the navigator name of the vocabulary
 * @param dictionary the dictionary that provides the words
 */
public Tagging(String name, Dictionary dictionary) {
    this(name);
    for (final StringBuilder entry : dictionary.getWords()) {
        final String word = entry.toString();
        final String lowered = word.toLowerCase();
        this.synonym2term.put(lowered, word);
        this.term2synonym.put(word, lowered);
    }
}
/**
* get the predicate name which already contains the prefix url stub
* @return
@ -493,14 +501,14 @@ public class Tagging {
return this.propFile;
}
public Metatag getMetatagFromSynonym(char prefix, final String word) {
public Metatag getMetatagFromSynonym(final String word) {
String printname = this.synonym2term.get(word);
if (printname == null) return null;
return new Metatag(prefix, printname);
return new Metatag(printname);
}
public Metatag getMetatagFromTerm(char prefix, final String word) {
return new Metatag(prefix, word);
public Metatag getMetatagFromTerm(final String word) {
return new Metatag(word);
}
public Set<String> getSynonyms(String term) {
@ -532,20 +540,23 @@ public class Tagging {
private final static Pattern PATTERN_UE = Pattern.compile("\u00FC");
private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF");
public static final String normalizeWord(String word) {
word = word.trim().toLowerCase();
word = PATTERN_AE.matcher(word).replaceAll("ae");
word = PATTERN_OE.matcher(word).replaceAll("oe");
word = PATTERN_UE.matcher(word).replaceAll("ue");
word = PATTERN_SZ.matcher(word).replaceAll("ss");
return word;
/**
 * Normalizes a term so that it can be used as a key for synonym lookups.
 * The term is trimmed and lower-cased, the umlaut/sharp-s patterns are replaced
 * by their two-letter transcriptions ("ae", "oe", "ue", "ss") and comma-separated
 * parts are swapped, so that "doe, john" becomes "john doe".
 * The result never has leading or trailing whitespace.
 * @param term the term to normalize, must not be null
 * @return the normalized term; may be empty
 */
public static final String normalizeTerm(String term) {
    term = term.trim().toLowerCase();
    term = PATTERN_AE.matcher(term).replaceAll("ae");
    term = PATTERN_OE.matcher(term).replaceAll("oe");
    term = PATTERN_UE.matcher(term).replaceAll("ue");
    term = PATTERN_SZ.matcher(term).replaceAll("ss");
    // remove comma: move the part behind the first comma to the front until no comma is left.
    // Both parts are trimmed, otherwise "doe , john" would produce "john doe ".
    int p;
    while ((p = term.indexOf(',')) >= 0) {
        term = term.substring(p + 1).trim() + " " + term.substring(0, p).trim();
    }
    // a leading or trailing comma leaves an empty part next to the joining blank ("smith," -> " smith")
    return term.trim();
}
public class Metatag {
private final String object;
private final char prefix;
public Metatag(char prefix, String object) {
this.prefix = prefix;
public Metatag(String object) {
this.object = object;
}
@ -563,7 +574,7 @@ public class Tagging {
@Override
public String toString() {
return this.prefix + Tagging.this.navigatorName + ":" + encodePrintname(this.object);
return Tagging.this.navigatorName + ":" + encodePrintname(this.object);
}
@Override
@ -589,12 +600,12 @@ public class Tagging {
return PATTERN_UL.matcher(maskname).replaceAll(" ");
}
public static String cleanTagFromAutotagging(char prefix, final String tagString) {
public static String cleanTagFromAutotagging(final String tagString) {
if (tagString == null || tagString.length() == 0) return "";
String[] tags = PATTERN_SP.split(tagString);
StringBuilder sb = new StringBuilder(tagString.length());
for (String tag : tags) {
if (tag.length() > 0 && tag.charAt(0) != prefix) {
if (tag.length() > 0) {
sb.append(tag).append(' ');
}
}

@ -885,6 +885,7 @@ public class Domains {
"127.0.0.1".equals(host) ||
"localhost".equals(host) ||
host.startsWith("0:0:0:0:0:0:0:1") ||
host.startsWith("fe80:0:0:0:0:0:0:1") || // used by my mac as localhost
host.startsWith("::1/") ||
"::1".equals(host)
);

@ -116,6 +116,7 @@ public class RequestHeader extends HeaderFramework {
path = path.toLowerCase();
if (path.endsWith(".json")) return FileType.JSON;
if (path.endsWith(".xml")) return FileType.XML;
if (path.endsWith(".rdf")) return FileType.XML;
if (path.endsWith(".rss")) return FileType.XML;
return FileType.HTML;
}

@ -20,19 +20,15 @@
package net.yacy.document;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.document.WordCache.Dictionary;
import net.yacy.document.geolocalization.Locations;
import net.yacy.document.geolocation.Locations;
import net.yacy.kelondro.logging.Log;
/**
@ -44,7 +40,6 @@ public class Autotagging {
private final static Object PRESENT = new Object();
public final char prefixChar;
private final File autotaggingPath;
private final Map<String, Tagging> vocabularies; // mapping from vocabulary name to the tagging vocabulary
private final Map<String, Object> allTags;
@ -58,10 +53,9 @@ public class Autotagging {
* properties without values are allowed (the value is then set to the key)
* also the value can be used as a tag
*/
public Autotagging(final File autotaggingPath, char prefixChar) {
public Autotagging(final File autotaggingPath) {
this.vocabularies = new ConcurrentHashMap<String, Tagging>();
this.autotaggingPath = autotaggingPath;
this.prefixChar = prefixChar;
this.allTags = new ConcurrentHashMap<String, Object>();
if (this.autotaggingPath == null || !this.autotaggingPath.exists()) {
return;
@ -114,16 +108,6 @@ public class Autotagging {
}
}
/**
 * Registers one vocabulary per dictionary.
 * The map key is used as vocabulary name; all tags of each new vocabulary
 * are added to the set of all known tags.
 * @param dictionaries mapping from vocabulary name to dictionary
 */
public void addDictionaries(Map<String, Dictionary> dictionaries) {
    for (Map.Entry<String, Dictionary> dictEntry : dictionaries.entrySet()) {
        final String vocabularyName = dictEntry.getKey();
        final Tagging vocabulary = new Tagging(vocabularyName, dictEntry.getValue());
        this.vocabularies.put(vocabularyName, vocabulary);
        for (String tag : vocabulary.tags()) {
            this.allTags.put(tag, PRESENT);
        }
    }
}
public void addPlaces(Locations locations) {
if (locations.size() == 0) return; // otherwise we get a navigation that does nothing
Tagging voc = new Tagging("Locations", locations);
@ -137,28 +121,10 @@ public class Autotagging {
}
}
/**
 * Produce a set of tags for a given text.
 * The text is split into words; every word that matches a term of one of the
 * vocabularies contributes the string form of its metatag to the result.
 * Words without a matching vocabulary term are ignored.
 * @param text the text to be tagged
 * @return the set of tag names as rendered by Metatag.toString(); empty if no
 *         vocabulary is loaded or no word matches
 */
public Set<String> getPrintTagsFromText(String text) {
    Set<String> as = new HashSet<String>();
    if (this.vocabularies.isEmpty()) return as;
    final WordTokenizer tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(text)), LibraryProvider.dymLib);
    Tagging.Metatag metatag;
    while (tokens.hasMoreElements()) {
        // getTagFromTerm returns null for words that are in no vocabulary,
        // so the null check must happen before the tag is converted to a string
        metatag = getTagFromTerm(tokens.nextElement().toString());
        if (metatag != null) as.add(metatag.toString());
    }
    return as;
}
/**
 * @return the number of vocabularies that are currently registered
 */
public int size() {
return this.vocabularies.size();
}
/**
* maximum number of compound tags (number of words in one tag)
* @return
@ -171,41 +137,17 @@ public class Autotagging {
public Tagging.Metatag getTagFromTerm(String term) {
if (this.vocabularies.isEmpty()) return null;
Tagging.Metatag tag;
term = Tagging.normalizeWord(term);
term = Tagging.normalizeTerm(term);
for (Map.Entry<String, Tagging> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatagFromSynonym(this.prefixChar, term);
tag = v.getValue().getMetatagFromSynonym(term);
if (tag != null) return tag;
}
return null;
}
/**
 * Checks if the string form of a metatag is contained in a list of tags.
 * @param metatag the metatag to look for
 * @param tags the tags to search in
 * @return true if one of the tags equals metatag.toString()
 */
public static boolean metatagAppearIn(final Tagging.Metatag metatag, final String[] tags) {
    final String wanted = metatag.toString();
    int i = 0;
    while (i < tags.length) {
        if (wanted.equals(tags[i])) {
            return true;
        }
        i++;
    }
    return false;
}
public Tagging.Metatag metatag(String metatag) {
int p = metatag.indexOf(':');
if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag);
String vocName = metatag.substring(1, p);
public Tagging.Metatag metatag(String vocName, String term) {
Tagging tagging = this.vocabularies.get(vocName);
return tagging.getMetatagFromTerm(this.prefixChar, Tagging.decodeMaskname(metatag.substring(p + 1)));
}
/**
 * Convenience wrapper: delegates to Tagging.cleanTagFromAutotagging using
 * the prefix character of this instance.
 * @param tagString the tag string to clean
 * @return the result of Tagging.cleanTagFromAutotagging for this prefix character
 */
public String cleanTagFromAutotagging(String tagString) {
return Tagging.cleanTagFromAutotagging(this.prefixChar, tagString);
}
public static void main(String[] args) {
Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
for (Map.Entry<String, Tagging> entry: a.vocabularies.entrySet()) {
System.out.println(entry);
}
Set<String> tags = a.getPrintTagsFromText("In die Tueren und Fluchttueren muessen noch Schloesser eingebaut werden");
System.out.println(tags);
return tagging.getMetatagFromTerm(Tagging.decodeMaskname(term));
}
}

@ -227,10 +227,6 @@ dc_rights
String objectspace = vocabulary.getObjectspace();
StringBuilder sb = new StringBuilder(e.getValue().size() * 20);
for (Tagging.Metatag s: e.getValue()) {
String t = s.toString();
if (!this.keywords.contains(t)) {
this.keywords.add(t);
}
sb.append(',').append(s.getObject());
String objectlink = vocabulary.getObjectlink(s.getObject());
if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) {

@ -47,9 +47,9 @@ import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.Tagging.SOTuple;
import net.yacy.cora.storage.Files;
import net.yacy.document.geolocalization.GeonamesLocation;
import net.yacy.document.geolocalization.OpenGeoDBLocation;
import net.yacy.document.geolocalization.OverarchingLocation;
import net.yacy.document.geolocation.GeonamesLocation;
import net.yacy.document.geolocation.OpenGeoDBLocation;
import net.yacy.document.geolocation.OverarchingLocation;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
@ -57,7 +57,6 @@ import com.hp.hpl.jena.rdf.model.Resource;
public class LibraryProvider {
public static final char tagPrefix = '$';
public static final String path_to_source_dictionaries = "source";
public static final String path_to_did_you_mean_dictionaries = "didyoumean";
public static final String path_to_autotagging_dictionaries = "autotagging";
@ -76,6 +75,8 @@ public class LibraryProvider {
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ),
GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ),
GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ),
GEON1( "geon1", "http://download.geonames.org/export/dump/cities5000.zip" ),
GEON2( "geon2", "http://download.geonames.org/export/dump/cities15000.zip" ),
DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ),
PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" );
@ -116,11 +117,13 @@ public class LibraryProvider {
dictRoot = rootPath;
// initialize libraries
initAutotagging(tagPrefix);
initAutotagging();
activateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
integrateGeonames();
integrateGeonames0(-1);
integrateGeonames1(-1);
integrateGeonames2(100000);
activatePND();
Set<String> allTags = new HashSet<String>() ;
allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity
@ -136,19 +139,33 @@ public class LibraryProvider {
if ( geo0.exists() ) {
geo0.renameTo(Dictionary.GEODB0.fileDisabled());
}
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false));
geoLoc.activateLocation(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, dymLib));
return;
}
if ( geo0.exists() ) {
geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false));
geoLoc.activateLocation(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, dymLib));
return;
}
}
public static void integrateGeonames() {
public static void integrateGeonames0(long minPopulation) {
final File geon = Dictionary.GEON0.file();
if ( geon.exists() ) {
geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon));
geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib, minPopulation));
return;
}
}
/**
 * Activates the GEON1 geonames dictionary as location service if its file exists.
 * Does nothing if the file is missing.
 * @param minPopulation passed to GeonamesLocation as population threshold
 */
public static void integrateGeonames1(long minPopulation) {
    final File geonamesFile = Dictionary.GEON1.file();
    if (!geonamesFile.exists()) {
        return;
    }
    geoLoc.activateLocation(Dictionary.GEON1.nickname, new GeonamesLocation(geonamesFile, dymLib, minPopulation));
}
/**
 * Activates the GEON2 geonames dictionary as location service if its file exists.
 * Does nothing if the file is missing.
 * @param minPopulation passed to GeonamesLocation as population threshold
 */
public static void integrateGeonames2(long minPopulation) {
    final File geonamesFile = Dictionary.GEON2.file();
    if (!geonamesFile.exists()) {
        return;
    }
    geoLoc.activateLocation(Dictionary.GEON2.nickname, new GeonamesLocation(geonamesFile, dymLib, minPopulation));
}
@ -161,12 +178,12 @@ public class LibraryProvider {
dymLib = new WordCache(dymDict);
}
public static void initAutotagging(char prefix) {
public static void initAutotagging() {
final File autotaggingPath = new File(dictRoot, path_to_autotagging_dictionaries);
if ( !autotaggingPath.exists() ) {
autotaggingPath.mkdirs();
}
autotagging = new Autotagging(autotaggingPath, prefix);
autotagging = new Autotagging(autotaggingPath);
}
public static void activateDeReWo() {
@ -220,7 +237,7 @@ public class LibraryProvider {
Resource resource = i.next();
String subject = resource.toString();
// prepare a propert term from the subject uri
// prepare a proper term from the subject uri
int p = subject.lastIndexOf('/');
if (p < 0) continue;
String term = subject.substring(p + 1);
@ -229,11 +246,12 @@ public class LibraryProvider {
if (p >= 0) term = term.substring(0, p);
term = term.replaceAll("_", " ").trim();
if (term.length() == 0) continue;
if (term.indexOf(' ') < 0) continue; // accept only names that have at least two parts
// store the term into the vocabulary map
map.put(term, new SOTuple("", subject));
map.put(term, new SOTuple(Tagging.normalizeTerm(term), subject));
}
try {
if (map.size() > 0) try {
Log.logInfo("LibraryProvider", "adding vocabulary to autotagging");
Tagging pndVoc = new Tagging("Persons", null, objectspace, map);
autotagging.addVocabulary(pndVoc);
@ -296,13 +314,6 @@ public class LibraryProvider {
InputStream derewoTxtEntry;
try {
final ZipFile zip = new ZipFile(file);
/*
final Enumeration<? extends ZipEntry> i = zip.entries();
while (i.hasMoreElements()) {
final ZipEntry e = i.nextElement();
System.out.println("loadDeReWo: " + e.getName());
}
*/
derewoTxtEntry = zip.getInputStream(zip.getEntry("derewo-v-100000t-2009-04-30-0.1"));
} catch ( final ZipException e ) {
Log.logException(e);

@ -20,7 +20,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
import java.util.Comparator;

@ -20,7 +20,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
/**
* Geolocation storage may vary using different data structures for the points.

@ -20,7 +20,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
import java.io.BufferedReader;
import java.io.File;
@ -40,6 +40,7 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import net.yacy.document.StringBuilderComparator;
import net.yacy.document.WordCache;
import net.yacy.kelondro.logging.Log;
public class GeonamesLocation implements Locations
@ -72,8 +73,7 @@ public class GeonamesLocation implements Locations
private final Map<Integer, GeoLocation> id2loc;
private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final File file;
public GeonamesLocation(final File file) {
public GeonamesLocation(final File file, WordCache dymLib, long minPopulation) {
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file;
@ -87,7 +87,9 @@ public class GeonamesLocation implements Locations
BufferedReader reader;
try {
final ZipFile zf = new ZipFile(file);
final ZipEntry ze = zf.getEntry("cities1000.txt");
String entryName = file.getName();
entryName = entryName.substring(0, entryName.length() - 3) + "txt";
final ZipEntry ze = zf.getEntry(entryName);
final InputStream is = zf.getInputStream(ze);
reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
} catch ( final IOException e ) {
@ -96,6 +98,28 @@ public class GeonamesLocation implements Locations
}
// when an error occurs after this line, just accept it and work on
/* parse these fields:
---------------------------------------------------
00 geonameid : integer id of record in geonames database
01 name : name of geographical point (utf8) varchar(200)
02 asciiname : name of geographical point in plain ascii characters, varchar(200)
03 alternatenames : alternatenames, comma separated varchar(5000)
04 latitude : latitude in decimal degrees (wgs84)
05 longitude : longitude in decimal degrees (wgs84)
06 feature class : see http://www.geonames.org/export/codes.html, char(1)
07 feature code : see http://www.geonames.org/export/codes.html, varchar(10)
08 country code : ISO-3166 2-letter country code, 2 characters
09 cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
10 admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
11 admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
12 admin3 code : code for third level administrative division, varchar(20)
13 admin4 code : code for fourth level administrative division, varchar(20)
14 population : bigint (8 byte int)
15 elevation : in meters, integer
16 dem : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
17 timezone : the timezone id (see file timeZone.txt) varchar(40)
18 modification date : date of last modification in yyyy-MM-dd format
*/
try {
String line;
String[] fields;
@ -105,7 +129,9 @@ public class GeonamesLocation implements Locations
continue;
}
fields = line.split("\t");
final int id = Integer.parseInt(fields[0]);
final long population = Long.parseLong(fields[14]);
if (minPopulation > 0 && population < minPopulation) continue;
final int geonameid = Integer.parseInt(fields[0]);
locnames = new HashSet<StringBuilder>();
locnames.add(new StringBuilder(fields[1]));
locnames.add(new StringBuilder(fields[2]));
@ -115,13 +141,15 @@ public class GeonamesLocation implements Locations
final GeoLocation c =
new GeoLocation(Float.parseFloat(fields[4]), Float.parseFloat(fields[5]), fields[1]);
c.setPopulation((int) Long.parseLong(fields[14]));
this.id2loc.put(id, c);
this.id2loc.put(geonameid, c);
for ( final StringBuilder name : locnames ) {
if (dymLib != null && dymLib.contains(name)) continue;
if (name.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue;
List<Integer> locs = this.name2ids.get(name);
if ( locs == null ) {
locs = new ArrayList<Integer>(1);
}
locs.add(id);
locs.add(geonameid);
this.name2ids.put(name, locs);
}
}

@ -1,4 +1,4 @@
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
/**
* GeoPoint implementation with Integer accuracy

@ -21,7 +21,7 @@
*/
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
import java.util.Set;
import java.util.TreeSet;

@ -20,7 +20,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
import java.io.BufferedReader;
import java.io.File;
@ -40,6 +40,7 @@ import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import net.yacy.document.StringBuilderComparator;
import net.yacy.document.WordCache;
import net.yacy.kelondro.logging.Log;
/**
@ -59,7 +60,7 @@ public class OpenGeoDBLocation implements Locations
private final Map<String, Integer> zip2id;
private final File file;
public OpenGeoDBLocation(final File file, final boolean lonlat) {
public OpenGeoDBLocation(final File file, WordCache dymLib) {
this.file = file;
this.id2loc = new HashMap<Integer, GeoLocation>();
@ -99,21 +100,22 @@ public class OpenGeoDBLocation implements Locations
line = line.substring(18 + 7);
v = line.split(",");
v = line.split(",");
if ( lonlat ) {
lon = Float.parseFloat(v[2]);
lat = Float.parseFloat(v[3]);
} else {
lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]);
}
lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]);
this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon));
}
if ( line.startsWith("geodb_textdata ") ) {
line = line.substring(15 + 7);
v = line.split(",");
if ( v[1].equals("500100000") ) { // Ortsname
if (v.length > 10) {
// a ',' is probably inside the location name
v[2] = v[2] + "," + v[3];
}
id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]);
if (h.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue;
if (dymLib != null && dymLib.contains(new StringBuilder(h))) continue;
List<Integer> l = this.name2ids.get(new StringBuilder(h));
if ( l == null ) {
l = new ArrayList<Integer>(1);
@ -136,8 +138,8 @@ public class OpenGeoDBLocation implements Locations
} else if ( v[1].equals("400300000") ) { // Ortstyp
id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]);
final Integer hc = h.hashCode();
/*
final Integer hc = h.hashCode();
final byte[] tb = this.locTypeHash2locType.get(hc);
if ( tb == null ) {
this.locTypeHash2locType.put(hc, UTF8.getBytes(h));
@ -173,13 +175,12 @@ public class OpenGeoDBLocation implements Locations
}
private static final String removeQuotes(String s) {
if ( s.length() > 0 && s.charAt(0) != '\'' ) {
return s;
if ( s.length() > 0 && s.charAt(0) == '\'' ) {
s = s.substring(1);
}
if ( s.charAt(s.length() - 1) != '\'' ) {
return s;
if ( s.charAt(s.length() - 1) == '\'' ) {
s = s.substring(0, s.length() - 1);
}
s = s.substring(1, s.length() - 1);
return s;
}

@ -20,7 +20,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
package net.yacy.document.geolocation;
import java.util.HashMap;
import java.util.HashSet;
@ -30,6 +30,7 @@ import java.util.TreeSet;
public class OverarchingLocation implements Locations {
public static int MINIMUM_NAME_LENGTH = 4;
private final Map<String, Locations> services;
/**
@ -44,7 +45,7 @@ public class OverarchingLocation implements Locations {
* @param nickname the nickname of the service
* @param service the service
*/
public void activateLocalization(final String nickname, final Locations service) {
public void activateLocation(final String nickname, final Locations service) {
this.services.put(nickname, service);
}

@ -27,6 +27,7 @@ import java.io.IOException;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.retrieval.Response;
@ -47,7 +48,21 @@ public class OAIPMHLoader {
this.source = source;
// load the file from the net
final Response response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true);
Log.logInfo("OAIPMHLoader", "loading record from " + source.toNormalform(true, false));
Response response = null;
IOException ee = null;
for (int i = 0; i < 5; i++) {
// make some retries if first attempt fails
try {
response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true);
break;
} catch (IOException e) {
Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true, false));
ee = e;
continue;
}
}
if (response == null) throw ee;
final byte[] b = response.getContent();
this.resumptionToken = new ResumptionToken(source, b);
//System.out.println("*** ResumptionToken = " + this.resumptionToken.toString());
@ -57,19 +72,6 @@ public class OAIPMHLoader {
// transaction-safe writing
FileUtils.copy(b, f0);
f0.renameTo(f1);
/*
SurrogateReader sr = new SurrogateReader(new ByteArrayInputStream(b), 100);
Thread srt = new Thread(sr);
srt.start();
DCEntry dce;
while ((dce = sr.take()) != DCEntry.poison) {
System.out.println(dce.toString());
}
try {
srt.join();
} catch (InterruptedException e) {}
*/
}
public ResumptionToken getResumptionToken() {
@ -244,4 +246,4 @@ http://nbn-resolving.de/urn:nbn:de:bsz:960-opus-1860
</ListRecords>
</OAI-PMH>
*/
*/

@ -99,7 +99,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
strong(TagType.pair),
i(TagType.pair),
li(TagType.pair),
script(TagType.pair);
script(TagType.pair),
style(TagType.pair);
public TagType type;
private Tag(final TagType type) {
@ -201,6 +202,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
@Override
public void scrapeText(final char[] newtext, final String insideTag) {
// System.out.println("SCRAPE: " + UTF8.String(newtext));
if (insideTag != null && ("script".equals(insideTag) || "style".equals(insideTag))) return;
int p, pl, q, s = 0;
// match evaluation pattern
@ -434,7 +436,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
@Override
public void scrapeTag1(final String tagname, final Properties tagopts, final char[] text) {
public void scrapeTag1(final String tagname, final Properties tagopts, char[] text) {
// System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text));
if (tagname.equalsIgnoreCase("a") && text.length < 2048) {
final String href = tagopts.getProperty("href", EMPTY_STRING);

@ -202,7 +202,7 @@ public final class TransformerWriter extends Writer {
if (tag == null) {
// case (1): this is not a tag opener/closer
if (this.scraper != null) this.scraper.scrapeText(content, null);
if (this.scraper != null && content.length > 0) this.scraper.scrapeText(content, null);
if (this.transformer != null) return this.transformer.transformText(content);
return content;
}
@ -222,7 +222,9 @@ public final class TransformerWriter extends Writer {
// we are collection tag text for the tag 'filterTag' -> case (4) - (7)
if (tag == null || tag.equals("!")) {
// case (4): getting no tag, go on collecting content
if (this.scraper != null) this.scraper.scrapeText(content, this.filterTag);
if (this.scraper != null) {
this.scraper.scrapeText(content, this.filterTag);
}
if (this.transformer != null) {
this.filterCont.append(this.transformer.transformText(content));
} else {
@ -330,7 +332,7 @@ public final class TransformerWriter extends Writer {
if (in[1] == '/') {
// a closing tag
tagend = tagEnd(in, 2);
tag = new String(in, 2, tagend - 2);
tag = new String(in, 2, tagend - 2).toLowerCase();
final char[] text = new char[in.length - tagend - 1];
System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
return filterTag(tag, false, text, quotechar);
@ -338,7 +340,7 @@ public final class TransformerWriter extends Writer {
// an opening tag
tagend = tagEnd(in, 1);
tag = new String(in, 1, tagend - 1);
tag = new String(in, 1, tagend - 1).toLowerCase();
final char[] text = new char[in.length - tagend - 1];
System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
return filterTag(tag, true, text, quotechar);

@ -23,7 +23,6 @@ import net.yacy.search.Switchboard;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.NodeVisitor;
@ -58,7 +57,7 @@ public class AugmentHtmlStream {
// Link
Log.logInfo("AUGMENTATION", tag.getAttribute("href"));
LinkTag lt = (LinkTag)tag;
//LinkTag lt = (LinkTag)tag;
}
@ -90,7 +89,7 @@ public class AugmentHtmlStream {
private int counter;
public VisitorText() {
this.setCounter(0);
this.counter = 0;
}
@Override
@ -211,9 +210,12 @@ public class AugmentHtmlStream {
}
public static StringBuffer process (StringBuffer data, Charset charset, DigestURI url, RequestHeader requestHeader) {
String action = requestHeader.get("YACYACTION");
requestHeader.remove("YACYACTION");
globalrequestHeader = requestHeader;
Switchboard sb = Switchboard.getSwitchboard();
boolean augmented = false;
@ -377,8 +379,8 @@ public class AugmentHtmlStream {
.nextNode());
NodeList bodychildren = bt.getChildren();
bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction_elements/OverlayInteraction.html?urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader)));
bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction_elements/OverlayInteraction.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader)));
bt.setChildren(bodychildren);

@ -9,7 +9,6 @@ import java.util.Map;
import java.util.Set;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
@ -22,10 +21,6 @@ import net.yacy.search.Switchboard;
import org.apache.http.entity.mime.content.ContentBody;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import de.anomic.data.UserDB;
@ -36,7 +31,7 @@ public class Interaction {
UserDB.Entry entry = null;
String result = "anonymous";
//String result = "anonymous";
entry = Switchboard.getSwitchboard().userDB.proxyAuth((requestHeader.get(RequestHeader.AUTHORIZATION, "xxxxxx")));
if(entry != null){
@ -126,8 +121,8 @@ public class Interaction {
}
public static String GetTableentry(String url, String type, String username, String peer) {

@ -37,7 +37,7 @@ import java.util.regex.Pattern;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.document.LibraryProvider;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.Row;
@ -230,7 +230,7 @@ public class URIMetadataRow implements URIMetadata {
String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
tags = LibraryProvider.autotagging.cleanTagFromAutotagging(tags);
tags = Tagging.cleanTagFromAutotagging(tags);
String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
@ -313,7 +313,7 @@ public class URIMetadataRow implements URIMetadata {
assert (s.toString().indexOf(0) < 0);
s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
assert (s.toString().indexOf(0) < 0);
s.append(",tags=").append(crypt.simpleEncode(LibraryProvider.autotagging.cleanTagFromAutotagging(metadata.dc_subject())));
s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(metadata.dc_subject())));
assert (s.toString().indexOf(0) < 0);
s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
assert (s.toString().indexOf(0) < 0);

@ -1048,9 +1048,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
try {
final URL url = new URL(seedURL);
final String host = url.getHost();
if ( host.equals("localhost")
|| host.startsWith("127.")
|| (host.startsWith("0:0:0:0:0:0:0:1")) ) {
if (Domains.isLocalhost(host)) {
return "seedURL in localhost rejected";
}
} catch ( final MalformedURLException e ) {

@ -190,7 +190,7 @@ public final class LoaderDispatcher {
final String host = url.getHost();
// check if url is in blacklist
if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) {
if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}

@ -632,7 +632,7 @@ public final class Switchboard extends serverSwitch
+ " entries"
+ ", "
+ ppRamString(userDbFile.length() / 1024));
// init user triplestores
JenaTripleStore.initPrivateStores();
@ -664,7 +664,7 @@ public final class Switchboard extends serverSwitch
}
}
}.start();
// define a realtime parsable mimetype list
this.log.logConfig("Parser: Initializing Mime Type deny list");
TextParser.setDenyMime(getConfig(SwitchboardConstants.PARSER_MIME_DENY, ""));
@ -2222,6 +2222,11 @@ public final class Switchboard extends serverSwitch
this.tables.cleanFailURLS(getConfigLong("cleanup.failedSearchURLtimeout", -1));
}
// periodically store the triple store
if (getConfigBool("triplestore.persistent", false)) {
JenaTripleStore.saveAll();
}
return true;
} catch ( final InterruptedException e ) {
this.log.logInfo("cleanupJob: Shutdown detected");

@ -716,22 +716,6 @@ public final class RWIProcess extends Thread
}
}
// check vocabulary constraint
/*
final String tags = page.dc_subject();
final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject());
if (this.query.metatags != null && this.query.metatags.size() > 0) {
// all metatags must appear in the tags list
for (Tagging.Metatag metatag: this.query.metatags) {
if (!Autotagging.metatagAppearIn(metatag, taglist)) {
this.sortout++;
//Log.logInfo("RWIProcess", "sorted out " + page.url());
continue takeloop;
}
}
}
*/
// evaluate information of metadata for navigation
// author navigation:
if ( pageauthor != null && pageauthor.length() > 0 ) {
@ -787,24 +771,6 @@ public final class RWIProcess extends Thread
this.filetypeNavigator.inc(fileext);
}
// vocabulary navigation
/*
tagharvest: for (String tag: taglist) {
if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest;
try {
Tagging.Metatag metatag = LibraryProvider.autotagging.metatag(tag);
ScoreMap<String> voc = this.vocabularyNavigator.get(metatag.getVocabularyName());
if (voc == null) {
voc = new ConcurrentScoreMap<String>();
this.vocabularyNavigator.put(metatag.getVocabularyName(), voc);
}
voc.inc(metatag.getObject());
} catch (RuntimeException e) {
// tag may not be well-formed
}
}
*/
// accept url
return page;
}

@ -306,19 +306,26 @@ public final class yacy {
HTTPClient.setDefaultUserAgent(ClientIdentification.getUserAgent());
// initial fill of the triplestore
try {
File triplestore = new File(sb.getConfig("triplestore", new File(dataHome, "DATA/TRIPLESTORE").getAbsolutePath()));
mkdirIfNeseccary(triplestore);
for (String s: triplestore.list()) {
if ((s.endsWith(".rdf") || s.endsWith(".nt")) && !s.equals("local.rdf") && !s.endsWith("_triplestore.rdf") && !s.startsWith("private_store_")) JenaTripleStore.load(new File(triplestore, s).getAbsolutePath());
File triplestore = new File(sb.getConfig("triplestore", new File(dataHome, "DATA/TRIPLESTORE").getAbsolutePath()));
mkdirIfNeseccary(triplestore);
for (String s: triplestore.list()) {
if ((s.endsWith(".rdf") || s.endsWith(".nt")) && !s.equals("local.rdf") && !s.endsWith("_triplestore.rdf") && !s.startsWith("private_store_")) {
try {
JenaTripleStore.load(new File(triplestore, s).getAbsolutePath());
} catch (IOException e) {
Log.logException(e);
}
}
}
if (sb.getConfigBool("triplestore.persistent", false)) {
File local = new File(triplestore, "local.rdf");
if (local.exists()) {
try {
JenaTripleStore.load(local.getAbsolutePath());
} catch (IOException e) {
Log.logException(e);
}
}
if (sb.getConfigBool("triplestore.persistent", false)) {
File local = new File(triplestore, "local.rdf");
if (local.exists()) JenaTripleStore.load(local.getAbsolutePath());
}
} catch (IOException e) {
Log.logException(e);
}
// start main threads
@ -407,26 +414,11 @@ public final class yacy {
server.terminate(false);
server.interrupt();
server.close();
/*
if (server.isAlive()) try {
// TODO only send request, don't read response (cause server is already down resulting in error)
final DigestURI u = new DigestURI((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port), null);
Client.wget(u.toString(), null, 10000); // kick server
Log.logConfig("SHUTDOWN", "sent termination signal to server socket");
} catch (final IOException ee) {
Log.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)");
}
*/
// Client.closeAllConnections();
// MultiThreadedHttpConnectionManager.shutdownAll();
// idle until the processes are down
if (server.isAlive()) {
//Thread.sleep(2000); // wait a while
server.interrupt();
// MultiThreadedHttpConnectionManager.shutdownAll();
}
// MultiThreadedHttpConnectionManager.shutdownAll();
Log.logConfig("SHUTDOWN", "server has terminated");
sb.close();
} catch (final Exception e) {
@ -440,11 +432,9 @@ public final class yacy {
} finally {
}
// save the triple store
if (sb.getConfigBool("triplestore.persistent", false)) {
File triplestore = new File(sb.getConfig("triplestore", new File(dataHome, "DATA/TRIPLESTORE").getAbsolutePath()));
JenaTripleStore.saveFile(new File(triplestore, "local.rdf").getAbsolutePath());
JenaTripleStore.savePrivateStores(sb);
JenaTripleStore.saveAll();
}
Log.logConfig("SHUTDOWN", "goodbye. (this is the last line)");

Loading…
Cancel
Save