changed behavior of navigation processing:

- vocabulary annotation is no longer written into the metadata of urldb
- vocabularies are written into the jena triplestore using an rdf
vocabulary
- vocabularies for rdf triples must be updated; refactoring done
- with the new navigation tags in the triplestore a faster
pre-urldb-lookup is possible: navigation is processed now within the RWI
during pre-ranking retrieval
- also added an Owl vocabulary stub to add the plain-text url to the
triplestore using the owl:sameAs predicate
pull/1/head
Michael Peter Christen 13 years ago
parent 5fc6524ca8
commit 8b53771db2

@ -41,7 +41,7 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -236,7 +236,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
new HashSet<SimpleVocabulary.Metatag>(),
new HashSet<Tagging.Metatag>(),
"", // no navigation
null, // no snippet computation
count,
@ -299,7 +299,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
new HashSet<SimpleVocabulary.Metatag>(),
new HashSet<Tagging.Metatag>(),
"", // no navigation
null, // no snippet computation
count,

@ -45,7 +45,7 @@ import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -124,9 +124,9 @@ public class yacysearch {
prop.put("focus", ((post == null) ? true : post.get("focus", "1").equals("1")) ? 1 : 0);
// produce vocabulary navigation sidebars
Collection<SimpleVocabulary> vocabularies = LibraryProvider.autotagging.getVocabularies();
Collection<Tagging> vocabularies = LibraryProvider.autotagging.getVocabularies();
int j = 0;
for (SimpleVocabulary v: vocabularies) {
for (Tagging v: vocabularies) {
prop.put("sidebarVocabulary_" + j + "_vocabulary", v.getName());
j++;
}
@ -463,7 +463,7 @@ public class yacysearch {
}
int voc = 0;
Collection<SimpleVocabulary.Metatag> metatags = new ArrayList<SimpleVocabulary.Metatag>(1);
Collection<Tagging.Metatag> metatags = new ArrayList<Tagging.Metatag>(1);
while ((voc = querystring.indexOf("/vocabulary/", 0)) >= 0) {
String vocabulary = "";
int ve = querystring.indexOf(' ', voc + 12);

@ -28,7 +28,7 @@ import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.document.LibraryProvider;
@ -307,7 +307,7 @@ public class yacysearchtrailer {
while (i < 20 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = ve.getValue().get(name);
nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(SimpleVocabulary.encodePrintname(name)).toString();
nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURI.escape(Tagging.encodePrintname(name)).toString();
queryStringForUrl = theQuery.queryStringForUrl();
p = queryStringForUrl.indexOf(nav);
if (p < 0) {

@ -18,6 +18,7 @@ import net.yacy.search.Switchboard;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.util.FileManager;
@ -124,31 +125,31 @@ public class JenaTripleStore {
Resource r = getResource(subject);
Property pr = getProperty(predicate);
r.addProperty(pr, object);
Log.logInfo("TRIPLElSTORE", "ADD " + subject + " - " + predicate + " - " + object);
Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object);
}
public static Iterator<String> getObjects(final String subject, final String predicate) {
Log.logInfo ("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ");
/**
 * List the objects of all statements that match the given subject and predicate.
 * The lookup is logged, the subject is resolved to a resource and the work is
 * delegated to the resource-based overload of this method.
 *
 * @param subject the subject name, resolved with {@code JenaTripleStore.getResource}
 * @param predicate the predicate name
 * @return an iterator over the matching object nodes
 */
public static Iterator<RDFNode> getObjects(final String subject, final String predicate) {
    Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ");
    return getObjects(JenaTripleStore.getResource(subject), predicate);
}
public static Iterator<RDFNode> getObjects(final Resource r, final String predicate) {
final Property pr = JenaTripleStore.getProperty(predicate);
final StmtIterator iter = JenaTripleStore.model.listStatements(r, pr, (Resource) null);
return new Iterator<String>() {
return new Iterator<RDFNode>() {
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public String next() {
return iter.nextStatement().getObject().toString();
public RDFNode next() {
return iter.nextStatement().getObject();
}
@Override
public void remove() {
iter.remove();
}
};
}

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -36,25 +36,25 @@ import java.util.Set;
* http://stackoverflow.com/questions/1414755/java-extend-enum
*/
public interface Vocabulary {
/**
* get the RDF identifier
* get the RDF identifier as an URL stub
* @return
*/
public String getIdentifier();
public String getURLStub();
/**
* get the prefix for the predicates of this vocabulary
* @return
*/
public String getPrefix();
public String getShortName();
/**
* get the predicate name which already contains the prefix and the ':'
* get the predicate name which already contains the prefix url stub
* @return
*/
public String getPredicate();
/**
* get a set of literals that are allowed for the predicate as values
* @return
@ -65,7 +65,7 @@ public interface Vocabulary {
* the name method is identical to the java.lang.Enum method.
* If an Enum class for vocabularies
* implements this interface, the name() method is automatically implemented
*
*
* @return Returns the name of the enum constant as declared in the enum declaration.
*/
public String name();

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -38,7 +38,7 @@ import net.yacy.cora.lod.Vocabulary;
* http://creativecommons.org/ns#
*/
public enum CreativeCommons implements Vocabulary {
// License Properties
permits(new Literal[]{
PermitLiteral.Reproduction,
@ -58,7 +58,7 @@ public enum CreativeCommons implements Vocabulary {
jurisdiction,
legalcode,
deprecatedOn,
// Work Properties
license,
morePermissions,
@ -67,16 +67,16 @@ public enum CreativeCommons implements Vocabulary {
useGuidelines;
enum PermitLiteral implements Literal {
Reproduction("Reproduction", "http://creativecommons.org/ns#Permission", ".*"),
Distribution("Distribution", "http://creativecommons.org/ns#Permission", ".*"),
DerivativeWorks("Derivative Works", "http://creativecommons.org/ns#Permission", ".*"),
Sharing("Sharing", "http://creativecommons.org/ns#Permission", ".*");
String terminal;
MultiProtocolURI subject;
Pattern discoveryPattern;
private PermitLiteral(
String terminal,
String subject,
@ -89,7 +89,7 @@ public enum CreativeCommons implements Vocabulary {
}
this.discoveryPattern = Pattern.compile(discoveryPattern == null ? ".*" : discoveryPattern);
}
@Override
public String getTerminal() { return this.terminal; }
@ -99,20 +99,20 @@ public enum CreativeCommons implements Vocabulary {
@Override
public Pattern getDiscoveryPattern() { return this.discoveryPattern; }
}
enum RequirementLiteral implements Literal {
Notice("Notice", "http://creativecommons.org/ns#Requirement", ".*"),
Attribution("Attribution", "http://creativecommons.org/ns#Requirement", ".*"),
ShareAlike("Share Alike", "http://creativecommons.org/ns#Requirement", ".*"),
SourceCode("Source Code", "http://creativecommons.org/ns#Requirement", ".*"),
Copyleft("Copyleft", "http://creativecommons.org/ns#Requirement", ".*"),
LesserCopyleft("Lesser Copyleft", "http://creativecommons.org/ns#Requirement", ".*");
String terminal;
MultiProtocolURI subject;
Pattern discoveryPattern;
private RequirementLiteral(
String terminal,
String subject,
@ -125,7 +125,7 @@ public enum CreativeCommons implements Vocabulary {
}
this.discoveryPattern = Pattern.compile(discoveryPattern == null ? ".*" : discoveryPattern);
}
@Override
public String getTerminal() { return this.terminal; }
@ -137,14 +137,14 @@ public enum CreativeCommons implements Vocabulary {
}
enum ProhibitionLiteral implements Literal {
CommercialUse("Commercial Use", "http://creativecommons.org/ns#Prohibition", ".*"),
HighIncomeNationUse("High Income Nation Use", "http://creativecommons.org/ns#Prohibition", ".*");
String terminal;
MultiProtocolURI subject;
Pattern discoveryPattern;
private ProhibitionLiteral(
String terminal,
String subject,
@ -157,7 +157,7 @@ public enum CreativeCommons implements Vocabulary {
}
this.discoveryPattern = Pattern.compile(discoveryPattern == null ? ".*" : discoveryPattern);
}
@Override
public String getTerminal() { return this.terminal; }
@ -167,34 +167,34 @@ public enum CreativeCommons implements Vocabulary {
@Override
public Pattern getDiscoveryPattern() { return this.discoveryPattern; }
}
public final static String IDENTIFIER = "http://creativecommons.org/ns#";
public final static String PREFIX = "cc";
private final String predicate;
private final Set<Literal> literals;
private CreativeCommons() {
this.predicate = PREFIX + ":" + this.name();
this.predicate = IDENTIFIER + this.name();
this.literals = null;
}
private CreativeCommons(Literal[] literals) {
this.predicate = PREFIX + ":" + this.name();
this.predicate = IDENTIFIER + this.name();
this.literals = new HashSet<Literal>();
for (Literal l: literals) this.literals.add(l);
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}
@Override
public Set<Literal> getLiterals() {
return null;

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -47,26 +47,26 @@ public enum DublinCore implements Vocabulary {
Subject,
Title,
Type;
public final static String IDENTIFIER = "http://dublincore.org/documents/2010/10/11/dces/";
public final static String PREFIX = "dc";
private final String predicate;
private DublinCore() {
this.predicate = PREFIX + ":" + this.name().toLowerCase();
this.predicate = IDENTIFIER + this.name().toLowerCase();
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}
@Override
public Set<Literal> getLiterals() {
return null;

@ -37,13 +37,13 @@ public enum Foaf implements Vocabulary {
;
@Override
public String getIdentifier() {
public String getURLStub() {
// TODO Auto-generated method stub
return null;
}
@Override
public String getPrefix() {
public String getShortName() {
// TODO Auto-generated method stub
return null;
}

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -33,26 +33,26 @@ public enum Geo implements Vocabulary {
Long,
Lat;
public final static String IDENTIFIER = "http://www.w3.org/2003/01/geo/wgs84_pos#";
public final static String PREFIX = "geo";
private final String predicate;
private Geo() {
this.predicate = PREFIX + ":" + this.name().toLowerCase();
this.predicate = IDENTIFIER + this.name().toLowerCase();
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}
@Override
public Set<Literal> getLiterals() {
return null;

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -33,7 +33,7 @@ import net.yacy.cora.lod.Vocabulary;
public enum HttpHeader implements Vocabulary {
//The following properties may appear in nodes of type Request:
accept, // representing an Accept header,
acceptCharset, // representing an Accept-Charset header,
acceptEncoding, // representing an Accept-Encoding header,
@ -85,26 +85,26 @@ public enum HttpHeader implements Vocabulary {
upgrade, // representing an Upgrade header,
via, // representing a Via header,
warning; // representing a Warning header.
public final static String IDENTIFIER = "http://www.w3.org/WAI/ER/HTTP/WD-HTTP-in-RDF-20060131";
public final static String PREFIX = "http";
private final String predicate;
private HttpHeader() {
this.predicate = PREFIX + ":" + this.name();
this.predicate = IDENTIFIER + this.name();
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}
@Override
public Set<Literal> getLiterals() {
return null;

@ -0,0 +1,64 @@
/**
* Owl
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 11.06.2011 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.lod.vocabulary;
import java.util.Set;
import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
/**
 * Stub of the OWL vocabulary. It currently provides only the sameAs predicate.
 * Every constant carries its predicate as a full URL, built from
 * {@link #IDENTIFIER} plus the predicate name.
 */
public enum Owl implements Vocabulary {

    /** the sameAs predicate; the name is given explicitly to keep its camel-case spelling */
    SameAs("sameAs");

    /** url stub which is the common prefix of all predicates of this vocabulary */
    public final static String IDENTIFIER = "http://www.w3.org/2002/07/owl#";

    /** short name of this vocabulary */
    public final static String PREFIX = "owl";

    private final String predicate;

    /**
     * Derive the predicate from the lower-cased constant name.
     * The conversion uses Locale.ROOT so that the resulting URL does not depend on
     * the default locale of the JVM (i.e. a Turkish locale maps 'I' to a dotless i).
     */
    private Owl() {
        this.predicate = IDENTIFIER + this.name().toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Use an explicit predicate name for constants whose predicate
     * is not the lower-cased constant name.
     * @param name the predicate name without the url stub
     */
    private Owl(String name) {
        this.predicate = IDENTIFIER + name;
    }

    /**
     * get the url stub of this vocabulary
     * @return the value of IDENTIFIER
     */
    @Override
    public String getURLStub() {
        return IDENTIFIER;
    }

    /**
     * get the short name of this vocabulary
     * @return the value of PREFIX
     */
    @Override
    public String getShortName() {
        return PREFIX;
    }

    /**
     * no literals are defined for this vocabulary
     * @return always null
     */
    @Override
    public Set<Literal> getLiterals() {
        return null;
    }

    /**
     * get the predicate name which already contains the url stub
     * @return the full predicate url
     */
    @Override
    public String getPredicate() {
        return this.predicate;
    }
}

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -30,7 +30,7 @@ import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
public enum Rdf implements Vocabulary {
RDF,
Description,
Bag,
@ -39,23 +39,23 @@ public enum Rdf implements Vocabulary {
public final static String IDENTIFIER = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
public final static String PREFIX = "rdf";
private final String predicate;
private Rdf() {
this.predicate = PREFIX + ":" + this.name();
this.predicate = IDENTIFIER + this.name();
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}
@Override
public Set<Literal> getLiterals() {
return null;

@ -18,7 +18,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.lod;
package net.yacy.cora.lod.vocabulary;
import java.io.File;
import java.io.IOException;
@ -33,10 +33,9 @@ import net.yacy.cora.storage.Files;
import net.yacy.document.WordCache.Dictionary;
import net.yacy.document.geolocalization.Localization;
public class SimpleVocabulary {
public class Tagging {
public final static String DEFAULT_SUBJECT_PREFIX = "http://yacy.net/url/";
public final static String DEFAULT_PREDICATE_PREFIX = "http://yacy.net/voc/";
private final static String DEFAULT_IDENTIFIER_STUB = "http://yacy.net/tagging#";
private final String navigatorName;
private final Map<String, String> synonym2term;
@ -45,17 +44,17 @@ public class SimpleVocabulary {
private String predicate, predicatePrefix, objectPrefix;
public SimpleVocabulary(String name) {
public Tagging(String name) {
this.navigatorName = name;
this.synonym2term = new ConcurrentHashMap<String, String>();
this.term2synonym = new ConcurrentHashMap<String, String>();
this.synonym2synonyms = new ConcurrentHashMap<String, Set<String>>();
this.predicatePrefix = DEFAULT_PREDICATE_PREFIX;
this.predicatePrefix = DEFAULT_IDENTIFIER_STUB;
this.predicate = this.predicatePrefix + name;
this.objectPrefix = "";
}
public SimpleVocabulary(String name, File propFile) throws IOException {
public Tagging(String name, File propFile) throws IOException {
this(name);
BlockingQueue<String> list = Files.concurentLineReader(propFile, 1000);
String term, v;
@ -122,7 +121,7 @@ public class SimpleVocabulary {
}
}
public SimpleVocabulary(String name, Localization localization) {
public Tagging(String name, Localization localization) {
this(name);
Set<String> locNames = localization.locationNames();
for (String loc: locNames) {
@ -131,7 +130,7 @@ public class SimpleVocabulary {
}
}
public SimpleVocabulary(String name, Dictionary dictionary) {
public Tagging(String name, Dictionary dictionary) {
this(name);
Set<StringBuilder> words = dictionary.getWords();
String s;
@ -142,6 +141,14 @@ public class SimpleVocabulary {
}
}
/**
* get the predicate name which already contains the prefix url stub
* @return
*/
public String getPredicate() {
return this.predicate;
}
private final String normalizeKey(String k) {
k = k.trim();
k = k.replaceAll(" \\+", ", "); // remove symbols that are bad in a query attribute
@ -152,22 +159,6 @@ public class SimpleVocabulary {
return k;
}
/**
* get the RDF predicate name
* @return
*/
public String getPredicate() {
return this.predicate;
}
/**
* get the prefix of the object name
* @return
*/
public String getObjectPrefix() {
return this.objectPrefix;
}
/**
* get the name of the navigator; this is part of the RDF predicate name (see: getPredicate())
* @return
@ -176,10 +167,14 @@ public class SimpleVocabulary {
return this.navigatorName;
}
public Metatag getMetatag(char prefix, final String word) {
public Metatag getMetatagFromSynonym(char prefix, final String word) {
String printname = this.synonym2term.get(word);
if (printname == null) return null;
return new Metatag(prefix, this.navigatorName, this.predicate, printname);
return new Metatag(prefix, printname);
}
public Metatag getMetatagFromTerm(char prefix, final String word) {
return new Metatag(prefix, word);
}
public Set<String> getSynonyms(String term) {
@ -190,6 +185,17 @@ public class SimpleVocabulary {
return this.synonym2term.keySet();
}
/**
 * Two taggings are equal if they have the same navigator name.
 * Objects of another type, and null, are never equal to a tagging;
 * the type check avoids a ClassCastException or NullPointerException
 * when a tagging is compared with foreign objects, e.g. inside collections.
 * @param m the object to compare with
 * @return true if m is a Tagging with the same navigator name
 */
@Override
public boolean equals(Object m) {
    if (this == m) return true;
    if (!(m instanceof Tagging)) return false; // this rejects null as well
    return this.navigatorName.equals(((Tagging) m).navigatorName);
}

/**
 * The hash code is computed from the navigator name only,
 * which is the same attribute that equals() compares.
 * @return the hash code of the navigator name
 */
@Override
public int hashCode() {
    return this.navigatorName.hashCode();
}
@Override
public String toString() {
return this.term2synonym.toString();
@ -209,31 +215,20 @@ public class SimpleVocabulary {
return word;
}
public static class Metatag {
private final String vocName, predicate, object;
public class Metatag {
private final String object;
private final char prefix;
public Metatag(char prefix, String vocName, String predicate, String object) {
public Metatag(char prefix, String object) {
this.prefix = prefix;
this.vocName = vocName;
this.predicate = predicate;
this.object = object;
}
public Metatag(char prefix, String metatag) throws RuntimeException {
this.prefix = prefix;
assert metatag.charAt(0) == prefix;
int p = metatag.indexOf(':');
if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag);
this.vocName = metatag.substring(1, p);
this.predicate = DEFAULT_PREDICATE_PREFIX + this.vocName;
this.object = decodeMaskname(metatag.substring(p + 1));
}
public String getVocabularyName() {
return this.vocName;
return Tagging.this.navigatorName;
}
public String getPredicate() {
return this.predicate;
return Tagging.this.predicate;
}
public String getObject() {
@ -242,18 +237,18 @@ public class SimpleVocabulary {
@Override
public String toString() {
return this.prefix + this.vocName + ":" + encodePrintname(this.object);
return this.prefix + Tagging.this.navigatorName + ":" + encodePrintname(this.object);
}
@Override
public boolean equals(Object m) {
Metatag m0 = (Metatag) m;
return this.vocName.equals(m0.vocName) && this.object.equals(m0.object);
return Tagging.this.navigatorName.equals(m0.getVocabularyName()) && this.object.equals(m0.object);
}
@Override
public int hashCode() {
return this.vocName.hashCode() + this.object.hashCode();
return Tagging.this.navigatorName.hashCode() + this.object.hashCode();
}
}
@ -280,4 +275,5 @@ public class SimpleVocabulary {
if (sb.length() == 0) return "";
return sb.substring(0, sb.length() - 1);
}
}

@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
@ -27,6 +27,7 @@ package net.yacy.cora.lod.vocabulary;
import java.util.Set;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
@ -46,13 +47,13 @@ public enum YaCyMetadata implements Vocabulary {
dt, // doctype, taken from extension or any other heuristic
flags, // flags; any stuff (see Word-Entity definition)
lang, // language
llocal, // # of outlinks to same domain; for video and image: width
llocal, // # of outlinks to same domain; for video and image: width
lother, // # of outlinks to outside domain; for video and image: height
limage, // # of embedded image links
laudio, // # of embedded audio links; for audio: track number; for video: number of audio tracks
lvideo, // # of embedded video links
lapp; // # of embedded links to applications
/*
"String hash-12, " + // the url's hash
"Cardinal mod-4 {b256}, " + // last-modified from the httpd
@ -65,7 +66,7 @@ public enum YaCyMetadata implements Vocabulary {
"byte[] dt-1, " + // doctype, taken from extension or any other heuristic
"Bitfield flags-4, " + // flags; any stuff (see Word-Entity definition)
"String lang-2, " + // language
"Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width
"Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width
"Cardinal lother-2 {b256}, " + // # of outlinks to outside domain; for video and image: height
"Cardinal limage-2 {b256}, " + // # of embedded image links
"Cardinal laudio-2 {b256}, " + // # of embedded audio links; for audio: track number; for video: number of audio tracks
@ -73,25 +74,34 @@ public enum YaCyMetadata implements Vocabulary {
"Cardinal lapp-2 {b256}", // # of embedded links to applications
*/
public final static byte[] HASH_PREFIX = ASCII.getBytes("http://yacy.net/hash#");
public final static int HASH_PREFIX_LENGTH = HASH_PREFIX.length;
public final static String IDENTIFIER = "http://yacy.net/vocabularies/yacymetadata#";
public final static String PREFIX = "yacy";
private final String predicate;
/**
 * Compute the URI of a url hash: the bytes of HASH_PREFIX followed by the
 * bytes of the hash, converted to a String using ASCII.String.
 * @param hash the bytes of the url hash
 * @return the hash URI as String
 */
public static String hashURI(byte[] hash) {
    final int total = HASH_PREFIX_LENGTH + hash.length;
    // copy of the prefix, already extended to the final length
    final byte[] uri = java.util.Arrays.copyOf(HASH_PREFIX, total);
    // append the hash right behind the prefix
    System.arraycopy(hash, 0, uri, HASH_PREFIX_LENGTH, hash.length);
    return ASCII.String(uri);
}
private YaCyMetadata() {
this.predicate = PREFIX + ":" + this.name();
this.predicate = IDENTIFIER + this.name();
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}
@Override
public Set<Literal> getLiterals() {
return null;

@ -30,7 +30,7 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.document.WordCache.Dictionary;
import net.yacy.document.geolocalization.Localization;
import net.yacy.kelondro.logging.Log;
@ -46,11 +46,11 @@ public class Autotagging {
public final char prefixChar;
private final File autotaggingPath;
private final Map<String, SimpleVocabulary> vocabularies;
private final Map<String, Tagging> vocabularies; // mapping from vocabulary name to the tagging vocabulary
private final Map<String, Object> allTags;
public Autotagging(final File autotaggingPath, char prefixChar) {
this.vocabularies = new ConcurrentHashMap<String, SimpleVocabulary>();
this.vocabularies = new ConcurrentHashMap<String, Tagging>();
this.autotaggingPath = autotaggingPath;
this.prefixChar = prefixChar;
this.allTags = new ConcurrentHashMap<String, Object>();
@ -79,7 +79,7 @@ public class Autotagging {
File ff = new File(this.autotaggingPath, f);
String vocName = ff.getName();
vocName = vocName.substring(0, vocName.length() - 11);
SimpleVocabulary voc = new SimpleVocabulary(vocName, ff);
Tagging voc = new Tagging(vocName, ff);
this.vocabularies.put(vocName, voc);
for (String t: voc.tags()) {
this.allTags.put(t, PRESENT);
@ -91,7 +91,11 @@ public class Autotagging {
}
}
public Collection<SimpleVocabulary> getVocabularies() {
public Tagging getVocabulary(String name) {
return this.vocabularies.get(name);
}
public Collection<Tagging> getVocabularies() {
return this.vocabularies.values();
}
@ -101,7 +105,7 @@ public class Autotagging {
public void addDictionaries(Map<String, Dictionary> dictionaries) {
for (Map.Entry<String, Dictionary> entry: dictionaries.entrySet()) {
SimpleVocabulary voc = new SimpleVocabulary(entry.getKey(), entry.getValue());
Tagging voc = new Tagging(entry.getKey(), entry.getValue());
this.vocabularies.put(entry.getKey(), voc);
for (String t: voc.tags()) {
this.allTags.put(t, PRESENT);
@ -110,7 +114,7 @@ public class Autotagging {
}
public void addLocalization(Localization localization) {
SimpleVocabulary voc = new SimpleVocabulary("Locale", localization);
Tagging voc = new Tagging("Locale", localization);
this.vocabularies.put("Locale", voc);
for (String t: voc.tags()) {
this.allTags.put(t, PRESENT);
@ -135,18 +139,18 @@ public class Autotagging {
return as;
}
public SimpleVocabulary.Metatag getTagFromWord(String word) {
public Tagging.Metatag getTagFromWord(String word) {
if (this.vocabularies.isEmpty()) return null;
SimpleVocabulary.Metatag tag;
word = SimpleVocabulary.normalizeWord(word);
for (Map.Entry<String, SimpleVocabulary> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatag(this.prefixChar, word);
Tagging.Metatag tag;
word = Tagging.normalizeWord(word);
for (Map.Entry<String, Tagging> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatagFromSynonym(this.prefixChar, word);
if (tag != null) return tag;
}
return null;
}
public static boolean metatagAppearIn(final SimpleVocabulary.Metatag metatag, final String[] tags) {
public static boolean metatagAppearIn(final Tagging.Metatag metatag, final String[] tags) {
String tag = metatag.toString();
for (String s: tags) {
if (tag.equals(s)) return true;
@ -154,17 +158,21 @@ public class Autotagging {
return false;
}
public SimpleVocabulary.Metatag metatag(String metatag) {
return new SimpleVocabulary.Metatag(this.prefixChar, metatag);
/**
 * Parse a metatag string into a Metatag object of the matching vocabulary.
 * The text between the first character and the first ':' is taken as the
 * vocabulary name; the text after the ':' is decoded with
 * Tagging.decodeMaskname and used as the term.
 * @param metatag the metatag string; the first character is skipped (presumably the prefix char)
 * @return the Metatag of the named vocabulary for the decoded term
 * @throws RuntimeException if the metatag contains no ':' or if it names a vocabulary that is not known here
 */
public Tagging.Metatag metatag(String metatag) {
    int p = metatag.indexOf(':');
    if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag);
    String vocName = metatag.substring(1, p);
    Tagging tagging = this.vocabularies.get(vocName);
    // a metatag may name a vocabulary which is not loaded; report this with a
    // clear message instead of failing with a NullPointerException in the next line
    if (tagging == null) throw new RuntimeException("bad metatag: unknown vocabulary '" + vocName + "', metatag = " + metatag);
    return tagging.getMetatagFromTerm(this.prefixChar, Tagging.decodeMaskname(metatag.substring(p + 1)));
}
public String cleanTagFromAutotagging(String tagString) {
return SimpleVocabulary.cleanTagFromAutotagging(this.prefixChar, tagString);
return Tagging.cleanTagFromAutotagging(this.prefixChar, tagString);
}
public static void main(String[] args) {
Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
for (Map.Entry<String, SimpleVocabulary> entry: a.vocabularies.entrySet()) {
for (Map.Entry<String, Tagging> entry: a.vocabularies.entrySet()) {
System.out.println(entry);
}
Set<String> tags = a.getPrintTagsFromText("In die Tueren und Fluchttueren muessen noch Schloesser eingebaut werden");

@ -40,7 +40,7 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.document.language.Identificator;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.word.Word;
@ -86,7 +86,7 @@ public final class Condenser {
//private Properties analysis;
private final Map<String, Word> words; // a string (the words) to (indexWord) - relation
private final Set<SimpleVocabulary.Metatag> tags = new HashSet<SimpleVocabulary.Metatag>(); // a set of tags, discovered from Autotagging
private final Map<String, Set<Tagging.Metatag>> tags = new HashMap<String, Set<Tagging.Metatag>>(); // a set of tags, discovered from Autotagging
//public int RESULT_NUMB_TEXT_BYTES = -1;
public int RESULT_NUMB_WORDS = -1;
@ -300,7 +300,7 @@ public final class Condenser {
final Set<String> currsentwords = new HashSet<String>();
String word = "";
String k;
SimpleVocabulary.Metatag tag;
Tagging.Metatag tag;
int wordlen;
Word wsp;
final Word wsp1;
@ -324,7 +324,16 @@ public final class Condenser {
// get tags from autotagging
if (doAutotagging) {
tag = LibraryProvider.autotagging.getTagFromWord(word);
if (tag != null) this.tags.add(tag);
if (tag != null) {
Set<Tagging.Metatag> tagset = this.tags.get(tag.getVocabularyName());
if (tagset == null) {
tagset = new HashSet<Tagging.Metatag>();
tagset.add(tag);
this.tags.put(tag.getVocabularyName(), tagset);
} else {
tagset.add(tag);
}
}
}
// distinguish punctuation and words

@ -52,12 +52,13 @@ import java.util.Set;
import java.util.TreeSet;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Owl;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
@ -214,19 +215,24 @@ dc_rights
* These keywords will appear in dc_subject
* @param tags
*/
public void addMetatags(Set<SimpleVocabulary.Metatag> tags) {
public void addMetatags(Map<String, Set<Tagging.Metatag>> tags) {
for (String s: this.keywords) {
tags.remove(s);
}
for (SimpleVocabulary.Metatag s: tags) {
String t = s.toString();
if (!this.keywords.contains(t)) {
this.keywords.add(t);
for (Map.Entry<String, Set<Tagging.Metatag>> e: tags.entrySet()) {
StringBuilder sb = new StringBuilder(e.getValue().size() * 20);
for (Tagging.Metatag s: e.getValue()) {
String t = s.toString();
if (!this.keywords.contains(t)) {
this.keywords.add(t);
}
sb.append(',').append(s.getObject());
}
// put to triplestore
String subject = SimpleVocabulary.DEFAULT_SUBJECT_PREFIX + ASCII.String(this.source.hash());
JenaTripleStore.addTriple(subject, s.getPredicate(), s.getObject());
JenaTripleStore.addTriple(subject, "http://www.w3.org/2002/07/owl#sameAs", this.source.toNormalform(true, false));
String subject = YaCyMetadata.hashURI(this.source.hash());
Tagging vocabulary = LibraryProvider.autotagging.getVocabulary(e.getKey());
JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1));
JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true, false));
}
}

@ -41,12 +41,12 @@ public enum MetadataVocabulary implements Vocabulary {
}
@Override
public String getIdentifier() {
public String getURLStub() {
return IDENTIFIER;
}
@Override
public String getPrefix() {
public String getShortName() {
return PREFIX;
}

@ -44,7 +44,7 @@ import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Condenser;
import net.yacy.document.parser.html.AbstractScraper;
@ -115,7 +115,7 @@ public final class QueryParams {
public final boolean urlMask_isCatchall, prefer_isMatchnothing;
public final Classification.ContentDomain contentdom;
public final String targetlang;
public final Collection<SimpleVocabulary.Metatag> metatags;
public final Collection<Tagging.Metatag> metatags;
public final String navigators;
public final Searchdom domType;
public final int zonecode;
@ -180,7 +180,7 @@ public final class QueryParams {
this.itemsPerPage = itemsPerPage;
this.offset = 0;
this.targetlang = "en";
this.metatags = new ArrayList<SimpleVocabulary.Metatag>(0);
this.metatags = new ArrayList<Tagging.Metatag>(0);
this.domType = Searchdom.LOCAL;
this.zonecode = DigestURI.TLD_any_zone_filter;
this.domMaxTargets = 0;
@ -215,7 +215,7 @@ public final class QueryParams {
final String modifier,
final int maxDistance, final String prefer, final ContentDomain contentdom,
final String language,
final Collection<SimpleVocabulary.Metatag> metatags,
final Collection<Tagging.Metatag> metatags,
final String navigators,
final CacheStrategy snippetCacheStrategy,
final int itemsPerPage, final int offset, final String urlMask,

@ -42,7 +42,9 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.lod.SimpleVocabulary;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.Scanner;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.sorting.ClusteredScoreMap;
@ -50,7 +52,6 @@ import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.document.Autotagging;
import net.yacy.document.Condenser;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.DigestURI;
@ -71,6 +72,9 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ResultEntry;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
public final class RWIProcess extends Thread
{
@ -109,7 +113,8 @@ public final class RWIProcess extends Thread
private final ScoreMap<String> namespaceNavigator; // a counter for name spaces
private final ScoreMap<String> protocolNavigator; // a counter for protocol types
private final ScoreMap<String> filetypeNavigator; // a counter for file types
private final Map<String, ScoreMap<String>> vocabularyNavigator; // counters for Vocabularies
private final Map<String, ScoreMap<String>> vocabularyNavigator; // counters for Vocabularies; key is metatag.getVocabularyName()
private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
public RWIProcess(final QueryParams query, final ReferenceOrder order, final boolean remote) {
// we collect the urlhashes and construct a list with urlEntry objects
@ -147,6 +152,13 @@ public final class RWIProcess extends Thread
this.expectedRemoteReferences = new AtomicInteger(0);
this.receivedRemoteReferences = new AtomicInteger(0);
this.maxtime = query.maxtime;
// pre-calculate some values for navigation
this.taggingPredicates = new HashMap<String, String>();
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
this.taggingPredicates.put(t.getName(), t.getPredicate());
}
}
public void addExpectedRemoteReferences(int x) {
@ -357,6 +369,35 @@ public final class RWIProcess extends Thread
}
}
// check vocabulary constraint
String subject = YaCyMetadata.hashURI(iEntry.urlhash());
Resource resource = JenaTripleStore.getResource(subject);
if (this.query.metatags != null && this.query.metatags.size() > 0) {
// all metatags must appear in the tags list
for (Tagging.Metatag metatag: this.query.metatags) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, metatag.getPredicate());
if (!ni.hasNext()) continue pollloop;
String tags = ni.next().toString();
if (tags.indexOf(metatag.getObject()) < 0) continue pollloop;
}
}
// add navigators using the triplestore
for (Map.Entry<String, String> v: this.taggingPredicates.entrySet()) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, v.getValue());
while (ni.hasNext()) {
String[] tags = ni.next().toString().split(",");
for (String tag: tags) {
ScoreMap<String> voc = this.vocabularyNavigator.get(v.getKey());
if (voc == null) {
voc = new ConcurrentScoreMap<String>();
this.vocabularyNavigator.put(v.getKey(), voc);
}
voc.inc(tag);
}
}
}
// finally make a double-check and insert result to stack
// the url hashes should be unique, no reason to check that
//if (!this.urlhashes.has(iEntry.urlhash())) {
@ -678,11 +719,12 @@ public final class RWIProcess extends Thread
}
// check vocabulary constraint
/*
final String tags = page.dc_subject();
final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject());
if (this.query.metatags != null && this.query.metatags.size() > 0) {
// all metatags must appear in the tags list
for (SimpleVocabulary.Metatag metatag: this.query.metatags) {
for (Tagging.Metatag metatag: this.query.metatags) {
if (!Autotagging.metatagAppearIn(metatag, taglist)) {
this.sortout++;
//Log.logInfo("RWIProcess", "sorted out " + page.url());
@ -690,6 +732,7 @@ public final class RWIProcess extends Thread
}
}
}
*/
// evaluate information of metadata for navigation
// author navigation:
@ -747,10 +790,11 @@ public final class RWIProcess extends Thread
}
// vocabulary navigation
/*
tagharvest: for (String tag: taglist) {
if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest;
try {
SimpleVocabulary.Metatag metatag = LibraryProvider.autotagging.metatag(tag);
Tagging.Metatag metatag = LibraryProvider.autotagging.metatag(tag);
ScoreMap<String> voc = this.vocabularyNavigator.get(metatag.getVocabularyName());
if (voc == null) {
voc = new ConcurrentScoreMap<String>();
@ -761,6 +805,7 @@ public final class RWIProcess extends Thread
// tag may not be well-formed
}
}
*/
// accept url
return page;

@ -532,7 +532,7 @@ public class SnippetProcess {
}
}
if (System.currentTimeMillis() >= this.timeout) {
Log.logWarning("SnippetProcess", "worker ended with timoeout");
Log.logWarning("SnippetProcess", "worker ended with timeout");
}
//System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops);
} catch (final Exception e) {

Loading…
Cancel
Save