removed dependency of vocabulary navigation from Jena and its triplestore; the vocabulary search is now done using generic Solr fields which are created on-the-fly at runtime.
pull/1/head
Michael Peter Christen 12 years ago
parent 664499bb10
commit 34f8786508
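
Editorial note: for orientation, a minimal sketch of the naming convention this commit introduces. Each autotagging vocabulary gets its own dynamically allocated, multi-valued Solr field whose name is composed from the new VOCABULARY_PREFIX/VOCABULARY_SUFFIX constants added to YaCySchema below. The constant values are copied from the hunk; the wrapper class, method name, and the vocabulary name "Products" are illustrative only.

```java
public final class VocabularyFieldNameSketch {

    // mirrors YaCySchema.VOCABULARY_PREFIX and YaCySchema.VOCABULARY_SUFFIX from this commit
    private static final String VOCABULARY_PREFIX = "vocabulary_";
    private static final String VOCABULARY_SUFFIX = "_sxt";

    /** Compose the dynamic Solr field name used for a vocabulary navigator. */
    public static String fieldName(String vocabularyName) {
        return VOCABULARY_PREFIX + vocabularyName + VOCABULARY_SUFFIX;
    }

    public static void main(String[] args) {
        // a hypothetical vocabulary named "Products" is indexed and faceted as "vocabulary_Products_sxt"
        System.out.println(fieldName("Products"));
    }
}
```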

@ -293,7 +293,7 @@ public class yacysearchtrailer {
}
// vocabulary navigators
final Map<String, ScoreMap<String>> vocabularyNavigators = theSearch.rankingProcess.getVocabularyNavigators();
final Map<String, ScoreMap<String>> vocabularyNavigators = theSearch.vocabularyNavigator;
if (vocabularyNavigators != null && !vocabularyNavigators.isEmpty()) {
int navvoccount = 0;
vocnav: for (Map.Entry<String, ScoreMap<String>> ve: vocabularyNavigators.entrySet()) {

@ -193,6 +193,9 @@ public enum YaCySchema implements Schema {
ext_tracker_val(SolrType.num_integer, true, true, true, "number of attribute counts in ext_tracker_txt"),
ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"),
ext_title_val(SolrType.num_integer, true, true, true, "number of matching title expressions");
public final static String VOCABULARY_PREFIX = "vocabulary_";
public final static String VOCABULARY_SUFFIX = "_sxt";
private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() )
private final SolrType type;

@ -60,14 +60,12 @@ public class JenaTripleStore {
public static ConcurrentHashMap<String, Model> privatestorage = null;
public static String file;
public static void load(String filename) throws IOException {
if (filename.endsWith(".nt")) LoadNTriples(filename);
else loadRDF(filename);
}
public static void loadRDF(String fileNameOrUri) throws IOException {
private static void loadRDF(String fileNameOrUri) throws IOException {
Model tmp = ModelFactory.createDefaultModel();
log.info("Loading from " + fileNameOrUri);
InputStream is = FileManager.get().open(fileNameOrUri);
@ -81,7 +79,7 @@ public class JenaTripleStore {
}
}
public static void LoadNTriples(String fileNameOrUri) throws IOException {
private static void LoadNTriples(String fileNameOrUri) throws IOException {
log.info("Loading N-Triples from " + fileNameOrUri);
InputStream is = FileManager.get().open(fileNameOrUri);
LoadNTriples(is);
@ -111,11 +109,11 @@ public class JenaTripleStore {
}
}
public static void saveFile(String filename) {
private static void saveFile(String filename) {
saveFile(filename, model);
}
public static void saveFile(String filename, Model model) {
private static void saveFile(String filename, Model model) {
File f = new File(filename);
File ftmp = new File(filename + "." + System.currentTimeMillis());
if (model.isEmpty() && !f.exists()) {
@ -154,7 +152,7 @@ public class JenaTripleStore {
* @param uri
* @return
*/
public static Resource getResource(String uri) {
private static Resource getResource(String uri) {
return model.getResource(uri);
}
@ -174,7 +172,7 @@ public class JenaTripleStore {
addTriple (subject, predicate, object, model);
}
public static void addTriple(String subject, String predicate, String object, Model model) {
private static void addTriple(String subject, String predicate, String object, Model model) {
Resource r = model.getResource(subject);
Property pr = model.getProperty(predicate);
r.addProperty(pr, object);
@ -209,7 +207,7 @@ public class JenaTripleStore {
return null;
}
public static Iterator<RDFNode> getObjects(final Resource r, final String predicate) {
private static Iterator<RDFNode> getObjects(final Resource r, final String predicate) {
return getObjects(r, predicate, model);
}
@ -309,7 +307,7 @@ public class JenaTripleStore {
}
}
public static void savePrivateStores() {
private static void savePrivateStores() {
Switchboard switchboard = Switchboard.getSwitchboard();
log.info("Saving user triplestores");
if (privatestorage == null) return;

@ -34,7 +34,6 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.WordCache.Dictionary;
import net.yacy.cora.geo.GeoLocation;
import net.yacy.cora.geo.Locations;
import net.yacy.cora.storage.Files;
@ -66,7 +65,7 @@ public class Tagging {
this.objectlink = objectlink;
}
public SOTuple(String[] synonyms, String objectlink) {
private SOTuple(String[] synonyms, String objectlink) {
StringBuilder sb = new StringBuilder(synonyms.length * 10);
for (String s: synonyms) sb.append(',').append(s);
this.synonyms = sb.substring(1);
@ -87,7 +86,7 @@ public class Tagging {
}
public Tagging(String name) {
private Tagging(String name) {
this.navigatorName = name;
this.synonym2term = new ConcurrentHashMap<String, String>();
this.term2synonym = new ConcurrentHashMap<String, String>();
@ -188,18 +187,7 @@ public class Tagging {
}
}
public Tagging(String name, Dictionary dictionary) {
this(name);
Set<StringBuilder> words = dictionary.getWords();
String s;
for (StringBuilder word: words) {
s = word.toString();
this.synonym2term.put(s.toLowerCase(), s);
this.term2synonym.put(s, s.toLowerCase());
}
}
public void init() throws IOException {
private void init() throws IOException {
if (this.propFile == null) return;
this.synonym2term.clear();
this.term2synonym.clear();
@ -378,7 +366,7 @@ public class Tagging {
init();
}
public Map<String, Set<String>> reconstructionSets() {
private Map<String, Set<String>> reconstructionSets() {
Map<String, Set<String>> r = new TreeMap<String, Set<String>>();
for (Map.Entry<String, String> e: this.term2synonym.entrySet()) {
Set<String> s = r.get(e.getKey());
@ -399,7 +387,7 @@ public class Tagging {
return r;
}
public Map<String, SOTuple> reconstructionLists() {
private Map<String, SOTuple> reconstructionLists() {
Map<String, Set<String>> r = reconstructionSets();
Map<String, SOTuple> map = new TreeMap<String, SOTuple>();
for (Map.Entry<String, Set<String>> e: r.entrySet()) {
@ -511,10 +499,6 @@ public class Tagging {
return new Metatag(word);
}
public Set<String> getSynonyms(String term) {
return this.synonym2synonyms.get(term);
}
public Set<String> tags() {
return this.synonym2term.keySet();
}
@ -556,7 +540,7 @@ public class Tagging {
public class Metatag {
private final String object;
public Metatag(String object) {
private Metatag(String object) {
this.object = object;
}

@ -252,7 +252,7 @@ public final class Condenser {
this.exact_signature = l;
}
public Condenser(final String text, final WordCache meaningLib, boolean doAutotagging) {
private Condenser(final String text, final WordCache meaningLib, boolean doAutotagging) {
this.languageIdentificator = null; // we don't need that here
// analysis = new Properties();
this.words = new TreeMap<String, Word>();
@ -372,10 +372,11 @@ public final class Condenser {
//System.out.println("Testing: " + testterm);
tag = LibraryProvider.autotagging.getTagFromTerm(testterm);
if (tag != null) {
Set<Tagging.Metatag> tagset = this.tags.get(tag.getVocabularyName());
String navigatorName = tag.getVocabularyName();
Set<Tagging.Metatag> tagset = this.tags.get(navigatorName);
if (tagset == null) {
tagset = new HashSet<Tagging.Metatag>();
this.tags.put(tag.getVocabularyName(), tagset);
this.tags.put(navigatorName, tagset);
}
tagset.add(tag);
}

@ -56,11 +56,7 @@ import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.DCTerms;
import net.yacy.cora.lod.vocabulary.Owl;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ImageEntry;
@ -96,6 +92,7 @@ public class Document {
private final boolean indexingDenied;
private final double lon, lat;
private final Object parserObject; // the source object that was used to create the Document
private final Map<String, Set<String>> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document
public Document(final DigestURI location, final String mimeType, final String charset,
final Object parserObject,
@ -138,6 +135,7 @@ public class Document {
this.languages = languages;
this.indexingDenied = indexingDenied;
this.text = text == null ? "" : text;
this.generic_facets = new HashMap<String, Set<String>>();
}
public Object getParserObject() {
@ -152,6 +150,10 @@ public class Document {
return this.source.getFileExtension();
}
public Map<String, Set<String>> getGenericFacets() {
return this.generic_facets;
}
/**
* compute a set of languages that this document contains
* the language is not computed using a statistical analysis of the content, only from given metadata that came with the document
@ -202,9 +204,7 @@ dc_rights
if (title != null) this.titles.add(title);
}
public void addTitle(final String title) {
if (title != null) this.titles.add(title);
}
public String dc_creator() {
return (this.creator == null) ? "" : this.creator.toString();
@ -229,26 +229,31 @@ dc_rights
* These keywords will appear in dc_subject
* @param tags
*/
public void addMetatags(Map<String, Set<Tagging.Metatag>> tags) {
String subject = YaCyMetadata.hashURI(this.source.hash());
protected void addMetatags(Map<String, Set<Tagging.Metatag>> tags) {
//String subject = YaCyMetadata.hashURI(this.source.hash());
//for (String s: this.keywords) {
// tags.remove(s);
//}
for (Map.Entry<String, Set<Tagging.Metatag>> e: tags.entrySet()) {
Tagging vocabulary = LibraryProvider.autotagging.getVocabulary(e.getKey());
if (vocabulary == null) continue;
String objectspace = vocabulary.getObjectspace();
StringBuilder sb = new StringBuilder(e.getValue().size() * 20);
//String objectspace = vocabulary.getObjectspace();
//StringBuilder sb = new StringBuilder(e.getValue().size() * 20);
Set<String> objects = new HashSet<String>();
for (Tagging.Metatag s: e.getValue()) {
sb.append(',').append(s.getObject());
objects.add(s.getObject());
//sb.append(',').append(s.getObject());
/*
String objectlink = vocabulary.getObjectlink(s.getObject());
if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) {
JenaTripleStore.addTriple(subject, DCTerms.references.getPredicate(), objectlink == null || objectlink.isEmpty() ? objectspace + s.getObject() + "#" + s.getObject() : objectlink + "#" + s.getObject());
}
*/
}
// put to triplestore
JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1));
JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true));
//JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true));
//JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1)); // superfluous with the generic_facets
this.generic_facets.put(vocabulary.getName(), objects);
}
}
@ -580,7 +585,7 @@ dc_rights
return v;
}
public static Map<MultiProtocolURI, String> allReflinks(final Collection<?> links) {
private static Map<MultiProtocolURI, String> allReflinks(final Collection<?> links) {
// links is either a Set of Strings (with urls) or
// htmlFilterImageEntries
// we find all links that are part of a reference inside a url

@ -748,7 +748,7 @@ public final class Protocol
return result.urlcount;
}
public static void remoteSearchProcess(
private static void remoteSearchProcess(
final SearchEvent event,
final int count,
final long time,
@ -1037,7 +1037,7 @@ public final class Protocol
solrQuery.setRows(count);
// set facet query attributes
if (getFacets && event.query.facetfields.length > 0) {
if (getFacets && event.query.facetfields.size() > 0) {
solrQuery.setFacet(true);
solrQuery.setFacetLimit(event.query.maxfacets);
solrQuery.setFacetSort(FacetParams.FACET_SORT_COUNT);
@ -1082,7 +1082,7 @@ public final class Protocol
}
// evaluate facets
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.length);
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.size());
for (String field: event.query.facetfields) {
FacetField facet = rsp.getFacetField(field);
ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);

@ -777,6 +777,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (allAttr || contains(YaCySchema.videolinkscount_i)) add(doc, YaCySchema.videolinkscount_i, document.getVideolinks().size());
if (allAttr || contains(YaCySchema.applinkscount_i)) add(doc, YaCySchema.applinkscount_i, document.getApplinks().size());
// write generic navigation
// there are no pre-defined solr fields for navigation because the vocabulary is generic
// we use dynamically allocated solr fields for this.
// It must be a multi-value string/token field, therefore we use _sxt extensions for the field names
for (Map.Entry<String, Set<String>> facet: document.getGenericFacets().entrySet()) {
String facetName = facet.getKey();
Set<String> facetValues = facet.getValue();
doc.setField(YaCySchema.VOCABULARY_PREFIX + facetName + YaCySchema.VOCABULARY_SUFFIX, facetValues.toArray(new String[facetValues.size()]));
}
return doc;
}
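
Editorial note: the comment in this hunk states the design constraint: the set of vocabularies is open-ended, so no pre-defined schema fields exist and the tags must land in multi-valued dynamic fields (the `_sxt` suffix presumably maps to a multi-valued string dynamicField in the bundled Solr schema). Below is a self-contained sketch of this indexing step in plain SolrJ, assuming a facet map shaped like the one returned by Document.getGenericFacets(); the class name is illustrative.

```java
import java.util.Map;
import java.util.Set;

import org.apache.solr.common.SolrInputDocument;

public final class GenericFacetIndexingSketch {

    // mirrors the constants from YaCySchema in this commit
    private static final String VOCABULARY_PREFIX = "vocabulary_";
    private static final String VOCABULARY_SUFFIX = "_sxt";

    /** Write each vocabulary's tags into its own dynamically named multi-valued field. */
    public static void addGenericFacets(SolrInputDocument doc, Map<String, Set<String>> genericFacets) {
        for (Map.Entry<String, Set<String>> facet : genericFacets.entrySet()) {
            Set<String> values = facet.getValue();
            doc.setField(VOCABULARY_PREFIX + facet.getKey() + VOCABULARY_SUFFIX,
                         values.toArray(new String[values.size()]));
        }
    }
}
```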

@ -31,6 +31,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
@ -55,7 +56,7 @@ import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -107,7 +108,7 @@ public final class QueryParams {
public static final Pattern catchall_pattern = Pattern.compile(".*");
private static final Pattern matchnothing_pattern = Pattern.compile("");
public final QueryGoal queryGoal;
private final QueryGoal queryGoal;
public int itemsPerPage;
public int offset;
public final Pattern urlMask, prefer;
@ -124,7 +125,7 @@ public final class QueryParams {
public final RankingProfile ranking;
private final Segment indexSegment;
public final String clienthost; // this is the client host that starts the query, not a site operator
public final String nav_sitehost; // this is a domain name which is used to navigate to that host
private final String nav_sitehost; // this is a domain name which is used to navigate to that host
public final String nav_sitehash; // this is a domain hash, 6 bytes long or null
protected final Set<String> siteexcludes; // set of domain hashes that are excluded if not included by sitehash
public final String authorhash;
@ -132,14 +133,14 @@ public final class QueryParams {
public Seed remotepeer;
public final long starttime; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds)
protected final long maxtime;
protected final long timeout;
private final long timeout;
// values that are set after a search:
public int transmitcount; // number of results that had been shown to the user
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
public final String userAgent;
protected boolean filterfailurls;
protected double lat, lon, radius;
public String[] facetfields;
public List<String> facetfields;
public int maxfacets;
// the following values are filled during the search process as statistics for the search
@ -200,7 +201,8 @@ public final class QueryParams {
this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers
this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
this.facetfields = defaultfacetfields;
this.facetfields = new ArrayList<String>(); for (String f: defaultfacetfields) facetfields.add(f);
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets;
}
@ -279,7 +281,8 @@ public final class QueryParams {
this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers
this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
this.facetfields = defaultfacetfields;
this.facetfields = new ArrayList<String>(); for (String f: defaultfacetfields) facetfields.add(f);
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets;
}
@ -417,6 +420,11 @@ public final class QueryParams {
q.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"');
}
// add vocabulary facets
for (Tagging.Metatag tag: this.metatags) {
q.append(" AND ").append(YaCySchema.VOCABULARY_PREFIX).append(tag.getVocabularyName()).append(YaCySchema.VOCABULARY_SUFFIX).append(":\"").append(tag.getObject()).append('\"');
}
// construct query
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
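
Editorial note: on the query side the same field names serve double duty: they are added to the facet field list so counts come back with every result set, and a selected tag becomes an additional AND clause, as in the hunk above. A hedged SolrJ sketch of such a request; the class name and the example vocabulary/tag values are illustrative, and the facet limit stands in for query.maxfacets.

```java
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.params.FacetParams;

public final class VocabularyFacetQuerySketch {

    public static SolrQuery buildQuery(String coreQuery, String vocabularyName, String selectedTag) {
        String field = "vocabulary_" + vocabularyName + "_sxt"; // prefix/suffix as in YaCySchema
        SolrQuery params = new SolrQuery();
        // constrain the result set to documents carrying the selected tag
        params.setQuery(coreQuery + " AND " + field + ":\"" + selectedTag + "\"");
        // request facet counts for the vocabulary field so the navigator can be filled
        params.setFacet(true);
        params.setFacetLimit(100);
        params.setFacetSort(FacetParams.FACET_SORT_COUNT);
        params.addFacetField(field);
        return params;
    }
}
```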

@ -40,15 +40,12 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.LibraryProvider;
@ -68,9 +65,6 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ResultEntry;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
public final class RankingProcess extends Thread {
protected static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit'
@ -89,8 +83,7 @@ public final class RankingProcess extends Thread {
protected final ReferenceOrder order;
protected final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
protected final ScoreMap<String> hostNavigator = new ConcurrentScoreMap<String>(); // a counter for the appearance of host names
protected final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
protected final Map<String, ScoreMap<String>> vocabularyNavigator; // counters for Vocabularies; key is metatag.getVocabularyName()
private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
private boolean remote;
protected RankingProcess(final QueryParams query, boolean remote) {
@ -115,7 +108,6 @@ public final class RankingProcess extends Thread {
this.receivedRemoteReferences = new AtomicInteger(0);
this.order = new ReferenceOrder(this.query.ranking, UTF8.getBytes(this.query.targetlang));
this.urlhashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100);
this.vocabularyNavigator = new ConcurrentHashMap<String, ScoreMap<String>>();
this.taggingPredicates = new HashMap<String, String>();
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
this.taggingPredicates.put(t.getName(), t.getPredicate());
@ -340,6 +332,7 @@ public final class RankingProcess extends Thread {
//this.hostHashResolver.put(hosthash, iEntry.urlhash());
// check vocabulary constraint
/*
String subject = YaCyMetadata.hashURI(iEntry.urlhash());
Resource resource = JenaTripleStore.getResource(subject);
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
@ -351,8 +344,9 @@ public final class RankingProcess extends Thread {
if (tags.indexOf(metatag.getObject()) < 0) continue pollloop;
}
}
*/
// add navigators using the triplestore
/*
for (Map.Entry<String, String> v: this.taggingPredicates.entrySet()) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, v.getValue());
while (ni.hasNext()) {
@ -367,7 +361,8 @@ public final class RankingProcess extends Thread {
}
}
}
*/
// finally extend the double-check and insert result to stack
this.urlhashes.putUnique(iEntry.urlhash());
rankingtryloop: while (true) {
@ -407,10 +402,6 @@ public final class RankingProcess extends Thread {
return this.hostNavigator;
}
public Map<String,ScoreMap<String>> getVocabularyNavigators() {
return this.vocabularyNavigator;
}
public ScoreMap<String> getTopicNavigator(final int count) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls

@ -37,9 +37,6 @@ import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
@ -48,9 +45,7 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.federate.yacy.Distribution;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.Scanner;
import net.yacy.cora.sorting.ConcurrentScoreMap;
@ -60,11 +55,11 @@ import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.Element;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.WorkTables;
import net.yacy.document.Condenser;
import net.yacy.document.LargeNumberCache;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
@ -121,13 +116,14 @@ public final class SearchEvent {
public final ScoreMap<String> namespaceNavigator; // a counter for name spaces
public final ScoreMap<String> protocolNavigator; // a counter for protocol types
public final ScoreMap<String> filetypeNavigator; // a counter for file types
public final Map<String, ScoreMap<String>> vocabularyNavigator; // counters for Vocabularies; key is metatag.getVocabularyName()
protected final WeakPriorityBlockingQueue<URIMetadataNode> nodeStack;
protected final WeakPriorityBlockingQueue<ResultEntry> result;
protected final LoaderDispatcher loader;
protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
protected final boolean deleteIfSnippetFail;
private SnippetWorker[] workerThreads;
protected long urlRetrievalAllTime;
private long urlRetrievalAllTime;
protected long snippetComputationAllTime;
protected ConcurrentHashMap<String, String> snippets;
private final boolean remote;
@ -169,6 +165,8 @@ public final class SearchEvent {
}
this.protocolNavigator = new ConcurrentScoreMap<String>();
this.filetypeNavigator = new ConcurrentScoreMap<String>();
this.vocabularyNavigator = new ConcurrentHashMap<String, ScoreMap<String>>();
this.snippets = new ConcurrentHashMap<String, String>();
this.secondarySearchSuperviser =
@ -493,6 +491,20 @@ public final class SearchEvent {
//fcts = facets.get(YaCySchema.author.getSolrFieldName());
//if (fcts != null) this.authorNavigator.inc(fcts);
// get the vocabulary navigation
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) {
fcts = facets.get(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
if (fcts != null) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(v.getName());
if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>();
this.vocabularyNavigator.put(v.getName(), vocNav);
}
vocNav.inc(fcts);
}
}
// apply all constraints
try {
pollloop: for (URIMetadataNode iEntry: index) {
@ -539,6 +551,7 @@ public final class SearchEvent {
}
// check vocabulary constraint
/*
String subject = YaCyMetadata.hashURI(iEntry.hash());
Resource resource = JenaTripleStore.getResource(subject);
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
@ -550,8 +563,9 @@ public final class SearchEvent {
if (tags.indexOf(metatag.getObject()) < 0) continue pollloop;
}
}
*/
// add navigators using the triplestore
/*
for (Map.Entry<String, String> v: this.rankingProcess.taggingPredicates.entrySet()) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, v.getValue());
while (ni.hasNext()) {
@ -566,6 +580,7 @@ public final class SearchEvent {
}
}
}
*/
// finally extend the double-check and insert result to stack
this.rankingProcess.urlhashes.putUnique(iEntry.hash());
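
Editorial note: for completeness, a sketch of how the returned facet counts would be folded into per-vocabulary counters, mirroring the loop added to SearchEvent above ("get the vocabulary navigation"). It uses plain SolrJ and a simple map of counters in place of YaCy's ScoreMap, so the class name and types are illustrative.

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;

public final class VocabularyNavigatorSketch {

    /** Collect tag counts per vocabulary from a Solr response, keyed by vocabulary name. */
    public static Map<String, Map<String, Long>> collect(QueryResponse rsp, List<String> vocabularyNames) {
        Map<String, Map<String, Long>> navigators = new HashMap<String, Map<String, Long>>();
        for (String name : vocabularyNames) {
            FacetField facet = rsp.getFacetField("vocabulary_" + name + "_sxt");
            if (facet == null || facet.getValues() == null) continue;
            Map<String, Long> counts = new HashMap<String, Long>();
            for (FacetField.Count c : facet.getValues()) {
                counts.put(c.getName(), c.getCount()); // tag -> number of matching documents
            }
            navigators.put(name, counts);
        }
        return navigators;
    }
}
```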
