diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 4efca40aa..87af58bd3 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -293,7 +293,7 @@ public class yacysearchtrailer { } // vocabulary navigators - final Map> vocabularyNavigators = theSearch.rankingProcess.getVocabularyNavigators(); + final Map> vocabularyNavigators = theSearch.vocabularyNavigator; if (vocabularyNavigators != null && !vocabularyNavigators.isEmpty()) { int navvoccount = 0; vocnav: for (Map.Entry> ve: vocabularyNavigators.entrySet()) { diff --git a/source/net/yacy/cora/federate/solr/YaCySchema.java b/source/net/yacy/cora/federate/solr/YaCySchema.java index bf2e7e01d..857bc3be3 100644 --- a/source/net/yacy/cora/federate/solr/YaCySchema.java +++ b/source/net/yacy/cora/federate/solr/YaCySchema.java @@ -193,6 +193,9 @@ public enum YaCySchema implements Schema { ext_tracker_val(SolrType.num_integer, true, true, true, "number of attribute counts in ext_tracker_txt"), ext_title_txt(SolrType.text_general, true, true, true, "names matching title expressions"), ext_title_val(SolrType.num_integer, true, true, true, "number of matching title expressions"); + + public final static String VOCABULARY_PREFIX = "vocabulary_"; + public final static String VOCABULARY_SUFFIX = "_sxt"; private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() ) private final SolrType type; diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index cfcf6e50c..04cfdfb46 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -60,14 +60,12 @@ public class JenaTripleStore { public static ConcurrentHashMap privatestorage = null; - public static String file; - public static void load(String filename) throws IOException { if (filename.endsWith(".nt")) LoadNTriples(filename); else loadRDF(filename); } - public static void loadRDF(String fileNameOrUri) throws IOException { + private static void loadRDF(String fileNameOrUri) throws IOException { Model tmp = ModelFactory.createDefaultModel(); log.info("Loading from " + fileNameOrUri); InputStream is = FileManager.get().open(fileNameOrUri); @@ -81,7 +79,7 @@ public class JenaTripleStore { } } - public static void LoadNTriples(String fileNameOrUri) throws IOException { + private static void LoadNTriples(String fileNameOrUri) throws IOException { log.info("Loading N-Triples from " + fileNameOrUri); InputStream is = FileManager.get().open(fileNameOrUri); LoadNTriples(is); @@ -111,11 +109,11 @@ public class JenaTripleStore { } } - public static void saveFile(String filename) { + private static void saveFile(String filename) { saveFile(filename, model); } - public static void saveFile(String filename, Model model) { + private static void saveFile(String filename, Model model) { File f = new File(filename); File ftmp = new File(filename + "." + System.currentTimeMillis()); if (model.isEmpty() && !f.exists()) { @@ -154,7 +152,7 @@ public class JenaTripleStore { * @param uri * @return */ - public static Resource getResource(String uri) { + private static Resource getResource(String uri) { return model.getResource(uri); } @@ -174,7 +172,7 @@ public class JenaTripleStore { addTriple (subject, predicate, object, model); } - public static void addTriple(String subject, String predicate, String object, Model model) { + private static void addTriple(String subject, String predicate, String object, Model model) { Resource r = model.getResource(subject); Property pr = model.getProperty(predicate); r.addProperty(pr, object); @@ -209,7 +207,7 @@ public class JenaTripleStore { return null; } - public static Iterator getObjects(final Resource r, final String predicate) { + private static Iterator getObjects(final Resource r, final String predicate) { return getObjects(r, predicate, model); } @@ -309,7 +307,7 @@ public class JenaTripleStore { } } - public static void savePrivateStores() { + private static void savePrivateStores() { Switchboard switchboard = Switchboard.getSwitchboard(); log.info("Saving user triplestores"); if (privatestorage == null) return; diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 89d2bf8fb..2dee1b55c 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -34,7 +34,6 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; -import net.yacy.cora.document.WordCache.Dictionary; import net.yacy.cora.geo.GeoLocation; import net.yacy.cora.geo.Locations; import net.yacy.cora.storage.Files; @@ -66,7 +65,7 @@ public class Tagging { this.objectlink = objectlink; } - public SOTuple(String[] synonyms, String objectlink) { + private SOTuple(String[] synonyms, String objectlink) { StringBuilder sb = new StringBuilder(synonyms.length * 10); for (String s: synonyms) sb.append(',').append(s); this.synonyms = sb.substring(1); @@ -87,7 +86,7 @@ public class Tagging { } - public Tagging(String name) { + private Tagging(String name) { this.navigatorName = name; this.synonym2term = new ConcurrentHashMap(); this.term2synonym = new ConcurrentHashMap(); @@ -188,18 +187,7 @@ public class Tagging { } } - public Tagging(String name, Dictionary dictionary) { - this(name); - Set words = dictionary.getWords(); - String s; - for (StringBuilder word: words) { - s = word.toString(); - this.synonym2term.put(s.toLowerCase(), s); - this.term2synonym.put(s, s.toLowerCase()); - } - } - - public void init() throws IOException { + private void init() throws IOException { if (this.propFile == null) return; this.synonym2term.clear(); this.term2synonym.clear(); @@ -378,7 +366,7 @@ public class Tagging { init(); } - public Map> reconstructionSets() { + private Map> reconstructionSets() { Map> r = new TreeMap>(); for (Map.Entry e: this.term2synonym.entrySet()) { Set s = r.get(e.getKey()); @@ -399,7 +387,7 @@ public class Tagging { return r; } - public Map reconstructionLists() { + private Map reconstructionLists() { Map> r = reconstructionSets(); Map map = new TreeMap(); for (Map.Entry> e: r.entrySet()) { @@ -511,10 +499,6 @@ public class Tagging { return new Metatag(word); } - public Set getSynonyms(String term) { - return this.synonym2synonyms.get(term); - } - public Set tags() { return this.synonym2term.keySet(); } @@ -556,7 +540,7 @@ public class Tagging { public class Metatag { private final String object; - public Metatag(String object) { + private Metatag(String object) { this.object = object; } diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java index 68b10c174..9d17ae5e6 100644 --- a/source/net/yacy/document/Condenser.java +++ b/source/net/yacy/document/Condenser.java @@ -252,7 +252,7 @@ public final class Condenser { this.exact_signature = l; } - public Condenser(final String text, final WordCache meaningLib, boolean doAutotagging) { + private Condenser(final String text, final WordCache meaningLib, boolean doAutotagging) { this.languageIdentificator = null; // we don't need that here // analysis = new Properties(); this.words = new TreeMap(); @@ -372,10 +372,11 @@ public final class Condenser { //System.out.println("Testing: " + testterm); tag = LibraryProvider.autotagging.getTagFromTerm(testterm); if (tag != null) { - Set tagset = this.tags.get(tag.getVocabularyName()); + String navigatorName = tag.getVocabularyName(); + Set tagset = this.tags.get(navigatorName); if (tagset == null) { tagset = new HashSet(); - this.tags.put(tag.getVocabularyName(), tagset); + this.tags.put(navigatorName, tagset); } tagset.add(tag); } diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index a1fecbdaa..e63ad37e6 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -56,11 +56,7 @@ import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; import net.yacy.cora.document.analysis.Classification; -import net.yacy.cora.lod.JenaTripleStore; -import net.yacy.cora.lod.vocabulary.DCTerms; -import net.yacy.cora.lod.vocabulary.Owl; import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.cora.lod.vocabulary.YaCyMetadata; import net.yacy.crawler.retrieval.Request; import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ImageEntry; @@ -96,6 +92,7 @@ public class Document { private final boolean indexingDenied; private final double lon, lat; private final Object parserObject; // the source object that was used to create the Document + private final Map> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document public Document(final DigestURI location, final String mimeType, final String charset, final Object parserObject, @@ -138,6 +135,7 @@ public class Document { this.languages = languages; this.indexingDenied = indexingDenied; this.text = text == null ? "" : text; + this.generic_facets = new HashMap>(); } public Object getParserObject() { @@ -152,6 +150,10 @@ public class Document { return this.source.getFileExtension(); } + public Map> getGenericFacets() { + return this.generic_facets; + } + /** * compute a set of languages that this document contains * the language is not computed using a statistical analysis of the content, only from given metadata that came with the document @@ -202,9 +204,7 @@ dc_rights if (title != null) this.titles.add(title); } - public void addTitle(final String title) { - if (title != null) this.titles.add(title); - } + public String dc_creator() { return (this.creator == null) ? "" : this.creator.toString(); @@ -229,26 +229,31 @@ dc_rights * These keywords will appear in dc_subject * @param tags */ - public void addMetatags(Map> tags) { - String subject = YaCyMetadata.hashURI(this.source.hash()); + protected void addMetatags(Map> tags) { + //String subject = YaCyMetadata.hashURI(this.source.hash()); //for (String s: this.keywords) { // tags.remove(s); //} for (Map.Entry> e: tags.entrySet()) { Tagging vocabulary = LibraryProvider.autotagging.getVocabulary(e.getKey()); if (vocabulary == null) continue; - String objectspace = vocabulary.getObjectspace(); - StringBuilder sb = new StringBuilder(e.getValue().size() * 20); + //String objectspace = vocabulary.getObjectspace(); + //StringBuilder sb = new StringBuilder(e.getValue().size() * 20); + Set objects = new HashSet(); for (Tagging.Metatag s: e.getValue()) { - sb.append(',').append(s.getObject()); + objects.add(s.getObject()); + //sb.append(',').append(s.getObject()); + /* String objectlink = vocabulary.getObjectlink(s.getObject()); if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) { JenaTripleStore.addTriple(subject, DCTerms.references.getPredicate(), objectlink == null || objectlink.isEmpty() ? objectspace + s.getObject() + "#" + s.getObject() : objectlink + "#" + s.getObject()); } + */ } // put to triplestore - JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1)); - JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true)); + //JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true)); + //JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1)); // superfluous with the generic_facets + this.generic_facets.put(vocabulary.getName(), objects); } } @@ -580,7 +585,7 @@ dc_rights return v; } - public static Map allReflinks(final Collection links) { + private static Map allReflinks(final Collection links) { // links is either a Set of Strings (with urls) or // htmlFilterImageEntries // we find all links that are part of a reference inside a url diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 611ec37ca..68b19822c 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -748,7 +748,7 @@ public final class Protocol return result.urlcount; } - public static void remoteSearchProcess( + private static void remoteSearchProcess( final SearchEvent event, final int count, final long time, @@ -1037,7 +1037,7 @@ public final class Protocol solrQuery.setRows(count); // set facet query attributes - if (getFacets && event.query.facetfields.length > 0) { + if (getFacets && event.query.facetfields.size() > 0) { solrQuery.setFacet(true); solrQuery.setFacetLimit(event.query.maxfacets); solrQuery.setFacetSort(FacetParams.FACET_SORT_COUNT); @@ -1082,7 +1082,7 @@ public final class Protocol } // evaluate facets - Map> facets = new HashMap>(event.query.facetfields.length); + Map> facets = new HashMap>(event.query.facetfields.size()); for (String field: event.query.facetfields) { FacetField facet = rsp.getFacetField(field); ReversibleScoreMap result = new ClusteredScoreMap(UTF8.insensitiveUTF8Comparator); diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index c5f48e65f..a769cb4ae 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -777,6 +777,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (allAttr || contains(YaCySchema.videolinkscount_i)) add(doc, YaCySchema.videolinkscount_i, document.getVideolinks().size()); if (allAttr || contains(YaCySchema.applinkscount_i)) add(doc, YaCySchema.applinkscount_i, document.getApplinks().size()); + // write generic navigation + // there are no pre-defined solr fields for navigation because the vocabulary is generic + // we use dynamically allocated solr fields for this. + // It must be a multi-value string/token field, therefore we use _sxt extensions for the field names + for (Map.Entry> facet: document.getGenericFacets().entrySet()) { + String facetName = facet.getKey(); + Set facetValues = facet.getValue(); + doc.setField(YaCySchema.VOCABULARY_PREFIX + facetName + YaCySchema.VOCABULARY_SUFFIX, facetValues.toArray(new String[facetValues.size()])); + } + return doc; } diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 7cfcc729a..00afbc9f0 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -31,6 +31,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; @@ -55,7 +56,7 @@ import net.yacy.cora.order.Base64Order; import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.Condenser; -import net.yacy.document.parser.html.CharacterCoding; +import net.yacy.document.LibraryProvider; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; @@ -107,7 +108,7 @@ public final class QueryParams { public static final Pattern catchall_pattern = Pattern.compile(".*"); private static final Pattern matchnothing_pattern = Pattern.compile(""); - public final QueryGoal queryGoal; + private final QueryGoal queryGoal; public int itemsPerPage; public int offset; public final Pattern urlMask, prefer; @@ -124,7 +125,7 @@ public final class QueryParams { public final RankingProfile ranking; private final Segment indexSegment; public final String clienthost; // this is the client host that starts the query, not a site operator - public final String nav_sitehost; // this is a domain name which is used to navigate to that host + private final String nav_sitehost; // this is a domain name which is used to navigate to that host public final String nav_sitehash; // this is a domain hash, 6 bytes long or null protected final Set siteexcludes; // set of domain hashes that are excluded if not included by sitehash public final String authorhash; @@ -132,14 +133,14 @@ public final class QueryParams { public Seed remotepeer; public final long starttime; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds) protected final long maxtime; - protected final long timeout; + private final long timeout; // values that are set after a search: public int transmitcount; // number of results that had been shown to the user public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets public final String userAgent; protected boolean filterfailurls; protected double lat, lon, radius; - public String[] facetfields; + public List facetfields; public int maxfacets; // the following values are filled during the search process as statistics for the search @@ -200,7 +201,8 @@ public final class QueryParams { this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed this.misses = Collections.synchronizedSortedSet(new TreeSet(URIMetadataRow.rowdef.objectOrder)); - this.facetfields = defaultfacetfields; + this.facetfields = new ArrayList(); for (String f: defaultfacetfields) facetfields.add(f); + for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX); this.maxfacets = defaultmaxfacets; } @@ -279,7 +281,8 @@ public final class QueryParams { this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed this.misses = Collections.synchronizedSortedSet(new TreeSet(URIMetadataRow.rowdef.objectOrder)); - this.facetfields = defaultfacetfields; + this.facetfields = new ArrayList(); for (String f: defaultfacetfields) facetfields.add(f); + for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX); this.maxfacets = defaultmaxfacets; } @@ -417,6 +420,11 @@ public final class QueryParams { q.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"'); } + // add vocabulary facets + for (Tagging.Metatag tag: this.metatags) { + q.append(" AND ").append(YaCySchema.VOCABULARY_PREFIX).append(tag.getVocabularyName()).append(YaCySchema.VOCABULARY_SUFFIX).append(":\"").append(tag.getObject()).append('\"'); + } + // construct query final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); diff --git a/source/net/yacy/search/query/RankingProcess.java b/source/net/yacy/search/query/RankingProcess.java index 6bbe3021e..420ce2525 100644 --- a/source/net/yacy/search/query/RankingProcess.java +++ b/source/net/yacy/search/query/RankingProcess.java @@ -40,15 +40,12 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; import net.yacy.cora.federate.yacy.CacheStrategy; -import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.cora.lod.vocabulary.YaCyMetadata; import net.yacy.cora.sorting.ConcurrentScoreMap; import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.sorting.WeakPriorityBlockingQueue; import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.cora.storage.HandleSet; -import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.Condenser; import net.yacy.document.LibraryProvider; @@ -68,9 +65,6 @@ import net.yacy.search.index.Segment; import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.snippet.ResultEntry; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; - public final class RankingProcess extends Thread { protected static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit' @@ -89,8 +83,7 @@ public final class RankingProcess extends Thread { protected final ReferenceOrder order; protected final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) protected final ScoreMap hostNavigator = new ConcurrentScoreMap(); // a counter for the appearance of host names - protected final Map taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris - protected final Map> vocabularyNavigator; // counters for Vocabularies; key is metatag.getVocabularyName() + private final Map taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris private boolean remote; protected RankingProcess(final QueryParams query, boolean remote) { @@ -115,7 +108,6 @@ public final class RankingProcess extends Thread { this.receivedRemoteReferences = new AtomicInteger(0); this.order = new ReferenceOrder(this.query.ranking, UTF8.getBytes(this.query.targetlang)); this.urlhashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100); - this.vocabularyNavigator = new ConcurrentHashMap>(); this.taggingPredicates = new HashMap(); for (Tagging t: LibraryProvider.autotagging.getVocabularies()) { this.taggingPredicates.put(t.getName(), t.getPredicate()); @@ -340,6 +332,7 @@ public final class RankingProcess extends Thread { //this.hostHashResolver.put(hosthash, iEntry.urlhash()); // check vocabulary constraint + /* String subject = YaCyMetadata.hashURI(iEntry.urlhash()); Resource resource = JenaTripleStore.getResource(subject); if (this.query.metatags != null && !this.query.metatags.isEmpty()) { @@ -351,8 +344,9 @@ public final class RankingProcess extends Thread { if (tags.indexOf(metatag.getObject()) < 0) continue pollloop; } } - + */ // add navigators using the triplestore + /* for (Map.Entry v: this.taggingPredicates.entrySet()) { Iterator ni = JenaTripleStore.getObjects(resource, v.getValue()); while (ni.hasNext()) { @@ -367,7 +361,8 @@ public final class RankingProcess extends Thread { } } } - + */ + // finally extend the double-check and insert result to stack this.urlhashes.putUnique(iEntry.urlhash()); rankingtryloop: while (true) { @@ -407,10 +402,6 @@ public final class RankingProcess extends Thread { return this.hostNavigator; } - public Map> getVocabularyNavigators() { - return this.vocabularyNavigator; - } - public ScoreMap getTopicNavigator(final int count) { // create a list of words that had been computed by statistics over all // words that appeared in the url or the description of all urls diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 499af400a..c4e228596 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -37,9 +37,6 @@ import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; - import net.yacy.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; @@ -48,9 +45,7 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.federate.solr.YaCySchema; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.federate.yacy.Distribution; -import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.cora.lod.vocabulary.YaCyMetadata; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.Scanner; import net.yacy.cora.sorting.ConcurrentScoreMap; @@ -60,11 +55,11 @@ import net.yacy.cora.sorting.WeakPriorityBlockingQueue; import net.yacy.cora.sorting.WeakPriorityBlockingQueue.Element; import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.cora.storage.HandleSet; -import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.SpaceExceededException; import net.yacy.data.WorkTables; import net.yacy.document.Condenser; import net.yacy.document.LargeNumberCache; +import net.yacy.document.LibraryProvider; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; @@ -121,13 +116,14 @@ public final class SearchEvent { public final ScoreMap namespaceNavigator; // a counter for name spaces public final ScoreMap protocolNavigator; // a counter for protocol types public final ScoreMap filetypeNavigator; // a counter for file types + public final Map> vocabularyNavigator; // counters for Vocabularies; key is metatag.getVocabularyName() protected final WeakPriorityBlockingQueue nodeStack; protected final WeakPriorityBlockingQueue result; protected final LoaderDispatcher loader; protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets protected final boolean deleteIfSnippetFail; private SnippetWorker[] workerThreads; - protected long urlRetrievalAllTime; + private long urlRetrievalAllTime; protected long snippetComputationAllTime; protected ConcurrentHashMap snippets; private final boolean remote; @@ -169,6 +165,8 @@ public final class SearchEvent { } this.protocolNavigator = new ConcurrentScoreMap(); this.filetypeNavigator = new ConcurrentScoreMap(); + this.vocabularyNavigator = new ConcurrentHashMap>(); + this.snippets = new ConcurrentHashMap(); this.secondarySearchSuperviser = @@ -493,6 +491,20 @@ public final class SearchEvent { //fcts = facets.get(YaCySchema.author.getSolrFieldName()); //if (fcts != null) this.authorNavigator.inc(fcts); + // get the vocabulary navigation + for (Tagging v: LibraryProvider.autotagging.getVocabularies()) { + fcts = facets.get(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX); + if (fcts != null) { + ScoreMap vocNav = this.vocabularyNavigator.get(v.getName()); + if (vocNav == null) { + vocNav = new ConcurrentScoreMap(); + this.vocabularyNavigator.put(v.getName(), vocNav); + } + vocNav.inc(fcts); + } + } + + // apply all constraints try { pollloop: for (URIMetadataNode iEntry: index) { @@ -539,6 +551,7 @@ public final class SearchEvent { } // check vocabulary constraint + /* String subject = YaCyMetadata.hashURI(iEntry.hash()); Resource resource = JenaTripleStore.getResource(subject); if (this.query.metatags != null && !this.query.metatags.isEmpty()) { @@ -550,8 +563,9 @@ public final class SearchEvent { if (tags.indexOf(metatag.getObject()) < 0) continue pollloop; } } - + */ // add navigators using the triplestore + /* for (Map.Entry v: this.rankingProcess.taggingPredicates.entrySet()) { Iterator ni = JenaTripleStore.getObjects(resource, v.getValue()); while (ni.hasNext()) { @@ -566,6 +580,7 @@ public final class SearchEvent { } } } + */ // finally extend the double-check and insert result to stack this.rankingProcess.urlhashes.putUnique(iEntry.hash());