Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 11 years ago
commit f5b817bac4

@ -86,6 +86,14 @@ public class docParser extends AbstractParser implements Parser {
if (title.length() == l) break; if (title.length() == l) break;
l = title.length(); l = title.length();
} }
// get keywords (for yacy as array)
final String keywords = extractor.getSummaryInformation().getKeywords();
final String[] keywlist;
if (keywords != null && !keywords.isEmpty()) {
keywlist = keywords.split(",");
} else {
keywlist = null;
}
Document[] docs; Document[] docs;
docs = new Document[]{new Document( docs = new Document[]{new Document(
@ -94,9 +102,9 @@ public class docParser extends AbstractParser implements Parser {
"UTF-8", "UTF-8",
this, this,
null, null,
null, keywlist,
singleList(title), singleList(title),
"", // TODO: AUTHOR extractor.getSummaryInformation().getAuthor(), // constructor can handle null
extractor.getDocSummaryInformation().getCompany(), // publisher extractor.getDocSummaryInformation().getCompany(), // publisher
null, null,
null, null,

@ -78,6 +78,12 @@ public class pptParser extends AbstractParser implements Parser {
if (title.length() == l) break; if (title.length() == l) break;
l = title.length(); l = title.length();
} }
// get keywords (for yacy as array)
final String keywords = pptExtractor.getSummaryInformation().getKeywords();
final String[] keywlist;
if (keywords != null && !keywords.isEmpty()) {
keywlist = keywords.split(",");
} else keywlist = null;
/* /*
* create the plasmaParserDocument for the database * create the plasmaParserDocument for the database
@ -89,9 +95,9 @@ public class pptParser extends AbstractParser implements Parser {
"UTF-8", "UTF-8",
this, this,
null, null,
null, keywlist,
singleList(title), singleList(title),
"", // TODO: AUTHOR pptExtractor.getSummaryInformation().getAuthor(), // may be null
pptExtractor.getDocSummaryInformation().getCompany(), pptExtractor.getDocSummaryInformation().getCompany(),
null, null,
null, null,

@ -35,11 +35,6 @@ import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.common.params.FacetParams;
import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.ASCII;
@ -65,6 +60,10 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.RankingProfile; import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
public final class QueryParams { public final class QueryParams {
@ -227,7 +226,8 @@ public final class QueryParams {
this.solrSchema = indexSegment.fulltext().getDefaultConfiguration(); this.solrSchema = indexSegment.fulltext().getDefaultConfiguration();
for (String navkey: search_navigation) { for (String navkey: search_navigation) {
CollectionSchema f = defaultfacetfields.get(navkey); CollectionSchema f = defaultfacetfields.get(navkey);
if (f != null && solrSchema.contains(f)) this.facetfields.add(f.getSolrFieldName()); // handle special field, author_sxt (add to facet w/o contains check, as author_sxt is not enabled (is copyfield))
if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt"))) this.facetfields.add(f.getSolrFieldName());
} }
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName() + CollectionSchema.VOCABULARY_SUFFIX); for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName() + CollectionSchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets; this.maxfacets = defaultmaxfacets;
@ -358,8 +358,8 @@ public final class QueryParams {
bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10"; bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10";
} }
if (fq.length() > 0) { if (fq.length() > 0) {
String oldfq = params.get("fq"); String oldfq = params.get(CommonParams.FQ);
params.setParam("fq", oldfq == null || oldfq.length() == 0 ? fq : "(" + oldfq + ") AND (" + fq + ")"); params.setParam(CommonParams.FQ, oldfq == null || oldfq.length() == 0 ? fq : "(" + oldfq + ") AND (" + fq + ")");
} }
if (bq.length() > 0) params.setParam("bq", bq); if (bq.length() > 0) params.setParam("bq", bq);
if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
@ -465,8 +465,8 @@ public final class QueryParams {
fq.append(" AND ").append(CollectionSchema.language_s.getSolrFieldName()).append(":\"").append(this.modifier.language).append('\"'); fq.append(" AND ").append(CollectionSchema.language_s.getSolrFieldName()).append(":\"").append(this.modifier.language).append('\"');
} }
// add author facets // add author facets (check for contains(author) as author_sxt is omitted copyfield)
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author_sxt)) { if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author)) {
fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"'); fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"');
} }

@ -911,6 +911,13 @@ public final class SearchEvent {
continue pollloop; continue pollloop;
} }
} }
if (this.query.modifier.author != null) {
if (!this.query.modifier.author.equals(iEntry.dc_creator())) {
if (log.isFine()) log.fine ("dropped Node: author");
continue pollloop;
}
}
// finally extend the double-check and insert result to stack // finally extend the double-check and insert result to stack
this.urlhashes.putUnique(iEntry.hash()); this.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) { rankingtryloop: while (true) {
@ -1098,6 +1105,13 @@ public final class SearchEvent {
continue; continue;
} }
// check modifier constraint (author)
if (this.query.modifier.author != null && !page.dc_creator().toLowerCase().contains(this.query.modifier.author.toLowerCase()) /*!this.query.modifier.author.equalsIgnoreCase(page.dc_creator())*/) {
if (log.isFine()) log.fine("dropped RWI: author constraint = " + this.query.modifier.author);
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
// Check for blacklist // Check for blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page.url())) { if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page.url())) {
if (log.isFine()) log.fine("dropped RWI: url is blacklisted in url blacklist"); if (log.isFine()) log.fine("dropped RWI: url is blacklisted in url blacklist");

Loading…
Cancel
Save