From 842faf96a2ee8d1826940819400982bf984b8948 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 7 Nov 2012 17:27:13 +0100 Subject: [PATCH] fixed media search --- source/net/yacy/search/index/SolrConfiguration.java | 5 ++++- source/net/yacy/search/query/SearchEvent.java | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index ef190a8fc..6365b0add 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -371,7 +371,10 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable if (allAttr || contains(YaCySchema.content_type)) add(doc, YaCySchema.content_type, new String[]{document.dc_format()}); if (allAttr || contains(YaCySchema.last_modified)) add(doc, YaCySchema.last_modified, responseHeader == null ? new Date() : responseHeader.lastModified()); if (allAttr || contains(YaCySchema.keywords)) add(doc, YaCySchema.keywords, document.dc_subject(' ')); - final String content = document.getTextString(); + String content = document.getTextString(); + if (content == null || content.length() == 0) { + content = digestURI.toTokens(); + } if (allAttr || contains(YaCySchema.text_t)) add(doc, YaCySchema.text_t, content); if (allAttr || contains(YaCySchema.wordcount_i)) { final int contentwc = content.split(" ").length; diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 47f78dce1..c2a5d39f3 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -726,12 +726,15 @@ public final class SearchEvent { } // check content domain - if (((this.query.contentdom.getCode() > 0 && page.url().getContentDomain() != this.query.contentdom) || - (this.query.contentdom == Classification.ContentDomain.TEXT && page.url().getContentDomain().getCode() > 0)) && this.query.urlMask_isCatchall) { + if (((this.query.contentdom == Classification.ContentDomain.TEXT && page.url().getContentDomain() == Classification.ContentDomain.IMAGE) || + (this.query.contentdom == Classification.ContentDomain.IMAGE && page.url().getContentDomain() != Classification.ContentDomain.IMAGE) || + (this.query.contentdom == Classification.ContentDomain.AUDIO && page.url().getContentDomain() != Classification.ContentDomain.AUDIO) || + (this.query.contentdom == Classification.ContentDomain.VIDEO && page.url().getContentDomain() != Classification.ContentDomain.VIDEO) || + (this.query.contentdom == Classification.ContentDomain.APP && page.url().getContentDomain() != Classification.ContentDomain.APP)) && this.query.urlMask_isCatchall) { this.query.misses.add(page.hash()); continue; } - + // Check for blacklist if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page)) { this.query.misses.add(page.hash());