diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index c0ba43b4c..51020747f 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -124,10 +124,9 @@ public class CrawlResults { if (post.containsKey("deletedomain")) { final String domain = post.get("domain", null); - final String hashpart = domain == null ? null : DigestURI.hosthash6(domain); - if (hashpart != null) { - sb.index.fulltext().deleteDomain(hashpart, null, false); - ResultURLs.deleteDomain(tabletype, domain, hashpart); + if (domain != null) { + sb.index.fulltext().deleteDomainHostname(domain, null, false); + ResultURLs.deleteDomain(tabletype, domain); } } diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 389860108..f5f40a8e8 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -294,7 +294,7 @@ public class Crawler_p { siteFilter = CrawlProfile.siteFilter(rootURLs); if (deleteold) { for (DigestURI u: rootURLs) { - int count = sb.index.fulltext().deleteDomain(u.hosthash(), deleteageDate, rootURLs.size() > 1); + int count = sb.index.fulltext().deleteDomainHashpart(u.hosthash(), deleteageDate, rootURLs.size() > 1); if (count > 0) Log.logInfo("Crawler_p", "deleted " + count + " documents for host " + u.getHost()); } } diff --git a/htroot/IndexControlURLs_p.html b/htroot/IndexControlURLs_p.html index 3f7db9c6f..23b79b53a 100644 --- a/htroot/IndexControlURLs_p.html +++ b/htroot/IndexControlURLs_p.html @@ -77,7 +77,6 @@ function updatepage(str) {
Retrieve by URL-Hash:
-
@@ -132,7 +131,7 @@ function updatepage(str) {
- +
@@ -206,13 +205,6 @@ function updatepage(str) {
Stored a solr dump to file #[dumpfile]#
:: #(/indexdump)# - #(urlhashsimilar)#::

Sequential List of URL-Hashes:
- #{rows}# - #{cols}##[urlHash]# #{/cols}#
- #{/rows}# -

- #(/urlhashsimilar)# - #(genUrlProfile)# ::No entry found for URL-hash #[urlhash]# ::
diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java index 59ac827e1..bbbb2b480 100644 --- a/htroot/IndexControlURLs_p.java +++ b/htroot/IndexControlURLs_p.java @@ -30,13 +30,15 @@ import java.io.IOException; import java.net.MalformedURLException; import java.util.Iterator; import java.util.List; +import java.util.Map; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; +import net.yacy.cora.federate.solr.YaCySchema; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.lod.JenaTripleStore; -import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.RequestHeader; +import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.crawler.data.Cache; import net.yacy.crawler.data.ResultURLs; import net.yacy.data.WorkTables; @@ -44,7 +46,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.util.RotateIterator; import net.yacy.search.Switchboard; import net.yacy.search.index.Fulltext; import net.yacy.search.index.Segment; @@ -236,30 +237,6 @@ public class IndexControlURLs_p { } } - // generate list - if (post.containsKey("urlhashsimilar")) { - final Iterator entryIt = new RotateIterator(segment.fulltext().urls(), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), (int) segment.RWICount()); - final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:
"); - DigestURI entry; - int i = 0, rows = 0, cols = 0; - prop.put("urlhashsimilar", "1"); - while (entryIt.hasNext() && i < 256) { - entry = entryIt.next(); - if (entry == null) break; - prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", ASCII.String(entry.hash())); - cols++; - if (cols==8) { - prop.put("urlhashsimilar_rows_"+rows+"_cols", cols); - cols = 0; - rows++; - } - i++; - } - prop.put("statistics", 0); - prop.put("urlhashsimilar_rows", rows); - prop.put("result", result.toString()); - } - if (post.containsKey("lurlexport")) { // parse format int format = 0; @@ -279,7 +256,7 @@ public class IndexControlURLs_p { final File f = new File(s); f.getParentFile().mkdirs(); final String filter = post.get("exportfilter", ".*"); - final Fulltext.Export running = segment.fulltext().export(f, filter, null, format, dom); + final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom); prop.put("lurlexport_exportfile", s); prop.put("lurlexport_urlcount", running.count()); @@ -301,29 +278,29 @@ public class IndexControlURLs_p { } if (post.containsKey("deletedomain")) { - final String hp = post.get("hashpart"); - segment.fulltext().deleteDomain(hp, null, false); + final String domain = post.get("domain"); + segment.fulltext().deleteDomainHostname(domain, null, false); // trigger the loading of the table post.put("statistics", ""); } if (post.containsKey("statistics")) { final int count = post.getInt("lines", 100); - Iterator statsiter; prop.put("statistics_lines", count); int cnt = 0; try { final Fulltext metadata = segment.fulltext(); - statsiter = metadata.statistics(count, metadata.urlSampleScores(metadata.domainSampleCollector())); + Map> scores = metadata.getSolr().getFacets(YaCySchema.httpstatus_i.getSolrFieldName() + ":200", count, YaCySchema.host_s.getSolrFieldName()); + ReversibleScoreMap stats = scores.get(YaCySchema.host_s.getSolrFieldName()); + Iterator statsiter = stats.keys(false); boolean dark = true; - Fulltext.HostStat hs; + String hostname; + prop.put("statisticslines_domains_" + cnt + "lines", count); while (statsiter.hasNext() && cnt < count) { - hs = statsiter.next(); + hostname = statsiter.next(); prop.put("statisticslines_domains_" + cnt + "_dark", (dark) ? "1" : "0"); - prop.put("statisticslines_domains_" + cnt + "_domain", hs.hostname + ((hs.port == 80) ? "" : ":" + hs.port)); - prop.put("statisticslines_domains_" + cnt + "lines", count); - prop.put("statisticslines_domains_" + cnt + "_hashpart", hs.hosthash); - prop.put("statisticslines_domains_" + cnt + "_count", hs.count); + prop.put("statisticslines_domains_" + cnt + "_domain", hostname); + prop.put("statisticslines_domains_" + cnt + "_count", stats.get(hostname)); dark = !dark; cnt++; } diff --git a/htroot/IndexControlURLs_p.xml b/htroot/IndexControlURLs_p.xml index 79ce730bc..0a9b0a63c 100644 --- a/htroot/IndexControlURLs_p.xml +++ b/htroot/IndexControlURLs_p.xml @@ -13,13 +13,4 @@ #(indexdump)#:: #[dumpfile]#:: #(/indexdump)# - #(urlhashsimilar)#:: - - #{rows}# - #{cols}# - #[urlHash]# - #{/cols}# - #{/rows}# - - #(/urlhashsimilar)# \ No newline at end of file diff --git a/htroot/YBRFetch_p.html b/htroot/YBRFetch_p.html deleted file mode 100644 index e69de29bb..000000000 diff --git a/htroot/YBRFetch_p.java b/htroot/YBRFetch_p.java deleted file mode 100644 index 9ff84558f..000000000 --- a/htroot/YBRFetch_p.java +++ /dev/null @@ -1,70 +0,0 @@ -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import net.yacy.cora.protocol.RequestHeader; -import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.rwi.ReferenceContainerCache; -import net.yacy.kelondro.util.MemoryControl; -import net.yacy.peers.graphics.WebStructureGraph.HostReference; -import net.yacy.search.Switchboard; -import net.yacy.search.index.Fulltext; -import net.yacy.search.index.Fulltext.HostStat; -import net.yacy.search.index.Segment; -import net.yacy.search.ranking.BlockRank; -import net.yacy.server.serverObjects; -import net.yacy.server.serverSwitch; -import net.yacy.server.servletProperties; - -public class YBRFetch_p -{ - - public static servletProperties respond( - @SuppressWarnings("unused") final RequestHeader requestHeader, - final serverObjects post, - final serverSwitch env) { - final servletProperties prop = new servletProperties(); - final Switchboard sb = (Switchboard) env; - - if ( post == null || !post.containsKey("ghrt4") || MemoryControl.available() < 1024L * 1024L * 1024L ) { - return prop; - } - final File hostIndexFile = new File(sb.queuesRoot, "hostIndex.blob"); - - ReferenceContainerCache hostIndex; // this will get large, more than 0.5 million entries by now - if ( !hostIndexFile.exists() ) { - hostIndex = BlockRank.collect(sb.peers, sb.webStructure, Integer.MAX_VALUE); - BlockRank.saveHostIndex(hostIndex, hostIndexFile); - } else { - hostIndex = BlockRank.loadHostIndex(hostIndexFile); - } - - // use an index segment to find hosts for given host hashes - final Segment segment = sb.index; - final Fulltext metadata = segment.fulltext(); - Map hostHashResolver; - try { - hostHashResolver = metadata.domainHashResolver(metadata.domainSampleCollector()); - } catch ( final IOException e ) { - hostHashResolver = new HashMap(); - } - - // recursively compute a new ranking table - Log.logInfo("BLOCK RANK", "computing new ranking tables..."); - BlockRank.ybrTables = BlockRank.evaluate(hostIndex, hostHashResolver, null, 0); - hostIndex = null; // we don't need that here any more, so free the memory - - // use the web structure and the hostHash resolver to analyse the ranking table - Log.logInfo("BLOCK RANK", "analysis of " + BlockRank.ybrTables.length + " tables..."); - BlockRank.analyse(sb.webStructure, hostHashResolver); - // store the new table - Log.logInfo("BLOCK RANK", "storing fresh table..."); - final File rankingPath = new File(sb.appPath, "ranking/YBR".replace('/', File.separatorChar)); - BlockRank.storeBlockRankTable(rankingPath); - BlockRank.loadBlockRankTable(rankingPath, 16); - - return prop; - } - -} diff --git a/source/net/yacy/crawler/data/ResultURLs.java b/source/net/yacy/crawler/data/ResultURLs.java index 802e947f4..17480e230 100644 --- a/source/net/yacy/crawler/data/ResultURLs.java +++ b/source/net/yacy/crawler/data/ResultURLs.java @@ -143,17 +143,8 @@ public final class ResultURLs { return getDomains(stack).keys(false); } - public static int deleteDomain(final EventOrigin stack, final String host, final String hosthash) { + public static int deleteDomain(final EventOrigin stack, final String host) { assert host != null : "host = null"; - assert hosthash.length() == 6; - final Iterator> i = results(stack); - Map.Entry w; - String urlhash; - while (i.hasNext()) { - w = i.next(); - urlhash = w.getKey(); - if (urlhash == null || urlhash.substring(6).equals(hosthash)) i.remove(); - } assert getDomains(stack) != null : "getDomains(" + stack + ") = null"; return getDomains(stack).delete(host); } diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 126536ed3..27138a7f1 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -34,9 +34,9 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.TreeSet; import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Pattern; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.ISO8601Formatter; @@ -49,8 +49,8 @@ import net.yacy.cora.federate.solr.connector.MirrorSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.order.CloneableIterator; import net.yacy.cora.sorting.ConcurrentScoreMap; +import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.sorting.ScoreMap; -import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.ZIPReader; import net.yacy.cora.storage.ZIPWriter; import net.yacy.document.parser.html.CharacterCoding; @@ -64,15 +64,15 @@ import net.yacy.kelondro.index.Row; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.util.MemoryControl; -import net.yacy.kelondro.util.MergeIterator; import net.yacy.search.Switchboard; +import org.apache.commons.httpclient.util.DateUtil; import org.apache.lucene.util.Version; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; -public final class Fulltext implements Iterable { +public final class Fulltext { private static final String SOLR_PATH = "solr_40"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml private static final String SOLR_OLD_PATH[] = new String[]{"solr_36"}; @@ -359,7 +359,7 @@ public final class Fulltext implements Iterable { * @return number of deleted domains * @throws IOException */ - public int deleteDomain(final String hosthash, Date freshdate, boolean concurrent) { + public int deleteDomainHashpart(final String hosthash, Date freshdate, boolean concurrent) { // first collect all url hashes that belong to the domain assert hosthash.length() == 6; final String q = YaCySchema.host_id_s.getSolrFieldName() + ":\"" + hosthash + "\"" + @@ -412,6 +412,38 @@ public final class Fulltext implements Iterable { return count.get(); } + public int deleteDomainHostname(final String hostname, Date freshdate, boolean concurrent) { + // first collect all url hashes that belong to the domain + final String q = YaCySchema.host_s.getSolrFieldName() + ":\"" + hostname + "\"" + + ((freshdate != null && freshdate.before(new Date())) ? (" AND " + YaCySchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]") : ""); + final AtomicInteger count = new AtomicInteger(0); + Thread t = new Thread() { + public void run() { + // delete in solr + synchronized (Fulltext.this.solr) { + try { + count.addAndGet(Fulltext.this.solr.deleteByQuery(q)); + if (count.get() > 0) Fulltext.this.solr.commit(true); + } catch (IOException e) {} + } + // finally remove the line with statistics + if (Fulltext.this.statsDump != null) { + final Iterator hsi = Fulltext.this.statsDump.iterator(); + HostStat hs; + while (hsi.hasNext()) { + hs = hsi.next(); + if (hs.hostname.equals(hostname)) { + hsi.remove(); + break; + } + } + } + } + }; + if (concurrent) t.start(); else t.run(); + return count.get(); + } + /** * remove a full subpath from the index * @param subpath the left path of the url; at least until the end of the host @@ -510,96 +542,6 @@ public final class Fulltext implements Iterable { if (reason == null) return null; return reason == null ? null : reason.length() == 0 ? null : reason; } - - @Override - public Iterator iterator() { - CloneableIterator a = null; - if (this.urlIndexFile != null) try {a = this.urlIndexFile.keys(true, null);} catch (IOException e) {} - final Iterator idi = this.solr.iterator(); - CloneableIterator b = new CloneableIterator() { - @Override - public boolean hasNext() { - return idi.hasNext(); - } - @Override - public byte[] next() { - String s = idi.next(); - return s == null ? null : ASCII.getBytes(s); - } - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - @Override - public CloneableIterator clone(Object modifier) { - return this; - } - @Override - public void close() { - } - }; - if (a == null) return b; - return new MergeIterator(a, b, - URIMetadataRow.rowdef.objectOrder, - MergeIterator.simpleMerge, - true); - } - - public CloneableIterator urls() { - // enumerates entry elements - final Iterator ids = iterator(); - return new CloneableIterator() { - @Override - public CloneableIterator clone(final Object secondHash) { - return this; - } - @Override - public final boolean hasNext() { - return ids.hasNext(); - } - @Override - public final DigestURI next() { - byte[] id = ids.next(); - if (id == null) return null; - return getURL(id); - } - @Override - public final void remove() { - ids.remove(); - } - @Override - public void close() { - } - }; - } - - public CloneableIterator entries() { - // enumerates entry elements - final Iterator ids = iterator(); - return new CloneableIterator() { - @Override - public CloneableIterator clone(final Object secondHash) { - return this; - } - @Override - public final boolean hasNext() { - return ids.hasNext(); - } - @Override - public final URIMetadataNode next() { - byte[] id = ids.next(); - if (id == null) return null; - return getMetadata(id); - } - @Override - public final void remove() { - ids.remove(); - } - @Override - public void close() { - } - }; - } public List dumpFiles() { EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0(); @@ -675,12 +617,12 @@ public final class Fulltext implements Iterable { } // export methods - public Export export(final File f, final String filter, final HandleSet set, final int format, final boolean dom) { + public Export export(final File f, final String filter, final int format, final boolean dom) { if ((this.exportthread != null) && (this.exportthread.isAlive())) { Log.logWarning("LURL-EXPORT", "cannot start another export thread, already one running"); return this.exportthread; } - this.exportthread = new Export(f, filter, set, format, dom); + this.exportthread = new Export(f, filter, format, dom); this.exportthread.start(); return this.exportthread; } @@ -691,22 +633,20 @@ public final class Fulltext implements Iterable { public class Export extends Thread { private final File f; - private final String filter; + private final Pattern pattern; private int count; private String failure; private final int format; private final boolean dom; - private final HandleSet set; - private Export(final File f, final String filter, final HandleSet set, final int format, boolean dom) { + private Export(final File f, final String filter, final int format, boolean dom) { // format: 0=text, 1=html, 2=rss/xml this.f = f; - this.filter = filter; + this.pattern = filter == null ? null : Pattern.compile(filter); this.count = 0; this.failure = null; this.format = format; this.dom = dom; - this.set = set; if ((dom) && (format == 2)) dom = false; } @@ -724,43 +664,54 @@ public final class Fulltext implements Iterable { pw.println(""); pw.println(""); pw.println(""); - pw.println("YaCy Peer-to-Peer - Web-Search LURL Export"); + pw.println("YaCy Peer-to-Peer - Web-Search URL Export"); pw.println(""); pw.println("http://yacy.net"); } - + + if (this.dom) { - final TreeSet set = domainNameCollector(-1, domainSampleCollector()); - for (final String host: set) { - if (!host.matches(this.filter)) continue; + Map> scores = Fulltext.this.getSolr().getFacets(YaCySchema.httpstatus_i.getSolrFieldName() + ":200", 100000, YaCySchema.host_s.getSolrFieldName()); + ReversibleScoreMap stats = scores.get(YaCySchema.host_s.getSolrFieldName()); + for (final String host: stats) { + if (this.pattern != null && !this.pattern.matcher(host).matches()) continue; if (this.format == 0) pw.println(host); if (this.format == 1) pw.println("" + host + "
"); this.count++; } } else { - final Iterator i = entries(); // iterates indexURLEntry objects - URIMetadataNode entry; - String url; - while (i.hasNext()) { - entry = i.next(); - if (this.set != null && !this.set.has(entry.hash())) continue; - url = entry.url().toNormalform(true); - if (!url.matches(this.filter)) continue; + BlockingQueue docs = Fulltext.this.getSolr().concurrentQuery(YaCySchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100, + YaCySchema.id.getSolrFieldName(), YaCySchema.sku.getSolrFieldName(), YaCySchema.title.getSolrFieldName(), + YaCySchema.author.getSolrFieldName(), YaCySchema.description.getSolrFieldName(), YaCySchema.size_i.getSolrFieldName(), YaCySchema.last_modified.getSolrFieldName()); + SolrDocument doc; + ArrayList title; + String url, author, description, hash; + Integer size; + Date date; + while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { + hash = (String) doc.getFieldValue(YaCySchema.id.getSolrFieldName()); + url = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()); + title = (ArrayList) doc.getFieldValue(YaCySchema.title.getSolrFieldName()); + author = (String) doc.getFieldValue(YaCySchema.author.getSolrFieldName()); + description = (String) doc.getFieldValue(YaCySchema.description.getSolrFieldName()); + size = (Integer) doc.getFieldValue(YaCySchema.size_i.getSolrFieldName()); + date = (Date) doc.getFieldValue(YaCySchema.last_modified.getSolrFieldName()); + if (this.pattern != null && !this.pattern.matcher(url).matches()) continue; if (this.format == 0) { pw.println(url); } if (this.format == 1) { - pw.println("" + CharacterCoding.unicode2xml(entry.dc_title(), true) + "
"); + if (title != null) pw.println("" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + ""); } if (this.format == 2) { pw.println(""); - pw.println("" + CharacterCoding.unicode2xml(entry.dc_title(), true) + ""); + if (title != null) pw.println("" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + ""); pw.println("" + MultiProtocolURI.escape(url) + ""); - if (!entry.dc_creator().isEmpty()) pw.println("" + CharacterCoding.unicode2xml(entry.dc_creator(), true) + ""); - if (!entry.dc_subject().isEmpty()) pw.println("" + CharacterCoding.unicode2xml(entry.dc_subject(), true) + ""); - pw.println("" + entry.moddate().toString() + ""); - pw.println("" + entry.size() + ""); - pw.println("" + ASCII.String(entry.hash()) + ""); + if (author != null && !author.isEmpty()) pw.println("" + CharacterCoding.unicode2xml(author, true) + ""); + if (description != null && !description.isEmpty()) pw.println("" + CharacterCoding.unicode2xml(description, true) + ""); + if (date != null) pw.println("" + DateUtil.formatDate(date) + ""); + if (size != null) pw.println("" + size.intValue() + ""); + pw.println("" + hash + ""); pw.println(""); } this.count++; @@ -798,60 +749,6 @@ public final class Fulltext implements Iterable { } - /** - * collect domain samples: all url hashes from the metadata database is listed and the domain part - * of the url hashes is used to count how many of these domain hashes appear - * @return a map from domain hashes to hash statistics - * @throws IOException - */ - public Map domainSampleCollector() throws IOException { - final Map map = new HashMap(); - // first collect all domains and calculate statistics about it - synchronized (this) { - final Iterator i = this.iterator(); - String hosthash; - byte[] urlhashb; - URLHashCounter ds; - if (i != null) while (i.hasNext()) { - urlhashb = i.next(); - hosthash = ASCII.String(urlhashb, 6, 6); - ds = map.get(hosthash); - if (ds == null) { - ds = new URLHashCounter(urlhashb); - map.put(hosthash, ds); - } else { - ds.count++; - } - } - } - return map; - } - - /** - * create a list of domain names in this database - * @param count number of entries or -1 for all - * @param domainSamples a map from domain hashes to hash statistics - * @return a set of domain names, ordered by name of the domains - */ - private TreeSet domainNameCollector(int count, final Map domainSamples) { - // collect hashes from all domains - - // fetch urls from the database to determine the host in clear text - DigestURI url; - if (count < 0 || count > domainSamples.size()) count = domainSamples.size(); - this.statsDump = new ArrayList(); - final TreeSet set = new TreeSet(); - for (final URLHashCounter hs: domainSamples.values()) { - if (hs == null) continue; - url = this.getURL(hs.urlhashb); - if (url == null || url.getHost() == null) continue; - set.add(url.getHost()); - count--; - if (count == 0) break; - } - return set; - } - /** * calculate a score map for url hash samples: each sample is a single url hash * that stands for all entries for the corresponding domain. The map counts the number diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index edc745b87..53a6d520a 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -246,7 +246,8 @@ public class QueryGoal { q.append(')'); // add filter to prevent that results come from failed urls - q.append(" AND -").append(YaCySchema.failreason_t.getSolrFieldName()).append(":[* TO *]"); + q.append(" AND ").append(YaCySchema.httpstatus_i.getSolrFieldName()).append(":200"); + //q.append(" AND -").append(YaCySchema.failreason_t.getSolrFieldName()).append(":[* TO *]"); return q; }