From 000dde95116b89366ffdcbbb330255de9ec2e7a9 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 26 May 2015 04:15:00 +0200 Subject: [PATCH] Eliminate duplication of values for search ResultEntry by instantiation from URIMetadataNode, by eliminating differentiation of ResultEntry/URIMetadataNode. - moved remaining ResultEntry functionality to URIMetadataNode - for 1:1 functionality added a function makeResultEntry() - removed ResultEntry - refactored related code Main difference is after makeResultEntry the text_t content is removed and alternative title/url strings for display are calculated. Main difference left is, that --- htroot/yacy/search.java | 6 +- htroot/yacysearchitem.java | 16 +- .../kelondro/data/meta/URIMetadataNode.java | 156 +++++++++++++++- source/net/yacy/kelondro/index/BinSearch.java | 117 ------------ source/net/yacy/search/query/SearchEvent.java | 47 ++--- .../net/yacy/search/snippet/ResultEntry.java | 166 ------------------ 6 files changed, 188 insertions(+), 320 deletions(-) delete mode 100644 source/net/yacy/kelondro/index/BinSearch.java delete mode 100644 source/net/yacy/search/snippet/ResultEntry.java diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index a5ce1170b..dd050b86f 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -52,6 +52,7 @@ import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.ByteBuffer; import net.yacy.cora.util.SpaceExceededException; import net.yacy.gui.Audio; +import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceRow; @@ -77,7 +78,6 @@ import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventType; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ResultEntry; import net.yacy.server.serverCore; import 
net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -221,7 +221,7 @@ public final class search { int indexabstractContainercount = 0; QueryParams theQuery = null; SearchEvent theSearch = null; - ArrayList> accu = null; + ArrayList> accu = null; if (query.isEmpty() && abstractSet != null) { // this is _not_ a normal search, only a request for index abstracts final Segment indexSegment = sb.index; @@ -413,7 +413,7 @@ public final class search { final long timer = System.currentTimeMillis(); final StringBuilder links = new StringBuilder(6000); String resource = null; - WeakPriorityBlockingQueue.Element entry; + WeakPriorityBlockingQueue.Element entry; for (int i = 0; i < accu.size(); i++) { entry = accu.get(i); resource = entry.getElement().resource(); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 0668795ef..25b6d4780 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -28,7 +28,6 @@ import java.net.MalformedURLException; import java.util.Collection; import java.util.Date; import java.util.Iterator; -import java.util.Map; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.ISO8601Formatter; @@ -44,6 +43,7 @@ import net.yacy.cora.protocol.RequestHeader.FileType; import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.Memory; import net.yacy.crawler.data.Cache; +import net.yacy.crawler.retrieval.Response; import net.yacy.data.URLLicense; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.util.Formatter; @@ -58,7 +58,6 @@ import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventType; -import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.TextSnippet; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -115,7 +114,7 @@ public class yacysearchitem { // text search // generate result object 
- final ResultEntry result = theSearch.oneResult(item, timeout); + final URIMetadataNode result = theSearch.oneResult(item, timeout); if (result == null) return prop; // no content final String resultUrlstring = result.urlstring(); final DigestURL resultURL = result.url(); @@ -218,13 +217,11 @@ public class yacysearchitem { prop.put("content_showProxy_link", resultUrlstring); prop.put("content_showHostBrowser_link", resultUrlstring); if (sb.getConfigBool("search.result.show.vocabulary", true)) { - URIMetadataNode node = result; int c = 0; - for (Map.Entry entry: node.entrySet()) { - String key = entry.getKey(); + for (String key: result.getFieldNames()) { if (key.startsWith("vocabulary_") && key.endsWith("_sxt")) { @SuppressWarnings("unchecked") - Collection terms = (Collection) entry.getValue(); + Collection terms = result.getFieldValues(key); prop.putHTML("content_showVocabulary_vocabulary_" + c + "_name", key.substring(11, key.length() - 4)); prop.putHTML("content_showVocabulary_vocabulary_" + c + "_terms", terms.toString()); c++; @@ -276,8 +273,7 @@ public class yacysearchitem { prop.put("content_heuristic_name", heuristic.heuristicName); } EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "" + item, 0, 0), false); - final String ext = MultiProtocolURL.getFileExtension(resultFileName); - if (MultiProtocolURL.isImage(ext)) { + if (result.doctype() == Response.DT_IMAGE) { final String license = URLLicense.aquireLicense(resultURL); prop.put("content_code", license); } else { @@ -343,7 +339,7 @@ public class yacysearchitem { // any other media content // generate result object - final ResultEntry ms = theSearch.oneResult(item, timeout); + final URIMetadataNode ms = theSearch.oneResult(item, timeout); prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content if (ms == null) { prop.put("content_item", "0"); diff --git 
a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index cbe6ccc52..3fe3da8a7 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -22,6 +22,7 @@ package net.yacy.kelondro.data.meta; +import java.io.IOException; import java.net.MalformedURLException; import java.text.ParseException; import java.util.ArrayList; @@ -39,6 +40,7 @@ import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.solr.SolrType; import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.order.Base64Order; @@ -46,14 +48,20 @@ import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.retrieval.Response; import net.yacy.document.Condenser; import net.yacy.document.SentenceReader; +import net.yacy.document.parser.pdfParser; +import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.MapTools; import net.yacy.kelondro.util.kelondroException; +import net.yacy.peers.Seed; +import net.yacy.peers.SeedDB; +import net.yacy.search.index.Segment; import net.yacy.search.query.QueryParams; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionSchema; +import net.yacy.search.snippet.TextSnippet; import net.yacy.utils.crypt; import org.apache.solr.common.SolrDocument; @@ -64,7 +72,7 @@ import org.apache.solr.common.SolrDocument; * The purpose of this object is the migration from the old metadata structure to solr document. 
* Future implementations should try to replace URIMetadata objects completely by SolrDocument objects */ -public class URIMetadataNode extends SolrDocument { +public class URIMetadataNode extends SolrDocument /* implements Comparable, Comparator */ { private static final long serialVersionUID = -256046934741561968L; @@ -77,6 +85,11 @@ public class URIMetadataNode extends SolrDocument { protected String snippet = null; protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests + // fields for search results (implemented from ResultEntry) + private String alternative_urlstring; + private String alternative_urlname; + private TextSnippet textSnippet = null; + public URIMetadataNode(final Properties prop, String collection) { // generates an plasmaLURLEntry using the properties from the argument // the property names must correspond to the one from toString @@ -662,4 +675,145 @@ public class URIMetadataNode extends SolrDocument { return a; } + // --- implementation for use as search result ---------- + /** + * Initialisize some variables only needed for search results + * and eleminates underlaying fields not needed for search results + * + * ! 
never put this back to the index because of the reduced content fields + * @param indexSegment + * @param peers + * @param textSnippet + * @return + */ + public URIMetadataNode makeResultEntry( + final Segment indexSegment, + SeedDB peers, + final TextSnippet textSnippet) { + this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here + //this.indexSegment = indexSegment; + this.alternative_urlstring = null; + this.alternative_urlname = null; + this.textSnippet = textSnippet; + final String host = this.url().getHost(); + if (host != null && host.endsWith(".yacyh")) { + // translate host into current IP + int p = host.indexOf('.'); + final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6)); + final Seed seed = peers.getConnected(hash); + final String path = this.url().getFile(); + String address = null; + if ((seed == null) || ((address = seed.getPublicAddress(seed.getIP())) == null)) { + // seed is not known from here + try { + if (indexSegment.termIndex() != null) indexSegment.termIndex().remove( + Word.words2hashesHandles(Condenser.getWords( + ("yacyshare " + + path.replace('?', ' ') + + " " + + this.dc_title()), null).keySet()), + this.hash()); + } catch (final IOException e) { + ConcurrentLog.logException(e); + } + indexSegment.fulltext().remove(this.hash()); // clean up + throw new RuntimeException("index void"); + } + this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path; + this.alternative_urlname = "http://share." 
+ seed.getName() + ".yacy" + path; + if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p); + } + return this; + } + /** + * used for search result entry + */ + public String urlstring() { + if (this.alternative_urlstring != null) return this.alternative_urlstring; + + if (!pdfParser.individualPages) return this.url().toNormalform(true); + if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true); + // for pdf links we rewrite the url + // this is a special treatment of pdf files which can be splitted into subpages + String pageprop = pdfParser.individualPagePropertyname; + String resultUrlstring = this.url().toNormalform(true); + int p = resultUrlstring.lastIndexOf(pageprop + "="); + if (p > 0) { + return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1); + } + return resultUrlstring; + } + /** + * used for search result entry + */ + public String urlname() { + return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname; + } + /** + * used for search result entry + */ + public String title() { + String titlestr = this.dc_title(); + // if title is empty use filename as title + if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" ) + titlestr = this.url() != null ? 
this.url().getFileName() : ""; + } + return titlestr; + } + /** + * used for search result entry + */ + public TextSnippet textSnippet() { + return this.textSnippet; + } + /** + * used for search result entry + */ + public Date[] events() { + return this.datesInContent(); + } + /** + * used for search result entry + */ + public boolean hasTextSnippet() { + return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail()); + } + /** + * used for search result entry + */ + public String resource() { + // generate transport resource + if ((this.textSnippet == null) || (!this.textSnippet.exists())) { + return this.toString(); + } + return this.toString(this.textSnippet.getLineRaw()); + } +/* + taken from ResultEntry (should work without) + + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override + public int hashCode() { + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.hash()); + } + return this.hashCache; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (!(obj instanceof URIMetadataNode)) return false; + URIMetadataNode other = (URIMetadataNode) obj; + return Base64Order.enhancedCoder.equal(this.hash(), other.hash()); + } + @Override + public int compareTo(URIMetadataNode o) { + return Base64Order.enhancedCoder.compare(this.hash(), o.hash()); + } + @Override + public int compare(URIMetadataNode o1, URIMetadataNode o2) { + return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash()); + }*/ } \ No newline at end of file diff --git a/source/net/yacy/kelondro/index/BinSearch.java b/source/net/yacy/kelondro/index/BinSearch.java deleted file mode 100644 index 61426c4de..000000000 --- a/source/net/yacy/kelondro/index/BinSearch.java +++ /dev/null @@ -1,117 +0,0 @@ -// BinSearch.java -// ----------------------- -// part of The Kelondro Database -// (C) by Michael Peter 
Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// created 22.11.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.index; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -import net.yacy.cora.order.ByteOrder; -import net.yacy.cora.order.NaturalOrder; - - -public final class BinSearch { - - private final byte[] chunks; - private final int chunksize; - private final int count; - private static final ByteOrder objectOrder = new NaturalOrder(true); // the natural order is much faster than the b64Order - - public BinSearch(final byte[] chunks, final int chunksize) { - this.chunks = chunks; - this.chunksize = chunksize; - this.count = chunks.length / chunksize; - } - - public BinSearch(final List chunkList, final int chunksize) { - byte[][] chunksa = new byte[chunkList.size()][]; - chunksa = chunkList.toArray(chunksa); - Arrays.sort(chunksa, objectOrder); - this.chunks = new byte[chunkList.size() * chunksize]; - for (int i = 0; i < chunksa.length; i++) System.arraycopy(chunksa[i], 0, this.chunks, i * chunksize, chunksize); - this.chunksize = chunksize; - this.count = 
chunks.length / chunksize; - assert this.count == chunkList.size(); - } - - public final boolean contains(final byte[] t) { - return contains(t, 0, this.count); - } - - private final boolean contains(final byte[] t, int beginPos, int endPos) { - // the endPos is exclusive, beginPos is inclusive - // this method is synchronized to make the use of the buffer possible - assert t.length == this.chunksize; - while (true) { - if (beginPos >= endPos) return false; - final int pivot = (beginPos + endPos) / 2; - if ((pivot < 0) || (pivot >= this.count)) return false; - assert this.chunksize == t.length; - final int c = objectOrder.compare(this.chunks, pivot * this.chunksize, t, 0, this.chunksize); - if (c == 0) return true; - if (c < 0) /* buffer < t */ {beginPos = pivot + 1; continue;} - if (c > 0) /* buffer > t */ {endPos = pivot; continue;} - return false; - } - } - - public final int size() { - return count; - } - - public final byte[] get(final int element) { - final byte[] a = new byte[chunksize]; - System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize); - return a; - } - - public final byte[] get(final int element, byte[] a) { - assert a.length == chunksize; - System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize); - return a; - } - - public final void write(File f) throws IOException { - FileOutputStream os = new FileOutputStream(f); - os.write(this.chunks); - os.flush(); - os.close(); - } - - public static void main(final String[] args) { - final String s = "4CEvsI8FRczRBo_ApRCkwfEbFLn1pIFXg39QGMgj5RHM6HpIMJq67QX3M5iQYr_LyI_5aGDaa_bYbRgJ9XnQjpmq6QkOoGWAoEaihRqhV3kItLFHjRtqauUR"; - final BinSearch bs = new BinSearch(s.getBytes(), 6); - for (int i = 0; i + 6 <= s.length(); i = i + 6) { - System.out.println(s.substring(i, i + 6) + ":" + ((bs.contains(s.substring(i, i + 6).getBytes())) ? 
"drin" : "draussen")); - } - for (int i = 0; i + 7 <= s.length(); i = i + 6) { - System.out.println(s.substring(i + 1, i + 7) + ":" + ((bs.contains(s.substring(i + 1, i + 7).getBytes())) ? "drin" : "draussen")); - } - } -} diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index dd896c4c0..b8f10a48b 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -99,7 +99,6 @@ import net.yacy.search.index.Segment; import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionSchema; -import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.TextSnippet; import net.yacy.search.snippet.TextSnippet.ResultClass; @@ -174,7 +173,7 @@ public final class SearchEvent { private final Map taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris private final WeakPriorityBlockingQueue rwiStack; // thats the bag where the RWI search process writes to private final WeakPriorityBlockingQueue nodeStack; // thats the bag where the solr results are written to - private final WeakPriorityBlockingQueue resultList; // thats the result list where the actual search result is waiting to be displayed + private final WeakPriorityBlockingQueue resultList; // thats the result list where the actual search result is waiting to be displayed private final boolean pollImmediately; // if this is true, then every entry in result List is polled immediately to prevent a re-ranking in the resultList. This is usefull if there is only one index source. 
public final boolean excludeintext_image; @@ -406,7 +405,7 @@ public final class SearchEvent { this.deleteIfSnippetFail = deleteIfSnippetFail; this.urlRetrievalAllTime = 0; this.snippetComputationAllTime = 0; - this.resultList = new WeakPriorityBlockingQueue(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking + this.resultList = new WeakPriorityBlockingQueue(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking // snippets do not need to match with the complete query hashes, // only with the query minus the stopwords which had not been used for the search @@ -1312,7 +1311,7 @@ public final class SearchEvent { false); final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal()); final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal()); - ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet); + URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? 
solrsnippet : yacysnippet); addResult(re); success = true; } else { @@ -1380,16 +1379,16 @@ public final class SearchEvent { * place the result to the result vector and apply post-ranking * @param resultEntry */ - public void addResult(ResultEntry resultEntry) { + public void addResult(URIMetadataNode resultEntry) { if (resultEntry == null) return; float score = resultEntry.score(); final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/); - this.resultList.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow + this.resultList.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries. this.addTopics(resultEntry); } - private long postRanking(final ResultEntry rentry, final ScoreMap topwords) { + private long postRanking(final URIMetadataNode rentry, final ScoreMap topwords) { long r = 0; // for media search: prefer pages with many links @@ -1400,8 +1399,10 @@ public final class SearchEvent { // apply citation count //System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother()); - r += (128 * rentry.referencesCount() / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation; - + if (this.query.getSegment().connectedCitation()) { + int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash()); + r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation; + } /* else r += 0; */ // prefer hit with 'prefer' pattern if (this.query.prefer.matcher(rentry.url().toNormalform(true)).matches()) r += 256 << this.query.ranking.coeff_prefer; if 
(this.query.prefer.matcher(rentry.title()).matches()) r += 256 << this.query.ranking.coeff_prefer; @@ -1432,7 +1433,7 @@ public final class SearchEvent { return r; } - public ResultEntry getSnippet(URIMetadataNode page, final CacheStrategy cacheStrategy) { + public URIMetadataNode getSnippet(URIMetadataNode page, final CacheStrategy cacheStrategy) { if (page == null) return null; if (cacheStrategy == null) { @@ -1444,7 +1445,7 @@ public final class SearchEvent { ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))), SearchEvent.SNIPPET_MAX_LENGTH, !this.query.isLocal()); - return new ResultEntry(page, this.query.getSegment(), this.peers, snippet); // result without snippet + return page.makeResultEntry(this.query.getSegment(), this.peers, snippet); // result without snippet } // load snippet @@ -1464,16 +1465,16 @@ public final class SearchEvent { if (!snippet.getErrorCode().fail()) { // we loaded the file and found the snippet - return new ResultEntry(page, this.query.getSegment(), this.peers, snippet); // result with snippet attached + return page.makeResultEntry(this.query.getSegment(), this.peers, snippet); // result with snippet attached } else if (cacheStrategy.mustBeOffline()) { // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result // this may happen during a remote search, because snippet loading is omitted to retrieve results faster - return new ResultEntry(page, this.query.getSegment(), this.peers, null); // result without snippet + return page.makeResultEntry(this.query.getSegment(), this.peers, null); // result without snippet } else { // problems with snippet fetch if (this.snippetFetchWordHashes.has(Segment.catchallHash)) { // we accept that because the word cannot be on the page - return new ResultEntry(page, this.query.getSegment(), this.peers, null); + return page.makeResultEntry(this.query.getSegment(), this.peers, null); } final 
String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); if (this.deleteIfSnippetFail) { @@ -1483,10 +1484,10 @@ public final class SearchEvent { return null; } } - return new ResultEntry(page, this.query.getSegment(), this.peers, null); // result without snippet + return page.makeResultEntry(this.query.getSegment(), this.peers, null); // result without snippet } - public ResultEntry oneResult(final int item, final long timeout) { + public URIMetadataNode oneResult(final int item, final long timeout) { // check if we already retrieved this item // (happens if a search pages is accessed a second time) final long finishTime = timeout == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + timeout; @@ -1515,7 +1516,7 @@ public final class SearchEvent { // check if we have a success if (this.resultList.sizeAvailable() > item) { // we have the wanted result already in the result array .. return that - final ResultEntry re = this.resultList.element(item).getElement(); + final URIMetadataNode re = this.resultList.element(item).getElement(); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false); if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) { @@ -1570,7 +1571,7 @@ public final class SearchEvent { public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException { if (item < imageViewed.size()) return nthImage(item); if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare - ResultEntry doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare + URIMetadataNode doc = 
oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare // check if the match was made in the url or in the image links if (doc == null) { if (hasSpare()) return nextSpare(); @@ -1641,12 +1642,12 @@ public final class SearchEvent { } } - public ArrayList> completeResults(final long waitingtime) { + public ArrayList> completeResults(final long waitingtime) { final long timeout = waitingtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + waitingtime; int i = 0; while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) { - ResultEntry re = oneResult(i++, timeout - System.currentTimeMillis()); + URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis()); if (re == null) break; } return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable())); @@ -1659,8 +1660,8 @@ public final class SearchEvent { * @return true if an entry was deleted, false otherwise */ protected boolean delete(final String urlhash) { - final Iterator> i = this.resultList.iterator(); - Element entry; + final Iterator> i = this.resultList.iterator(); + Element entry; while (i.hasNext()) { entry = i.next(); if (urlhash.equals(ASCII.String(entry.getElement().url().hash()))) { @@ -1810,7 +1811,7 @@ public final class SearchEvent { } } - protected void addTopics(final ResultEntry resultEntry) { + protected void addTopics(final URIMetadataNode resultEntry) { // take out relevant information for reference computation if ((resultEntry.url() == null) || (resultEntry.title() == null)) return; final String[] descrcomps = MultiProtocolURL.splitpattern.split(resultEntry.title()); // words in the description diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java deleted file mode 100644 index 402fc5cd0..000000000 --- 
a/source/net/yacy/search/snippet/ResultEntry.java +++ /dev/null @@ -1,166 +0,0 @@ -// ResultEntry.java -// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 10.10.2005 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.search.snippet; - -import java.io.IOException; -import java.util.Comparator; -import java.util.Date; - -import net.yacy.cora.document.id.MultiProtocolURL; -import net.yacy.cora.order.Base64Order; -import net.yacy.cora.util.ByteArray; -import net.yacy.cora.util.ConcurrentLog; -import net.yacy.document.Condenser; -import net.yacy.document.parser.pdfParser; -import net.yacy.kelondro.data.meta.URIMetadataNode; -import net.yacy.kelondro.data.word.Word; -import net.yacy.peers.Seed; -import net.yacy.peers.SeedDB; -import net.yacy.search.index.Segment; -import net.yacy.search.schema.CollectionSchema; - - -public class ResultEntry extends URIMetadataNode implements Comparable, Comparator { - - private static final long serialVersionUID = -256046934741561978L; - // payload objects - private String alternative_urlstring; - private String alternative_urlname; - 
private final TextSnippet textSnippet; - private final Segment indexSegment; - - public ResultEntry(final URIMetadataNode urlentry, - final Segment indexSegment, - SeedDB peers, - final TextSnippet textSnippet) { - super(urlentry); - this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here - this.indexSegment = indexSegment; - this.alternative_urlstring = null; - this.alternative_urlname = null; - this.textSnippet = textSnippet; - final String host = urlentry.url().getHost(); - if (host != null && host.endsWith(".yacyh")) { - // translate host into current IP - int p = host.indexOf('.'); - final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6)); - final Seed seed = peers.getConnected(hash); - final String path = urlentry.url().getFile(); - String address = null; - if ((seed == null) || ((address = seed.getPublicAddress(seed.getIP())) == null)) { - // seed is not known from here - try { - if (indexSegment.termIndex() != null) indexSegment.termIndex().remove( - Word.words2hashesHandles(Condenser.getWords( - ("yacyshare " + - path.replace('?', ' ') + - " " + - urlentry.dc_title()), null).keySet()), - urlentry.hash()); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } - indexSegment.fulltext().remove(urlentry.hash()); // clean up - throw new RuntimeException("index void"); - } - this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path; - this.alternative_urlname = "http://share." 
+ seed.getName() + ".yacy" + path; - if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p); - } - } - private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful - @Override - public int hashCode() { - if (this.hashCache == Integer.MIN_VALUE) { - this.hashCache = ByteArray.hashCode(this.hash()); - } - return this.hashCache; - } - @Override - public boolean equals(final Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (!(obj instanceof ResultEntry)) return false; - ResultEntry other = (ResultEntry) obj; - return Base64Order.enhancedCoder.equal(this.hash(), other.hash()); - } - - public String urlstring() { - if (this.alternative_urlstring != null) return this.alternative_urlstring; - - if (!pdfParser.individualPages) return this.url().toNormalform(true); - if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true); - // for pdf links we rewrite the url - // this is a special treatment of pdf files which can be splitted into subpages - String pageprop = pdfParser.individualPagePropertyname; - String resultUrlstring = this.url().toNormalform(true); - int p = resultUrlstring.lastIndexOf(pageprop + "="); - if (p > 0) { - return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1); - } - return resultUrlstring; - } - public String urlname() { - return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname; - } - public String title() { - String titlestr = this.dc_title(); - // if title is empty use filename as title - if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" ) - titlestr = this.url() != null ? 
this.url().getFileName() : ""; - } - return titlestr; - } - public TextSnippet textSnippet() { - return this.textSnippet; - } - public Date[] events() { - return this.datesInContent(); - } - public int referencesCount() { - // urlCitationIndex index might be null (= configuration option) - return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.hash()) : 0; - } - public boolean hasTextSnippet() { - return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail()); - } - public String resource() { - // generate transport resource - if ((this.textSnippet == null) || (!this.textSnippet.exists())) { - return this.toString(); - } - return this.toString(this.textSnippet.getLineRaw()); - } - @Override - public int compareTo(ResultEntry o) { - return Base64Order.enhancedCoder.compare(this.hash(), o.hash()); - } - @Override - public int compare(ResultEntry o1, ResultEntry o2) { - return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash()); - } -}