Make the current ranking score value available to the search interface / API

Update the score field of each result entry with the ranking value from the result queue, so that it reflects the score that was actually calculated and used, for both RWI and Solr stack results. (The score calculation itself is unchanged; the result entry now simply carries the latest value, because the API retrieves the number from it.)
9 years ago · cdb8f3b10d
parent a622c9b656
commit cdb8f3b10d
2 changed files with 47 additions and 22 deletions
--- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
@ -151,7 +151,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
        this.videoc = Integer.parseInt(prop.getProperty("lvideo", "0"));
        this.appc = Integer.parseInt(prop.getProperty("lapp", "0"));
        this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""));
-        this.score = Float.parseFloat(prop.getProperty("score", "0.0"));
+        this.score = Float.parseFloat(prop.getProperty("score", "0.0")); // we don't use the remote rwi ranking but the local rwi ranking profile
        List<String> cs = new ArrayList<String>();
        cs.add(collection);
        this.setField(CollectionSchema.collection_sxt.name(), cs);
@ -166,8 +166,11 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
        for (String name : doc.getFieldNames()) {
            this.addField(name, doc.getFieldValue(name));
        }
-        Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
+        /* score shall contain the YaCy score, getFieldValue("score") moved to
+        *  SearchEvent.addNodes() where the YaCy ranking for nodes is calculated
+        Float scorex = (Float) doc.getFieldValue("score"); // this is a special Solr field containing the ranking score of a search result
        this.score = scorex == null ? 0.0f : scorex.floatValue();
+        */
        final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ?
        final String urlRaw = getString(CollectionSchema.sku);
        this.url = new DigestURL(urlRaw, hash);
@ -314,10 +317,24 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
        return this.lon;
    }

+    /**
+     * Get the YaCy ranking score for this entry
+     * (the value is updated while adding to the result queue where score calc takes place)
+     * @return YaCy calculated score (number > 0)
+     */
    public float score() {
        return this.score;
    }

+    /**
+     * Set the YaCy ranking score to make it accessible in the search interface/api
+     * (should be set to the effective value of result queues getWeight)
+     * @param theScore YaCy ranking of search results
+     */
+    public void setScore(float theScore) {
+        this.score = theScore;
+    }
+
    public Date loaddate() {
        return getDate(CollectionSchema.load_date_dt);
    }
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@ -978,9 +978,15 @@ public final class SearchEvent {
                this.urlhashes.putUnique(iEntry.hash());
                rankingtryloop: while (true) {
                    try {
-                        long score = (long) Math.max(0, (1000000.0f * iEntry.score()) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
-                        //System.out.println("*** debug-score *** " + score + " for entry " + iEntry.urlstring());
-                        this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest)
+                        long score;
+                        // determine nodestack ranking (will be altered by postranking)
+                        // so far Solr score is used (with arbitrary factor to get value similar to rwi ranking values)
+                        Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
+                        if (scorex != null && scorex > 0)
+                            score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
+                        else
+                            score = this.order.cardinal(iEntry);
+                        this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
                        break rankingtryloop;
                    } catch (final ArithmeticException e ) {
                        // this may happen if the concurrent normalizer changes values during cardinal computation
@ -1008,7 +1014,7 @@ public final class SearchEvent {
     * If the sjupDoubleDom option is selected, only different hosts are returned until no such rwi exists.
     * Then the best entry from domain stacks are returned.
     * @param skipDoubleDom
-     * @return a node from a rwi entry if one exist or null if not
+     * @return a node from a rwi entry if one exist or null if not (with score value set)
     */
    private URIMetadataNode pullOneRWI(final boolean skipDoubleDom) {

@ -1119,7 +1125,7 @@ public final class SearchEvent {
     * the future by calling this.feedingIsFinished()
     *
     * @param skipDoubleDom should be true if it is wanted that double domain entries are skipped
-     * @return a metadata entry for a url
+     * @return a metadata entry for a url (with score value set)
     */
    public URIMetadataNode pullOneFilteredFromRWI(final boolean skipDoubleDom) {
        // returns from the current RWI list the best URL entry and removes this entry from the list
@ -1324,8 +1330,8 @@ public final class SearchEvent {
    public boolean drainStacksToResult() {
        // we take one entry from both stacks at the same time
        boolean success = false;
-        Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
-        URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
+        final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
+        final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
        if (node != null) {
            LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
            if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
@ -1341,16 +1347,16 @@ public final class SearchEvent {
                final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
                final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
                URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() >  yacysnippetline.length() ? solrsnippet : yacysnippet);
-                addResult(re);
+                addResult(re, localEntryElement.getWeight());
                success = true;
            } else {
                // we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
                if (SearchEvent.this.snippetFetchAlive.get() >= 10) {
                    // too many concurrent processes
-                    addResult(getSnippet(node, null));
+                    addResult(getSnippet(node, null), localEntryElement.getWeight());
                    success = true;
                } else {
-                    final URIMetadataNode node1 = node;
+
                    new Thread() {
                        @Override
                        public void run() {
@ -1358,7 +1364,7 @@ public final class SearchEvent {
                            try {
                                SearchEvent.this.snippetFetchAlive.incrementAndGet();
                                try {
-                                    addResult(getSnippet(node1, SearchEvent.this.query.snippetCacheStrategy));
+                                    addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
                                } catch (final Throwable e) {} finally {
                                    SearchEvent.this.snippetFetchAlive.decrementAndGet();
                                }
@ -1372,9 +1378,9 @@ public final class SearchEvent {
        }
        if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus()) {
            // too many concurrent processes
-            node = pullOneFilteredFromRWI(true);
-            if (node != null) {
-                addResult(getSnippet(node, null));
+            final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
+            if (noderwi != null) {
+                addResult(getSnippet(noderwi, null), noderwi.score());
                success = true;
            }
        } else {
@ -1383,11 +1389,11 @@ public final class SearchEvent {
                public void run() {
                    SearchEvent.this.oneFeederStarted();
                    try {
-                        final URIMetadataNode node = pullOneFilteredFromRWI(true);
-                        if (node != null) {
+                        final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
+                        if (noderwi != null) {
                            SearchEvent.this.snippetFetchAlive.incrementAndGet();
                            try {
-                                addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy));
+                                addResult(getSnippet(noderwi, SearchEvent.this.query.snippetCacheStrategy), noderwi.score());
                            } catch (final Throwable e) {
                                ConcurrentLog.logException(e);
                            } finally {    
@ -1406,12 +1412,14 @@ public final class SearchEvent {
    
    /**
     * place the result to the result vector and apply post-ranking
-     * @param resultEntry
+     * post-ranking is added to the current score, 
+     * @param resultEntry to add
+     * @param score current ranking
     */
-    public void addResult(URIMetadataNode resultEntry) {
+    public void addResult(URIMetadataNode resultEntry, final float score) {
        if (resultEntry == null) return;
-        float score = resultEntry.score();
        final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
+        resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
        this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
        if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
        this.addTopics(resultEntry);