Make the current ranking score value available to the search interface / API

Update the score field of the result entry with the result queue ranking value so that it
reflects the score that was actually calculated and used,
for both RWI and Solr stack results.
(The calculation itself is unchanged; the result entry simply carries the latest value,
because the API retrieves the number from it.)
pull/34/head
reger 9 years ago
parent a622c9b656
commit cdb8f3b10d

@ -151,7 +151,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
this.videoc = Integer.parseInt(prop.getProperty("lvideo", "0"));
this.appc = Integer.parseInt(prop.getProperty("lapp", "0"));
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""));
this.score = Float.parseFloat(prop.getProperty("score", "0.0"));
this.score = Float.parseFloat(prop.getProperty("score", "0.0")); // we don't use the remote rwi ranking but the local rwi ranking profile
List<String> cs = new ArrayList<String>();
cs.add(collection);
this.setField(CollectionSchema.collection_sxt.name(), cs);
@ -166,8 +166,11 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
for (String name : doc.getFieldNames()) {
this.addField(name, doc.getFieldValue(name));
}
Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
/* score shall contain the YaCy score, getFieldValue("score") moved to
* SearchEvent.addNodes() where the YaCy ranking for nodes is calculated
Float scorex = (Float) doc.getFieldValue("score"); // this is a special Solr field containing the ranking score of a search result
this.score = scorex == null ? 0.0f : scorex.floatValue();
*/
final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id ?
final String urlRaw = getString(CollectionSchema.sku);
this.url = new DigestURL(urlRaw, hash);
@ -314,10 +317,24 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
return this.lon;
}
/**
 * Returns the YaCy ranking score currently stored on this entry.
 * (the stored value is refreshed when the entry is added to the result queue,
 * which is where the score calculation takes place)
 * NOTE(review): earlier documentation stated the value is a number &gt; 0; nothing
 * in this accessor enforces a lower bound - confirm against the callers.
 * @return the YaCy calculated score held by this entry
 */
public float score() {
    return score;
}
/**
 * Stores the YaCy ranking score on this entry so that the search interface/api can read it
 * (callers should pass the effective value of the result queue's getWeight)
 * @param theScore YaCy ranking of the search result
 */
public void setScore(float theScore) {
    score = theScore;
}
/**
 * Reads the date stored under the {@code load_date_dt} field of the collection schema.
 * @return whatever {@code getDate} yields for {@code CollectionSchema.load_date_dt}
 */
public Date loaddate() {
    final Date loadDate = getDate(CollectionSchema.load_date_dt);
    return loadDate;
}

@ -978,9 +978,15 @@ public final class SearchEvent {
this.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) {
try {
long score = (long) Math.max(0, (1000000.0f * iEntry.score()) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
//System.out.println("*** debug-score *** " + score + " for entry " + iEntry.urlstring());
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest)
long score;
// determine nodestack ranking (will be altered by postranking)
// so far Solr score is used (with arbitrary factor to get value similar to rwi ranking values)
Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
if (scorex != null && scorex > 0)
score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
else
score = this.order.cardinal(iEntry);
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch (final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@ -1008,7 +1014,7 @@ public final class SearchEvent {
* If the skipDoubleDom option is selected, only different hosts are returned until no such rwi exists.
* Then the best entry from the domain stacks is returned.
* @param skipDoubleDom
* @return a node from a rwi entry if one exist or null if not
* @return a node from a rwi entry if one exist or null if not (with score value set)
*/
private URIMetadataNode pullOneRWI(final boolean skipDoubleDom) {
@ -1119,7 +1125,7 @@ public final class SearchEvent {
* the future by calling this.feedingIsFinished()
*
* @param skipDoubleDom should be true if it is wanted that double domain entries are skipped
* @return a metadata entry for a url
* @return a metadata entry for a url (with score value set)
*/
public URIMetadataNode pullOneFilteredFromRWI(final boolean skipDoubleDom) {
// returns from the current RWI list the best URL entry and removes this entry from the list
@ -1324,8 +1330,8 @@ public final class SearchEvent {
public boolean drainStacksToResult() {
// we take one entry from both stacks at the same time
boolean success = false;
Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
@ -1341,16 +1347,16 @@ public final class SearchEvent {
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re);
addResult(re, localEntryElement.getWeight());
success = true;
} else {
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
if (SearchEvent.this.snippetFetchAlive.get() >= 10) {
// too many concurrent processes
addResult(getSnippet(node, null));
addResult(getSnippet(node, null), localEntryElement.getWeight());
success = true;
} else {
final URIMetadataNode node1 = node;
new Thread() {
@Override
public void run() {
@ -1358,7 +1364,7 @@ public final class SearchEvent {
try {
SearchEvent.this.snippetFetchAlive.incrementAndGet();
try {
addResult(getSnippet(node1, SearchEvent.this.query.snippetCacheStrategy));
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
} catch (final Throwable e) {} finally {
SearchEvent.this.snippetFetchAlive.decrementAndGet();
}
@ -1372,9 +1378,9 @@ public final class SearchEvent {
}
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus()) {
// too many concurrent processes
node = pullOneFilteredFromRWI(true);
if (node != null) {
addResult(getSnippet(node, null));
final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
if (noderwi != null) {
addResult(getSnippet(noderwi, null), noderwi.score());
success = true;
}
} else {
@ -1383,11 +1389,11 @@ public final class SearchEvent {
public void run() {
SearchEvent.this.oneFeederStarted();
try {
final URIMetadataNode node = pullOneFilteredFromRWI(true);
if (node != null) {
final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
if (noderwi != null) {
SearchEvent.this.snippetFetchAlive.incrementAndGet();
try {
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy));
addResult(getSnippet(noderwi, SearchEvent.this.query.snippetCacheStrategy), noderwi.score());
} catch (final Throwable e) {
ConcurrentLog.logException(e);
} finally {
@ -1406,12 +1412,14 @@ public final class SearchEvent {
/**
* place the result to the result vector and apply post-ranking
* @param resultEntry
* post-ranking is added to the current score,
* @param resultEntry to add
* @param score current ranking
*/
public void addResult(URIMetadataNode resultEntry) {
public void addResult(URIMetadataNode resultEntry, final float score) {
if (resultEntry == null) return;
float score = resultEntry.score();
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
this.addTopics(resultEntry);

Loading…
Cancel
Save