reactivate the use of included-in-topwords boost in postRanking

+ changed the postRanking to add one score only if word appears more as one time.
+ getTopics() unused code block rem'd (save performace)-> routine needs rework !
pull/72/head^2
reger 9 years ago
parent 6801673a07
commit cc2d9dd3f1

@ -1428,7 +1428,11 @@ public final class SearchEvent {
*/
public void addResult(URIMetadataNode resultEntry, final float score) {
if (resultEntry == null) return;
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.ref /*this.getTopicNavigator(MAX_TOPWORDS)*/);
// TODO: above was originally using (see below), but getTopicNavigator returns this.ref and possibliy alters this.ref on first call (this.ref.size < 2 -> this.ref.clear)
// TODO: verify and straighten the use of addTopic, getTopic and getTopicNavigator and related score calculation
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
@ -1467,24 +1471,27 @@ public final class SearchEvent {
final String urlstring = rentry.url().toNormalform(true);
final String[] urlcomps = MultiProtocolURL.urlComps(urlstring);
final String[] descrcomps = MultiProtocolURL.splitpattern.split(rentry.title().toLowerCase());
for (final String urlcomp : urlcomps) {
// apply query-in-result matching
final QueryGoal.NormalizedWords urlcompmap = new QueryGoal.NormalizedWords(urlcomps);
final QueryGoal.NormalizedWords descrcompmap = new QueryGoal.NormalizedWords(descrcomps);
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurances
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
for (final String urlcomp : urlcompmap) {
int tc = topwords.get(urlcomp);
if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_urlcompintoplist;
}
for (final String descrcomp : descrcomps) {
for (final String descrcomp : descrcompmap) {
int tc = topwords.get(descrcomp);
if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_descrcompintoplist;
}
// apply query-in-result matching
final QueryGoal.NormalizedWords urlcomph = new QueryGoal.NormalizedWords(urlcomps);
final QueryGoal.NormalizedWords descrcomph = new QueryGoal.NormalizedWords(descrcomps);
final Iterator<String> shi = this.query.getQueryGoal().getIncludeWords();
String queryword;
while (shi.hasNext()) {
queryword = shi.next();
if (urlcomph.contains(queryword)) r += 256 << this.query.ranking.coeff_appurl;
if (descrcomph.contains(queryword)) r += 256 << this.query.ranking.coeff_app_dc_title;
if (urlcompmap.contains(queryword)) r += 256 << this.query.ranking.coeff_appurl;
if (descrcompmap.contains(queryword)) r += 256 << this.query.ranking.coeff_app_dc_title;
}
return r;
}
@ -1827,14 +1834,24 @@ public final class SearchEvent {
// this is only available if execQuery() was called before
return this.localSearchInclusion;
}
/**
* create a list of words that had been computed by statistics over all
* words that appeared in the url or the description of all urls
*
* @param maxcount max number of topwords to return
* @param maxtime max time allowed to use
* @return
*/
public ScoreMap<String> getTopics(final int maxcount, final long maxtime) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
final ScoreMap<String> result = new ConcurrentScoreMap<String>();
if ( this.ref.sizeSmaller(2) ) {
this.ref.clear(); // navigators with one entry are not useful
}
/* ---------------------------------- start of rem (2016-09-03)
// TODO: result map is not used currently, verify if it should and use or delete this code block
// TODO: as it is not used now - in favour of performance this code block is rem'ed (2016-09-03)
final Map<String, Float> counts = new HashMap<String, Float>();
final Iterator<String> i = this.ref.keys(false);
String word;
@ -1860,6 +1877,7 @@ public final class SearchEvent {
result.set(ce.getKey(), (int) (((double) maxcount) * (ce.getValue() - min) / (max - min)));
}
}
/* ------------------------------------ end of rem (2016-09-03) */
return this.ref;
}

Loading…
Cancel
Save