git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6343 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 031e6eefbd
commit 1171a72006

@ -43,9 +43,9 @@ import de.anomic.search.RankingProcess;
import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyURL;
public class ReferenceOrder { public class ReferenceOrder {
protected int maxdomcount; protected int maxdomcount;
protected WordReferenceVars min, max; protected WordReferenceVars min, max;
protected final ScoreCluster<String> doms; // collected for "authority" heuristic protected final ScoreCluster<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking; private final RankingProfile ranking;
private String language; private String language;
@ -60,22 +60,22 @@ public class ReferenceOrder {
} }
public class Normalizer extends Thread { public class Normalizer extends Thread {
private ReferenceContainer<WordReference> container; private ReferenceContainer<WordReference> container;
private BlockingQueue<WordReferenceVars> decodedEntries; private BlockingQueue<WordReferenceVars> decodedEntries;
public Normalizer(final ReferenceContainer<WordReference> container) { public Normalizer(final ReferenceContainer<WordReference> container) {
// normalize ranking: find minimum and maximum of separate ranking criteria // normalize ranking: find minimum and maximum of separate ranking criteria
assert (container != null); assert (container != null);
this.container = container; this.container = container;
this.decodedEntries = new LinkedBlockingQueue<WordReferenceVars>(); this.decodedEntries = new LinkedBlockingQueue<WordReferenceVars>();
} }
public void run() { public void run() {
BlockingQueue<WordReferenceVars> vars = WordReferenceVars.transform(container); BlockingQueue<WordReferenceVars> vars = WordReferenceVars.transform(container);
WordReferenceVars entryMin = null; WordReferenceVars entryMin = null;
WordReferenceVars entryMax = null; WordReferenceVars entryMax = null;
HashMap<String, Integer> doms0 = new HashMap<String, Integer>(); HashMap<String, Integer> doms0 = new HashMap<String, Integer>();
Integer int1 = 1; Integer int1 = 1;
@ -83,50 +83,55 @@ public class ReferenceOrder {
String dom; String dom;
Integer count; Integer count;
try { try {
while ((iEntry = vars.take()) != WordReferenceVars.poison) { while ((iEntry = vars.take()) != WordReferenceVars.poison) {
decodedEntries.put(iEntry); decodedEntries.put(iEntry);
// find min/max // find min/max
if (entryMin == null) entryMin = iEntry.clone(); else entryMin.min(iEntry); if (entryMin == null) entryMin = iEntry.clone(); else entryMin.min(iEntry);
if (entryMax == null) entryMax = iEntry.clone(); else entryMax.max(iEntry); if (entryMax == null) entryMax = iEntry.clone(); else entryMax.max(iEntry);
// update domcount // update domcount
dom = iEntry.metadataHash().substring(6); dom = iEntry.metadataHash().substring(6);
count = doms0.get(dom); count = doms0.get(dom);
if (count == null) { if (count == null) {
doms0.put(dom, int1); doms0.put(dom, int1);
} else { } else {
doms0.put(dom, Integer.valueOf(count.intValue() + 1)); doms0.put(dom, Integer.valueOf(count.intValue() + 1));
} }
} }
} catch (InterruptedException e) {}
if (min == null) min = entryMin.clone(); else min.min(entryMin);
if (min == null) min = entryMin.clone(); else min.min(entryMin); if (max == null) max = entryMax.clone(); else max.max(entryMax);
if (max == null) max = entryMax.clone(); else max.max(entryMax); Map.Entry<String, Integer> entry;
Map.Entry<String, Integer> entry; final Iterator<Map.Entry<String, Integer>> di = doms0.entrySet().iterator();
final Iterator<Map.Entry<String, Integer>> di = doms0.entrySet().iterator(); while (di.hasNext()) {
while (di.hasNext()) { entry = di.next();
entry = di.next(); doms.addScore(entry.getKey(), (entry.getValue()).intValue());
doms.addScore(entry.getKey(), (entry.getValue()).intValue()); }
if (doms.size() > 0) maxdomcount = doms.getMaxScore();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
decodedEntries.put(WordReferenceVars.poison);
} catch (InterruptedException e) {}
} }
if (doms.size() > 0) maxdomcount = doms.getMaxScore();
try {
decodedEntries.put(WordReferenceVars.poison);
} catch (InterruptedException e) {}
} }
public BlockingQueue<WordReferenceVars> decoded() { public BlockingQueue<WordReferenceVars> decoded() {
return this.decodedEntries; return this.decodedEntries;
} }
} }
public BlockingQueue<WordReferenceVars> normalizeWith(final ReferenceContainer<WordReference> container) { public BlockingQueue<WordReferenceVars> normalizeWith(final ReferenceContainer<WordReference> container) {
Normalizer n = new Normalizer(container); Normalizer n = new Normalizer(container);
n.start(); n.start();
return n.decoded(); return n.decoded();
} }
public int authority(final String urlHash) { public int authority(final String urlHash) {
return (doms.getScore(urlHash.substring(6)) << 8) / (1 + this.maxdomcount); return (doms.getScore(urlHash.substring(6)) << 8) / (1 + this.maxdomcount);
} }
public long cardinal(final WordReferenceVars t) { public long cardinal(final WordReferenceVars t) {

@ -39,6 +39,7 @@ import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import de.anomic.document.Condenser; import de.anomic.document.Condenser;
import de.anomic.document.Word; import de.anomic.document.Word;
@ -184,7 +185,9 @@ public final class RankingProcess extends Thread {
// apply all constraints // apply all constraints
try { try {
while ((iEntry = decodedEntries.take()) != WordReferenceVars.poison) { while (true) {
iEntry = decodedEntries.poll(1, TimeUnit.SECONDS);
if (iEntry == null || iEntry == WordReferenceVars.poison) break;
assert (iEntry.metadataHash().length() == index.row().primaryKeyLength); assert (iEntry.metadataHash().length() == index.row().primaryKeyLength);
//if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue; //if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue;

Loading…
Cancel
Save