removed unused code (HostStat)

pull/1/head
orbiter 11 years ago
parent d3a88eaecb
commit 76c53faeb2

@ -32,7 +32,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -56,7 +55,6 @@ import net.yacy.cora.federate.solr.instance.RemoteInstance;
import net.yacy.cora.federate.solr.instance.ShardInstance;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.ZIPReader;
import net.yacy.cora.storage.ZIPWriter;
@ -87,7 +85,6 @@ public final class Fulltext {
private final File segmentPath;
private final File archivePath;
private Export exportthread; // will have a export thread assigned if exporter is running
private ArrayList<HostStat> statsDump;
private InstanceMirror solrInstances;
private final CollectionConfiguration collectionConfiguration;
private final WebgraphConfiguration webgraphConfiguration;
@ -98,7 +95,6 @@ public final class Fulltext {
this.segmentPath = segmentPath;
this.archivePath = archivePath;
this.exportthread = null; // will have a export thread assigned if exporter is running
this.statsDump = null;
this.solrInstances = new InstanceMirror();
this.collectionConfiguration = collectionConfiguration;
this.webgraphConfiguration = webgraphConfiguration;
@ -206,9 +202,7 @@ public final class Fulltext {
}
/**
 * Drops the cached host statistics and asks the Solr instance mirror to
 * clear its caches.
 *
 * The statistics field is read once into a local variable: other methods of
 * this class reset the field to {@code null}, so checking the field and then
 * dereferencing it again could fail with a NullPointerException.
 */
public void clearCaches() {
    final ArrayList<HostStat> dump = this.statsDump; // single read of the shared field
    if (dump != null) {
        dump.clear();
    }
    this.solrInstances.clearCaches();
    this.statsDump = null;
}
public void clearLocalSolr() throws IOException {
@ -261,7 +255,6 @@ public final class Fulltext {
}
public void close() {
this.statsDump = null;
try {
this.solrInstances.close();
} catch (Throwable e) {}
@ -347,7 +340,6 @@ public final class Fulltext {
} catch (final SolrException e) {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCaches();
}
@ -359,7 +351,6 @@ public final class Fulltext {
} catch (final SolrException e) {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCaches();
}
@ -378,7 +369,6 @@ public final class Fulltext {
} catch (final SolrException e) {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCaches();
}
@ -398,16 +388,6 @@ public final class Fulltext {
if (this.writeWebgraph) deleteDomainWithConstraint(this.getWebgraphConnector(), WebgraphSchema.source_host_id_s.getSolrFieldName(), hosthashes,
(freshdate == null || freshdate.after(now)) ? null :
(WebgraphSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]"));
// remove the line with statistics
if (Fulltext.this.statsDump != null) {
final Iterator<HostStat> hsi = Fulltext.this.statsDump.iterator();
HostStat hs;
while (hsi.hasNext()) {
hs = hsi.next();
if (hosthashes.contains(hs.hosthash)) hsi.remove();
}
}
}
public void deleteStaleDomainNames(final Set<String> hostnames, Date freshdate) {
@ -419,16 +399,6 @@ public final class Fulltext {
if (this.writeWebgraph) deleteDomainWithConstraint(this.getWebgraphConnector(), WebgraphSchema.source_host_s.getSolrFieldName(), hostnames,
(freshdate == null || freshdate.after(now)) ? null :
(WebgraphSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]"));
// finally remove the line with statistics
if (Fulltext.this.statsDump != null) {
final Iterator<HostStat> hsi = Fulltext.this.statsDump.iterator();
HostStat hs;
while (hsi.hasNext()) {
hs = hsi.next();
if (hostnames.contains(hs.hostname)) hsi.remove();
}
}
}
/**
@ -790,42 +760,5 @@ public final class Fulltext {
}
}
/**
 * Produces host statistics for the entries of {@code domainScore}.
 *
 * A previously computed list is reused when it already holds at least
 * {@code count} entries. Otherwise the keys of {@code domainScore} (url hash
 * examples, biggest score first) are resolved to urls and one {@link HostStat}
 * is created per resolvable url; the new list replaces the cached one.
 *
 * The cached list can be reset to {@code null} by other methods while this
 * method runs. The field is therefore never dereferenced directly: the list is
 * built through a local reference and the field is only compared against
 * {@code null} to detect that it was destroyed.
 *
 * @param count       the wanted number of entries; ten more are collected to
 *                    avoid a re-computation soon after deletions
 * @param domainScore score map whose keys are url hashes and whose values are
 *                    stored as the count of the resulting entries
 *                    (NOTE(review): keys are presumably 12 characters long, the
 *                    part after index 6 being the host hash - confirm)
 * @return an iterator over the statistics; an empty iterator if the cached
 *         list was destroyed by another operation in the meantime
 */
public Iterator<HostStat> statistics(int count, final ScoreMap<String> domainScore) {
    // prevent too heavy IO: serve the request from the cached list if it is large enough
    final ArrayList<HostStat> cached = this.statsDump; // single read of the shared field
    if (cached != null && count <= cached.size()) return cached.iterator();

    // fetch urls from the database to determine the host in clear text
    final Iterator<String> j = domainScore.keys(false); // iterate urlhash-examples in reverse order (biggest first)
    count += 10; // make some more to prevent that we have to do this again after deletions too soon.
    if (count < 0 || domainScore.sizeSmaller(count)) count = domainScore.size();

    // build through a local reference; the field is published right away as before
    final ArrayList<HostStat> dump = new ArrayList<HostStat>();
    this.statsDump = dump;
    String urlhash;
    DigestURL url;
    while (j.hasNext()) {
        urlhash = j.next();
        if (urlhash == null) continue;
        url = this.getURL(ASCII.getBytes(urlhash));
        if (url == null || url.getHost() == null) continue;
        if (this.statsDump == null) return new ArrayList<HostStat>().iterator(); // some other operation has destroyed the object
        dump.add(new HostStat(url.getHost(), url.getPort(), urlhash.substring(6), domainScore.get(urlhash)));
        count--;
        if (count == 0) break;
    }

    // finally return an iterator for the result array
    return (this.statsDump == null) ? new ArrayList<HostStat>().iterator() : dump.iterator();
}
/**
 * Value holder describing one host: its name, port, host hash and a count.
 * Instances are only created inside the enclosing class (private constructor).
 */
public static class HostStat {

    /** host name in clear text */
    public String hostname;

    /** host hash, a url hash fragment of exactly six characters */
    public String hosthash;

    /** port of the host */
    public int port;

    /** count value associated with the host */
    public int count;

    /**
     * @param host            host name
     * @param port            port of the host
     * @param urlhashfragment host hash; must have a length of six characters
     * @param count           count value associated with the host
     */
    private HostStat(final String host, final int port, final String urlhashfragment, final int count) {
        assert urlhashfragment.length() == 6;
        this.hosthash = urlhashfragment;
        this.hostname = host;
        this.count = count;
        this.port = port;
    }
}
}

@ -29,13 +29,8 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.sorting.OrderedScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.index.BinSearch;
@ -47,10 +42,8 @@ import net.yacy.peers.Seed;
import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.peers.graphics.WebStructureGraph.HostReference;
import net.yacy.search.index.Fulltext.HostStat;
import net.yacy.search.index.Segment;
public class BlockRank {
/**
@ -149,64 +142,6 @@ public class BlockRank {
return index;
}
/**
 * Computes ranking tables from a host reference index.
 *
 * Every non-empty container of the index gets a score. Without a reference
 * table the score is the container size, weighted by the ratio of the largest
 * known host count to the count of the container's host. With a reference
 * table the score is the sum over all references of
 * {@code (17 - ranking(reference)) * maxHostCount / hostCount}.
 * The scored hosts are then repeatedly split: the lower half is moved into a
 * new {@link BinSearch} partition until at most ten hosts remain, which form
 * the last partition. The result holds at most 16 partitions in reverse order
 * of creation, so index 0 is the partition that was computed last.
 *
 * @param index            the host reference index to evaluate
 * @param hostHashResolver maps host hashes to host statistics; hosts that are
 *                         not contained are scored with a fixed count of 6
 * @param referenceTable   ranking tables of a previous evaluation, or
 *                         {@code null} for the first evaluation
 * @param recusions        number of additional evaluation rounds that re-use
 *                         the computed tables as reference table; values of
 *                         zero or less return the tables of this round
 * @return the ranking tables, at most 16
 */
public static BinSearch[] evaluate(final ReferenceContainerCache<HostReference> index, final Map<String, HostStat> hostHashResolver, final BinSearch[] referenceTable, int recusions) {

    // first find out the maximum count of the hostHashResolver
    int maxHostCount = 1;
    for (final HostStat stat: hostHashResolver.values()) {
        if (stat.count > maxHostCount) maxHostCount = stat.count;
    }

    // then just count the number of references. All other information from the index is not used because it cannot be trusted
    final ScoreMap<byte[]> hostScore = new OrderedScoreMap<byte[]>(index.termKeyOrdering());
    HostStat hostStat;
    int hostCount;
    for (final ReferenceContainer<HostReference> container: index) {
        if (container.isEmpty()) continue;
        if (referenceTable == null) {
            hostStat = hostHashResolver.get(ASCII.String(container.getTermHash()));
            hostCount = hostStat == null ? 6 /* high = a penalty for 'i do not know this', this may not be fair*/ : Math.max(1, hostStat.count);
            hostScore.set(container.getTermHash(), container.size() * maxHostCount / hostCount);
        } else {
            int score = 0;
            final Iterator<HostReference> hri = container.entries();
            HostReference hr;
            while (hri.hasNext()) {
                hr = hri.next();
                hostStat = hostHashResolver.get(ASCII.String(hr.urlhash()));
                hostCount = hostStat == null ? 6 /* high = a penalty for 'i do not know this', this may not be fair*/ : Math.max(1, hostStat.count);
                score += (17 - ranking(hr.urlhash(), referenceTable)) * maxHostCount / hostCount;
            }
            hostScore.set(container.getTermHash(), score);
        }
    }

    // now divide the scores into two halves until the score map is empty
    final List<BinSearch> table = new ArrayList<BinSearch>();
    while (hostScore.size() > 10) {
        final List<byte[]> smallest = hostScore.lowerHalf();
        if (smallest.isEmpty()) break; // should never happen but this ensures termination of the loop
        ConcurrentLog.info("BlockRank", "index evaluation: computed partition of size " + smallest.size());
        table.add(new BinSearch(smallest, 6));
        for (final byte[] host: smallest) hostScore.delete(host);
    }
    if (!hostScore.isEmpty()) {
        final ArrayList<byte[]> list = new ArrayList<byte[]>();
        for (final byte[] entry: hostScore) list.add(entry);
        ConcurrentLog.info("BlockRank", "index evaluation: computed last partition of size " + list.size());
        table.add(new BinSearch(list, 6));
    }

    // the last table entry has now a list of host hashes that has the most references
    final int binTables = Math.min(16, table.size());
    final BinSearch[] newTables = new BinSearch[binTables];
    for (int i = 0; i < binTables; i++) newTables[i] = table.get(table.size() - i - 1);

    // re-use the new table for a recursion;
    // '<=' instead of '==' so that a negative argument cannot cause an endless recursion
    if (recusions <= 0) return newTables;
    return evaluate(index, hostHashResolver, newTables, recusions - 1); // one recursion step
}
public static int ranking(final byte[] hash, final BinSearch[] rankingTable) {
if (rankingTable == null) return 16;
byte[] hosthash;

Loading…
Cancel
Save