Using MultiProtocolURL for edge data, which is faster (hash computation is now much easier) and smaller in size.
pull/1/head
Michael Peter Christen 11 years ago
parent b21c208b4d
commit 6ea8bb7348

@ -20,14 +20,14 @@
package net.yacy.search.schema; package net.yacy.search.schema;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL;
public class HyperlinkEdge { public class HyperlinkEdge {
public DigestURL source, target; public MultiProtocolURL source, target;
public HyperlinkType type; public HyperlinkType type;
public HyperlinkEdge(DigestURL source, DigestURL target, HyperlinkType type) { public HyperlinkEdge(MultiProtocolURL source, MultiProtocolURL target, HyperlinkType type) {
this.source = source; this.source = source;
this.target = target; this.target = target;
this.type = type; this.type = type;

@ -31,12 +31,12 @@ import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.FailType; import net.yacy.cora.federate.solr.FailType;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.index.Segment.ReferenceReportCache; import net.yacy.search.index.Segment.ReferenceReportCache;
@ -53,12 +53,12 @@ public class HyperlinkGraph implements Iterable<HyperlinkEdge> {
} }
Map<String, HyperlinkEdge> edges; Map<String, HyperlinkEdge> edges;
Map<DigestURL, Integer> depths; Map<MultiProtocolURL, Integer> depths;
String hostname; String hostname;
public HyperlinkGraph() { public HyperlinkGraph() {
this.edges = new LinkedHashMap<String, HyperlinkEdge>(); this.edges = new LinkedHashMap<String, HyperlinkEdge>();
this.depths = new HashMap<DigestURL, Integer>(); this.depths = new HashMap<MultiProtocolURL, Integer>();
this.hostname = null; this.hostname = null;
} }
@ -164,8 +164,8 @@ public class HyperlinkGraph implements Iterable<HyperlinkEdge> {
int remaining = this.edges.size(); int remaining = this.edges.size();
// first find root nodes // first find root nodes
Set<DigestURL> nodes = new HashSet<DigestURL>(); Set<MultiProtocolURL> nodes = new HashSet<MultiProtocolURL>();
Set<DigestURL> nextnodes = new HashSet<DigestURL>(); Set<MultiProtocolURL> nextnodes = new HashSet<MultiProtocolURL>();
for (HyperlinkEdge edge: this.edges.values()) { for (HyperlinkEdge edge: this.edges.values()) {
String path = edge.source.getPath(); String path = edge.source.getPath();
if (ROOTFNS.contains(path)) { if (ROOTFNS.contains(path)) {
@ -176,14 +176,22 @@ public class HyperlinkGraph implements Iterable<HyperlinkEdge> {
remaining--; remaining--;
} }
} }
if (nodes.size() == 0 && this.edges.size() > 0) ConcurrentLog.warn("HyperlinkGraph", "could not find a root node for " + hostname + " in " + this.edges.size() + " edges"); if (nodes.size() == 0 && this.edges.size() > 0) {
ConcurrentLog.warn("HyperlinkGraph", "could not find a root node for " + hostname + " in " + this.edges.size() + " edges");
// add virtual nodes to have any kind of root
for (String rootpath: ROOTFNS) {
try {
nodes.add(new DigestURL("http://" + hostname + rootpath));
} catch (MalformedURLException e) {}
}
}
// recusively step into depth and find next level // recursively step into depth and find next level
int depth = 1; int depth = 1;
while (remaining > 0) { while (remaining > 0) {
boolean found = false; boolean found = false;
nodes = nextnodes; nodes = nextnodes;
nextnodes = new HashSet<DigestURL>(); nextnodes = new HashSet<MultiProtocolURL>();
for (HyperlinkEdge edge: this.edges.values()) { for (HyperlinkEdge edge: this.edges.values()) {
if (nodes.contains(edge.source)) { if (nodes.contains(edge.source)) {
if (!this.depths.containsKey(edge.source)) this.depths.put(edge.source, depth); if (!this.depths.containsKey(edge.source)) this.depths.put(edge.source, depth);
@ -200,7 +208,7 @@ public class HyperlinkGraph implements Iterable<HyperlinkEdge> {
return depth - 1; return depth - 1;
} }
public Integer getDepth(DigestURL url) { public Integer getDepth(MultiProtocolURL url) {
return this.depths.get(url); return this.depths.get(url);
} }

Loading…
Cancel
Save