|
|
@ -42,7 +42,6 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
|
|
|
|
import net.yacy.peers.graphics.WebStructureGraph;
|
|
|
|
import net.yacy.peers.graphics.WebStructureGraph;
|
|
|
|
import net.yacy.search.Switchboard;
|
|
|
|
import net.yacy.search.Switchboard;
|
|
|
|
import net.yacy.search.index.Segments;
|
|
|
|
import net.yacy.search.index.Segments;
|
|
|
|
|
|
|
|
|
|
|
|
import de.anomic.server.serverObjects;
|
|
|
|
import de.anomic.server.serverObjects;
|
|
|
|
import de.anomic.server.serverSwitch;
|
|
|
|
import de.anomic.server.serverSwitch;
|
|
|
|
|
|
|
|
|
|
|
@ -110,13 +109,13 @@ public class webstructure {
|
|
|
|
prop.put("references_documents_0_count", scraper.inboundLinkCount() + scraper.outboundLinkCount());
|
|
|
|
prop.put("references_documents_0_count", scraper.inboundLinkCount() + scraper.outboundLinkCount());
|
|
|
|
prop.put("references_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date()));
|
|
|
|
prop.put("references_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date()));
|
|
|
|
prop.put("references_documents_0_urle", url == null ? 0 : 1);
|
|
|
|
prop.put("references_documents_0_urle", url == null ? 0 : 1);
|
|
|
|
if (url != null) prop.put("references_documents_0_urle_url", url.toNormalform(true, false));
|
|
|
|
if (url != null) prop.putXML("references_documents_0_urle_url", url.toNormalform(true, false));
|
|
|
|
int d = 0;
|
|
|
|
int d = 0;
|
|
|
|
Iterator<MultiProtocolURI> i = scraper.inboundLinks().iterator();
|
|
|
|
Iterator<MultiProtocolURI> i = scraper.inboundLinks().iterator();
|
|
|
|
while (i.hasNext()) {
|
|
|
|
while (i.hasNext()) {
|
|
|
|
DigestURI refurl = new DigestURI(i.next());
|
|
|
|
DigestURI refurl = new DigestURI(i.next());
|
|
|
|
byte[] refhash = refurl.hash();
|
|
|
|
byte[] refhash = refurl.hash();
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
|
|
|
|
prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_outbound", 0);
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_outbound", 0);
|
|
|
|
d++;
|
|
|
|
d++;
|
|
|
@ -125,7 +124,7 @@ public class webstructure {
|
|
|
|
while (i.hasNext()) {
|
|
|
|
while (i.hasNext()) {
|
|
|
|
DigestURI refurl = new DigestURI(i.next());
|
|
|
|
DigestURI refurl = new DigestURI(i.next());
|
|
|
|
byte[] refhash = refurl.hash();
|
|
|
|
byte[] refhash = refurl.hash();
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
|
|
|
|
prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_outbound", 1);
|
|
|
|
prop.put("references_documents_0_anchors_" + d + "_outbound", 1);
|
|
|
|
d++;
|
|
|
|
d++;
|
|
|
@ -136,7 +135,7 @@ public class webstructure {
|
|
|
|
prop.put("references_count", 0);
|
|
|
|
prop.put("references_count", 0);
|
|
|
|
prop.put("references_documents", 0);
|
|
|
|
prop.put("references_documents", 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// citations
|
|
|
|
// citations
|
|
|
|
prop.put("citations", 1);
|
|
|
|
prop.put("citations", 1);
|
|
|
|
IndexCell<CitationReference> citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation();
|
|
|
|
IndexCell<CitationReference> citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation();
|
|
|
@ -153,7 +152,7 @@ public class webstructure {
|
|
|
|
prop.put("citations_documents_0_count", citations.size());
|
|
|
|
prop.put("citations_documents_0_count", citations.size());
|
|
|
|
prop.put("citations_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(citations.lastWrote())));
|
|
|
|
prop.put("citations_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(citations.lastWrote())));
|
|
|
|
prop.put("citations_documents_0_urle", url == null ? 0 : 1);
|
|
|
|
prop.put("citations_documents_0_urle", url == null ? 0 : 1);
|
|
|
|
if (url != null) prop.put("citations_documents_0_urle_url", url.toNormalform(true, false));
|
|
|
|
if (url != null) prop.putXML("citations_documents_0_urle_url", url.toNormalform(true, false));
|
|
|
|
int d = 0;
|
|
|
|
int d = 0;
|
|
|
|
Iterator<CitationReference> i = citations.entries();
|
|
|
|
Iterator<CitationReference> i = citations.entries();
|
|
|
|
while (i.hasNext()) {
|
|
|
|
while (i.hasNext()) {
|
|
|
@ -161,7 +160,7 @@ public class webstructure {
|
|
|
|
byte[] refhash = cr.urlhash();
|
|
|
|
byte[] refhash = cr.urlhash();
|
|
|
|
DigestURI refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null;
|
|
|
|
DigestURI refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null;
|
|
|
|
prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1);
|
|
|
|
prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1);
|
|
|
|
if (refurl != null) prop.put("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false));
|
|
|
|
if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false));
|
|
|
|
prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash);
|
|
|
|
prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash);
|
|
|
|
prop.put("citations_documents_0_anchors_" + d + "_urle_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(cr.lastModified())));
|
|
|
|
prop.put("citations_documents_0_anchors_" + d + "_urle_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(cr.lastModified())));
|
|
|
|
d++;
|
|
|
|
d++;
|
|
|
@ -194,7 +193,7 @@ public class webstructure {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
prop.put("out_maxref", WebStructureGraph.maxref);
|
|
|
|
prop.put("out_maxref", WebStructureGraph.maxref);
|
|
|
|
prop.put("maxhosts", WebStructureGraph.maxhosts);
|
|
|
|
prop.put("maxhosts", WebStructureGraph.maxhosts);
|
|
|
|
|
|
|
|
|
|
|
|
// return rewrite properties
|
|
|
|
// return rewrite properties
|
|
|
|
return prop;
|
|
|
|
return prop;
|
|
|
|
}
|
|
|
|
}
|
|
|
|