diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java
index 3e26b6ca7..bd3e5a3e1 100644
--- a/htroot/HostBrowser.java
+++ b/htroot/HostBrowser.java
@@ -560,7 +560,7 @@ public class HostBrowser {
StringBuilder sbi = new StringBuilder();
int c = 0;
for (String s: references_internal_urls) {
- sbi.append("");
+ sbi.append("");
c++;
if (c % 80 == 0) sbi.append("
");
}
@@ -568,7 +568,7 @@ public class HostBrowser {
StringBuilder sbe = new StringBuilder();
c = 0;
for (String s: references_external_urls) {
- sbe.append("");
+ sbe.append("");
c++;
if (c % 80 == 0) sbe.append("
");
}
diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java
index 641de3af8..d97ec6706 100644
--- a/source/net/yacy/crawler/CrawlStacker.java
+++ b/source/net/yacy/crawler/CrawlStacker.java
@@ -54,8 +54,10 @@ import net.yacy.crawler.retrieval.HTTPLoader;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.SMBLoader;
import net.yacy.crawler.robots.RobotsTxt;
+import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.SeedDB;
import net.yacy.repository.Blacklist.BlacklistType;
@@ -133,6 +135,15 @@ public final class CrawlStacker {
// this is the method that is called by the busy thread from outside
if (entry == null) return null;
+ // record the citation edge (referrer -> this request's URL) in the citation index before the request is stacked
+ byte[] anchorhash = entry.url().hash();
+ IndexCell urlCitationIndex = this.indexSegment.urlCitation();
+ if (urlCitationIndex != null) try {
+ urlCitationIndex.add(anchorhash, new CitationReference(entry.referrerhash(), entry.appdate().getTime()));
+ } catch (final Exception e) {
+ Log.logException(e);
+ }
+
try {
final String rejectReason = stackCrawl(entry);
diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java
index f6e122e3e..5c5edd8c8 100644
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@@ -33,7 +33,6 @@ import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
-import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
@@ -498,27 +497,6 @@ public class Segment {
return this.segmentPath;
}
- private int addCitationIndex(final DigestURI url, final Date urlModified, final Map anchors) {
- if (anchors == null) return 0;
- int refCount = 0;
-
- // iterate over all outgoing links, this will create a context for those links
- final byte[] urlhash = url.hash();
- final long urldate = urlModified.getTime();
- for (Map.Entry anchorEntry: anchors.entrySet()) {
- DigestURI anchor = anchorEntry.getKey();
- byte[] refhash = anchor.hash();
- //System.out.println("*** addCitationIndex: urlhash = " + ASCII.String(urlhash) + ", refhash = " + ASCII.String(refhash) + ", urldate = " + urlModified.toString());
- if (this.urlCitationIndex != null) try {
- this.urlCitationIndex.add(refhash, new CitationReference(urlhash, urldate));
- } catch (final Exception e) {
- Log.logException(e);
- }
- refCount++;
- }
- return refCount;
- }
-
public synchronized void close() {
this.indexingPutDocumentProcessor.shutdown();
if (this.termIndex != null) this.termIndex.close();
@@ -795,9 +773,6 @@ public class Segment {
}
}
- // STORE PAGE REFERENCES INTO CITATION INDEX
- final int refs = addCitationIndex(url, modDate, document.getAnchors());
-
// finish index time
final long indexingEndTime = System.currentTimeMillis();
@@ -807,7 +782,7 @@ public class Segment {
"\n\tDescription: " + dc_title +
"\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | " +
"Size: " + document.getTextLength() + " bytes | " +
- "Anchors: " + refs +
+ //"Anchors: " + refs +
"\n\tLinkStorageTime: " + (storageEndTime - startTime) + " ms | " +
"indexStorageTime: " + (indexingEndTime - storageEndTime) + " ms");
}
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index 4e04b8288..238cc4bca 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -488,6 +488,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
Set inboundLinks = document.inboundLinks();
Set outboundLinks = document.outboundLinks();
+ Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size());
+ Map alllinks = document.getAnchors();
int c = 0;
final Object parser = document.getParserObject();
Map images = new HashMap();
@@ -677,7 +679,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
frames[c++] = u.toNormalform(false);
}
add(doc, CollectionSchema.framesscount_i, frames.length);
- if (frames.length > 0) add(doc, CollectionSchema.frames_sxt, frames);
+ if (frames.length > 0) {
+ add(doc, CollectionSchema.frames_sxt, frames);
+ //webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, true, framess, citations); // add here because links have been removed from remaining inbound/outbound
+ }
}
// IFrames
@@ -691,7 +696,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
iframes[c++] = u.toNormalform(false);
}
add(doc, CollectionSchema.iframesscount_i, iframes.length);
- if (iframes.length > 0) add(doc, CollectionSchema.iframes_sxt, iframes);
+ if (iframes.length > 0) {
+ add(doc, CollectionSchema.iframes_sxt, iframes);
+ //webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, true, iframess, citations); // add here because links have been removed from remaining inbound/outbound
+ }
}
// canonical tag
@@ -791,10 +799,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (allAttr || contains(CollectionSchema.inboundlinksnofollowcount_i)) add(doc, CollectionSchema.inboundlinksnofollowcount_i, document.inboundLinkNofollowCount());
if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, outboundLinks.size());
if (allAttr || contains(CollectionSchema.outboundlinksnofollowcount_i)) add(doc, CollectionSchema.outboundlinksnofollowcount_i, document.outboundLinkNofollowCount());
- Map alllinks = document.getAnchors();
// create a subgraph
- Subgraph subgraph = new Subgraph(inboundLinks.size(), outboundLinks.size());
//if () {
webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, true, inboundLinks, citations);
webgraph.addEdges(subgraph, digestURI, responseHeader, collections, clickdepth, alllinks, images, false, outboundLinks, citations);