added collection attribute also to the RSS feed reader

pull/1/head
Michael Peter Christen 12 years ago
parent 43ca359e24
commit c091000165

@@ -77,21 +77,25 @@
 </dd>
 <dt><label>Dynamic URLs</label></dt>
 <dd>
 <input type="checkbox" name="crawlingQ" id="crawlingQ" #(crawlingQChecked)#::checked="checked"#(/crawlingQChecked)# /> allow <a href="http://en.wikipedia.org/wiki/Query_string">query-strings</a> (urls with a '?' in the path)
-<input type="hidden" name="directDocByURL" id="directDocByURL" value="off" />
-<input type="hidden" name="recrawl" id="recrawl" value="reload" />
-<input type="hidden" name="reloadIfOlderNumber" id="reloadIfOlderNumber" value="3" />
-<input type="hidden" name="reloadIfOlderUnit" id="reloadIfOlderUnit" value="day" />
-<input type="hidden" name="deleteold" id="deleteold" value="on" />
-<input type="hidden" name="storeHTCache" id="storeHTCache" value="on" />
-<input type="hidden" name="cachePolicy" id="cachePolicy" value="iffresh" />
-<input type="hidden" name="indexText" id="indexText" value="on" />
-<input type="hidden" name="indexMedia" id="indexMedia" value="on" />
-<input type="hidden" name="intention" id="intention" value="" />
-<input type="hidden" name="collection" id="collection" value="" />
+</dd>
+<dt><label>Collection</label></dt>
+<dd>
+<input name="collection" id="collection" type="text" size="60" maxlength="100" value="#[collection]#" #(collectionEnabled)#disabled="disabled"::#(/collectionEnabled)# />
 </dd>
 <dt><label>Start</label></dt>
-<dd><input type="submit" name="crawlingstart" value="Start New Crawl" class="submitready"/>
+<dd>
+<input type="hidden" name="directDocByURL" id="directDocByURL" value="off" />
+<input type="hidden" name="recrawl" id="recrawl" value="reload" />
+<input type="hidden" name="reloadIfOlderNumber" id="reloadIfOlderNumber" value="3" />
+<input type="hidden" name="reloadIfOlderUnit" id="reloadIfOlderUnit" value="day" />
+<input type="hidden" name="deleteold" id="deleteold" value="on" />
+<input type="hidden" name="storeHTCache" id="storeHTCache" value="on" />
+<input type="hidden" name="cachePolicy" id="cachePolicy" value="iffresh" />
+<input type="hidden" name="indexText" id="indexText" value="on" />
+<input type="hidden" name="indexMedia" id="indexMedia" value="on" />
+<input type="hidden" name="intention" id="intention" value="" />
+<input type="submit" name="crawlingstart" value="Start New Crawl" class="submitready"/>
 </dd>
 </dl>

@@ -34,28 +34,30 @@
 <dt>Preview</dt>
 <dd><input type="submit" name="showrss" value="Show RSS Items" /></dd>
 <dt>Indexing</dt>
-<dd>#(showload)#Available after successful loading of rss feed in preview::
-<input type="submit" name="indexAllItemContent" value="Add All Items to Index (full content of url)" />
+<dd>#(showload)#<input type="hidden" name="collection" id="collection" value="#[collection]#" />Available after successful loading of rss feed in preview::
 <dl>
 <dt>once<input type="radio" name="repeat" value="off" checked="checked"/></dt>
 <dd>load this feed once now</dd>
 <dt>scheduled<input type="radio" name="repeat" value="on"/></dt>
 <dd>repeat the feed loading every<br/>
 <select name="repeat_time">
 <option value="1">1</option><option value="2">2</option><option value="3">3</option>
 <option value="4">4</option><option value="5">5</option><option value="6">6</option>
 <option value="7" selected="selected">7</option>
 <option value="8">8</option><option value="9">9</option><option value="10">10</option>
 <option value="12">12</option><option value="14">14</option><option value="21">21</option>
 <option value="28">28</option><option value="30">30</option>
 </select>
 <select name="repeat_unit">
 <option value="selminutes">minutes</option>
 <option value="selhours">hours</option>
 <option value="seldays" selected="selected">days</option>
 </select> automatically.
 </dd>
+<dt>collection</dt>
+<dd><input name="collection" id="collection" type="text" size="60" maxlength="100" value="#[collection]#" #(collectionEnabled)#disabled="disabled"::#(/collectionEnabled)# /></dd>
 </dl>
+<input type="submit" name="indexAllItemContent" value="Add All Items to Index (full content of url)" />
 #(/showload)#</dd>
 </dl>

@@ -36,6 +36,7 @@ import net.yacy.cora.document.RSSReader;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.util.CommonPattern;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.RSSLoader;
@@ -48,6 +49,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
+import net.yacy.search.schema.CollectionSchema;
 import net.yacy.server.serverObjects;
 import net.yacy.server.serverSwitch;
@@ -58,6 +60,11 @@ public class Load_RSS_p {
 final serverObjects prop = new serverObjects();
 final Switchboard sb = (Switchboard)env;
+final String collection = post == null ? "user" : CommonPattern.SPACE.matcher(post.get("collection", "user").trim()).replaceAll("");
+final String[] collections = collection.length() == 0 ? new String[0] : collection.split(",");
+boolean collectionEnabled = sb.index.fulltext().getDefaultConfiguration().isEmpty() || sb.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.collection_sxt);
+prop.put("showload_collectionEnabled", collectionEnabled ? 1 : 0);
+prop.put("showload_collection", collection);
 prop.put("showload", 0);
 prop.put("showitems", 0);
 prop.put("shownewfeeds", 0);
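The two lines added at the top of Load_RSS_p above do all the parsing of the new "collection" form field: spaces are stripped and the remainder is split at commas. A minimal, self-contained sketch of that normalization using only the JDK (class and method names are made up for illustration; CommonPattern.SPACE is assumed to be a plain single-space pattern):

    import java.util.regex.Pattern;

    public class CollectionParamSketch {

        // stand-in for net.yacy.cora.util.CommonPattern.SPACE (assumed: a " " pattern)
        private static final Pattern SPACE = Pattern.compile(" ");

        static String[] parseCollections(final String posted) {
            // default to "user" when nothing was posted, then remove all spaces
            final String collection = SPACE.matcher((posted == null ? "user" : posted).trim()).replaceAll("");
            // an empty value means "no collections"; otherwise split the comma list
            return collection.length() == 0 ? new String[0] : collection.split(",");
        }

        public static void main(String[] args) {
            // "user, my feeds" becomes ["user", "myfeeds"]: spaces never act as separators
            for (final String c : parseCollections("user, my feeds")) System.out.println(c);
        }
    }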
@@ -167,7 +174,7 @@ public class Load_RSS_p {
 continue;
 }
 // load feeds concurrently to get better responsibility in web interface
-new RSSLoader(sb, url).start();
+new RSSLoader(sb, url, collections).start();
 }
 }
 }
@@ -274,7 +281,7 @@ public class Load_RSS_p {
 final DigestURI messageurl = new DigestURI(message.getLink());
 if (RSSLoader.indexTriggered.containsKey(messageurl.hash())) continue loop;
 if (sb.urlExists(ASCII.String(messageurl.hash())) != null) continue loop;
-sb.addToIndex(messageurl, null, null);
+sb.addToIndex(messageurl, null, null, collections);
 RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
 } catch (final IOException e) {
 Log.logException(e);
@@ -287,7 +294,7 @@ public class Load_RSS_p {
 if (rss != null && post.containsKey("indexAllItemContent")) {
 record_api = true;
 final RSSFeed feed = rss.getFeed();
-RSSLoader.indexAllRssFeed(sb, url, feed);
+RSSLoader.indexAllRssFeed(sb, url, feed, collections);
 }
 if (record_api && rss != null && rss.getFeed() != null && rss.getFeed().getChannel() != null) {

@@ -25,27 +25,20 @@
 package net.yacy.cora.document;
 import java.text.ParseException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Map;
-import java.util.Properties;
 import java.util.Set;
 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.date.ISO8601Formatter;
-import net.yacy.cora.document.analysis.Classification;
 import net.yacy.cora.lod.vocabulary.DublinCore;
 import net.yacy.cora.lod.vocabulary.Geo;
 import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.util.CommonPattern;
-import net.yacy.document.Document;
-import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.kelondro.data.meta.DigestURI;
 public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {

@@ -54,10 +54,12 @@ public class RSSLoader extends Thread {
 DigestURI urlf;
 Switchboard sb;
+String[] collections;
-public RSSLoader(final Switchboard sb, final DigestURI urlf) {
+public RSSLoader(final Switchboard sb, final DigestURI urlf, final String[] collections) {
 this.sb = sb;
 this.urlf = urlf;
+this.collections = collections;
 }
 @Override
@@ -79,20 +81,20 @@ public class RSSLoader extends Thread {
 return;
 }
 final RSSFeed feed = rss.getFeed();
-indexAllRssFeed(this.sb, this.urlf, feed);
+indexAllRssFeed(this.sb, this.urlf, feed, this.collections);
 // add the feed also to the scheduler
 recordAPI(this.sb, null, this.urlf, feed, 7, "seldays");
 }
-public static void indexAllRssFeed(final Switchboard sb, final DigestURI url, final RSSFeed feed) {
+public static void indexAllRssFeed(final Switchboard sb, final DigestURI url, final RSSFeed feed, String[] collections) {
 int loadCount = 0;
 loop: for (final RSSMessage message: feed) {
 try {
 final DigestURI messageurl = new DigestURI(message.getLink());
 if (indexTriggered.containsKey(messageurl.hash())) continue loop;
 if (sb.urlExists(ASCII.String(messageurl.hash())) != null) continue loop;
-sb.addToIndex(messageurl, null, null);
+sb.addToIndex(messageurl, null, null, collections);
 indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
 loadCount++;
 } catch (final IOException e) {
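indexAllRssFeed above now threads the collections array through to sb.addToIndex for every feed item that has not been indexed yet. A rough, self-contained sketch of that loop pattern with plain JDK types (FeedIndexSketch, addToIndex and the item URLs are hypothetical stand-ins, not YaCy API):

    import java.util.Date;
    import java.util.concurrent.ConcurrentHashMap;

    public class FeedIndexSketch {

        // stands in for RSSLoader.indexTriggered (url hash -> time the indexing was triggered)
        private static final ConcurrentHashMap<String, Date> indexTriggered = new ConcurrentHashMap<>();

        // stands in for sb.addToIndex(messageurl, null, null, collections)
        static void addToIndex(final String url, final String[] collections) {
            System.out.println("indexing " + url + " into collections " + String.join(",", collections));
        }

        static int indexAllItems(final String[] itemUrls, final String[] collections) {
            int loadCount = 0;
            for (final String url : itemUrls) {
                if (indexTriggered.containsKey(url)) continue; // already triggered earlier
                addToIndex(url, collections);
                indexTriggered.putIfAbsent(url, new Date());
                loadCount++;
            }
            return loadCount;
        }

        public static void main(String[] args) {
            final String[] items = {"http://example.org/a", "http://example.org/b", "http://example.org/a"};
            System.out.println(indexAllItems(items, new String[]{"user", "rssfeeds"}) + " items indexed");
        }
    }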

@@ -2620,6 +2620,7 @@ public final class Switchboard extends serverSwitch {
 for ( int i = 0; i < in.documents.length; i++ ) {
 storeDocumentIndex(
 in.queueEntry,
+in.queueEntry.profile().collections(),
 in.documents[i],
 in.condenser[i],
 null,
@@ -2631,6 +2632,7 @@ public final class Switchboard extends serverSwitch {
 private void storeDocumentIndex(
 final Response queueEntry,
+final String[] collections,
 final Document document,
 final Condenser condenser,
 final SearchEvent searchEvent,
@@ -2679,7 +2681,7 @@ public final class Switchboard extends serverSwitch {
 this.index.storeDocument(
 url,
 referrerURL,
-queueEntry.profile(),
+collections,
 queueEntry.getResponseHeader(),
 document,
 condenser,
@@ -2749,12 +2751,13 @@ public final class Switchboard extends serverSwitch {
 final DigestURI url,
 final Map<DigestURI, String> links,
 final SearchEvent searchEvent,
-final String heuristicName) {
+final String heuristicName,
+final String[] collections) {
 // add the landing page to the index. should not load that again since it should be in the cache
 if ( url != null ) {
 try {
-addToIndex(url, searchEvent, heuristicName);
+addToIndex(url, searchEvent, heuristicName, collections);
 } catch ( final IOException e ) {
 } catch ( final Parser.Failure e ) {
 }
@@ -2767,7 +2770,7 @@ public final class Switchboard extends serverSwitch {
 // take the matcher and load them all
 for ( final Map.Entry<DigestURI, String> entry : matcher.entrySet() ) {
 try {
-addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName);
+addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName, collections);
 } catch ( final IOException e ) {
 } catch ( final Parser.Failure e ) {
 }
@@ -2776,7 +2779,7 @@ public final class Switchboard extends serverSwitch {
 // take then the no-matcher and load them also
 for ( final Map.Entry<DigestURI, String> entry : links.entrySet() ) {
 try {
-addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName);
+addToIndex(new DigestURI(entry.getKey(), (byte[]) null), searchEvent, heuristicName, collections);
 } catch ( final IOException e ) {
 } catch ( final Parser.Failure e ) {
 }
@@ -2909,7 +2912,7 @@ public final class Switchboard extends serverSwitch {
 * @throws IOException
 * @throws Parser.Failure
 */
-public void addToIndex(final DigestURI url, final SearchEvent searchEvent, final String heuristicName)
+public void addToIndex(final DigestURI url, final SearchEvent searchEvent, final String heuristicName, final String[] collections)
 throws IOException,
 Parser.Failure {
 if (searchEvent != null) {
@@ -2956,6 +2959,7 @@ public final class Switchboard extends serverSwitch {
 Switchboard.this.webStructure.generateCitationReference(url, document);
 storeDocumentIndex(
 response,
+collections,
 document,
 condenser,
 searchEvent,
@@ -3341,7 +3345,7 @@ public final class Switchboard extends serverSwitch {
 }
 // add all pages to the index
-addAllToIndex(url, links, searchEvent, "site");
+addAllToIndex(url, links, searchEvent, "site", new String[]{"site"});
 }
 } catch ( final Throwable e ) {
 Log.logException(e);
@@ -3454,7 +3458,7 @@ public final class Switchboard extends serverSwitch {
 + feedName
 + "' rss feed");
 // add all pages to the index
-addAllToIndex(null, links, searchEvent, feedName);
+addAllToIndex(null, links, searchEvent, feedName, new String[]{"rss"});
 }
 } catch ( final Throwable e ) {
 //Log.logException(e);

@@ -51,7 +51,6 @@ import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.storage.HandleSet;
 import net.yacy.cora.util.LookAheadIterator;
 import net.yacy.cora.util.SpaceExceededException;
-import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
@@ -449,7 +448,7 @@ public class Segment {
 public SolrInputDocument storeDocument(
 final DigestURI url,
 final DigestURI referrerURL,
-final CrawlProfile profile,
+final String[] collections,
 final ResponseHeader responseHeader,
 final Document document,
 final Condenser condenser,
@@ -489,7 +488,7 @@ public class Segment {
 char docType = Response.docType(document.dc_format());
 // CREATE SOLR DOCUMENT
-final CollectionConfiguration.SolrVector vector = this.fulltext.getDefaultConfiguration().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex, this.fulltext.getWebgraphConfiguration());
+final CollectionConfiguration.SolrVector vector = this.fulltext.getDefaultConfiguration().yacy2solr(id, collections, responseHeader, document, condenser, referrerURL, language, urlCitationIndex, this.fulltext.getWebgraphConfiguration());
 // FIND OUT IF THIS IS A DOUBLE DOCUMENT
 String hostid = url.hosthash();

@@ -202,10 +202,11 @@ public class QueryModifier {
 }
 StringBuilder filterQuery = new StringBuilder(20);
 if (sites.size() > 1) {
-filterQuery.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
+filterQuery.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
 for (int i = 1; i < sites.size(); i++) {
 filterQuery.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(i));
 }
+filterQuery.append(')');
 } else if (sites.size() == 1) {
 filterQuery.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
 }
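The added parentheses matter once more than one collection is requested: without them the OR-chain would bind incorrectly when the collection clause is later combined with other filter-query parts. A self-contained re-creation of the branch above in plain Java (the literal field name "collection_sxt" is assumed here in place of CollectionSchema.collection_sxt.getSolrFieldName()):

    import java.util.Arrays;
    import java.util.List;

    public class CollectionFilterSketch {

        static String collectionFilter(final List<String> sites) {
            final String field = "collection_sxt"; // assumed Solr field name
            final StringBuilder filterQuery = new StringBuilder(20);
            if (sites.size() > 1) {
                filterQuery.append('(').append(field).append(':').append(sites.get(0));
                for (int i = 1; i < sites.size(); i++) {
                    filterQuery.append(" OR ").append(field).append(':').append(sites.get(i));
                }
                filterQuery.append(')');
            } else if (sites.size() == 1) {
                filterQuery.append(field).append(':').append(sites.get(0));
            }
            return filterQuery.toString();
        }

        public static void main(String[] args) {
            // prints: (collection_sxt:wiki OR collection_sxt:blog)
            System.out.println(collectionFilter(Arrays.asList("wiki", "blog")));
        }
    }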

@@ -56,7 +56,6 @@ import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.util.CommonPattern;
-import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -329,7 +328,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
 }
 public SolrVector yacy2solr(
-final String id, final CrawlProfile profile, final ResponseHeader responseHeader,
+final String id, final String[] collections, final ResponseHeader responseHeader,
 final Document document, Condenser condenser, DigestURI referrerURL, String language,
 IndexCell<CitationReference> citations,
 WebgraphConfiguration webgraph) {
@@ -362,7 +361,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
 final InetAddress address = digestURI.getInetAddress();
 if (address != null) add(doc, CollectionSchema.ip_s, address.getHostAddress());
 }
-if (allAttr || contains(CollectionSchema.collection_sxt) && profile != null) add(doc, CollectionSchema.collection_sxt, profile.collections());
+if (allAttr || contains(CollectionSchema.collection_sxt) && collections != null && collections.length > 0) add(doc, CollectionSchema.collection_sxt, collections);
 if (allAttr || contains(CollectionSchema.url_protocol_s)) add(doc, CollectionSchema.url_protocol_s, digestURI.getProtocol());
 Map<String, String> searchpart = digestURI.getSearchpartMap();
 if (searchpart == null) {
@@ -756,7 +755,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
 if (allAttr || contains(CollectionSchema.outboundlinksnofollowcount_i)) add(doc, CollectionSchema.outboundlinksnofollowcount_i, document.outboundLinkNofollowCount());
 // list all links
-WebgraphConfiguration.Subgraph subgraph = webgraph.edges(digestURI, responseHeader, profile.collections(), clickdepth, document.getAnchors(), images, inboundLinks, outboundLinks, citations);
+WebgraphConfiguration.Subgraph subgraph = webgraph.edges(digestURI, responseHeader, collections, clickdepth, document.getAnchors(), images, inboundLinks, outboundLinks, citations);
 doc.webgraphDocuments.addAll(subgraph.edges);
 if (allAttr || contains(CollectionSchema.inboundlinks_protocol_sxt)) add(doc, CollectionSchema.inboundlinks_protocol_sxt, protocolList2indexedList(subgraph.urlProtocols[0]));
 if (allAttr || contains(CollectionSchema.inboundlinks_urlstub_txt)) add(doc, CollectionSchema.inboundlinks_urlstub_txt, subgraph.urlStubs[0]);
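With the change above, yacy2solr writes the collection names straight into the multivalued collection_sxt field instead of deriving them from a CrawlProfile. A short sketch of what that amounts to on the Solr side, using the standard SolrJ SolrInputDocument API (SolrJ on the classpath and the literal field name "collection_sxt" are assumptions for illustration):

    import org.apache.solr.common.SolrInputDocument;

    public class CollectionFieldSketch {
        public static void main(String[] args) {
            final String[] collections = {"user", "rssfeeds"};
            final SolrInputDocument doc = new SolrInputDocument();
            // each array element becomes one value of the multivalued *_sxt field
            if (collections != null && collections.length > 0) {
                for (final String c : collections) doc.addField("collection_sxt", c);
            }
            System.out.println(doc.getFieldValues("collection_sxt")); // [user, rssfeeds]
        }
    }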
