diff --git a/defaults/solr.collection.schema b/defaults/solr.collection.schema index dc5e66e67..595769132 100644 --- a/defaults/solr.collection.schema +++ b/defaults/solr.collection.schema @@ -150,16 +150,22 @@ charset_s ## number of words in visible area, int wordcount_i -## number of outgoing inbound links, int +## number of all outgoing links; including linksnofollowcount_i, int +linkscount_i + +## number of all outgoing links with nofollow tag, int +linksnofollowcount_i + +## number of outgoing inbound (to same domain) links; including inboundlinksnofollowcount_i, int inboundlinkscount_i -## number of outgoing inbound links with nofollow tag, int +## number of outgoing inbound (to same domain) links with nofollow tag, int inboundlinksnofollowcount_i -## number of outgoing inbound links, int +## number of outgoing outbound (to other domain) links, including outboundlinksnofollowcount_i, int outboundlinkscount_i -## number of external links with nofollow tag, int +## number of outgoing outbound (to other domain) links with nofollow tag, int outboundlinksnofollowcount_i ## number of images, int diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 84b344133..d9ed8a62f 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -289,6 +289,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } if (allAttr || contains(CollectionSchema.imagescount_i)) add(doc, CollectionSchema.imagescount_i, md.limage()); + if (allAttr || contains(CollectionSchema.linkscount_i)) add(doc, CollectionSchema.linkscount_i, md.llocal() + md.lother()); if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, md.llocal()); if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, md.lother()); 
if (allAttr || contains(CollectionSchema.charset_s)) add(doc, CollectionSchema.charset_s, "UTF8"); @@ -842,6 +843,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } // statistics about the links + if (allAttr || contains(CollectionSchema.linkscount_i)) add(doc, CollectionSchema.linkscount_i, inboundLinks.size() + outboundLinks.size()); + if (allAttr || contains(CollectionSchema.linksnofollowcount_i)) add(doc, CollectionSchema.linksnofollowcount_i, document.inboundLinkNofollowCount() + document.outboundLinkNofollowCount()); if (allAttr || contains(CollectionSchema.inboundlinkscount_i)) add(doc, CollectionSchema.inboundlinkscount_i, inboundLinks.size()); if (allAttr || contains(CollectionSchema.inboundlinksnofollowcount_i)) add(doc, CollectionSchema.inboundlinksnofollowcount_i, document.inboundLinkNofollowCount()); if (allAttr || contains(CollectionSchema.outboundlinkscount_i)) add(doc, CollectionSchema.outboundlinkscount_i, outboundLinks.size()); diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java index 6870300ef..6a724fe84 100644 --- a/source/net/yacy/search/schema/CollectionSchema.java +++ b/source/net/yacy/search/schema/CollectionSchema.java @@ -84,10 +84,12 @@ public enum CollectionSchema implements SchemaDeclaration { keywords(SolrType.text_general, true, true, false, false, true, "content of keywords tag; words are separated by space"), charset_s(SolrType.string, true, true, false, false, false, "character encoding"), wordcount_i(SolrType.num_integer, true, true, false, false, false, "number of words in visible area"), - inboundlinkscount_i(SolrType.num_integer, true, true, false, false, false, "total number of inbound links"), - inboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of inbound links with nofollow tag"), - outboundlinkscount_i(SolrType.num_integer, true, true, false, false, false, "external number of 
inbound links"), - outboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of external links with nofollow tag"), + linkscount_i(SolrType.num_integer, true, true, false, false, false, "number of all outgoing links; including linksnofollowcount_i"), + linksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of all outgoing links with nofollow tag"), + inboundlinkscount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing inbound (to same domain) links; including inboundlinksnofollowcount_i"), + inboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing inbound (to same domain) links with nofollow tag"), + outboundlinkscount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing outbound (to other domain) links, including outboundlinksnofollowcount_i"), + outboundlinksnofollowcount_i(SolrType.num_integer, true, true, false, false, false, "number of outgoing outbound (to other domain) links with nofollow tag"), imagescount_i(SolrType.num_integer, true, true, false, false, false, "number of images"), responsetime_i(SolrType.num_integer, true, true, false, false, false, "response time of target server in milliseconds"), text_t(SolrType.text_general, true, true, false, false, true, "all visible text"),