removed unused tag fields

pull/1/head
Michael Peter Christen 12 years ago
parent 7804c12976
commit 2080fc7406

@@ -205,18 +205,12 @@ h6_txt
## content of <meta name="generator" content=#content#> tag, text
#metagenerator_t
## internal links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow
#inboundlinks_tag_txt
## internal links, only the protocol
inboundlinks_protocol_sxt
## internal links, the url only without the protocol
inboundlinks_urlstub_txt
## external links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow
#outboundlinks_tag_txt
## external links, only the protocol
outboundlinks_protocol_sxt

@@ -32,7 +32,6 @@ import net.yacy.cora.storage.Configuration;
import net.yacy.data.WorkTables;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import java.io.IOException;
import java.util.Iterator;

@@ -712,10 +712,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// list all links
WebgraphConfiguration.Subgraph subgraph = webgraph.edges(digestURI, responseHeader, profile.collections(), clickdepth, document.getAnchors(), images, inboundLinks, outboundLinks, citations);
doc.webgraphDocuments.addAll(subgraph.edges);
if (allAttr || contains(CollectionSchema.inboundlinks_tag_txt)) add(doc, CollectionSchema.inboundlinks_tag_txt, subgraph.tags[0]); // if inboundlinks_tag_txt can be removed, remove also subgraph.tags
if (allAttr || contains(CollectionSchema.inboundlinks_protocol_sxt)) add(doc, CollectionSchema.inboundlinks_protocol_sxt, protocolList2indexedList(subgraph.urlProtocols[0]));
if (allAttr || contains(CollectionSchema.inboundlinks_urlstub_txt)) add(doc, CollectionSchema.inboundlinks_urlstub_txt, subgraph.urlStubs[0]);
if (allAttr || contains(CollectionSchema.outboundlinks_tag_txt)) add(doc, CollectionSchema.outboundlinks_tag_txt, subgraph.tags[1]); // if outboundlinks_tag_txt can be removed, remove also subgraph.tags
if (allAttr || contains(CollectionSchema.outboundlinks_protocol_sxt)) add(doc, CollectionSchema.outboundlinks_protocol_sxt, protocolList2indexedList(subgraph.urlProtocols[1]));
if (allAttr || contains(CollectionSchema.outboundlinks_urlstub_txt)) add(doc, CollectionSchema.outboundlinks_urlstub_txt, subgraph.urlStubs[1]);

@@ -111,10 +111,8 @@ public enum CollectionSchema implements SchemaDeclaration {
metagenerator_t(SolrType.text_general, true, true, false, false, false, "content of <meta name=\"generator\" content=#content#> tag"),
inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol"),
inboundlinks_urlstub_txt(SolrType.text_general, true, true, true, false, false, "internal links, the url only without the protocol"),
inboundlinks_tag_txt(SolrType.text_general, true, true, true, false, false, "internal links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow"),
outboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "external links, only the protocol"),
outboundlinks_urlstub_txt(SolrType.text_general, true, true, true, false, false, "external links, the url only without the protocol"),
outboundlinks_tag_txt(SolrType.text_general, true, true, true, false, false, "external links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow"),
images_tag_txt(SolrType.text_general, true, true, true, false, true, " all image tags, encoded as <img> tag inclusive alt- and title property"),
images_urlstub_txt(SolrType.text_general, true, true, true, false, true, "all image links without the protocol and '://'"),

@@ -96,11 +96,10 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
}
public static class Subgraph {
public final ArrayList<String>[] tags, urlProtocols, urlStubs;
public final ArrayList<String>[] urlProtocols, urlStubs;
public final ArrayList<SolrInputDocument> edges;
@SuppressWarnings("unchecked")
public Subgraph(int inboundSize, int outboundSize) {
this.tags = new ArrayList[]{new ArrayList<String>(inboundSize), new ArrayList<String>(outboundSize)};
this.urlProtocols = new ArrayList[]{new ArrayList<String>(inboundSize), new ArrayList<String>(outboundSize)};
this.urlStubs = new ArrayList[]{new ArrayList<String>(inboundSize), new ArrayList<String>(outboundSize)};
this.edges = new ArrayList<SolrInputDocument>(inboundSize + outboundSize);
@@ -208,8 +207,6 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
if (allAttr || contains(WebgraphSchema.target_linktext_t)) add(edge, WebgraphSchema.target_linktext_t, text.length() > 0 ? text : "");
if (allAttr || contains(WebgraphSchema.target_linktext_charcount_i)) add(edge, WebgraphSchema.target_linktext_charcount_i, text.length());
if (allAttr || contains(WebgraphSchema.target_linktext_wordcount_i)) add(edge, WebgraphSchema.target_linktext_wordcount_i, text.length() > 0 ? CommonPattern.SPACE.split(text).length : 0);
String tag = "<a href=\"" + target_url.toNormalform(false) + "\"" + (rel.length() > 0 ? " rel=\"" + rel + "\"" : "") + (name.length() > 0 ? " name=\"" + name + "\"" : "") + ">" + ((text.length() > 0) ? text : "") + "</a>";
subgraph.tags[ioidx].add(tag);
ImageEntry ientry = images.get(target_url);
String alttext = ientry == null ? "" : ientry.alt();
if (allAttr || contains(WebgraphSchema.target_alt_t)) add(edge, WebgraphSchema.target_alt_t, alttext);

Loading…
Cancel
Save