diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list
index dcf7d5434..78a3087f0 100644
--- a/defaults/solr.keys.list
+++ b/defaults/solr.keys.list
@@ -73,7 +73,12 @@ text_t
wordcount_i
## internal links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow, textgen
-attr_inboundlinks
+attr_inboundlinks_tag
+attr_inboundlinks_protocol
+attr_inboundlinks_urlstub
+attr_inboundlinks_name
+attr_inboundlinks_rel
+attr_inboundlinks_text
## total number of inbound links, int
inboundlinkscount_i
@@ -82,7 +87,12 @@ inboundlinkscount_i
inboundlinksnoindexcount_i
## external links, normalized (absolute URLs), as <a> - tag with anchor text and nofollow, textgen
-attr_outboundlinks
+attr_outboundlinks_tag
+attr_outboundlinks_protocol
+attr_outboundlinks_urlstub
+attr_outboundlinks_name
+attr_outboundlinks_rel
+attr_outboundlinks_text
## total number of external links, int
outboundlinkscount_i
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
index de76cffbc..0f4a511f7 100644
--- a/htroot/ViewFile.html
+++ b/htroot/ViewFile.html
@@ -115,13 +115,22 @@
::
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 2113854a7..1d378b7c2 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -36,6 +36,7 @@ import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
+import java.util.Properties;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
@@ -309,8 +310,8 @@ public class ViewFile {
prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
boolean dark = true;
int i = 0;
- i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
- i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
+ i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0), document.getAnchors());
+ i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0), document.getAnchors());
dark = (i % 2 == 0);
final Map ts = document.getImages();
@@ -324,15 +325,17 @@ public class ViewFile {
prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? " " : markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
- if (entry.width() > 0 && entry.height() > 0)
- prop.put("viewMode_links_" + i + "_attr", entry.width() + "x" + entry.height() + " Pixel");
- else
- prop.put("viewMode_links_" + i + "_attr", "unknown");
+ if (entry.width() > 0 && entry.height() > 0) {
+ prop.put("viewMode_links_" + i + "_rel", entry.width() + "x" + entry.height() + " Pixel");
+ } else {
+ prop.put("viewMode_links_" + i + "_rel", "");
+ }
+ prop.put("viewMode_links_" + i + "_name", "");
dark = !dark;
i++;
}
- i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
- i += putMediaInfo(prop, wordArray, i, document.getHyperlinks(), "link", (i % 2 == 0));
+ i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0), document.getAnchors());
+ i += putMediaInfo(prop, wordArray, i, document.getHyperlinks(), "link", (i % 2 == 0), document.getAnchors());
prop.put("viewMode_links", i);
}
@@ -382,16 +385,29 @@ public class ViewFile {
return message;
}
- private static int putMediaInfo(final serverObjects prop, final String[] wordArray, int c, final Map media, final String name, boolean dark) {
+ private static int putMediaInfo(
+ final serverObjects prop,
+ final String[] wordArray,
+ int c,
+ final Map media,
+ final String type,
+ boolean dark,
+ final Map alllinks) {
int i = 0;
for (final Map.Entry entry : media.entrySet()) {
+ final Properties p = alllinks.get(entry.getKey());
+ final String name = p.getProperty("name", ""); // the name attribute
+ final String rel = p.getProperty("rel", ""); // the rel-attribute
+ final String text = p.getProperty("text", ""); // the text between the tag
+
prop.put("viewMode_links_" + c + "_nr", c);
prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0));
- prop.putHTML("viewMode_links_" + c + "_type", name);
- prop.put("viewMode_links_" + c + "_text", ((entry.getValue().isEmpty()) ? " " : markup(wordArray, entry.getValue()) ));
+ prop.putHTML("viewMode_links_" + c + "_type", type);
+ prop.put("viewMode_links_" + c + "_text", text + "/" + ((entry.getValue().isEmpty()) ? " " : markup(wordArray, entry.getValue()) ));
prop.put("viewMode_links_" + c + "_link", markup(wordArray, entry.getKey().toNormalform(true, false)));
prop.put("viewMode_links_" + c + "_url", entry.getKey().toNormalform(true, false));
- prop.put("viewMode_links_" + c + "_attr", " ");
+ prop.put("viewMode_links_" + c + "_rel", rel);
+ prop.put("viewMode_links_" + c + "_name", name);
dark = !dark;
c++;
i++;
diff --git a/source/net/yacy/cora/services/federated/solr/SolrScheme.java b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
index 868d0a9d1..063780ae7 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
@@ -129,36 +129,76 @@ public class SolrScheme extends ConfigurationSet {
if (isEmpty() || contains("inboundlinkscount_i")) addSolr(solrdoc, "inboundlinkscount_i", yacydoc.inboundLinkCount());
if (isEmpty() || contains("inboundlinksnoindexcount_i")) addSolr(solrdoc, "inboundlinksnoindexcount_i", yacydoc.inboundLinkNoindexCount());
if (isEmpty() || contains("attr_inboundlinks")) {
- final String[] inboundlinks = new String[yacydoc.inboundLinkCount()];
+ final String[] inboundlinksTag = new String[yacydoc.inboundLinkCount()];
+ final String[] inboundlinksURLProtocol = new String[yacydoc.inboundLinkCount()];
+ final String[] inboundlinksURLStub = new String[yacydoc.inboundLinkCount()];
+ final String[] inboundlinksName = new String[yacydoc.inboundLinkCount()];
+ final String[] inboundlinksRel = new String[yacydoc.inboundLinkCount()];
+ final String[] inboundlinksText = new String[yacydoc.inboundLinkCount()];
for (final MultiProtocolURI url: yacydoc.inboundLinks()) {
final Properties p = alllinks.get(url);
- final String name = p.getProperty("name", "");
- final String rel = p.getProperty("rel", "");
- inboundlinks[c++] =
+ final String name = p.getProperty("name", ""); // the name attribute
+ final String rel = p.getProperty("rel", ""); // the rel-attribute
+ final String text = p.getProperty("text", ""); // the text between the tag
+ final String urls = url.toNormalform(false, false);
+ final int pr = urls.indexOf("://"); // -1 if the normalized URL carries no "://" separator
+ inboundlinksURLProtocol[c] = pr < 0 ? "" : urls.substring(0, pr); // guard: substring(0, -1) would throw
+ inboundlinksURLStub[c] = pr < 0 ? urls : urls.substring(pr + 3);
+ inboundlinksName[c] = name.length() > 0 ? name : "";
+ inboundlinksRel[c] = rel.length() > 0 ? rel : "";
+ inboundlinksText[c] = text.length() > 0 ? text : ""; // store the anchor text itself, not the rel attribute
+ inboundlinksTag[c] =
" 0 ? " rel=\"" + rel + "\"" : "") +
">" +
((name.length() > 0) ? name : "") + "";
+ c++;
}
- addSolr(solrdoc, "attr_inboundlinks", inboundlinks);
+ addSolr(solrdoc, "attr_inboundlinks_tag", inboundlinksTag);
+ addSolr(solrdoc, "attr_inboundlinks_protocol", inboundlinksURLProtocol);
+ addSolr(solrdoc, "attr_inboundlinks_urlstub", inboundlinksURLStub);
+ addSolr(solrdoc, "attr_inboundlinks_name", inboundlinksName);
+ addSolr(solrdoc, "attr_inboundlinks_rel", inboundlinksRel);
+ addSolr(solrdoc, "attr_inboundlinks_text", inboundlinksText);
}
+
c = 0;
if (isEmpty() || contains("outboundlinkscount_i")) addSolr(solrdoc, "outboundlinkscount_i", yacydoc.outboundLinkCount());
if (isEmpty() || contains("outboundlinksnoindexcount_i")) addSolr(solrdoc, "outboundlinksnoindexcount_i", yacydoc.outboundLinkNoindexCount());
if (isEmpty() || contains("attr_outboundlinks")) {
- final String[] outboundlinks = new String[yacydoc.outboundLinkCount()];
+ final String[] outboundlinksTag = new String[yacydoc.outboundLinkCount()];
+ final String[] outboundlinksURLProtocol = new String[yacydoc.outboundLinkCount()];
+ final String[] outboundlinksURLStub = new String[yacydoc.outboundLinkCount()];
+ final String[] outboundlinksName = new String[yacydoc.outboundLinkCount()];
+ final String[] outboundlinksRel = new String[yacydoc.outboundLinkCount()];
+ final String[] outboundlinksText = new String[yacydoc.outboundLinkCount()];
for (final MultiProtocolURI url: yacydoc.outboundLinks()) {
final Properties p = alllinks.get(url);
- final String name = p.getProperty("name", "");
- final String rel = p.getProperty("rel", "");
- outboundlinks[c++] =
+ final String name = p.getProperty("name", ""); // the name attribute
+ final String rel = p.getProperty("rel", ""); // the rel-attribute
+ final String text = p.getProperty("text", ""); // the text between the tag
+ final String urls = url.toNormalform(false, false);
+ final int pr = urls.indexOf("://"); // -1 if the normalized URL carries no "://" separator
+ outboundlinksURLProtocol[c] = pr < 0 ? "" : urls.substring(0, pr); // guard: substring(0, -1) would throw
+ outboundlinksURLStub[c] = pr < 0 ? urls : urls.substring(pr + 3);
+ outboundlinksName[c] = name.length() > 0 ? name : "";
+ outboundlinksRel[c] = rel.length() > 0 ? rel : "";
+ outboundlinksText[c] = text.length() > 0 ? text : ""; // store the anchor text itself, not the rel attribute
+ outboundlinksTag[c] =
" 0 ? " rel=\"" + rel + "\"" : "") +
">" +
((name.length() > 0) ? name : "") + "";
+ c++;
}
- addSolr(solrdoc, "attr_outboundlinks", outboundlinks);
+ addSolr(solrdoc, "attr_outboundlinks_tag", outboundlinksTag);
+ addSolr(solrdoc, "attr_outboundlinks_protocol", outboundlinksURLProtocol);
+ addSolr(solrdoc, "attr_outboundlinks_urlstub", outboundlinksURLStub);
+ addSolr(solrdoc, "attr_outboundlinks_name", outboundlinksName);
+ addSolr(solrdoc, "attr_outboundlinks_rel", outboundlinksRel);
+ addSolr(solrdoc, "attr_outboundlinks_text", outboundlinksText);
}
+
// charset
addSolr(solrdoc, "charset_s", yacydoc.getCharset());
diff --git a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
index c429d4b66..e75ee0fd5 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
@@ -242,8 +242,9 @@ public class SolrSingleConnector {
}
protected void addSolr(final Collection docs) throws IOException, SolrException {
+
try {
- this.server.add(docs);
+ if (docs.size() != 0) this.server.add(docs);
this.server.commit();
/* To immediately commit after adding documents, you could use:
UpdateRequest req = new UpdateRequest();
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index d013b25f6..53755465b 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -37,6 +37,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
@@ -172,6 +173,36 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.canonical = null;
}
+ private void mergeAnchors(final MultiProtocolURI url, final Properties p) {
+ final Properties p0 = this.anchors.get(url);
+ if (p0 == null) {
+ this.anchors.put(url, p);
+ return;
+ }
+ // merge properties
+ for (final Entry