diff --git a/defaults/yacy.init b/defaults/yacy.init
index 51be0538a..a6a2a1024 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -769,6 +769,7 @@ search.result.show.date = true
search.result.show.size = false
search.result.show.metadata = false
search.result.show.parser = false
+search.result.show.citation = true
search.result.show.pictures = false
search.result.show.cache = true
search.result.show.proxy = false
diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html
index ad8487564..db03e3935 100644
--- a/htroot/ConfigPortal.html
+++ b/htroot/ConfigPortal.html
@@ -79,6 +79,7 @@
Size
Metadata
Parser
+ Citations
Pictures
Cache
Augmented Browsing
diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java
index fab45a661..456e6d445 100644
--- a/htroot/ConfigPortal.java
+++ b/htroot/ConfigPortal.java
@@ -99,6 +99,7 @@ public class ConfigPortal {
sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size"));
sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata"));
sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser"));
+ sb.setConfig("search.result.show.citation", post.getBoolean("search.result.show.citation"));
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures"));
sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache"));
sb.setConfig("search.result.show.proxy", post.getBoolean("search.result.show.proxy"));
@@ -170,6 +171,7 @@ public class ConfigPortal {
sb.setConfig("search.result.show.size", config.getProperty("search.result.show.size","false"));
sb.setConfig("search.result.show.metadata", config.getProperty("search.result.show.metadata","false"));
sb.setConfig("search.result.show.parser", config.getProperty("search.result.show.parser","false"));
+ sb.setConfig("search.result.show.citation", config.getProperty("search.result.show.citation","false"));
sb.setConfig("search.result.show.pictures", config.getProperty("search.result.show.pictures","false"));
sb.setConfig("search.result.show.cache", config.getProperty("search.result.show.cache","true"));
sb.setConfig("search.result.show.proxy", config.getProperty("search.result.show.proxy","false"));
@@ -205,6 +207,7 @@ public class ConfigPortal {
prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0);
prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0);
prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0);
+ prop.put("search.result.show.citation", sb.getConfigBool("search.result.show.citation", false) ? 1 : 0);
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0);
prop.put("search.result.show.proxy", sb.getConfigBool("search.result.show.proxy", false) ? 1 : 0);
diff --git a/htroot/ConfigSearchPage_p.html b/htroot/ConfigSearchPage_p.html
index d86c07b5b..e34a44627 100644
--- a/htroot/ConfigSearchPage_p.html
+++ b/htroot/ConfigSearchPage_p.html
@@ -179,6 +179,7 @@ $(function() {
| 42 kbyte |
| Metadata |
| Parser |
+ | Citation |
| Pictures |
| Cache |
| Augmented Browsing |
@@ -190,6 +191,7 @@ $(function() {
|
|
|
+ |
|
|
|
diff --git a/htroot/ConfigSearchPage_p.java b/htroot/ConfigSearchPage_p.java
index 7306b23ea..9f4ef6f7d 100644
--- a/htroot/ConfigSearchPage_p.java
+++ b/htroot/ConfigSearchPage_p.java
@@ -72,6 +72,7 @@ public class ConfigSearchPage_p {
sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size"));
sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata"));
sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser"));
+ sb.setConfig("search.result.show.citation", post.getBoolean("search.result.show.citation"));
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures"));
sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache"));
sb.setConfig("search.result.show.proxy", post.getBoolean("search.result.show.proxy"));
@@ -124,6 +125,7 @@ public class ConfigSearchPage_p {
sb.setConfig("search.result.show.size", config.getProperty("search.result.show.size","false"));
sb.setConfig("search.result.show.metadata", config.getProperty("search.result.show.metadata","false"));
sb.setConfig("search.result.show.parser", config.getProperty("search.result.show.parser","false"));
+ sb.setConfig("search.result.show.citation", config.getProperty("search.result.show.citation","false"));
sb.setConfig("search.result.show.pictures", config.getProperty("search.result.show.pictures","false"));
sb.setConfig("search.result.show.cache", config.getProperty("search.result.show.cache","true"));
sb.setConfig("search.result.show.proxy", config.getProperty("search.result.show.proxy","false"));
@@ -150,6 +152,7 @@ public class ConfigSearchPage_p {
prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0);
prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0);
prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0);
+ prop.put("search.result.show.citation", sb.getConfigBool("search.result.show.citation", false) ? 1 : 0);
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0);
prop.put("search.result.show.proxy", sb.getConfigBool("search.result.show.proxy", false) ? 1 : 0);
diff --git a/htroot/api/citation.html b/htroot/api/citation.html
new file mode 100644
index 000000000..268ba99f8
--- /dev/null
+++ b/htroot/api/citation.html
@@ -0,0 +1,34 @@
+
+
+
+YaCy '#[clientname]#': Document Citations for url #[url]#
+#%env/templates/metas.template%#
+
+
+#%env/templates/embeddedheader.template%#
+ Document Citations for
#[url]#
+ #(similar)#::
+ Similar documents from different hosts:
+
+ #(/similar)#
+
+
+#%env/templates/embeddedfooter.template%#
+
+
diff --git a/htroot/api/citation.java b/htroot/api/citation.java
new file mode 100644
index 000000000..8cee1a124
--- /dev/null
+++ b/htroot/api/citation.java
@@ -0,0 +1,199 @@
+/**
+ * citation
+ * Copyright 2013 by Michael Peter Christen
+ * First released 12.6.2013 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+
+import net.yacy.cora.document.ASCII;
+import net.yacy.cora.federate.solr.connector.SolrConnector;
+import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.sorting.OrderedScoreMap;
+import net.yacy.document.SentenceReader;
+import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.search.Switchboard;
+import net.yacy.search.index.Segment;
+import net.yacy.search.schema.CollectionSchema;
+import net.yacy.server.serverObjects;
+import net.yacy.server.serverSwitch;
+
+public class citation { // request handler added together with htroot/api/citation.html; presumably fills that template (TODO confirm)
+
+ public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { // fills the properties url, sentences, citations, similar_links and similar for the document addressed by the post parameters url or hash
+ // return variable that accumulates replacements
+ final Switchboard sb = (Switchboard) env;
+ final serverObjects prop = new serverObjects();
+ final Segment segment = sb.index;
+ final SolrConnector connector = segment.fulltext().getDefaultConnector(); // used below for the per-sentence phrase queries
+
+ // avoid UNRESOLVED PATTERN
+ prop.put("url", "");
+ prop.put("citations", 0);
+ prop.put("sentences", 0);
+
+ DigestURI uri = null;
+ String url = "";
+ String hash = "";
+ int ch = 10; // minimum score a citing url needs to be listed as similar; can be overridden by the post parameter ch
+ if (post != null) {
+ if (post.containsKey("url")) {
+ url = post.get("url"); // a url that starts with none of the schemes checked below gets the http scheme prepended
+ if (!url.startsWith("http://") &&
+ !url.startsWith("https://") &&
+ !url.startsWith("ftp://") &&
+ !url.startsWith("smb://") &&
+ !url.startsWith("file://")) {
+ url = "http://" + url;
+ }
+ }
+ if (post.containsKey("hash")) {
+ hash = post.get("hash");
+ }
+ if (post.containsKey("ch")) {
+ ch = post.getInt("ch", ch);
+ }
+ }
+
+ if (url.length() > 0) {
+ try {
+ uri = new DigestURI(url, null);
+ hash = ASCII.String(uri.hash()); // a parseable url overrides any hash parameter
+ } catch (MalformedURLException e) {} // unparseable url: uri stays null and the hash fallback below applies
+ }
+ if (uri == null && hash.length() > 0) {
+ uri = sb.getURL(ASCII.getBytes(hash));
+ }
+ if (uri == null) return prop; // no proper url addressed
+ url = uri.toNormalform(true);
+ prop.put("url", url);
+
+ // get the document from the index
+ SolrDocument doc;
+ try {
+ doc = segment.fulltext().getDefaultConnector().getDocumentById(hash, CollectionSchema.title.getSolrFieldName(), CollectionSchema.text_t.getSolrFieldName()); // only the title and text_t fields are requested
+ } catch (IOException e1) {
+ return prop; // index lookup failed: respond with the url set and both counters at 0
+ }
+ @SuppressWarnings("unchecked")
+ ArrayList title = (ArrayList) doc.getFieldValue(CollectionSchema.title.getSolrFieldName()); // NOTE(review): doc is dereferenced without a null check; confirm getDocumentById never returns null for an unknown id
+ String text = (String) doc.getFieldValue(CollectionSchema.text_t.getSolrFieldName());
+
+ ArrayList sentences = new ArrayList();
+ if (title != null) for (String s: title) if (s.length() > 0) sentences.add(s); // non-empty title entries are treated as sentences too
+ SentenceReader sr = new SentenceReader(text); // NOTE(review): text is not null-checked before use; verify SentenceReader accepts null or that the field is always present
+ StringBuilder line;
+ while (sr.hasNext()) {
+ line = sr.next();
+ if (line.length() > 0) sentences.add(line.toString());
+ }
+
+ // for each line make a statistic about the number of occurrences somewhere else
+ OrderedScoreMap scores = new OrderedScoreMap(null); // accumulates scores for citing urls
+ LinkedHashMap> sentenceOcc = new LinkedHashMap>(); // maps each sentence to the parsed urls of other documents containing it; iterated twice below to build the output
+ for (String sentence: sentences) {
+ if (sentence == null || sentence.length() < 40) {
+ // do not count the very short sentences
+ sentenceOcc.put(sentence, null); // a null value marks a sentence that was not queried
+ continue;
+ }
+ try { // phrase query for the sentence in text_t, fetching at most 100 hits; double quotes are replaced first, presumably so they cannot terminate the quoted phrase
+ sentence = sentence.replace('"', '\'');
+ SolrDocumentList doclist = connector.getDocumentListByQuery("text_t:\"" + sentence + "\"", 0, 100, CollectionSchema.sku.getSolrFieldName());
+ int count = (int) doclist.getNumFound();
+ if (count > 0) { // NOTE(review): sentences with no hits are never added to sentenceOcc and so are absent from the output, unlike short sentences; confirm this is intended
+ Set list = new TreeSet();
+ for (SolrDocument d: doclist) {
+ String u = (String) d.getFieldValue(CollectionSchema.sku.getSolrFieldName());
+ if (u == null || u.equals(url)) continue; // skip hits without sku and the requested document itself
+ scores.inc(u); // counts one more shared sentence for u
+ try {list.add(new DigestURI(u, null));} catch (MalformedURLException e) {} // an unparseable sku still counts in scores but is left out of the set
+ }
+ sentenceOcc.put(sentence, list);
+ }
+ } catch (Throwable ee) { // NOTE(review): any failure for one sentence is silently dropped and the loop continues; consider at least logging
+
+ }
+ }
+ sentences.clear(); // we do not need this again
+
+ // iterate the sentences
+ int i = 0;
+ for (Map.Entry> se: sentenceOcc.entrySet()) {
+ prop.put("sentences_" + i + "_dt", i);
+ StringBuilder dd = new StringBuilder(se.getKey());
+ Set app = se.getValue();
+ if (app != null && app.size() > 0) {
+ dd.append("
appears in:");
+ for (DigestURI u: app) {
+ if (u != null) {
+ dd.append(" ").append(u.getHost()).append("");
+ }
+ }
+ }
+ prop.put("sentences_" + i + "_dd", dd.toString()); // NOTE(review): dd holds indexed text and is stored with put, while other servlets use putHTML; confirm escaping
+ i++;
+ }
+ prop.put("sentences", i); // number of sentence entries written above
+
+ // iterate the citations in order of number of citations
+ i = 0;
+ for (String u: scores.keyList(false)) {
+ try {
+ DigestURI uu = new DigestURI(u, null);
+ prop.put("citations_" + i + "_dt", "" + u + "");
+ StringBuilder dd = new StringBuilder();
+ dd.append("makes ").append(Integer.toString(scores.get(u))).append(" citations: of ").append(url);
+ for (Map.Entry> se: sentenceOcc.entrySet()) {
+ Set occurls = se.getValue();
+ if (occurls != null && occurls.contains(uu)) dd.append("
").append(se.getKey()).append("");
+ }
+ prop.put("citations_" + i + "_dd", dd.toString());
+ i++;
+ } catch (MalformedURLException e) {} // urls that fail to parse are skipped without consuming an index
+ }
+ prop.put("citations", i);
+
+ // find similar documents from different hosts
+ i = 0;
+ for (String u: scores.keyList(false)) {
+ if (scores.get(u) < ch) continue; // below the ch threshold
+ try {
+ DigestURI uu = new DigestURI(u, null);
+ if (uu.getOrganization().equals(uri.getOrganization())) continue; // skip urls whose getOrganization() equals that of the requested document
+ prop.put("similar_links_" + i + "_url", u);
+ i++;
+ } catch (MalformedURLException e) {}
+ }
+ prop.put("similar_links", i);
+ prop.put("similar", i > 0 ? 1 : 0); // 1 when at least one similar link was found, else 0
+
+ // return rewrite properties
+ return prop;
+ }
+
+}
diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html
index 9b30e35b5..58996c0e7 100644
--- a/htroot/yacysearchitem.html
+++ b/htroot/yacysearchitem.html
@@ -28,6 +28,7 @@
#(showSize)#:: | #[sizename]##(/showSize)#
#(showMetadata)#:: | Metadata#(/showMetadata)#
#(showParser)#:: | Parser#(/showParser)#
+ #(showCitation)#:: | Citations#(/showCitation)#
#(showPictures)#:: | Pictures#(/showPictures)#
#(showCache)#:: | Cache#(/showCache)#
#(showProxy)#:: | Augmented Browsing#(/showProxy)#
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index 275d2e99e..80783193a 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -133,6 +133,7 @@ public class yacysearchitem {
prop.put("content_showSize", sb.getConfigBool("search.result.show.size", true) ? 1 : 0);
prop.put("content_showMetadata", sb.getConfigBool("search.result.show.metadata", true) ? 1 : 0);
prop.put("content_showParser", sb.getConfigBool("search.result.show.parser", true) ? 1 : 0);
+ prop.put("content_showCitation", sb.getConfigBool("search.result.show.citation", true) ? 1 : 0);
prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0);
prop.put("content_showCache", sb.getConfigBool("search.result.show.cache", true) && Cache.has(resultURL.hash()) ? 1 : 0);
prop.put("content_showProxy", sb.getConfigBool("search.result.show.proxy", true) ? 1 : 0);
@@ -198,6 +199,7 @@ public class yacysearchitem {
prop.put("content_showProxy_link", resultUrlstring);
prop.put("content_showHostBrowser_link", resultUrlstring);
prop.put("content_showParser_urlhash", resulthashString);
+ prop.put("content_showCitation_urlhash", resulthashString);
prop.put("content_showTags_urlhash", resulthashString);
prop.put("content_urlhexhash", Seed.b64Hash2hexHash(resulthashString));
prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), MAX_URL_LENGTH));
diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java
index 979637254..d5e73af0b 100644
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@@ -755,6 +755,14 @@ public class MultiProtocolURI implements Serializable, Comparable\n");
+ writer.write("\n\n\n");
writer.write("\n");
writer.write("\n");
SolrParams params = request.getOriginalParams();
- boolean discover = params.getBool("discover", false);
String grep = params.get("grep");
String query = "";
String q = params.get("q"); if (q == null) q = "";
@@ -112,8 +106,6 @@ public class GrepHTMLResponseWriter implements QueryResponseWriter {
NamedList