Updated getpageinfo outputs to return page icons list.

pull/39/head
luc 9 years ago
parent 26f1ead57c
commit 8682dfbd5e

@ -35,11 +35,6 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
@ -52,6 +47,11 @@ import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class getpageinfo {
@ -65,7 +65,7 @@ public class getpageinfo {
prop.put("lang", "");
prop.put("robots-allowed", "3"); //unknown
prop.put("robotsInfo", ""); //unknown
prop.put("favicon","");
prop.put("icons","0");
prop.put("sitelist", "");
prop.put("filter", ".*");
prop.put("oai", 0);
@ -110,13 +110,15 @@ public class getpageinfo {
// put the document title
prop.putXML("title", removelinebreaks(scraper.dc_title()));
DigestURL favicon = null;
if (scraper.getIcons() != null && !scraper.getIcons().isEmpty()) {
favicon = scraper.getIcons().keySet().iterator().next();
}
// put the favicon that belongs to the document
prop.put("favicon", (favicon == null) ? "" : favicon.toString());
Set<DigestURL> iconURLs = scraper.getIcons().keySet();
int i = 0;
for (DigestURL iconURL : iconURLs) {
prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false));
prop.put("icons_" + i + "_eol", 1);
i++;
}
prop.put("icons_" + (i - 1) + "_eol", 0);
prop.put("icons", iconURLs.size());
// put keywords
final Set<String> list = scraper.dc_subject();

@ -6,7 +6,9 @@
"robots": "#(robots-allowed)#0::1::#(/robots-allowed)#",
"robotsInfo": "#[robotsInfo]#",
"favicon": "#[favicon]#",
"icons": [#{icons}#
"#[icon]#"#(eol)#::,#(/eol)#
#{/icons}#],
"filter": "#[filter]#",
"tags": "#{tags}##[tag]#,#{/tags}#",

@ -8,7 +8,11 @@
#{sitemaps}#
<sitemap>#[sitemap]#</sitemap>
#{/sitemaps}#
<favicon>#[favicon]#</favicon>
<icons>
#{icons}#
<icon>#[icon]#</icon>
#{/icons}#
</icons>
<sitelist>#[sitelist]#</sitelist>
<filter>#[filter]#</filter>
<tags>

@ -66,7 +66,7 @@ public class getpageinfo_p {
prop.put("robots-allowed", "3"); //unknown
prop.put("robotsInfo", ""); //unknown
prop.put("sitemap", "");
prop.put("favicon","");
prop.put("icons","0");
prop.put("sitelist", "");
prop.put("filter", ".*");
prop.put("oai", 0);
@ -109,14 +109,17 @@ public class getpageinfo_p {
if (scraper != null) {
// put the document title
prop.putXML("title", scraper.dc_title());
DigestURL favicon = null;
if (scraper.getIcons() != null && !scraper.getIcons().isEmpty()) {
favicon = scraper.getIcons().keySet().iterator().next();
}
// put the favicon that belongs to the document
prop.put("favicon", (favicon == null) ? "" : favicon.toString());
// put the icons that belong to the document
Set<DigestURL> iconURLs = scraper.getIcons().keySet();
int i = 0;
for (DigestURL iconURL : iconURLs) {
prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false));
prop.put("icons_" + i + "_eol", 1);
i++;
}
prop.put("icons_" + (i - 1) + "_eol", 0);
prop.put("icons", iconURLs.size());
// put keywords
final Set<String> list = scraper.dc_subject();

@ -8,7 +8,11 @@
#{sitemaps}#
<sitemap>#[sitemap]#</sitemap>
#{/sitemaps}#
<favicon>#[favicon]#</favicon>
<icons>
#{icons}#
<icon>#[icon]#</icon>
#{/icons}#
</icons>
<sitelist>#[sitelist]#</sitelist>
<filter>#[filter]#</filter>
<tags>

Loading…
Cancel
Save