|
|
|
@ -35,11 +35,6 @@ import javax.xml.parsers.DocumentBuilder;
|
|
|
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
|
|
|
import javax.xml.parsers.ParserConfigurationException;
|
|
|
|
|
|
|
|
|
|
import org.w3c.dom.Document;
|
|
|
|
|
import org.w3c.dom.Node;
|
|
|
|
|
import org.w3c.dom.NodeList;
|
|
|
|
|
import org.xml.sax.SAXException;
|
|
|
|
|
|
|
|
|
|
import net.yacy.cora.document.id.AnchorURL;
|
|
|
|
|
import net.yacy.cora.document.id.DigestURL;
|
|
|
|
|
import net.yacy.cora.federate.yacy.CacheStrategy;
|
|
|
|
@ -52,6 +47,11 @@ import net.yacy.search.Switchboard;
|
|
|
|
|
import net.yacy.server.serverObjects;
|
|
|
|
|
import net.yacy.server.serverSwitch;
|
|
|
|
|
|
|
|
|
|
import org.w3c.dom.Document;
|
|
|
|
|
import org.w3c.dom.Node;
|
|
|
|
|
import org.w3c.dom.NodeList;
|
|
|
|
|
import org.xml.sax.SAXException;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public class getpageinfo {
|
|
|
|
|
|
|
|
|
@ -65,7 +65,7 @@ public class getpageinfo {
|
|
|
|
|
prop.put("lang", "");
|
|
|
|
|
prop.put("robots-allowed", "3"); //unknown
|
|
|
|
|
prop.put("robotsInfo", ""); //unknown
|
|
|
|
|
prop.put("favicon","");
|
|
|
|
|
prop.put("icons","0");
|
|
|
|
|
prop.put("sitelist", "");
|
|
|
|
|
prop.put("filter", ".*");
|
|
|
|
|
prop.put("oai", 0);
|
|
|
|
@ -110,13 +110,15 @@ public class getpageinfo {
|
|
|
|
|
// put the document title
|
|
|
|
|
prop.putXML("title", removelinebreaks(scraper.dc_title()));
|
|
|
|
|
|
|
|
|
|
DigestURL favicon = null;
|
|
|
|
|
if (scraper.getIcons() != null && !scraper.getIcons().isEmpty()) {
|
|
|
|
|
favicon = scraper.getIcons().keySet().iterator().next();
|
|
|
|
|
Set<DigestURL> iconURLs = scraper.getIcons().keySet();
|
|
|
|
|
int i = 0;
|
|
|
|
|
for (DigestURL iconURL : iconURLs) {
|
|
|
|
|
prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false));
|
|
|
|
|
prop.put("icons_" + i + "_eol", 1);
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// put the favicon that belongs to the document
|
|
|
|
|
prop.put("favicon", (favicon == null) ? "" : favicon.toString());
|
|
|
|
|
prop.put("icons_" + (i - 1) + "_eol", 0);
|
|
|
|
|
prop.put("icons", iconURLs.size());
|
|
|
|
|
|
|
|
|
|
// put keywords
|
|
|
|
|
final Set<String> list = scraper.dc_subject();
|
|
|
|
|