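- refactored getpageinfo_p.xml: moved it out of util, so /api/util/getpageinfo_p.xml is now /api/getpageinfo_p.xml, and updated all callers to the new path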

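- added more logging in getpageinfo_p.xml: exceptions caught while parsing the URL, loading the page, and reading robots.txt are now logged with Log.logException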

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8037 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent abba31f02e
commit f8b8c82421

@ -229,7 +229,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
/
<a href="Bookmarks.html?delete=#[hash]#" class="bookmarkAction" onclick="return confirm('Confirm deletion')">Delete</a>
/
<a href="/api/util/getpageinfo_p.xml?url=#[link]#" class="bookmarkAction">Info</a>
<a href="/api/getpageinfo_p.xml?url=#[link]#" class="bookmarkAction">Info</a>
</p>
</div>
#{/bookmarks}#

@ -9,7 +9,7 @@
<body>
<div id="api">
<a href="http://localhost:8090/api/util/getpageinfo_p.xml?actions=title,robots&url=#[url]#" id="apilink">
<a href="http://localhost:8090/api/getpageinfo_p.xml?actions=title,robots&url=#[url]#" id="apilink">
<img src="/env/grafics/api.png" width="60" height="40" alt="API"/>
</a>
<span>See the page info about the url.</span>

@ -8,6 +8,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.crawler.RobotsTxtEntry;
import de.anomic.server.serverObjects;
@ -30,15 +31,15 @@ public class getpageinfo_p {
prop.put("filter", ".*");
// default actions
String actions="title,robots";
String actions = "title,robots";
if (post != null && post.containsKey("url")) {
if(post.containsKey("actions"))
if (post.containsKey("actions"))
actions=post.get("actions");
String url=post.get("url");
if(url.toLowerCase().startsWith("ftp://")){
if (url.toLowerCase().startsWith("ftp://")) {
prop.put("robots-allowed", "1");
prop.putXML("title", "FTP: "+url);
prop.putXML("title", "FTP: " + url);
return prop;
} else if (!url.startsWith("http://") &&
!url.startsWith("https://") &&
@ -47,18 +48,18 @@ public class getpageinfo_p {
!url.startsWith("file://")) {
url = "http://" + url;
}
if (actions.indexOf("title")>=0) {
if (actions.indexOf("title") >= 0) {
DigestURI u = null;
try {
u = new DigestURI(url);
} catch (final MalformedURLException e) {
// fail, do nothing
Log.logException(e);
}
ContentScraper scraper = null;
if (u != null) try {
scraper = sb.loader.parseResource(u, CacheStrategy.IFEXIST);
} catch (final IOException e) {
// now thats a fail, do nothing
Log.logException(e);
}
if (scraper != null) {
// put the document title
@ -68,9 +69,9 @@ public class getpageinfo_p {
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
// put keywords
final String list[]=scraper.getKeywords();
final String list[] = scraper.getKeywords();
int count = 0;
for (final String element : list) {
for (final String element: list) {
final String tag = element;
if (!tag.equals("")) {
prop.putXML("tags_"+count+"_tag", tag);
@ -100,7 +101,7 @@ public class getpageinfo_p {
prop.putXML("filter", filter.length() > 0 ? filter.substring(1) : ".*");
}
}
if (actions.indexOf("robots")>=0) {
if (actions.indexOf("robots") >= 0) {
try {
final DigestURI theURL = new DigestURI(url);
@ -110,13 +111,16 @@ public class getpageinfo_p {
robotsEntry = sb.robots.getEntry(theURL, sb.peers.myBotIDs());
} catch (final IOException e) {
robotsEntry = null;
Log.logException(e);
}
prop.put("robots-allowed", robotsEntry == null ? 1 : robotsEntry.isDisallowed(theURL) ? 0 : 1);
// get the sitemap URL of the domain
final MultiProtocolURI sitemapURL = robotsEntry == null ? null : robotsEntry.getSitemap();
prop.putXML("sitemap", sitemapURL == null ? "" : sitemapURL.toString());
} catch (final MalformedURLException e) {}
} catch (final MalformedURLException e) {
Log.logException(e);
}
}
}

@ -24,7 +24,7 @@ function loadTitle(){
url=document.getElementsByName("url")[0].value;
if(document.getElementsByName("title")[0].value==""){
sndReq('/api/util/getpageinfo_p.xml?actions=title&url='+url);
sndReq('/api/getpageinfo_p.xml?actions=title&url='+url);
}
}

@ -75,7 +75,7 @@ function loadInfos() {
url=document.getElementById("crawlingURL").value;
if (url.indexOf("ftp") == 0 || url.indexOf("smb") == 0) document.getElementById("crawlingQ").disabled=true; else document.getElementById("crawlingQ").disabled=false;
sndReq('/api/util/getpageinfo_p.xml?actions=title,robots&url='+url);
document.getElementById("api").innerHTML = "<a href='http://localhost:8090/api/util/getpageinfo_p.xml?actions=title,robots&url=" + url + "' id='apilink'><img src='/env/grafics/api.png' width='60' height='40' alt='API'/></a><span>See the page info about the start url.</span>";
sndReq('/api/getpageinfo_p.xml?actions=title,robots&url='+url);
document.getElementById("api").innerHTML = "<a href='http://localhost:8090/api/getpageinfo_p.xml?actions=title,robots&url=" + url + "' id='apilink'><img src='/env/grafics/api.png' width='60' height='40' alt='API'/></a><span>See the page info about the start url.</span>";
}

@ -85,7 +85,7 @@
var url = $("input[name='bm_url']").getValue();
$.ajax({
type: "GET",
url: "/api/util/getpageinfo_p.xml?url="+url,
url: "/api/getpageinfo_p.xml?url="+url,
dataType: "xml",
success: function(xml) {
var title = $(xml).find('title').text();

@ -162,7 +162,7 @@
function getTags(url, i) {
$.ajax({
type: "GET",
url: "/api/util/getpageinfo_p.xml?url="+url,
url: "/api/getpageinfo_p.xml?url="+url,
dataType: "xml",
success: function(xml) {
tags = "";

Loading…
Cancel
Save