From f8b8c82421d170941bcab6eca1585db84446bac2 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Tue, 15 Nov 2011 00:22:40 +0000
Subject: [PATCH] refactoring of getpageinfo_p.xml (moved out of util); added more logging in getpageinfo_p.xml
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8037 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
htroot/Bookmarks.html | 2 +-
htroot/ViewFile.html | 2 +-
htroot/api/{util => }/getpageinfo_p.java | 26 ++++++++++++++----------
htroot/api/{util => }/getpageinfo_p.xml | 0
htroot/js/Bookmarks.js | 2 +-
htroot/js/IndexCreate.js | 4 ++--
htroot/yacy/ui/yacyui-bookmarks.html | 2 +-
htroot/yacy/ui/yacyui-search.html | 2 +-
8 files changed, 22 insertions(+), 18 deletions(-)
rename htroot/api/{util => }/getpageinfo_p.java (87%)
rename htroot/api/{util => }/getpageinfo_p.xml (100%)
diff --git a/htroot/Bookmarks.html b/htroot/Bookmarks.html
index 9cb74d700..a580c0410 100644
--- a/htroot/Bookmarks.html
+++ b/htroot/Bookmarks.html
@@ -229,7 +229,7 @@ To see a list of all APIs, please visit the Delete
/
- Info
+ Info
#{/bookmarks}#
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
index 6b706999c..6a60aaea8 100644
--- a/htroot/ViewFile.html
+++ b/htroot/ViewFile.html
@@ -9,7 +9,7 @@
-
+
See the page info about the url.
diff --git a/htroot/api/util/getpageinfo_p.java b/htroot/api/getpageinfo_p.java
similarity index 87%
rename from htroot/api/util/getpageinfo_p.java
rename to htroot/api/getpageinfo_p.java
index 5a101c611..888a2b38e 100755
--- a/htroot/api/util/getpageinfo_p.java
+++ b/htroot/api/getpageinfo_p.java
@@ -8,6 +8,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.crawler.RobotsTxtEntry;
import de.anomic.server.serverObjects;
@@ -30,15 +31,15 @@ public class getpageinfo_p {
prop.put("filter", ".*");
// default actions
- String actions="title,robots";
+ String actions = "title,robots";
if (post != null && post.containsKey("url")) {
- if(post.containsKey("actions"))
+ if (post.containsKey("actions"))
actions=post.get("actions");
String url=post.get("url");
- if(url.toLowerCase().startsWith("ftp://")){
+ if (url.toLowerCase().startsWith("ftp://")) {
prop.put("robots-allowed", "1");
- prop.putXML("title", "FTP: "+url);
+ prop.putXML("title", "FTP: " + url);
return prop;
} else if (!url.startsWith("http://") &&
!url.startsWith("https://") &&
@@ -47,18 +48,18 @@ public class getpageinfo_p {
!url.startsWith("file://")) {
url = "http://" + url;
}
- if (actions.indexOf("title")>=0) {
+ if (actions.indexOf("title") >= 0) {
DigestURI u = null;
try {
u = new DigestURI(url);
} catch (final MalformedURLException e) {
- // fail, do nothing
+ Log.logException(e);
}
ContentScraper scraper = null;
if (u != null) try {
scraper = sb.loader.parseResource(u, CacheStrategy.IFEXIST);
} catch (final IOException e) {
- // now thats a fail, do nothing
+ Log.logException(e);
}
if (scraper != null) {
// put the document title
@@ -68,9 +69,9 @@ public class getpageinfo_p {
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
// put keywords
- final String list[]=scraper.getKeywords();
+ final String list[] = scraper.getKeywords();
int count = 0;
- for (final String element : list) {
+ for (final String element: list) {
final String tag = element;
if (!tag.equals("")) {
prop.putXML("tags_"+count+"_tag", tag);
@@ -100,7 +101,7 @@ public class getpageinfo_p {
prop.putXML("filter", filter.length() > 0 ? filter.substring(1) : ".*");
}
}
- if (actions.indexOf("robots")>=0) {
+ if (actions.indexOf("robots") >= 0) {
try {
final DigestURI theURL = new DigestURI(url);
@@ -110,13 +111,16 @@ public class getpageinfo_p {
robotsEntry = sb.robots.getEntry(theURL, sb.peers.myBotIDs());
} catch (final IOException e) {
robotsEntry = null;
+ Log.logException(e);
}
prop.put("robots-allowed", robotsEntry == null ? 1 : robotsEntry.isDisallowed(theURL) ? 0 : 1);
// get the sitemap URL of the domain
final MultiProtocolURI sitemapURL = robotsEntry == null ? null : robotsEntry.getSitemap();
prop.putXML("sitemap", sitemapURL == null ? "" : sitemapURL.toString());
- } catch (final MalformedURLException e) {}
+ } catch (final MalformedURLException e) {
+ Log.logException(e);
+ }
}
}
diff --git a/htroot/api/util/getpageinfo_p.xml b/htroot/api/getpageinfo_p.xml
similarity index 100%
rename from htroot/api/util/getpageinfo_p.xml
rename to htroot/api/getpageinfo_p.xml
diff --git a/htroot/js/Bookmarks.js b/htroot/js/Bookmarks.js
index dd46ed8c7..6f8f2d8ed 100644
--- a/htroot/js/Bookmarks.js
+++ b/htroot/js/Bookmarks.js
@@ -24,7 +24,7 @@ function loadTitle(){
url=document.getElementsByName("url")[0].value;
if(document.getElementsByName("title")[0].value==""){
- sndReq('/api/util/getpageinfo_p.xml?actions=title&url='+url);
+ sndReq('/api/getpageinfo_p.xml?actions=title&url='+url);
}
}
diff --git a/htroot/js/IndexCreate.js b/htroot/js/IndexCreate.js
index ab687bd2d..e3c840fab 100644
--- a/htroot/js/IndexCreate.js
+++ b/htroot/js/IndexCreate.js
@@ -75,7 +75,7 @@ function loadInfos() {
url=document.getElementById("crawlingURL").value;
if (url.indexOf("ftp") == 0 || url.indexOf("smb") == 0) document.getElementById("crawlingQ").disabled=true; else document.getElementById("crawlingQ").disabled=false;
- sndReq('/api/util/getpageinfo_p.xml?actions=title,robots&url='+url);
- document.getElementById("api").innerHTML = "
See the page info about the start url.";
+ sndReq('/api/getpageinfo_p.xml?actions=title,robots&url='+url);
+ document.getElementById("api").innerHTML = "
See the page info about the start url.";
}
diff --git a/htroot/yacy/ui/yacyui-bookmarks.html b/htroot/yacy/ui/yacyui-bookmarks.html
index 482158902..6e46bde39 100644
--- a/htroot/yacy/ui/yacyui-bookmarks.html
+++ b/htroot/yacy/ui/yacyui-bookmarks.html
@@ -85,7 +85,7 @@
var url = $("input[name='bm_url']").getValue();
$.ajax({
type: "GET",
- url: "/api/util/getpageinfo_p.xml?url="+url,
+ url: "/api/getpageinfo_p.xml?url="+url,
dataType: "xml",
success: function(xml) {
var title = $(xml).find('title').text();
diff --git a/htroot/yacy/ui/yacyui-search.html b/htroot/yacy/ui/yacyui-search.html
index 636804bd6..e233dd700 100644
--- a/htroot/yacy/ui/yacyui-search.html
+++ b/htroot/yacy/ui/yacyui-search.html
@@ -162,7 +162,7 @@
function getTags(url, i) {
$.ajax({
type: "GET",
- url: "/api/util/getpageinfo_p.xml?url="+url,
+ url: "/api/getpageinfo_p.xml?url="+url,
dataType: "xml",
success: function(xml) {
tags = "";