From f8b8c82421d170941bcab6eca1585db84446bac2 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 15 Nov 2011 00:22:40 +0000
Subject: [PATCH] - refactoring of getpageinfo_p.xml (moved out of util) -
 added more logging in getpageinfo_p.java

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8037 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/Bookmarks.html                    |  2 +-
 htroot/ViewFile.html                     |  2 +-
 htroot/api/{util => }/getpageinfo_p.java | 26 ++++++++++++++----------
 htroot/api/{util => }/getpageinfo_p.xml  |  0
 htroot/js/Bookmarks.js                   |  2 +-
 htroot/js/IndexCreate.js                 |  4 ++--
 htroot/yacy/ui/yacyui-bookmarks.html     |  2 +-
 htroot/yacy/ui/yacyui-search.html        |  2 +-
 8 files changed, 22 insertions(+), 18 deletions(-)
 rename htroot/api/{util => }/getpageinfo_p.java (87%)
 rename htroot/api/{util => }/getpageinfo_p.xml (100%)
diff --git a/htroot/Bookmarks.html b/htroot/Bookmarks.html
index 9cb74d700..a580c0410 100644
--- a/htroot/Bookmarks.html
+++ b/htroot/Bookmarks.html
@@ -229,7 +229,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
 					/
 					<a href="Bookmarks.html?delete=#[hash]#" class="bookmarkAction" onclick="return confirm('Confirm deletion')">Delete</a>
 					/
-					<a href="/api/util/getpageinfo_p.xml?url=#[link]#" class="bookmarkAction">Info</a>
+					<a href="/api/getpageinfo_p.xml?url=#[link]#" class="bookmarkAction">Info</a>
 				</p>
 			</div>
 			#{/bookmarks}#
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
index 6b706999c..6a60aaea8 100644
--- a/htroot/ViewFile.html
+++ b/htroot/ViewFile.html
@@ -9,7 +9,7 @@
   <body>
   
 <div id="api">
-<a href="http://localhost:8090/api/util/getpageinfo_p.xml?actions=title,robots&url=#[url]#" id="apilink">
+<a href="http://localhost:8090/api/getpageinfo_p.xml?actions=title,robots&url=#[url]#" id="apilink">
 <img src="/env/grafics/api.png" width="60" height="40" alt="API"/>
 </a>
 <span>See the page info about the url.</span>
diff --git a/htroot/api/util/getpageinfo_p.java b/htroot/api/getpageinfo_p.java
similarity index 87%
rename from htroot/api/util/getpageinfo_p.java
rename to htroot/api/getpageinfo_p.java
index 5a101c611..888a2b38e 100755
--- a/htroot/api/util/getpageinfo_p.java
+++ b/htroot/api/getpageinfo_p.java
@@ -8,6 +8,7 @@ import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
 import de.anomic.crawler.RobotsTxtEntry;
 import de.anomic.server.serverObjects;
@@ -30,15 +31,15 @@ public class getpageinfo_p {
         prop.put("filter", ".*");
 
         // default actions
-        String actions="title,robots";
+        String actions = "title,robots";
 
         if (post != null && post.containsKey("url")) {
-            if(post.containsKey("actions"))
+            if (post.containsKey("actions"))
                 actions=post.get("actions");
             String url=post.get("url");
-			if(url.toLowerCase().startsWith("ftp://")){
+			if (url.toLowerCase().startsWith("ftp://")) {
 				prop.put("robots-allowed", "1");
-				prop.putXML("title", "FTP: "+url);
+				prop.putXML("title", "FTP: " + url);
                 return prop;
 			} else if (!url.startsWith("http://") &&
 		               !url.startsWith("https://") &&
@@ -47,18 +48,18 @@ public class getpageinfo_p {
 		              !url.startsWith("file://")) {
                 url = "http://" + url;
             }
-            if (actions.indexOf("title")>=0) {
+            if (actions.indexOf("title") >= 0) {
                 DigestURI u = null;
                 try {
                     u = new DigestURI(url);
                 } catch (final MalformedURLException e) {
-                    // fail, do nothing
+                    Log.logException(e);
                 }
                 ContentScraper scraper = null;
                 if (u != null) try {
                     scraper = sb.loader.parseResource(u, CacheStrategy.IFEXIST);
                 } catch (final IOException e) {
-                    // now thats a fail, do nothing
+                    Log.logException(e);
                 }
                 if (scraper != null) {
                     // put the document title
@@ -68,9 +69,9 @@ public class getpageinfo_p {
                     prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
 
                     // put keywords
-                    final String list[]=scraper.getKeywords();
+                    final String list[] = scraper.getKeywords();
                     int count = 0;
-                    for (final String element : list) {
+                    for (final String element: list) {
                         final String tag = element;
                         if (!tag.equals("")) {
                             prop.putXML("tags_"+count+"_tag", tag);
@@ -100,7 +101,7 @@ public class getpageinfo_p {
                     prop.putXML("filter", filter.length() > 0 ? filter.substring(1) : ".*");
                 }
             }
-            if (actions.indexOf("robots")>=0) {
+            if (actions.indexOf("robots") >= 0) {
                 try {
                     final DigestURI theURL = new DigestURI(url);
 
@@ -110,13 +111,16 @@ public class getpageinfo_p {
                         robotsEntry = sb.robots.getEntry(theURL, sb.peers.myBotIDs());
                     } catch (final IOException e) {
                         robotsEntry = null;
+                        Log.logException(e);
                     }
                 	prop.put("robots-allowed", robotsEntry == null ? 1 : robotsEntry.isDisallowed(theURL) ? 0 : 1);
 
                     // get the sitemap URL of the domain
                     final MultiProtocolURI sitemapURL = robotsEntry == null ? null : robotsEntry.getSitemap();
                     prop.putXML("sitemap", sitemapURL == null ? "" : sitemapURL.toString());
-                } catch (final MalformedURLException e) {}
+                } catch (final MalformedURLException e) {
+                    Log.logException(e);
+                }
             }
 
         }
diff --git a/htroot/api/util/getpageinfo_p.xml b/htroot/api/getpageinfo_p.xml
similarity index 100%
rename from htroot/api/util/getpageinfo_p.xml
rename to htroot/api/getpageinfo_p.xml
diff --git a/htroot/js/Bookmarks.js b/htroot/js/Bookmarks.js
index dd46ed8c7..6f8f2d8ed 100644
--- a/htroot/js/Bookmarks.js
+++ b/htroot/js/Bookmarks.js
@@ -24,7 +24,7 @@ function loadTitle(){
 	
 	url=document.getElementsByName("url")[0].value;
 	if(document.getElementsByName("title")[0].value==""){
-		sndReq('/api/util/getpageinfo_p.xml?actions=title&url='+url);
+		sndReq('/api/getpageinfo_p.xml?actions=title&url='+url);
 	}
 }
 
diff --git a/htroot/js/IndexCreate.js b/htroot/js/IndexCreate.js
index ab687bd2d..e3c840fab 100644
--- a/htroot/js/IndexCreate.js
+++ b/htroot/js/IndexCreate.js
@@ -75,7 +75,7 @@ function loadInfos() {
 	
 	url=document.getElementById("crawlingURL").value;
 	if (url.indexOf("ftp") == 0 || url.indexOf("smb") == 0) document.getElementById("crawlingQ").disabled=true; else document.getElementById("crawlingQ").disabled=false;
-	sndReq('/api/util/getpageinfo_p.xml?actions=title,robots&url='+url);
-	document.getElementById("api").innerHTML = "<a href='http://localhost:8090/api/util/getpageinfo_p.xml?actions=title,robots&url=" + url + "' id='apilink'><img src='/env/grafics/api.png' width='60' height='40' alt='API'/></a><span>See the page info about the start url.</span>";
+	sndReq('/api/getpageinfo_p.xml?actions=title,robots&url='+url);
+	document.getElementById("api").innerHTML = "<a href='http://localhost:8090/api/getpageinfo_p.xml?actions=title,robots&url=" + url + "' id='apilink'><img src='/env/grafics/api.png' width='60' height='40' alt='API'/></a><span>See the page info about the start url.</span>";
 	
 }
diff --git a/htroot/yacy/ui/yacyui-bookmarks.html b/htroot/yacy/ui/yacyui-bookmarks.html
index 482158902..6e46bde39 100644
--- a/htroot/yacy/ui/yacyui-bookmarks.html
+++ b/htroot/yacy/ui/yacyui-bookmarks.html
@@ -85,7 +85,7 @@
 				var url = $("input[name='bm_url']").getValue();
 				$.ajax({
 					type: "GET",
-					url: "/api/util/getpageinfo_p.xml?url="+url,			
+					url: "/api/getpageinfo_p.xml?url="+url,			
 					dataType: "xml",
 					success: function(xml) {
 						var title = $(xml).find('title').text();
diff --git a/htroot/yacy/ui/yacyui-search.html b/htroot/yacy/ui/yacyui-search.html
index 636804bd6..e233dd700 100644
--- a/htroot/yacy/ui/yacyui-search.html
+++ b/htroot/yacy/ui/yacyui-search.html
@@ -162,7 +162,7 @@
 	function getTags(url, i) {
 		$.ajax({
 			type: "GET",
-			url: "/api/util/getpageinfo_p.xml?url="+url,			
+			url: "/api/getpageinfo_p.xml?url="+url,			
 			dataType: "xml",
 			success: function(xml) {					
 				tags = "";