From 52c68875bd3c515f7de1fb286c7c5045f31a472e Mon Sep 17 00:00:00 2001 From: low012 Date: Fri, 12 Oct 2007 15:27:23 +0000 Subject: [PATCH] *) removed (hopefully only) surplus double encodings (http://forum.yacy-websuche.de/viewtopic.php?t=368) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4159 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreateIndexingQueue_p.java | 13 ++++++------- htroot/IndexCreateLoaderQueue_p.java | 5 ++--- htroot/IndexCreateWWWGlobalQueue_p.java | 9 ++++----- htroot/IndexCreateWWWLocalQueue_p.java | 9 ++++----- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index 970c69376..154fbe54f 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -48,7 +48,6 @@ import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Iterator; -import de.anomic.data.htmlTools; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaHTCache; @@ -142,11 +141,11 @@ public class IndexCreateIndexingQueue_p { totalSize += entrySize; initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); prop.put("indexing-queue_list_"+entryCount+"_dark", (inProcess)? 2: ((dark) ? 1 : 0)); - prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true))); + prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth()); prop.put("indexing-queue_list_"+entryCount+"_modified", pcentry.getModificationDate()); - prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":htmlTools.encodeUnicode2html(pcentry.anchorName(), true)); - prop.put("indexing-queue_list_"+entryCount+"_url", htmlTools.encodeUnicode2html(pcentry.url().toNormalform(false, true), false)); + prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName()); + prop.put("indexing-queue_list_"+entryCount+"_url", pcentry.url().toNormalform(false, true)); prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize)); prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0); prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash()); @@ -189,9 +188,9 @@ public class IndexCreateIndexingQueue_p { executorHash = entry.executor(); initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); executorSeed = yacyCore.seedDB.getConnected(executorHash); - prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : htmlTools.encodeUnicode2html(initiatorSeed.getName(), true))); - prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : htmlTools.encodeUnicode2html(executorSeed.getName(), true))); - prop.put("rejected_list_"+j+"_url", htmlTools.encodeUnicode2html(url.toNormalform(false, true), false)); + prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); + prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); + prop.put("rejected_list_"+j+"_url", url.toNormalform(false, true)); prop.put("rejected_list_"+j+"_failreason", entry.anycause()); prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0)); dark = !dark; diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java index 7ced761e8..d4f5eff30 100644 --- a/htroot/IndexCreateLoaderQueue_p.java +++ b/htroot/IndexCreateLoaderQueue_p.java @@ -43,7 +43,6 @@ // javac -classpath .:../classes IndexCreate_p.java // if the shell's current path is HTROOT -import de.anomic.data.htmlTools; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlLoaderMessage; import de.anomic.plasma.plasmaSwitchboard; @@ -80,9 +79,9 @@ public class IndexCreateLoaderQueue_p { initiator = yacyCore.seedDB.getConnected(theMsg.initiator); prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) ); - prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) ); + prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("loader-set_list_"+count+"_depth", theMsg.depth ); - prop.put("loader-set_list_"+count+"_url", htmlTools.encodeUnicode2html(theMsg.url.toNormalform(false, true), false)); // null pointer exception here !!! maybe url = null; check reason. + prop.put("loader-set_list_"+count+"_url", theMsg.url.toNormalform(false, true)); // null pointer exception here !!! maybe url = null; check reason. dark = !dark; count++; } diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java index d91a72ee4..0f78b7e1a 100644 --- a/htroot/IndexCreateWWWGlobalQueue_p.java +++ b/htroot/IndexCreateWWWGlobalQueue_p.java @@ -47,7 +47,6 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; -import de.anomic.data.htmlTools; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlNURL; @@ -120,12 +119,12 @@ public class IndexCreateWWWGlobalQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle); prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) ); - prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) ); - prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : htmlTools.encodeUnicode2html(profileEntry.name(), true))); + prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); - prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true)); - prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false)); + prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name()); + prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); dark = !dark; showNum++; diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index d4ecb88c7..e3c5c3843 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -51,7 +51,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; -import de.anomic.data.htmlTools; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlNURL; @@ -136,7 +135,7 @@ public class IndexCreateWWWLocalQueue_p { case ANCHOR: value = entry.name(); break; case DEPTH: value = Integer.toString(entry.depth()); break; case INITIATOR: - value = (entry.initiator() == null) ? "proxy" : htmlTools.encodeUnicode2html(entry.initiator(), true); + value = (entry.initiator() == null) ? "proxy" : entry.initiator(); break; case MODIFIED: value = daydate(entry.loaddate()); break; default: value = null; @@ -185,12 +184,12 @@ public class IndexCreateWWWLocalQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle); prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) ); - prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) ); + prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); - prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true)); - prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false)); + prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name()); + prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); dark = !dark; showNum++;