*) removed surplus double HTML encodings, hopefully only the surplus ones (http://forum.yacy-websuche.de/viewtopic.php?t=368)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4159 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 17 years ago
parent b5f7df8d0a
commit 52c68875bd

@ -48,7 +48,6 @@ import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlZURL;
import de.anomic.plasma.plasmaHTCache;
@ -142,11 +141,11 @@ public class IndexCreateIndexingQueue_p {
totalSize += entrySize;
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("indexing-queue_list_"+entryCount+"_dark", (inProcess)? 2: ((dark) ? 1 : 0));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth());
prop.put("indexing-queue_list_"+entryCount+"_modified", pcentry.getModificationDate());
prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":htmlTools.encodeUnicode2html(pcentry.anchorName(), true));
prop.put("indexing-queue_list_"+entryCount+"_url", htmlTools.encodeUnicode2html(pcentry.url().toNormalform(false, true), false));
prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName());
prop.put("indexing-queue_list_"+entryCount+"_url", pcentry.url().toNormalform(false, true));
prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize));
prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0);
prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash());
@ -189,9 +188,9 @@ public class IndexCreateIndexingQueue_p {
executorHash = entry.executor();
initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash);
executorSeed = yacyCore.seedDB.getConnected(executorHash);
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : htmlTools.encodeUnicode2html(initiatorSeed.getName(), true)));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : htmlTools.encodeUnicode2html(executorSeed.getName(), true)));
prop.put("rejected_list_"+j+"_url", htmlTools.encodeUnicode2html(url.toNormalform(false, true), false));
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
prop.put("rejected_list_"+j+"_url", url.toNormalform(false, true));
prop.put("rejected_list_"+j+"_failreason", entry.anycause());
prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0));
dark = !dark;

@ -43,7 +43,6 @@
// javac -classpath .:../classes IndexCreate_p.java
// if the shell's current path is HTROOT
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
import de.anomic.plasma.plasmaSwitchboard;
@ -80,9 +79,9 @@ public class IndexCreateLoaderQueue_p {
initiator = yacyCore.seedDB.getConnected(theMsg.initiator);
prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("loader-set_list_"+count+"_depth", theMsg.depth );
prop.put("loader-set_list_"+count+"_url", htmlTools.encodeUnicode2html(theMsg.url.toNormalform(false, true), false)); // null pointer exception here !!! maybe url = null; check reason.
prop.put("loader-set_list_"+count+"_url", theMsg.url.toNormalform(false, true)); // null pointer exception here !!! maybe url = null; check reason.
dark = !dark;
count++;
}

@ -47,7 +47,6 @@ import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL;
@ -120,12 +119,12 @@ public class IndexCreateWWWGlobalQueue_p {
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : htmlTools.encodeUnicode2html(profileEntry.name(), true)));
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true));
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false));
prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
dark = !dark;
showNum++;

@ -51,7 +51,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL;
@ -136,7 +135,7 @@ public class IndexCreateWWWLocalQueue_p {
case ANCHOR: value = entry.name(); break;
case DEPTH: value = Integer.toString(entry.depth()); break;
case INITIATOR:
value = (entry.initiator() == null) ? "proxy" : htmlTools.encodeUnicode2html(entry.initiator(), true);
value = (entry.initiator() == null) ? "proxy" : entry.initiator();
break;
case MODIFIED: value = daydate(entry.loaddate()); break;
default: value = null;
@ -185,12 +184,12 @@ public class IndexCreateWWWLocalQueue_p {
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true));
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false));
prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
dark = !dark;
showNum++;

Loading…
Cancel
Save