*) Removed surplus double HTML encodings (hopefully only the surplus ones); see http://forum.yacy-websuche.de/viewtopic.php?t=368

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4159 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 18 years ago
parent b5f7df8d0a
commit 52c68875bd

@ -48,7 +48,6 @@ import java.text.DecimalFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlZURL; import de.anomic.plasma.plasmaCrawlZURL;
import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaHTCache;
@ -142,11 +141,11 @@ public class IndexCreateIndexingQueue_p {
totalSize += entrySize; totalSize += entrySize;
initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("indexing-queue_list_"+entryCount+"_dark", (inProcess)? 2: ((dark) ? 1 : 0)); prop.put("indexing-queue_list_"+entryCount+"_dark", (inProcess)? 2: ((dark) ? 1 : 0));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true))); prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth()); prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth());
prop.put("indexing-queue_list_"+entryCount+"_modified", pcentry.getModificationDate()); prop.put("indexing-queue_list_"+entryCount+"_modified", pcentry.getModificationDate());
prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":htmlTools.encodeUnicode2html(pcentry.anchorName(), true)); prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName());
prop.put("indexing-queue_list_"+entryCount+"_url", htmlTools.encodeUnicode2html(pcentry.url().toNormalform(false, true), false)); prop.put("indexing-queue_list_"+entryCount+"_url", pcentry.url().toNormalform(false, true));
prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize)); prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize));
prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0); prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0);
prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash()); prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash());
@ -189,9 +188,9 @@ public class IndexCreateIndexingQueue_p {
executorHash = entry.executor(); executorHash = entry.executor();
initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash);
executorSeed = yacyCore.seedDB.getConnected(executorHash); executorSeed = yacyCore.seedDB.getConnected(executorHash);
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : htmlTools.encodeUnicode2html(initiatorSeed.getName(), true))); prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : htmlTools.encodeUnicode2html(executorSeed.getName(), true))); prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
prop.put("rejected_list_"+j+"_url", htmlTools.encodeUnicode2html(url.toNormalform(false, true), false)); prop.put("rejected_list_"+j+"_url", url.toNormalform(false, true));
prop.put("rejected_list_"+j+"_failreason", entry.anycause()); prop.put("rejected_list_"+j+"_failreason", entry.anycause());
prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0)); prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0));
dark = !dark; dark = !dark;

@ -43,7 +43,6 @@
// javac -classpath .:../classes IndexCreate_p.java // javac -classpath .:../classes IndexCreate_p.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLoaderMessage; import de.anomic.plasma.plasmaCrawlLoaderMessage;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
@ -80,9 +79,9 @@ public class IndexCreateLoaderQueue_p {
initiator = yacyCore.seedDB.getConnected(theMsg.initiator); initiator = yacyCore.seedDB.getConnected(theMsg.initiator);
prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) ); prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) ); prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("loader-set_list_"+count+"_depth", theMsg.depth ); prop.put("loader-set_list_"+count+"_depth", theMsg.depth );
prop.put("loader-set_list_"+count+"_url", htmlTools.encodeUnicode2html(theMsg.url.toNormalform(false, true), false)); // null pointer exception here !!! maybe url = null; check reason. prop.put("loader-set_list_"+count+"_url", theMsg.url.toNormalform(false, true)); // null pointer exception here !!! maybe url = null; check reason.
dark = !dark; dark = !dark;
count++; count++;
} }

@ -47,7 +47,6 @@ import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
import java.util.Locale; import java.util.Locale;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaCrawlNURL;
@ -120,12 +119,12 @@ public class IndexCreateWWWGlobalQueue_p {
profileHandle = urle.profileHandle(); profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle); profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) ); prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) ); prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : htmlTools.encodeUnicode2html(profileEntry.name(), true))); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true)); prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false)); prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
dark = !dark; dark = !dark;
showNum++; showNum++;

@ -51,7 +51,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry; import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaCrawlNURL;
@ -136,7 +135,7 @@ public class IndexCreateWWWLocalQueue_p {
case ANCHOR: value = entry.name(); break; case ANCHOR: value = entry.name(); break;
case DEPTH: value = Integer.toString(entry.depth()); break; case DEPTH: value = Integer.toString(entry.depth()); break;
case INITIATOR: case INITIATOR:
value = (entry.initiator() == null) ? "proxy" : htmlTools.encodeUnicode2html(entry.initiator(), true); value = (entry.initiator() == null) ? "proxy" : entry.initiator();
break; break;
case MODIFIED: value = daydate(entry.loaddate()); break; case MODIFIED: value = daydate(entry.loaddate()); break;
default: value = null; default: value = null;
@ -185,12 +184,12 @@ public class IndexCreateWWWLocalQueue_p {
profileHandle = urle.profileHandle(); profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle); profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) ); prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.encodeUnicode2html(initiator.getName(), true)) ); prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.encodeUnicode2html(urle.name(), true)); prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.encodeUnicode2html(urle.url().toNormalform(false, true), false)); prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
dark = !dark; dark = !dark;
showNum++; showNum++;

Loading…
Cancel
Save