From 4dbc871524cf4e805f27cb8dbbb1f3be356b39a0 Mon Sep 17 00:00:00 2001 From: low012 Date: Mon, 19 Sep 2005 20:36:29 +0000 Subject: [PATCH] *) Trying to get rid of the possibility of exploits in IndexCreate* through HTML and JavaScript in peernames, URLs, -tags etc. (see http://www.yacy-forum.de/viewtopic.php?t=1181) I hope I got them all and did not overdo it. *) Just a tiny bit of cleaning up in News.java. (I messed it up myself some time ago.) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@749 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreateIndexingQueue_p.java | 12 +++++++----- htroot/IndexCreateLoaderQueue_p.java | 6 ++++-- htroot/IndexCreateWWWGlobalQueue_p.java | 10 ++++++---- htroot/IndexCreateWWWLocalQueue_p.java | 8 +++++--- htroot/IndexCreate_p.java | 22 ++++++++++++---------- htroot/News.java | 2 +- 6 files changed, 35 insertions(+), 25 deletions(-) diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index df0d11f49..82290feb3 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -51,6 +51,7 @@ import java.util.Iterator; import java.util.Locale; import java.io.IOException; +import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlEURL; import de.anomic.plasma.plasmaHTCache; @@ -71,6 +72,7 @@ public class IndexCreateIndexingQueue_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + wikiCode wikiTransformer = new wikiCode(switchboard); serverObjects prop = new serverObjects(); prop.put("rejected", 0); int showRejectedCount = 10; @@ -149,11 +151,11 @@ public class IndexCreateIndexingQueue_p { if ((pcentry != null)&&(pcentry.url() != null)) { initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); prop.put("indexing-queue_list_"+entryCount+"_dark", 
(inProcess)? 2: ((dark) ? 1 : 0)); - prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName()))); prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth()); prop.put("indexing-queue_list_"+entryCount+"_modified", (pcentry.responseHeader() == null) ? "" : daydate(pcentry.responseHeader().lastModified())); prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName()); - prop.put("indexing-queue_list_"+entryCount+"_url", pcentry.normalizedURLString()); + prop.put("indexing-queue_list_"+entryCount+"_url", wikiTransformer.replaceHTML(pcentry.normalizedURLString())); prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize)); prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0); prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash()); @@ -192,9 +194,9 @@ public class IndexCreateIndexingQueue_p { url = entry.url().toString(); initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); executorSeed = yacyCore.seedDB.getConnected(executorHash); - prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); - prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); - prop.put("rejected_list_"+j+"_url", url); + prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : wikiTransformer.replaceHTML(initiatorSeed.getName()))); + prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : wikiTransformer.replaceHTML(executorSeed.getName()))); + prop.put("rejected_list_"+j+"_url", wikiTransformer.replaceHTML(url)); prop.put("rejected_list_"+j+"_failreason", entry.failreason()); prop.put("rejected_list_"+j+"_dark", ((dark) ? 
1 : 0)); dark = !dark; diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java index 843694c0b..9e61790f7 100644 --- a/htroot/IndexCreateLoaderQueue_p.java +++ b/htroot/IndexCreateLoaderQueue_p.java @@ -47,6 +47,7 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; +import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlLoaderMessage; import de.anomic.plasma.plasmaCrawlWorker; @@ -66,6 +67,7 @@ public class IndexCreateLoaderQueue_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + wikiCode wikiTransformer = new wikiCode(switchboard); serverObjects prop = new serverObjects(); @@ -88,9 +90,9 @@ public class IndexCreateLoaderQueue_p { initiator = yacyCore.seedDB.getConnected(theMsg.initiator); prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) ); - prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())) ); prop.put("loader-set_list_"+count+"_depth", theMsg.depth ); - prop.put("loader-set_list_"+count+"_url", theMsg.url ); // null pointer exception here !!! maybe url = null; check reason. + prop.put("loader-set_list_"+count+"_url", wikiTransformer.replaceHTML(theMsg.url.toString())); // null pointer exception here !!! maybe url = null; check reason. 
dark = !dark; count++; } diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java index 1d6f6d26c..d859642ec 100644 --- a/htroot/IndexCreateWWWGlobalQueue_p.java +++ b/htroot/IndexCreateWWWGlobalQueue_p.java @@ -47,6 +47,7 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; +import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaCrawlProfile; @@ -66,6 +67,7 @@ public class IndexCreateWWWGlobalQueue_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + wikiCode wikiTransformer = new wikiCode(switchboard); serverObjects prop = new serverObjects(); if (post != null) { @@ -107,12 +109,12 @@ public class IndexCreateWWWGlobalQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle); prop.put("crawler-queue_list_"+i+"_dark", ((dark) ? 1 : 0) ); - prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); - prop.put("crawler-queue_list_"+i+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); + prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())) ); + prop.put("crawler-queue_list_"+i+"_profile", ((profileEntry == null) ? 
"unknown" : wikiTransformer.replaceHTML(profileEntry.name()))); prop.put("crawler-queue_list_"+i+"_depth", urle.depth()); prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) ); - prop.put("crawler-queue_list_"+i+"_anchor", urle.name()); - prop.put("crawler-queue_list_"+i+"_url", urle.url()); + prop.put("crawler-queue_list_"+i+"_anchor", wikiTransformer.replaceHTML(urle.name())); + prop.put("crawler-queue_list_"+i+"_url", wikiTransformer.replaceHTML(urle.url().toString())); dark = !dark; } } diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index cc144066b..c6612c04f 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -47,6 +47,7 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; +import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlNURL; import de.anomic.plasma.plasmaCrawlProfile; @@ -66,6 +67,7 @@ public class IndexCreateWWWLocalQueue_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + wikiCode wikiTransformer = new wikiCode(switchboard); serverObjects prop = new serverObjects(); if (post != null) { @@ -105,12 +107,12 @@ public class IndexCreateWWWLocalQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle); prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) ); - prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())) ); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? 
"unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); - prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name()); - prop.put("crawler-queue_list_"+showNum+"_url", urle.url()); + prop.put("crawler-queue_list_"+showNum+"_anchor", wikiTransformer.replaceHTML(urle.name())); + prop.put("crawler-queue_list_"+showNum+"_url", wikiTransformer.replaceHTML(urle.url().toString())); prop.put("crawler-queue_list_"+showNum+"_hash", urle.hash()); dark = !dark; showNum++; diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index ea9b0e145..a3f555952 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -57,6 +57,7 @@ import java.util.Locale; import java.util.Map; import java.util.Properties; +import de.anomic.data.wikiCode; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterOutputStream; import de.anomic.http.httpHeader; @@ -86,6 +87,7 @@ public class IndexCreate_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + wikiCode wikiTransformer = new wikiCode(switchboard); serverObjects prop = new serverObjects(); prop.put("error", 0); @@ -184,7 +186,7 @@ public class IndexCreate_p { } else { prop.put("error", 5); //Crawling failed - prop.put("error_crawlingURL", ((String) post.get("crawlingURL"))); + prop.put("error_crawlingURL", wikiTransformer.replaceHTML(((String) post.get("crawlingURL")))); prop.put("error_reasonString", reasonString); switchboard.urlPool.errorURL.newEntry(crawlingStartURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash, @@ -360,9 +362,9 @@ public class IndexCreate_p { profile = (plasmaCrawlProfile.entry) it.next(); //table += profile.map().toString() + "<br>"; 
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0)); - prop.put("crawlProfiles_"+count+"_name", profile.name()); - prop.put("crawlProfiles_"+count+"_startURL", profile.startURL()); - prop.put("crawlProfiles_"+count+"_handle", profile.handle()); + prop.put("crawlProfiles_"+count+"_name", wikiTransformer.replaceHTML(profile.name())); + prop.put("crawlProfiles_"+count+"_startURL", wikiTransformer.replaceHTML(profile.startURL())); + prop.put("crawlProfiles_"+count+"_handle", wikiTransformer.replaceHTML(profile.handle())); prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth()); prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter()); prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0)); @@ -392,9 +394,9 @@ public class IndexCreate_p { if (peer == null) peername = record.originator(); else peername = peer.getName(); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created()); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL")); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention")); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", wikiTransformer.replaceHTML(peername)); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", wikiTransformer.replaceHTML(record.attributes().get("startURL").toString())); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", wikiTransformer.replaceHTML(record.attributes().get("intention").toString())); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 
1 : 0); showedCrawl++; @@ -417,9 +419,9 @@ public class IndexCreate_p { if (peer == null) peername = record.originator(); else peername = peer.getName(); prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created()); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL")); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention")); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", wikiTransformer.replaceHTML(peername)); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", wikiTransformer.replaceHTML(record.attributes().get("startURL").toString())); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", wikiTransformer.replaceHTML(record.attributes().get("intention").toString())); prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); showedCrawl++; diff --git a/htroot/News.java b/htroot/News.java index bb15bc3d8..55d62b01a 100644 --- a/htroot/News.java +++ b/htroot/News.java @@ -47,6 +47,7 @@ import java.util.Enumeration; import java.util.HashMap; import java.io.IOException; +import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -55,7 +56,6 @@ import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacyNewsRecord; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.data.wikiCode; public class News {