*) Trying to get rid of the possibility of exploits in IndexCreate* through HTML and JavaScript in peer names, URLs, <title>-tags etc. (see http://www.yacy-forum.de/viewtopic.php?t=1181). I hope I got them all and did not overdo it.

*) Just a tiny bit of cleaning up in News.java. (I messed it up myself some time ago.)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@749 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 19 years ago
parent 9dfbd93c7b
commit 4dbc871524

@ -51,6 +51,7 @@ import java.util.Iterator;
import java.util.Locale;
import java.io.IOException;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaHTCache;
@ -71,6 +72,7 @@ public class IndexCreateIndexingQueue_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
wikiCode wikiTransformer = new wikiCode(switchboard);
serverObjects prop = new serverObjects();
prop.put("rejected", 0);
int showRejectedCount = 10;
@ -149,11 +151,11 @@ public class IndexCreateIndexingQueue_p {
if ((pcentry != null)&&(pcentry.url() != null)) {
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("indexing-queue_list_"+entryCount+"_dark", (inProcess)? 2: ((dark) ? 1 : 0));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())));
prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth());
prop.put("indexing-queue_list_"+entryCount+"_modified", (pcentry.responseHeader() == null) ? "" : daydate(pcentry.responseHeader().lastModified()));
prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName());
prop.put("indexing-queue_list_"+entryCount+"_url", pcentry.normalizedURLString());
prop.put("indexing-queue_list_"+entryCount+"_url", wikiTransformer.replaceHTML(pcentry.normalizedURLString()));
prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize));
prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0);
prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash());
@ -192,9 +194,9 @@ public class IndexCreateIndexingQueue_p {
url = entry.url().toString();
initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash);
executorSeed = yacyCore.seedDB.getConnected(executorHash);
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
prop.put("rejected_list_"+j+"_url", url);
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : wikiTransformer.replaceHTML(initiatorSeed.getName())));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : wikiTransformer.replaceHTML(executorSeed.getName())));
prop.put("rejected_list_"+j+"_url", wikiTransformer.replaceHTML(url));
prop.put("rejected_list_"+j+"_failreason", entry.failreason());
prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0));
dark = !dark;

@ -47,6 +47,7 @@ import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
import de.anomic.plasma.plasmaCrawlWorker;
@ -66,6 +67,7 @@ public class IndexCreateLoaderQueue_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
wikiCode wikiTransformer = new wikiCode(switchboard);
serverObjects prop = new serverObjects();
@ -88,9 +90,9 @@ public class IndexCreateLoaderQueue_p {
initiator = yacyCore.seedDB.getConnected(theMsg.initiator);
prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())) );
prop.put("loader-set_list_"+count+"_depth", theMsg.depth );
prop.put("loader-set_list_"+count+"_url", theMsg.url ); // null pointer exception here !!! maybe url = null; check reason.
prop.put("loader-set_list_"+count+"_url", wikiTransformer.replaceHTML(theMsg.url.toString())); // null pointer exception here !!! maybe url = null; check reason.
dark = !dark;
count++;
}

@ -47,6 +47,7 @@ import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaCrawlProfile;
@ -66,6 +67,7 @@ public class IndexCreateWWWGlobalQueue_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
wikiCode wikiTransformer = new wikiCode(switchboard);
serverObjects prop = new serverObjects();
if (post != null) {
@ -107,12 +109,12 @@ public class IndexCreateWWWGlobalQueue_p {
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
prop.put("crawler-queue_list_"+i+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+i+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())) );
prop.put("crawler-queue_list_"+i+"_profile", ((profileEntry == null) ? "unknown" : wikiTransformer.replaceHTML(profileEntry.name())));
prop.put("crawler-queue_list_"+i+"_depth", urle.depth());
prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+i+"_anchor", urle.name());
prop.put("crawler-queue_list_"+i+"_url", urle.url());
prop.put("crawler-queue_list_"+i+"_anchor", wikiTransformer.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+i+"_url", wikiTransformer.replaceHTML(urle.url().toString()));
dark = !dark;
}
}

@ -47,6 +47,7 @@ import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaCrawlProfile;
@ -66,6 +67,7 @@ public class IndexCreateWWWLocalQueue_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
wikiCode wikiTransformer = new wikiCode(switchboard);
serverObjects prop = new serverObjects();
if (post != null) {
@ -105,12 +107,12 @@ public class IndexCreateWWWLocalQueue_p {
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : wikiTransformer.replaceHTML(initiator.getName())) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.put("crawler-queue_list_"+showNum+"_url", urle.url());
prop.put("crawler-queue_list_"+showNum+"_anchor", wikiTransformer.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+showNum+"_url", wikiTransformer.replaceHTML(urle.url().toString()));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.hash());
dark = !dark;
showNum++;

@ -57,6 +57,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterOutputStream;
import de.anomic.http.httpHeader;
@ -86,6 +87,7 @@ public class IndexCreate_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
wikiCode wikiTransformer = new wikiCode(switchboard);
serverObjects prop = new serverObjects();
prop.put("error", 0);
@ -184,7 +186,7 @@ public class IndexCreate_p {
} else {
prop.put("error", 5); //Crawling failed
prop.put("error_crawlingURL", ((String) post.get("crawlingURL")));
prop.put("error_crawlingURL", wikiTransformer.replaceHTML(((String) post.get("crawlingURL"))));
prop.put("error_reasonString", reasonString);
switchboard.urlPool.errorURL.newEntry(crawlingStartURL, null, yacyCore.seedDB.mySeed.hash, yacyCore.seedDB.mySeed.hash,
@ -360,9 +362,9 @@ public class IndexCreate_p {
profile = (plasmaCrawlProfile.entry) it.next();
//table += profile.map().toString() + "<br>";
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_name", profile.name());
prop.put("crawlProfiles_"+count+"_startURL", profile.startURL());
prop.put("crawlProfiles_"+count+"_handle", profile.handle());
prop.put("crawlProfiles_"+count+"_name", wikiTransformer.replaceHTML(profile.name()));
prop.put("crawlProfiles_"+count+"_startURL", wikiTransformer.replaceHTML(profile.startURL()));
prop.put("crawlProfiles_"+count+"_handle", wikiTransformer.replaceHTML(profile.handle()));
prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth());
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
@ -392,9 +394,9 @@ public class IndexCreate_p {
if (peer == null) peername = record.originator(); else peername = peer.getName();
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername);
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", wikiTransformer.replaceHTML(peername));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", wikiTransformer.replaceHTML(record.attributes().get("startURL").toString()));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", wikiTransformer.replaceHTML(record.attributes().get("intention").toString()));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
showedCrawl++;
@ -417,9 +419,9 @@ public class IndexCreate_p {
if (peer == null) peername = record.originator(); else peername = peer.getName();
prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created());
prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername);
prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", wikiTransformer.replaceHTML(peername));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", wikiTransformer.replaceHTML(record.attributes().get("startURL").toString()));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", wikiTransformer.replaceHTML(record.attributes().get("intention").toString()));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
showedCrawl++;

@ -47,6 +47,7 @@ import java.util.Enumeration;
import java.util.HashMap;
import java.io.IOException;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -55,7 +56,6 @@ import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.data.wikiCode;
public class News {

Loading…
Cancel
Save