allow url parameter in worktable apicall

allow url=www?param=a&param=b (with '?' and '&' URL-encoded)
fix:  http://mantis.tokeek.de/view.php?id=100

fix '&' being appended twice in MultiProtocolURL.escape()
pull/1/head
reger 11 years ago
parent b5ca20de15
commit 209e0f2fe8

@ -36,6 +36,7 @@ import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.feed.RSSMessage; import net.yacy.cora.document.feed.RSSMessage;
import net.yacy.cora.document.feed.RSSReader; import net.yacy.cora.document.feed.RSSReader;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
@ -212,7 +213,7 @@ public class Load_RSS_p {
final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK())); prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK()));
prop.put("showscheduledfeeds_list_" + apic + "_count", apic); prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl); prop.put("showscheduledfeeds_list_" + apic + "_rss", MultiProtocolURL.escape(messageurl).toString());
prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", "")); prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true)); prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true));
prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date()))); prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));

@ -66,8 +66,6 @@ import net.yacy.crawler.retrieval.Response;
*/ */
public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> { public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
public static final MultiProtocolURL POISON = new MultiProtocolURL(); // poison pill for concurrent link generators
private static final long serialVersionUID = -1173233022912141884L; private static final long serialVersionUID = -1173233022912141884L;
private static final long SMB_TIMEOUT = 5000; private static final long SMB_TIMEOUT = 5000;
@ -373,6 +371,11 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
escape(); escape();
} }
/**
* creates MultiProtocolURL
* if path contains '?', the search part is created automatically by splitting the input into path and search part
* the same applies to an anchor ('#')
*/
public MultiProtocolURL(final String protocol, String host, final int port, final String path) throws MalformedURLException { public MultiProtocolURL(final String protocol, String host, final int port, final String path) throws MalformedURLException {
if (protocol == null) throw new MalformedURLException("protocol is null"); if (protocol == null) throw new MalformedURLException("protocol is null");
if (host.indexOf(':') >= 0 && host.charAt(0) != '[') host = '[' + host + ']'; // IPv6 host must be enclosed in square brackets if (host.indexOf(':') >= 0 && host.charAt(0) != '[') host = '[' + host + ']'; // IPv6 host must be enclosed in square brackets
@ -521,9 +524,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) { if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) {
sbuf.append((char)ch); // leave it that way, it is used the right way sbuf.append((char)ch); // leave it that way, it is used the right way
} else { } else {
sbuf.append("&amp;"); // this must be urlencoded sbuf.append("%26"); // this must be urlencoded
} }
sbuf.append((char)ch);
} else if (ch == '#') { // RFC 1738 2.2 unsafe char is _not_ encoded because it may already be used for encoding } else if (ch == '#') { // RFC 1738 2.2 unsafe char is _not_ encoded because it may already be used for encoding
sbuf.append((char)ch); sbuf.append((char)ch);
} else if (ch == '!' || ch == ':' // unreserved } else if (ch == '!' || ch == ':' // unreserved

@ -28,6 +28,7 @@ package net.yacy.data;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
@ -42,6 +43,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
@ -222,7 +224,6 @@ public class WorkTables extends Tables {
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent); final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
client.setTimout(120000); client.setTimout(120000);
Tables.Row row; Tables.Row row;
String url;
LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>(); LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
for (final String pk: pks) { for (final String pk: pks) {
row = null; row = null;
@ -234,20 +235,25 @@ public class WorkTables extends Tables {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
if (row == null) continue; if (row == null) continue;
url = "http://" + host + ":" + port + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)); String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
ConcurrentLog.info("WorkTables", "executing url: " + url);
try { try {
client.GETbytes(url, username, pass, false); // use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
l.put(url, client.getStatusCode()); MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
} catch (final IOException e) { ConcurrentLog.info("WorkTables", "executing url: " + url.toString());
ConcurrentLog.logException(e); try {
l.put(url, -1); client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
l.put(url.toString(), client.getStatusCode());
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
}
} catch (MalformedURLException ex) {
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
} }
} }
return l; return l;
} }
public static int execAPICall(String host, int port, String path, byte[] pk, final String username, final String pass) { public static int execAPICall(String host, int port, String path, byte[] pk, final String username, final String pass) {
// now call the api URLs and store the result status // now call the api URLs and store the result status
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent); final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);

Loading…
Cancel
Save