diff --git a/htroot/CrawlStartExpert.java b/htroot/CrawlStartExpert.java
index 79939bd54..037ea228d 100644
--- a/htroot/CrawlStartExpert.java
+++ b/htroot/CrawlStartExpert.java
@@ -55,7 +55,7 @@ public class CrawlStartExpert {
// ---------- Start point
// crawl start URL
if (post != null && post.containsKey("crawlingURL")) {
- final String crawlingURL = post.get("crawlingURL", "");
+ final String crawlingURL = post.get("crawlingURL", "").replaceAll("%0D%0A", "\n").replaceAll("%0A", "\n").replaceAll("%0D", "\n");
prop.put("starturl", crawlingURL);
// simple check for content since it may be empty
if (!crawlingURL.trim().isEmpty()) {
diff --git a/htroot/Table_API_p.html b/htroot/Table_API_p.html
index 233533f1b..aa3cf30bc 100644
--- a/htroot/Table_API_p.html
+++ b/htroot/Table_API_p.html
@@ -83,7 +83,29 @@ To see a list of all APIs, please visit the
-
#[type]##(isCrawlerStart)#::
#(/isCrawlerStart)#
+
#[type]#
+ #(isCrawlerStart)#::
+ ::
+
+
+
+
+ #(/isCrawlerStart)#
#[comment]#
#[callcount]#
#[dateRecording]#
diff --git a/htroot/Table_API_p.java b/htroot/Table_API_p.java
index b86e477b8..513bb46ae 100644
--- a/htroot/Table_API_p.java
+++ b/htroot/Table_API_p.java
@@ -31,6 +31,7 @@ import java.util.regex.Pattern;
import net.yacy.cora.date.AbstractFormatter;
import net.yacy.cora.document.encoding.UTF8;
+import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
@@ -290,9 +291,25 @@ public class Table_API_p {
// check type & action to link crawl start URLs back to CrawlStartExpert.html
if (prop.get("showtable_list_" + count + "_type", "").equals(WorkTables.TABLE_API_TYPE_CRAWLER)
&& prop.get("showtable_list_" + count + "_comment", "").startsWith("crawl start for")) {
- prop.put("showtable_list_" + count + "_isCrawlerStart", 1);
final String editUrl = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)).replace("Crawler_p", "CrawlStartExpert");
- prop.put("showtable_list_" + count + "_isCrawlerStart_url", editUrl);
+ if (editUrl.length() > 1000) {
+ final MultiProtocolURL u = new MultiProtocolURL("http://localhost:8090" + editUrl);
+ prop.put("showtable_list_" + count + "_isCrawlerStart", 2);
+ prop.put("showtable_list_" + count + "_isCrawlerStart_pk", UTF8.String(row.getPK()));
+ prop.put("showtable_list_" + count + "_isCrawlerStart_servlet", "/CrawlStartExpert.html");
+ Map attr = u.getAttributes();
+ int ac = 0;
+ for (Map.Entry entry: attr.entrySet()) {
+ prop.put("showtable_list_" + count + "_isCrawlerStart_attr_" + ac + "_key", entry.getKey());
+ prop.put("showtable_list_" + count + "_isCrawlerStart_attr_" + ac + "_value", entry.getValue());
+ ac++;
+ }
+ prop.put("showtable_list_" + count + "_isCrawlerStart_attr", ac);
+ } else {
+ // short calls
+ prop.put("showtable_list_" + count + "_isCrawlerStart", 1);
+ prop.put("showtable_list_" + count + "_isCrawlerStart_url", editUrl);
+ }
} else {
prop.put("showtable_list_" + count + "_isCrawlerStart", 0);
}
diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java
index dd9a252f2..ee5a8415b 100644
--- a/source/net/yacy/cora/document/id/MultiProtocolURL.java
+++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java
@@ -516,18 +516,12 @@ public class MultiProtocolURL implements Serializable, Comparable element: getAttributes().entrySet()) {
+ qtmp.append('&');
+ qtmp.append(element.getKey());
+ qtmp.append('=');
+ qtmp.append(escape(element.getValue()));
}
this.searchpart = qtmp.substring((qtmp.length() > 0) ? 1 : 0);
}
@@ -1013,6 +1007,21 @@ public class MultiProtocolURL implements Serializable, Comparable getAttributes() {
+ Map<String, String> map = new LinkedHashMap<>(); // query parameters of this URL as key -> value, in order of first appearance; a repeated key keeps its last value
+ if (this.searchpart == null) return map; // no query part: return an empty map rather than null
+ final String[] questp = CommonPattern.AMP.split(this.searchpart, -1); // limit -1 keeps trailing empty elements; AMP presumably matches '&' (defined in CommonPattern, not visible here)
+ for (final String element : questp) {
+ int p = element.indexOf('='); // only the first '=' separates key and value; later ones stay in the value
+ if (p != -1) {
+ map.put(element.substring(0, p), element.substring(p + 1)); // values are stored as found in searchpart, no unescaping is done here
+ } else {
+ map.put(element, ""); // no '=' (p == -1): the whole element is the key; substring(0, p) would throw StringIndexOutOfBoundsException
+ }
+ }
+ return map;
+ }
+
private static CharType charType(final char c) {
if (c >= 'a' && c <= 'z') return CharType.low;
if (c >= '0' && c <= '9') return CharType.number;
diff --git a/source/net/yacy/data/WorkTables.java b/source/net/yacy/data/WorkTables.java
index 90ac309f8..21f156e30 100644
--- a/source/net/yacy/data/WorkTables.java
+++ b/source/net/yacy/data/WorkTables.java
@@ -240,18 +240,15 @@ public class WorkTables extends Tables {
if (row == null) continue;
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
try {
+ MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
if (theapicall.length() > 1000) {
// use a POST to execute the call
- int ai = theapicall.indexOf('?');
- String[] tacs = theapicall.substring(ai + 1).split("&");
Map post = new HashMap<>();
- for (String a: tacs) {
- int f = a.indexOf('=');
- if (f < 0) continue;
- post.put(a.substring(0, f), UTF8.StringBody(a.substring(f + 1)));
+ for (Map.Entry a: url.getAttributes().entrySet()) {
+ post.put(a.getKey(), UTF8.StringBody(a.getValue()));
}
- MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall.substring(0, ai));
+ url = new MultiProtocolURL("http", host, port, url.getFileName());
try {
client.POSTbytes(url, "localhost", post, false, false);
} catch (final IOException e) {
@@ -260,7 +257,6 @@ public class WorkTables extends Tables {
}
} else {
// use a GET to execute the call
- MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true));
try {
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%