From 5a994c97969af17a8115f113dbe5c9b284b3a6cd Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 19 Aug 2010 12:13:54 +0000 Subject: [PATCH] added a scheduler based on API actions - every process that is monitored with the API Steering interface can now be scheduled! - added input methods in Steering interface to set a scheduling time - added a view on the steering api that shows only crawl jobs inside the Crawl Profile servlet - added a scheduling call process in the cleanup process handler that triggers the scheduled processes This causes that the cleanup now also looks for scheduled processes. Such processes are therefore not executed at the same time as given in the target execution time but they will be executed within the cleanup process time window. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7050 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/CrawlProfileEditor_p.html | 44 +++-- htroot/CrawlProfileEditor_p.java | 9 +- htroot/Table_API_p.html | 55 +++++- htroot/Table_API_p.java | 176 ++++++++++++++---- htroot/Tables_p.html | 8 +- .../templates/submenuCrawlMonitor.template | 2 +- source/de/anomic/data/WorkTables.java | 93 ++++++++- source/de/anomic/search/Switchboard.java | 70 +++++-- source/net/yacy/kelondro/util/BEncoder.java | 81 -------- .../net/yacy/kelondro/util/DateFormatter.java | 11 -- 10 files changed, 364 insertions(+), 185 deletions(-) diff --git a/htroot/CrawlProfileEditor_p.html b/htroot/CrawlProfileEditor_p.html index 5be1fdca5..f00c8d814 100644 --- a/htroot/CrawlProfileEditor_p.html +++ b/htroot/CrawlProfileEditor_p.html @@ -4,13 +4,18 @@ YaCy '#[clientname]#': Crawl Profile Editor #%env/templates/metas.template%# - + #%env/templates/header.template%# #%env/templates/submenuCrawlMonitor.template%# -

Crawl Profile Editor

-

- Crawl profiles hold information about a specific URL which is internally used to perform the crawl it belongs to. -

+

Crawler Steering

+ +

Crawl Scheduler

+

Scheduled Crawls can be modified in this table

+ + + +

Crawl Profile Editor

+

Crawl profiles hold information about a crawl process that is currently ongoing.

@@ -41,25 +46,10 @@ Local Text Indexing Local Media Indexing Remote Indexing - Status / Action #{crawlProfiles}# #[name]# - #(status)#terminated::active#(/status)# - #[startURL]# - #[depth]# - #[mustmatch]# - #[mustnotmatch]# - #[crawlingIfOlder]# - #[crawlingDomFilterDepth]# - #{crawlingDomFilterContent}##[item]#
#{/crawlingDomFilterContent}# - #[crawlingDomMaxPages]# - #(withQuery)#no::yes#(/withQuery)# - #(storeCache)#no::yes#(/storeCache)# - #(indexText)#no::yes#(/indexText)# - #(indexMedia)#no::yes#(/indexMedia)# - #(remoteIndexing)#no::yes#(/remoteIndexing)# #(terminateButton)#::
Running
@@ -75,6 +65,20 @@
#(/deleteButton)# + #[startURL]# + #[depth]# + #[mustmatch]# + #[mustnotmatch]# + #[crawlingIfOlder]# + #[crawlingDomFilterDepth]# + #{crawlingDomFilterContent}##[item]#
#{/crawlingDomFilterContent}# + #[crawlingDomMaxPages]# + #(withQuery)#no::yes#(/withQuery)# + #(storeCache)#no::yes#(/storeCache)# + #(indexText)#no::yes#(/indexText)# + #(indexMedia)#no::yes#(/indexMedia)# + #(remoteIndexing)#no::yes#(/remoteIndexing)# + #{/crawlProfiles}# diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java index ef0644874..80a84cc77 100644 --- a/htroot/CrawlProfileEditor_p.java +++ b/htroot/CrawlProfileEditor_p.java @@ -238,8 +238,11 @@ public class CrawlProfileEditor_p { private static void putProfileEntry(final servletProperties prop, final CrawlProfile.entry profile, final boolean active, final boolean dark, final int count, final int domlistlength) { prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0"); - prop.put(CRAWL_PROFILE_PREFIX + count + "_status", active ? "1" : "0"); prop.put(CRAWL_PROFILE_PREFIX + count + "_name", profile.name()); + prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", (!active || ignoreNames.contains(profile.name())) ? "0" : "1"); + prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", profile.handle()); + prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", (active) ? "0" : "1"); + prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton_handle", profile.handle()); prop.putXML(CRAWL_PROFILE_PREFIX + count + "_startURL", profile.startURL()); prop.put(CRAWL_PROFILE_PREFIX + count + "_handle", profile.handle()); prop.put(CRAWL_PROFILE_PREFIX + count + "_depth", profile.depth()); @@ -268,9 +271,5 @@ public class CrawlProfileEditor_p { prop.put(CRAWL_PROFILE_PREFIX + count + "_indexText", (profile.indexText()) ? "1" : "0"); prop.put(CRAWL_PROFILE_PREFIX + count + "_indexMedia", (profile.indexMedia()) ? "1" : "0"); prop.put(CRAWL_PROFILE_PREFIX + count + "_remoteIndexing", (profile.remoteIndexing()) ? "1" : "0"); - prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", (!active || ignoreNames.contains(profile.name())) ? "0" : "1"); - prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", profile.handle()); - prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", (active) ? "0" : "1"); - prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton_handle", profile.handle()); } } diff --git a/htroot/Table_API_p.html b/htroot/Table_API_p.html index 4538a6b09..9b9803788 100644 --- a/htroot/Table_API_p.html +++ b/htroot/Table_API_p.html @@ -26,14 +26,15 @@ - #%env/templates/header.template%# +
+ #(inline)##%env/templates/header.template%#

Steering of API Actions

This table shows actions that had been issued on the YaCy interface to change the configuration or to request crawl actions. These recorded actions can be used to repeat specific actions and to send them to a scheduler for a periodic execution. -

+

::#(/inline)# #(showtable)#::
@@ -47,7 +48,8 @@ Recording
Date Last Exec
Date Next Exec
Date - URL + Scheduler + #(inline)#URL::#(/inline)# #{list}# @@ -58,7 +60,40 @@ #[dateRecording]# #[dateLastExec]# #[dateNextExec]# - #[url]# + + #(scheduler)# + + + + + + + :: +
+
+ + + +
+ + + + +
+ #(/scheduler)# + + #(inline)##[url]#::#(/inline)# #{/list}# @@ -89,6 +124,16 @@
#(/showexec)# - #%env/templates/footer.template%# + #(showschedulerhint)#:: + Scheduled actions are executed after the next execution date has arrived within a time frame of #[tfminutes]# minutes. + #(/showschedulerhint)# + #(inline)##%env/templates/footer.template%#::#(/inline)# +
+ diff --git a/htroot/Table_API_p.java b/htroot/Table_API_p.java index 745629d42..21d45b733 100644 --- a/htroot/Table_API_p.java +++ b/htroot/Table_API_p.java @@ -18,21 +18,20 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import java.io.IOException; +import java.text.DateFormat; import java.util.Date; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.Map; import java.util.TreeSet; -import net.yacy.cora.protocol.Client; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.util.DateFormatter; import de.anomic.data.WorkTables; import de.anomic.http.server.RequestHeader; import de.anomic.search.Switchboard; +import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -42,6 +41,76 @@ public class Table_API_p { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); + prop.put("showexec", 0); + prop.put("showtable", 0); + + prop.put("inline", 0); + boolean inline = false; + if (post != null && post.get("inline","false").equals("true")) { + prop.put("inline", 1); + inline = true; + } + + String typefilter = ".*"; + if (post != null && post.containsKey("filter")) { + typefilter = post.get("filter", ".*"); + } + + String pk; + if (post != null && post.containsKey("repeat_select") && ((pk = post.get("pk")) != null)) try { + String action = post.get("repeat_select", "off"); + if (action.equals("on")) { + Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); + if (row != null) { + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); + WorkTables.calculateAPIScheduler(row, false); + sb.tables.update(WorkTables.TABLE_API_NAME, row); + } + } + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + + if (post != null && post.containsKey("repeat_time") && ((pk = post.get("pk")) != null)) try { + String action = post.get("repeat_time", "off"); + Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); + if (row != null) { + if (action.equals("off")) { + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); + } else { + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.parseInt(action)); + } + WorkTables.calculateAPIScheduler(row, false); + sb.tables.update(WorkTables.TABLE_API_NAME, row); + } + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + + if (post != null && post.containsKey("repeat_unit") && ((pk = post.get("pk")) != null)) try { + String action = post.get("repeat_unit", "seldays"); + Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); + if (row != null) { + int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, action.substring(3)); + if (action.equals("selminutes") && time > 0 && time < 10) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 10); + if (action.equals("selminutes") && time > 50) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 50); + if (action.equals("selhours") && time > 23) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 23); + if (action.equals("seldays") && time > 30) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 30); + WorkTables.calculateAPIScheduler(row, false); + sb.tables.update(WorkTables.TABLE_API_NAME, row); + } + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + if (post != null && post.get("deleterows", "").length() > 0) { for (Map.Entry entry: post.entrySet()) { if (entry.getValue().startsWith("mark_")) { @@ -54,9 +123,6 @@ public class Table_API_p { } } - prop.put("showexec", 0); - prop.put("showtable", 0); - if (post != null && post.get("execrows", "").length() > 0) { // create a time-ordered list of events to execute TreeSet pks = new TreeSet(); @@ -67,28 +133,7 @@ public class Table_API_p { } // now call the api URLs and store the result status - final Client client = new Client(); - client.setRealm(sb.getConfig("adminAccountBase64MD5", "")); - client.setTimout(120000); - LinkedHashMap l = new LinkedHashMap(); - for (String pk: pks) { - try { - Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); - if (row != null) { - String url = "http://localhost:" + sb.getConfig("port", "8080") + new String(row.get(WorkTables.TABLE_API_COL_URL)); - url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + pk; - url += "&" + WorkTables.TABLE_API_COL_APICALL_COUNT + "=" + (row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1); - url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, ""); - url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, ""); - client.GETbytes(url); - l.put(url, client.getStatusCode()); - } - } catch (IOException e) { - Log.logException(e); - } catch (RowSpaceExceededException e) { - Log.logException(e); - } - } + Map l = sb.tables.execAPICall(pks, "localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", "")); // construct result table prop.put("showexec", 1); @@ -110,6 +155,7 @@ public class Table_API_p { // generate table prop.put("showtable", 1); + prop.put("showtable_inline", inline ? 1 : 0); // insert rows int count = 0; @@ -118,27 +164,83 @@ public class Table_API_p { final Iterator mapIterator = sb.tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator(); Tables.Row row; boolean dark = true; + boolean scheduledactions = false; while (mapIterator.hasNext()) { row = mapIterator.next(); if (row == null) continue; - Date dfltdate = new Date(); - Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, dfltdate) : null; + String type = new String(row.get(WorkTables.TABLE_API_COL_TYPE)); + if (!type.matches(typefilter)) continue; + Date now = new Date(); + Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, now) : null; Date date_recording = row.get(WorkTables.TABLE_API_COL_DATE_RECORDING, date); Date date_last_exec = row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date); - Date date_next_exec = row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC) ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, dfltdate) : null; + Date date_next_exec = row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC) ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, now) : null; int callcount = row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1); - prop.put("showtable_list_" + count + "_dark", ((dark) ? 1 : 0) ); dark=!dark; + String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); + int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); + prop.put("showtable_list_" + count + "_inline", inline ? 1 : 0); + prop.put("showtable_list_" + count + "_dark", dark ? 1 : 0); dark=!dark; prop.put("showtable_list_" + count + "_pk", new String(row.getPK())); prop.put("showtable_list_" + count + "_count", count); prop.put("showtable_list_" + count + "_callcount", callcount); - prop.put("showtable_list_" + count + "_dateRecording", date_recording == null ? "-" : DateFormatter.formatHTML(date_recording)); - prop.put("showtable_list_" + count + "_dateLastExec", date_last_exec == null ? "-" : DateFormatter.formatHTML(date_last_exec)); - prop.put("showtable_list_" + count + "_dateNextExec", date_next_exec == null ? "-" : DateFormatter.formatHTML(date_next_exec)); - prop.put("showtable_list_" + count + "_type", row.get(WorkTables.TABLE_API_COL_TYPE)); + prop.put("showtable_list_" + count + "_dateRecording", date_recording == null ? "-" : DateFormat.getDateTimeInstance().format(date_recording)); + prop.put("showtable_list_" + count + "_dateLastExec", date_last_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_last_exec)); + prop.put("showtable_list_" + count + "_dateNextExec", date_next_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_next_exec)); + prop.put("showtable_list_" + count + "_selectedMinutes", unit.equals("minutes") ? 1 : 0); + prop.put("showtable_list_" + count + "_selectedHours", unit.equals("hours") ? 1 : 0); + prop.put("showtable_list_" + count + "_selectedDays", (unit.length() == 0 || unit.equals("days")) ? 1 : 0); + prop.put("showtable_list_" + count + "_repeatTime", time); + prop.put("showtable_list_" + count + "_type", type); prop.put("showtable_list_" + count + "_comment", row.get(WorkTables.TABLE_API_COL_COMMENT)); - prop.put("showtable_list_" + count + "_url", "http://" + sb.myPublicIP() + ":" + sb.getConfig("port", "8080") + new String(row.get(WorkTables.TABLE_API_COL_URL))); + prop.put("showtable_list_" + count + "_inline_url", "http://" + sb.myPublicIP() + ":" + sb.getConfig("port", "8080") + new String(row.get(WorkTables.TABLE_API_COL_URL))); + + if (time == 0) { + prop.put("showtable_list_" + count + "_scheduler", 0); + prop.put("showtable_list_" + count + "_scheduler_pk", new String(row.getPK())); + } else { + scheduledactions = true; + prop.put("showtable_list_" + count + "_scheduler", 1); + prop.put("showtable_list_" + count + "_scheduler_pk", new String(row.getPK())); + prop.put("showtable_list_" + count + "_scheduler_scale_" + 0 + "_time", "off"); + prop.put("showtable_list_" + count + "_scheduler_selectedMinutes", 0); + prop.put("showtable_list_" + count + "_scheduler_selectedHours", 0); + prop.put("showtable_list_" + count + "_scheduler_selectedDays", 0); + if (unit.equals("minutes")) { + for (int i = 1; i <= 5 ; i++) { + prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_time", i * 10); + prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_selected", 0); + } + prop.put("showtable_list_" + count + "_scheduler_scale_" + (time / 10) + "_selected", 1); + prop.put("showtable_list_" + count + "_scheduler_scale", 6); + prop.put("showtable_list_" + count + "_scheduler_selectedMinutes", 1); + } else if (unit.equals("hours")) { + for (int i = 1; i <= 23 ; i++) { + prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_time", i); + prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_selected", 0); + } + prop.put("showtable_list_" + count + "_scheduler_scale_" + time + "_selected", 1); + prop.put("showtable_list_" + count + "_scheduler_scale", 24); + prop.put("showtable_list_" + count + "_scheduler_selectedHours", 1); + } else { + for (int i = 1; i <= 30 ; i++) { + prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_time", i); + prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_selected", 0); + } + prop.put("showtable_list_" + count + "_scheduler_scale_" + time + "_selected", 1); + prop.put("showtable_list_" + count + "_scheduler_scale", 31); + prop.put("showtable_list_" + count + "_scheduler_selectedDays", 1); + } + } + prop.put("showtable_list_" + count + "_scheduler_inline", inline ? "true" : "false"); + prop.put("showtable_list_" + count + "_scheduler_filter", typefilter); count++; } + if (scheduledactions) { + prop.put("showschedulerhint", 1); + prop.put("showschedulerhint_tfminutes", sb.getConfigLong(SwitchboardConstants.CLEANUP_BUSYSLEEP, 300000) / 60000); + } else { + prop.put("showschedulerhint", 0); + } } catch (IOException e) { Log.logException(e); } diff --git a/htroot/Tables_p.html b/htroot/Tables_p.html index b9a9055bf..f8794c16f 100644 --- a/htroot/Tables_p.html +++ b/htroot/Tables_p.html @@ -34,16 +34,16 @@
Table Selection
Select Table: - #{tables}# #{/tables}#
-
+
 
show max. - @@ -56,7 +56,7 @@ search rows for
-
 
+
diff --git a/htroot/env/templates/submenuCrawlMonitor.template b/htroot/env/templates/submenuCrawlMonitor.template index 5c880851e..93e22724d 100644 --- a/htroot/env/templates/submenuCrawlMonitor.template +++ b/htroot/env/templates/submenuCrawlMonitor.template @@ -23,7 +23,7 @@ diff --git a/source/de/anomic/data/WorkTables.java b/source/de/anomic/data/WorkTables.java index 8ed4a9322..c1a6df2fd 100644 --- a/source/de/anomic/data/WorkTables.java +++ b/source/de/anomic/data/WorkTables.java @@ -28,8 +28,13 @@ package de.anomic.data; import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.Date; +import java.util.LinkedHashMap; +import java.util.Map; +import net.yacy.cora.protocol.Client; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; @@ -62,6 +67,13 @@ public class WorkTables extends Tables { super(workPath, 12); } + /** + * recording of a api call. stores the call parameters into the API database table + * @param post the post arguments of the api call + * @param servletName the name of the servlet + * @param type name of the servlet category + * @param comment visual description of the process + */ public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment) { // remove the apicall attributes from the post object String pk = post.remove(TABLE_API_COL_APICALL_PK); @@ -69,7 +81,7 @@ public class WorkTables extends Tables { if (count == null) count = "1"; String time = post.remove(TABLE_API_COL_APICALL_SCHEDULE_TIME); String unit = post.remove(TABLE_API_COL_APICALL_SCHEDULE_UNIT); - if (time == null || unit == null || unit.length() == 0 || "minues,hours,days".indexOf(unit) < 0) { + if (time == null || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) { time = ""; unit = ""; } @@ -125,4 +137,83 @@ public class WorkTables extends Tables { Log.logInfo("APICALL", apiurl); } + /** + * execute an API call using a api table row which contains all essentials + * to access the server also the host, port and the authentication realm must be given + * @param pks a collection of primary keys denoting the rows in the api table + * @param host the host where the api shall be called + * @param port the port on the host + * @param realm authentification realm + * @return a map of the called urls and the http status code of the api call or -1 if any other IOException occurred + */ + public Map execAPICall(Collection pks, String host, int port, String realm) { + // now call the api URLs and store the result status + final Client client = new Client(); + client.setRealm(realm); + client.setTimout(120000); + LinkedHashMap l = new LinkedHashMap(); + for (String pk: pks) { + Tables.Row row = null; + try { + row = select(WorkTables.TABLE_API_NAME, pk.getBytes()); + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + if (row == null) continue; + String url = "http://" + host + ":" + port + new String(row.get(WorkTables.TABLE_API_COL_URL)); + url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + new String(row.getPK()); + url += "&" + WorkTables.TABLE_API_COL_APICALL_COUNT + "=" + (row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1); + url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, ""); + url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, ""); + try { + client.GETbytes(url); + l.put(url, client.getStatusCode()); + } catch (IOException e) { + Log.logException(e); + l.put(url, -1); + } + } + return l; + } + + /** + * simplified call to execute a single entry in the api database table + * @param pk the primary key of the entry + * @param host the host where the api shall be called + * @param port the port on the host + * @param realm authentification realm + * @return the http status code of the api call or -1 if any other IOException occurred + */ + public int execAPICall(String pk, String host, int port, String realm) { + ArrayList pks = new ArrayList(); + pks.add(pk); + Map m = execAPICall(pks, host, port, realm); + if (m.isEmpty()) return -1; + return m.values().iterator().next().intValue(); + } + + /** + * calculate the execution time in a api call table based on given scheduling time and last execution time + * @param row the database row in the api table + */ + public static void calculateAPIScheduler(Tables.Row row, boolean update) { + Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, new Date()) : null; + date = update ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date); + int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); + if (time <= 0) { + row.remove(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC); + return; + } + String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); + long d = date.getTime(); + if (unit.equals("minutes")) d += 60000L * time; + if (unit.equals("hours")) d += 60000L * 60L * time; + if (unit.equals("days")) d += 60000L * 60L * 24L * time; + if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L; + row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d)); + } + + } diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index c6b090a62..da84b7ba5 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -82,11 +82,13 @@ import net.yacy.document.TextParser; import net.yacy.document.content.DCEntry; import net.yacy.document.content.SurrogateReader; import net.yacy.document.importer.OAIListFriendsLoader; +import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow.Components; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.HandleSet; +import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Digest; @@ -1436,8 +1438,6 @@ public final class Switchboard extends serverSwitch { public boolean cleanupJob() { try { - boolean hasDoneSomething = false; - // clear caches if necessary if (!MemoryControl.request(8000000L, false)) { for (Segment indexSegment: this.indexSegments) indexSegment.urlMetadata().clearCache(); @@ -1486,6 +1486,40 @@ public final class Switchboard extends serverSwitch { Log.logException(e); } + // execute scheduled API actions + Tables.Row row; + ArrayList pks = new ArrayList(); + Date now = new Date(); + try { + Iterator plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME); + final Iterator mapIterator = this.tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator(); + while (mapIterator.hasNext()) { + row = mapIterator.next(); + if (row == null) continue; + Date date_next_exec = row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC) ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, now) : null; + if (date_next_exec == null) continue; + if (date_next_exec.after(now)) continue; + pks.add(new String(row.getPK())); + } + } catch (IOException e) { + Log.logException(e); + } + for (String pk: pks) try { + row = this.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); + WorkTables.calculateAPIScheduler(row, true); // calculate next update time + this.tables.update(WorkTables.TABLE_API_NAME, row); + } catch (IOException e) { + Log.logException(e); + continue; + } catch (RowSpaceExceededException e) { + Log.logException(e); + continue; + } + Map callResult = this.tables.execAPICall(pks, "localhost", (int) this.getConfigLong("port", 8080), this.getConfig("adminAccountBase64MD5", "")); + for (Map.Entry call: callResult.entrySet()) { + log.logInfo("Scheduler executed api call, response " + call.getValue() + ": " + call.getKey()); + } + // close unused connections de.anomic.http.client.Client.cleanup(); ConnectionInfo.cleanUp(); @@ -1505,7 +1539,6 @@ public final class Switchboard extends serverSwitch { if ((crawlQueues.delegatedURL.stackSize() > 1000)) { if (this.log.isFine()) log.logFine("Cleaning Delegated-URLs report stack, " + crawlQueues.delegatedURL.stackSize() + " entries on stack"); crawlQueues.delegatedURL.clearStack(); - hasDoneSomething = true; } // clean up error stack @@ -1513,7 +1546,6 @@ public final class Switchboard extends serverSwitch { if ((crawlQueues.errorURL.stackSize() > 1000)) { if (this.log.isFine()) log.logFine("Cleaning Error-URLs report stack, " + crawlQueues.errorURL.stackSize() + " entries on stack"); crawlQueues.errorURL.clearStack(); - hasDoneSomething = true; } // clean up loadedURL stack @@ -1522,21 +1554,21 @@ public final class Switchboard extends serverSwitch { if (crawlResults.getStackSize(origin) > 1000) { if (this.log.isFine()) log.logFine("Cleaning Loaded-URLs report stack, " + crawlResults.getStackSize(origin) + " entries on stack " + origin.getCode()); crawlResults.clearStack(origin); - hasDoneSomething = true; } } + // clean up image stack ResultImages.clearQueues(); // clean up profiles checkInterruption(); - if (cleanProfiles()) hasDoneSomething = true; + cleanProfiles(); // clean up news checkInterruption(); try { if (this.log.isFine()) log.logFine("Cleaning Incoming News, " + this.peers.newsPool.size(yacyNewsPool.INCOMING_DB) + " entries on stack"); - if (this.peers.newsPool.automaticProcess(peers) > 0) hasDoneSomething = true; + this.peers.newsPool.automaticProcess(peers); } catch (final Exception e) { Log.logException(e); } @@ -1548,33 +1580,31 @@ public final class Switchboard extends serverSwitch { } // clean up seed-dbs - if(getConfigBool("routing.deleteOldSeeds.permission",true)) { + if (getConfigBool("routing.deleteOldSeeds.permission",true)) { final long deleteOldSeedsTime = getConfigLong("routing.deleteOldSeeds.time",7)*24*3600000; Iterator e = this.peers.seedsSortedDisconnected(true,yacySeed.LASTSEEN); yacySeed seed = null; final ArrayList deleteQueue = new ArrayList(); checkInterruption(); - //clean passive seeds - while(e.hasNext()) { + // clean passive seeds + while (e.hasNext()) { seed = e.next(); - if(seed != null) { + if (seed != null) { //list is sorted -> break when peers are too young to delete - if(seed.getLastSeenUTC() > (System.currentTimeMillis()-deleteOldSeedsTime)) - break; + if (seed.getLastSeenUTC() > (System.currentTimeMillis()-deleteOldSeedsTime)) break; deleteQueue.add(seed.hash); } } - for(int i=0;i break when peers are too young to delete - if(seed.getLastSeenUTC() > (System.currentTimeMillis()-deleteOldSeedsTime)) - break; + if (seed.getLastSeenUTC() > (System.currentTimeMillis() - deleteOldSeedsTime)) break; deleteQueue.add(seed.hash); } } @@ -1627,7 +1657,7 @@ public final class Switchboard extends serverSwitch { // after all clean up is done, check the resource usage observer.resourceObserverJob(); - return hasDoneSomething; + return true; } catch (final InterruptedException e) { this.log.logInfo("cleanupJob: Shutdown detected"); return false; diff --git a/source/net/yacy/kelondro/util/BEncoder.java b/source/net/yacy/kelondro/util/BEncoder.java index 27c0d0c87..e284cc787 100644 --- a/source/net/yacy/kelondro/util/BEncoder.java +++ b/source/net/yacy/kelondro/util/BEncoder.java @@ -81,87 +81,6 @@ public class BEncoder { return null; } - /* - public static byte[] encodeMap( - String key0, byte[] value0, - String key1, byte[] value1 - ) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - BDecoder.BDictionaryObject.toStream( - baos, - key0, value0, - key1, value1 - ); - baos.close(); - return baos.toByteArray(); - } catch (IOException e) {} - return null; - } - - public static byte[] encodeMap( - String key0, byte[] value0, - String key1, byte[] value1, - String key2, byte[] value2 - ) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - BDecoder.BDictionaryObject.toStream( - baos, - key0, value0, - key1, value1, - key2, value2 - ); - baos.close(); - return baos.toByteArray(); - } catch (IOException e) {} - return null; - } - - public static byte[] encodeMap( - String key0, byte[] value0, - String key1, byte[] value1, - String key2, byte[] value2, - String key3, byte[] value3 - ) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - BDecoder.BDictionaryObject.toStream( - baos, - key0, value0, - key1, value1, - key2, value2, - key3, value3 - ); - baos.close(); - return baos.toByteArray(); - } catch (IOException e) {} - return null; - } - - public static byte[] encodeMap( - String key0, byte[] value0, - String key1, byte[] value1, - String key2, byte[] value2, - String key3, byte[] value3, - String key4, byte[] value4 - ) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - BDecoder.BDictionaryObject.toStream( - baos, - key0, value0, - key1, value1, - key2, value2, - key3, value3, - key4, value4 - ); - baos.close(); - return baos.toByteArray(); - } catch (IOException e) {} - return null; - } - */ public static void main(final String[] args) { Map m = new HashMap(); m.put("k", "000".getBytes()); diff --git a/source/net/yacy/kelondro/util/DateFormatter.java b/source/net/yacy/kelondro/util/DateFormatter.java index 7120e6745..946e7bb6d 100644 --- a/source/net/yacy/kelondro/util/DateFormatter.java +++ b/source/net/yacy/kelondro/util/DateFormatter.java @@ -43,9 +43,6 @@ public final class DateFormatter { /** minimal date format including milliseconds (fixed width: 17) */ public static final String PATTERN_SHORT_MILSEC = "yyyyMMddHHmmssSSS"; - /** special time format for two-line display in html tables **/ - public static final String PATTERN_HTML = "dd' 'MMM' 'yyyy HH:mm:ss"; - /** default HTTP 1.1 header date format pattern */ public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss Z"; // with numeric time zone indicator as defined in RFC5322 public static final String PATTERN_RFC1123_SHORT = "EEE, dd MMM yyyy"; @@ -71,9 +68,6 @@ public final class DateFormatter { /** Date formatter/parser for minimal yyyyMMddHHmmssSSS pattern */ private static final SimpleDateFormat FORMAT_SHORT_MILSEC = new SimpleDateFormat(PATTERN_SHORT_MILSEC, Locale.US); - /** special time format for two-line display in html tables **/ - private static final SimpleDateFormat FORMAT_HTML = new SimpleDateFormat(PATTERN_HTML, Locale.US); - /** Date formatter/non-sloppy parser for W3C datetime (ISO8601) in GMT/UTC */ private static final SimpleDateFormat FORMAT_ISO8601 = new SimpleDateFormat(PATTERN_ISO8601, Locale.US); @@ -176,11 +170,6 @@ public final class DateFormatter { return FORMAT_RFC1123_SHORT.format(date); } - public static String formatHTML(final Date date) { - if (date == null) return ""; - return FORMAT_HTML.format(date); - } - /** * Parse dates as defined in {@linkplain http://www.w3.org/TR/NOTE-datetime}.