From 24c9bb35f7c491da8f2dd78eeab4361f5eb21df5 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 22 Dec 2012 16:27:14 +0100 Subject: [PATCH] extended the Scheduler: introduced scheduled events - an event type (once, regular) can be selected - for this event type, a fixed time can be selected. This may be either directly after startup or at one of the full hours of the day (==25 options) The main point about this feature is the opportunity to start an action directly after startup. That makes it possible to create YaCy distributions which, when started for the first time, begin to index parts of the intranet/internet by themselves. --- htroot/Table_API_p.html | 57 ++++++- htroot/Table_API_p.java | 148 ++++++++++-------- .../net/yacy/cora/date/GenericFormatter.java | 4 +- source/net/yacy/data/WorkTables.java | 45 ++++-- .../kelondro/data/meta/URIMetadataNode.java | 10 ++ .../kelondro/workflow/AbstractBusyThread.java | 8 + .../yacy/kelondro/workflow/BusyThread.java | 14 +- source/net/yacy/search/Switchboard.java | 148 ++++++++++-------- 8 files changed, 284 insertions(+), 150 deletions(-) diff --git a/htroot/Table_API_p.html b/htroot/Table_API_p.html index 855573ead..b963ac4e3 100644 --- a/htroot/Table_API_p.html +++ b/htroot/Table_API_p.html @@ -21,8 +21,8 @@ @@ -78,6 +78,7 @@ To see a list of all APIs, please visit the Event Trigger Scheduler #(inline)#URL::#(/inline)# @@ -91,8 +92,54 @@ To see a list of all APIs, please visit the + + + + :: + + +
+ + + +
+ #(/event)# + + #(scheduler)# - @@ -121,8 +168,8 @@ To see a list of all APIs, please visit the
- + + diff --git a/htroot/Table_API_p.java b/htroot/Table_API_p.java index d5967f67f..ee031b17a 100644 --- a/htroot/Table_API_p.java +++ b/htroot/Table_API_p.java @@ -31,7 +31,6 @@ import java.util.regex.Pattern; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.util.SpaceExceededException; import net.yacy.data.WorkTables; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.logging.Log; @@ -73,58 +72,50 @@ public class Table_API_p { typefilter = Pattern.compile(post.get("filter", ".*")); } - boolean scheduleevent = false; // flag if schedule info of row changes - String current_schedule_pk = ""; // pk of changed schedule data row - if (post != null && post.containsKey("scheduleevent")) { - scheduleevent = post.get("scheduleevent", "false").equalsIgnoreCase("true"); - prop.put("scheduleevent", "false"); - current_schedule_pk = post.get("current_schedule_pk", ""); + // process scheduler and event input actions + boolean scheduleeventaction = false; // flag if schedule info of row changes + String current_pk = ""; // pk of changed schedule data row + if (post != null && post.containsKey("scheduleeventaction")) { + scheduleeventaction = post.get("scheduleeventaction", "false").equalsIgnoreCase("true"); + prop.put("scheduleeventaction", "false"); + current_pk = post.get("current_pk", ""); } - if (scheduleevent && !current_schedule_pk.isEmpty()) { - if (post != null && post.containsKey("repeat_select_" + current_schedule_pk) ) { - try { - final String action = post.get("repeat_select_" + current_schedule_pk, "off"); - if (action.equals("on")) { - Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_schedule_pk.getBytes()); - if (row != null) { - row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 7); - row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); - WorkTables.calculateAPIScheduler(row, false); - 
sb.tables.update(WorkTables.TABLE_API_NAME, row); - } + if (post != null && scheduleeventaction && !current_pk.isEmpty()) { + try { + Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_pk.getBytes()); + if (row != null) { + String action; + + // events + if (post.containsKey("event_select_" + current_pk) && post.get("event_select_" + current_pk, "off").equals("on")) { + row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "regular"); + row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup"); } - } catch (IOException e) { - Log.logException(e); - } catch (SpaceExceededException e) { - Log.logException(e); - } - } - - if (post != null && post.containsKey("repeat_time_" + current_schedule_pk) ) { - try { - final String action = post.get("repeat_time_" + current_schedule_pk, "off"); - final Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_schedule_pk.getBytes()); - if (row != null) { - if ("off".equals(action)) { + + if (post.containsKey("event_kind_" + current_pk) ) { + row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, post.get("event_kind_" + current_pk, "off")); + } + + if (post.containsKey("event_action_" + current_pk) ) { + row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, post.get("event_action_" + current_pk, "startup")); + } + + // scheduler + if (post.containsKey("repeat_select_" + current_pk) && post.get("repeat_select_" + current_pk, "off").equals("on")) { + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 7); + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); + } + + if (post.containsKey("repeat_time_" + current_pk) ) { + if ("off".equals(action = post.get("repeat_time_" + current_pk, "off"))) { row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); } else { row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.parseInt(action)); } - WorkTables.calculateAPIScheduler(row, false); - sb.tables.update(WorkTables.TABLE_API_NAME, row); } - } catch (IOException e) { - 
Log.logException(e); - } catch (SpaceExceededException e) { - Log.logException(e); - } - } - - if (post != null && post.containsKey("repeat_unit_" + current_schedule_pk) ) { - try { - final String action = post.get("repeat_unit_" + current_schedule_pk, "seldays"); - final Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_schedule_pk.getBytes()); - if (row != null) { + + if (post.containsKey("repeat_unit_" + current_pk) ) { + action = post.get("repeat_unit_" + current_pk, "seldays"); int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, action.substring(3)); if (action.equals("selminutes") && time > 0 && time < 10) { @@ -139,16 +130,18 @@ public class Table_API_p { if (action.equals("seldays") && time > 30) { row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 30); } - WorkTables.calculateAPIScheduler(row, false); - sb.tables.update(WorkTables.TABLE_API_NAME, row); } - } catch (IOException e) { - Log.logException(e); - } catch (SpaceExceededException e) { - Log.logException(e); + + // switch scheduler off if event kind is 'regular' + final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); + if ("regular".equals(kind)) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); + + WorkTables.calculateAPIScheduler(row, false); + sb.tables.update(WorkTables.TABLE_API_NAME, row); } - } + } catch (Throwable e) { Log.logException(e); } } + if (post != null && !post.get("deleterows", "").isEmpty()) { for (final Map.Entry entry : post.entrySet()) { if (entry.getValue().startsWith("mark_")) { @@ -241,8 +234,6 @@ public class Table_API_p { final Date date_last_exec = row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date); final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); final int callcount = row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1); - final String unit = 
row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); - final int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); prop.put("showtable_list_" + count + "_inline", inline ? 1 : 0); prop.put("showtable_list_" + count + "_dark", dark ? 1 : 0); dark = !dark; @@ -252,21 +243,50 @@ public class Table_API_p { prop.put("showtable_list_" + count + "_dateRecording", date_recording == null ? "-" : DateFormat.getDateTimeInstance().format(date_recording)); prop.put("showtable_list_" + count + "_dateLastExec", date_last_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_last_exec)); prop.put("showtable_list_" + count + "_dateNextExec", date_next_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_next_exec)); - prop.put("showtable_list_" + count + "_selectedMinutes", unit.equals("minutes") ? 1 : 0); - prop.put("showtable_list_" + count + "_selectedHours", unit.equals("hours") ? 1 : 0); - prop.put("showtable_list_" + count + "_selectedDays", (unit.isEmpty() || unit.equals("days")) ? 1 : 0); - prop.put("showtable_list_" + count + "_repeatTime", time); prop.put("showtable_list_" + count + "_type", row.get(WorkTables.TABLE_API_COL_TYPE)); prop.put("showtable_list_" + count + "_comment", row.get(WorkTables.TABLE_API_COL_COMMENT)); prop.putHTML("showtable_list_" + count + "_inline_url", "http://" + sb.myPublicIP() + ":" + sb.getConfig("port", "8090") + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL))); + prop.put("showtable_list_" + count + "_scheduler_inline", inline ? 
"true" : "false"); + prop.put("showtable_list_" + count + "_scheduler_filter", typefilter.pattern()); + prop.put("showtable_list_" + count + "_scheduler_query", query.pattern()); + prop.put("showtable_list_" + count + "_scheduler_startRecord", startRecord); + prop.put("showtable_list_" + count + "_scheduler_maximumRecords", maximumRecords); + + // events + final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); + final String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup"); + prop.put("showtable_list_" + count + "_event_pk", UTF8.String(row.getPK())); + boolean schedulerDisabled = "regular".equals(kind); + if ("off".equals(kind)) { + prop.put("showtable_list_" + count + "_event", 0); + } else { + prop.put("showtable_list_" + count + "_event", 1); + prop.put("showtable_list_" + count + "_event_selectedoff", "off".equals(kind) ? 1 : 0); + prop.put("showtable_list_" + count + "_event_selectedonce", "once".equals(kind) ? 1 : 0); + prop.put("showtable_list_" + count + "_event_selectedregular", "regular".equals(kind) ? 1 : 0); + prop.put("showtable_list_" + count + "_event_selectedstartup", "startup".equals(action) ? 1 : 0); + for (int i = 0; i < 24; i++) { + String is = Integer.toString(i); + if (is.length() == 1) is = "0" + is; + is = is + "00"; + prop.put("showtable_list_" + count + "_event_selected" + is, is.equals(action) ? 1 : 0); + } + } + // scheduler + final String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); + final int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); + prop.put("showtable_list_" + count + "_selectedMinutes", unit.equals("minutes") ? 1 : 0); + prop.put("showtable_list_" + count + "_selectedHours", unit.equals("hours") ? 1 : 0); + prop.put("showtable_list_" + count + "_selectedDays", (unit.isEmpty() || unit.equals("days")) ? 
1 : 0); + prop.put("showtable_list_" + count + "_scheduler_pk", UTF8.String(row.getPK())); + prop.put("showtable_list_" + count + "_scheduler_disabled", schedulerDisabled ? 1 : 0); + prop.put("showtable_list_" + count + "_repeatTime", time); if (time == 0) { prop.put("showtable_list_" + count + "_scheduler", 0); - prop.put("showtable_list_" + count + "_scheduler_pk", UTF8.String(row.getPK())); } else { scheduledactions = true; prop.put("showtable_list_" + count + "_scheduler", 1); - prop.put("showtable_list_" + count + "_scheduler_pk", UTF8.String(row.getPK())); prop.put("showtable_list_" + count + "_scheduler_scale_" + 0 + "_time", "off"); prop.put("showtable_list_" + count + "_scheduler_selectedMinutes", 0); prop.put("showtable_list_" + count + "_scheduler_selectedHours", 0); @@ -296,12 +316,8 @@ public class Table_API_p { prop.put("showtable_list_" + count + "_scheduler_scale", 31); prop.put("showtable_list_" + count + "_scheduler_selectedDays", 1); } + } - prop.put("showtable_list_" + count + "_scheduler_inline", inline ? 
"true" : "false"); - prop.put("showtable_list_" + count + "_scheduler_filter", typefilter.pattern()); - prop.put("showtable_list_" + count + "_scheduler_query", query.pattern()); - prop.put("showtable_list_" + count + "_scheduler_startRecord", startRecord); - prop.put("showtable_list_" + count + "_scheduler_maximumRecords", maximumRecords); count++; } if (scheduledactions) { diff --git a/source/net/yacy/cora/date/GenericFormatter.java b/source/net/yacy/cora/date/GenericFormatter.java index fb84570a1..52bd43648 100644 --- a/source/net/yacy/cora/date/GenericFormatter.java +++ b/source/net/yacy/cora/date/GenericFormatter.java @@ -138,7 +138,9 @@ public class GenericFormatter extends AbstractFormatter implements DateFormatter if (timeString == null || timeString.isEmpty()) { return new Date(); } if (UTCOffset == null || UTCOffset.isEmpty()) { return new Date(); } try { - return new Date(this.dateFormat.parse(timeString).getTime() - UTCDiff() + UTCDiff(UTCOffset)); + synchronized (this.dateFormat) { + return new Date(this.dateFormat.parse(timeString).getTime() - UTCDiff() + UTCDiff(UTCOffset)); + } } catch (final Throwable e) { //serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]"); return new Date(); diff --git a/source/net/yacy/data/WorkTables.java b/source/net/yacy/data/WorkTables.java index 38b4edd62..4fe3013e1 100644 --- a/source/net/yacy/data/WorkTables.java +++ b/source/net/yacy/data/WorkTables.java @@ -28,11 +28,14 @@ package net.yacy.data; import java.io.File; import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.Locale; import java.util.Map; import java.util.TreeMap; @@ -70,7 +73,9 @@ public class WorkTables extends Tables { public final static String TABLE_API_COL_APICALL_PK = "apicall_pk"; // the primary 
key for the table entry of that api call (not really a database field, only a name in the apicall) public final static String TABLE_API_COL_APICALL_COUNT = "apicall_count"; // counts how often the API was called (starts with 1) public final static String TABLE_API_COL_APICALL_SCHEDULE_TIME = "apicall_schedule_time"; // factor for SCHEULE_UNIT time units - public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT= "apicall_schedule_unit"; // may be 'minutes', 'hours', 'days' + public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT = "apicall_schedule_unit"; // may be 'minutes', 'hours', 'days' + public final static String TABLE_API_COL_APICALL_EVENT_KIND = "apicall_event_kind"; // + public final static String TABLE_API_COL_APICALL_EVENT_ACTION = "apicall_event_action"; // public final static String TABLE_ROBOTS_NAME = "robots"; @@ -277,26 +282,40 @@ public class WorkTables extends Tables { return m.values().iterator().next().intValue(); } + final static long hour = 1000L * 60L * 60L; + final static long day = hour * 24L; + /** * calculate the execution time in a api call table based on given scheduling time and last execution time * @param row the database row in the api table - * @param update if true then the next execution time is based on the latest computed execution time; othervise it is based on the last execution time + * @param update if true then the next execution time is based on the latest computed execution time; otherwise it is based on the last execution time */ public static void calculateAPIScheduler(Tables.Data row, boolean update) { Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, (Date) null) : null; date = update ? 
row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date); - int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); - if (time <= 0) { - row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, ""); - return; - } - String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); long d = date.getTime(); - if (unit.equals("minutes")) d += 60000L * Math.max(10, time); - if (unit.equals("hours")) d += 60000L * 60L * time; - if (unit.equals("days")) d += 60000L * 60L * 24L * time; - if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L; - d -= d % 60000; // remove seconds + + final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); + if ("off".equals(kind)) { + int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); + if (time <= 0) { + row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, ""); + return; + } + String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); + if (unit.equals("minutes")) d += 60000L * Math.max(10, time); + if (unit.equals("hours")) d += 60000L * 60L * time; + if (unit.equals("days")) d += 60000L * 60L * 24L * time; + if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L; + d -= d % 60000; // remove seconds + } else { + String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup"); + if (!"startup".equals(action)) try { + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm"); + d = dateFormat.parse(dateFormat.format(new Date()).substring(0, 8) + action).getTime(); + if (d < System.currentTimeMillis()) d += day; + } catch (ParseException e) {} + } row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d)); } diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index 76575859a..befd65521 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ 
b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -58,6 +58,16 @@ import org.apache.solr.common.SolrInputDocument; */ public class URIMetadataNode { + public static YaCySchema[] fieldList = new YaCySchema[]{ + YaCySchema.audiolinkscount_i, YaCySchema.author, YaCySchema.collection_sxt, YaCySchema.content_type, + YaCySchema.coordinate_p, YaCySchema.description, YaCySchema.fresh_date_dt, YaCySchema.host_id_s, YaCySchema.id, + YaCySchema.imagescount_i, YaCySchema.inboundlinks_protocol_sxt, YaCySchema.inboundlinks_urlstub_txt, + YaCySchema.inboundlinkscount_i, YaCySchema.keywords, YaCySchema.language_s, YaCySchema.last_modified, YaCySchema.load_date_dt, + YaCySchema.md5_s, YaCySchema.outboundlinks_protocol_sxt, YaCySchema.outboundlinks_urlstub_txt, + YaCySchema.outboundlinkscount_i, YaCySchema.publisher_t, YaCySchema.referrer_id_txt, YaCySchema.size_i, YaCySchema.sku, + YaCySchema.text_t, YaCySchema.title, YaCySchema.title_words_val, YaCySchema.url_chars_i, + YaCySchema.videolinkscount_i, YaCySchema.videolinkscount_i, YaCySchema.wordcount_i}; + private byte[] hash = null; private String urlRaw = null, keywords = null; private DigestURI url = null; diff --git a/source/net/yacy/kelondro/workflow/AbstractBusyThread.java b/source/net/yacy/kelondro/workflow/AbstractBusyThread.java index 369dadb0f..b5d90d85c 100644 --- a/source/net/yacy/kelondro/workflow/AbstractBusyThread.java +++ b/source/net/yacy/kelondro/workflow/AbstractBusyThread.java @@ -65,12 +65,20 @@ public abstract class AbstractBusyThread extends AbstractThread implements BusyT return idlePause; } + public final long getIdleSleep() { + return idlePause; + } + public final long setBusySleep(final long milliseconds) { // sets a sleep time for pauses between two jobs busyPause = Math.min(this.maxBusySleep, Math.max(this.minBusySleep, milliseconds)); return busyPause; } + public final long getBusySleep() { + return busyPause; + } + public void setMemPreReqisite(final long freeBytes) { // sets minimum 
required amount of memory for the job execution memprereq = freeBytes; diff --git a/source/net/yacy/kelondro/workflow/BusyThread.java b/source/net/yacy/kelondro/workflow/BusyThread.java index 787c62e34..1b559c947 100644 --- a/source/net/yacy/kelondro/workflow/BusyThread.java +++ b/source/net/yacy/kelondro/workflow/BusyThread.java @@ -40,13 +40,25 @@ public interface BusyThread extends WorkflowThread { */ public long setIdleSleep(long milliseconds); + /** + * gets the sleep time for pauses between two jobs if the job returns false (idle) + * @return milliseconds + */ + public long getIdleSleep(); + /** * sets a sleep time for pauses between two jobs if the job returns true (busy) * @param milliseconds * @return */ public long setBusySleep(long milliseconds); - + + /** + * gets the sleep time for pauses between two jobs if the job returns true (busy) + * @return milliseconds + */ + public long getBusySleep(); + /** * sets minimum required amount of memory for the job execution * @param freeBytes diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index d9e0b94ca..480a56c29 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -53,10 +53,14 @@ import java.net.MalformedURLException; import java.security.NoSuchAlgorithmException; import java.security.PublicKey; import java.security.spec.InvalidKeySpecException; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Properties; @@ -273,6 +277,7 @@ public final class Switchboard extends serverSwitch { private final Semaphore shutdownSync = new Semaphore(0); private boolean terminate = false; + private boolean startupAction = true; // this is set to false after the first event private static 
Switchboard sb; public HashMap crawlJobsStatus = new HashMap(); @@ -1062,6 +1067,14 @@ public final class Switchboard extends serverSwitch { //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260); this.trail = new LinkedBlockingQueue(); + + // finally start jobs which shall be started after start-up + new Thread() { + public void run() { + try {Thread.sleep(10000);} catch (InterruptedException e) {} // we must wait until the httpd comes up + execAPIActions(); // trigger startup actions + } + }.start(); this.log.logConfig("Finished Switchboard Initialization"); } @@ -1992,27 +2005,19 @@ public final class Switchboard extends serverSwitch { insert = true; } if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ) { - selentry.put( - CrawlProfile.RECRAWL_IF_OLDER, - Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE))); + selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE))); insert = true; } if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ) { - selentry.put( - CrawlProfile.RECRAWL_IF_OLDER, - Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE))); + selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE))); insert = true; } if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ) { - selentry.put( - CrawlProfile.RECRAWL_IF_OLDER, - Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE))); + selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE))); insert = true; } 
if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ) { - selentry.put( - CrawlProfile.RECRAWL_IF_OLDER, - Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE))); + selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE))); insert = true; } if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE) ) { @@ -2027,58 +2032,7 @@ public final class Switchboard extends serverSwitch { Log.logException(e); } - // execute scheduled API actions - Tables.Row row; - final List pks = new ArrayList(); - final Date now = new Date(); - try { - final Iterator plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME); - final Iterator mapIterator = - this.tables - .orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING) - .iterator(); - while ( mapIterator.hasNext() ) { - row = mapIterator.next(); - if ( row == null ) { - continue; - } - final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); - if ( date_next_exec == null ) { - continue; - } - if ( date_next_exec.after(now) ) { - continue; - } - pks.add(UTF8.String(row.getPK())); - } - } catch ( final IOException e ) { - Log.logException(e); - } - for ( final String pk : pks ) { - try { - row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk)); - WorkTables.calculateAPIScheduler(row, true); // calculate next update time - this.tables.update(WorkTables.TABLE_API_NAME, row); - } catch ( final IOException e ) { - Log.logException(e); - continue; - } catch ( final SpaceExceededException e ) { - Log.logException(e); - continue; - } - } - final Map callResult = - this.tables.execAPICalls( - "localhost", - (int) getConfigLong("port", 8090), - getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), - pks); - for ( final Map.Entry call : callResult.entrySet() ) { - 
this.log.logInfo("Scheduler executed api call, response " - + call.getValue() - + ": " - + call.getKey()); - } + execAPIActions(); // close unused connections ConnectionInfo.cleanUp(); @@ -2266,6 +2220,72 @@ public final class Switchboard extends serverSwitch { } } + private void execAPIActions() { + + // execute scheduled API actions + Tables.Row row; + final Collection pks = new LinkedHashSet(); + final Date now = new Date(); + + try { + final Iterator plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME); + final Iterator mapIterator = this.tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator(); + while (mapIterator.hasNext()) { + row = mapIterator.next(); + if (row == null) continue; + + // select api calls according to scheduler settings + final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); + if (date_next_exec != null && now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); + + // select api calls according to event settings + final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); + if (!"off".equals(kind)) { + String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup"); + if ("startup".equals(action)) { + if (startupAction) { + pks.add(UTF8.String(row.getPK())); + if ("once".equals(kind)) { + row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); + sb.tables.update(WorkTables.TABLE_API_NAME, row); + } + } + } else try { + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm"); + long d = dateFormat.parse(dateFormat.format(new Date()).substring(0, 8) + action).getTime(); + long cycle = getThread(SwitchboardConstants.CLEANUP).getBusySleep(); + if (d < System.currentTimeMillis() && System.currentTimeMillis() - d < cycle) { + pks.add(UTF8.String(row.getPK())); + if ("once".equals(kind)) { + row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); + sb.tables.update(WorkTables.TABLE_API_NAME, row); + } + } + 
} catch (ParseException e) {} + } + } + } catch (final IOException e) { + Log.logException(e); + } + for (final String pk : pks) { + try { + row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk)); + WorkTables.calculateAPIScheduler(row, true); // calculate next update time + this.tables.update(WorkTables.TABLE_API_NAME, row); + } catch ( final Throwable e ) { + Log.logException(e); + continue; + } + } + startupAction = false; + + // execute api calls + final Map callResult = this.tables.execAPICalls("localhost", (int) getConfigLong("port", 8090), getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), pks); + for ( final Map.Entry call : callResult.entrySet() ) { + this.log.logInfo("Scheduler executed api call, response " + call.getValue() + ": " + call.getKey()); + } + } + /** * With this function the crawling process can be paused *