From 07311020d47d42000d5d2488489a7927020739a9 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 7 Aug 2016 05:08:55 +0200 Subject: [PATCH 1/3] postpone apicall exec date init until actual call fix for http://mantis.tokeek.de/view.php?id=677 The difference shows when a large number of RSS feeds is scheduled and loading has not finished before YaCy is shut down. The change makes sure that RSS feeds which were not yet loaded will be loaded by the scheduler on the next startup. --- source/net/yacy/data/WorkTables.java | 18 +++++++++++++---- source/net/yacy/search/Switchboard.java | 27 +++++++++++-------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/source/net/yacy/data/WorkTables.java b/source/net/yacy/data/WorkTables.java index b265b848d..82283bb7e 100644 --- a/source/net/yacy/data/WorkTables.java +++ b/source/net/yacy/data/WorkTables.java @@ -160,7 +160,9 @@ public class WorkTables extends Tables { /** * store a API call and set attributes to schedule a re-call of that API call according to a given frequence - * This is the same as the previous method but it also computes a re-call time and stores that additionally + * This is the same as the previous method but it also sets a re-call time and stores that additionally + * The method does't assume that the APICall was made and initializes the last_exec_date with null, this + * is set on actual APICall execution. 
* @param post the post arguments of the api call * @param servletName the name of the servlet * @param type name of the servlet category @@ -191,14 +193,14 @@ public class WorkTables extends Tables { data.put(TABLE_API_COL_COMMENT, UTF8.getBytes(comment)); byte[] date = ASCII.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format()); data.put(TABLE_API_COL_DATE_RECORDING, date); - data.put(TABLE_API_COL_DATE_LAST_EXEC, date); + data.put(TABLE_API_COL_DATE_LAST_EXEC, (Date)null); data.put(TABLE_API_COL_URL, UTF8.getBytes(apiurl)); // insert APICALL attributes - data.put(TABLE_API_COL_APICALL_COUNT, UTF8.getBytes("1")); + data.put(TABLE_API_COL_APICALL_COUNT, UTF8.getBytes("0")); data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, ASCII.getBytes(Integer.toString(time))); data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, UTF8.getBytes(unit)); - calculateAPIScheduler(data, false); // set next execution time + // calculateAPIScheduler(data, false); // set next execution time pk = super.insert(TABLE_API_NAME, data); } catch (final IOException e) { ConcurrentLog.logException(e); @@ -234,6 +236,14 @@ public class WorkTables extends Tables { } if (row == null) continue; String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK()); + try { // set exec time before the actual call to prevent repeat during client timeout (on short schedule duration) + final Date now = new Date(); + row.put(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, now); // record exec time + WorkTables.calculateAPIScheduler(row, false); // calculate next update time + Switchboard.getSwitchboard().tables.update(WorkTables.TABLE_API_NAME, row); + } catch (IOException ex) { + ConcurrentLog.warn("APICALL", "error updating exec time for " + theapicall); + } try { MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall); // use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in 
client.GETbytes() diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index b5ccadbf7..6c2686be8 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2204,17 +2204,24 @@ public final class Switchboard extends serverSwitch { if (row == null) continue; // select api calls according to scheduler settings - final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); - if (date_next_exec != null && now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); - + final int stime = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); + if (stime > 0) { // has scheduled repeat + final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); + if (date_next_exec != null) { // has been executed befor + if (now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); + } else { // was never executed before + pks.add(UTF8.String(row.getPK())); + } + } // select api calls according to event settings final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); if (!"off".equals(kind)) { String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup"); + Date date_last_exec = row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, (Date) null); if ("startup".equals(action)) { if (startupAction) { pks.add(UTF8.String(row.getPK())); - if ("once".equals(kind)) { + if ("once".equals(kind) && date_last_exec != null) { row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); sb.tables.update(WorkTables.TABLE_API_NAME, row); } @@ -2225,7 +2232,7 @@ public final class Switchboard extends serverSwitch { long cycle = getThread(SwitchboardConstants.CLEANUP).getBusySleep(); if (d < System.currentTimeMillis() && System.currentTimeMillis() - d < cycle) { pks.add(UTF8.String(row.getPK())); - if ("once".equals(kind)) { + if ("once".equals(kind) && date_last_exec != null) { 
row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, ""); sb.tables.update(WorkTables.TABLE_API_NAME, row); @@ -2237,16 +2244,6 @@ public final class Switchboard extends serverSwitch { } catch (final IOException e) { ConcurrentLog.logException(e); } - for (final String pk : pks) { - try { - row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk)); - WorkTables.calculateAPIScheduler(row, true); // calculate next update time - this.tables.update(WorkTables.TABLE_API_NAME, row); - } catch (final Throwable e ) { - ConcurrentLog.logException(e); - continue; - } - } startupAction = false; // execute api calls From 7c3f932e5dac3de726fd9107efce92c313e16880 Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 8 Aug 2016 01:57:31 +0200 Subject: [PATCH 2/3] revert due to conflict with double count recording by scheduler / servlet by the commit under normal operation (no shutdown) --- source/net/yacy/data/WorkTables.java | 18 ++++------------- source/net/yacy/search/Switchboard.java | 27 ++++++++++++++----------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/source/net/yacy/data/WorkTables.java b/source/net/yacy/data/WorkTables.java index 82283bb7e..b265b848d 100644 --- a/source/net/yacy/data/WorkTables.java +++ b/source/net/yacy/data/WorkTables.java @@ -160,9 +160,7 @@ public class WorkTables extends Tables { /** * store a API call and set attributes to schedule a re-call of that API call according to a given frequence - * This is the same as the previous method but it also sets a re-call time and stores that additionally - * The method does't assume that the APICall was made and initializes the last_exec_date with null, this - * is set on actual APICall execution. 
+ * This is the same as the previous method but it also computes a re-call time and stores that additionally * @param post the post arguments of the api call * @param servletName the name of the servlet * @param type name of the servlet category @@ -193,14 +191,14 @@ public class WorkTables extends Tables { data.put(TABLE_API_COL_COMMENT, UTF8.getBytes(comment)); byte[] date = ASCII.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format()); data.put(TABLE_API_COL_DATE_RECORDING, date); - data.put(TABLE_API_COL_DATE_LAST_EXEC, (Date)null); + data.put(TABLE_API_COL_DATE_LAST_EXEC, date); data.put(TABLE_API_COL_URL, UTF8.getBytes(apiurl)); // insert APICALL attributes - data.put(TABLE_API_COL_APICALL_COUNT, UTF8.getBytes("0")); + data.put(TABLE_API_COL_APICALL_COUNT, UTF8.getBytes("1")); data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, ASCII.getBytes(Integer.toString(time))); data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, UTF8.getBytes(unit)); - // calculateAPIScheduler(data, false); // set next execution time + calculateAPIScheduler(data, false); // set next execution time pk = super.insert(TABLE_API_NAME, data); } catch (final IOException e) { ConcurrentLog.logException(e); @@ -236,14 +234,6 @@ public class WorkTables extends Tables { } if (row == null) continue; String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK()); - try { // set exec time before the actual call to prevent repeat during client timeout (on short schedule duration) - final Date now = new Date(); - row.put(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, now); // record exec time - WorkTables.calculateAPIScheduler(row, false); // calculate next update time - Switchboard.getSwitchboard().tables.update(WorkTables.TABLE_API_NAME, row); - } catch (IOException ex) { - ConcurrentLog.warn("APICALL", "error updating exec time for " + theapicall); - } try { MultiProtocolURL url = new MultiProtocolURL("http", host, port, 
theapicall); // use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes() diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 6c2686be8..b5ccadbf7 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2204,24 +2204,17 @@ public final class Switchboard extends serverSwitch { if (row == null) continue; // select api calls according to scheduler settings - final int stime = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); - if (stime > 0) { // has scheduled repeat - final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); - if (date_next_exec != null) { // has been executed befor - if (now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); - } else { // was never executed before - pks.add(UTF8.String(row.getPK())); - } - } + final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); + if (date_next_exec != null && now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); + // select api calls according to event settings final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); if (!"off".equals(kind)) { String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup"); - Date date_last_exec = row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, (Date) null); if ("startup".equals(action)) { if (startupAction) { pks.add(UTF8.String(row.getPK())); - if ("once".equals(kind) && date_last_exec != null) { + if ("once".equals(kind)) { row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); sb.tables.update(WorkTables.TABLE_API_NAME, row); } @@ -2232,7 +2225,7 @@ public final class Switchboard extends serverSwitch { long cycle = getThread(SwitchboardConstants.CLEANUP).getBusySleep(); if (d < System.currentTimeMillis() && System.currentTimeMillis() - d < cycle) { pks.add(UTF8.String(row.getPK())); - if 
("once".equals(kind) && date_last_exec != null) { + if ("once".equals(kind)) { row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, ""); sb.tables.update(WorkTables.TABLE_API_NAME, row); @@ -2244,6 +2237,16 @@ public final class Switchboard extends serverSwitch { } catch (final IOException e) { ConcurrentLog.logException(e); } + for (final String pk : pks) { + try { + row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk)); + WorkTables.calculateAPIScheduler(row, true); // calculate next update time + this.tables.update(WorkTables.TABLE_API_NAME, row); + } catch (final Throwable e ) { + ConcurrentLog.logException(e); + continue; + } + } startupAction = false; // execute api calls From 70d47ae38a0327a0fda87137c5d4e8e8fcd6e247 Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 8 Aug 2016 02:19:48 +0200 Subject: [PATCH 3/3] keep scheduler selection by repeat entry from https://github.com/yacy/yacy_search_server/commit/07311020d47d42000d5d2488489a7927020739a9 to allow exec schedule on actual exec event. Iterate on exec date (of advantage after interruption/shutdown) to schedule older or missed events first. 
--- source/net/yacy/search/Switchboard.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index b5ccadbf7..dc3af0bf6 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2198,15 +2198,22 @@ public final class Switchboard extends serverSwitch { final Date now = new Date(); try { final Iterator plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME); - final Iterator mapIterator = Tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator(); + final Iterator mapIterator = Tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_LAST_EXEC).iterator(); while (mapIterator.hasNext()) { row = mapIterator.next(); if (row == null) continue; // select api calls according to scheduler settings - final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); - if (date_next_exec != null && now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); - + final int stime = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0); + if (stime > 0) { // has scheduled repeat + final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null); + if (date_next_exec != null) { // has been executed before + if (now.after(date_next_exec)) pks.add(UTF8.String(row.getPK())); + } else { // was never executed before + pks.add(UTF8.String(row.getPK())); + } + } + // select api calls according to event settings final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off"); if (!"off".equals(kind)) {