added a scheduler based on API actions

- every process that is monitored with the API Steering interface can now be scheduled!
- added input methods in Steering interface to set a scheduling time
- added a view on the steering api that shows only crawl jobs inside the Crawl Profile servlet
- added a scheduling call process in the cleanup process handler that triggers the scheduled processes
As a result, the cleanup job now also checks for scheduled processes. Such processes are therefore not executed exactly at
their target execution time; instead they are executed within the next cleanup process time window.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7050 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 189a986ebd
commit 5a994c9796

@ -4,13 +4,18 @@
<title>YaCy '#[clientname]#': Crawl Profile Editor</title>
#%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWGlobalQueue">
<body id="ProfileEditor">
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
<h2>Crawl Profile Editor</h2>
<p>
Crawl profiles hold information about a specific URL which is internally used to perform the crawl it belongs to.
</p>
<h2>Crawler Steering</h2>
<h3>Crawl Scheduler</h3>
<p>Scheduled Crawls can be modified in this table</p>
<iframe id="APITable" src="Table_API_p.html?inline=true&filter=crawler" width="100%" height="0" align="left" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<h3>Crawl Profile Editor</h3>
<p>Crawl profiles hold information about a crawl process that is currently ongoing.</p>
<!-- crawl profile list -->
@ -41,25 +46,10 @@
<td><strong>Local Text Indexing</strong></td>
<td><strong>Local Media Indexing</strong></td>
<td><strong>Remote Indexing</strong></td>
<td><strong>Status / Action</strong></td>
</tr>
#{crawlProfiles}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[name]#</td>
<td>#(status)#terminated::active#(/status)#</td>
<td><a href="#[startURL]#">#[startURL]#</a></td>
<td>#[depth]#</td>
<td>#[mustmatch]#</td>
<td>#[mustnotmatch]#</td>
<td>#[crawlingIfOlder]#</td>
<td>#[crawlingDomFilterDepth]#</td>
<td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
<td>#[crawlingDomMaxPages]#</td>
<td>#(withQuery)#no::yes#(/withQuery)#</td>
<td>#(storeCache)#no::yes#(/storeCache)#</td>
<td>#(indexText)#no::yes#(/indexText)#</td>
<td>#(indexMedia)#no::yes#(/indexMedia)#</td>
<td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
<td>#(terminateButton)#::
<div style="text-decoration:blink">Running</div>
<form action="CrawlProfileEditor_p.html" method="get" enctype="multipart/form-data"><div>
@ -75,6 +65,20 @@
</div></form>
#(/deleteButton)#
</td>
<td><a href="#[startURL]#">#[startURL]#</a></td>
<td>#[depth]#</td>
<td>#[mustmatch]#</td>
<td>#[mustnotmatch]#</td>
<td>#[crawlingIfOlder]#</td>
<td>#[crawlingDomFilterDepth]#</td>
<td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
<td>#[crawlingDomMaxPages]#</td>
<td>#(withQuery)#no::yes#(/withQuery)#</td>
<td>#(storeCache)#no::yes#(/storeCache)#</td>
<td>#(indexText)#no::yes#(/indexText)#</td>
<td>#(indexMedia)#no::yes#(/indexMedia)#</td>
<td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
</tr>
#{/crawlProfiles}#
</table>

@ -238,8 +238,11 @@ public class CrawlProfileEditor_p {
private static void putProfileEntry(final servletProperties prop, final CrawlProfile.entry profile, final boolean active, final boolean dark, final int count, final int domlistlength) {
prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_status", active ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_name", profile.name());
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", (!active || ignoreNames.contains(profile.name())) ? "0" : "1");
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", profile.handle());
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", (active) ? "0" : "1");
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton_handle", profile.handle());
prop.putXML(CRAWL_PROFILE_PREFIX + count + "_startURL", profile.startURL());
prop.put(CRAWL_PROFILE_PREFIX + count + "_handle", profile.handle());
prop.put(CRAWL_PROFILE_PREFIX + count + "_depth", profile.depth());
@ -268,9 +271,5 @@ public class CrawlProfileEditor_p {
prop.put(CRAWL_PROFILE_PREFIX + count + "_indexText", (profile.indexText()) ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_indexMedia", (profile.indexMedia()) ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_remoteIndexing", (profile.remoteIndexing()) ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", (!active || ignoreNames.contains(profile.name())) ? "0" : "1");
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", profile.handle());
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", (active) ? "0" : "1");
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton_handle", profile.handle());
}
}

@ -26,14 +26,15 @@
<script type="text/javascript" src="/js/sorttable.js"></script>
</head>
<body id="Tables">
#%env/templates/header.template%#
<div id="fullcontent">
#(inline)##%env/templates/header.template%#
<h2>Steering of API Actions</h2>
<p>This table shows actions that had been issued on the YaCy interface
to change the configuration or to request crawl actions.
These recorded actions can be used to repeat specific actions and to send them
to a scheduler for a periodic execution.
</p>
</p>::#(/inline)#
#(showtable)#::
<form action="Table_API_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8" name="apilist">
<fieldset>
@ -47,7 +48,8 @@
<td>Recording<br/>Date</td>
<td>Last&nbsp;Exec<br/>Date</td>
<td>Next&nbsp;Exec<br/>Date</td>
<td>URL</td>
<td>Scheduler</td>
#(inline)#<td>URL</td>::#(/inline)#
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
@ -58,7 +60,40 @@
<td>#[dateRecording]#</td>
<td>#[dateLastExec]#</td>
<td>#[dateNextExec]#</td>
<td>#[url]#</td>
<td>
#(scheduler)#
<form action="Table_API_p.html" method="post" enctype="multipart/form-data" name="modify_repeat">
<select name="repeat_select" onchange='this.form.submit()'>
<option value="off" selected="selected">no repetition</option>
<option value="on">activate scheduler</option>
</select>
<input type="hidden" name="pk" value="#[pk]#" />
<input type="hidden" name="inline" value="#[inline]#" />
<input type="hidden" name="filter" value="#[filter]#" />
</form>
::
<form action="Table_API_p.html" method="post" enctype="multipart/form-data" name="modify_repeat">
<table><tr><td>
<select name="repeat_time" onchange='this.form.submit()'>
#{scale}#
<option value="#[time]#" #(selected)#::selected="selected"#(/selected)#>#[time]#</option>
#{/scale}#
</select>
</td><td>
<select name="repeat_unit" onchange='this.form.submit()'>
<option value="selminutes" #(selectedMinutes)#::selected="selected"#(/selectedMinutes)#>minutes</option>
<option value="selhours" #(selectedHours)#::selected="selected"#(/selectedHours)#>hours</option>
<option value="seldays" #(selectedDays)#::selected="selected"#(/selectedDays)#>days</option>
</select>
</td></tr></table>
<input type="hidden" name="pk" value="#[pk]#" />
<input type="hidden" name="inline" value="#[inline]#" />
<input type="hidden" name="filter" value="#[filter]#" />
<noscript><input type="submit" value="Submit"></noscript>
</form>
#(/scheduler)#
</td>
#(inline)#<td>#[url]#</td>::#(/inline)#
</tr>
#{/list}#
</table>
@ -89,6 +124,16 @@
</fieldset>
</form>
#(/showexec)#
#%env/templates/footer.template%#
#(showschedulerhint)#::
Scheduled actions are executed after the next execution date has arrived within a time frame of #[tfminutes]# minutes.
#(/showschedulerhint)#
#(inline)##%env/templates/footer.template%#::#(/inline)#
</div>
<script type="text/javascript">
<!--
parentPage = parent.document.getElementById('APITable');
if (parentPage != null) parentPage.height = document.getElementById('fullcontent').offsetHeight + 30;
-->
</script>
</body>
</html>

@ -18,21 +18,20 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.IOException;
import java.text.DateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TreeSet;
import net.yacy.cora.protocol.Client;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.DateFormatter;
import de.anomic.data.WorkTables;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -42,6 +41,76 @@ public class Table_API_p {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
prop.put("showexec", 0);
prop.put("showtable", 0);
prop.put("inline", 0);
boolean inline = false;
if (post != null && post.get("inline","false").equals("true")) {
prop.put("inline", 1);
inline = true;
}
String typefilter = ".*";
if (post != null && post.containsKey("filter")) {
typefilter = post.get("filter", ".*");
}
String pk;
if (post != null && post.containsKey("repeat_select") && ((pk = post.get("pk")) != null)) try {
String action = post.get("repeat_select", "off");
if (action.equals("on")) {
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
if (row != null) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
}
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
if (post != null && post.containsKey("repeat_time") && ((pk = post.get("pk")) != null)) try {
String action = post.get("repeat_time", "off");
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
if (row != null) {
if (action.equals("off")) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
} else {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.parseInt(action));
}
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
if (post != null && post.containsKey("repeat_unit") && ((pk = post.get("pk")) != null)) try {
String action = post.get("repeat_unit", "seldays");
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
if (row != null) {
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, action.substring(3));
if (action.equals("selminutes") && time > 0 && time < 10) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 10);
if (action.equals("selminutes") && time > 50) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 50);
if (action.equals("selhours") && time > 23) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 23);
if (action.equals("seldays") && time > 30) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 30);
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
if (post != null && post.get("deleterows", "").length() > 0) {
for (Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) {
@ -54,9 +123,6 @@ public class Table_API_p {
}
}
prop.put("showexec", 0);
prop.put("showtable", 0);
if (post != null && post.get("execrows", "").length() > 0) {
// create a time-ordered list of events to execute
TreeSet<String> pks = new TreeSet<String>();
@ -67,28 +133,7 @@ public class Table_API_p {
}
// now call the api URLs and store the result status
final Client client = new Client();
client.setRealm(sb.getConfig("adminAccountBase64MD5", ""));
client.setTimout(120000);
LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
for (String pk: pks) {
try {
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
if (row != null) {
String url = "http://localhost:" + sb.getConfig("port", "8080") + new String(row.get(WorkTables.TABLE_API_COL_URL));
url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + pk;
url += "&" + WorkTables.TABLE_API_COL_APICALL_COUNT + "=" + (row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1);
url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, "");
url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "");
client.GETbytes(url);
l.put(url, client.getStatusCode());
}
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
}
Map<String, Integer> l = sb.tables.execAPICall(pks, "localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""));
// construct result table
prop.put("showexec", 1);
@ -110,6 +155,7 @@ public class Table_API_p {
// generate table
prop.put("showtable", 1);
prop.put("showtable_inline", inline ? 1 : 0);
// insert rows
int count = 0;
@ -118,27 +164,83 @@ public class Table_API_p {
final Iterator<Tables.Row> mapIterator = sb.tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator();
Tables.Row row;
boolean dark = true;
boolean scheduledactions = false;
while (mapIterator.hasNext()) {
row = mapIterator.next();
if (row == null) continue;
Date dfltdate = new Date();
Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, dfltdate) : null;
String type = new String(row.get(WorkTables.TABLE_API_COL_TYPE));
if (!type.matches(typefilter)) continue;
Date now = new Date();
Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, now) : null;
Date date_recording = row.get(WorkTables.TABLE_API_COL_DATE_RECORDING, date);
Date date_last_exec = row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
Date date_next_exec = row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC) ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, dfltdate) : null;
Date date_next_exec = row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC) ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, now) : null;
int callcount = row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1);
prop.put("showtable_list_" + count + "_dark", ((dark) ? 1 : 0) ); dark=!dark;
String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
prop.put("showtable_list_" + count + "_inline", inline ? 1 : 0);
prop.put("showtable_list_" + count + "_dark", dark ? 1 : 0); dark=!dark;
prop.put("showtable_list_" + count + "_pk", new String(row.getPK()));
prop.put("showtable_list_" + count + "_count", count);
prop.put("showtable_list_" + count + "_callcount", callcount);
prop.put("showtable_list_" + count + "_dateRecording", date_recording == null ? "-" : DateFormatter.formatHTML(date_recording));
prop.put("showtable_list_" + count + "_dateLastExec", date_last_exec == null ? "-" : DateFormatter.formatHTML(date_last_exec));
prop.put("showtable_list_" + count + "_dateNextExec", date_next_exec == null ? "-" : DateFormatter.formatHTML(date_next_exec));
prop.put("showtable_list_" + count + "_type", row.get(WorkTables.TABLE_API_COL_TYPE));
prop.put("showtable_list_" + count + "_dateRecording", date_recording == null ? "-" : DateFormat.getDateTimeInstance().format(date_recording));
prop.put("showtable_list_" + count + "_dateLastExec", date_last_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_last_exec));
prop.put("showtable_list_" + count + "_dateNextExec", date_next_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_next_exec));
prop.put("showtable_list_" + count + "_selectedMinutes", unit.equals("minutes") ? 1 : 0);
prop.put("showtable_list_" + count + "_selectedHours", unit.equals("hours") ? 1 : 0);
prop.put("showtable_list_" + count + "_selectedDays", (unit.length() == 0 || unit.equals("days")) ? 1 : 0);
prop.put("showtable_list_" + count + "_repeatTime", time);
prop.put("showtable_list_" + count + "_type", type);
prop.put("showtable_list_" + count + "_comment", row.get(WorkTables.TABLE_API_COL_COMMENT));
prop.put("showtable_list_" + count + "_url", "http://" + sb.myPublicIP() + ":" + sb.getConfig("port", "8080") + new String(row.get(WorkTables.TABLE_API_COL_URL)));
prop.put("showtable_list_" + count + "_inline_url", "http://" + sb.myPublicIP() + ":" + sb.getConfig("port", "8080") + new String(row.get(WorkTables.TABLE_API_COL_URL)));
if (time == 0) {
prop.put("showtable_list_" + count + "_scheduler", 0);
prop.put("showtable_list_" + count + "_scheduler_pk", new String(row.getPK()));
} else {
scheduledactions = true;
prop.put("showtable_list_" + count + "_scheduler", 1);
prop.put("showtable_list_" + count + "_scheduler_pk", new String(row.getPK()));
prop.put("showtable_list_" + count + "_scheduler_scale_" + 0 + "_time", "off");
prop.put("showtable_list_" + count + "_scheduler_selectedMinutes", 0);
prop.put("showtable_list_" + count + "_scheduler_selectedHours", 0);
prop.put("showtable_list_" + count + "_scheduler_selectedDays", 0);
if (unit.equals("minutes")) {
for (int i = 1; i <= 5 ; i++) {
prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_time", i * 10);
prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_selected", 0);
}
prop.put("showtable_list_" + count + "_scheduler_scale_" + (time / 10) + "_selected", 1);
prop.put("showtable_list_" + count + "_scheduler_scale", 6);
prop.put("showtable_list_" + count + "_scheduler_selectedMinutes", 1);
} else if (unit.equals("hours")) {
for (int i = 1; i <= 23 ; i++) {
prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_time", i);
prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_selected", 0);
}
prop.put("showtable_list_" + count + "_scheduler_scale_" + time + "_selected", 1);
prop.put("showtable_list_" + count + "_scheduler_scale", 24);
prop.put("showtable_list_" + count + "_scheduler_selectedHours", 1);
} else {
for (int i = 1; i <= 30 ; i++) {
prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_time", i);
prop.put("showtable_list_" + count + "_scheduler_scale_" + i + "_selected", 0);
}
prop.put("showtable_list_" + count + "_scheduler_scale_" + time + "_selected", 1);
prop.put("showtable_list_" + count + "_scheduler_scale", 31);
prop.put("showtable_list_" + count + "_scheduler_selectedDays", 1);
}
}
prop.put("showtable_list_" + count + "_scheduler_inline", inline ? "true" : "false");
prop.put("showtable_list_" + count + "_scheduler_filter", typefilter);
count++;
}
if (scheduledactions) {
prop.put("showschedulerhint", 1);
prop.put("showschedulerhint_tfminutes", sb.getConfigLong(SwitchboardConstants.CLEANUP_BUSYSLEEP, 300000) / 60000);
} else {
prop.put("showschedulerhint", 0);
}
} catch (IOException e) {
Log.logException(e);
}

@ -34,16 +34,16 @@
<fieldset><legend>Table Selection</legend>
<dl>
<dt class="TableCellDark">Select Table:
<select name="table" size="1">
<select name="table" onchange='this.form.submit()'>
#{tables}#
<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>
#{/tables}#
</select>
</dt>
<dd><input type="submit" name="edittable" value="Show Table" /></dd>
<dd>&nbsp;</dd>
<dt>
show max.
<select name="count" size="1">
<select name="count" onchange='this.form.submit()'>
<option value="10">10</option>
<option value="100" selected="selected">100</option>
<option value="1000">1000</option>
@ -56,7 +56,7 @@
search rows for
<input type="text" name="search" value="#[pattern]#" />
</dt>
<dd>&nbsp;</dd>
<dd><input type="submit" name="edittable" value="Search" /></dd>
</dl>
</fieldset>
</form>

@ -23,7 +23,7 @@
<div class="SubMenugroup">
<h3>Crawler Steering</h3>
<ul class="SubMenu">
<li><a href="/CrawlProfileEditor_p.html" class="MenuItemLink lock">Crawl Profile Editor</a></li>
<li><a href="/CrawlProfileEditor_p.html" class="MenuItemLink lock">Scheduler and Profile Editor</a></li>
<li><a href="/Table_RobotsTxt_p.html" class="MenuItemLink lock">robots.txt Monitor</a></li>
</ul>
</div>

@ -28,8 +28,13 @@ package de.anomic.data;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
import net.yacy.cora.protocol.Client;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
@ -62,6 +67,13 @@ public class WorkTables extends Tables {
super(workPath, 12);
}
/**
* Records an API call: stores the call parameters in the API database table.
* @param post the post arguments of the api call
* @param servletName the name of the servlet
* @param type name of the servlet category
* @param comment visual description of the process
*/
public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment) {
// remove the apicall attributes from the post object
String pk = post.remove(TABLE_API_COL_APICALL_PK);
@ -69,7 +81,7 @@ public class WorkTables extends Tables {
if (count == null) count = "1";
String time = post.remove(TABLE_API_COL_APICALL_SCHEDULE_TIME);
String unit = post.remove(TABLE_API_COL_APICALL_SCHEDULE_UNIT);
if (time == null || unit == null || unit.length() == 0 || "minues,hours,days".indexOf(unit) < 0) {
if (time == null || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) {
time = ""; unit = "";
}
@ -125,4 +137,83 @@ public class WorkTables extends Tables {
Log.logInfo("APICALL", apiurl);
}
/**
 * Runs the API calls that are stored in the given rows of the api table.
 * Each row supplies the call path; host, port and authentication realm
 * of the target server are passed in by the caller.
 * @param pks a collection of primary keys denoting the rows in the api table
 * @param host the host where the api shall be called
 * @param port the port on the host
 * @param realm authentication realm
 * @return a map from each called url to the http status code of that call,
 *         or -1 if the call failed with an IOException; keys without a
 *         selectable row do not appear in the map
 */
public Map<String, Integer> execAPICall(Collection<String> pks, String host, int port, String realm) {
    // one client instance is shared by all calls of this batch
    final Client httpClient = new Client();
    httpClient.setRealm(realm);
    httpClient.setTimout(120000);

    // insertion-ordered, so the result follows the iteration order of pks
    final LinkedHashMap<String, Integer> statusByUrl = new LinkedHashMap<String, Integer>();
    for (final String key : pks) {
        // look up the recorded call; a failed lookup is logged and the key is skipped
        Tables.Row apiRow = null;
        try {
            apiRow = select(WorkTables.TABLE_API_NAME, key.getBytes());
        } catch (IOException e) {
            Log.logException(e);
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
        }
        if (apiRow == null) {
            continue;
        }

        // recorded path plus the bookkeeping attributes (pk, incremented call count, schedule)
        final StringBuilder call = new StringBuilder();
        call.append("http://").append(host).append(":").append(port);
        call.append(new String(apiRow.get(WorkTables.TABLE_API_COL_URL)));
        call.append("&").append(WorkTables.TABLE_API_COL_APICALL_PK).append("=").append(new String(apiRow.getPK()));
        call.append("&").append(WorkTables.TABLE_API_COL_APICALL_COUNT).append("=").append(apiRow.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1);
        call.append("&").append(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME).append("=").append(apiRow.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, ""));
        call.append("&").append(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT).append("=").append(apiRow.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, ""));
        final String target = call.toString();

        // fire the request and remember the outcome
        try {
            httpClient.GETbytes(target);
            statusByUrl.put(target, httpClient.getStatusCode());
        } catch (IOException e) {
            Log.logException(e);
            statusByUrl.put(target, -1);
        }
    }
    return statusByUrl;
}
/**
 * Convenience variant that executes exactly one entry of the api database table
 * by delegating to the collection-based execAPICall.
 * @param pk the primary key of the entry
 * @param host the host where the api shall be called
 * @param port the port on the host
 * @param realm authentication realm
 * @return the http status code of the api call, or -1 if the call failed with an
 *         IOException or no call was made for the given key
 */
public int execAPICall(String pk, String host, int port, String realm) {
    final ArrayList<String> singleKey = new ArrayList<String>(1);
    singleKey.add(pk);
    final Map<String, Integer> statusByUrl = execAPICall(singleKey, host, port, realm);
    // an empty result means that nothing was called for this key
    return statusByUrl.isEmpty() ? -1 : statusByUrl.values().iterator().next().intValue();
}
/**
 * Calculates the next execution time of an api table row from its scheduling
 * time and unit and writes it into the row.
 * If the scheduling time is zero or negative, the next execution date is removed
 * from the row instead. If the calculated time lies in the past, the next
 * execution is set to ten minutes from now.
 * @param row the database row in the api table; it is modified in place
 * @param update if true, the interval is added to the stored next execution date,
 *        otherwise it is added to the last execution date; in both cases the
 *        row's base date is used when the respective date is missing
 */
public static void calculateAPIScheduler(Tables.Row row, boolean update) {
    Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, new Date()) : null;
    date = update ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
    int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
    if (time <= 0) {
        // scheduling is switched off for this row
        row.remove(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC);
        return;
    }
    String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
    final long now = System.currentTimeMillis();
    // a row without any stored date would leave 'date' null; use the current
    // time as the base in that case instead of failing with a NullPointerException
    long d = (date == null) ? now : date.getTime();
    if (unit.equals("minutes")) d += 60000L * time;
    if (unit.equals("hours")) d += 60000L * 60L * time;
    if (unit.equals("days")) d += 60000L * 60L * 24L * time;
    // never schedule into the past: postpone by ten minutes from now
    if (d < now) d = now + 600000L;
    row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d));
}
}

@ -82,11 +82,13 @@ import net.yacy.document.TextParser;
import net.yacy.document.content.DCEntry;
import net.yacy.document.content.SurrogateReader;
import net.yacy.document.importer.OAIListFriendsLoader;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.meta.URIMetadataRow.Components;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
@ -1436,8 +1438,6 @@ public final class Switchboard extends serverSwitch {
public boolean cleanupJob() {
try {
boolean hasDoneSomething = false;
// clear caches if necessary
if (!MemoryControl.request(8000000L, false)) {
for (Segment indexSegment: this.indexSegments) indexSegment.urlMetadata().clearCache();
@ -1486,6 +1486,40 @@ public final class Switchboard extends serverSwitch {
Log.logException(e);
}
// execute scheduled API actions
Tables.Row row;
ArrayList<String> pks = new ArrayList<String>();
Date now = new Date();
try {
Iterator<Tables.Row> plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME);
final Iterator<Tables.Row> mapIterator = this.tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator();
while (mapIterator.hasNext()) {
row = mapIterator.next();
if (row == null) continue;
Date date_next_exec = row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC) ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, now) : null;
if (date_next_exec == null) continue;
if (date_next_exec.after(now)) continue;
pks.add(new String(row.getPK()));
}
} catch (IOException e) {
Log.logException(e);
}
for (String pk: pks) try {
row = this.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
WorkTables.calculateAPIScheduler(row, true); // calculate next update time
this.tables.update(WorkTables.TABLE_API_NAME, row);
} catch (IOException e) {
Log.logException(e);
continue;
} catch (RowSpaceExceededException e) {
Log.logException(e);
continue;
}
Map<String, Integer> callResult = this.tables.execAPICall(pks, "localhost", (int) this.getConfigLong("port", 8080), this.getConfig("adminAccountBase64MD5", ""));
for (Map.Entry<String, Integer> call: callResult.entrySet()) {
log.logInfo("Scheduler executed api call, response " + call.getValue() + ": " + call.getKey());
}
// close unused connections
de.anomic.http.client.Client.cleanup();
ConnectionInfo.cleanUp();
@ -1505,7 +1539,6 @@ public final class Switchboard extends serverSwitch {
if ((crawlQueues.delegatedURL.stackSize() > 1000)) {
if (this.log.isFine()) log.logFine("Cleaning Delegated-URLs report stack, " + crawlQueues.delegatedURL.stackSize() + " entries on stack");
crawlQueues.delegatedURL.clearStack();
hasDoneSomething = true;
}
// clean up error stack
@ -1513,7 +1546,6 @@ public final class Switchboard extends serverSwitch {
if ((crawlQueues.errorURL.stackSize() > 1000)) {
if (this.log.isFine()) log.logFine("Cleaning Error-URLs report stack, " + crawlQueues.errorURL.stackSize() + " entries on stack");
crawlQueues.errorURL.clearStack();
hasDoneSomething = true;
}
// clean up loadedURL stack
@ -1522,21 +1554,21 @@ public final class Switchboard extends serverSwitch {
if (crawlResults.getStackSize(origin) > 1000) {
if (this.log.isFine()) log.logFine("Cleaning Loaded-URLs report stack, " + crawlResults.getStackSize(origin) + " entries on stack " + origin.getCode());
crawlResults.clearStack(origin);
hasDoneSomething = true;
}
}
// clean up image stack
ResultImages.clearQueues();
// clean up profiles
checkInterruption();
if (cleanProfiles()) hasDoneSomething = true;
cleanProfiles();
// clean up news
checkInterruption();
try {
if (this.log.isFine()) log.logFine("Cleaning Incoming News, " + this.peers.newsPool.size(yacyNewsPool.INCOMING_DB) + " entries on stack");
if (this.peers.newsPool.automaticProcess(peers) > 0) hasDoneSomething = true;
this.peers.newsPool.automaticProcess(peers);
} catch (final Exception e) {
Log.logException(e);
}
@ -1548,33 +1580,31 @@ public final class Switchboard extends serverSwitch {
}
// clean up seed-dbs
if(getConfigBool("routing.deleteOldSeeds.permission",true)) {
if (getConfigBool("routing.deleteOldSeeds.permission",true)) {
final long deleteOldSeedsTime = getConfigLong("routing.deleteOldSeeds.time",7)*24*3600000;
Iterator<yacySeed> e = this.peers.seedsSortedDisconnected(true,yacySeed.LASTSEEN);
yacySeed seed = null;
final ArrayList<String> deleteQueue = new ArrayList<String>();
checkInterruption();
//clean passive seeds
while(e.hasNext()) {
// clean passive seeds
while (e.hasNext()) {
seed = e.next();
if(seed != null) {
if (seed != null) {
//list is sorted -> break when peers are too young to delete
if(seed.getLastSeenUTC() > (System.currentTimeMillis()-deleteOldSeedsTime))
break;
if (seed.getLastSeenUTC() > (System.currentTimeMillis()-deleteOldSeedsTime)) break;
deleteQueue.add(seed.hash);
}
}
for(int i=0;i<deleteQueue.size();++i) this.peers.removeDisconnected(deleteQueue.get(i));
for (int i = 0; i < deleteQueue.size(); ++i) this.peers.removeDisconnected(deleteQueue.get(i));
deleteQueue.clear();
e = this.peers.seedsSortedPotential(true,yacySeed.LASTSEEN);
checkInterruption();
//clean potential seeds
while(e.hasNext()) {
// clean potential seeds
while (e.hasNext()) {
seed = e.next();
if(seed != null) {
if (seed != null) {
//list is sorted -> break when peers are too young to delete
if(seed.getLastSeenUTC() > (System.currentTimeMillis()-deleteOldSeedsTime))
break;
if (seed.getLastSeenUTC() > (System.currentTimeMillis() - deleteOldSeedsTime)) break;
deleteQueue.add(seed.hash);
}
}
@ -1627,7 +1657,7 @@ public final class Switchboard extends serverSwitch {
// after all clean up is done, check the resource usage
observer.resourceObserverJob();
return hasDoneSomething;
return true;
} catch (final InterruptedException e) {
this.log.logInfo("cleanupJob: Shutdown detected");
return false;

@ -81,87 +81,6 @@ public class BEncoder {
return null;
}
/*
public static byte[] encodeMap(
String key0, byte[] value0,
String key1, byte[] value1
) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
BDecoder.BDictionaryObject.toStream(
baos,
key0, value0,
key1, value1
);
baos.close();
return baos.toByteArray();
} catch (IOException e) {}
return null;
}
public static byte[] encodeMap(
String key0, byte[] value0,
String key1, byte[] value1,
String key2, byte[] value2
) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
BDecoder.BDictionaryObject.toStream(
baos,
key0, value0,
key1, value1,
key2, value2
);
baos.close();
return baos.toByteArray();
} catch (IOException e) {}
return null;
}
public static byte[] encodeMap(
String key0, byte[] value0,
String key1, byte[] value1,
String key2, byte[] value2,
String key3, byte[] value3
) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
BDecoder.BDictionaryObject.toStream(
baos,
key0, value0,
key1, value1,
key2, value2,
key3, value3
);
baos.close();
return baos.toByteArray();
} catch (IOException e) {}
return null;
}
public static byte[] encodeMap(
String key0, byte[] value0,
String key1, byte[] value1,
String key2, byte[] value2,
String key3, byte[] value3,
String key4, byte[] value4
) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
BDecoder.BDictionaryObject.toStream(
baos,
key0, value0,
key1, value1,
key2, value2,
key3, value3,
key4, value4
);
baos.close();
return baos.toByteArray();
} catch (IOException e) {}
return null;
}
*/
public static void main(final String[] args) {
Map<String, byte[]> m = new HashMap<String, byte[]>();
m.put("k", "000".getBytes());

@ -43,9 +43,6 @@ public final class DateFormatter {
/** minimal date format including milliseconds (fixed width: 17) */
public static final String PATTERN_SHORT_MILSEC = "yyyyMMddHHmmssSSS";
/** special time format for two-line display in html tables **/
public static final String PATTERN_HTML = "dd'&nbsp;'MMM'&nbsp;'yyyy HH:mm:ss";
/** default HTTP 1.1 header date format pattern */
public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss Z"; // with numeric time zone indicator as defined in RFC5322
public static final String PATTERN_RFC1123_SHORT = "EEE, dd MMM yyyy";
@ -71,9 +68,6 @@ public final class DateFormatter {
/** Date formatter/parser for minimal yyyyMMddHHmmssSSS pattern */
private static final SimpleDateFormat FORMAT_SHORT_MILSEC = new SimpleDateFormat(PATTERN_SHORT_MILSEC, Locale.US);
/** special time format for two-line display in html tables **/
private static final SimpleDateFormat FORMAT_HTML = new SimpleDateFormat(PATTERN_HTML, Locale.US);
/** Date formatter/non-sloppy parser for W3C datetime (ISO8601) in GMT/UTC */
private static final SimpleDateFormat FORMAT_ISO8601 = new SimpleDateFormat(PATTERN_ISO8601, Locale.US);
@ -176,11 +170,6 @@ public final class DateFormatter {
return FORMAT_RFC1123_SHORT.format(date);
}
/**
 * Formats a date with the HTML table pattern held by FORMAT_HTML.
 * @param date the date to format, may be null
 * @return the formatted date, or an empty string if date is null
 */
public static String formatHTML(final Date date) {
    if (date == null) return "";
    // SimpleDateFormat instances are not thread-safe, so guard the shared formatter.
    // NOTE(review): any other user of FORMAT_HTML must lock on the same object.
    synchronized (FORMAT_HTML) {
        return FORMAT_HTML.format(date);
    }
}
/**
* Parse dates as defined in {@linkplain http://www.w3.org/TR/NOTE-datetime}.

Loading…
Cancel
Save