moved crawl profile table from watch crawler to profile editor

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3824 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 5cae5e79c2
commit 3b46f0460f
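
Context for the diff below: the relocated table relies on YaCy's usual servlet/template convention. The Java servlet fills numbered keys such as crawlProfiles_0_name into the returned property object, plus a count under crawlProfiles; the .html template then repeats the row between #{crawlProfiles}# and #{/crawlProfiles}#, substitutes #[...]# placeholders, and picks the second alternative of #(key)#no::yes#(/key)# when the key is set to 1. The following is only a minimal, self-contained sketch of that mapping; the class name TemplateSketch and the plain java.util.Map are stand-ins for illustration, not YaCy's real serverObjects/template engine.

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class TemplateSketch {
        public static void main(String[] args) {
            Map<String, String> prop = new LinkedHashMap<String, String>();

            // servlet side: one numbered block per profile, then the iteration count
            prop.put("crawlProfiles_0_name", "testprofile");
            prop.put("crawlProfiles_0_depth", "2");
            prop.put("crawlProfiles_0_indexText", "1"); // 1 -> "yes" in #(indexText)#no::yes#(/indexText)#
            prop.put("crawlProfiles", "1");             // number of #{crawlProfiles}# repetitions

            // template side (simplified): emit one table row per counted entry
            int rows = Integer.parseInt(prop.get("crawlProfiles"));
            for (int i = 0; i < rows; i++) {
                String name  = prop.get("crawlProfiles_" + i + "_name");
                String depth = prop.get("crawlProfiles_" + i + "_depth");
                String indexText = "1".equals(prop.get("crawlProfiles_" + i + "_indexText")) ? "yes" : "no";
                System.out.println("<tr><td>" + name + "</td><td>" + depth + "</td><td>" + indexText + "</td></tr>");
            }
        }
    }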

@@ -13,7 +13,66 @@
The profiles for remote crawls, <a href="/ProxyIndexingMonitor_p.html">indexing via proxy</a> and snippet fetches
cannot be altered here as they are hard-coded.
</p>
<form action="/CrawlProfileEditor_p.html" method="post" enctype="multipart/form-data">
<!-- crawl profile list -->
<form action="CrawlProfileEditor_p.html" method="get" enctype="multipart/form-data">
<fieldset><legend>Crawl Profile List</legend>
<table border="0" cellpadding="2" cellspacing="1">
<colgroup>
<col width="120" />
<col />
<col width="16" />
<col width="60" />
<col width="10" span="2" />
<col />
<col width="10" span="5" />
</colgroup>
<tr class="TableHeader">
<td><strong>Crawl Thread</strong></td>
<td><strong>Start URL</strong></td>
<td><strong>Depth</strong></td>
<td><strong>Filter</strong></td>
<td><strong>MaxAge</strong></td>
<td><strong>Auto Filter Depth</strong></td>
<td><strong>Auto Filter Content</strong></td>
<td><strong>Max Page Per Domain</strong></td>
<td><strong>Accept '?' URLs</strong></td>
<td><strong>Fill Proxy Cache</strong></td>
<td><strong>Local Text Indexing</strong></td>
<td><strong>Local Media Indexing</strong></td>
<td><strong>Remote Indexing</strong></td>
<td></td>
</tr>
#{crawlProfiles}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[name]#</td>
<td><a href="#[startURL]#">#[startURL]#</a></td>
<td>#[depth]#</td>
<td>#[filter]#</td>
<td>#[crawlingIfOlder]#</td>
<td>#[crawlingDomFilterDepth]#</td>
<td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
<td>#[crawlingDomMaxPages]#</td>
<td>#(withQuery)#no::yes#(/withQuery)#</td>
<td>#(storeCache)#no::yes#(/storeCache)#</td>
<td>#(indexText)#no::yes#(/indexText)#</td>
<td>#(indexMedia)#no::yes#(/indexMedia)#</td>
<td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
<td>#(deleteButton)#::
<pre><input type="hidden" name="handle" value="#[handle]#" /></pre>
<pre><input type="submit" name="deleteprofile" value="Delete" /></pre>
#(/deleteButton)#
</td>
</tr>
#{/crawlProfiles}#
</table>
</fieldset>
</form>
<!-- crawl profile editor -->
<form action="CrawlProfileEditor_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend>Select the profile to edit</legend>
<select name="handle">#{profiles}#
<option value="#[handle]#"#(selected)#:: selected="selected"#(/selected)#>#[name]#</option>#{/profiles}#

@@ -49,6 +49,7 @@ import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlProfile.entry;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -100,65 +101,129 @@ public class CrawlProfileEditor_p {
final servletProperties prop = new servletProperties();
final plasmaSwitchboard sb = (plasmaSwitchboard)env;
String handle = (post == null) ? "" : post.get("handle", "");
// read post for handle
String handle = "";
if (post != null) {
handle = post.get("handle", "");
if (post.containsKey("deleteprofile")) {
// deletion of a crawl
if (handle != null) sb.profiles.removeEntry(handle);
}
}
// generate handle list
int count = 0;
Iterator it = sb.profiles.profiles(true);
entry e;
entry selentry;
while (it.hasNext()) {
e = (entry)it.next();
if (e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
e.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
selentry = (entry)it.next();
if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
continue;
prop.put("profiles_" + count + "_name", e.name());
prop.put("profiles_" + count + "_handle", e.handle());
if (handle.equals(e.handle()))
prop.put("profiles_" + count + "_name", selentry.name());
prop.put("profiles_" + count + "_handle", selentry.handle());
if (handle.equals(selentry.handle()))
prop.put("profiles_" + count + "_selected", 1);
count++;
}
prop.put("profiles", count);
selentry = sb.profiles.getEntry(handle);
e = sb.profiles.getEntry(handle);
if (e == null) return prop;
if (post.containsKey("submit")) try {
it = labels.iterator();
eentry tee;
while (it.hasNext()) {
tee = (eentry)it.next();
String cval = (String)e.map().get(tee.name);
String val = (tee.type == eentry.BOOLEAN)
? Boolean.toString(post.containsKey(tee.name))
: post.get(tee.name, cval);
if (!cval.equals(val))
e.changeEntry(tee.name, val);
}
} catch (IOException ex) {
prop.put("error", 1);
prop.put("error_message", ex.getMessage());
}
// read post for change submit
if ((post != null) && (selentry != null)) {
if (post.containsKey("submit")) {
try {
it = labels.iterator();
eentry tee;
while (it.hasNext()) {
tee = (eentry) it.next();
String cval = (String) selentry.map().get(tee.name);
String val = (tee.type == eentry.BOOLEAN) ? Boolean.toString(post.containsKey(tee.name)) : post.get(tee.name, cval);
if (!cval.equals(val)) selentry.changeEntry(tee.name, val);
}
} catch (IOException ex) {
prop.put("error", 1);
prop.put("error_message", ex.getMessage());
}
}
}
prop.put("edit", 1);
prop.put("edit_name", e.name());
prop.put("edit_handle", e.handle());
it = labels.iterator();
// generate crawl profile table
count = 0;
int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
it = sb.profiles.profiles(true);
plasmaCrawlProfile.entry profile;
boolean dark = true;
while (it.hasNext()) {
eentry ee = (eentry)it.next();
Object val = e.map().get(ee.name);
prop.put("edit_entries_" + count + "_readonly", ee.readonly ? 1 : 0);
prop.put("edit_entries_" + count + "_readonly_name", ee.name);
prop.put("edit_entries_" + count + "_readonly_label", ee.label);
prop.put("edit_entries_" + count + "_readonly_type", ee.type);
if (ee.type == eentry.BOOLEAN) {
prop.put("edit_entries_" + count + "_readonly_type_checked", Boolean.valueOf((String)val).booleanValue() ? 1 : 0);
} else {
prop.put("edit_entries_" + count + "_readonly_type_value", val);
profile = (plasmaCrawlProfile.entry) it.next();
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_name", profile.name());
prop.put("crawlProfiles_"+count+"_startURL", profile.startURL());
prop.put("crawlProfiles_"+count+"_handle", profile.handle());
prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth());
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder());
prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth()));
//start contrib [MN]
int i = 0;
String item;
while((i <= domlistlength) && !((item = profile.domName(true, i)).equals(""))){
if(i == domlistlength){
item = item + " ...";
}
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent_"+i+"_item", item);
i++;
}
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", i);
//end contrib [MN]
prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages());
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_indexText", ((profile.indexText()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_indexMedia", ((profile.indexMedia()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_remoteIndexing", ((profile.remoteIndexing()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_deleteButton", (((profile.name().equals("remote")) ||
(profile.name().equals("proxy")) ||
(profile.name().equals("snippetText")) ||
(profile.name().equals("snippetMedia")) ? 0 : 1)));
prop.put("crawlProfiles_"+count+"_deleteButton_handle", profile.handle());
dark = !dark;
count++;
}
prop.put("edit_entries", count);
prop.put("crawlProfiles", count);
// generate edit field
if (selentry == null) {
prop.put("edit", 0);
} else {
prop.put("edit", 1);
prop.put("edit_name", selentry.name());
prop.put("edit_handle", selentry.handle());
it = labels.iterator();
count = 0;
while (it.hasNext()) {
eentry ee = (eentry) it.next();
Object val = selentry.map().get(ee.name);
prop.put("edit_entries_" + count + "_readonly", ee.readonly ? 1 : 0);
prop.put("edit_entries_" + count + "_readonly_name", ee.name);
prop.put("edit_entries_" + count + "_readonly_label", ee.label);
prop.put("edit_entries_" + count + "_readonly_type", ee.type);
if (ee.type == eentry.BOOLEAN) {
prop.put("edit_entries_" + count + "_readonly_type_checked", Boolean.valueOf((String) val).booleanValue() ? 1 : 0);
} else {
prop.put("edit_entries_" + count + "_readonly_type_value", val);
}
count++;
}
prop.put("edit_entries", count);
}
return prop;
}
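
The hunk above applies the same guard in two places: the four hard-coded profiles are skipped in the edit selector, and their deleteButton key is set to 0 so the template renders no Delete form for them. The sketch below only illustrates that check, assuming the profile names are the literal strings compared against in the diff ("proxy", "remote", "snippetText", "snippetMedia"); the class and method names are hypothetical.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class ProtectedProfileSketch {
        // literal names as compared against in the deleteButton check of the diff
        private static final Set<String> PROTECTED =
                new HashSet<String>(Arrays.asList("proxy", "remote", "snippetText", "snippetMedia"));

        static boolean deletable(String profileName) {
            return !PROTECTED.contains(profileName);
        }

        public static void main(String[] args) {
            System.out.println(deletable("proxy"));       // false -> deleteButton = 0, no Delete form rendered
            System.out.println(deletable("testprofile")); // true  -> deleteButton = 1, Delete form shown
        }
    }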

@@ -159,62 +159,6 @@
</tbody>
</table>
<!-- crawl profile list -->
<p id="crawlingProfiles"><strong>Crawl Profiles:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
<colgroup>
<col width="120" />
<col />
<col width="16" />
<col width="60" />
<col width="10" span="2" />
<col />
<col width="10" span="5" />
</colgroup>
<tr class="TableHeader">
<td><strong>Crawl Thread</strong></td>
<td><strong>Start URL</strong></td>
<td><strong>Depth</strong></td>
<td><strong>Filter</strong></td>
<td><strong>MaxAge</strong></td>
<td><strong>Auto Filter Depth</strong></td>
<td><strong>Auto Filter Content</strong></td>
<td><strong>Max Page Per Domain</strong></td>
<td><strong>Accept '?' URLs</strong></td>
<td><strong>Fill Proxy Cache</strong></td>
<td><strong>Local Text Indexing</strong></td>
<td><strong>Local Media Indexing</strong></td>
<td><strong>Remote Indexing</strong></td>
<td></td>
</tr>
#{crawlProfiles}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[name]#</td>
<td><a href="#[startURL]#">#[startURL]#</a></td>
<td>#[depth]#</td>
<td>#[filter]#</td>
<td>#[crawlingIfOlder]#</td>
<td>#[crawlingDomFilterDepth]#</td>
<td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
<td>#[crawlingDomMaxPages]#</td>
<td>#(withQuery)#no::yes#(/withQuery)#</td>
<td>#(storeCache)#no::yes#(/storeCache)#</td>
<td>#(indexText)#no::yes#(/indexText)#</td>
<td>#(indexMedia)#no::yes#(/indexMedia)#</td>
<td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
<td>#(deleteButton)#::
<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
<pre><input type="hidden" name="handle" value="#[handle]#" /></pre>
<pre><input type="submit" name="deleteprofile" value="Delete" /></pre>
</form>
#(/deleteButton)#
</td>
</tr>
#{/crawlProfiles}#
</table>
#%env/templates/footer.template%#
</body>
</html>

@@ -71,12 +71,6 @@ public class WatchCrawler_p {
} else {
prop.put("info", 0);
if (post.containsKey("deleteprofile")) {
// deletion of a crawl
String handle = (String) post.get("handle");
if (handle != null) switchboard.profiles.removeEntry(handle);
}
if (post.containsKey("continue")) {
// continue queue
String queue = post.get("continue", "");
@@ -361,54 +355,6 @@ public class WatchCrawler_p {
}
}
// crawl profiles
int count = 0;
int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
Iterator it = switchboard.profiles.profiles(true);
plasmaCrawlProfile.entry profile;
boolean dark = true;
while (it.hasNext()) {
profile = (plasmaCrawlProfile.entry) it.next();
prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_name", profile.name());
prop.put("crawlProfiles_"+count+"_startURL", profile.startURL());
prop.put("crawlProfiles_"+count+"_handle", profile.handle());
prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth());
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder());
prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth()));
//start contrib [MN]
int i = 0;
String item;
while((i <= domlistlength) && !((item = profile.domName(true, i)).equals(""))){
if(i == domlistlength){
item = item + " ...";
}
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent_"+i+"_item", item);
i++;
}
prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", i);
//end contrib [MN]
prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages());
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_indexText", ((profile.indexText()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_indexMedia", ((profile.indexMedia()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_remoteIndexing", ((profile.remoteIndexing()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_deleteButton", (((profile.name().equals("remote")) ||
(profile.name().equals("proxy")) ||
(profile.name().equals("snippetText")) ||
(profile.name().equals("snippetMedia")) ? 0 : 1)));
prop.put("crawlProfiles_"+count+"_deleteButton_handle", profile.handle());
dark = !dark;
count++;
}
prop.put("crawlProfiles", count);
// performance settings
long LCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, "100"));
int LCppm = (int) (60000L / LCbusySleep);
