Merge branch 'master' of git@github.com:yacy/yacy_search_server.git

pull/122/head
Michael Peter Christen 8 years ago
commit 335868edba

@ -1,3 +1,3 @@
#!/usr/bin/env sh
cd "`dirname $0`"
./apicall.sh /IndexImportMediawiki_p.html?file=$1 > /dev/null
./protectedPostApiCall.sh "IndexImportMediawiki_p.html" "file=$1"

@ -0,0 +1,29 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': User Accounts</title>
#%env/templates/metas.template%#
<script type="text/javascript" src="js/sorttable.js"></script>
</head>
<body>
#%env/templates/header.template%#
#%env/templates/submenuUseCaseAccount.template%#
<h2>User List</h2>
<fieldset><legend>User Accounts</legend>
<table class="sortable">
<tr class="TableHeader">
<th>User</th><th>First name</th><th>Last name</th><th>Address</th><th>Last Access</th><th>Rights</th><th>Time</th><th>Traffic</th>
</tr>
#{userlist}#
<tr>
<td><a href="ConfigUser_p.html?user=#[username]#">#[username]#</a></td><td>#[firstname]#</td><td>#[lastname]#</td><td>#[address]#</td><td>#[lastaccess]#</td><td>#[rights]#</td><td>#[time]#</td><td>#[traffic]#</td>
</tr>
#{/userlist}#
</table>
</fieldset>
#%env/templates/footer.template%#
</body>
</html>

@ -0,0 +1,103 @@
// ConfigAccountList_p.java
// -------------------------
// (c) 2017 by reger24; https://github.com/reger24
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Date;
import java.util.Iterator;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.UserDB;
import net.yacy.data.UserDB.AccessRight;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet backing ConfigAccountList_p.html: fills the "userlist" template loop
 * with one row per entry of the user database (name, address, last access,
 * granted rights and the percentage of the time / traffic quota already used).
 */
public class ConfigAccountList_p {

    /**
     * Builds the template properties for the user account list.
     *
     * @param header servlet request header (not used)
     * @param post request parameters (not used; may be null)
     * @param env server environment, must be the {@link Switchboard}
     * @return the properties for the template; "userlist" holds the number of rows
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        if (sb.userDB == null) {
            // no user database available: render an empty list
            prop.put("userlist", 0);
            return prop;
        }

        // the set of possible rights is the same for every user, fetch it once
        final AccessRight[] rights = AccessRight.values();

        //Generate Userlist
        final Iterator<UserDB.Entry> it = sb.userDB.iterator(true);
        int numUsers = 0;
        while (it.hasNext()) {
            final UserDB.Entry entry = it.next();
            if (entry == null) {
                continue;
            }
            final String prefix = "userlist_" + numUsers + "_";

            prop.putHTML(prefix + "username", entry.getUserName());
            prop.putHTML(prefix + "lastname", entry.getLastName());
            prop.putHTML(prefix + "firstname", entry.getFirstName());
            prop.putHTML(prefix + "address", entry.getAddress());

            final Long lastAccess = entry.getLastAccess();
            if (lastAccess != null) {
                prop.put(prefix + "lastaccess", GenericFormatter.FORMAT_SIMPLE.format(new Date(lastAccess.longValue())));
            } else {
                prop.put(prefix + "lastaccess", "never");
            }

            prop.putHTML(prefix + "rights", joinRights(entry, rights));

            // "Time" column: percentage of the time limit already used (empty when unlimited)
            final long timeLimit = entry.getTimeLimit();
            if (timeLimit > 0) {
                prop.put(prefix + "time", percentOf(entry.getTimeUsed(), timeLimit));
            } else {
                prop.put(prefix + "time", "");
            }

            // "Traffic" column: percentage of the traffic limit already used (empty when unlimited);
            // a stored limit of 0 is treated like "no limit" to avoid a division by zero
            final Long trafficLimit = entry.getTrafficLimit();
            if (trafficLimit != null && trafficLimit.longValue() > 0) {
                prop.put(prefix + "traffic", percentOf(entry.getTrafficSize(), trafficLimit.longValue()));
            } else {
                prop.put(prefix + "traffic", "");
            }

            numUsers++;
        }
        prop.put("userlist", numUsers);

        // return rewrite properties
        return prop;
    }

    /**
     * Joins the friendly names of all rights granted to the given user.
     *
     * @param entry the user entry to inspect
     * @param rights all rights to test
     * @return the friendly names separated by ", ", or an empty string when no right is granted
     */
    private static String joinRights(final UserDB.Entry entry, final AccessRight[] rights) {
        final StringBuilder joined = new StringBuilder();
        for (final AccessRight right : rights) {
            if (entry.hasRight(right)) {
                if (joined.length() > 0) {
                    joined.append(", ");
                }
                joined.append(right.getFriendlyName());
            }
        }
        return joined.toString();
    }

    /**
     * Computes the integer percentage of a quota that has been consumed.
     *
     * @param used the consumed amount
     * @param limit the quota, must be greater than zero
     * @return used * 100 / limit (integer division)
     */
    private static long percentOf(final long used, final long limit) {
        return used * 100 / limit;
    }
}

@ -75,7 +75,7 @@
</fieldset>
<fieldset><legend>User Accounts</legend>
<form action="ConfigAccounts_p.html" accept-charset="UTF-8">
<form action="ConfigAccounts_p.html" method="post" accept-charset="UTF-8">
<input type="hidden" name="transactionToken" value="#[transactionToken]#"/>
<fieldset><legend>Select user</legend>
<dl>
@ -84,7 +84,7 @@
<select name="user" id="user">
<option value="newuser">New user</option>#{users}#
<option>#[user]#</option>#{/users}#
</select>
</select> or goto user <a href="ConfigAccountList_p.html">account list</a>
</dd>
<dt>&nbsp;</dt>
<dd>
@ -128,7 +128,7 @@
</dl>
</fieldset>
</form>
</fieldset>
</fieldset>
#%env/templates/footer.template%#
</body>

@ -55,7 +55,7 @@ public class ConfigAccounts_p {
/* Acquire a transaction token for the next POST form submission */
prop.put(TransactionManager.TRANSACTION_TOKEN_PARAM, TransactionManager.getTransactionToken(header));
final Switchboard sb = Switchboard.getSwitchboard();
final Switchboard sb = (Switchboard) env;
UserDB.Entry entry = null;
// admin password
@ -156,8 +156,8 @@ public class ConfigAccounts_p {
//user != current_user
//user=from userlist
//current_user = edited user
} else if (post.containsKey("user") && !"newuser".equals(post.get("user"))){
TransactionManager.checkPostTransaction(header, post);
} else if (post.containsKey("user") && !"newuser".equals(post.get("user"))) {
TransactionManager.checkPostTransaction(header, post);
if (post.containsKey("change_user")) {
//defaults for newuser are set above
entry = sb.userDB.getEntry(post.get("user"));
@ -183,7 +183,7 @@ public class ConfigAccounts_p {
sb.userDB.removeEntry(post.get("user"));
}
} else if (post.containsKey("change")) { //New User / edit User
TransactionManager.checkPostTransaction(header, post);
TransactionManager.checkPostTransaction(header, post);
prop.put("text", "0");
prop.put("error", "0");
@ -270,7 +270,24 @@ public class ConfigAccounts_p {
prop.putHTML("text_username", username);
prop.put("text", "2");
}//edit user
prop.putHTML("username", username);
if (entry != null) {
//TODO: set username read-only in html
prop.putHTML("current_user", entry.getUserName());
prop.putHTML("username", entry.getUserName());
prop.putHTML("firstname", entry.getFirstName());
prop.putHTML("lastname", entry.getLastName());
prop.putHTML("address", entry.getAddress());
prop.put("timelimit", entry.getTimeLimit());
prop.put("timeused", entry.getTimeUsed());
int count = 0;
for (final AccessRight right : rights) {
prop.put("rights_" + count + "_set", entry.hasRight(right) ? "1" : "0");
count++;
}
prop.put("rights", count);
}
}
//Generate Userlist

@ -121,12 +121,6 @@
</li>#(/search.navigation.location)#
</ul></td>
</tr>
<tr>
<td><input type="checkbox" name="search.navigation.hosts" value="true" #(search.navigation.hosts)#::checked="checked" #(/search.navigation.hosts)# /></td>
<td><ul class="nav nav-sidebar menugroup" id="sidebarDomains" style="padding-left:15px; padding-right:10px;">
<li><h3>Provider</h3></li>
</ul></td>
</tr>
<tr>
<td><input type="checkbox" name="search.navigation.language" value="true" #(search.navigation.language)#::checked="checked" #(/search.navigation.language)# /></td>
<td><ul class="nav nav-sidebar menugroup" id="sidebarLanguages" style="padding-left:15px; padding-right:10px;">

@ -88,7 +88,7 @@ public class ConfigSearchPage_p {
if (post.getBoolean("search.navigation.location")) nav += "location,";
// if (post.getBoolean("search.navigation.filetype")) nav += "filetype,";
if (post.getBoolean("search.navigation.protocol")) nav += "protocol,";
if (post.getBoolean("search.navigation.hosts")) nav += "hosts,";
// if (post.getBoolean("search.navigation.hosts")) nav += "hosts,";
if (post.getBoolean("search.navigation.language")) nav += "language,";
// if (post.getBoolean("search.navigation.authors")) nav += "authors,";
// if (post.getBoolean("search.navigation.collections")) nav += "collections,";
@ -198,7 +198,7 @@ public class ConfigSearchPage_p {
prop.put("search.navigation.location", sb.getConfig("search.navigation", "").indexOf("location",0) >= 0 ? 1 : 0);
// prop.put("search.navigation.filetype", sb.getConfig("search.navigation", "").indexOf("filetype",0) >= 0 ? 1 : 0);
prop.put("search.navigation.protocol", sb.getConfig("search.navigation", "").indexOf("protocol",0) >= 0 ? 1 : 0);
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
// prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
prop.put("search.navigation.language", sb.getConfig("search.navigation", "").indexOf("language",0) >= 0 ? 1 : 0);
// prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
// prop.put("search.navigation.collections", sb.getConfig("search.navigation", "").indexOf("collections",0) >= 0 ? 1 : 0);

@ -0,0 +1,67 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': User Editor</title>
#%env/templates/metas.template%#
</head>
<body>
#%env/templates/header.template%#
#%env/templates/submenuUseCaseAccount.template%#
<h2>User Account Editor</h2>
<!-- Page 1: Results -->
#(text)#
::
<p>User created: #[username]#</p>
::
<p>User changed: #[username]#</p>
#(/text)#
#(error)#
::
<p class="error">Generic error.</p>
::
<p class="error">Passwords do not match.</p>
::
<p class="error">Username too short. Username must be &gt;= 4 Characters.</p>
::
<p class="error">Username already used (not allowed).</p>
#(/error)#
<form action="ConfigUser_p.html" method="post" accept-charset="UTF-8">
<fieldset><legend>Edit current user: #[username]#</legend>
<!-- Hidden(text for debugging): <input type="text" name="current_user" value="#[current_user]#" readonly> -->
<input type="hidden" name="current_user" value="#[current_user]#" />
<dl>
<dt><label for="username">Username</label>:</dt>
<dd><input type="text" id="username" name="username" value="#[username]#" /></dd>
<dt><label for="password">Password</label>:</dt>
<dd><input type="password" id="password" name="password" /></dd>
<dt><label for="password2">Repeat password</label>:</dt>
<dd><input type="password" id="password2" name="password2" /></dd>
<dt><label for="firstname">First name</label>:</dt>
<dd><input type="text" id="firstname" name="firstname" value="#[firstname]#" /></dd>
<dt><label for="lastname">Last name</label>:</dt>
<dd><input type="text" id="lastname" name="lastname" value="#[lastname]#" /></dd>
<dt><label for="address">Address</label>:</dt>
<dd><input type="text" id="address" name="address" value="#[address]#" /></dd>
<dt>Rights:</dt>
<dd>
#{rights}#
<input type="checkbox" id="#[name]#" name="#[name]#"#(set)#:: checked="checked"#(/set)# /><label for="#[name]#">#[friendlyName]# right</label><br />
#{/rights}#
</dd>
<dt><label for="tlimit">Timelimit</label>:</dt>
<dd><input type="text" id="tlimit" name="timelimit" value="#[timelimit]#" /></dd>
<dt><label for="tused">Time used</label>:</dt>
<dd><input type="text" id="tused" name="timeused" value="#[timeused]#" /></dd>
<dt>&nbsp;</dt>
<dd><input type="submit" name="change" value="Save User" class="btn btn-primary"/>
<input type="submit" name="delete" value="Delete User" class="btn btn-danger"/>
<button name="cancel" class="btn btn-default btn-link" value="ConfigAccountList_p.html">back to user list</button></dd>
</dl>
</fieldset>
</form>
#%env/templates/footer.template%#
</body>
</html>

@ -0,0 +1,172 @@
// ConfigUser_p.java
// -----------------------
// (c) 2017 by reger24; https://github.com/reger24
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.EnumMap;
import java.util.Map;
import net.yacy.cora.order.Digest;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.UserDB;
import net.yacy.data.UserDB.AccessRight;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet backing ConfigUser_p.html: shows a single user account in an edit
 * form and handles the form actions "change" (save), "delete" and "cancel".
 *
 * NOTE(review): unlike ConfigAccounts_p this servlet performs no transaction
 * token check and the form template carries no transactionToken field -
 * confirm whether CSRF protection is intended here.
 */
public class ConfigUser_p {

    /**
     * Handles a request to the user editor page.
     *
     * @param header servlet request header (not used)
     * @param post request parameters, may be null. Supported keys :
     *            <ul>
     *            <li>cancel : redirect back to the account list</li>
     *            <li>user : name of the user to load into the form</li>
     *            <li>change : save the submitted form values to the user named by "username"</li>
     *            <li>delete : remove the user named by "username" and redirect to the account list</li>
     *            </ul>
     * @param env server environment, must be the {@link Switchboard}
     * @return the properties for the template
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        if (post != null && post.containsKey("cancel")) {
            prop.put(serverObjects.ACTION_LOCATION, "ConfigAccountList_p.html");
            return prop;
        }

        //default values
        prop.put("current_user", "newuser");
        prop.put("username", "");
        prop.put("firstname", "");
        prop.put("lastname", "");
        prop.put("address", "");
        prop.put("timelimit", "");
        prop.put("timeused", "");
        final AccessRight[] rights = AccessRight.values();
        for (int i = 0; i < rights.length; i++) {
            prop.put("rights_" + i + "_name", rights[i].toString());
            prop.put("rights_" + i + "_friendlyName", rights[i].getFriendlyName());
            prop.put("rights_" + i + "_set", "0");
        }
        prop.put("rights", rights.length);
        prop.put("users", "0");

        if (sb.userDB == null || post == null) {
            // nothing to load or to modify
            return prop;
        }

        if (post.containsKey("user") && !"newuser".equals(post.get("user"))) {
            // load an existing user into the form
            final UserDB.Entry entry = sb.userDB.getEntry(post.get("user"));
            if (entry != null) {
                fillForm(prop, entry, rights);
            }
        } else if (post.containsKey("change")) { // edit User
            changeUser(post, prop, sb, rights);
        } else if (post.containsKey("delete")) {
            final String username = post.get("username");
            if (username != null) { // a request without a user name has nothing to delete
                sb.userDB.removeEntry(username);
            }
            prop.put(serverObjects.ACTION_LOCATION, "ConfigAccountList_p.html"); // jump back to user list
        }

        // return rewrite properties
        return prop;
    }

    /**
     * Applies the submitted form values to the existing user named by the
     * "username" parameter. Sets "error" to a non-zero code on failure and
     * "text" to 2 ("User changed") only when the update succeeded.
     *
     * @param post request parameters, must not be null
     * @param prop template properties to fill
     * @param sb the switchboard providing the user database and configuration
     * @param rights all known access rights
     */
    private static void changeUser(final serverObjects post, final serverObjects prop, final Switchboard sb, final AccessRight[] rights) {
        prop.put("text", "0");
        prop.put("error", "0");

        final String username = post.get("username");
        final String pw1 = post.get("password");
        final String pw2 = post.get("password2");

        if (pw1 == null || !pw1.equals(pw2)) {
            prop.put("error", "2"); //PW does not match
            return;
        }
        if (username == null) {
            // malformed request without a user name: report instead of failing with a NullPointerException
            prop.put("error", "1");
            return;
        }
        // do not allow same username as staticadmin
        if (username.equalsIgnoreCase(sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"))) {
            prop.put("error", "4");
            return;
        }

        final String firstName = post.get("firstname");
        final String lastName = post.get("lastname");
        final String address = post.get("address");
        final String timeLimit = post.get("timelimit");
        final String timeUsed = post.get("timeused");
        // a checked checkbox is submitted with the value "on"; anything else means "not granted"
        final Map<AccessRight, String> rightsSet = new EnumMap<AccessRight, String>(AccessRight.class);
        for (final AccessRight right : rights) {
            final String key = right.toString();
            rightsSet.put(right, post.containsKey(key) && "on".equals(post.get(key)) ? "true" : "false");
        }

        final UserDB.Entry entry = sb.userDB.getEntry(username);
        if (entry == null) {
            // only existing users can be edited here
            prop.put("error", "1");
        } else {
            try {
                if (!"".equals(pw1)) {
                    // with prefix of encoding method (supported MD5: )
                    entry.setProperty(UserDB.Entry.MD5ENCODED_USERPWD_STRING, "MD5:" + Digest.encodeMD5Hex(username + ":" + sb.getConfig(SwitchboardConstants.ADMIN_REALM, "YaCy") + ":" + pw1));
                }
                entry.setProperty(UserDB.Entry.USER_FIRSTNAME, firstName);
                entry.setProperty(UserDB.Entry.USER_LASTNAME, lastName);
                entry.setProperty(UserDB.Entry.USER_ADDRESS, address);
                entry.setProperty(UserDB.Entry.TIME_LIMIT, timeLimit);
                entry.setProperty(UserDB.Entry.TIME_USED, timeUsed);
                for (final AccessRight right : rights) {
                    entry.setProperty(right.toString(), rightsSet.get(right));
                }
                fillForm(prop, entry, rights);

                // report success only after every property has been stored
                prop.putHTML("text_username", username);
                prop.put("text", "2");
            } catch (final Exception e) {
                ConcurrentLog.logException(e);
                prop.put("error", "1");
            }
        }
        // echo the submitted name back into the form
        prop.putHTML("username", username);
    }

    /**
     * Copies the stored values of a user entry into the form properties.
     *
     * @param prop template properties to fill
     * @param entry the user entry to display
     * @param rights all known access rights, in the order used for the "rights" template loop
     */
    private static void fillForm(final serverObjects prop, final UserDB.Entry entry, final AccessRight[] rights) {
        //TODO: set username read-only in html
        prop.putHTML("current_user", entry.getUserName());
        prop.putHTML("username", entry.getUserName());
        prop.putHTML("firstname", entry.getFirstName());
        prop.putHTML("lastname", entry.getLastName());
        prop.putHTML("address", entry.getAddress());
        prop.put("timelimit", entry.getTimeLimit());
        prop.put("timeused", entry.getTimeUsed());
        for (int i = 0; i < rights.length; i++) {
            prop.put("rights_" + i + "_set", entry.hasRight(rights[i]) ? "1" : "0");
        }
        prop.put("rights", rights.length);
    }
}

@ -1,5 +1,5 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<!DOCTYPE html>
<html lang="en">
<head>
<title>YaCy '#[clientname]#': MediaWiki Dump Import</title>
#%env/templates/metas.template%#
@ -13,24 +13,36 @@
<h2>MediaWiki Dump Import</h2>
#(import)#
<p>#(status)#No import thread is running, you can start a new thread here::Bad input data: #[message]# #(/status)#</p>
<form action="IndexImportMediawiki_p.html" method="get" accept-charset="UTF-8">
<!-- no post method here, we don't want to transmit the whole file, only the path-->
<p>#(prevStatus)#
::<div class="alert alert-danger" role="alert">Error on last import : #[message]#</div>
#(/prevStatus)#</p>
<p>#(status)#<div class="alert alert-info" role="alert">No import thread is running, you can start a new thread here</div>
::<div class="alert alert-danger" role="alert">Error : dump <abbr title="Uniform Resource Locator">URL</abbr> is malformed.</div>
::<div class="alert alert-danger" role="alert">Error : file not found "#[sourceFile]#"</div>
::<div class="alert alert-danger" role="alert">Error : can not read file "#[sourceFile]#"</div>
::<div class="alert alert-danger" role="alert">Error : you selected a directory ("#[sourceFile]#")</div>
#(/status)#</p>
<form action="IndexImportMediawiki_p.html" method="post" accept-charset="UTF-8" class="form-horizontal">
<input type="hidden" name="transactionToken" value="#[transactionToken]#"/>
<fieldset>
<legend>MediaWiki Dump File Selection: select an XML file (which may be bz2- or gz-encoded)</legend>
<legend>MediaWiki Dump File Selection</legend>
<p>
You can import MediaWiki dumps here. An example is the file
<a href="http://dumps.wikimedia.org/dewiki/latest/dewiki-latest-pages-articles.xml.bz2">
http://dumps.wikimedia.org/dewiki/latest/dewiki-latest-pages-articles.xml.bz2</a>.
<br />
Dumps must be stored in the local file system in XML format and may be compressed in gz or bz2.
<br />
<div class="input-group">
<span style="display: inline-block">
<input name="file" style="" type="file" value="" size="75" /></span>
<div class="btn-group">
<input name="submit" class="btn btn-primary" type="submit" value="Import MediaWiki Dump" />
</div>
<a href="https://dumps.wikimedia.org/dewiki/latest/dewiki-latest-pages-articles.xml.bz2">
https://dumps.wikimedia.org/dewiki/latest/dewiki-latest-pages-articles.xml.bz2</a>.
</p>
<p>
Dumps can be stored in the local file system or on a remote server in XML format and may be compressed in gz or bz2.
</p>
<div class="form-group">
<div class="col-sm-3 col-md-2 col-lg-2">
<label for="file" class="control-label" >Dump file path or <abbr title="Uniform Resource Locator">URL</abbr></label>
</div>
<div class="col-sm-9 col-md-8 col-lg-8">
<input id="file" class="form-control" name="file" type="text" title="Dump file path on this YaCy server file system, or any remote URL" required="required"/>
</div>
</div>
<input name="submit" class="btn btn-primary" type="submit" value="Import MediaWiki Dump" />
</fieldset>
</form>
<p>
@ -61,6 +73,8 @@
</ul>
<br />
::
<p>#(status)#::<div class="alert alert-danger" role="alert">Error encountered : #[message]#</div>
#(/status)#</p>
<form><fieldset><legend>Import Process</legend>
<dl>
<dt>Thread:</dt><dd>#[thread]#</dd>

@ -23,22 +23,45 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.File;
import java.net.MalformedURLException;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.TransactionManager;
import net.yacy.document.importer.MediawikiImporter;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
* Import of MediaWiki dump files in the local index.
*/
public class IndexImportMediawiki_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
/**
* Run conditions :
* - no MediaWiki import thread is running : allow to start a new import by filling the "file" parameter
* - the MediaWiki import thread is running : returns monitoring information.
* @param header servlet request header
* @param post request parameters. Supported keys :
* <ul>
* <li>file : a dump file path on this YaCy server local file system</li>
* </ul>
* @param env server environment
* @return the servlet answer object
*/
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
if (MediawikiImporter.job != null && MediawikiImporter.job.isAlive()) {
// one import is running, no option to insert anything
prop.put("import", 1);
final String jobErrorMessage = MediawikiImporter.job.status();
if( jobErrorMessage != null && !jobErrorMessage.isEmpty()) {
prop.put("import_status", 1);
prop.put("import_status_message", jobErrorMessage);
}
prop.put("import_thread", "running");
prop.put("import_dump", MediawikiImporter.job.source());
prop.put("import_count", MediawikiImporter.job.count());
@ -49,28 +72,63 @@ public class IndexImportMediawiki_p {
prop.put("import_remainingMinutes", (MediawikiImporter.job.remainingTime() / 60) % 60);
} else {
prop.put("import", 0);
if(MediawikiImporter.job != null) {
/* Report eventual fail report from the last terminated import (for example an HTTP 404 status)
* that else could be missed by the user because of page refresh */
final String jobErrorMessage = MediawikiImporter.job.status();
if( jobErrorMessage != null && !jobErrorMessage.isEmpty()) {
prop.put("import_prevStatus", 1);
prop.put("import_prevStatus_message", jobErrorMessage);
}
}
if (post == null) {
prop.put("import_status", 0);
/* Acquire a transaction token for the next POST form submission */
final String token = TransactionManager.getTransactionToken(header);
prop.put(TransactionManager.TRANSACTION_TOKEN_PARAM, token);
prop.put("import_" + TransactionManager.TRANSACTION_TOKEN_PARAM, token);
} else {
if (post.containsKey("file")) {
/* Check the transaction is valid */
TransactionManager.checkPostTransaction(header, post);
String file = post.get("file");
if (file.startsWith("file://")) file = file.substring(7);
if (file.startsWith("http")) {
prop.put("import_dump", "");
prop.put("import_thread", "Error: file argument must be a path to a document in the local file system");
} else {
final File sourcefile = new File(file);
if (sourcefile.exists()) {
MediawikiImporter.job = new MediawikiImporter(sourcefile, sb.surrogatesInPath);
MediawikiImporter.job.start();
prop.put("import_dump", MediawikiImporter.job.source());
prop.put("import_thread", "started");
} else {
prop.put("import_dump", "");
prop.put("import_thread", "Error: file not found ["+sourcefile+"]");
}
}
prop.put("import", 1);
MultiProtocolURL sourceURL = null;
int status = 0;
String sourceFilePath = "";
try {
sourceURL = new MultiProtocolURL(file);
if(sourceURL.isFile()) {
final File sourcefile = sourceURL.getFSFile();
sourceFilePath = sourcefile.getAbsolutePath();
if (!sourcefile.exists()) {
status = 2;
} else if (!sourcefile.canRead()) {
status = 3;
} else if (sourcefile.isDirectory()) {
status = 4;
}
}
} catch (MalformedURLException e) {
status = 1;
}
if (status == 0) {
MediawikiImporter.job = new MediawikiImporter(sourceURL, sb.surrogatesInPath);
MediawikiImporter.job.start();
prop.put("import_dump", MediawikiImporter.job.source());
prop.put("import_thread", "started");
prop.put("import", 1);
} else {
prop.put("import_status", status);
prop.put("import_status_sourceFile", sourceFilePath);
/* Acquire a transaction token for the next POST form submission */
final String token = TransactionManager.getTransactionToken(header);
prop.put(TransactionManager.TRANSACTION_TOKEN_PARAM, token);
prop.put("import_" + TransactionManager.TRANSACTION_TOKEN_PARAM, token);
}
prop.put("import_count", 0);
prop.put("import_speed", 0);
prop.put("import_runningHours", 0);

@ -22,13 +22,16 @@
You can download warc archives for example here
<a href="https://archive.org/search.php?query=subject%3A%22warcarchives%22&and[]=subject%3A%22warcarchives%22" target="_blank">Internet Archive</a>.
</p>
<div class="input-group">
<span style="display: inline-block">
<input name="file" type="file" value="" size="75" /></span>
<div class="btn-group">
<input name="submit" class="btn btn-primary" type="submit" value="Import Warc File" />
</div>
</div>
<dl>
<dt class="TableCellDark"><label for="file">File:</label></dt>
<dd><input name="file" id="file" type="file" value="" size="75" /></dd>
<dt></dt>
<dd>or</dd>
<dt class="TableCellDark"><label for="url">Url:</label></dt>
<dd><input name="url" id="url" value="" size="75"/></dd>
<dt></dt>
<dd><input name="submit" class="btn btn-primary" type="submit" value="Import Warc File" /></dd>
</dl>
</fieldset>
</form>

@ -18,6 +18,10 @@
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.importer.WarcImporter;
@ -45,23 +49,42 @@ public class IndexImportWarc_p {
} else {
prop.put("import", 0);
if (post != null) {
if (post.containsKey("file")) {
String file = post.get("file");
final File sourcefile = new File(file);
if (sourcefile.exists()) {
try {
WarcImporter wi = new WarcImporter(sourcefile);
wi.start();
prop.put("import_thread", "started");
} catch (FileNotFoundException ex) {
prop.put("import_thread", "Error: file not found [" + file + "]");
if (post.containsKey("file") || post.containsKey("url")) {
String filename = post.get("file");
if (filename != null && filename.length() > 0) {
final File sourcefile = new File(filename);
if (sourcefile.exists()) {
try {
WarcImporter wi = new WarcImporter(sourcefile);
wi.start();
prop.put("import_thread", "started");
} catch (FileNotFoundException ex) {
prop.put("import_thread", "Error: file not found [" + filename + "]");
}
prop.put("import", 1);
prop.put("import_warcfile", filename);
} else {
prop.put("import_warcfile", "");
prop.put("import_thread", "Error: file not found [" + filename + "]");
}
prop.put("import_warcfile", file);
} else {
prop.put("import_warcfile", "");
prop.put("import_thread", "Error: file not found [" + file + "]");
String urlstr = post.get("url");
if (urlstr != null && urlstr.length() > 0) {
try {
MultiProtocolURL url = new MultiProtocolURL(urlstr);
WarcImporter wi = new WarcImporter(url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent), urlstr);
wi.start();
prop.put("import_thread", "started");
} catch (MalformedURLException ex) {
prop.put("import_thread", ex.getMessage());
} catch (IOException ex) {
prop.put("import_thread", ex.getMessage());
}
prop.put("import", 1);
prop.put("import_warcfile", urlstr);
}
}
prop.put("import", 1);
prop.put("import_count", 0);
prop.put("import_speed", 0);
prop.put("import_runningHours", 0);

@ -14,7 +14,8 @@
You can share your local addition to translations and distribute it to other peers.
The remote peer can vote on your translation and add it to the own local translation.<br>
(#[transsize]# entries available)&nbsp;&nbsp;<input type="submit" class="btn btn-default" name="publishtranslation" value="Publish">
&nbsp;&nbsp;<small>You can check your outgoing messages <a href="News.html?page=3">here</a></small>
&nbsp;&nbsp;<small>You can check your outgoing messages <a href="News.html?page=3">here</a>.</small>
To edit or add local translations you can use <a href="Translator_p.html">Translator_p.html</a>.
</p>
</form>
#(errmsg)#::<p class="error">Please activate a different language <a href='ConfigBasic.html'>here</a></p>#(/errmsg)#

@ -59,11 +59,15 @@ public final class message {
}
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
if (post == null || env == null) { return null; }
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
prop.put("messagesize", "0");
prop.put("attachmentsize", "0");
prop.put("response", "-1"); // request rejected
if ((post == null) || (env == null)) return prop;
if (!Protocol.authentifyRequest(post, env)) return prop;
@ -74,22 +78,17 @@ public final class message {
final int messagesize = 10240;
final int attachmentsize = 0;
prop.put("messagesize", "0");
prop.put("attachmentsize", "0");
final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability
// check if we are the right target and requester has correct information about this peer
if ((sb.peers.mySeed() == null) || (!(sb.peers.mySeed().hash.equals(youare)))) {
// this request has a wrong target
prop.put("response", "-1"); // request rejected
return prop;
}
if ((sb.isRobinsonMode()) &&
(!((sb.isPublicRobinson()) ||
(sb.isInMyCluster(header.getRemoteAddr()))))) {
if ((sb.isRobinsonMode())
&& (!((sb.isPublicRobinson())
|| (sb.isInMyCluster(header.getRemoteAddr()))))) {
// if we are a robinson cluster, answer only if this client is known by our network definition
prop.put("response", "-1"); // request rejected
return prop;
}
@ -107,7 +106,7 @@ public final class message {
// post: post message to message board
final String otherSeedString = post.get("myseed", "");
if (otherSeedString.isEmpty()) {
prop.put("response", "-1"); // request rejected
// request rejected
return prop;
}
//Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
@ -115,20 +114,20 @@ public final class message {
try {
otherSeed = Seed.genRemoteSeed(otherSeedString, false, ias == null ? null : ias.getHostAddress());
} catch (final IOException e) {
prop.put("response", "-1"); // don't accept messages for bad seeds
// don't accept messages for bad seeds
return prop;
}
String subject = crypt.simpleDecode(post.get("subject", "")); // message's subject
String message = crypt.simpleDecode(post.get("message", "")); // message body
if (subject == null || message == null) {
prop.put("response", "-1"); // don't accept empty messages
// don't accept empty messages
return prop;
}
message = message.trim();
subject = subject.trim();
if (subject.isEmpty() || message.isEmpty()) {
prop.put("response", "-1"); // don't accept empty messages
// don't accept empty messages
return prop;
}
@ -156,7 +155,6 @@ public final class message {
}
}
// System.out.println("respond = " + prop.toString());
// return rewrite properties
return prop;

@ -156,14 +156,6 @@ function toggleVisibility(name, count) {
}
}
</script>
#(nav-domains)#::
<ul class="nav nav-sidebar menugroup">
<li style="cursor: pointer; cursor: hand;"><h3 onclick="toggleVisibility('domains', #[count]#);">Provider [#[count]#] <span style="float:right" id="chevron-domains" class="glyphicon glyphicon-chevron-down" title="click to expand facet"></span></h3></li>
#{element}#
<li style="display:none" id="#[id]#"><a href="#[url]#" class="MenuItemLink"><input type="checkbox" onchange="window.location.href='#[url]#'"#(on)# checked="checked"::#(/on)#/> #[name]# (#[count]#)</a></li>
#{/element}#</ul>
<script>if (#[count]# <= 8) toggleVisibility('domains', #[count]#);</script>
#(/nav-domains)#
#(nav-languages)#::
<ul class="nav nav-sidebar menugroup">

@ -103,47 +103,6 @@ public class yacysearchtrailer {
int count;
Iterator<String> navigatorIterator;
// domain navigators
final ScoreMap<String> hostNavigator = theSearch.hostNavigator;
if (hostNavigator == null || hostNavigator.isEmpty()) {
prop.put("nav-domains", 0);
} else {
prop.put("nav-domains", 1);
navigatorIterator = hostNavigator.keys(false);
int i = 0, pos = 0, neg = 0;
String nav, rawNav;
while (i < QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = hostNavigator.get(name);
if (count == 0) break;
nav = "site%3A" + name;
/* Avoid double percent encoding in QueryParams.navurl */
rawNav = "site:" + name;
if (theSearch.query.modifier.sitehost == null || !theSearch.query.modifier.sitehost.contains(name)) {
pos++;
prop.put("nav-domains_element_" + i + "_on", 1);
prop.put(fileType, "nav-domains_element_" + i + "_modifier", nav);
} else {
neg++;
prop.put("nav-domains_element_" + i + "_on", 0);
prop.put(fileType, "nav-domains_element_" + i + "_modifier", "-" + nav);
nav="";
rawNav = "";
}
prop.put(fileType, "nav-domains_element_" + i + "_name", name);
prop.put(fileType, "nav-domains_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString());
prop.put(fileType, "nav-domains_element_" + i + "_id", "domains_" + i);
prop.put("nav-domains_element_" + i + "_count", count);
prop.put("nav-domains_element_" + i + "_nl", 1);
i++;
}
prop.put("nav-domains_element", i);
prop.put("nav-domains_count", i);
i--;
prop.put("nav-domains_element_" + i + "_nl", 0);
if (pos == 1 && neg == 0) prop.put("nav-domains", 0); // this navigation is not useful
}
// language navigators
final ScoreMap<String> languageNavigator = theSearch.languageNavigator;
if (languageNavigator == null || languageNavigator.isEmpty()) {
@ -213,7 +172,6 @@ public class yacysearchtrailer {
for (Map.Entry<String, Integer> entry: cloud) {
name = entry.getKey();
count = entry.getValue();
prop.put("nav-topics_element_" + i + "_on", 1);
prop.put(fileType, "nav-topics_element_" + i + "_modifier", name);
prop.put(fileType, "nav-topics_element_" + i + "_name", name);
prop.put(fileType, "nav-topics_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, name, false).toString());

@ -34,19 +34,7 @@
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-protocols)##(nav-domains)#::{
"facetname": "domains",
"displayname": "Domains",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-domains)##{navs}#{
},#(/nav-protocols)##{navs}#{
"facetname": "#[name]#",
"displayname": "#[displayname]#",
"type": "String",

@ -1,11 +1,4 @@
<yacy:navigation>
#(nav-domains)#::
<yacy:facet name="domains" displayname="Domains" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" url="#[url]#"/>
#{/element}#
</yacy:facet>
#(/nav-domains)#
#{navs}#
<yacy:facet name="#[name]#" displayname="#[displayname]#" type="String" min="0" max="0" mean="0">
#{element}#

@ -1936,6 +1936,9 @@
<trans-unit id="c06925e" xml:space="preserve" approved="no" translate="yes">
<source>Here you can set up a robots.txt for all webcrawlers that try to access the webinterface of your peer.</source>
</trans-unit>
<trans-unit id="Line0013" xml:space="preserve" approved="no" translate="yes">
<source>is a voluntary agreement most search-engines (including YaCy) follow.</source>
</trans-unit>
<trans-unit id="19b9bf10" xml:space="preserve" approved="no" translate="yes">
<source>It disallows crawlers to access webpages or even entire domains.</source>
</trans-unit>
@ -8973,6 +8976,9 @@
<trans-unit id="6fb5a4ce" xml:space="preserve" approved="no" translate="yes">
<source>&gt;here&lt;</source>
</trans-unit>
<trans-unit id="Line0018" xml:space="preserve" approved="no" translate="yes">
<source>To edit or add local translations you can use</source>
</trans-unit>
<trans-unit id="40bca1e" xml:space="preserve" approved="no" translate="yes">
<source>File:</source>
</trans-unit>

@ -48,6 +48,8 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HttpStatus;
import jcifs.smb.SmbException;
import jcifs.smb.SmbFile;
import jcifs.smb.SmbFileInputStream;
@ -62,6 +64,7 @@ import net.yacy.cora.protocol.ftp.FTPClient;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.HTTPInputStream;
import net.yacy.crawler.retrieval.Response;
/**
@ -2147,7 +2150,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
*/
public java.io.File getFSFile() throws MalformedURLException {
if (!isFile()) throw new MalformedURLException();
return new java.io.File(this.toNormalform(true).substring(5));
return new java.io.File(unescape(this.toNormalform(true)).substring("file://".length()));
}
/**
@ -2290,7 +2293,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
return null;
}
public InputStream getInputStream(final ClientIdentification.Agent agent, final String username, final String pass) throws IOException {
/**
* Open an input stream on the resource described by this URL.
* <strong>Please don't forget to release resources by closing the returned stream.</strong>
* @param agent user agent identifier to use when the protocol is HTTP
* @return an open input stream
* @throws IOException when the stream can not be opened
*/
public InputStream getInputStream(final ClientIdentification.Agent agent) throws IOException {
if (isFile()) return new BufferedInputStream(new FileInputStream(getFSFile()));
if (isSMB()) return new BufferedInputStream(new SmbFileInputStream(getSmbFile()));
if (isFTP()) {
@ -2303,7 +2313,12 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (isHTTP() || isHTTPS()) {
final HTTPClient client = new HTTPClient(agent);
client.setHost(getHost());
return new ByteArrayInputStream(client.GETbytes(this, username, pass, false));
client.GET(this, false);
if (client.getStatusCode() != HttpStatus.SC_OK) {
throw new IOException("Unable to open http stream on " + this.toString() +
"\nServer returned status: " + client.getHttpResponse().getStatusLine());
}
return new HTTPInputStream(client);
}
return null;

@ -774,21 +774,57 @@ public class HTTPClient {
*
* @throws IOException
*/
public void finish() throws IOException {
    // Step 1: release the response, if there is one.
    if (this.httpResponse != null) {
        final HttpEntity entity = this.httpResponse.getEntity();
        final boolean hasStreamingBody = entity != null && entity.isStreaming();
        if (hasStreamingBody) {
            // Fully consumes the entity content and closes the content stream, if it exists.
            EntityUtils.consumeQuietly(entity);
        }
        this.httpResponse.close();
    }

    // Step 2: unregister and abort the request, if there is one.
    if (this.currentRequest == null) {
        return;
    }
    ConnectionInfo.removeConnection(this.currentRequest.hashCode());
    this.currentRequest.abort();
    this.currentRequest = null;
}
public void finish() throws IOException {
    /*
     * Releases what is held for the current request: the remainder of a
     * streaming response body is drained (or the request is aborted when too
     * much is left), the response is closed, and the request is unregistered
     * from ConnectionInfo.
     */
    try {
        if (this.httpResponse != null) {
            try {
                final HttpEntity httpEntity = this.httpResponse.getEntity();
                if (httpEntity != null && httpEntity.isStreaming()) {
                    /*
                     * Try to fully consume the eventual remainder of the
                     * content stream : if too long abort the request. Not using
                     * EntityUtils.consumeQuietly(httpEntity) because it takes too
                     * long on large resources when this is called before the
                     * stream has been fully processed : for example from caller
                     * exception handling.
                     */
                    try (InputStream contentStream = httpEntity.getContent()) {
                        if (contentStream != null) {
                            final byte[] buffer = new byte[2048];
                            int count = 0;
                            int readNb = contentStream.read(buffer);
                            // Bounded drain : at most 10 additional reads after the first one
                            while (readNb >= 0 && count < 10) {
                                readNb = contentStream.read(buffer);
                                count++;
                            }
                            if (readNb >= 0 && this.currentRequest != null) {
                                // End of stream still not reached : give up and abort the request
                                this.currentRequest.abort();
                            }
                        }
                    } catch (final IOException ignored) {
                        /*
                         * Silently ignore IOException here (for example caused by a stream
                         * already closed, or by closing the stream) as in EntityUtils.consumeQuietly()
                         */
                    }
                }
            } finally {
                /*
                 * Always close the response, including when it carries no entity
                 * or a non-streaming one, otherwise it would never be released.
                 */
                this.httpResponse.close();
            }
        }
    } finally {
        if (this.currentRequest != null) {
            ConnectionInfo.removeConnection(this.currentRequest.hashCode());
            this.currentRequest = null;
        }
    }
}
private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final int maxBytes, final boolean concurrent) throws IOException {
byte[] content = null;

@ -138,7 +138,7 @@ public class FileLoader {
}
// load the resource
InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null);
InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
byte[] b = FileUtils.read(is);
// create response with loaded content

@ -156,7 +156,7 @@ public class SMBLoader {
}
// load the resource
InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null);
InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
byte[] b = FileUtils.read(is);
// create response with loaded content

@ -589,8 +589,9 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
int p;
int positionOfOpeningTag;
int positionOfClosingTag;
int fromIndex = 0;
// internal links and images
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_LINK)) >= 0) {
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_LINK, fromIndex)) >= 0) {
positionOfClosingTag = line.indexOf(WIKI_CLOSE_LINK, positionOfOpeningTag + LEN_WIKI_OPEN_LINK);
if (positionOfClosingTag <= positionOfOpeningTag) {
break;
@ -640,16 +641,19 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
}
line = line.substring(0, positionOfOpeningTag) + "<img src=\"" + kl + "\"" + align + alt + ">" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK);
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_LINK;
}
// this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) {
kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE);
line = line.substring(0, positionOfOpeningTag) + "" + "<object width=\"425\" height=\"350\"><param name=\"movie\" value=\"http://www.youtube.com/v/" + kl + "\"></param><param name=\"wmode\" value=\"transparent\"></param><embed src=\"http://www.youtube.com/v/" + kl + "\" type=\"application/x-shockwave-flash\" wmode=\"transparent\" width=\"425\" height=\"350\"></embed></object>";
break;
}
// this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
kl = kl.substring(LEN_WIKI_VIDEO_VIMEO);
line = line.substring(0, positionOfOpeningTag) + "" + "<iframe src=\"http://player.vimeo.com/video/" + kl + "\" width=\"425\" height=\"350\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>";
break;
}
// if it's no image, it might be an internal link
else {
@ -660,11 +664,13 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
kv = kl;
}
line = line.substring(0, positionOfOpeningTag) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK); // oob exception in append() !
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_LINK;
}
}
fromIndex = 0;
// external links
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_EXTERNAL_LINK)) >= 0) {
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_EXTERNAL_LINK, fromIndex)) >= 0) {
positionOfClosingTag = line.indexOf(WIKI_CLOSE_EXTERNAL_LINK, positionOfOpeningTag + LEN_WIKI_OPEN_EXTERNAL_LINK);
if (positionOfClosingTag <= positionOfOpeningTag) {
break;
@ -686,6 +692,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
kl = "http://" + hostport + "/" + kl;
}
line = line.substring(0, positionOfOpeningTag) + "<a class=\"extern\" href=\"" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK);
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK;
}
return line;
}

@ -54,6 +54,9 @@ import java.util.zip.GZIPInputStream;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.NumberTools;
@ -86,17 +89,18 @@ public class MediawikiImporter extends Thread implements Importer {
public static Importer job; // if started from a servlet, this object is used to store the thread
public File sourcefile;
public MultiProtocolURL sourcefile;
public File targetdir;
public int count;
private long start;
private final long docsize;
private final int approxdocs;
private String hostport, urlStub;
private String errorMessage;
public MediawikiImporter(final File sourcefile, final File targetdir) {
super("MediawikiImporter(" + sourcefile != null ? sourcefile.getAbsolutePath() : "null sourcefile" +")");
public MediawikiImporter(final MultiProtocolURL sourcefile, final File targetdir) {
super("MediawikiImporter(" + sourcefile != null ? sourcefile.toNormalform(true) : "null sourcefile" +")");
this.sourcefile = sourcefile;
this.docsize = sourcefile.length();
this.approxdocs = (int) (this.docsize * docspermbinxmlbz2 / 1024L / 1024L);
@ -105,6 +109,7 @@ public class MediawikiImporter extends Thread implements Importer {
this.start = 0;
this.hostport = null;
this.urlStub = null;
this.errorMessage = null;
}
@Override
@ -114,12 +119,15 @@ public class MediawikiImporter extends Thread implements Importer {
@Override
public String source() {
return this.sourcefile.getAbsolutePath();
return this.sourcefile.toNormalform(true);
}
/**
* @return an empty string or the error message when an exception occurred
*/
@Override
public String status() {
return "";
return this.errorMessage != null ? this.errorMessage : "";
}
/**
@ -152,17 +160,18 @@ public class MediawikiImporter extends Thread implements Importer {
// regardless of any exception (e.g. eof memory) a add(poison) is added to the most outer final block
final BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
final wikiparserrecord poison = newRecord();
BufferedReader reader = null;
try {
String targetstub = this.sourcefile.getName();
String targetstub = this.sourcefile.getFileName();
int p = targetstub.lastIndexOf("\\.");
if (p > 0) targetstub = targetstub.substring(0, p);
InputStream is = new BufferedInputStream(new FileInputStream(this.sourcefile), 1024 * 1024);
if (this.sourcefile.getName().endsWith(".bz2")) {
InputStream is = new BufferedInputStream(this.sourcefile.getInputStream(ClientIdentification.yacyInternetCrawlerAgent), 1024 * 1024);
if (this.sourcefile.getFileName().endsWith(".bz2")) {
is = new BZip2CompressorInputStream(is);
} else if (this.sourcefile.getName().endsWith(".gz")) {
} else if (this.sourcefile.getFileName().endsWith(".gz")) {
is = new GZIPInputStream(is);
}
final BufferedReader r = new BufferedReader(new java.io.InputStreamReader(is, StandardCharsets.UTF_8), 4 * 1024 * 1024);
reader = new BufferedReader(new java.io.InputStreamReader(is, StandardCharsets.UTF_8), 4 * 1024 * 1024);
String t;
StringBuilder sb = new StringBuilder();
boolean page = false, text = false;
@ -181,7 +190,7 @@ public class MediawikiImporter extends Thread implements Importer {
wikiparserrecord record;
int q;
while ((t = r.readLine()) != null) {
while ((t = reader.readLine()) != null) {
if ((p = t.indexOf("<base>",0)) >= 0 && (q = t.indexOf("</base>", p)) > 0) {
//urlStub = "http://" + lang + ".wikipedia.org/wiki/";
this.urlStub = t.substring(p + 6, q);
@ -256,7 +265,6 @@ public class MediawikiImporter extends Thread implements Importer {
sb.append('\n');
}
}
r.close();
try {
for (int i = 0; i < threads; i++) {
@ -265,23 +273,24 @@ public class MediawikiImporter extends Thread implements Importer {
for (int i = 0; i < threads; i++) {
consumerResults[i].get(10000, TimeUnit.MILLISECONDS);
}
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
} catch (final ExecutionException e) {
ConcurrentLog.logException(e);
} catch (final TimeoutException e) {
ConcurrentLog.logException(e);
} catch (final Exception e) {
this.errorMessage = e.getMessage();
ConcurrentLog.logException(e);
} finally {
out.put(poison); // output thread condition (for file.close)
writerResult.get(10000, TimeUnit.MILLISECONDS);
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final Exception e) {
this.errorMessage = e.getMessage();
ConcurrentLog.logException(e);
} finally {
if(reader != null) {
try {
reader.close();
} catch (IOException e) {
ConcurrentLog.warn("WIKITRANSLATION", "Could not close dump reader : " + e.getMessage());
}
}
try {
out.put(poison); // out keeps output file open until poisened, to close file if exception happend in this block
} catch (InterruptedException ex) { }
@ -767,7 +776,7 @@ public class MediawikiImporter extends Thread implements Importer {
System.out.println(" -index <wikipedia-dump>");
System.out.println(" -read <start> <len> <idx-file>");
System.out.println(" -find <title> <wikipedia-dump>");
System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir> <url-stub>");
System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir>");
ConcurrentLog.shutdown();
return;
}
@ -779,17 +788,22 @@ public class MediawikiImporter extends Thread implements Importer {
// DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2
// DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/
if (s[0].equals("-convert") && s.length > 2) {
final File sourcefile = new File(s[1]);
if (s[0].equals("-convert")) {
if(s.length < 3) {
System.out.println("usage:");
System.out.println(" -convert <wikipedia-dump-xml.bz2> <convert-target-dir>");
ConcurrentLog.shutdown();
return;
}
final File targetdir = new File(s[2]);
// String urlStub = s[3]; // i.e. http://de.wikipedia.org/wiki/
// String language = urlStub.substring(7,9);
try {
final MediawikiImporter mi = new MediawikiImporter(sourcefile, targetdir);
final MediawikiImporter mi = new MediawikiImporter(new MultiProtocolURL(s[1]), targetdir);
mi.start();
mi.join();
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
} catch (MalformedURLException e) {
ConcurrentLog.logException(e);
}
}
@ -821,6 +835,11 @@ public class MediawikiImporter extends Thread implements Importer {
}
} finally {
try {
HTTPClient.closeConnectionManager();
} catch (InterruptedException e) {
e.printStackTrace();
}
ConcurrentLog.shutdown();
}
}

@ -73,6 +73,18 @@ public class WarcImporter extends Thread implements Importer {
sourceSize = -1;
}
/**
 * Initializes the WarcImporter with an input stream and an informational
 * file name or URL. The given info is stored as this importer's name, to be
 * used as info for calls to the importer method source(). Apart from that,
 * this constructor delegates to WarcImporter(InputStream).
 * @param f the input stream to read the warc archive from
 * @param urlinfo an info such as the URL or the file name of the archive
 */
public WarcImporter (InputStream f, String urlinfo) {
this(f);
name = urlinfo;
}
public WarcImporter(File f) throws FileNotFoundException{
name = f.getName();
sourceSize = f.length();

@ -380,8 +380,20 @@ public class htmlParser extends AbstractParser implements Parser {
locationSnapshot = new DigestURL(location.toNormalform(true) + "?_escaped_fragment_=");
}
Charset[] detectedcharsetcontainer = new Charset[]{null};
ContentScraper scraperSnapshot = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, locationSnapshot.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null), maxLinks);
documentSnapshot = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraperSnapshot);
InputStream snapshotStream = null;
try {
snapshotStream = locationSnapshot.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
ContentScraper scraperSnapshot = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, snapshotStream, maxLinks);
documentSnapshot = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraperSnapshot);
} finally {
if(snapshotStream != null) {
try {
snapshotStream.close();
} catch(IOException e) {
AbstractParser.log.warn("Could not close snapshot stream : " + e.getMessage());
}
}
}
AbstractParser.log.info("parse snapshot "+locationSnapshot.toString() + " additional to " + location.toString());
} catch (IOException | Failure ex) { }
return documentSnapshot;

@ -394,7 +394,7 @@ public final class LoaderDispatcher {
inStream = this.httpLoader.openInputStream(request, crawlProfile, 1, maxFileSize, blacklistType, agent);
} else if (protocol.equals("ftp") || protocol.equals("smb") || protocol.equals("file")) {
// may also open directly stream with ftp loader
inStream = url.getInputStream(agent, null, null);
inStream = url.getInputStream(agent);
} else {
throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
}

@ -28,6 +28,7 @@ package net.yacy.search.index;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
@ -158,10 +159,20 @@ public class DocumentIndex extends Segment {
} catch (final Exception e ) {
length = -1;
}
InputStream sourceStream = null;
try {
documents = TextParser.parseSource(url, null, null, new VocabularyScraper(), timezoneOffset, 0, length, url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null));
sourceStream = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
documents = TextParser.parseSource(url, null, null, new VocabularyScraper(), timezoneOffset, 0, length, sourceStream);
} catch (final Exception e ) {
throw new IOException("cannot parse " + url.toNormalform(false) + ": " + e.getMessage());
} finally {
if(sourceStream != null) {
try {
sourceStream.close();
} catch(IOException e) {
ConcurrentLog.warn("DocumentIndex", "Could not close source stream : " + e.getMessage());
}
}
}
//Document document = Document.mergeDocuments(url, null, documents);
final SolrInputDocument[] rows = new SolrInputDocument[documents.length];

@ -41,7 +41,7 @@ public class NavigatorPlugins {
static public Map<String, String> listAvailable() {
Map<String, String> defaultnavplugins = new TreeMap<String, String>();
defaultnavplugins.put("filetype", "Filetype");
// defaultnavplugins.put("hosts", "Provider");
defaultnavplugins.put("hosts", "Provider");
// defaultnavplugins.put("language", "Language");
defaultnavplugins.put("authors", "Authors");
defaultnavplugins.put("collections", "Collection");
@ -87,11 +87,11 @@ public class NavigatorPlugins {
if (navname.contains("filetype")) {
navigatorPlugins.put("filetype", new FileTypeNavigator("Filetype", CollectionSchema.url_file_ext_s));
}
/*
if (navname.contains("hosts")) {
navigatorPlugins.put("hosts", new HostNavigator("Provider", CollectionSchema.host_s));
}
/*
if (navname.contains("language")) {
navigatorPlugins.put("language", new LanguageNavigator("Language"));
}

@ -378,8 +378,8 @@ public class QueryModifier {
Date toDate = to == null || to.equals("*") ? null : DateDetection.parseLine(to, timezoneOffset);
StringBuilder filterQuery = new StringBuilder(20);
if (fromDate != null && toDate != null) {
String dstrFrom = fromDate == null ? "*" : DateFormatUtil.formatExternal(fromDate);
String dstrTo = toDate == null ? "*" : DateFormatUtil.formatExternal(toDate);
String dstrFrom = DateFormatUtil.formatExternal(fromDate);
String dstrTo = DateFormatUtil.formatExternal(toDate);
filterQuery.append(CollectionSchema.dates_in_content_dts.getSolrFieldName()).append(":[").append(dstrFrom).append(" TO ").append(dstrTo).append(']');
}
return filterQuery.toString();

@ -146,7 +146,6 @@ public final class SearchEvent {
private int localsolroffset;
private final AtomicInteger expectedRemoteReferences, maxExpectedRemoteReferences; // counter for referenced that had been sorted out for other reasons
public final ScoreMap<String> locationNavigator; // a counter for the appearance of location coordinates
public final ScoreMap<String> hostNavigator; // a counter for the appearance of host names
public final ScoreMap<String> protocolNavigator; // a counter for protocol types
public final ScoreMap<String> dateNavigator; // a counter for file types
public final ScoreMap<String> languageNavigator; // a counter for appearance of languages
@ -262,7 +261,6 @@ public final class SearchEvent {
// prepare configured search navigation
final String navcfg = Switchboard.getSwitchboard().getConfig("search.navigation", "");
this.locationNavigator = navcfg.contains("location") ? new ConcurrentScoreMap<String>() : null;
this.hostNavigator = navcfg.contains("hosts") ? new ConcurrentScoreMap<String>() : null;
this.protocolNavigator = navcfg.contains("protocol") ? new ConcurrentScoreMap<String>() : null;
this.dateNavigator = navcfg.contains("date") ? new ClusteredScoreMap<String>(true) : null;
this.topicNavigatorCount = navcfg.contains("topics") ? MAX_TOPWORDS : 0;
@ -842,18 +840,6 @@ public final class SearchEvent {
}
}
if (this.hostNavigator != null) {
fcts = facets.get(CollectionSchema.host_s.getSolrFieldName());
if (fcts != null) {
for (String host: fcts) {
int hc = fcts.get(host);
if (hc == 0) continue;
if (host.startsWith("www.")) host = host.substring(4);
this.hostNavigator.inc(host, hc);
}
}
}
if (this.dateNavigator != null) {
fcts = facets.get(CollectionSchema.dates_in_content_dts.getSolrFieldName());
if (fcts != null) this.dateNavigator.inc(fcts);
@ -1340,6 +1326,11 @@ public final class SearchEvent {
return null;
}
/**
* Adds the retrieved results (fulltext & rwi) to the result list and
* computes the text snippets
* @return true on adding entries to resultlist otherwise false
*/
public boolean drainStacksToResult() {
// we take one entry from both stacks at the same time
boolean success = false;
@ -1465,7 +1456,7 @@ public final class SearchEvent {
if (this.query.getSegment().connectedCitation()) {
int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
} /* else r += 0; */
}
// prefer hit with 'prefer' pattern
if (this.query.prefer.matcher(rentry.url().toNormalform(true)).matches()) r += 255 << this.query.ranking.coeff_prefer;
if (this.query.prefer.matcher(rentry.title()).matches()) r += 255 << this.query.ranking.coeff_prefer;
@ -1482,11 +1473,11 @@ public final class SearchEvent {
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
for (final String urlcomp : urlcompmap) {
int tc = topwords.get(urlcomp);
if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_urlcompintoplist;
if (tc > 0) r += tc << this.query.ranking.coeff_urlcompintoplist;
}
for (final String descrcomp : descrcompmap) {
int tc = topwords.get(descrcomp);
if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_descrcompintoplist;
if (tc > 0) r += tc << this.query.ranking.coeff_descrcompintoplist;
}
final Iterator<String> shi = this.query.getQueryGoal().getIncludeWords();
@ -1553,6 +1544,14 @@ public final class SearchEvent {
return page.makeResultEntry(this.query.getSegment(), this.peers, null); // result without snippet
}
/**
* This is the access point for the search interface to retrieve ranked results
* for display.
*
* @param item requested result counting number (starting at 0)
* @param timeout
* @return
*/
public URIMetadataNode oneResult(final int item, final long timeout) {
// check if we already retrieved this item
// (happens if a search pages is accessed a second time)

@ -1,8 +1,12 @@
package net.yacy.cora.document.id;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
@ -373,6 +377,29 @@ public class MultiProtocolURLTest {
assertEquals(testString[1], unescaped);
}
}
/**
 * Checks that a {@link MultiProtocolURL} built with
 * {@link MultiProtocolURL#MultiProtocolURL(java.io.File)} is a file URL and
 * that {@link MultiProtocolURL#getFSFile()} gives back an equal File.
 * @throws MalformedURLException when an error occurred
 * @throws URISyntaxException declared by the test signature
 */
@Test
public void testFileConstructor() throws MalformedURLException, URISyntaxException {
    final File simpleName = new File(File.separator + "textFile.txt");
    final File nameWithSpace = new File(File.separator + "text file.txt");
    final File nameWithNonAsciiLatinChars = new File(File.separator + "fileéàè.txt");
    final File[] samples = { simpleName, nameWithSpace, nameWithNonAsciiLatinChars };

    for (final File sample : samples) {
        final MultiProtocolURL fileUrl = new MultiProtocolURL(sample);
        assertTrue(fileUrl.isFile());
        /* The round trip through getFSFile() must give back the same File */
        assertEquals(sample, fileUrl.getFSFile());
    }
}
}

@ -54,4 +54,51 @@ public class WikiCodeTest {
assertFalse("no header tag expected:"+erg, erg.contains("<h1>"));
}
}
/**
 * Checks the HTML produced for internal wiki link markup ([[...]]).
 */
@Test
public void testInternalLink() {
    final WikiCode wikiCode = new WikiCode();
    final String hostport = "http://wiki:8080";

    /* Plain link to another wiki article */
    final String plainLink = wikiCode.transform(hostport, "[[article]]");
    assertTrue(plainLink.contains("<a"));
    assertTrue(plainLink.contains("href=\"Wiki.html?page=article\""));

    /* Link with a display text different from the article name */
    final String renamedLink = wikiCode.transform(hostport, "[[article|renamed article]]");
    assertTrue(renamedLink.contains("<a"));
    assertTrue(renamedLink.contains("href=\"Wiki.html?page=article\""));
    assertTrue(renamedLink.contains(">renamed article<"));

    /* Two links on the same line : both must be rendered */
    final String twoLinks = wikiCode.transform(hostport, "[[article1]] [[article2]]");
    assertTrue(twoLinks.contains("<a"));
    assertTrue(twoLinks.contains("href=\"Wiki.html?page=article1\""));
    assertTrue(twoLinks.contains("href=\"Wiki.html?page=article2\""));
}
/**
 * Checks the HTML produced for external link markup ([...]).
 */
@Test
public void testExternalLink() {
    final WikiCode wikiCode = new WikiCode();
    final String hostport = "http://wiki:8080";

    /* Unnamed link */
    final String unnamedLink = wikiCode.transform(hostport, "[http://yacy.net]");
    assertTrue(unnamedLink.contains("<a"));
    assertTrue(unnamedLink.contains("href=\"http://yacy.net\""));

    /* Named link */
    final String namedLink = wikiCode.transform(hostport, "[http://yacy.net YaCy]");
    assertTrue(namedLink.contains("<a"));
    assertTrue(namedLink.contains("href=\"http://yacy.net\""));
    assertTrue(namedLink.contains(">YaCy<"));

    /* Lua Script array parameter : only checks that the transform process does not crash */
    wikiCode.transform(hostport, "'[[[[2,1],[4,3],[6,5],[2,1]],[[12,11],[14,13],[16,15],[12,11]]]]'");
}
}

Loading…
Cancel
Save