From 6e42d4de88bf7c3f263a01796d4a588e941caab6 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 5 May 2011 00:25:14 +0000 Subject: [PATCH] - added full-String search function: find things that match exactly what is quoted in the query - re-structuring authentification methods to fix a problem with API steering git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7697 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ConfigAccounts_p.java | 14 +-- htroot/ConfigBasic.java | 3 +- htroot/SettingsAck_p.java | 3 +- htroot/Status.java | 3 +- htroot/User.java | 4 +- htroot/yacy/search.java | 6 +- htroot/yacysearch.java | 4 +- .../anomic/http/server/HTTPDFileHandler.java | 106 ++++++++++-------- source/de/anomic/http/server/HTTPDemon.java | 17 --- source/de/anomic/search/QueryParams.java | 29 +++++ source/de/anomic/search/ResultFetcher.java | 15 ++- source/de/anomic/search/Switchboard.java | 40 +++++-- .../anomic/search/SwitchboardConstants.java | 9 ++ source/net/yacy/migration.java | 5 +- source/net/yacy/yacy.java | 2 +- 15 files changed, 160 insertions(+), 100 deletions(-) diff --git a/htroot/ConfigAccounts_p.java b/htroot/ConfigAccounts_p.java index 6956b8596..8255421e4 100644 --- a/htroot/ConfigAccounts_p.java +++ b/htroot/ConfigAccounts_p.java @@ -39,8 +39,8 @@ import net.yacy.kelondro.order.Digest; import de.anomic.data.UserDB; import de.anomic.data.UserDB.AccessRight; -import de.anomic.http.server.HTTPDemon; import de.anomic.search.Switchboard; +import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import java.util.EnumMap; @@ -65,7 +65,7 @@ public class ConfigAccounts_p { // may be overwritten if new password is given if (user.length() > 0 && pw1.length() > 3 && pw1.equals(pw2)) { // check passed. 
set account: - env.setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(user + ":" + pw1))); + env.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(user + ":" + pw1))); env.setConfig("adminAccount", ""); } @@ -74,21 +74,21 @@ public class ConfigAccounts_p { sb.setConfig("adminAccountForLocalhost", true); // if an localhost access is configured, check if a local password is given // if not, set a random password - if (env.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").length() == 0) { + if (env.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) { // make a 'random' password - env.setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "0000" + sb.genRandomPassword()); + env.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "0000" + sb.genRandomPassword()); env.setConfig("adminAccount", ""); } } else { sb.setConfig("adminAccountForLocalhost", false); - if (env.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").startsWith("0000")) { + if (env.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").startsWith("0000")) { // make shure that the user can still use the interface after a random password was set - env.setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, ""); + env.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""); } } } - if (env.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").length() == 0 && !env.getConfigBool("adminAccountForLocalhost", false)) { + if (env.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0 && !env.getConfigBool("adminAccountForLocalhost", false)) { prop.put("passwordNotSetWarning", 1); } diff --git a/htroot/ConfigBasic.java b/htroot/ConfigBasic.java index f397cae34..6997abd13 100644 --- a/htroot/ConfigBasic.java +++ b/htroot/ConfigBasic.java @@ -40,7 +40,6 @@ import net.yacy.kelondro.workflow.InstantBusyThread; import de.anomic.data.WorkTables; import de.anomic.data.Translator; -import 
de.anomic.http.server.HTTPDemon; import de.anomic.http.server.HTTPDFileHandler; import de.anomic.net.UPnP; import de.anomic.search.Switchboard; @@ -218,7 +217,7 @@ public class ConfigBasic { prop.put("setUseCase_repositoryPath", sb.getConfig("repositoryPath", "/DATA/HTROOT/repository")); // check if values are proper - final boolean properPassword = (sb.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").length() > 0) || sb.getConfigBool("adminAccountForLocalhost", false); + final boolean properPassword = (sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() > 0) || sb.getConfigBool("adminAccountForLocalhost", false); final boolean properName = (sb.peers.mySeed().getName().length() >= 3) && (!(yacySeed.isDefaultPeerName(sb.peers.mySeed().getName()))); final boolean properPort = (sb.peers.mySeed().isSenior()) || (sb.peers.mySeed().isPrincipal()); diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java index 020e7f8f2..5dcdb3998 100644 --- a/htroot/SettingsAck_p.java +++ b/htroot/SettingsAck_p.java @@ -45,6 +45,7 @@ import net.yacy.kelondro.util.Formatter; import de.anomic.http.server.HTTPDemon; import de.anomic.http.server.HTTPDProxyHandler; import de.anomic.search.Switchboard; +import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -92,7 +93,7 @@ public class SettingsAck_p { return prop; } // check passed. 
set account: - env.setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(user + ":" + pw1))); + env.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(user + ":" + pw1))); env.setConfig("adminAccount", ""); prop.put("info", "5");//admin account changed prop.putHTML("info_user", user); diff --git a/htroot/Status.java b/htroot/Status.java index 6b0aaab1d..a81eab0e4 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -38,7 +38,6 @@ import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.OS; import net.yacy.kelondro.workflow.WorkflowProcessor; -import de.anomic.http.server.HTTPDemon; import de.anomic.search.Switchboard; import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverCore; @@ -125,7 +124,7 @@ public class Status { } // password protection - if ((sb.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").length() == 0) && (!sb.getConfigBool("adminAccountForLocalhost", false))) { + if ((sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) && (!sb.getConfigBool("adminAccountForLocalhost", false))) { prop.put("protection", "0"); // not protected prop.put("urgentSetPassword", "1"); } else { diff --git a/htroot/User.java b/htroot/User.java index ffc2baf21..caae52a96 100644 --- a/htroot/User.java +++ b/htroot/User.java @@ -35,8 +35,8 @@ import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Digest; import de.anomic.data.UserDB; -import de.anomic.http.server.HTTPDemon; import de.anomic.search.Switchboard; +import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.servletProperties; @@ -95,7 +95,7 @@ public class User{ final String password=post.get("password"); entry=sb.userDB.passwordAuth(username, password); - final boolean staticAdmin = sb.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").equals( + 
final boolean staticAdmin = sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").equals( Digest.encodeMD5Hex( Base64Order.standardCoder.encodeString(username + ":" + password) ) diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index e219a9f79..0d47d76fd 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -34,6 +34,7 @@ import java.util.Iterator; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; +import java.util.regex.Pattern; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.UTF8; @@ -102,6 +103,7 @@ public final class search { final String prefer = post.get("prefer", ""); final String contentdom = post.get("contentdom", "text"); final String filter = post.get("filter", ".*"); + final Pattern snippetPattern = Pattern.compile(post.get("snippet", ".*")); String sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null; String authorhash = post.get("authorhash", ""); if (authorhash.length() == 0) authorhash = null; String language = post.get("language", ""); @@ -214,6 +216,7 @@ public final class search { abstractSet, new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0), null, + snippetPattern, null, maxdist, prefer, @@ -272,7 +275,8 @@ public final class search { null, queryhashes, excludehashes, - null, + null, + snippetPattern, null, maxdist, prefer, diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 726661e07..ec03defb4 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -531,7 +531,8 @@ public class yacysearch { // do the search final HandleSet queryHashes = Word.words2hashesHandles(query[0]); - + final Pattern snippetPattern = QueryParams.stringSearchPattern(originalquerystring); + // check filters try { Pattern.compile(urlmask); @@ -556,6 +557,7 @@ public class yacysearch { queryHashes, Word.words2hashesHandles(query[1]), Word.words2hashesHandles(query[2]), + 
snippetPattern, tenant, maxDistance, prefermask, diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index 612a51694..b0493663b 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -96,6 +96,7 @@ import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ScraperInputStream; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.util.ByteBuffer; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; @@ -276,10 +277,6 @@ public final class HTTPDFileHandler { return; } - // check permission/granted access - String authorization = requestHeader.get(RequestHeader.AUTHORIZATION); - if (authorization != null && authorization.length() == 0) authorization = null; - final String adminAccountBase64MD5 = switchboard.getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, ""); // cache settings boolean nocache = path.contains("?") || body != null; @@ -297,47 +294,62 @@ public final class HTTPDFileHandler { path = "/api/bookmarks/" + path.substring(11); } - final boolean adminAccountForLocalhost = sb.getConfigBool("adminAccountForLocalhost", false); - final String refererHost = requestHeader.refererHost(); - boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost)); - final boolean grantedForLocalhost = adminAccountForLocalhost && accessFromLocalhost; + // these are the 5 cases where an access granted: + // (the alternative is that we deliver a 401 to request authorization) + + // -1- the page is not protected; or final boolean protectedPage = path.indexOf("_p.") > 0; - final boolean accountEmpty = adminAccountBase64MD5.length() == 0; - final boolean softauth = accessFromLocalhost && authorization != null && 
authorization.length() > 6 && (adminAccountBase64MD5.equals(authorization.substring(6))); - - if (protectedPage && !softauth && ((!grantedForLocalhost && !accountEmpty) || requestHeader.userAgent().startsWith("yacybot"))) { - // authentication required - if (authorization == null) { - // no authorization given in response. Ask for that - final ResponseHeader responseHeader = getDefaultHeaders(path); - responseHeader.put(RequestHeader.WWW_AUTHENTICATE,"Basic realm=\"admin log-in\""); - //httpd.sendRespondHeader(conProp,out,httpVersion,401,headers); - final servletProperties tp=new servletProperties(); - tp.put("returnto", path); - //TODO: separate error page Wrong Login / No Login - HTTPDemon.sendRespondError(conProp, out, 5, 401, "Wrong Authentication", "", new File("proxymsg/authfail.inc"), tp, null, responseHeader); - return; - } else if ( - (HTTPDemon.staticAdminAuthenticated(authorization.trim().substring(6), switchboard) == 4) || - (sb.userDB.hasAdminRight(authorization, requestHeader.getHeaderCookies()))) { - //Authentication successful. remove brute-force flag - serverCore.bfHost.remove(conProp.getProperty(HeaderFramework.CONNECTION_PROP_CLIENTIP)); - } else { - // a wrong authentication was given or the userDB user does not have admin access. 
Ask again - Log.logInfo("HTTPD", "Wrong log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'"); - final Integer attempts = serverCore.bfHost.get(clientIP); - if (attempts == null) - serverCore.bfHost.put(clientIP, Integer.valueOf(1)); - else - serverCore.bfHost.put(clientIP, Integer.valueOf(attempts.intValue() + 1)); - - final ResponseHeader headers = getDefaultHeaders(path); - headers.put(RequestHeader.WWW_AUTHENTICATE,"Basic realm=\"admin log-in\""); - HTTPDemon.sendRespondHeader(conProp,out,httpVersion,401,headers); - return; - } + boolean accessGranted = !protectedPage; + + // -2- a password is not configured; or + final String adminAccountBase64MD5 = switchboard.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""); + if (!accessGranted) { + accessGranted = adminAccountBase64MD5.length() == 0; } - + + // -3- access from localhost is granted and access comes from localhost; or + final String refererHost = requestHeader.refererHost(); + if (!accessGranted) { + final boolean adminAccountForLocalhost = sb.getConfigBool("adminAccountForLocalhost", false); + boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost)); + accessGranted = adminAccountForLocalhost && accessFromLocalhost; + } + + // -4- a password is configured and access comes from localhost + // and the realm-value of a http-authentify String is equal to the stored base64MD5; or + String realmProp = requestHeader.get(RequestHeader.AUTHORIZATION); + if (realmProp != null && realmProp.length() == 0) realmProp = null; + String realmValue = realmProp == null ? 
null : realmProp.substring(6); + if (!accessGranted) { + boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost)); + accessGranted = accessFromLocalhost && realmValue != null && realmProp.length() > 6 && (adminAccountBase64MD5.equals(realmValue)); + } + + // -5- a password is configured and access comes with matching http-authentify + if (!accessGranted) { + accessGranted = realmProp != null && realmValue != null && (sb.userDB.hasAdminRight(realmProp, requestHeader.getHeaderCookies()) || adminAccountBase64MD5.equals(Digest.encodeMD5Hex(realmValue))); + } + + // in case that we are still not granted we ask for a password + if (!accessGranted) { + Log.logInfo("HTTPD", "Wrong log-in for path '" + path + "' from host '" + clientIP + "'"); + final Integer attempts = serverCore.bfHost.get(clientIP); + if (attempts == null) + serverCore.bfHost.put(clientIP, Integer.valueOf(1)); + else + serverCore.bfHost.put(clientIP, Integer.valueOf(attempts.intValue() + 1)); + + final ResponseHeader responseHeader = getDefaultHeaders(path); + responseHeader.put(RequestHeader.WWW_AUTHENTICATE,"Basic realm=\"admin log-in\""); + final servletProperties tp=new servletProperties(); + tp.put("returnto", path); + HTTPDemon.sendRespondError(conProp, out, 5, 401, "Wrong Authentication", "", new File("proxymsg/authfail.inc"), tp, null, responseHeader); + return; + } + + // Authentication successful. 
remove brute-force flag + serverCore.bfHost.remove(conProp.getProperty(HeaderFramework.CONNECTION_PROP_CLIENTIP)); + // parse arguments serverObjects args = new serverObjects(); int argc = 0; @@ -818,7 +830,7 @@ public final class HTTPDFileHandler { // check if the servlets requests authentication if (templatePatterns.containsKey(servletProperties.ACTION_AUTHENTICATE)) { // handle brute-force protection - if (authorization != null) { + if (realmProp != null) { Log.logInfo("HTTPD", "dynamic log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'"); final Integer attempts = serverCore.bfHost.get(clientIP); if (attempts == null) @@ -948,7 +960,7 @@ public final class HTTPDFileHandler { null, "chunked", nocache); // send the content in chunked parts, see RFC 2616 section 3.6.1 final ChunkedOutputStream chos = new ChunkedOutputStream(out); - ServerSideIncludes.writeSSI(o, chos, authorization, clientIP); + ServerSideIncludes.writeSSI(o, chos, realmProp, clientIP); //chos.write(result); chos.finish(); } else { @@ -962,14 +974,14 @@ public final class HTTPDFileHandler { if (zipContent) { GZIPOutputStream zippedOut = new GZIPOutputStream(o); - ServerSideIncludes.writeSSI(o1, zippedOut, authorization, clientIP); + ServerSideIncludes.writeSSI(o1, zippedOut, realmProp, clientIP); //httpTemplate.writeTemplate(fis, zippedOut, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8")); zippedOut.finish(); zippedOut.flush(); zippedOut.close(); zippedOut = null; } else { - ServerSideIncludes.writeSSI(o1, o, authorization, clientIP); + ServerSideIncludes.writeSSI(o1, o, realmProp, clientIP); //httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8")); } if (method.equals(HeaderFramework.METHOD_HEAD)) { diff --git a/source/de/anomic/http/server/HTTPDemon.java b/source/de/anomic/http/server/HTTPDemon.java index d5efb4012..f5fce89ce 100644 --- a/source/de/anomic/http/server/HTTPDemon.java +++ 
b/source/de/anomic/http/server/HTTPDemon.java @@ -60,11 +60,9 @@ import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.util.ByteBuffer; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; -import net.yacy.kelondro.util.MapTools; import org.apache.commons.fileupload.FileItem; import org.apache.commons.fileupload.FileItemFactory; @@ -94,12 +92,6 @@ import java.util.concurrent.ConcurrentMap; */ public final class HTTPDemon implements serverHandler, Cloneable { - /** - *

public static final String ADMIN_ACCOUNT_B64MD5 = "adminAccountBase64MD5"

- *

Name of the setting holding the authentication hash for the static admin-account. It is calculated - * by first encoding username:password as Base64 and hashing it using {@link MapTools#encodeMD5Hex(String)}.

- */ - public static final String ADMIN_ACCOUNT_B64MD5 = "adminAccountBase64MD5"; public static final int ERRORCASE_MESSAGE = 4; public static final int ERRORCASE_FILE = 5; @@ -259,15 +251,6 @@ public final class HTTPDemon implements serverHandler, Cloneable { return persistent; } - public static int staticAdminAuthenticated(final String authorization, final serverSwitch sw) { - // the authorization string must be given with the truncated 6 bytes at the beginning - final String adminAccountBase64MD5 = sw.getConfig(ADMIN_ACCOUNT_B64MD5, ""); - if (adminAccountBase64MD5.length() == 0) return 2; // no password stored - if (authorization == null || authorization.length() == 0) return 1; - if (adminAccountBase64MD5.equals(Digest.encodeMD5Hex(authorization))) return 4; // hard-authenticated, all ok - return 1; - } - private boolean handleYaCyHopAuthentication(final RequestHeader header, Properties prop, Session session) { // check if the user has allowed that his/her peer is used for hops if (!allowYaCyHop(session)) return false; diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java index 2ddfd69eb..3c5d409d5 100644 --- a/source/de/anomic/search/QueryParams.java +++ b/source/de/anomic/search/QueryParams.java @@ -33,6 +33,7 @@ import java.util.Iterator; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -77,6 +78,7 @@ public final class QueryParams { public final String queryString; public HandleSet fullqueryHashes, queryHashes, excludeHashes; + public Pattern snippetMatcher; public final int itemsPerPage; public int offset; public final Pattern urlMask, prefer; @@ -131,6 +133,7 @@ public final class QueryParams { this.excludeHashes = Word.words2hashesHandles(cq[1]); this.fullqueryHashes = Word.words2hashesHandles(cq[2]); } + this.snippetMatcher = Pattern.compile(".*"); this.ranking = 
ranking; this.tenant = null; this.maxDistance = Integer.MAX_VALUE; @@ -165,6 +168,7 @@ public final class QueryParams { final String queryString, final HandleSet queryHashes, final HandleSet excludeHashes, final HandleSet fullqueryHashes, + final Pattern snippetMatcher, final String tenant, final int maxDistance, final String prefer, final ContentDomain contentdom, final String language, @@ -187,6 +191,7 @@ public final class QueryParams { this.queryHashes = queryHashes; this.excludeHashes = excludeHashes; this.fullqueryHashes = fullqueryHashes; + this.snippetMatcher = snippetMatcher; this.tenant = (tenant != null && tenant.length() == 0) ? null : tenant; this.ranking = ranking; this.maxDistance = maxDistance; @@ -533,4 +538,28 @@ public final class QueryParams { return sb.toString(); } + + private static Pattern StringMatchPattern = Pattern.compile(".*?(\".*?\").*"); + + /** + * calculate a pattern to match with a string search + * @param query + * @return + */ + public static Pattern stringSearchPattern(String query) { + String p = ""; + while (query.length() > 0) { + Matcher m = StringMatchPattern.matcher(query); + if (!m.matches()) break; + p += ".*" + query.substring(m.start(1) + 1, m.end(1) - 1); + query = query.substring(m.end(1)); + } + p += ".*"; + return Pattern.compile(p); + } + + public static void main(String[] args) { + Pattern p = stringSearchPattern("die \"peer-to-peer Suchmaschine\" ohne Zensur als \"freie Software\" runterladen"); + System.out.println(p.toString()); + } } diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index c2bf59fb0..99ebaffe4 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -29,6 +29,7 @@ package de.anomic.search; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.regex.Pattern; import net.yacy.cora.document.MultiProtocolURI; @@ -254,7 +255,7 @@ public class ResultFetcher { 
this.workerThreads = new Worker[deployCount]; synchronized(this.workerThreads) { for (int i = 0; i < workerThreads.length; i++) { - Worker worker = new Worker(i, 10000, query.snippetCacheStrategy, neededResults); + Worker worker = new Worker(i, 10000, query.snippetCacheStrategy, query.snippetMatcher, neededResults); worker.start(); this.workerThreads[i] = worker; } @@ -266,7 +267,7 @@ public class ResultFetcher { for (int i = 0; i < this.workerThreads.length; i++) { if (deployCount <= 0) break; if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) { - Worker worker = new Worker(i, 10000, query.snippetCacheStrategy, neededResults); + Worker worker = new Worker(i, 10000, query.snippetCacheStrategy, query.snippetMatcher, neededResults); worker.start(); this.workerThreads[i] = worker; deployCount--; @@ -295,11 +296,13 @@ public class ResultFetcher { private final int id; private final CrawlProfile.CacheStrategy cacheStrategy; private final int neededResults; + private final Pattern snippetPattern; - public Worker(final int id, final long maxlifetime, CrawlProfile.CacheStrategy cacheStrategy, int neededResults) { + public Worker(final int id, final long maxlifetime, CrawlProfile.CacheStrategy cacheStrategy, Pattern snippetPattern, int neededResults) { this.id = id; this.cacheStrategy = cacheStrategy; this.lastLifeSign = System.currentTimeMillis(); + this.snippetPattern = snippetPattern; this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.neededResults = neededResults; } @@ -340,10 +343,12 @@ public class ResultFetcher { loops++; final ResultEntry resultEntry = fetchSnippet(page, cacheStrategy); // does not fetch snippets if snippetMode == 0 - if (resultEntry == null) continue; // the entry had some problems, cannot be used - //if (result.contains(resultEntry)) continue; + String rawLine = resultEntry.textSnippet().getLineRaw(); + //System.out.println("***SNIPPET*** raw='" + rawLine + "', pattern='" + 
this.snippetPattern.toString() + "'"); + if (!this.snippetPattern.matcher(rawLine).matches()) continue; + //if (result.contains(resultEntry)) continue; urlRetrievalAllTime += resultEntry.dbRetrievalTime; snippetComputationAllTime += resultEntry.snippetComputationTime; diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 4f8387f6b..b72c9346f 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -108,6 +108,7 @@ import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; +import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.util.EventTracker; import net.yacy.kelondro.util.FileUtils; @@ -149,7 +150,6 @@ import de.anomic.data.wiki.WikiBoard; import de.anomic.data.wiki.WikiCode; import de.anomic.data.wiki.WikiParser; import de.anomic.http.client.Cache; -import de.anomic.http.server.HTTPDemon; import de.anomic.http.server.RobotsTxtConfig; import de.anomic.net.UPnP; import de.anomic.server.serverSwitch; @@ -601,10 +601,10 @@ public final class Switchboard extends serverSwitch { // addresses are blocked to prevent attack szenarios where remote pages contain links to localhost // addresses that can steer a YaCy peer if ((getConfigBool("adminAccountForLocalhost", false))) { - if (getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").startsWith("0000")) { + if (getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").startsWith("0000")) { // the password was set automatically with a random value. // We must remove that here to prevent that a user cannot log in any more - setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, ""); + setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""); // after this a message must be generated to alert the user to set a new password log.logInfo("RANDOM PASSWORD REMOVED! 
User must set a new password"); } @@ -1525,9 +1525,9 @@ public final class Switchboard extends serverSwitch { } // set a random password if no password is configured - if (getConfigBool("adminAccountForLocalhost", false) && getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "").length() == 0) { + if (getConfigBool("adminAccountForLocalhost", false) && getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) { // make a 'random' password - setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, "0000" + this.genRandomPassword()); + setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "0000" + this.genRandomPassword()); setConfig("adminAccount", ""); } @@ -2216,27 +2216,45 @@ public final class Switchboard extends serverSwitch { return refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost); } + /** + * check authentication status for request + * access shall be granted if return value >= 2; + * these are the cases where an access is granted to protected pages: + * - a password is not configured: auth-level 2 + * - access from localhost is granted and access comes from localhost: auth-level 3 + * - a password is configured and access comes from localhost + * and the realm-value of a http-authentify String is equal to the stored base64MD5: auth-level 3 + * - a password is configured and access comes with matching http-authentify: auth-level 4 + * @param requestHeader + * @return the auth-level as described above or 1 which means 'not authorized'. 
a 0 is returned in case of fraud attempts + */ public int adminAuthenticated(final RequestHeader requestHeader) { + // authorization in case that there is no account stored + final String adminAccountBase64MD5 = getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""); + if (adminAccountBase64MD5.length() == 0) return 2; // no password stored; this should not happen for older peers + // authorization for localhost, only if flag is set to grant localhost access as admin final boolean accessFromLocalhost = accessFromLocalhost(requestHeader); if (getConfigBool("adminAccountForLocalhost", false) && accessFromLocalhost) return 3; // soft-authenticated for localhost // get the authorization string from the header - final String authorization = (requestHeader.get(RequestHeader.AUTHORIZATION, "xxxxxx")).trim().substring(6); + String realmProp = (requestHeader.get(RequestHeader.AUTHORIZATION, "xxxxxx")).trim(); + final String realmValue = realmProp.substring(6); // security check against too long authorization strings - if (authorization.length() > 256) return 0; + if (realmValue.length() > 256) return 0; // authorization by encoded password, only for localhost access - final String adminAccountBase64MD5 = getConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, ""); - if (accessFromLocalhost && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost + if (accessFromLocalhost && (adminAccountBase64MD5.equals(realmValue))) return 3; // soft-authenticated for localhost // authorization by hit in userDB - if (userDB.hasAdminRight(requestHeader.get(RequestHeader.AUTHORIZATION, "xxxxxx"), requestHeader.getHeaderCookies())) return 4; //return, because 4=max + if (userDB.hasAdminRight(realmProp, requestHeader.getHeaderCookies())) return 4; //return, because 4=max // authorization with admin keyword in configuration - return HTTPDemon.staticAdminAuthenticated(authorization, this); + if (realmValue == null || realmValue.length() == 0) return 1; + if 
(adminAccountBase64MD5.equals(Digest.encodeMD5Hex(realmValue))) return 4; // hard-authenticated, all ok + return 1; } public boolean verifyAuthentication(final RequestHeader header, final boolean strict) { diff --git a/source/de/anomic/search/SwitchboardConstants.java b/source/de/anomic/search/SwitchboardConstants.java index 3a1814345..1497dcabc 100644 --- a/source/de/anomic/search/SwitchboardConstants.java +++ b/source/de/anomic/search/SwitchboardConstants.java @@ -26,6 +26,7 @@ package de.anomic.search; +import net.yacy.kelondro.util.MapTools; import de.anomic.http.server.RobotsTxtConfig; /** @@ -34,6 +35,14 @@ import de.anomic.http.server.RobotsTxtConfig; */ public final class SwitchboardConstants { + + /** + *

public static final String ADMIN_ACCOUNT_B64MD5 = "adminAccountBase64MD5"

+ *

Name of the setting holding the authentication hash for the static admin-account. It is calculated + * by first encoding username:password as Base64 and hashing it using {@link net.yacy.kelondro.order.Digest#encodeMD5Hex(String)}.

+ */ + public static final String ADMIN_ACCOUNT_B64MD5 = "adminAccountBase64MD5"; + public static final int CRAWLJOB_SYNC = 0; public static final int CRAWLJOB_STATUS = 1; // 20_dhtdistribution diff --git a/source/net/yacy/migration.java b/source/net/yacy/migration.java index 183484f3f..301b20d32 100644 --- a/source/net/yacy/migration.java +++ b/source/net/yacy/migration.java @@ -30,7 +30,6 @@ import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.util.FileUtils; -import de.anomic.http.server.HTTPDemon; import de.anomic.search.Switchboard; import de.anomic.search.SwitchboardConstants; @@ -207,7 +206,7 @@ public class migration { // set preset accounts/passwords String acc; if ((acc = sb.getConfig("adminAccount", "")).length() > 0) { - sb.setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(acc))); + sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(acc))); sb.setConfig("adminAccount", ""); } @@ -217,7 +216,7 @@ public class migration { sb.setConfig("proxyAccountBase64", ""); } if ((acc = sb.getConfig("adminAccountBase64", "")).length() > 0) { - sb.setConfig(HTTPDemon.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(acc)); + sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(acc)); sb.setConfig("adminAccountBase64", ""); } if ((acc = sb.getConfig("uploadAccountBase64", "")).length() > 0) { diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 0f5980ce1..c630d61a7 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -528,7 +528,7 @@ public final class yacy { final int port = serverCore.getPortNr(config.getProperty("port", "8090")); // read password - String encodedPassword = (String) config.get(HTTPDemon.ADMIN_ACCOUNT_B64MD5); + String encodedPassword = (String) config.get(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5); if (encodedPassword == null) 
encodedPassword = ""; // not defined // send 'wget' to web interface