From 0c173821fd5ce17d201435530af0a82c60743962 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 23 May 2008 09:45:33 +0000 Subject: [PATCH] more access security regarding database access and snippet retrieval: restrict number of results for not-authorized searchers git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4838 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/search.java | 4 ++-- htroot/yacy/user/ysearch.java | 8 +++++--- htroot/yacysearch.java | 5 +++-- source/de/anomic/plasma/plasmaSearchQuery.java | 15 ++++++++++----- .../anomic/plasma/plasmaSearchRankingProcess.java | 6 +++--- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index a9bc53bf3..bd40b0e7b 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -174,7 +174,7 @@ public final class search { plasmaSearchEvent theSearch = null; if ((query.length() == 0) && (abstractSet != null)) { // this is _not_ a normal search, only a request for index abstracts - theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client); + theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client, false); theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); @@ -200,7 +200,7 @@ public final class search { } else { // retrieve index containers from search request - theQuery = new 
plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client); + theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client, false); theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + remoteSeed.getName(), plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes), "")); diff --git a/htroot/yacy/user/ysearch.java b/htroot/yacy/user/ysearch.java index 2d888ab8b..846496a18 100644 --- a/htroot/yacy/user/ysearch.java +++ b/htroot/yacy/user/ysearch.java @@ -107,8 +107,9 @@ public class ysearch { } if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {} - int itemsPerPage = post.getInt("count", 10); - int offset = post.getInt("offset", 0); + int itemsPerPage = Math.max((authenticated) ? 1000 : 10, post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative + int offset = post.getInt("startRecord", post.getInt("offset", 0)); + boolean global = (post == null) ? 
true : post.get("resource", "global").equals("global"); final boolean indexof = post.get("indexof","").equals("on"); String urlmask = ""; @@ -205,7 +206,8 @@ public class ysearch { constraint, true, domainzone, - client); + client, + authenticated); // tell all threads to do nothing for a specific time diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index ae18b12d5..2d98f1f9f 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -138,7 +138,7 @@ public class yacysearch { } if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {} - int itemsPerPage = post.getInt("maximumRecords", post.getInt("count", 10)); // SRU syntax with old property as alternative + int itemsPerPage = Math.max((authenticated) ? 1000 : 10, post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative int offset = post.getInt("startRecord", post.getInt("offset", 0)); boolean global = (post == null) ? 
true : post.get("resource", "global").equals("global"); @@ -280,7 +280,8 @@ public class yacysearch { constraint, true, yacyURL.TLD_any_zone_filter, - client); + client, + authenticated); serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.INITIALIZATION, 0, 0)); // tell all threads to do nothing for a specific time diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index e3dcdd6b5..a8f627bf3 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -96,7 +96,8 @@ public final class plasmaSearchQuery { // values that are set after a search: public int resultcount; // number of found results public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets - + public boolean specialRights; // is true if the user has a special authorization and may use more database-extensive options + public plasmaSearchQuery(String queryString, int lines, plasmaSearchRankingProfile ranking, @@ -128,10 +129,12 @@ public final class plasmaSearchQuery { this.host = null; this.remotepeer = null; this.handle = new Long(System.currentTimeMillis()); + this.specialRights = false; } public plasmaSearchQuery( - String queryString, TreeSet queryHashes, TreeSet excludeHashes, + String queryString, TreeSet queryHashes, + TreeSet excludeHashes, plasmaSearchRankingProfile ranking, int maxDistance, String prefer, int contentdom, boolean onlineSnippetFetch, @@ -139,7 +142,8 @@ public final class plasmaSearchQuery { int domType, String domGroupName, int domMaxTargets, kelondroBitfield constraint, boolean allofconstraint, int domainzone, - String host) { + String host, + boolean specialRights) { this.queryString = queryString; this.queryHashes = queryHashes; this.excludeHashes = excludeHashes; @@ -147,8 +151,8 @@ public final class plasmaSearchQuery { 
this.maxDistance = maxDistance; this.prefer = prefer; this.contentdom = contentdom; - this.linesPerPage = Math.min(100, lines); - this.offset = Math.min(100, offset); + this.linesPerPage = Math.min((specialRights) ? 1000 : 10, lines); + this.offset = Math.min((specialRights) ? 10000 : 100, offset); this.urlMask = urlMask; this.domType = domType; this.zonecode = domainzone; @@ -159,6 +163,7 @@ public final class plasmaSearchQuery { this.host = host; this.remotepeer = null; this.handle = new Long(System.currentTimeMillis()); + this.specialRights = specialRights; } public int neededResults() { diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 980417dd6..5a6c77c52 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -56,7 +56,7 @@ public final class plasmaSearchRankingProcess { public static kelondroBinSearch[] ybrTables = null; // block-rank tables public static final int maxYBR = 3; // the lower this value, the faster the search private static boolean useYBR = true; - private static final int maxDoubleDom = 20; + private static final int maxDoubleDomAll = 20, maxDoubleDomSpecial = 10000; private kelondroSortStack stack; private HashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack @@ -260,7 +260,7 @@ public final class plasmaSearchRankingProcess { m = this.doubleDomCache.get(domhash); if (m == null) { // first appearance of dom - m = new kelondroSortStack(maxDoubleDom); + m = new kelondroSortStack((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); this.doubleDomCache.put(domhash, m); return rwi; } @@ -298,7 +298,7 @@ public final class plasmaSearchRankingProcess { while ((stack.size() > 0) || (size() > 0)) { if (((stack.size() == 0) && (size() == 0))) break; kelondroSortStack.stackElement obrwi = bestRWI(skipDoubleDom); - if (obrwi == null) continue; // *** ? 
this happenened and the thread was suspended silently. cause? + if (obrwi == null) continue; // *** ? this happened and the thread was suspended silently. cause? indexURLReference u = wordIndex.getURL(obrwi.element.urlHash(), obrwi.element, obrwi.weight.longValue()); if (u != null) { indexURLReference.Components comp = u.comp();