From b4ed937f1eab8017a379a59f65b294bd801a0deb Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 11 Mar 2008 11:09:38 +0000 Subject: [PATCH] - modified zone navigation (does still not work correctly) - added dht switch in network definition - 0.574 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4550 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/yacy/search.java | 5 +- htroot/yacy/ui/result.java | 1 + htroot/yacy/user/sidebar_navigation.html | 27 +++++- htroot/yacy/user/sidebar_navigation.java | 46 ++++++---- htroot/yacy/user/ysearch.java | 3 + htroot/yacysearch.java | 1 + .../de/anomic/plasma/plasmaSearchQuery.java | 9 +- .../plasma/plasmaSearchRankingProcess.java | 8 +- .../de/anomic/plasma/plasmaSwitchboard.java | 7 +- source/de/anomic/yacy/yacySeedDB.java | 2 +- source/de/anomic/yacy/yacyURL.java | 87 ++++--------------- yacy.network.unit | 1 + 13 files changed, 94 insertions(+), 105 deletions(-) diff --git a/build.properties b/build.properties index 609f6b4b2..41b7c9ab2 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.5 javacTarget=1.5 # Release Configuration -releaseVersion=0.573 +releaseVersion=0.574 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index bf4c64187..8eeabc775 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -54,6 +54,7 @@ import de.anomic.tools.crypt; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNetwork; import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; public final class search { @@ -153,7 +154,7 @@ public final class search { plasmaSearchEvent theSearch = null; if ((query.length() == 0) && (abstractSet != null)) { // this is _not_ a normal search, only a request for index abstracts - theQuery = new 
plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, client); + theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, yacyURL.TLD_any_zone_filter, client); theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); @@ -178,7 +179,7 @@ public final class search { } else { // retrieve index containers from search request - theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, client); + theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, yacyURL.TLD_any_zone_filter, client); theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); diff --git a/htroot/yacy/ui/result.java b/htroot/yacy/ui/result.java index 3779b432b..79cc2ddc2 100644 --- a/htroot/yacy/ui/result.java +++ b/htroot/yacy/ui/result.java @@ -182,6 +182,7 @@ public class result { 20, constraint, true, + yacyURL.TLD_any_zone_filter, client); diff --git a/htroot/yacy/user/sidebar_navigation.html 
b/htroot/yacy/user/sidebar_navigation.html index e7d0d9cc8..e787d2305 100644 --- a/htroot/yacy/user/sidebar_navigation.html +++ b/htroot/yacy/user/sidebar_navigation.html @@ -26,9 +26,30 @@

Language Zone:

#(/languagezone)# diff --git a/htroot/yacy/user/sidebar_navigation.java b/htroot/yacy/user/sidebar_navigation.java index 7fc614d60..a5539452c 100644 --- a/htroot/yacy/user/sidebar_navigation.java +++ b/htroot/yacy/user/sidebar_navigation.java @@ -25,7 +25,6 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import java.util.Iterator; -import java.util.Map; import java.util.Set; import java.util.TreeSet; @@ -130,24 +129,24 @@ public class sidebar_navigation { } // compose language zone drill-down - int c = 0; - final Iterator> iter = theSearch.getRankingResult().getZoneStatistics().entrySet().iterator(); - Map.Entry entry; - while (iter.hasNext()) { - entry = iter.next(); - if ((theQuery == null) || (theQuery.queryString == null)) break; - prop.putHTML("navigation_languagezone_zones_" + c + "_zone", entry.getKey() + " (" + entry.getValue() + ")"); - prop.putHTML("navigation_languagezone_zones_" + c + "_search", theQuery.queryString.replace(' ', '+')); - prop.put("navigation_languagezone_zones_" + c + "_count", theQuery.displayResults()); - prop.put("navigation_languagezone_zones_" + c + "_offset", "0"); - prop.put("navigation_languagezone_zones_" + c + "_contentdom", theQuery.contentdom()); - prop.put("navigation_languagezone_zones_" + c + "_resource", theQuery.searchdom()); - prop.put("navigation_languagezone_zones_" + c + "_zonecode", yacyURL.zone2map.get(entry.getKey()).intValue()); - prop.put("navigation_languagezone_zones", c); - c++; - } - prop.put("navigation_languagezone", (c > 2) ? 
"1" : "0"); - + final int[] zones = theSearch.getRankingResult().zones(); + boolean z = false; + domzone(prop, "All", theSearch.getRankingResult().size(), theQuery); + if (zones[yacyURL.TLD_EuropeRussia_ID] > 0) + { z = true; domzone(prop, "EuropeRussia", zones[yacyURL.TLD_EuropeRussia_ID], theQuery);} + if (zones[yacyURL.TLD_MiddleSouthAmerica_ID] > 0) + { z = true; domzone(prop, "MiddleSouthAmerica", zones[yacyURL.TLD_MiddleSouthAmerica_ID], theQuery);} + if (zones[yacyURL.TLD_SouthEastAsia_ID] > 0) + { z = true; domzone(prop, "SouthEastAsia", zones[yacyURL.TLD_SouthEastAsia_ID], theQuery);} + if (zones[yacyURL.TLD_MiddleEastWestAsia_ID] > 0) + { z = true; domzone(prop, "MiddleEastWestAsia_", zones[yacyURL.TLD_MiddleEastWestAsia_ID], theQuery);} + if (zones[yacyURL.TLD_NorthAmericaOceania_ID] + zones[yacyURL.TLD_Generic_ID] > 0) + { z = true; domzone(prop, "NorthAmericaOceania", zones[yacyURL.TLD_NorthAmericaOceania_ID] + zones[yacyURL.TLD_Generic_ID], theQuery);} + if (zones[yacyURL.TLD_Africa_ID] > 0) + { z = true; domzone(prop, "Africa", zones[yacyURL.TLD_Africa_ID], theQuery);} + if (zones[7] > 0) + { z = true; domzone(prop, "Intranet", zones[7], theQuery);} + prop.put("navigation_languagezone", (z) ? 
"1" : "0"); // compose page navigation StringBuffer resnav = new StringBuffer(); @@ -191,4 +190,13 @@ public class sidebar_navigation { "&former=" + theQuery.queryString() + "\">"; } + private static void domzone(serverObjects prop, String zonename, int zonecount, plasmaSearchQuery theQuery) { + prop.put("navigation_languagezone_" + zonename + "_count", zonecount); + prop.putHTML("navigation_languagezone_" + zonename + "_search", theQuery.queryString.replace(' ', '+')); + prop.put("navigation_languagezone_" + zonename + "_offset", "0"); + prop.put("navigation_languagezone_" + zonename + "_contentdom", theQuery.contentdom()); + prop.put("navigation_languagezone_" + zonename + "_resource", theQuery.searchdom()); + prop.put("navigation_languagezone_" + zonename, 1); + } + } diff --git a/htroot/yacy/user/ysearch.java b/htroot/yacy/user/ysearch.java index cc9f56c60..10b22f8ba 100644 --- a/htroot/yacy/user/ysearch.java +++ b/htroot/yacy/user/ysearch.java @@ -123,6 +123,8 @@ public class ysearch { constraint.set(plasmaCondenser.flag_cat_indexof, true); } + int domainzone = post.getInt("zone", yacyURL.TLD_any_zone_filter); + // SEARCH //final boolean indexDistributeGranted = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true"); //final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true"); @@ -182,6 +184,7 @@ public class ysearch { 20, constraint, true, + domainzone, client); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 66859d82e..da40f1b36 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -257,6 +257,7 @@ public class yacysearch { 20, constraint, true, + yacyURL.TLD_any_zone_filter, client); diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index a692f2796..13ac98e5d 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -118,7 +118,7 @@ public final class 
plasmaSearchQuery { this.offset = 0; this.urlMask = ".*"; this.domType = SEARCHDOM_LOCAL; - this.zonecode = yacyURL.language_domain_any_zone; + this.zonecode = yacyURL.TLD_any_zone_filter; this.domMaxTargets = 0; this.constraint = constraint; this.allofconstraint = false; @@ -136,6 +136,7 @@ public final class plasmaSearchQuery { int lines, int offset, String urlMask, int domType, String domGroupName, int domMaxTargets, kelondroBitfield constraint, boolean allofconstraint, + int domainzone, String host) { this.queryString = queryString; this.queryHashes = queryHashes; @@ -149,7 +150,7 @@ public final class plasmaSearchQuery { //this.maximumTime = Math.min(6000, maximumTime); this.urlMask = urlMask; this.domType = domType; - this.zonecode = yacyURL.language_domain_any_zone; + this.zonecode = domainzone; this.domMaxTargets = domMaxTargets; this.constraint = constraint; this.allofconstraint = allofconstraint; @@ -288,9 +289,9 @@ public final class plasmaSearchQuery { public String id(boolean anonymized) { // generate a string that identifies a search so results can be re-used in a cache if (anonymized) { - return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + ":" + this.contentdom + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString()); + return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString(); } else { - return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + ":" + this.contentdom + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString()); + return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString(); } } diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java 
b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 41b6db5d7..079845643 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -99,6 +99,10 @@ public final class plasmaSearchRankingProcess { return order.cardinal(word); } + public int[] zones() { + return this.domZones; + } + public void execQuery() { long timer = System.currentTimeMillis(); @@ -334,10 +338,6 @@ public final class plasmaSearchRankingProcess { return this.local_resourceSize; } - public Map getZoneStatistics() { - return yacyURL.zoneStatistics(this.domZones); - } - public indexRWIEntry remove(String urlHash) { kelondroSortStack.stackElement se = stack.remove(urlHash.hashCode()); if (se == null) return null; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index da2d64e84..5a600446c 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2658,8 +2658,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (yacyCore.seedDB.noDHTActivity()) { return "no DHT distribution: network too small"; } + if (!this.getConfigBool("network.unit.dht", true)) { + return "no DHT distribution: disabled by network.unit.dht"; + } if (getConfig(INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false")) { - return "no DHT distribution: not enabled"; + return "no DHT distribution: not enabled (ser setting)"; } if (wordIndex.loadedURL.size() < 10) { return "no DHT distribution: loadedURL.size() = " + wordIndex.loadedURL.size(); @@ -2673,7 +2676,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if ((getConfig(INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (sbQueue.size() > 1)) { return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + sbQueue.size(); } - 
return null; + return null; // null means: yes, please do the DHT transfer } public boolean dhtTransferJob() { diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 2820f14df..a12fd89e0 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -90,7 +90,7 @@ public final class yacySeedDB { * these hashes all shall be generated by base64.enhancedCoder */ public static final int commonHashLength = 12; - public static final int dhtActivityMagic = 32; + public static final int dhtActivityMagic = 48; public static final String[] sortFields = new String[] {yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.UPTIME, yacySeed.VERSION, yacySeed.LASTSEEN}; public static final String[] longaccFields = new String[] {yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.ISPEED}; diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index 153f86067..47ba8aac3 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -31,7 +31,6 @@ import java.io.File; import java.net.MalformedURLException; import java.util.HashMap; import java.util.Iterator; -import java.util.Map; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -44,10 +43,18 @@ import de.anomic.tools.Punycode.PunycodeException; public class yacyURL { - // TLD separation in political and cultural parts // https://www.cia.gov/cia/publications/factbook/index.html // http://en.wikipedia.org/wiki/List_of_countries_by_continent + public static final int TLD_EuropeRussia_ID = 0; // European languages but no english + public static final int TLD_MiddleSouthAmerica_ID = 1; // mainly spanish-speaking countries + public static final int TLD_SouthEastAsia_ID = 2; // asia + public static final int TLD_MiddleEastWestAsia_ID = 3; // middle east + public static final int TLD_NorthAmericaOceania_ID = 4; // english-speaking countries + public static final int TLD_Africa_ID = 5; //
africa + public static final int TLD_Generic_ID = 6; // anything else, mixed languages, mainly english + + public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter private static final String[] TLD_NorthAmericaOceania={ // primary english-speaking countries @@ -160,6 +167,7 @@ public class yacyURL { "DK=Denmark", "ES=Spain", "EE=Estonia", + "EU=Europe", "FI=Finland", "FO=Faroe Islands", // Viking Settlers "FR=France", @@ -334,27 +342,6 @@ public class yacyURL { "NT=Neutral Zone" }; - - /* - * TLDs: aero, biz, com, coop, edu, gov, info, int, mil, museum, name, net, - * org, pro, arpa AC, AD, AE, AERO, AF, AG, AI, AL, AM, AN, AO, AQ, AR, - * ARPA, AS, AT, AU, AW, AZ, BA, BB, BD, BE, BF, BG, BH, BI, BIZ, BJ, BM, - * BN, BO, BR, BS, BT, BV, BW, BY, BZ, CA, CC, CD, CF, CG, CH, CI, CK, CL, - * CM, CN, CO, COM, COOP, CR, CU, CV, CX, CY, CZ, DE, DJ, DK, DM, DO, DZ, - * EC, EDU, EE, EG, ER, ES, ET, EU, FI, FJ, FK, FM, FO, FR, GA, GB, GD, GE, - * GF, GG, GH, GI, GL, GM, GN, GOV, GP, GQ, GR, GS, GT, GU, GW, GY, HK, HM, - * HN, HR, HT, HU, ID, IE, IL, IM, IN, INFO, INT, IO, IQ, IR, IS, IT, JE, - * JM, JO, JOBS, JP, KE, KG, KH, KI, KM, KN, KR, KW, KY, KZ, LA, LB, LC, LI, - * LK, LR, LS, LT, LU, LV, LY, MA, MC, MD, MG, MH, MIL, MK, ML, MM, MN, MO, - * MOBI, MP, MQ, MR, MS, MT, MU, MUSEUM, MV, MW, MX, MY, MZ, NA, NAME, NC, - * NE, NET, NF, NG, NI, NL, NO, NP, NR, NU, NZ, OM, ORG, PA, PE, PF, PG, PH, - * PK, PL, PM, PN, PR, PRO, PS, PT, PW, PY, QA, RE, RO, RU, RW, SA, SB, SC, - * SD, SE, SG, SH, SI, SJ, SK, SL, SM, SN, SO, SR, ST, SU, SV, SY, SZ, TC, - * TD, TF, TG, TH, TJ, TK, TL, TM, TN, TO, TP, TR, TRAVEL, TT, TV, TW, TZ, - * UA, UG, UK, UM, US, UY, UZ, VA, VC, VE, VG, VI, VN, VU, WF, WS, YE, YT, - * YU, ZA, ZM, ZW - */ - public static String dummyHash; private static HashMap TLDID = new HashMap(); @@ -375,38 +362,21 @@ public class yacyURL { } } - public static final int language_domain_europe_zone = 128 + 
1; //{0, 7}; - public static final int language_domain_english_zone = 128 + 16 + 64; //{4, 6, 7}; - public static final int language_domain_spanish_zone = 128 + 2; //{1, 7}; - public static final int language_domain_asia_zone = 128 + 4; //{2, 7}; - public static final int language_domain_middleeast_zone = 128 + 8; //{3, 7}; - public static final int language_domain_africa_zone = 128 + 32; //{5, 7}; - public static final int language_domain_any_zone = 255; - - public static final HashMap zone2map = new HashMap(); - static { // create a dummy hash dummyHash = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) dummyHash += "-"; // assign TLD-ids and names - insertTLDProps(TLD_EuropeRussia, 0); // European languages but no english - insertTLDProps(TLD_MiddleSouthAmerica, 1); // mainly spanish-speaking countries - insertTLDProps(TLD_SouthEastAsia, 2); // asia - insertTLDProps(TLD_MiddleEastWestAsia, 3); // middle east - insertTLDProps(TLD_NorthAmericaOceania, 4); // english-speaking countries - insertTLDProps(TLD_Africa, 5); // africa - insertTLDProps(TLD_Generic, 6); // anything else, mixed languages, mainly english + insertTLDProps(TLD_EuropeRussia, TLD_EuropeRussia_ID); + insertTLDProps(TLD_MiddleSouthAmerica, TLD_MiddleSouthAmerica_ID); + insertTLDProps(TLD_SouthEastAsia, TLD_SouthEastAsia_ID); + insertTLDProps(TLD_MiddleEastWestAsia, TLD_MiddleEastWestAsia_ID); + insertTLDProps(TLD_NorthAmericaOceania, TLD_NorthAmericaOceania_ID); + insertTLDProps(TLD_Africa, TLD_Africa_ID); + insertTLDProps(TLD_Generic, TLD_Generic_ID); // the id=7 is used to flag local addresses - - zone2map.put("europe", language_domain_europe_zone); - zone2map.put("english", language_domain_english_zone); - zone2map.put("spanish", language_domain_spanish_zone); - zone2map.put("asia", language_domain_asia_zone); - zone2map.put("middleeast", language_domain_middleeast_zone); - zone2map.put("africa", language_domain_africa_zone); - zone2map.put("any", language_domain_any_zone); + } // class 
variables @@ -1132,27 +1102,6 @@ public class yacyURL { return language; } - public static Map zoneStatistics(int[] domAccumulators) { - assert domAccumulators.length == 8; - HashMap zoneCounter = new HashMap(); - Iterator> j; - Map.Entry entry; - for (int i = 0; i < 8; i++) { - j = zone2map.entrySet().iterator(); - while (j.hasNext()) { - entry = j.next(); - if ((i & entry.getValue().intValue()) != 0) { - if (zoneCounter.containsKey(entry.getKey())) { - zoneCounter.put(entry.getKey(), zoneCounter.get(entry.getKey()) + domAccumulators[i]); - } else { - zoneCounter.put(entry.getKey(), domAccumulators[i]); - } - } - } - } - return zoneCounter; - } - public static void main(String[] args) { String[][] test = new String[][]{ new String[]{null, "http://www.anomic.de/home/test?x=1#home"}, diff --git a/yacy.network.unit b/yacy.network.unit index 0789fb12a..b785355c7 100644 --- a/yacy.network.unit +++ b/yacy.network.unit @@ -53,6 +53,7 @@ network.unit.name = freeworld network.unit.description = Public YaCy Community network.unit.domain = global network.unit.search.time = 4 +network.unit.dht = true network.unit.dhtredundancy.junior = 1 network.unit.dhtredundancy.senior = 3 network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt