diff --git a/defaults/yacy.init b/defaults/yacy.init index da9bbc7b3..7ff695c60 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -276,13 +276,6 @@ promoteSearchPageGreeting.smallImage = /env/grafics/YaCyLogo_60ppi.png # when the secondary path should be equal to the primary, it must be declared empty indexPrimaryPath=DATA/INDEX -# the commons are words that appear in the index more than 64k times in references. -# Since indexes with such size cannot be handled efficiently, they are sorted in such a way that references with high ranking -# are stored back into the index, and references with bad ranking are sorted out. Such sorted-out references can be stored -# for later use (but there is no at this time). If the sorted-out references should be stored, the following property should be -# set to true. If set to false, they are abandoned (deleted), and previously stored commons are removed. -index.storeCommons=false - # the path to the LISTS files. Most lists are used to filter web content listsPath=DATA/LISTS @@ -749,23 +742,6 @@ indexDistribution.maxChunkSize = 1000 indexDistribution.startChunkSize = 200 indexDistribution.maxChunkFails = 1 -# Distribution of Citation-Reference (CR-) files -# The distribution is done in two steps: -# first step to anonymize the records -# second step to forward to a collecting peer -# to anonymize the data even against the intermediate peer -# a specific precentage is also sent again to other peers. -# for key-numbers please see de.anomic.plasma.plasmaRankingDistribution -CRDistOn = true -CRDist0Path = GLOBAL/010_owncr -CRDist0Method = 1 -CRDist0Percent = 0 -CRDist0Target = -CRDist1Path = GLOBAL/014_othercr -CRDist1Method = 9 -CRDist1Percent = 30 -CRDist1Target = kaskelix.de:8080,yacy.dyndns.org:8000 - # Search sequence settings # collection: # time = time to get a RWI out of RAM cache, assortments and WORDS files @@ -834,9 +810,6 @@ searchProcessRemoteCount_s = 10 timeout_text = 10000 timeout_media = 15000 -# path to ranking directory containing ranking reference files -rankingPath = DATA/RANKING - # a list of domain name patterns that should not be cached by the httpc dns cache httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu diff --git a/htroot/Network.html b/htroot/Network.html index 25a8660b0..e21ccca66 100644 --- a/htroot/Network.html +++ b/htroot/Network.html @@ -121,7 +121,6 @@ To see a list of all APIs, please visit the http://#[ip]#:#[port]# #[hash]# - #[CRWCnt]#/#[CRTCnt]# #[age]# #[seeds]# #[connects]# diff --git a/htroot/Network.java b/htroot/Network.java index 71c1e1bec..6d01fa207 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -375,8 +375,6 @@ public class Network { prop.put(STR_TABLE_LIST + conCount + "_complete_port", seed.get(yacySeed.PORT, "-") ); prop.put(STR_TABLE_LIST + conCount + "_complete_hash", seed.hash); prop.put(STR_TABLE_LIST + conCount + "_complete_age", seed.getAge()); - prop.putNum(STR_TABLE_LIST + conCount + "_complete_CRWCnt", Long.parseLong(seed.get(yacySeed.CRWCNT, "0"))); - prop.putNum(STR_TABLE_LIST + conCount + "_complete_CRTCnt", Long.parseLong(seed.get(yacySeed.CRTCNT, "0"))); prop.putNum(STR_TABLE_LIST + conCount + "_complete_seeds", Long.parseLong(seed.get(yacySeed.SCOUNT, "0"))); prop.putNum(STR_TABLE_LIST + conCount + "_complete_connects", Double.parseDouble(seed.get(yacySeed.CCOUNT, "0"))); prop.putHTML(STR_TABLE_LIST + conCount + "_complete_userAgent", userAgent); diff --git a/htroot/yacy/transfer.html b/htroot/yacy/transfer.html deleted file mode 100644 index e7b0e7a96..000000000 --- a/htroot/yacy/transfer.html +++ /dev/null @@ -1,12 +0,0 @@ -version=#[version]# -uptime=#[uptime]# -response=#[response]# -#(process)# -access=#[access]# -address=#[address]# -protocol=#[protocol]# -path=#[path]# -maxsize=#[maxsize]# -:: -tt=#[tt]# -#(/process)# \ No newline at end of file diff --git a/htroot/yacy/transfer.java b/htroot/yacy/transfer.java deleted file mode 100644 index 1c87a4e12..000000000 --- a/htroot/yacy/transfer.java +++ /dev/null @@ -1,151 +0,0 @@ -// transfer.java -// ----------------------- -// part of YaCy caching proxy -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// created 07.11.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -import java.io.File; -import java.io.IOException; - -import net.yacy.cora.protocol.HeaderFramework; -import net.yacy.cora.protocol.RequestHeader; -import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.Digest; -import net.yacy.kelondro.util.FileUtils; - -import de.anomic.search.Switchboard; -import de.anomic.search.blockrank.CRDistribution; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.yacy.yacyNetwork; -import de.anomic.yacy.yacySeed; - -public final class transfer { - - public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - final Switchboard sb = (Switchboard) env; - final serverObjects prop = new serverObjects(); - if ((post == null) || (env == null)) return prop; - if (!yacyNetwork.authentifyRequest(post, env)) return prop; - - final String process = post.get("process", ""); // permission or store - //String key = post.get("key", ""); // a transmission key from the client - final String otherpeer = post.get("iam", ""); // identification of the client (a peer-hash) - final String purpose = post.get("purpose", ""); // declares how the file shall be treated - final String filename = post.get("filename", ""); // a name of a file without path - //long filesize = Long.parseLong((String) post.get("filesize", "")); // the size of the file - - prop.put("process", "0"); - prop.put("response", "denied"); // reject is default and is overwritten if ok - prop.put("process_access", ""); - prop.put("process_address", ""); - prop.put("process_protocol", ""); - prop.put("process_path", ""); - prop.put("process_maxsize", "0"); - - if (sb.isRobinsonMode() || !sb.rankingOn) { - // in a robinson environment, do not answer. We do not do any transfer in a robinson cluster. - return prop; - } - - final yacySeed otherseed = sb.peers.get(otherpeer); - if ((otherseed == null) || (filename.indexOf("..") >= 0)) { - // reject unknown peers: this does not appear fair, but anonymous senders are dangerous - // reject paths that contain '..' because they are dangerous - if (sb.getLog().isFine()) { - if (otherseed == null) sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + otherpeer + "', current IP " + header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, "unknown")); - if (filename.indexOf("..") >= 0) sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + (otherseed == null ? "null" : otherseed.getName() + "/" + otherseed.getPublicAddress()) + ", current IP " + header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, "unknown")); - } - return prop; - } - - final String otherpeerName = otherseed.hash + ":" + otherseed.getName(); - - if (process.equals("permission")) { - prop.put("process", "0"); - if (((purpose.equals("crcon")) && (filename.startsWith("CRG")) && (filename.endsWith(".cr.gz"))) || ((filename.startsWith("domlist")) && (filename.endsWith(".txt.gz") || filename.endsWith(".zip")))) { - // consolidation of cr files - //System.out.println("yacy/transfer:post=" + post.toString()); - //String cansendprotocol = (String) post.get("can-send-protocol", "http"); - final String access = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(otherpeer + ":" + filename)) + ":" + Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(System.currentTimeMillis()))); - prop.put("response", "ok"); - prop.put("process_access", access); - prop.put("process_address", sb.peers.mySeed().getPublicAddress()); - prop.put("process_protocol", "http"); - prop.put("process_path", ""); // currently empty; the store process will find a path - prop.put("process_maxsize", "-1"); // if response is too big we return the size of the file - sb.rankingPermissions.put(Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(access)), filename); - if (sb.getLog().isFine()) sb.getLog().logFine("RankingTransmission: granted peer " + otherpeerName + " to send CR file " + filename); - } - return prop; - } - - if (process.equals("store")) { - prop.put("process", "1"); - if (purpose.equals("crcon")) { - final String fileString = post.get("filename$file"); - final String accesscode = post.get("access", ""); // one-time authentication - final String md5 = post.get("md5", ""); // one-time authentication - //java.util.HashMap perm = sb.rankingPermissions; - //System.out.println("PERMISSIONDEBUG: accesscode=" + accesscode + ", permissions=" + perm.toString()); - final String grantedFile = sb.rankingPermissions.get(accesscode); - prop.put("process_tt", ""); - if ((grantedFile == null) || (!(grantedFile.equals(filename)))) { - // fraud-access of this interface - prop.put("response", "denied"); - if (sb.getLog().isFine()) sb.getLog().logFine("RankingTransmission: denied " + otherpeerName + " to send CR file " + filename + ": wrong access code"); - } else { - sb.rankingPermissions.remove(accesscode); // not needed any more - final File path = new File(sb.rankingPath, CRDistribution.CR_OTHER); - path.mkdirs(); - final File file = new File(path, filename); - try { - if (file.getCanonicalPath().startsWith(path.getCanonicalPath())){ - FileUtils.copy(fileString.getBytes(), file); - final String md5t = Digest.encodeMD5Hex(file); - if (md5t.equals(md5)) { - prop.put("response", "ok"); - if (sb.getLog().isFine()) sb.getLog().logFine("RankingTransmission: received from peer " + otherpeerName + " CR file " + filename); - } else { - prop.put("response", "transfer failure"); - if (sb.getLog().isFine()) sb.getLog().logFine("RankingTransmission: transfer failure from peer " + otherpeerName + " for CR file " + filename); - } - }else{ - //exploit? - prop.put("response", "io error"); - return prop; - } - } catch (final IOException e) { - prop.put("response", "io error"); - } - } - } - return prop; - } - - // wrong access - if (sb.getLog().isFine()) sb.getLog().logFine("RankingTransmission: rejected unknown process " + process + ":" + purpose + " from peer " + otherpeerName); - return prop; - } - -} diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index b36d00f81..f3dd579d8 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -665,6 +665,13 @@ public final class RankingProcess extends Thread { return this.authorNavigator; } + /** + * load YaCy Block Rank tables + * These tables have a very simple structure: every file is a sequence of Domain hashes, ordered by b64. + * Each Domain hash has a length of 6 bytes and there is no separation character between the hashes + * @param rankingPath + * @param count + */ public static void loadYBR(final File rankingPath, final int count) { // load ranking tables if (rankingPath.exists()) { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 84244ac7a..b0c6c0b62 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -147,7 +147,6 @@ import de.anomic.http.client.Cache; import de.anomic.http.server.HTTPDemon; import de.anomic.http.server.RobotsTxtConfig; import de.anomic.net.UPnP; -import de.anomic.search.blockrank.CRDistribution; import de.anomic.server.serverSwitch; import de.anomic.server.serverCore; import de.anomic.tools.crypt; @@ -190,14 +189,12 @@ public final class Switchboard extends serverSwitch { public File dictionariesPath; public File listsPath; public File htDocsPath; - public File rankingPath; public File workPath; public File releasePath; public File networkRoot; public File queuesRoot; public File surrogatesInPath; public File surrogatesOutPath; - public Map rankingPermissions; public Segments indexSegments; public LoaderDispatcher loader; public CrawlSwitchboard crawler; @@ -209,9 +206,6 @@ public final class Switchboard extends serverSwitch { public BlogBoard blogDB; public BlogBoardComments blogCommentDB; public RobotsTxt robots; - public boolean rankingOn; - public CRDistribution rankingOwnDistribution; - public CRDistribution rankingOtherDistribution; public Map outgoingCookies, incomingCookies; public volatile long proxyLastAccess, localSearchLastAccess, remoteSearchLastAccess; public yacyCore yc; @@ -286,9 +280,6 @@ public final class Switchboard extends serverSwitch { this.log.logConfig("Lists Path: " + this.listsPath.toString()); this.htDocsPath = getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT); this.log.logConfig("HTDOCS Path: " + this.htDocsPath.toString()); - this.rankingPath = getDataPath(SwitchboardConstants.RANKING_PATH, SwitchboardConstants.RANKING_PATH_DEFAULT); - this.log.logConfig("Ranking Path: " + this.rankingPath.toString()); - this.rankingPermissions = new HashMap(); // mapping of permission - to filename. this.workPath = getDataPath(SwitchboardConstants.WORK_PATH, SwitchboardConstants.WORK_PATH_DEFAULT); this.log.logConfig("Work Path: " + this.workPath.toString()); this.dictionariesPath = getDataPath(SwitchboardConstants.DICTIONARY_SOURCE_PATH, SwitchboardConstants.DICTIONARY_SOURCE_PATH_DEFAULT); @@ -391,7 +382,7 @@ public final class Switchboard extends serverSwitch { this.proxyLastAccess = System.currentTimeMillis() - 10000; this.localSearchLastAccess = System.currentTimeMillis() - 10000; this.remoteSearchLastAccess = System.currentTimeMillis() - 10000; - this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map")); + this.webStructure = new WebStructureGraph(log, new File(queuesRoot, "webStructure.map")); // configuring list path if (!(listsPath.exists())) listsPath.mkdirs(); @@ -538,22 +529,6 @@ public final class Switchboard extends serverSwitch { } catch (final IOException e) { } - // init ranking transmission - /* - CRDistOn = true/false - CRDist0Path = GLOBAL/010_owncr - CRDist0Method = 1 - CRDist0Percent = 0 - CRDist0Target = - CRDist1Path = GLOBAL/014_othercr/1 - CRDist1Method = 9 - CRDist1Percent = 30 - CRDist1Target = kaskelix.de:8080,yacy.dyndns.org:8000,suma-lab.de:8080 - **/ - rankingOn = getConfig(SwitchboardConstants.RANKING_DIST_ON, "true").equals("true") && networkName.equals("freeworld"); - rankingOwnDistribution = new CRDistribution(log, peers, new File(rankingPath, getConfig(SwitchboardConstants.RANKING_DIST_0_PATH, CRDistribution.CR_OWN)), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_0_METHOD, CRDistribution.METHOD_ANYSENIOR), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_0_METHOD, 0), getConfig(SwitchboardConstants.RANKING_DIST_0_TARGET, "")); - rankingOtherDistribution = new CRDistribution(log, peers, new File(rankingPath, getConfig(SwitchboardConstants.RANKING_DIST_1_PATH, CRDistribution.CR_OTHER)), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_1_METHOD, CRDistribution.METHOD_MIXEDSENIOR), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_1_METHOD, 30), getConfig(SwitchboardConstants.RANKING_DIST_1_TARGET, "kaskelix.de:8080,yacy.dyndns.org:8000")); - // init nameCacheNoCachingList Domains.setNoCachingPatterns(getConfig(SwitchboardConstants.HTTPC_NAME_CACHE_CACHING_PATTERNS_NO,"")); @@ -926,7 +901,7 @@ public final class Switchboard extends serverSwitch { 10000); // create new web structure - this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map")); + this.webStructure = new WebStructureGraph(log, new File(queuesRoot, "webStructure.map")); // load domainList @@ -1224,7 +1199,6 @@ public final class Switchboard extends serverSwitch { userDB.close(); bookmarksDB.close(); messageDB.close(); - webStructure.flushCitationReference("crg"); webStructure.close(); crawlQueues.close(); crawler.close(); @@ -1586,21 +1560,8 @@ public final class Switchboard extends serverSwitch { } // close unused connections -// de.anomic.http.client.Client.cleanup(); ConnectionInfo.cleanUp(); - // do transmission of CR-files - /* - checkInterruption(); - int count = rankingOwnDistribution.size() / 100; - if (count == 0) count = 1; - if (count > 5) count = 5; - if (rankingOn && !isRobinsonMode()) { - rankingOwnDistribution.transferRanking(count); - rankingOtherDistribution.transferRanking(1); - } - */ - // clean up delegated stack checkInterruption(); if ((crawlQueues.delegatedURL.stackSize() > 1000)) { diff --git a/source/de/anomic/search/SwitchboardConstants.java b/source/de/anomic/search/SwitchboardConstants.java index b383198c8..14cd2d4a0 100644 --- a/source/de/anomic/search/SwitchboardConstants.java +++ b/source/de/anomic/search/SwitchboardConstants.java @@ -234,15 +234,6 @@ public final class SwitchboardConstants { public static final String INDEX_DIST_ALLOW_WHILE_INDEXING = "allowDistributeIndexWhileIndexing"; public static final String INDEX_TRANSFER_TIMEOUT = "indexTransfer.timeout"; public static final String INDEX_TRANSFER_GZIP_BODY = "indexTransfer.gzipBody"; - public static final String RANKING_DIST_ON = "CRDistOn"; - public static final String RANKING_DIST_0_PATH = "CRDist0Path"; - public static final String RANKING_DIST_0_METHOD = "CRDist0Method"; - public static final String RANKING_DIST_0_PERCENT = "CRDist0Percent"; - public static final String RANKING_DIST_0_TARGET = "CRDist0Target"; - public static final String RANKING_DIST_1_PATH = "CRDist1Path"; - public static final String RANKING_DIST_1_METHOD = "CRDist1Method"; - public static final String RANKING_DIST_1_PERCENT = "CRDist1Percent"; - public static final String RANKING_DIST_1_TARGET = "CRDist1Target"; public static final String PARSER_MIME_DENY = "parser.mime.deny"; /** *

public static final String PROXY_ONLINE_CAUTION_DELAY = "onlineCautionDelay"

@@ -359,16 +350,6 @@ public final class SwitchboardConstants { */ public static final String LISTS_PATH = "listsPath"; public static final String LISTS_PATH_DEFAULT = "DATA/LISTS"; - /** - *

public static final String RANKING_PATH = "rankingPath"

- *

Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where all - * ranking files are stored, self-generated as well as received ranking files

- * - * @see Switchboard#RANKING_DIST_0_PATH - * @see Switchboard#RANKING_DIST_1_PATH - */ - public static final String RANKING_PATH = "rankingPath"; - public static final String RANKING_PATH_DEFAULT = "DATA/RANKING"; /** *

public static final String WORK_PATH = "wordPath"

*

Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where all diff --git a/source/de/anomic/search/blockrank/CRDistribution.java b/source/de/anomic/search/blockrank/CRDistribution.java deleted file mode 100644 index 6747093d0..000000000 --- a/source/de/anomic/search/blockrank/CRDistribution.java +++ /dev/null @@ -1,196 +0,0 @@ -// plasmaRankingDistribution.java -// ------------------------------------------- -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// created 9.11.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.search.blockrank; - -import java.io.File; -import java.io.IOException; -import java.util.Random; -import java.util.StringTokenizer; - -import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.util.FileUtils; - -import de.anomic.yacy.yacyClient; -import de.anomic.yacy.yacySeed; -import de.anomic.yacy.yacySeedDB; -import de.anomic.yacy.yacyVersion; - -public final class CRDistribution { - - public static final String CR_OWN = "GLOBAL/010_owncr"; - public static final String CR_OTHER = "GLOBAL/014_othercr/"; - - public static final int METHOD_NONE = 0; - public static final int METHOD_ANYSENIOR = 1; - public static final int METHOD_ANYPRINCIPAL = 2; - public static final int METHOD_MIXEDSENIOR = 9; - public static final int METHOD_MIXEDPRINCIPAL = 10; - public static final int METHOD_FIXEDADDRESS = 99; - - private final Log log; - private final File sourcePath; // where to load CR-files - private int method; // of peer selection - private int percentage; // to select any other peer - private String address[]; // of fixed other peer - private final yacySeedDB seedDB; - private static Random random = new Random(System.currentTimeMillis()); - - public CRDistribution(final Log log, final yacySeedDB seedDB, final File sourcePath, final int method, final int percentage, final String addresses) { - this.log = log; - this.seedDB = seedDB; - this.sourcePath = sourcePath; - this.method = method; - this.percentage = percentage; - StringTokenizer st = new StringTokenizer(addresses, ","); - int c = 0; while (st.hasMoreTokens()) {st.nextToken(); c++;} - st = new StringTokenizer(addresses, ","); - this.address = new String[c]; - c = 0; - while (st.hasMoreTokens()) {this.address[c++] = st.nextToken();} - } - - public void setMethod(final int method, final int percentage, final String address[]) { - this.method = method; - this.percentage = percentage; - this.address = address; - } - - public int size() { - if ((sourcePath.exists()) && (sourcePath.isDirectory())) - return sourcePath.list().length; - return 0; - } - - public boolean transferRanking(int count) throws InterruptedException { - - if (method == METHOD_NONE) { - log.logFine("no ranking distribution: no transfer method given"); - return false; - } - if (seedDB == null) { - log.logFine("no ranking distribution: seedDB == null"); - return false; - } - if (seedDB.mySeed() == null) { - log.logFine("no ranking distribution: mySeed == null"); - return false; - } - if (seedDB.mySeed().isVirgin()) { - log.logFine("no ranking distribution: status is virgin"); - return false; - } - - final String[] outfiles = sourcePath.list(); - - if (outfiles == null) { - log.logFine("no ranking distribution: source path does not exist"); - return false; - } - if (outfiles.length == 0) { - log.logFine("no ranking distribution: source path does not contain any file"); - return false; - } - - if (outfiles.length < count) count = outfiles.length; - File crfile = null; - - for (int i = 0; i < count; i++) { - // check for interruption - if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress"); - - // getting the next file to transfer - crfile = new File(sourcePath, outfiles[i]); - - if ((method == METHOD_ANYSENIOR) || (method == METHOD_ANYPRINCIPAL)) { - transferRankingAnySeed(crfile, 5); - } - if (method == METHOD_FIXEDADDRESS) { - transferRankingAddress(crfile); - } - if ((method == METHOD_MIXEDSENIOR) || (method == METHOD_MIXEDPRINCIPAL)) { - if (random.nextInt(100) > percentage) { - if (!(transferRankingAddress(crfile))) transferRankingAnySeed(crfile, 5); - } else { - if (!(transferRankingAnySeed(crfile, 5))) transferRankingAddress(crfile); - } - } - - } - log.logFine("no ranking distribution: no target available"); - return false; - } - - private boolean transferRankingAnySeed(final File crfile, final int trycount) throws InterruptedException { - yacySeed target = null; - for (int j = 0; j < trycount; j++) { - // check for interruption - if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress"); - - // get next target - target = seedDB.anySeedVersion(yacyVersion.YACY_ACCEPTS_RANKING_TRANSMISSION); - - if (target == null) continue; - final String targetaddress = target.getPublicAddress(); - if (transferRankingAddress(crfile, targetaddress)) return true; - } - return false; - } - - private boolean transferRankingAddress(final File crfile) throws InterruptedException { - // try all addresses - for (int i = 0; i < this.address.length; i++) { - // check for interruption - if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress"); - - // try to transfer ranking address using the next address - if (transferRankingAddress(crfile, this.address[i])) return true; - } - return false; - } - - private boolean transferRankingAddress(final File crfile, final String address) { - // do the transfer - final long starttime = System.currentTimeMillis(); - String result = "unknown"; - try { - final byte[] b = FileUtils.read(crfile); - result = yacyClient.transfer(address, crfile.getName(), b); - if (result == null) { - log.logInfo("RankingDistribution - transmitted file " + crfile + " to " + address + " successfully in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds"); - FileUtils.deletedelete(crfile); // the file is not needed any more locally - } else { - log.logInfo("RankingDistribution - error transmitting file " + crfile + " to " + address + ": " + result); - } - } catch (final IOException e) { - log.logInfo("RankingDistribution - could not read file " + crfile + ": " + e.getMessage()); - result = "input file error: " + e.getMessage(); - } - - // show success - return result == null; - } - -} \ No newline at end of file diff --git a/source/de/anomic/search/blockrank/CRProcess.java b/source/de/anomic/search/blockrank/CRProcess.java deleted file mode 100644 index 4b3ebc97b..000000000 --- a/source/de/anomic/search/blockrank/CRProcess.java +++ /dev/null @@ -1,586 +0,0 @@ -// plasmaCRProcess.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// Created 15.11.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.search.blockrank; - -import java.io.File; -import java.io.IOException; -import java.util.Date; -import java.util.Iterator; - -import net.yacy.kelondro.data.word.WordReference; -import net.yacy.kelondro.index.Index; -import net.yacy.kelondro.index.Row; -import net.yacy.kelondro.index.RowSet; -import net.yacy.kelondro.index.RowSpaceExceededException; -import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.Bitfield; -import net.yacy.kelondro.order.CloneableIterator; -import net.yacy.kelondro.order.MicroDate; -import net.yacy.kelondro.rwi.IndexCell; -import net.yacy.kelondro.rwi.ReferenceContainer; -import net.yacy.kelondro.table.Table; -import net.yacy.kelondro.util.AttrSeq; -import net.yacy.kelondro.util.DateFormatter; -import net.yacy.kelondro.util.FileUtils; -import net.yacy.kelondro.util.MemoryControl; - -import de.anomic.search.Segment; - -public class CRProcess { - - /* - header.append("# Name=YaCy " + ((type.equals("crl")) ? "Local" : "Global") + " Citation Reference Ticket"); header.append((char) 13); header.append((char) 10); - header.append("# Created=" + System.currentTimeMillis()); header.append((char) 13); header.append((char) 10); - header.append("# Structure=,'=',,,,,,,,,,,'|',*"); header.append((char) 13); header.append((char) 10); - header.append("# ---"); header.append((char) 13); header.append((char) 10); - */ - - /* - private static final int Col_Referee = 0; - private static final int Col_UDate = 1; - private static final int Col_VDate = 2; - private static final int Col_LCount = 3; - private static final int Col_GCount = 4; - private static final int Col_ICount = 5; - private static final int Col_DCount = 6; - private static final int Col_TLength = 7; - private static final int Col_WACount = 8; - private static final int Col_WUCount = 9; - */ - private static final int Col_Flags = 10; - private static final int Col_FUDate = 11; - private static final int Col_FDDate = 12; - private static final int Col_LUDate = 13; - private static final int Col_UCount = 14; - private static final int Col_PCount = 15; - private static final int Col_ACount = 16; - private static final int Col_VCount = 17; - private static final int Col_Vita = 18; - - public static final Row CRG_accrow = new Row( - "byte[] Referee-12," + - "Cardinal UDate-3 {b64e}, Cardinal VDate-3 {b64e}, " + - "Cardinal LCount-2 {b64e}, Cardinal GCount-2 {b64e}, Cardinal ICount-2 {b64e}, Cardinal DCount-2 {b64e}, Cardinal TLength-3 {b64e}, " + - "Cardinal WACount-3 {b64e}, Cardinal WUCount-3 {b64e}, Cardinal Flags-1 {b64e}, " + - "Cardinal FUDate-3 {b64e}, Cardinal FDDate-3 {b64e}, Cardinal LUDate-3 {b64e}, " + - "Cardinal UCount-2 {b64e}, Cardinal PCount-2 {b64e}, Cardinal ACount-2 {b64e}, Cardinal VCount-2 {b64e}, Cardinal Vita-2 {b64e}", - Base64Order.enhancedCoder); - public static final Row CRG_colrow = new Row("byte[] Anchor-12", Base64Order.enhancedCoder); - public static final String CRG_accname = "CRG-a-attr"; - public static final String CRG_seqname = "CRG-a-coli"; - public static final Row RCI_coli = new Row("byte[] RefereeDom-6", Base64Order.enhancedCoder); - public static final String RCI_colname = "RCI-a-coli"; - - private static boolean accumulate_upd(final File f, final AttrSeq acc) { - // open file - AttrSeq source_cr = null; - try { - source_cr = new AttrSeq(f, false); - } catch (final IOException e) { - return false; - } - - // put elements in accumulator file - final Iterator el = source_cr.keys(); - String key; - AttrSeq.Entry new_entry, acc_entry; - int FUDate, FDDate, LUDate, UCount, PCount, ACount, VCount, Vita; - Bitfield acc_flags, new_flags; - while (el.hasNext()) { - key = el.next(); - new_entry = source_cr.getEntry(key); - new_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(new_entry.getAttr("Flags", 0), 1).getBytes()); - // enrich information with additional values - if ((acc_entry = acc.getEntry(key)) != null) { - FUDate = (int) acc_entry.getAttr("FUDate", 0); - FDDate = (int) acc_entry.getAttr("FDDate", 0); - LUDate = (int) acc_entry.getAttr("LUDate", 0); - UCount = (int) acc_entry.getAttr("UCount", 0); - PCount = (int) acc_entry.getAttr("PCount", 0); - ACount = (int) acc_entry.getAttr("ACount", 0); - VCount = (int) acc_entry.getAttr("VCount", 0); - Vita = (int) acc_entry.getAttr("Vita", 0); - - // update counters and dates - acc_entry.setSeq(new_entry.getSeqSet()); // need to be checked - - UCount++; // increase update counter - PCount += (new_flags.get(1)) ? 1 : 0; - ACount += (new_flags.get(2)) ? 1 : 0; - VCount += (new_flags.get(3)) ? 1 : 0; - - // 'OR' the flags - acc_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(acc_entry.getAttr("Flags", 0), 1).getBytes()); - for (int i = 0; i < 6; i++) { - if (new_flags.get(i)) acc_flags.set(i, true); - } - acc_entry.setAttr("Flags", (int) Base64Order.enhancedCoder.decodeLong(acc_flags.exportB64())); - } else { - // initialize counters and dates - acc_entry = acc.newEntry(key, new_entry.getAttrs(), new_entry.getSeqSet()); - FUDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // first update date - FDDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack - LUDate = (int) new_entry.getAttr("VDate", 0); - UCount = 0; - PCount = (new_flags.get(1)) ? 1 : 0; - ACount = (new_flags.get(2)) ? 1 : 0; - VCount = (new_flags.get(3)) ? 1 : 0; - Vita = 0; - } - // make plausibility check? - - // insert into accumulator - acc_entry.setAttr("FUDate", FUDate); - acc_entry.setAttr("FDDate", FDDate); - acc_entry.setAttr("LUDate", LUDate); - acc_entry.setAttr("UCount", UCount); - acc_entry.setAttr("PCount", PCount); - acc_entry.setAttr("ACount", ACount); - acc_entry.setAttr("VCount", VCount); - acc_entry.setAttr("Vita", Vita); - acc.putEntrySmall(acc_entry); - } - - return true; - } - - public static boolean accumulate_upd(final File f, final Index acc) throws IOException, RowSpaceExceededException { - // open file - AttrSeq source_cr = null; - try { - source_cr = new AttrSeq(f, false); - } catch (final IOException e) { - return false; - } - - // put elements in accumulator file - final Iterator el = source_cr.keys(); - String key; - AttrSeq.Entry new_entry; - Row.Entry acc_entry; - int FUDate, FDDate, LUDate, UCount, PCount, ACount, VCount, Vita; - Bitfield acc_flags, new_flags; - while (el.hasNext()) { - key = el.next(); - new_entry = source_cr.getEntry(key); - new_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(new_entry.getAttr("Flags", 0), 1).getBytes()); - // enrich information with additional values - if ((acc_entry = acc.get(key.getBytes())) != null) { - FUDate = (int) acc_entry.getColLong(Col_FUDate); - FDDate = (int) acc_entry.getColLong(Col_FDDate); - LUDate = (int) acc_entry.getColLong(Col_LUDate); - UCount = (int) acc_entry.getColLong(Col_UCount); - PCount = (int) acc_entry.getColLong(Col_PCount); - ACount = (int) acc_entry.getColLong(Col_ACount); - VCount = (int) acc_entry.getColLong(Col_VCount); - Vita = (int) acc_entry.getColLong(Col_Vita); - - // update counters and dates - //seq.add(key.getBytes(), new_entry.getSeqCollection()); - - UCount++; // increase update counter - PCount += (new_flags.get(1)) ? 1 : 0; - ACount += (new_flags.get(2)) ? 1 : 0; - VCount += (new_flags.get(3)) ? 1 : 0; - - // 'OR' the flags - acc_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(acc_entry.getColLong(Col_Flags), 1).getBytes()); - for (int i = 0; i < 6; i++) { - if (new_flags.get(i)) acc_flags.set(i, true); - } - acc_entry.setCol(Col_Flags, (int) Base64Order.enhancedCoder.decodeLong(acc_flags.exportB64())); - } else { - // initialize counters and dates - acc_entry = acc.row().newEntry(); - acc_entry.setCol(0, key, null); - for (int i = 1; i < acc.row().columns(); i++) { - acc_entry.setCol(i, new_entry.getAttr(acc.row().column(i).nickname, 0)); - } - //seq.put(key.getBytes(), new_entry.getSeqCollection()); - FUDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // first update date - FDDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack - LUDate = (int) new_entry.getAttr("VDate", 0); - UCount = 0; - PCount = (new_flags.get(1)) ? 1 : 0; - ACount = (new_flags.get(2)) ? 1 : 0; - VCount = (new_flags.get(3)) ? 1 : 0; - Vita = 0; - } - // make plausibility check? - - // insert into accumulator - acc_entry.setCol(Col_FUDate, FUDate); - acc_entry.setCol(Col_FDDate, FDDate); - acc_entry.setCol(Col_LUDate, LUDate); - acc_entry.setCol(Col_UCount, UCount); - acc_entry.setCol(Col_PCount, PCount); - acc_entry.setCol(Col_ACount, ACount); - acc_entry.setCol(Col_VCount, VCount); - acc_entry.setCol(Col_Vita, Vita); - acc.put(acc_entry); - } - - return true; - } - - public static void accumulate( - final File from_dir, - final File tmp_dir, - final File err_dir, - final File bkp_dir, - final File to_file, - int max_files, - final boolean newdb) throws IOException, RowSpaceExceededException { - if (!(from_dir.isDirectory())) { - System.out.println("source path " + from_dir + " is not a directory."); - return; - } - if (!(tmp_dir.isDirectory())) { - System.out.println("temporary path " + tmp_dir + " is not a directory."); - return; - } - if (!(err_dir.isDirectory())) { - System.out.println("error path " + err_dir + " is not a directory."); - return; - } - if (!(bkp_dir.isDirectory())) { - System.out.println("back-up path " + bkp_dir + " is not a directory."); - return; - } - - // open target file - AttrSeq acc = null; - Index newacc = null; - IndexCell newseq = null; - if (newdb) { - final File path = to_file.getParentFile(); // path to storage place - newacc = new Table(new File(path, CRG_accname), CRG_accrow, 0, 0, true, false); - newseq = new IndexCell( - path, - "index", - Segment.wordReferenceFactory, - Base64Order.enhancedCoder, - CRG_colrow, - 10000, 1000000000L, 20, null, 1000000); - } else { - if (!(to_file.exists())) { - acc = new AttrSeq("Global Ranking Accumulator File", - ",'='," + - ",,,,,,,,,," + - ",,,,,,,," + - "'|',*", false); - acc.toFile(to_file); - } - acc = new AttrSeq(to_file, false); - } - // collect source files - File source_file = null; - final String[] files = from_dir.list(); - if (files.length < max_files) max_files = files.length; - for (int i = 0; i < max_files; i++) { - // open file - source_file = new File(from_dir, files[i]); - if (newdb) { - /* - if (accumulate_upd(source_file, newacc, newseq)) { - // move CR file to temporary folder - source_file.renameTo(new File(tmp_dir, files[i])); - } else { - // error case: the CR-file is not valid; move to error path - source_file.renameTo(new File(err_dir, files[i])); - } - */ - } else { - if (accumulate_upd(source_file, acc)) { - // move CR file to temporary folder - source_file.renameTo(new File(tmp_dir, files[i])); - } else { - // error case: the CR-file is not valid; move to error path - source_file.renameTo(new File(err_dir, files[i])); - } - } - } - - try { - if (newdb) { - newacc.close(); - newseq.close(); - } else { - // save accumulator to temporary file - File tmp_file; - if (to_file.toString().endsWith(".gz")) { - tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".prt.gz"); - } else { - tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".prt"); - } - // store the file - acc.toFile(tmp_file); - // since this was successful, we remove the old file and move the new file to it - FileUtils.deletedelete(to_file); - tmp_file.renameTo(to_file); - } - FileUtils.moveAll(tmp_dir, bkp_dir); - } catch (final IOException e) { - // move previously processed files back - Log.logException(e); - FileUtils.moveAll(tmp_dir, from_dir); - } - - } - - public static int genrci(File cr_in, final File rci_out) throws IOException { - if (!(cr_in.exists())) return 0; - AttrSeq cr = new AttrSeq(cr_in, false); - //if (rci_out.exists()) FileUtils.deletedelete(rci_out); // we want only fresh rci here (during testing) - if (!(rci_out.exists())) { - final AttrSeq rcix = new AttrSeq("Global Ranking Reverse Citation Index", - ",'='," + - "," + - "'|',*", false); - rcix.toFile(rci_out); - } - final AttrSeq rci = new AttrSeq(rci_out, false); - - // loop over all referees - int count = 0; - final int size = cr.size(); - final long start = System.currentTimeMillis(); - long l; - final Iterator i = cr.keys(); - String referee, anchor, anchorDom; - AttrSeq.Entry cr_entry, rci_entry; - long cr_UDate, rci_UDate; - while (i.hasNext()) { - referee = i.next(); - cr_entry = cr.getEntry(referee); - cr_UDate = cr_entry.getAttr("UDate", 0); - - // loop over all anchors - final Iterator j = cr_entry.getSeqSet().iterator(); - while (j.hasNext()) { - // get domain of anchors - anchor = j.next(); - if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6); - - // update domain-specific entry - rci_entry = rci.getEntry(anchorDom); - if (rci_entry == null) rci_entry = rci.newEntry(anchorDom, false); - rci_entry.addSeq(referee); - - // update Update-Date - rci_UDate = rci_entry.getAttr("UDate", 0); - if (cr_UDate > rci_UDate) rci_entry.setAttr("UDate", cr_UDate); - - // insert entry - rci.putEntry(rci_entry); - } - count++; - if ((count % 1000) == 0) { - l = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("processed " + count + " citations, " + (count / l) + " per second, rci.size = " + rci.size() + ", " + ((size - count) / (count / l)) + " seconds remaining; mem = " + MemoryControl.available()); - } - i.remove(); - } - - // finished. write to file - cr = null; - cr_in = null; - rci.toFile(rci_out); - return count; - } - - public static int genrcix(final File cr_path_in, final File rci_path_out) throws IOException, RowSpaceExceededException { - //kelondroFlexTable acc = new kelondroFlexTable(cr_path_in, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow, true); - final IndexCell seq = new IndexCell( - cr_path_in, "index", Segment.wordReferenceFactory, Base64Order.enhancedCoder, CRG_colrow, 10000, 1000000000L, 20, null, 1000000); - final IndexCell rci = new IndexCell( - rci_path_out, "index", Segment.wordReferenceFactory, Base64Order.enhancedCoder, RCI_coli, 10000, 1000000000L, 20, null, 1000000); - - // loop over all referees - int count = 0; - final int size = seq.size(); - final long start = System.currentTimeMillis(); - long l; - final CloneableIterator> i = seq.references(null, false); - ReferenceContainer keycollection; - String referee, refereeDom, anchor, anchorDom; - RowSet rci_entry; - CloneableIterator cr_entry; - while (i.hasNext()) { - keycollection = i.next(); - referee = new String(keycollection.getTermHash()); - if (referee.length() == 6) refereeDom = referee; else refereeDom = referee.substring(6); - cr_entry = keycollection.rows(); - - // loop over all anchors - Row.Entry entry; - while (cr_entry.hasNext()) { - entry = cr_entry.next(); - anchor = entry.getColString(0, null); - if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6); - - // update domain-specific entry - rci_entry = rci.get(anchorDom.getBytes(), null); - if (rci_entry == null) rci_entry = new RowSet(RCI_coli, 0); - rci_entry.add(refereeDom.getBytes()); - - // insert entry - //rci.put(anchorDom.getBytes(), rci_entry); - } - count++; - if ((count % 1000) == 0) { - l = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("processed " + count + " citations, " + (count / l) + " per second, rci.size = " + rci.size() + ", " + ((size - count) / (count / l) / 60) + " minutes remaining; mem = " + MemoryControl.free()); - } - } - - // finished. write to file - seq.close(); - rci.close(); - return count; - } - - public static void main(final String[] args) { - // java -classpath source de.anomic.plasma.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr - try { - if ((args.length == 5) && (args[0].equals("-accumulate"))) { - accumulate(new File(args[1]), new File(args[2]), new File(args[3]), new File(args[4]), new File(args[5]), Integer.parseInt(args[6]), true); - } - if ((args.length == 2) && (args[0].equals("-accumulate"))) { - final File root_path = new File(args[1]); - final File from_dir = new File(root_path, "DATA/RANKING/GLOBAL/014_othercr"); - final File ready_dir = new File(root_path, "DATA/RANKING/GLOBAL/015_ready"); - final File tmp_dir = new File(root_path, "DATA/RANKING/GLOBAL/016_tmp"); - final File err_dir = new File(root_path, "DATA/RANKING/GLOBAL/017_err"); - final File acc_dir = new File(root_path, "DATA/RANKING/GLOBAL/018_acc"); - final String filename = "CRG-a-" + DateFormatter.formatShortMilliSecond(new Date()) + ".cr.gz"; - final File to_file = new File(root_path, "DATA/RANKING/GLOBAL/020_con0/" + filename); - if (!(ready_dir.exists())) ready_dir.mkdirs(); - if (!(tmp_dir.exists())) tmp_dir.mkdirs(); - if (!(err_dir.exists())) err_dir.mkdirs(); - if (!(acc_dir.exists())) acc_dir.mkdirs(); - if (!(to_file.getParentFile().exists())) to_file.getParentFile().mkdirs(); - FileUtils.moveAll(from_dir, ready_dir); - final long start = System.currentTimeMillis(); - final int files = ready_dir.list().length; - accumulate(ready_dir, tmp_dir, err_dir, acc_dir, to_file, 1000, true); - final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("Finished accumulate for " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)"); - } - if ((args.length == 3) && (args[0].equals("-recycle"))) { - final File root_path = new File(args[1]); - final int max_age_hours = Integer.parseInt(args[2]); - final File own_dir = new File(root_path, "DATA/RANKING/GLOBAL/010_owncr"); - final File acc_dir = new File(root_path, "DATA/RANKING/GLOBAL/018_acc"); - final File bkp_dir = new File(root_path, "DATA/RANKING/GLOBAL/019_bkp"); - if (!(own_dir.exists())) return; - if (!(acc_dir.exists())) return; - if (!(bkp_dir.exists())) bkp_dir.mkdirs(); - final String[] list = acc_dir.list(); - final long start = System.currentTimeMillis(); - final int files = list.length; - long d; - File f; - for (int i = 0; i < list.length; i++) { - f = new File(acc_dir, list[i]); - try { - d = (System.currentTimeMillis() - (new AttrSeq(f, false)).created()) / 3600000; - if (d > max_age_hours) { - // file is considered to be too old, it is not recycled - System.out.println("file " + f.getName() + " is old (" + d + " hours) and not recycled, only moved to backup"); - f.renameTo(new File(bkp_dir, list[i])); - } else { - // file is fresh, it is duplicated and moved to be transferred to other peers again - System.out.println("file " + f.getName() + " is fresh (" + d + " hours old), recycled and moved to backup"); - FileUtils.copy(f, new File(own_dir, list[i])); - f.renameTo(new File(bkp_dir, list[i])); - } - } catch (final IOException e) { - // there is something wrong with this file; delete it - System.out.println("file " + f.getName() + " is corrupted and deleted"); - FileUtils.deletedelete(f); - } - } - final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("Finished recycling of " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)"); - } - if ((args.length == 2) && (args[0].equals("-genrci"))) { - final File root_path = new File(args[1]); - final File cr_filedir = new File(root_path, "DATA/RANKING/GLOBAL/020_con0"); - final File rci_filedir = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0"); - rci_filedir.mkdirs(); - final long start = System.currentTimeMillis(); - final int count = genrcix(cr_filedir, rci_filedir); - final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("Completed RCI generation: " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)"); - } - /* - if ((args.length == 2) && (args[0].equals("-genrci"))) { - File root_path = new File(args[1]); - File cr_filedir = new File(root_path, "DATA/RANKING/GLOBAL/020_con0"); - File rci_file = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0/RCI-0.rci.gz"); - rci_file.getParentFile().mkdirs(); - String[] cr_filenames = cr_filedir.list(); - for (int i = 0; i < cr_filenames.length; i++) { - long start = System.currentTimeMillis(); - int count = genrci(new File(cr_filedir, cr_filenames[i]), rci_file); - long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("Completed RCI generation for input file " + cr_filenames[i] + ": " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)"); - } - } - */ - } catch (final Exception e) { - Log.logException(e); - } - } - - /* - Class-A File format: - - UDate : latest update timestamp of the URL (as virtual date, hours since epoch) - VDate : last visit timestamp of the URL (as virtual date, hours since epoch) - LCount : count of links to local resources - GCount : count of links to global resources - ICount : count of links to images (in document) - DCount : count of links to other documents - TLength: length of the plain text content (bytes) - WACount: total number of all words in content - WUCount: number of unique words in content (removed doubles) - Flags : Flags (0=update, 1=popularity, 2=attention, 3=vote) - - Class-a File format is an extension of Class-A plus the following attributes - FUDate : first update timestamp of the URL - FDDate : first update timestamp of the domain - LUDate : latest update timestamp of the URL - UCount : Update Counter (of 'latest update timestamp') - PCount : Popularity Counter (proxy clicks) - ACount : Attention Counter (search result clicks) - VCount : Votes - Vita : Vitality (normed number of updates per time) - */ -} diff --git a/source/de/anomic/search/blockrank/RCIEvaluation.java b/source/de/anomic/search/blockrank/RCIEvaluation.java deleted file mode 100644 index 1b2ce6722..000000000 --- a/source/de/anomic/search/blockrank/RCIEvaluation.java +++ /dev/null @@ -1,238 +0,0 @@ -// plasmaRCIEvaluation.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// Created 18.11.2005 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.search.blockrank; - -import java.io.File; -import java.io.IOException; -import java.net.MalformedURLException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.TreeSet; - -import net.yacy.kelondro.data.meta.DigestURI; -import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.Digest; -import net.yacy.kelondro.util.AttrSeq; -import net.yacy.kelondro.util.FileUtils; - -import de.anomic.search.RankingProcess; - -public class RCIEvaluation { - - public static int[] rcieval(final AttrSeq rci) { - // collect information about which entry has how many references - // the output is a reference-count:occurrences relation - final HashMap counts = new HashMap(); - final Iterator i = rci.keys(); - String key; - AttrSeq.Entry entry; - Integer count_key, count_count; - int c, maxcount = 0; - while (i.hasNext()) { - key = i.next(); - entry = rci.getEntry(key); - c = entry.getSeqSet().size(); - if (c > maxcount) maxcount = c; - count_key = Integer.valueOf(c); - count_count = counts.get(count_key); - if (count_count == null) { - count_count = 1; - } else { - count_count = Integer.valueOf(count_count.intValue() + 1); - } - counts.put(count_key, count_count); - } - final int[] ctable = new int[maxcount + 1]; - for (int j = 0; j <= maxcount; j++) { - count_count = counts.get(Integer.valueOf(j)); - if (count_count == null) { - ctable[j] = 0; - } else { - ctable[j] = count_count.intValue(); - } - } - return ctable; - } - - public static long sum(final int[] c) { - long s = 0; - for (int i = 0; i < c.length; i++) s += c[i]; - return s; - } - - public static int[] interval(final int[] counts, final int parts) { - long limit = sum(counts) / 2; - final int[] partition = new int[parts]; - int s = 0, p = parts - 1; - for (int i = 1; i < counts.length; i++) { - s += counts[i]; - if ((s > limit) && (p >= 0)) { - partition[p--] = i; - limit = (2 * limit - s) / 2; - s = 0; - } - } - partition[0] = counts.length - 1; - for (int i = 1; i < 10; i++) partition[i] = (partition[i - 1] + 4 * partition[i]) / 5; - return partition; - } - - public static void checkPartitionTable0(final int[] counts, final int[] partition) { - int sumsum = 0; - int sum; - int j = 0; - for (int i = partition.length - 1; i >= 0; i--) { - sum = 0; - while (j <= partition[i]) { - sum += counts[j++]; - } - System.out.println("sum of YBR-" + i + " entries: " + sum); - sumsum += sum; - } - System.out.println("complete sum = " + sumsum); - } - - public static void checkPartitionTable1(final int[] counts, final int[] partition) { - int sumsum = 0; - final int[] sum = new int[partition.length]; - for (int i = 0; i < partition.length; i++) sum[i] = 0; - for (int i = 0; i < counts.length; i++) sum[orderIntoYBI(partition, i)] += counts[i]; - for (int i = partition.length - 1; i >= 0; i--) { - System.out.println("sum of YBR-" + i + " entries: " + sum[i]); - sumsum += sum[i]; - } - System.out.println("complete sum = " + sumsum); - } - - public static int orderIntoYBI(final int[] partition, final int count) { - for (int i = 0; i < partition.length - 1; i++) { - if ((count >= (partition[i + 1] + 1)) && (count <= partition[i])) return i; - } - return partition.length - 1; - } - - @SuppressWarnings("unchecked") - public static TreeSet[] genRankingTable(final AttrSeq rci, final int[] partition) { - final TreeSet[] ranked = new TreeSet[partition.length]; - for (int i = 0; i < partition.length; i++) ranked[i] = new TreeSet(Base64Order.enhancedCoder); - final Iterator i = rci.keys(); - String key; - AttrSeq.Entry entry; - while (i.hasNext()) { - key = i.next(); - entry = rci.getEntry(key); - ranked[orderIntoYBI(partition, entry.getSeqSet().size())].add(key.getBytes()); - } - return ranked; - } - - public static HashMap genReverseDomHash(final File domlist) { - final HashSet domset = FileUtils.loadList(domlist); - final HashMap dommap = new HashMap(); - final Iterator i = domset.iterator(); - String dom; - while (i.hasNext()) { - dom = i.next(); - if (dom.startsWith("www.")) dom = dom.substring(4); - try { - dommap.put(new String((new DigestURI("http://" + dom)).hash(), 6, 6), dom); - dommap.put(new String((new DigestURI("http://www." + dom)).hash(), 6, 6), "www." + dom); - } catch (final MalformedURLException e) {} - } - return dommap; - } - - public static void storeRankingTable(final TreeSet[] ranking, final File tablePath) throws IOException { - String filename; - if (!(tablePath.exists())) tablePath.mkdirs(); - for (int i = 0; i < ranking.length - 1; i++) { - filename = "YBR-4-" + Digest.encodeHex(i, 2) + ".idx"; - FileUtils.saveSet(new File(tablePath, filename), "plain", ranking[i], ""); - } - } - - public static void main(final String[] args) { - try { - if ((args.length == 2) && (args[0].equals("-genybr"))) { - final File root_path = new File(args[1]); - final File rci_file = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0/RCI-0.rci.gz"); - final long start = System.currentTimeMillis(); - if (!(rci_file.exists())) return; - - // create partition table - final AttrSeq rci = new AttrSeq(rci_file, false); - final int counts[] = rcieval(rci); - final int[] partition = interval(counts, 16); - - // check the table - System.out.println("partition position table:"); - for (int i = 0; i < partition.length - 1; i++) { - System.out.println("YBR-" + i + ": " + (partition[i + 1] + 1) + " - " + partition[i] + " references"); - } - System.out.println("YBR-" + (partition.length - 1) + ": 0 - " + partition[partition.length - 1] + " references"); - checkPartitionTable0(counts, partition); - checkPartitionTable1(counts, partition); - int sum = 0; - for (int i = 0; i < counts.length; i++) sum += counts[i]; - System.out.println("sum of all references: " + sum); - - // create ranking - final TreeSet[] ranked = genRankingTable(rci, partition); - storeRankingTable(ranked, new File(root_path, "ranking/YBR")); - final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); - System.out.println("Finished YBR generation in " + seconds + " seconds."); - } - if ((args.length == 2) && (args[0].equals("-rcieval"))) { - final File root_path = new File(args[1]); - - // load a partition table - RankingProcess.loadYBR(new File(root_path, "ranking/YBR"), 16); - - // load domain list and generate hash index for domains - final HashMap dommap = genReverseDomHash(new File(root_path, "domlist.txt")); - - // print out the table - String hash, dom; - for (int i = 0; i < 9; i++) { - System.out.print("YBR-" + i + ": "); - for (int j = 0; j < RankingProcess.ybrTables[i].size(); j++) { - hash = new String(RankingProcess.ybrTables[i].get(j)); - dom = dommap.get(hash); - if (dom == null) System.out.print("[" + hash + "], "); else System.out.print(dom + ", "); - } - System.out.println(); - } - - } - } catch (final IOException e) { - Log.logException(e); - } - } - -} diff --git a/source/de/anomic/yacy/graphics/WebStructureGraph.java b/source/de/anomic/yacy/graphics/WebStructureGraph.java index 4bee73cb6..ab6ded34d 100644 --- a/source/de/anomic/yacy/graphics/WebStructureGraph.java +++ b/source/de/anomic/yacy/graphics/WebStructureGraph.java @@ -42,8 +42,6 @@ import net.yacy.document.Condenser; import net.yacy.document.Document; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.MicroDate; import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.LookAheadIterator; @@ -56,19 +54,13 @@ public class WebStructureGraph { public static int maxref = 300; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) public static int maxhosts = 20000; // maximum number of hosts in web structure map - private StringBuilder crg; // global citation references private final Log log; - private final File rankingPath, structureFile; - private final String crlFile, crgFile; + private final File structureFile; TreeMap structure_old; // ',' to {}* TreeMap structure_new; - public WebStructureGraph(final Log log, final File rankingPath, final String crlFile, final String crgFile, final File structureFile) { + public WebStructureGraph(final Log log, final File structureFile) { this.log = log; - this.rankingPath = rankingPath; - this.crlFile = crlFile; - this.crgFile = crgFile; - this.crg = new StringBuilder(maxCRGDump); this.structure_old = new TreeMap(); this.structure_new = new TreeMap(); this.structureFile = structureFile; @@ -126,63 +118,12 @@ public class WebStructureGraph { } } - // append this reference to buffer - // generate header info - final String head = new String(url.hash()) + "=" + - MicroDate.microDateHoursStr(docDate.getTime()) + // latest update timestamp of the URL - MicroDate.microDateHoursStr(System.currentTimeMillis()) + // last visit timestamp of the URL - Base64Order.enhancedCoder.encodeLongSmart(LCount, 2) + // count of links to local resources - Base64Order.enhancedCoder.encodeLongSmart(GCount, 2) + // count of links to global resources - Base64Order.enhancedCoder.encodeLongSmart(document.getImages().size(), 2) + // count of Images in document - Base64Order.enhancedCoder.encodeLongSmart(0, 2) + // count of links to other documents - Base64Order.enhancedCoder.encodeLongSmart(document.getTextLength(), 3) + // length of plain text in bytes - Base64Order.enhancedCoder.encodeLongSmart((condenser == null) ? 0 : condenser.RESULT_NUMB_WORDS, 3) + // count of all appearing words - Base64Order.enhancedCoder.encodeLongSmart((condenser == null) ? 0 : condenser.words().size(), 3) + // count of all unique words - Base64Order.enhancedCoder.encodeLongSmart(0, 1); // Flags (update, popularity, attention, vote) - - //crl.append(head); crl.append ('|'); crl.append(cpl); crl.append((char) 13); crl.append((char) 10); - crg.append(head); crg.append('|'); crg.append(cpg); crg.append((char) 13); crg.append((char) 10); - assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString(); learn(url, cpg); - // if buffer is full, flush it. - /* - if (crl.length() > maxCRLDump) { - flushCitationReference(crl, "crl"); - crl = new StringBuilder(maxCRLDump); - } - **/ - if (crg.length() > maxCRGDump) { - flushCitationReference("crg"); - crg = new StringBuilder(maxCRGDump); - } - return new Integer[] {Integer.valueOf(LCount), Integer.valueOf(GCount)}; } - public void flushCitationReference(final String type) { - if (crg.length() < 12) return; - final String filename = type.toUpperCase() + "-A-" + DateFormatter.formatShortMilliSecond(new Date()) + "." + crg.substring(0, 12) + ".cr.gz"; - final File path = new File(rankingPath, (type.equals("crl")) ? crlFile : crgFile); - path.mkdirs(); - final File file = new File(path, filename); - - // generate header - final StringBuilder header = new StringBuilder(200); - header.append("# Name=YaCy " + ((type.equals("crl")) ? "Local" : "Global") + " Citation Reference Ticket"); header.append((char) 13); header.append((char) 10); - header.append("# Created=" + System.currentTimeMillis()); header.append((char) 13); header.append((char) 10); - header.append("# Structure=,'=',,,,,,,,,,,'|',*"); header.append((char) 13); header.append((char) 10); - header.append("# ---"); header.append((char) 13); header.append((char) 10); - crg.insert(0, header.toString()); - try { - FileUtils.writeAndGZip(crg.toString().getBytes(), file); - if (this.log.isFine()) log.logFine("wrote citation reference dump " + file.toString()); - } catch (final IOException e) { - Log.logException(e); - } - } - private static int refstr2count(final String refs) { if ((refs == null) || (refs.length() <= 8)) return 0; assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length(); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 7cea44824..0ecf44ee6 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -61,7 +61,6 @@ import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.RSSFeed; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSReader; -import net.yacy.cora.protocol.ByteArrayBody; import net.yacy.cora.protocol.http.HTTPConnector; import net.yacy.cora.services.Search; import net.yacy.kelondro.data.meta.URIMetadataRow; @@ -71,7 +70,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Bitfield; -import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.rwi.Reference; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainerCache; @@ -747,77 +745,6 @@ public final class yacyClient { if (address == null) address = "localhost:8080"; return address; } - - public static Map transferPermission(final String targetAddress, final long filesize, final String filename) { - - // prepare request - final String salt = crypt.randomSalt(); - - // send request - try { - final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); - parts.put("process", new StringBody("permission")); - parts.put("purpose", new StringBody("crcon")); - parts.put("filename", new StringBody(filename)); - parts.put("filesize", new StringBody(Long.toString(filesize))); - parts.put("can-send-protocol", new StringBody("http")); - final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts); - final Map result = FileUtils.table(content); - return result; - } catch (final Exception e) { - // most probably a network time-out exception - yacyCore.log.logSevere("yacyClient.permissionTransfer error:" + e.getMessage()); - return null; - } - } - - public static Map transferStore(final String targetAddress, final String access, final String filename, final byte[] file) { - - // prepare request - final String salt = crypt.randomSalt(); - - // send request - try { - final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt); - parts.put("process", new StringBody("store")); - parts.put("purpose", new StringBody("crcon")); - parts.put("filesize", new StringBody(Long.toString(file.length))); - parts.put("md5", new StringBody(Digest.encodeMD5Hex(file))); - parts.put("access", new StringBody(access)); - parts.put("filename", new ByteArrayBody(file, filename)); - final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts); - final Map result = FileUtils.table(content); - return result; - } catch (final Exception e) { - yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage()); - return null; - } - } - - public static String transfer(final String targetAddress, final String filename, final byte[] file) { - final Map phase1 = transferPermission(targetAddress, file.length, filename); - if (phase1 == null) return "no connection to remote address " + targetAddress + "; phase 1"; - final String access = phase1.get("access"); - final String nextaddress = phase1.get("address"); - final String protocol = phase1.get("protocol"); - //String path = (String) phase1.get("path"); - //String maxsize = (String) phase1.get("maxsize"); - String response = phase1.get("response"); - if ((response == null) || (protocol == null) || (access == null)) return "wrong return values from other peer; phase 1"; - if (!(response.equals("ok"))) return "remote peer rejected transfer: " + response; - final String accesscode = Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(access)); - if (protocol.equals("http")) { - final Map phase2 = transferStore(nextaddress, accesscode, filename, file); - if (phase2 == null) return "no connection to remote address " + targetAddress + "; phase 2"; - response = phase2.get("response"); - if (response == null) return "wrong return values from other peer; phase 2"; - if (!(response.equals("ok"))) { - return "remote peer failed with transfer: " + response; - } - return null; - } - return "wrong protocol: " + protocol; - } public static Map crawlReceipt(final yacySeed mySeed, final yacySeed target, final String process, final String result, final String reason, final URIMetadataRow entry, final String wordhashes) { assert (target != null); diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index 180d50c6d..bf73d785d 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -357,10 +357,6 @@ public class yacyCore { log.logSevere("publishMySeed: problem with news encoding", e); } sb.peers.mySeed().setUnusedFlags(); - - // include current citation-rank file count - sb.peers.mySeed().put(yacySeed.CRWCNT, Integer.toString(sb.rankingOwnDistribution.size())); - sb.peers.mySeed().put(yacySeed.CRTCNT, Integer.toString(sb.rankingOtherDistribution.size())); int newSeeds = -1; //if (seeds.length > 1) { // holding a reference to all started threads diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index c9c062192..3bfb36cf6 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -147,10 +147,6 @@ public class yacySeed implements Cloneable { public static final String SCOUNT = "SCount"; /** the number of clients that the peer connects (connects/hour as double) */ public static final String CCOUNT = "CCount"; - /** Citation Rank (Own) - Count */ - public static final String CRWCNT = "CRWCnt"; - /** Citation Rank (Other) - Count */ - public static final String CRTCNT = "CRTCnt"; public static final String IP = "IP"; public static final String PORT = "Port"; public static final String SEEDLISTURL = "seedURL"; @@ -216,9 +212,6 @@ public class yacySeed implements Cloneable { this.dna.put(yacySeed.LASTSEEN, DateFormatter.formatShortSecond(new Date(System.currentTimeMillis() /*- DateFormatter.UTCDiff()*/))); // for last-seen date this.dna.put(yacySeed.USPEED, yacySeed.ZERO); // the computated uplink speed of the peer - this.dna.put(yacySeed.CRWCNT, yacySeed.ZERO); - this.dna.put(yacySeed.CRTCNT, yacySeed.ZERO); - // settings that are needed to organize the seed round-trip this.dna.put(yacySeed.FLAGS, yacySeed.FLAGSZERO); setFlagDirectConnect(false); diff --git a/source/net/yacy/kelondro/util/AttrSeq.java b/source/net/yacy/kelondro/util/AttrSeq.java deleted file mode 100644 index ab4ec718c..000000000 --- a/source/net/yacy/kelondro/util/AttrSeq.java +++ /dev/null @@ -1,460 +0,0 @@ -// kelondroAttrSeq.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2005 -// Created 15.11.2005 -// -// $LastChangedDate: 2005-10-22 15:28:04 +0200 (Sat, 22 Oct 2005) $ -// $LastChangedRevision: 968 $ -// $LastChangedBy: theli $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.util; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.StringTokenizer; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.logging.Logger; -import java.util.zip.GZIPInputStream; - -import net.yacy.kelondro.index.Column; -import net.yacy.kelondro.index.Row; -import net.yacy.kelondro.index.RowCollection; -import net.yacy.kelondro.index.RowSpaceExceededException; -import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.Base64Order; - - -public class AttrSeq { - - // class objects - private final File file; - private final Map entries; // value may be of type String or of type Entry - protected Structure structure; - private String name; - private long created; - - // optional logger - protected Logger theLogger = null; - - public AttrSeq(final File file, final boolean tree) throws IOException { - this.file = file; - this.structure = null; - this.created = -1; - this.name = ""; - this.entries = (tree) ? new TreeMap() : new HashMap(); - readAttrFile(file); - } - - public AttrSeq(final String name, final String struct, final boolean tree) { - this.file = null; - this.structure = new Structure(struct); - this.created = System.currentTimeMillis(); - this.name = name; - this.entries = (tree) ? new TreeMap() : new HashMap(); - } - - public void setLogger(final Logger newLogger) { - this.theLogger = newLogger; - } - - public void logInfo(final String message) { - if (this.theLogger == null) - System.err.println("ATTRSEQ INFO for file " + this.file + ": " + message); - else - this.theLogger.info("ATTRSEQ INFO for file " + this.file + ": " + message); - } - - public void logWarning(final String message) { - if (this.theLogger == null) - System.err.println("ATTRSEQ WARNING for file " + this.file + ": " + message); - else - this.theLogger.warning("ATTRSEQ WARNING for file " + this.file + ": " + message); - } - - private void readAttrFile(final File loadfile) throws IOException { - BufferedReader br = null; - int p; - if (loadfile.toString().endsWith(".gz")) { - br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(loadfile)))); - } else { - br = new BufferedReader(new InputStreamReader(new FileInputStream(loadfile))); - } - String line, key, oldvalue, newvalue; - while ((line = br.readLine()) != null) { - line = line.trim(); - if (line.length() == 0) continue; - if (line.charAt(0) == '#') { - if (line.startsWith("# Structure=")) { - structure = new Structure(line.substring(12)); - } - if (line.startsWith("# Name=")) { - name = line.substring(7); - } - if (line.startsWith("# Created=")) { - created = Long.parseLong(line.substring(10)); - } - continue; - } - if ((p = line.indexOf('=')) > 0) { - key = line.substring(0, p).trim(); - newvalue = line.substring(p + 1).trim(); - oldvalue = (String) entries.get(key); - if (oldvalue != null) { - if (newvalue.equals(oldvalue)) { - //logWarning("key " + key + ": double occurrence. values are equal. second appearance is ignored"); - } else { - if (newvalue.length() < oldvalue.length()) { - if (oldvalue.substring(0, newvalue.length()).equals(newvalue)) { - logWarning("key " + key + ": double occurrence. new value is subset of old value. second appearance is ignored"); - } else { - logWarning("key " + key + ": double occurrence. new value is shorter than old value, but not a subsequence. old = " + oldvalue + ", new = " + newvalue); - } - } else if (newvalue.length() > oldvalue.length()) { - if (newvalue.substring(0, oldvalue.length()).equals(oldvalue)) { - logWarning("key " + key + ": double occurrence. old value is subset of new value. first appearance is ignored"); - } else { - logWarning("key " + key + ": double occurrence. old value is shorter than new value, but not a subsequence. old = " + oldvalue + ", new = " + newvalue); - } - entries.put(key, newvalue); - } else { - logWarning("key " + key + ": double occurrence. old and new value have equal length but are not equal. old = " + oldvalue + ", new = " + newvalue); - //entries.put(key, newvalue); - } - } - } else { - entries.put(key, newvalue); - } - } - } - br.close(); - if (structure == null) throw new IOException("file contains no structure tag"); - if (name == null) throw new IOException("file contains no name tag"); - if (created == -1) throw new IOException("file contains no created tag"); - } - - public int size() { - return entries.size(); - } - - public long created() { - return this.created; - } - - public void toFile(final File out) throws IOException { - // generate header - final StringBuilder sb = new StringBuilder(2000); - sb.append("# Name="); sb.append(this.name); sb.append((char) 13); sb.append((char) 10); - sb.append("# Created="); sb.append(this.created); sb.append((char) 13); sb.append((char) 10); - sb.append("# Structure="); sb.append(this.structure.toString()); sb.append((char) 13); sb.append((char) 10); - sb.append("# ---"); sb.append((char) 13); sb.append((char) 10); - String k; - Object v; - for (final Map.Entry entry : entries.entrySet()) { - k = entry.getKey(); - v = entry.getValue(); - sb.append(k); sb.append('='); - if (v instanceof String) sb.append((String) v); - if (v instanceof Entry) sb.append(((Entry) v).toString()); - sb.append((char) 13); sb.append((char) 10); - } - if (out.toString().endsWith(".gz")) { - FileUtils.writeAndGZip((new String(sb)).getBytes(), out); - } else { - FileUtils.copy((new String(sb)).getBytes(), out); - } - } - - public Iterator keys() { - return entries.keySet().iterator(); - } - - public Entry newEntry(final String pivot, final boolean tree) { - return new Entry(pivot, new HashMap(), (tree) ? (Set) new TreeSet() : (Set) new HashSet()); - } - - public Entry newEntry(final String pivot, final Map props, final Set seq) { - return new Entry(pivot, props, seq); - } - - /* - public void putEntry(String pivot, String attrseq) { - entries.put(pivot, attrseq); - } - */ - - public void putEntry(final Entry entry) { - if (shortmem()) - entries.put(entry.pivot, entry.toString()); - else - entries.put(entry.pivot, entry); - } - - public void putEntrySmall(final Entry entry) { - entries.put(entry.pivot, entry.toString()); - } - - public Entry getEntry(final String pivot) { - final Object e = entries.get(pivot); - if (e == null) return null; - if (e instanceof String) return new Entry(pivot, (String) e, false); - if (e instanceof Entry) return (Entry) e; - return null; - } - - public Entry removeEntry(final String pivot) { - final Object e = entries.remove(pivot); - if (e == null) return null; - if (e instanceof String) return new Entry(pivot, (String) e, false); - if (e instanceof Entry) return (Entry) e; - return null; - } - - public static class Structure { - - protected String pivot_name = null; - protected int pivot_len = -1; - protected String[] prop_names = null; - protected int[] prop_len = null, prop_pos = null; - protected String[] seq_names = null; - protected int[] seq_len = null, seq_pos = null; - protected Row seqrow; - // example: - //# Structure=,'=',,,,,,,,,,,'|',* - - public Structure(String structure) { - // parse a structure string - - // parse pivot definition: - int p = structure.indexOf(",'='"); - if (p < 0) return; - final String pivot = structure.substring(0, p); - structure = structure.substring(p + 5); - Column a = new Column(pivot); - pivot_name = a.nickname; - pivot_len = a.cellwidth; - - // parse property part definition: - p = structure.indexOf(",'|'"); - if (p < 0) return; - ArrayList l = new ArrayList(); - final String attr = structure.substring(0, p); - String seqs = structure.substring(p + 5); - StringTokenizer st = new StringTokenizer(attr, ","); - while (st.hasMoreTokens()) { - a = new Column(st.nextToken()); - l.add(a); - } - prop_names = new String[l.size()]; - prop_len = new int[l.size()]; - prop_pos = new int[l.size()]; - p = 0; - for (int i = 0; i < l.size(); i++) { - a = l.get(i); - prop_names[i] = a.nickname; - prop_len[i] = a.cellwidth; - prop_pos[i] = p; - p += prop_len[i]; - } - - // parse sequence definition: - if (seqs.length() > 0 && seqs.charAt(0) == '*') seqs = seqs.substring(1); - l = new ArrayList(); - st = new StringTokenizer(seqs, ","); - while (st.hasMoreTokens()) { - a = new Column(st.nextToken()); - l.add(a); - } - seq_names = new String[l.size()]; - seq_len = new int[l.size()]; - seq_pos = new int[l.size()]; - p = 0; - for (int i = 0; i < l.size(); i++) { - a = l.get(i); - seq_names[i] = a.nickname; - seq_len[i] = a.cellwidth; - seq_pos[i] = p; - p += seq_len[i]; - } - - // generate rowdef for seq row definition - final StringBuilder rowdef = new StringBuilder(); - rowdef.append("byte[] "); - rowdef.append(seq_names[0]); - rowdef.append('-'); - rowdef.append(seq_len[0]); - - for (int i = 1; i < seq_names.length; i++) { - rowdef.append(", byte[] "); - rowdef.append(seq_names[i]); - rowdef.append('-'); - rowdef.append(seq_len[i]); - } - seqrow = new Row(new String(rowdef), null); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(100); - sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',"); - if (prop_names.length > 0) { - for (int i = 0; i < prop_names.length; i++) { - sb.append('<'); sb.append(prop_names[i]); sb.append('-'); sb.append(Integer.toString(prop_len[i])); sb.append(">,"); - } - } - sb.append("'|'"); - if (seq_names.length > 0) { - for (int i = 0; i < seq_names.length; i++) { - sb.append(",<"); sb.append(seq_names[i]); sb.append('-'); sb.append(Integer.toString(seq_len[i])); sb.append('>'); - } - } - return new String(sb); - } - } - - public class Entry { - String pivot; - Map attrs; - Set seq; - - public Entry(final String pivot, final Map attrs, final Set seq) { - this.pivot = pivot; - this.attrs = attrs; - this.seq = seq; - } - - public Entry(final String pivot, final String attrseq, final boolean tree) { - this.pivot = pivot; - attrs = new HashMap(); - seq = (tree) ? (Set) new TreeSet() : (Set) new HashSet(); - for (int i = 0; i < structure.prop_names.length; i++) { - attrs.put(structure.prop_names[i], Long.valueOf(Base64Order.enhancedCoder.decodeLong(attrseq.substring(structure.prop_pos[i], structure.prop_pos[i] + structure.prop_len[i])))); - } - - int p = attrseq.indexOf('|') + 1; - //long[] seqattrs = new long[structure.seq_names.length - 1]; - String seqname; - while (p + structure.seq_len[0] <= attrseq.length()) { - seqname = attrseq.substring(p, p + structure.seq_len[0]); - p += structure.seq_len[0]; - for (int i = 1; i < structure.seq_names.length; i++) { - //seqattrs[i - 1] = kelondroBase64Order.enhancedCoder.decodeLong(attrseq.substring(p, p + structure.seq_len[i])); - p += structure.seq_len[i]; - } - seq.add(seqname/*, seqattrs*/); - } - } - - public Map getAttrs() { - return attrs; - } - - public long getAttr(final String key, final long dflt) { - final Long i = attrs.get(key); - if (i == null) return dflt; - return i.longValue(); - } - - public void setAttr(final String key, final long attr) { - attrs.put(key, Long.valueOf(attr)); - } - - public Set getSeqSet() { - return seq; - } - - public RowCollection getSeqCollection() throws RowSpaceExceededException { - final RowCollection collection = new RowCollection(structure.seqrow, seq.size()); - final Iterator i = seq.iterator(); - while (i.hasNext()) { - collection.addUnique(structure.seqrow.newEntry(i.next().getBytes())); - } - return collection; - } - - public void setSeq(final Set seq) { - this.seq = seq; - } - - public void addSeq(final String s/*, long[] seqattrs*/) { - this.seq.add(s/*, seqattrs*/); - } - - @Override - public String toString() { - // creates only the attribute field and the sequence, not the pivot - final StringBuilder sb = new StringBuilder(100 + structure.seq_len[0] * seq.size()); - Long val; - for (int i = 0; i < structure.prop_names.length; i++) { - val = attrs.get(structure.prop_names[i]); - sb.append(Base64Order.enhancedCoder.encodeLongSmart((val == null) ? 0 : val.longValue(), structure.prop_len[i])); - } - sb.append('|'); - final Iterator q = seq.iterator(); - //long[] seqattrs; - while (q.hasNext()) { - sb.append(q.next()); - //seqattrs = (long[]) entry.getValue(); - /* - for (int i = 1; i < structure.seq_names.length; i++) { - sb.append(kelondroBase64Order.enhancedCoder.encodeLong(seqattrs[i - 1], structure.seq_len[i])); - } - */ - } - return new String(sb); - } - } - - private static boolean shortmem() { - return (MemoryControl.available() < 20000000L); - } - - public static void transcode(final File from_file, final File to_file) throws IOException { - final AttrSeq crp = new AttrSeq(from_file, true); - //crp.toFile(new File(args[1])); - final AttrSeq cro = new AttrSeq(crp.name + "/Transcoded from " + crp.file.getName(), crp.structure.toString(), true); - final Iterator i = crp.entries.keySet().iterator(); - while (i.hasNext()) { - cro.putEntry(crp.getEntry(i.next())); - } - cro.toFile(to_file); - } - - public static void main(final String[] args) { - // java -classpath source de.anomic.kelondro.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr - try { - if ((args.length == 3) && ("-transcode".equals(args[0]))) { - transcode(new File(args[1]), new File(args[2])); - } - } catch (final IOException e) { - Log.logException(e); - } - } - -} diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 4fd8e2f32..c9d672fff 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -81,7 +81,6 @@ import de.anomic.search.Switchboard; import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverCore; import de.anomic.tools.enumerateFiles; -import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.Tray; import de.anomic.yacy.yacyBuildProperties; @@ -797,20 +796,6 @@ public final class yacy { // finished Log.logConfig("CLEAN-WORDLIST", "FINISHED"); } - - private static void transferCR(final String targetaddress, final String crfile) { - final File f = new File(crfile); - try { - final byte[] b = FileUtils.read(f); - final String result = yacyClient.transfer(targetaddress, f.getName(), b); - if (result == null) - Log.logInfo("TRANSFER-CR", "transmitted file " + crfile + " to " + targetaddress + " successfully"); - else - Log.logInfo("TRANSFER-CR", "error transmitting file " + crfile + " to " + targetaddress + ": " + result); - } catch (final IOException e) { - Log.logInfo("TRANSFER-CR", "could not read file " + crfile); - } - } private static String[] shift(final String[] args, final int pos, final int count) { final String[] newargs = new String[args.length - count]; @@ -1017,11 +1002,6 @@ public final class yacy { final int minlength = Integer.parseInt(args[2]); final int maxlength = Integer.parseInt(args[3]); cleanwordlist(args[1], minlength, maxlength); - } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-transfercr"))) { - // transfer a single cr file to a remote peer - final String targetaddress = args[1]; - final String crfile = args[2]; - transferCR(targetaddress, crfile); } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urldbcleanup"))) { // generate a url list and save it in a file if (args.length == 2) applicationRoot= new File(args[1]);