(); // mapping of permission - to filename.
this.workPath = getDataPath(SwitchboardConstants.WORK_PATH, SwitchboardConstants.WORK_PATH_DEFAULT);
this.log.logConfig("Work Path: " + this.workPath.toString());
this.dictionariesPath = getDataPath(SwitchboardConstants.DICTIONARY_SOURCE_PATH, SwitchboardConstants.DICTIONARY_SOURCE_PATH_DEFAULT);
@@ -391,7 +382,7 @@ public final class Switchboard extends serverSwitch {
this.proxyLastAccess = System.currentTimeMillis() - 10000;
this.localSearchLastAccess = System.currentTimeMillis() - 10000;
this.remoteSearchLastAccess = System.currentTimeMillis() - 10000;
- this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map"));
+ this.webStructure = new WebStructureGraph(log, new File(queuesRoot, "webStructure.map"));
// configuring list path
if (!(listsPath.exists())) listsPath.mkdirs();
@@ -538,22 +529,6 @@ public final class Switchboard extends serverSwitch {
} catch (final IOException e) {
}
- // init ranking transmission
- /*
- CRDistOn = true/false
- CRDist0Path = GLOBAL/010_owncr
- CRDist0Method = 1
- CRDist0Percent = 0
- CRDist0Target =
- CRDist1Path = GLOBAL/014_othercr/1
- CRDist1Method = 9
- CRDist1Percent = 30
- CRDist1Target = kaskelix.de:8080,yacy.dyndns.org:8000,suma-lab.de:8080
- **/
- rankingOn = getConfig(SwitchboardConstants.RANKING_DIST_ON, "true").equals("true") && networkName.equals("freeworld");
- rankingOwnDistribution = new CRDistribution(log, peers, new File(rankingPath, getConfig(SwitchboardConstants.RANKING_DIST_0_PATH, CRDistribution.CR_OWN)), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_0_METHOD, CRDistribution.METHOD_ANYSENIOR), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_0_METHOD, 0), getConfig(SwitchboardConstants.RANKING_DIST_0_TARGET, ""));
- rankingOtherDistribution = new CRDistribution(log, peers, new File(rankingPath, getConfig(SwitchboardConstants.RANKING_DIST_1_PATH, CRDistribution.CR_OTHER)), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_1_METHOD, CRDistribution.METHOD_MIXEDSENIOR), (int) getConfigLong(SwitchboardConstants.RANKING_DIST_1_METHOD, 30), getConfig(SwitchboardConstants.RANKING_DIST_1_TARGET, "kaskelix.de:8080,yacy.dyndns.org:8000"));
-
// init nameCacheNoCachingList
Domains.setNoCachingPatterns(getConfig(SwitchboardConstants.HTTPC_NAME_CACHE_CACHING_PATTERNS_NO,""));
@@ -926,7 +901,7 @@ public final class Switchboard extends serverSwitch {
10000);
// create new web structure
- this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map"));
+ this.webStructure = new WebStructureGraph(log, new File(queuesRoot, "webStructure.map"));
// load domainList
@@ -1224,7 +1199,6 @@ public final class Switchboard extends serverSwitch {
userDB.close();
bookmarksDB.close();
messageDB.close();
- webStructure.flushCitationReference("crg");
webStructure.close();
crawlQueues.close();
crawler.close();
@@ -1586,21 +1560,8 @@ public final class Switchboard extends serverSwitch {
}
// close unused connections
-// de.anomic.http.client.Client.cleanup();
ConnectionInfo.cleanUp();
- // do transmission of CR-files
- /*
- checkInterruption();
- int count = rankingOwnDistribution.size() / 100;
- if (count == 0) count = 1;
- if (count > 5) count = 5;
- if (rankingOn && !isRobinsonMode()) {
- rankingOwnDistribution.transferRanking(count);
- rankingOtherDistribution.transferRanking(1);
- }
- */
-
// clean up delegated stack
checkInterruption();
if ((crawlQueues.delegatedURL.stackSize() > 1000)) {
diff --git a/source/de/anomic/search/SwitchboardConstants.java b/source/de/anomic/search/SwitchboardConstants.java
index b383198c8..14cd2d4a0 100644
--- a/source/de/anomic/search/SwitchboardConstants.java
+++ b/source/de/anomic/search/SwitchboardConstants.java
@@ -234,15 +234,6 @@ public final class SwitchboardConstants {
public static final String INDEX_DIST_ALLOW_WHILE_INDEXING = "allowDistributeIndexWhileIndexing";
public static final String INDEX_TRANSFER_TIMEOUT = "indexTransfer.timeout";
public static final String INDEX_TRANSFER_GZIP_BODY = "indexTransfer.gzipBody";
- public static final String RANKING_DIST_ON = "CRDistOn";
- public static final String RANKING_DIST_0_PATH = "CRDist0Path";
- public static final String RANKING_DIST_0_METHOD = "CRDist0Method";
- public static final String RANKING_DIST_0_PERCENT = "CRDist0Percent";
- public static final String RANKING_DIST_0_TARGET = "CRDist0Target";
- public static final String RANKING_DIST_1_PATH = "CRDist1Path";
- public static final String RANKING_DIST_1_METHOD = "CRDist1Method";
- public static final String RANKING_DIST_1_PERCENT = "CRDist1Percent";
- public static final String RANKING_DIST_1_TARGET = "CRDist1Target";
public static final String PARSER_MIME_DENY = "parser.mime.deny";
/**
* public static final String PROXY_ONLINE_CAUTION_DELAY = "onlineCautionDelay"
@@ -359,16 +350,6 @@ public final class SwitchboardConstants {
*/
public static final String LISTS_PATH = "listsPath";
public static final String LISTS_PATH_DEFAULT = "DATA/LISTS";
- /**
- * public static final String RANKING_PATH = "rankingPath"
- * Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where all
- * ranking files are stored, self-generated as well as received ranking files
- *
- * @see Switchboard#RANKING_DIST_0_PATH
- * @see Switchboard#RANKING_DIST_1_PATH
- */
- public static final String RANKING_PATH = "rankingPath";
- public static final String RANKING_PATH_DEFAULT = "DATA/RANKING";
/**
* public static final String WORK_PATH = "wordPath"
* Name of the setting specifying the folder beginning from the YaCy-installation's top-folder, where all
diff --git a/source/de/anomic/search/blockrank/CRDistribution.java b/source/de/anomic/search/blockrank/CRDistribution.java
deleted file mode 100644
index 6747093d0..000000000
--- a/source/de/anomic/search/blockrank/CRDistribution.java
+++ /dev/null
@@ -1,196 +0,0 @@
-// plasmaRankingDistribution.java
-// -------------------------------------------
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2005
-// created 9.11.2005
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-package de.anomic.search.blockrank;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Random;
-import java.util.StringTokenizer;
-
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.util.FileUtils;
-
-import de.anomic.yacy.yacyClient;
-import de.anomic.yacy.yacySeed;
-import de.anomic.yacy.yacySeedDB;
-import de.anomic.yacy.yacyVersion;
-
-public final class CRDistribution {
-
- public static final String CR_OWN = "GLOBAL/010_owncr";
- public static final String CR_OTHER = "GLOBAL/014_othercr/";
-
- public static final int METHOD_NONE = 0;
- public static final int METHOD_ANYSENIOR = 1;
- public static final int METHOD_ANYPRINCIPAL = 2;
- public static final int METHOD_MIXEDSENIOR = 9;
- public static final int METHOD_MIXEDPRINCIPAL = 10;
- public static final int METHOD_FIXEDADDRESS = 99;
-
- private final Log log;
- private final File sourcePath; // where to load CR-files
- private int method; // of peer selection
- private int percentage; // to select any other peer
- private String address[]; // of fixed other peer
- private final yacySeedDB seedDB;
- private static Random random = new Random(System.currentTimeMillis());
-
- public CRDistribution(final Log log, final yacySeedDB seedDB, final File sourcePath, final int method, final int percentage, final String addresses) {
- this.log = log;
- this.seedDB = seedDB;
- this.sourcePath = sourcePath;
- this.method = method;
- this.percentage = percentage;
- StringTokenizer st = new StringTokenizer(addresses, ",");
- int c = 0; while (st.hasMoreTokens()) {st.nextToken(); c++;}
- st = new StringTokenizer(addresses, ",");
- this.address = new String[c];
- c = 0;
- while (st.hasMoreTokens()) {this.address[c++] = st.nextToken();}
- }
-
- public void setMethod(final int method, final int percentage, final String address[]) {
- this.method = method;
- this.percentage = percentage;
- this.address = address;
- }
-
- public int size() {
- if ((sourcePath.exists()) && (sourcePath.isDirectory()))
- return sourcePath.list().length;
- return 0;
- }
-
- public boolean transferRanking(int count) throws InterruptedException {
-
- if (method == METHOD_NONE) {
- log.logFine("no ranking distribution: no transfer method given");
- return false;
- }
- if (seedDB == null) {
- log.logFine("no ranking distribution: seedDB == null");
- return false;
- }
- if (seedDB.mySeed() == null) {
- log.logFine("no ranking distribution: mySeed == null");
- return false;
- }
- if (seedDB.mySeed().isVirgin()) {
- log.logFine("no ranking distribution: status is virgin");
- return false;
- }
-
- final String[] outfiles = sourcePath.list();
-
- if (outfiles == null) {
- log.logFine("no ranking distribution: source path does not exist");
- return false;
- }
- if (outfiles.length == 0) {
- log.logFine("no ranking distribution: source path does not contain any file");
- return false;
- }
-
- if (outfiles.length < count) count = outfiles.length;
- File crfile = null;
-
- for (int i = 0; i < count; i++) {
- // check for interruption
- if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress");
-
- // getting the next file to transfer
- crfile = new File(sourcePath, outfiles[i]);
-
- if ((method == METHOD_ANYSENIOR) || (method == METHOD_ANYPRINCIPAL)) {
- transferRankingAnySeed(crfile, 5);
- }
- if (method == METHOD_FIXEDADDRESS) {
- transferRankingAddress(crfile);
- }
- if ((method == METHOD_MIXEDSENIOR) || (method == METHOD_MIXEDPRINCIPAL)) {
- if (random.nextInt(100) > percentage) {
- if (!(transferRankingAddress(crfile))) transferRankingAnySeed(crfile, 5);
- } else {
- if (!(transferRankingAnySeed(crfile, 5))) transferRankingAddress(crfile);
- }
- }
-
- }
- log.logFine("no ranking distribution: no target available");
- return false;
- }
-
- private boolean transferRankingAnySeed(final File crfile, final int trycount) throws InterruptedException {
- yacySeed target = null;
- for (int j = 0; j < trycount; j++) {
- // check for interruption
- if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress");
-
- // get next target
- target = seedDB.anySeedVersion(yacyVersion.YACY_ACCEPTS_RANKING_TRANSMISSION);
-
- if (target == null) continue;
- final String targetaddress = target.getPublicAddress();
- if (transferRankingAddress(crfile, targetaddress)) return true;
- }
- return false;
- }
-
- private boolean transferRankingAddress(final File crfile) throws InterruptedException {
- // try all addresses
- for (int i = 0; i < this.address.length; i++) {
- // check for interruption
- if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress");
-
- // try to transfer ranking address using the next address
- if (transferRankingAddress(crfile, this.address[i])) return true;
- }
- return false;
- }
-
- private boolean transferRankingAddress(final File crfile, final String address) {
- // do the transfer
- final long starttime = System.currentTimeMillis();
- String result = "unknown";
- try {
- final byte[] b = FileUtils.read(crfile);
- result = yacyClient.transfer(address, crfile.getName(), b);
- if (result == null) {
- log.logInfo("RankingDistribution - transmitted file " + crfile + " to " + address + " successfully in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds");
- FileUtils.deletedelete(crfile); // the file is not needed any more locally
- } else {
- log.logInfo("RankingDistribution - error transmitting file " + crfile + " to " + address + ": " + result);
- }
- } catch (final IOException e) {
- log.logInfo("RankingDistribution - could not read file " + crfile + ": " + e.getMessage());
- result = "input file error: " + e.getMessage();
- }
-
- // show success
- return result == null;
- }
-
-}
\ No newline at end of file
diff --git a/source/de/anomic/search/blockrank/CRProcess.java b/source/de/anomic/search/blockrank/CRProcess.java
deleted file mode 100644
index 4b3ebc97b..000000000
--- a/source/de/anomic/search/blockrank/CRProcess.java
+++ /dev/null
@@ -1,586 +0,0 @@
-// plasmaCRProcess.java
-// -----------------------
-// part of YaCy
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2005
-// Created 15.11.2005
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-package de.anomic.search.blockrank;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Date;
-import java.util.Iterator;
-
-import net.yacy.kelondro.data.word.WordReference;
-import net.yacy.kelondro.index.Index;
-import net.yacy.kelondro.index.Row;
-import net.yacy.kelondro.index.RowSet;
-import net.yacy.kelondro.index.RowSpaceExceededException;
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.order.Base64Order;
-import net.yacy.kelondro.order.Bitfield;
-import net.yacy.kelondro.order.CloneableIterator;
-import net.yacy.kelondro.order.MicroDate;
-import net.yacy.kelondro.rwi.IndexCell;
-import net.yacy.kelondro.rwi.ReferenceContainer;
-import net.yacy.kelondro.table.Table;
-import net.yacy.kelondro.util.AttrSeq;
-import net.yacy.kelondro.util.DateFormatter;
-import net.yacy.kelondro.util.FileUtils;
-import net.yacy.kelondro.util.MemoryControl;
-
-import de.anomic.search.Segment;
-
-public class CRProcess {
-
- /*
- header.append("# Name=YaCy " + ((type.equals("crl")) ? "Local" : "Global") + " Citation Reference Ticket"); header.append((char) 13); header.append((char) 10);
- header.append("# Created=" + System.currentTimeMillis()); header.append((char) 13); header.append((char) 10);
- header.append("# Structure=,'=',,,,,,,,,,,'|',*"); header.append((char) 13); header.append((char) 10);
- header.append("# ---"); header.append((char) 13); header.append((char) 10);
- */
-
- /*
- private static final int Col_Referee = 0;
- private static final int Col_UDate = 1;
- private static final int Col_VDate = 2;
- private static final int Col_LCount = 3;
- private static final int Col_GCount = 4;
- private static final int Col_ICount = 5;
- private static final int Col_DCount = 6;
- private static final int Col_TLength = 7;
- private static final int Col_WACount = 8;
- private static final int Col_WUCount = 9;
- */
- private static final int Col_Flags = 10;
- private static final int Col_FUDate = 11;
- private static final int Col_FDDate = 12;
- private static final int Col_LUDate = 13;
- private static final int Col_UCount = 14;
- private static final int Col_PCount = 15;
- private static final int Col_ACount = 16;
- private static final int Col_VCount = 17;
- private static final int Col_Vita = 18;
-
- public static final Row CRG_accrow = new Row(
- "byte[] Referee-12," +
- "Cardinal UDate-3 {b64e}, Cardinal VDate-3 {b64e}, " +
- "Cardinal LCount-2 {b64e}, Cardinal GCount-2 {b64e}, Cardinal ICount-2 {b64e}, Cardinal DCount-2 {b64e}, Cardinal TLength-3 {b64e}, " +
- "Cardinal WACount-3 {b64e}, Cardinal WUCount-3 {b64e}, Cardinal Flags-1 {b64e}, " +
- "Cardinal FUDate-3 {b64e}, Cardinal FDDate-3 {b64e}, Cardinal LUDate-3 {b64e}, " +
- "Cardinal UCount-2 {b64e}, Cardinal PCount-2 {b64e}, Cardinal ACount-2 {b64e}, Cardinal VCount-2 {b64e}, Cardinal Vita-2 {b64e}",
- Base64Order.enhancedCoder);
- public static final Row CRG_colrow = new Row("byte[] Anchor-12", Base64Order.enhancedCoder);
- public static final String CRG_accname = "CRG-a-attr";
- public static final String CRG_seqname = "CRG-a-coli";
- public static final Row RCI_coli = new Row("byte[] RefereeDom-6", Base64Order.enhancedCoder);
- public static final String RCI_colname = "RCI-a-coli";
-
- private static boolean accumulate_upd(final File f, final AttrSeq acc) {
- // open file
- AttrSeq source_cr = null;
- try {
- source_cr = new AttrSeq(f, false);
- } catch (final IOException e) {
- return false;
- }
-
- // put elements in accumulator file
- final Iterator<String> el = source_cr.keys();
- String key;
- AttrSeq.Entry new_entry, acc_entry;
- int FUDate, FDDate, LUDate, UCount, PCount, ACount, VCount, Vita;
- Bitfield acc_flags, new_flags;
- while (el.hasNext()) {
- key = el.next();
- new_entry = source_cr.getEntry(key);
- new_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(new_entry.getAttr("Flags", 0), 1).getBytes());
- // enrich information with additional values
- if ((acc_entry = acc.getEntry(key)) != null) {
- FUDate = (int) acc_entry.getAttr("FUDate", 0);
- FDDate = (int) acc_entry.getAttr("FDDate", 0);
- LUDate = (int) acc_entry.getAttr("LUDate", 0);
- UCount = (int) acc_entry.getAttr("UCount", 0);
- PCount = (int) acc_entry.getAttr("PCount", 0);
- ACount = (int) acc_entry.getAttr("ACount", 0);
- VCount = (int) acc_entry.getAttr("VCount", 0);
- Vita = (int) acc_entry.getAttr("Vita", 0);
-
- // update counters and dates
- acc_entry.setSeq(new_entry.getSeqSet()); // need to be checked
-
- UCount++; // increase update counter
- PCount += (new_flags.get(1)) ? 1 : 0;
- ACount += (new_flags.get(2)) ? 1 : 0;
- VCount += (new_flags.get(3)) ? 1 : 0;
-
- // 'OR' the flags
- acc_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(acc_entry.getAttr("Flags", 0), 1).getBytes());
- for (int i = 0; i < 6; i++) {
- if (new_flags.get(i)) acc_flags.set(i, true);
- }
- acc_entry.setAttr("Flags", (int) Base64Order.enhancedCoder.decodeLong(acc_flags.exportB64()));
- } else {
- // initialize counters and dates
- acc_entry = acc.newEntry(key, new_entry.getAttrs(), new_entry.getSeqSet());
- FUDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // first update date
- FDDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack
- LUDate = (int) new_entry.getAttr("VDate", 0);
- UCount = 0;
- PCount = (new_flags.get(1)) ? 1 : 0;
- ACount = (new_flags.get(2)) ? 1 : 0;
- VCount = (new_flags.get(3)) ? 1 : 0;
- Vita = 0;
- }
- // make plausibility check?
-
- // insert into accumulator
- acc_entry.setAttr("FUDate", FUDate);
- acc_entry.setAttr("FDDate", FDDate);
- acc_entry.setAttr("LUDate", LUDate);
- acc_entry.setAttr("UCount", UCount);
- acc_entry.setAttr("PCount", PCount);
- acc_entry.setAttr("ACount", ACount);
- acc_entry.setAttr("VCount", VCount);
- acc_entry.setAttr("Vita", Vita);
- acc.putEntrySmall(acc_entry);
- }
-
- return true;
- }
-
- public static boolean accumulate_upd(final File f, final Index acc) throws IOException, RowSpaceExceededException {
- // open file
- AttrSeq source_cr = null;
- try {
- source_cr = new AttrSeq(f, false);
- } catch (final IOException e) {
- return false;
- }
-
- // put elements in accumulator file
- final Iterator<String> el = source_cr.keys();
- String key;
- AttrSeq.Entry new_entry;
- Row.Entry acc_entry;
- int FUDate, FDDate, LUDate, UCount, PCount, ACount, VCount, Vita;
- Bitfield acc_flags, new_flags;
- while (el.hasNext()) {
- key = el.next();
- new_entry = source_cr.getEntry(key);
- new_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(new_entry.getAttr("Flags", 0), 1).getBytes());
- // enrich information with additional values
- if ((acc_entry = acc.get(key.getBytes())) != null) {
- FUDate = (int) acc_entry.getColLong(Col_FUDate);
- FDDate = (int) acc_entry.getColLong(Col_FDDate);
- LUDate = (int) acc_entry.getColLong(Col_LUDate);
- UCount = (int) acc_entry.getColLong(Col_UCount);
- PCount = (int) acc_entry.getColLong(Col_PCount);
- ACount = (int) acc_entry.getColLong(Col_ACount);
- VCount = (int) acc_entry.getColLong(Col_VCount);
- Vita = (int) acc_entry.getColLong(Col_Vita);
-
- // update counters and dates
- //seq.add(key.getBytes(), new_entry.getSeqCollection());
-
- UCount++; // increase update counter
- PCount += (new_flags.get(1)) ? 1 : 0;
- ACount += (new_flags.get(2)) ? 1 : 0;
- VCount += (new_flags.get(3)) ? 1 : 0;
-
- // 'OR' the flags
- acc_flags = new Bitfield(Base64Order.enhancedCoder.encodeLong(acc_entry.getColLong(Col_Flags), 1).getBytes());
- for (int i = 0; i < 6; i++) {
- if (new_flags.get(i)) acc_flags.set(i, true);
- }
- acc_entry.setCol(Col_Flags, (int) Base64Order.enhancedCoder.decodeLong(acc_flags.exportB64()));
- } else {
- // initialize counters and dates
- acc_entry = acc.row().newEntry();
- acc_entry.setCol(0, key, null);
- for (int i = 1; i < acc.row().columns(); i++) {
- acc_entry.setCol(i, new_entry.getAttr(acc.row().column(i).nickname, 0));
- }
- //seq.put(key.getBytes(), new_entry.getSeqCollection());
- FUDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // first update date
- FDDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack
- LUDate = (int) new_entry.getAttr("VDate", 0);
- UCount = 0;
- PCount = (new_flags.get(1)) ? 1 : 0;
- ACount = (new_flags.get(2)) ? 1 : 0;
- VCount = (new_flags.get(3)) ? 1 : 0;
- Vita = 0;
- }
- // make plausibility check?
-
- // insert into accumulator
- acc_entry.setCol(Col_FUDate, FUDate);
- acc_entry.setCol(Col_FDDate, FDDate);
- acc_entry.setCol(Col_LUDate, LUDate);
- acc_entry.setCol(Col_UCount, UCount);
- acc_entry.setCol(Col_PCount, PCount);
- acc_entry.setCol(Col_ACount, ACount);
- acc_entry.setCol(Col_VCount, VCount);
- acc_entry.setCol(Col_Vita, Vita);
- acc.put(acc_entry);
- }
-
- return true;
- }
-
- public static void accumulate(
- final File from_dir,
- final File tmp_dir,
- final File err_dir,
- final File bkp_dir,
- final File to_file,
- int max_files,
- final boolean newdb) throws IOException, RowSpaceExceededException {
- if (!(from_dir.isDirectory())) {
- System.out.println("source path " + from_dir + " is not a directory.");
- return;
- }
- if (!(tmp_dir.isDirectory())) {
- System.out.println("temporary path " + tmp_dir + " is not a directory.");
- return;
- }
- if (!(err_dir.isDirectory())) {
- System.out.println("error path " + err_dir + " is not a directory.");
- return;
- }
- if (!(bkp_dir.isDirectory())) {
- System.out.println("back-up path " + bkp_dir + " is not a directory.");
- return;
- }
-
- // open target file
- AttrSeq acc = null;
- Index newacc = null;
- IndexCell newseq = null;
- if (newdb) {
- final File path = to_file.getParentFile(); // path to storage place
- newacc = new Table(new File(path, CRG_accname), CRG_accrow, 0, 0, true, false);
- newseq = new IndexCell(
- path,
- "index",
- Segment.wordReferenceFactory,
- Base64Order.enhancedCoder,
- CRG_colrow,
- 10000, 1000000000L, 20, null, 1000000);
- } else {
- if (!(to_file.exists())) {
- acc = new AttrSeq("Global Ranking Accumulator File",
- ",'='," +
- ",,,,,,,,,," +
- ",,,,,,,," +
- "'|',*", false);
- acc.toFile(to_file);
- }
- acc = new AttrSeq(to_file, false);
- }
- // collect source files
- File source_file = null;
- final String[] files = from_dir.list();
- if (files.length < max_files) max_files = files.length;
- for (int i = 0; i < max_files; i++) {
- // open file
- source_file = new File(from_dir, files[i]);
- if (newdb) {
- /*
- if (accumulate_upd(source_file, newacc, newseq)) {
- // move CR file to temporary folder
- source_file.renameTo(new File(tmp_dir, files[i]));
- } else {
- // error case: the CR-file is not valid; move to error path
- source_file.renameTo(new File(err_dir, files[i]));
- }
- */
- } else {
- if (accumulate_upd(source_file, acc)) {
- // move CR file to temporary folder
- source_file.renameTo(new File(tmp_dir, files[i]));
- } else {
- // error case: the CR-file is not valid; move to error path
- source_file.renameTo(new File(err_dir, files[i]));
- }
- }
- }
-
- try {
- if (newdb) {
- newacc.close();
- newseq.close();
- } else {
- // save accumulator to temporary file
- File tmp_file;
- if (to_file.toString().endsWith(".gz")) {
- tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".prt.gz");
- } else {
- tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".prt");
- }
- // store the file
- acc.toFile(tmp_file);
- // since this was successful, we remove the old file and move the new file to it
- FileUtils.deletedelete(to_file);
- tmp_file.renameTo(to_file);
- }
- FileUtils.moveAll(tmp_dir, bkp_dir);
- } catch (final IOException e) {
- // move previously processed files back
- Log.logException(e);
- FileUtils.moveAll(tmp_dir, from_dir);
- }
-
- }
-
- public static int genrci(File cr_in, final File rci_out) throws IOException {
- if (!(cr_in.exists())) return 0;
- AttrSeq cr = new AttrSeq(cr_in, false);
- //if (rci_out.exists()) FileUtils.deletedelete(rci_out); // we want only fresh rci here (during testing)
- if (!(rci_out.exists())) {
- final AttrSeq rcix = new AttrSeq("Global Ranking Reverse Citation Index",
- ",'='," +
- "," +
- "'|',*", false);
- rcix.toFile(rci_out);
- }
- final AttrSeq rci = new AttrSeq(rci_out, false);
-
- // loop over all referees
- int count = 0;
- final int size = cr.size();
- final long start = System.currentTimeMillis();
- long l;
- final Iterator<String> i = cr.keys();
- String referee, anchor, anchorDom;
- AttrSeq.Entry cr_entry, rci_entry;
- long cr_UDate, rci_UDate;
- while (i.hasNext()) {
- referee = i.next();
- cr_entry = cr.getEntry(referee);
- cr_UDate = cr_entry.getAttr("UDate", 0);
-
- // loop over all anchors
- final Iterator<String> j = cr_entry.getSeqSet().iterator();
- while (j.hasNext()) {
- // get domain of anchors
- anchor = j.next();
- if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6);
-
- // update domain-specific entry
- rci_entry = rci.getEntry(anchorDom);
- if (rci_entry == null) rci_entry = rci.newEntry(anchorDom, false);
- rci_entry.addSeq(referee);
-
- // update Update-Date
- rci_UDate = rci_entry.getAttr("UDate", 0);
- if (cr_UDate > rci_UDate) rci_entry.setAttr("UDate", cr_UDate);
-
- // insert entry
- rci.putEntry(rci_entry);
- }
- count++;
- if ((count % 1000) == 0) {
- l = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("processed " + count + " citations, " + (count / l) + " per second, rci.size = " + rci.size() + ", " + ((size - count) / (count / l)) + " seconds remaining; mem = " + MemoryControl.available());
- }
- i.remove();
- }
-
- // finished. write to file
- cr = null;
- cr_in = null;
- rci.toFile(rci_out);
- return count;
- }
-
- public static int genrcix(final File cr_path_in, final File rci_path_out) throws IOException, RowSpaceExceededException {
- //kelondroFlexTable acc = new kelondroFlexTable(cr_path_in, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow, true);
- final IndexCell seq = new IndexCell(
- cr_path_in, "index", Segment.wordReferenceFactory, Base64Order.enhancedCoder, CRG_colrow, 10000, 1000000000L, 20, null, 1000000);
- final IndexCell rci = new IndexCell(
- rci_path_out, "index", Segment.wordReferenceFactory, Base64Order.enhancedCoder, RCI_coli, 10000, 1000000000L, 20, null, 1000000);
-
- // loop over all referees
- int count = 0;
- final int size = seq.size();
- final long start = System.currentTimeMillis();
- long l;
- final CloneableIterator<ReferenceContainer<WordReference>> i = seq.references(null, false);
- ReferenceContainer keycollection;
- String referee, refereeDom, anchor, anchorDom;
- RowSet rci_entry;
- CloneableIterator<Row.Entry> cr_entry;
- while (i.hasNext()) {
- keycollection = i.next();
- referee = new String(keycollection.getTermHash());
- if (referee.length() == 6) refereeDom = referee; else refereeDom = referee.substring(6);
- cr_entry = keycollection.rows();
-
- // loop over all anchors
- Row.Entry entry;
- while (cr_entry.hasNext()) {
- entry = cr_entry.next();
- anchor = entry.getColString(0, null);
- if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6);
-
- // update domain-specific entry
- rci_entry = rci.get(anchorDom.getBytes(), null);
- if (rci_entry == null) rci_entry = new RowSet(RCI_coli, 0);
- rci_entry.add(refereeDom.getBytes());
-
- // insert entry
- //rci.put(anchorDom.getBytes(), rci_entry);
- }
- count++;
- if ((count % 1000) == 0) {
- l = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("processed " + count + " citations, " + (count / l) + " per second, rci.size = " + rci.size() + ", " + ((size - count) / (count / l) / 60) + " minutes remaining; mem = " + MemoryControl.free());
- }
- }
-
- // finished. write to file
- seq.close();
- rci.close();
- return count;
- }
-
- public static void main(final String[] args) {
- // java -classpath source de.anomic.plasma.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr
- try {
- if ((args.length == 5) && (args[0].equals("-accumulate"))) {
- accumulate(new File(args[1]), new File(args[2]), new File(args[3]), new File(args[4]), new File(args[5]), Integer.parseInt(args[6]), true);
- }
- if ((args.length == 2) && (args[0].equals("-accumulate"))) {
- final File root_path = new File(args[1]);
- final File from_dir = new File(root_path, "DATA/RANKING/GLOBAL/014_othercr");
- final File ready_dir = new File(root_path, "DATA/RANKING/GLOBAL/015_ready");
- final File tmp_dir = new File(root_path, "DATA/RANKING/GLOBAL/016_tmp");
- final File err_dir = new File(root_path, "DATA/RANKING/GLOBAL/017_err");
- final File acc_dir = new File(root_path, "DATA/RANKING/GLOBAL/018_acc");
- final String filename = "CRG-a-" + DateFormatter.formatShortMilliSecond(new Date()) + ".cr.gz";
- final File to_file = new File(root_path, "DATA/RANKING/GLOBAL/020_con0/" + filename);
- if (!(ready_dir.exists())) ready_dir.mkdirs();
- if (!(tmp_dir.exists())) tmp_dir.mkdirs();
- if (!(err_dir.exists())) err_dir.mkdirs();
- if (!(acc_dir.exists())) acc_dir.mkdirs();
- if (!(to_file.getParentFile().exists())) to_file.getParentFile().mkdirs();
- FileUtils.moveAll(from_dir, ready_dir);
- final long start = System.currentTimeMillis();
- final int files = ready_dir.list().length;
- accumulate(ready_dir, tmp_dir, err_dir, acc_dir, to_file, 1000, true);
- final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("Finished accumulate for " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)");
- }
- if ((args.length == 3) && (args[0].equals("-recycle"))) {
- final File root_path = new File(args[1]);
- final int max_age_hours = Integer.parseInt(args[2]);
- final File own_dir = new File(root_path, "DATA/RANKING/GLOBAL/010_owncr");
- final File acc_dir = new File(root_path, "DATA/RANKING/GLOBAL/018_acc");
- final File bkp_dir = new File(root_path, "DATA/RANKING/GLOBAL/019_bkp");
- if (!(own_dir.exists())) return;
- if (!(acc_dir.exists())) return;
- if (!(bkp_dir.exists())) bkp_dir.mkdirs();
- final String[] list = acc_dir.list();
- final long start = System.currentTimeMillis();
- final int files = list.length;
- long d;
- File f;
- for (int i = 0; i < list.length; i++) {
- f = new File(acc_dir, list[i]);
- try {
- d = (System.currentTimeMillis() - (new AttrSeq(f, false)).created()) / 3600000;
- if (d > max_age_hours) {
- // file is considered to be too old, it is not recycled
- System.out.println("file " + f.getName() + " is old (" + d + " hours) and not recycled, only moved to backup");
- f.renameTo(new File(bkp_dir, list[i]));
- } else {
- // file is fresh, it is duplicated and moved to be transferred to other peers again
- System.out.println("file " + f.getName() + " is fresh (" + d + " hours old), recycled and moved to backup");
- FileUtils.copy(f, new File(own_dir, list[i]));
- f.renameTo(new File(bkp_dir, list[i]));
- }
- } catch (final IOException e) {
- // there is something wrong with this file; delete it
- System.out.println("file " + f.getName() + " is corrupted and deleted");
- FileUtils.deletedelete(f);
- }
- }
- final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("Finished recycling of " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)");
- }
- if ((args.length == 2) && (args[0].equals("-genrci"))) {
- final File root_path = new File(args[1]);
- final File cr_filedir = new File(root_path, "DATA/RANKING/GLOBAL/020_con0");
- final File rci_filedir = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0");
- rci_filedir.mkdirs();
- final long start = System.currentTimeMillis();
- final int count = genrcix(cr_filedir, rci_filedir);
- final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("Completed RCI generation: " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)");
- }
- /*
- if ((args.length == 2) && (args[0].equals("-genrci"))) {
- File root_path = new File(args[1]);
- File cr_filedir = new File(root_path, "DATA/RANKING/GLOBAL/020_con0");
- File rci_file = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0/RCI-0.rci.gz");
- rci_file.getParentFile().mkdirs();
- String[] cr_filenames = cr_filedir.list();
- for (int i = 0; i < cr_filenames.length; i++) {
- long start = System.currentTimeMillis();
- int count = genrci(new File(cr_filedir, cr_filenames[i]), rci_file);
- long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("Completed RCI generation for input file " + cr_filenames[i] + ": " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)");
- }
- }
- */
- } catch (final Exception e) {
- Log.logException(e);
- }
- }
-
- /*
- Class-A File format:
-
- UDate : latest update timestamp of the URL (as virtual date, hours since epoch)
- VDate : last visit timestamp of the URL (as virtual date, hours since epoch)
- LCount : count of links to local resources
- GCount : count of links to global resources
- ICount : count of links to images (in document)
- DCount : count of links to other documents
- TLength: length of the plain text content (bytes)
- WACount: total number of all words in content
- WUCount: number of unique words in content (removed doubles)
- Flags : Flags (0=update, 1=popularity, 2=attention, 3=vote)
-
- Class-a File format is an extension of Class-A plus the following attributes
- FUDate : first update timestamp of the URL
- FDDate : first update timestamp of the domain
- LUDate : latest update timestamp of the URL
- UCount : Update Counter (of 'latest update timestamp')
- PCount : Popularity Counter (proxy clicks)
- ACount : Attention Counter (search result clicks)
- VCount : Votes
- Vita : Vitality (normed number of updates per time)
- */
-}
diff --git a/source/de/anomic/search/blockrank/RCIEvaluation.java b/source/de/anomic/search/blockrank/RCIEvaluation.java
deleted file mode 100644
index 1b2ce6722..000000000
--- a/source/de/anomic/search/blockrank/RCIEvaluation.java
+++ /dev/null
@@ -1,238 +0,0 @@
-// plasmaRCIEvaluation.java
-// -----------------------
-// part of YaCy
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2005
-// Created 18.11.2005
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-package de.anomic.search.blockrank;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.TreeSet;
-
-import net.yacy.kelondro.data.meta.DigestURI;
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.order.Base64Order;
-import net.yacy.kelondro.order.Digest;
-import net.yacy.kelondro.util.AttrSeq;
-import net.yacy.kelondro.util.FileUtils;
-
-import de.anomic.search.RankingProcess;
-
-public class RCIEvaluation {
-
- public static int[] rcieval(final AttrSeq rci) {
- // collect information about which entry has how many references
- // the output is a reference-count:occurrences relation
- final HashMap counts = new HashMap();
- final Iterator i = rci.keys();
- String key;
- AttrSeq.Entry entry;
- Integer count_key, count_count;
- int c, maxcount = 0;
- while (i.hasNext()) {
- key = i.next();
- entry = rci.getEntry(key);
- c = entry.getSeqSet().size();
- if (c > maxcount) maxcount = c;
- count_key = Integer.valueOf(c);
- count_count = counts.get(count_key);
- if (count_count == null) {
- count_count = 1;
- } else {
- count_count = Integer.valueOf(count_count.intValue() + 1);
- }
- counts.put(count_key, count_count);
- }
- final int[] ctable = new int[maxcount + 1];
- for (int j = 0; j <= maxcount; j++) {
- count_count = counts.get(Integer.valueOf(j));
- if (count_count == null) {
- ctable[j] = 0;
- } else {
- ctable[j] = count_count.intValue();
- }
- }
- return ctable;
- }
-
- public static long sum(final int[] c) {
- long s = 0;
- for (int i = 0; i < c.length; i++) s += c[i];
- return s;
- }
-
- public static int[] interval(final int[] counts, final int parts) {
- long limit = sum(counts) / 2;
- final int[] partition = new int[parts];
- int s = 0, p = parts - 1;
- for (int i = 1; i < counts.length; i++) {
- s += counts[i];
- if ((s > limit) && (p >= 0)) {
- partition[p--] = i;
- limit = (2 * limit - s) / 2;
- s = 0;
- }
- }
- partition[0] = counts.length - 1;
- for (int i = 1; i < 10; i++) partition[i] = (partition[i - 1] + 4 * partition[i]) / 5;
- return partition;
- }
-
- public static void checkPartitionTable0(final int[] counts, final int[] partition) {
- int sumsum = 0;
- int sum;
- int j = 0;
- for (int i = partition.length - 1; i >= 0; i--) {
- sum = 0;
- while (j <= partition[i]) {
- sum += counts[j++];
- }
- System.out.println("sum of YBR-" + i + " entries: " + sum);
- sumsum += sum;
- }
- System.out.println("complete sum = " + sumsum);
- }
-
- public static void checkPartitionTable1(final int[] counts, final int[] partition) {
- int sumsum = 0;
- final int[] sum = new int[partition.length];
- for (int i = 0; i < partition.length; i++) sum[i] = 0;
- for (int i = 0; i < counts.length; i++) sum[orderIntoYBI(partition, i)] += counts[i];
- for (int i = partition.length - 1; i >= 0; i--) {
- System.out.println("sum of YBR-" + i + " entries: " + sum[i]);
- sumsum += sum[i];
- }
- System.out.println("complete sum = " + sumsum);
- }
-
- public static int orderIntoYBI(final int[] partition, final int count) {
- for (int i = 0; i < partition.length - 1; i++) {
- if ((count >= (partition[i + 1] + 1)) && (count <= partition[i])) return i;
- }
- return partition.length - 1;
- }
-
- @SuppressWarnings("unchecked")
- public static TreeSet[] genRankingTable(final AttrSeq rci, final int[] partition) {
- final TreeSet[] ranked = new TreeSet[partition.length];
- for (int i = 0; i < partition.length; i++) ranked[i] = new TreeSet(Base64Order.enhancedCoder);
- final Iterator i = rci.keys();
- String key;
- AttrSeq.Entry entry;
- while (i.hasNext()) {
- key = i.next();
- entry = rci.getEntry(key);
- ranked[orderIntoYBI(partition, entry.getSeqSet().size())].add(key.getBytes());
- }
- return ranked;
- }
-
- public static HashMap genReverseDomHash(final File domlist) {
- final HashSet domset = FileUtils.loadList(domlist);
- final HashMap dommap = new HashMap();
- final Iterator i = domset.iterator();
- String dom;
- while (i.hasNext()) {
- dom = i.next();
- if (dom.startsWith("www.")) dom = dom.substring(4);
- try {
- dommap.put(new String((new DigestURI("http://" + dom)).hash(), 6, 6), dom);
- dommap.put(new String((new DigestURI("http://www." + dom)).hash(), 6, 6), "www." + dom);
- } catch (final MalformedURLException e) {}
- }
- return dommap;
- }
-
- public static void storeRankingTable(final TreeSet[] ranking, final File tablePath) throws IOException {
- String filename;
- if (!(tablePath.exists())) tablePath.mkdirs();
- for (int i = 0; i < ranking.length - 1; i++) {
- filename = "YBR-4-" + Digest.encodeHex(i, 2) + ".idx";
- FileUtils.saveSet(new File(tablePath, filename), "plain", ranking[i], "");
- }
- }
-
- public static void main(final String[] args) {
- try {
- if ((args.length == 2) && (args[0].equals("-genybr"))) {
- final File root_path = new File(args[1]);
- final File rci_file = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0/RCI-0.rci.gz");
- final long start = System.currentTimeMillis();
- if (!(rci_file.exists())) return;
-
- // create partition table
- final AttrSeq rci = new AttrSeq(rci_file, false);
- final int counts[] = rcieval(rci);
- final int[] partition = interval(counts, 16);
-
- // check the table
- System.out.println("partition position table:");
- for (int i = 0; i < partition.length - 1; i++) {
- System.out.println("YBR-" + i + ": " + (partition[i + 1] + 1) + " - " + partition[i] + " references");
- }
- System.out.println("YBR-" + (partition.length - 1) + ": 0 - " + partition[partition.length - 1] + " references");
- checkPartitionTable0(counts, partition);
- checkPartitionTable1(counts, partition);
- int sum = 0;
- for (int i = 0; i < counts.length; i++) sum += counts[i];
- System.out.println("sum of all references: " + sum);
-
- // create ranking
- final TreeSet[] ranked = genRankingTable(rci, partition);
- storeRankingTable(ranked, new File(root_path, "ranking/YBR"));
- final long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000);
- System.out.println("Finished YBR generation in " + seconds + " seconds.");
- }
- if ((args.length == 2) && (args[0].equals("-rcieval"))) {
- final File root_path = new File(args[1]);
-
- // load a partition table
- RankingProcess.loadYBR(new File(root_path, "ranking/YBR"), 16);
-
- // load domain list and generate hash index for domains
- final HashMap dommap = genReverseDomHash(new File(root_path, "domlist.txt"));
-
- // print out the table
- String hash, dom;
- for (int i = 0; i < 9; i++) {
- System.out.print("YBR-" + i + ": ");
- for (int j = 0; j < RankingProcess.ybrTables[i].size(); j++) {
- hash = new String(RankingProcess.ybrTables[i].get(j));
- dom = dommap.get(hash);
- if (dom == null) System.out.print("[" + hash + "], "); else System.out.print(dom + ", ");
- }
- System.out.println();
- }
-
- }
- } catch (final IOException e) {
- Log.logException(e);
- }
- }
-
-}
diff --git a/source/de/anomic/yacy/graphics/WebStructureGraph.java b/source/de/anomic/yacy/graphics/WebStructureGraph.java
index 4bee73cb6..ab6ded34d 100644
--- a/source/de/anomic/yacy/graphics/WebStructureGraph.java
+++ b/source/de/anomic/yacy/graphics/WebStructureGraph.java
@@ -42,8 +42,6 @@ import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.order.Base64Order;
-import net.yacy.kelondro.order.MicroDate;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.LookAheadIterator;
@@ -56,19 +54,13 @@ public class WebStructureGraph {
public static int maxref = 300; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia)
public static int maxhosts = 20000; // maximum number of hosts in web structure map
- private StringBuilder crg; // global citation references
private final Log log;
- private final File rankingPath, structureFile;
- private final String crlFile, crgFile;
+ private final File structureFile;
TreeMap structure_old; // ',' to {}*
TreeMap structure_new;
- public WebStructureGraph(final Log log, final File rankingPath, final String crlFile, final String crgFile, final File structureFile) {
+ public WebStructureGraph(final Log log, final File structureFile) {
this.log = log;
- this.rankingPath = rankingPath;
- this.crlFile = crlFile;
- this.crgFile = crgFile;
- this.crg = new StringBuilder(maxCRGDump);
this.structure_old = new TreeMap();
this.structure_new = new TreeMap();
this.structureFile = structureFile;
@@ -126,63 +118,12 @@ public class WebStructureGraph {
}
}
- // append this reference to buffer
- // generate header info
- final String head = new String(url.hash()) + "=" +
- MicroDate.microDateHoursStr(docDate.getTime()) + // latest update timestamp of the URL
- MicroDate.microDateHoursStr(System.currentTimeMillis()) + // last visit timestamp of the URL
- Base64Order.enhancedCoder.encodeLongSmart(LCount, 2) + // count of links to local resources
- Base64Order.enhancedCoder.encodeLongSmart(GCount, 2) + // count of links to global resources
- Base64Order.enhancedCoder.encodeLongSmart(document.getImages().size(), 2) + // count of Images in document
- Base64Order.enhancedCoder.encodeLongSmart(0, 2) + // count of links to other documents
- Base64Order.enhancedCoder.encodeLongSmart(document.getTextLength(), 3) + // length of plain text in bytes
- Base64Order.enhancedCoder.encodeLongSmart((condenser == null) ? 0 : condenser.RESULT_NUMB_WORDS, 3) + // count of all appearing words
- Base64Order.enhancedCoder.encodeLongSmart((condenser == null) ? 0 : condenser.words().size(), 3) + // count of all unique words
- Base64Order.enhancedCoder.encodeLongSmart(0, 1); // Flags (update, popularity, attention, vote)
-
- //crl.append(head); crl.append ('|'); crl.append(cpl); crl.append((char) 13); crl.append((char) 10);
- crg.append(head); crg.append('|'); crg.append(cpg); crg.append((char) 13); crg.append((char) 10);
-
assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString();
learn(url, cpg);
- // if buffer is full, flush it.
- /*
- if (crl.length() > maxCRLDump) {
- flushCitationReference(crl, "crl");
- crl = new StringBuilder(maxCRLDump);
- }
- **/
- if (crg.length() > maxCRGDump) {
- flushCitationReference("crg");
- crg = new StringBuilder(maxCRGDump);
- }
-
return new Integer[] {Integer.valueOf(LCount), Integer.valueOf(GCount)};
}
- public void flushCitationReference(final String type) {
- if (crg.length() < 12) return;
- final String filename = type.toUpperCase() + "-A-" + DateFormatter.formatShortMilliSecond(new Date()) + "." + crg.substring(0, 12) + ".cr.gz";
- final File path = new File(rankingPath, (type.equals("crl")) ? crlFile : crgFile);
- path.mkdirs();
- final File file = new File(path, filename);
-
- // generate header
- final StringBuilder header = new StringBuilder(200);
- header.append("# Name=YaCy " + ((type.equals("crl")) ? "Local" : "Global") + " Citation Reference Ticket"); header.append((char) 13); header.append((char) 10);
- header.append("# Created=" + System.currentTimeMillis()); header.append((char) 13); header.append((char) 10);
- header.append("# Structure=,'=',,,,,,,,,,,'|',*"); header.append((char) 13); header.append((char) 10);
- header.append("# ---"); header.append((char) 13); header.append((char) 10);
- crg.insert(0, header.toString());
- try {
- FileUtils.writeAndGZip(crg.toString().getBytes(), file);
- if (this.log.isFine()) log.logFine("wrote citation reference dump " + file.toString());
- } catch (final IOException e) {
- Log.logException(e);
- }
- }
-
private static int refstr2count(final String refs) {
if ((refs == null) || (refs.length() <= 8)) return 0;
assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length();
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 7cea44824..0ecf44ee6 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -61,7 +61,6 @@ import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
-import net.yacy.cora.protocol.ByteArrayBody;
import net.yacy.cora.protocol.http.HTTPConnector;
import net.yacy.cora.services.Search;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@@ -71,7 +70,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield;
-import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceContainerCache;
@@ -747,77 +745,6 @@ public final class yacyClient {
if (address == null) address = "localhost:8080";
return address;
}
-
- public static Map transferPermission(final String targetAddress, final long filesize, final String filename) {
-
- // prepare request
- final String salt = crypt.randomSalt();
-
- // send request
- try {
- final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
- parts.put("process", new StringBody("permission"));
- parts.put("purpose", new StringBody("crcon"));
- parts.put("filename", new StringBody(filename));
- parts.put("filesize", new StringBody(Long.toString(filesize)));
- parts.put("can-send-protocol", new StringBody("http"));
- final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
- final Map result = FileUtils.table(content);
- return result;
- } catch (final Exception e) {
- // most probably a network time-out exception
- yacyCore.log.logSevere("yacyClient.permissionTransfer error:" + e.getMessage());
- return null;
- }
- }
-
- public static Map transferStore(final String targetAddress, final String access, final String filename, final byte[] file) {
-
- // prepare request
- final String salt = crypt.randomSalt();
-
- // send request
- try {
- final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
- parts.put("process", new StringBody("store"));
- parts.put("purpose", new StringBody("crcon"));
- parts.put("filesize", new StringBody(Long.toString(file.length)));
- parts.put("md5", new StringBody(Digest.encodeMD5Hex(file)));
- parts.put("access", new StringBody(access));
- parts.put("filename", new ByteArrayBody(file, filename));
- final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
- final Map result = FileUtils.table(content);
- return result;
- } catch (final Exception e) {
- yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage());
- return null;
- }
- }
-
- public static String transfer(final String targetAddress, final String filename, final byte[] file) {
- final Map phase1 = transferPermission(targetAddress, file.length, filename);
- if (phase1 == null) return "no connection to remote address " + targetAddress + "; phase 1";
- final String access = phase1.get("access");
- final String nextaddress = phase1.get("address");
- final String protocol = phase1.get("protocol");
- //String path = (String) phase1.get("path");
- //String maxsize = (String) phase1.get("maxsize");
- String response = phase1.get("response");
- if ((response == null) || (protocol == null) || (access == null)) return "wrong return values from other peer; phase 1";
- if (!(response.equals("ok"))) return "remote peer rejected transfer: " + response;
- final String accesscode = Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(access));
- if (protocol.equals("http")) {
- final Map phase2 = transferStore(nextaddress, accesscode, filename, file);
- if (phase2 == null) return "no connection to remote address " + targetAddress + "; phase 2";
- response = phase2.get("response");
- if (response == null) return "wrong return values from other peer; phase 2";
- if (!(response.equals("ok"))) {
- return "remote peer failed with transfer: " + response;
- }
- return null;
- }
- return "wrong protocol: " + protocol;
- }
public static Map crawlReceipt(final yacySeed mySeed, final yacySeed target, final String process, final String result, final String reason, final URIMetadataRow entry, final String wordhashes) {
assert (target != null);
diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java
index 180d50c6d..bf73d785d 100644
--- a/source/de/anomic/yacy/yacyCore.java
+++ b/source/de/anomic/yacy/yacyCore.java
@@ -357,10 +357,6 @@ public class yacyCore {
log.logSevere("publishMySeed: problem with news encoding", e);
}
sb.peers.mySeed().setUnusedFlags();
-
- // include current citation-rank file count
- sb.peers.mySeed().put(yacySeed.CRWCNT, Integer.toString(sb.rankingOwnDistribution.size()));
- sb.peers.mySeed().put(yacySeed.CRTCNT, Integer.toString(sb.rankingOtherDistribution.size()));
int newSeeds = -1;
//if (seeds.length > 1) {
// holding a reference to all started threads
diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java
index c9c062192..3bfb36cf6 100644
--- a/source/de/anomic/yacy/yacySeed.java
+++ b/source/de/anomic/yacy/yacySeed.java
@@ -147,10 +147,6 @@ public class yacySeed implements Cloneable {
public static final String SCOUNT = "SCount";
/** the number of clients that the peer connects (connects/hour as double) */
public static final String CCOUNT = "CCount";
- /** Citation Rank (Own) - Count */
- public static final String CRWCNT = "CRWCnt";
- /** Citation Rank (Other) - Count */
- public static final String CRTCNT = "CRTCnt";
public static final String IP = "IP";
public static final String PORT = "Port";
public static final String SEEDLISTURL = "seedURL";
@@ -216,9 +212,6 @@ public class yacySeed implements Cloneable {
this.dna.put(yacySeed.LASTSEEN, DateFormatter.formatShortSecond(new Date(System.currentTimeMillis() /*- DateFormatter.UTCDiff()*/))); // for last-seen date
this.dna.put(yacySeed.USPEED, yacySeed.ZERO); // the computated uplink speed of the peer
- this.dna.put(yacySeed.CRWCNT, yacySeed.ZERO);
- this.dna.put(yacySeed.CRTCNT, yacySeed.ZERO);
-
// settings that are needed to organize the seed round-trip
this.dna.put(yacySeed.FLAGS, yacySeed.FLAGSZERO);
setFlagDirectConnect(false);
diff --git a/source/net/yacy/kelondro/util/AttrSeq.java b/source/net/yacy/kelondro/util/AttrSeq.java
deleted file mode 100644
index ab4ec718c..000000000
--- a/source/net/yacy/kelondro/util/AttrSeq.java
+++ /dev/null
@@ -1,460 +0,0 @@
-// kelondroAttrSeq.java
-// -----------------------
-// part of YaCy
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2005
-// Created 15.11.2005
-//
-// $LastChangedDate: 2005-10-22 15:28:04 +0200 (Sat, 22 Oct 2005) $
-// $LastChangedRevision: 968 $
-// $LastChangedBy: theli $
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-package net.yacy.kelondro.util;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.logging.Logger;
-import java.util.zip.GZIPInputStream;
-
-import net.yacy.kelondro.index.Column;
-import net.yacy.kelondro.index.Row;
-import net.yacy.kelondro.index.RowCollection;
-import net.yacy.kelondro.index.RowSpaceExceededException;
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.order.Base64Order;
-
-
-public class AttrSeq {
-
- // class objects
- private final File file;
- private final Map entries; // value may be of type String or of type Entry
- protected Structure structure;
- private String name;
- private long created;
-
- // optional logger
- protected Logger theLogger = null;
-
- public AttrSeq(final File file, final boolean tree) throws IOException {
- this.file = file;
- this.structure = null;
- this.created = -1;
- this.name = "";
- this.entries = (tree) ? new TreeMap() : new HashMap();
- readAttrFile(file);
- }
-
- public AttrSeq(final String name, final String struct, final boolean tree) {
- this.file = null;
- this.structure = new Structure(struct);
- this.created = System.currentTimeMillis();
- this.name = name;
- this.entries = (tree) ? new TreeMap() : new HashMap();
- }
-
- public void setLogger(final Logger newLogger) {
- this.theLogger = newLogger;
- }
-
- public void logInfo(final String message) {
- if (this.theLogger == null)
- System.err.println("ATTRSEQ INFO for file " + this.file + ": " + message);
- else
- this.theLogger.info("ATTRSEQ INFO for file " + this.file + ": " + message);
- }
-
- public void logWarning(final String message) {
- if (this.theLogger == null)
- System.err.println("ATTRSEQ WARNING for file " + this.file + ": " + message);
- else
- this.theLogger.warning("ATTRSEQ WARNING for file " + this.file + ": " + message);
- }
-
- private void readAttrFile(final File loadfile) throws IOException {
- BufferedReader br = null;
- int p;
- if (loadfile.toString().endsWith(".gz")) {
- br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(loadfile))));
- } else {
- br = new BufferedReader(new InputStreamReader(new FileInputStream(loadfile)));
- }
- String line, key, oldvalue, newvalue;
- while ((line = br.readLine()) != null) {
- line = line.trim();
- if (line.length() == 0) continue;
- if (line.charAt(0) == '#') {
- if (line.startsWith("# Structure=")) {
- structure = new Structure(line.substring(12));
- }
- if (line.startsWith("# Name=")) {
- name = line.substring(7);
- }
- if (line.startsWith("# Created=")) {
- created = Long.parseLong(line.substring(10));
- }
- continue;
- }
- if ((p = line.indexOf('=')) > 0) {
- key = line.substring(0, p).trim();
- newvalue = line.substring(p + 1).trim();
- oldvalue = (String) entries.get(key);
- if (oldvalue != null) {
- if (newvalue.equals(oldvalue)) {
- //logWarning("key " + key + ": double occurrence. values are equal. second appearance is ignored");
- } else {
- if (newvalue.length() < oldvalue.length()) {
- if (oldvalue.substring(0, newvalue.length()).equals(newvalue)) {
- logWarning("key " + key + ": double occurrence. new value is subset of old value. second appearance is ignored");
- } else {
- logWarning("key " + key + ": double occurrence. new value is shorter than old value, but not a subsequence. old = " + oldvalue + ", new = " + newvalue);
- }
- } else if (newvalue.length() > oldvalue.length()) {
- if (newvalue.substring(0, oldvalue.length()).equals(oldvalue)) {
- logWarning("key " + key + ": double occurrence. old value is subset of new value. first appearance is ignored");
- } else {
- logWarning("key " + key + ": double occurrence. old value is shorter than new value, but not a subsequence. old = " + oldvalue + ", new = " + newvalue);
- }
- entries.put(key, newvalue);
- } else {
- logWarning("key " + key + ": double occurrence. old and new value have equal length but are not equal. old = " + oldvalue + ", new = " + newvalue);
- //entries.put(key, newvalue);
- }
- }
- } else {
- entries.put(key, newvalue);
- }
- }
- }
- br.close();
- if (structure == null) throw new IOException("file contains no structure tag");
- if (name == null) throw new IOException("file contains no name tag");
- if (created == -1) throw new IOException("file contains no created tag");
- }
-
- public int size() {
- return entries.size();
- }
-
- public long created() {
- return this.created;
- }
-
- public void toFile(final File out) throws IOException {
- // generate header
- final StringBuilder sb = new StringBuilder(2000);
- sb.append("# Name="); sb.append(this.name); sb.append((char) 13); sb.append((char) 10);
- sb.append("# Created="); sb.append(this.created); sb.append((char) 13); sb.append((char) 10);
- sb.append("# Structure="); sb.append(this.structure.toString()); sb.append((char) 13); sb.append((char) 10);
- sb.append("# ---"); sb.append((char) 13); sb.append((char) 10);
- String k;
- Object v;
- for (final Map.Entry entry : entries.entrySet()) {
- k = entry.getKey();
- v = entry.getValue();
- sb.append(k); sb.append('=');
- if (v instanceof String) sb.append((String) v);
- if (v instanceof Entry) sb.append(((Entry) v).toString());
- sb.append((char) 13); sb.append((char) 10);
- }
- if (out.toString().endsWith(".gz")) {
- FileUtils.writeAndGZip((new String(sb)).getBytes(), out);
- } else {
- FileUtils.copy((new String(sb)).getBytes(), out);
- }
- }
-
- public Iterator keys() {
- return entries.keySet().iterator();
- }
-
- public Entry newEntry(final String pivot, final boolean tree) {
- return new Entry(pivot, new HashMap(), (tree) ? (Set) new TreeSet() : (Set) new HashSet());
- }
-
- public Entry newEntry(final String pivot, final Map props, final Set seq) {
- return new Entry(pivot, props, seq);
- }
-
- /*
- public void putEntry(String pivot, String attrseq) {
- entries.put(pivot, attrseq);
- }
- */
-
- public void putEntry(final Entry entry) {
- if (shortmem())
- entries.put(entry.pivot, entry.toString());
- else
- entries.put(entry.pivot, entry);
- }
-
- public void putEntrySmall(final Entry entry) {
- entries.put(entry.pivot, entry.toString());
- }
-
- public Entry getEntry(final String pivot) {
- final Object e = entries.get(pivot);
- if (e == null) return null;
- if (e instanceof String) return new Entry(pivot, (String) e, false);
- if (e instanceof Entry) return (Entry) e;
- return null;
- }
-
- public Entry removeEntry(final String pivot) {
- final Object e = entries.remove(pivot);
- if (e == null) return null;
- if (e instanceof String) return new Entry(pivot, (String) e, false);
- if (e instanceof Entry) return (Entry) e;
- return null;
- }
-
- public static class Structure {
-
- protected String pivot_name = null;
- protected int pivot_len = -1;
- protected String[] prop_names = null;
- protected int[] prop_len = null, prop_pos = null;
- protected String[] seq_names = null;
- protected int[] seq_len = null, seq_pos = null;
- protected Row seqrow;
- // example:
- //# Structure=,'=',,,,,,,,,,,'|',*
-
- public Structure(String structure) {
- // parse a structure string
-
- // parse pivot definition:
- int p = structure.indexOf(",'='");
- if (p < 0) return;
- final String pivot = structure.substring(0, p);
- structure = structure.substring(p + 5);
- Column a = new Column(pivot);
- pivot_name = a.nickname;
- pivot_len = a.cellwidth;
-
- // parse property part definition:
- p = structure.indexOf(",'|'");
- if (p < 0) return;
- ArrayList l = new ArrayList();
- final String attr = structure.substring(0, p);
- String seqs = structure.substring(p + 5);
- StringTokenizer st = new StringTokenizer(attr, ",");
- while (st.hasMoreTokens()) {
- a = new Column(st.nextToken());
- l.add(a);
- }
- prop_names = new String[l.size()];
- prop_len = new int[l.size()];
- prop_pos = new int[l.size()];
- p = 0;
- for (int i = 0; i < l.size(); i++) {
- a = l.get(i);
- prop_names[i] = a.nickname;
- prop_len[i] = a.cellwidth;
- prop_pos[i] = p;
- p += prop_len[i];
- }
-
- // parse sequence definition:
- if (seqs.length() > 0 && seqs.charAt(0) == '*') seqs = seqs.substring(1);
- l = new ArrayList();
- st = new StringTokenizer(seqs, ",");
- while (st.hasMoreTokens()) {
- a = new Column(st.nextToken());
- l.add(a);
- }
- seq_names = new String[l.size()];
- seq_len = new int[l.size()];
- seq_pos = new int[l.size()];
- p = 0;
- for (int i = 0; i < l.size(); i++) {
- a = l.get(i);
- seq_names[i] = a.nickname;
- seq_len[i] = a.cellwidth;
- seq_pos[i] = p;
- p += seq_len[i];
- }
-
- // generate rowdef for seq row definition
- final StringBuilder rowdef = new StringBuilder();
- rowdef.append("byte[] ");
- rowdef.append(seq_names[0]);
- rowdef.append('-');
- rowdef.append(seq_len[0]);
-
- for (int i = 1; i < seq_names.length; i++) {
- rowdef.append(", byte[] ");
- rowdef.append(seq_names[i]);
- rowdef.append('-');
- rowdef.append(seq_len[i]);
- }
- seqrow = new Row(new String(rowdef), null);
- }
-
- @Override
- public String toString() {
- final StringBuilder sb = new StringBuilder(100);
- sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',");
- if (prop_names.length > 0) {
- for (int i = 0; i < prop_names.length; i++) {
- sb.append('<'); sb.append(prop_names[i]); sb.append('-'); sb.append(Integer.toString(prop_len[i])); sb.append(">,");
- }
- }
- sb.append("'|'");
- if (seq_names.length > 0) {
- for (int i = 0; i < seq_names.length; i++) {
- sb.append(",<"); sb.append(seq_names[i]); sb.append('-'); sb.append(Integer.toString(seq_len[i])); sb.append('>');
- }
- }
- return new String(sb);
- }
- }
-
- public class Entry {
- String pivot;
- Map attrs;
- Set seq;
-
- public Entry(final String pivot, final Map attrs, final Set seq) {
- this.pivot = pivot;
- this.attrs = attrs;
- this.seq = seq;
- }
-
- public Entry(final String pivot, final String attrseq, final boolean tree) {
- this.pivot = pivot;
- attrs = new HashMap();
- seq = (tree) ? (Set) new TreeSet() : (Set) new HashSet();
- for (int i = 0; i < structure.prop_names.length; i++) {
- attrs.put(structure.prop_names[i], Long.valueOf(Base64Order.enhancedCoder.decodeLong(attrseq.substring(structure.prop_pos[i], structure.prop_pos[i] + structure.prop_len[i]))));
- }
-
- int p = attrseq.indexOf('|') + 1;
- //long[] seqattrs = new long[structure.seq_names.length - 1];
- String seqname;
- while (p + structure.seq_len[0] <= attrseq.length()) {
- seqname = attrseq.substring(p, p + structure.seq_len[0]);
- p += structure.seq_len[0];
- for (int i = 1; i < structure.seq_names.length; i++) {
- //seqattrs[i - 1] = kelondroBase64Order.enhancedCoder.decodeLong(attrseq.substring(p, p + structure.seq_len[i]));
- p += structure.seq_len[i];
- }
- seq.add(seqname/*, seqattrs*/);
- }
- }
-
- public Map getAttrs() {
- return attrs;
- }
-
- public long getAttr(final String key, final long dflt) {
- final Long i = attrs.get(key);
- if (i == null) return dflt;
- return i.longValue();
- }
-
- public void setAttr(final String key, final long attr) {
- attrs.put(key, Long.valueOf(attr));
- }
-
- public Set getSeqSet() {
- return seq;
- }
-
- public RowCollection getSeqCollection() throws RowSpaceExceededException {
- final RowCollection collection = new RowCollection(structure.seqrow, seq.size());
- final Iterator i = seq.iterator();
- while (i.hasNext()) {
- collection.addUnique(structure.seqrow.newEntry(i.next().getBytes()));
- }
- return collection;
- }
-
- public void setSeq(final Set seq) {
- this.seq = seq;
- }
-
- public void addSeq(final String s/*, long[] seqattrs*/) {
- this.seq.add(s/*, seqattrs*/);
- }
-
- @Override
- public String toString() {
- // creates only the attribute field and the sequence, not the pivot
- final StringBuilder sb = new StringBuilder(100 + structure.seq_len[0] * seq.size());
- Long val;
- for (int i = 0; i < structure.prop_names.length; i++) {
- val = attrs.get(structure.prop_names[i]);
- sb.append(Base64Order.enhancedCoder.encodeLongSmart((val == null) ? 0 : val.longValue(), structure.prop_len[i]));
- }
- sb.append('|');
- final Iterator q = seq.iterator();
- //long[] seqattrs;
- while (q.hasNext()) {
- sb.append(q.next());
- //seqattrs = (long[]) entry.getValue();
- /*
- for (int i = 1; i < structure.seq_names.length; i++) {
- sb.append(kelondroBase64Order.enhancedCoder.encodeLong(seqattrs[i - 1], structure.seq_len[i]));
- }
- */
- }
- return new String(sb);
- }
- }
-
- private static boolean shortmem() {
- return (MemoryControl.available() < 20000000L);
- }
-
- public static void transcode(final File from_file, final File to_file) throws IOException {
- final AttrSeq crp = new AttrSeq(from_file, true);
- //crp.toFile(new File(args[1]));
- final AttrSeq cro = new AttrSeq(crp.name + "/Transcoded from " + crp.file.getName(), crp.structure.toString(), true);
- final Iterator i = crp.entries.keySet().iterator();
- while (i.hasNext()) {
- cro.putEntry(crp.getEntry(i.next()));
- }
- cro.toFile(to_file);
- }
-
- public static void main(final String[] args) {
- // java -classpath source de.anomic.kelondro.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr
- try {
- if ((args.length == 3) && ("-transcode".equals(args[0]))) {
- transcode(new File(args[1]), new File(args[2]));
- }
- } catch (final IOException e) {
- Log.logException(e);
- }
- }
-
-}
diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java
index 4fd8e2f32..c9d672fff 100644
--- a/source/net/yacy/yacy.java
+++ b/source/net/yacy/yacy.java
@@ -81,7 +81,6 @@ import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
import de.anomic.server.serverCore;
import de.anomic.tools.enumerateFiles;
-import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.Tray;
import de.anomic.yacy.yacyBuildProperties;
@@ -797,20 +796,6 @@ public final class yacy {
// finished
Log.logConfig("CLEAN-WORDLIST", "FINISHED");
}
-
- private static void transferCR(final String targetaddress, final String crfile) {
- final File f = new File(crfile);
- try {
- final byte[] b = FileUtils.read(f);
- final String result = yacyClient.transfer(targetaddress, f.getName(), b);
- if (result == null)
- Log.logInfo("TRANSFER-CR", "transmitted file " + crfile + " to " + targetaddress + " successfully");
- else
- Log.logInfo("TRANSFER-CR", "error transmitting file " + crfile + " to " + targetaddress + ": " + result);
- } catch (final IOException e) {
- Log.logInfo("TRANSFER-CR", "could not read file " + crfile);
- }
- }
private static String[] shift(final String[] args, final int pos, final int count) {
final String[] newargs = new String[args.length - count];
@@ -1017,11 +1002,6 @@ public final class yacy {
final int minlength = Integer.parseInt(args[2]);
final int maxlength = Integer.parseInt(args[3]);
cleanwordlist(args[1], minlength, maxlength);
- } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-transfercr"))) {
- // transfer a single cr file to a remote peer
- final String targetaddress = args[1];
- final String crfile = args[2];
- transferCR(targetaddress, crfile);
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urldbcleanup"))) {
// generate a url list and save it in a file
if (args.length == 2) applicationRoot= new File(args[1]);