diff --git a/source/de/anomic/kelondro/kelondroAttrSeq.java b/source/de/anomic/kelondro/kelondroAttrSeq.java new file mode 100644 index 000000000..13d56c361 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroAttrSeq.java @@ -0,0 +1,404 @@ +// kelondroAttrSeq.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// Created 15.11.2005 +// +// $LastChangedDate: 2005-10-22 15:28:04 +0200 (Sat, 22 Oct 2005) $ +// $LastChangedRevision: 968 $ +// $LastChangedBy: theli $ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.io.File; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.Iterator; +import java.util.Map; +import java.util.ArrayList; +import java.util.StringTokenizer; +import java.util.zip.GZIPInputStream; +import java.util.logging.Logger; + +import de.anomic.server.serverCodings; +import de.anomic.server.serverFileUtils; + +public class kelondroAttrSeq { + + // class objects + private File file; + private TreeMap entries; + private Structure structure; + private String name; + private long created; + + // optional logger + protected Logger theLogger = null; + + public kelondroAttrSeq(File file) throws IOException { + this.file = file; + this.structure = null; + this.created = 0; + this.name = ""; + this.entries = readPropFile(file); + } + + public kelondroAttrSeq(String name, String struct) { + this.file = null; + this.structure = new Structure(struct); + this.created = System.currentTimeMillis(); + this.name = name; + this.entries = new TreeMap(); + } + + public void setLogger(Logger newLogger) { + this.theLogger = newLogger; + } + + public void logWarning(String message) { + if (this.theLogger == null) + System.err.println("KELONDRO WARNING for file " + this.file + ": " + message); + else + this.theLogger.warning("KELONDRO WARNING for file " + this.file + ": " + message); + } + + private TreeMap readPropFile(File file) throws IOException { + TreeMap entries = new TreeMap(); + BufferedReader br = null; + int p; + if (file.toString().endsWith(".gz")) { + br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)))); + } else { + br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + } + String line; + String key; + while ((line = br.readLine()) != null) { + line = line.trim(); + if (line.length() == 0) continue; + if (line.startsWith("#")) { + if (line.startsWith("# Structure=")) { + structure = new Structure(line.substring(12)); + } + if (line.startsWith("# Name=")) { + name = line.substring(7); + } + if (line.startsWith("# Created=")) { + created = Long.parseLong(line.substring(10)); + } + continue; + } + if ((p = line.indexOf('=')) > 0) { + key = line.substring(0, p).trim(); + if (entries.containsKey(key)) { + logWarning("read PropFile " + file.toString() + ", key " + key + ": double occurrence"); + } else { + entries.put(key, line.substring(p + 1).trim()); + } + } + } + br.close(); + + return entries; + } + + public long created() { + return this.created; + } + + public void toFile(File out) throws IOException { + // generate header + StringBuffer sb = new StringBuffer(2000); + sb.append("# Name=" + this.name); sb.append((char) 13); sb.append((char) 10); + sb.append("# Created=" + this.created); sb.append((char) 13); sb.append((char) 10); + sb.append("# Structure=" + this.structure.toString()); sb.append((char) 13); sb.append((char) 10); + sb.append("# ---"); sb.append((char) 13); sb.append((char) 10); + Iterator i = entries.entrySet().iterator(); + Map.Entry entry; + String k,v; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + k = (String) entry.getKey(); + v = (String) entry.getValue(); + sb.append(k); sb.append('='); sb.append(v); sb.append((char) 13); sb.append((char) 10); + } + if (out.toString().endsWith(".gz")) { + serverFileUtils.writeAndZip(sb.toString().getBytes(), out); + } else { + serverFileUtils.write(sb.toString().getBytes(), out); + } + } + + public Iterator keys() { + return entries.keySet().iterator(); + } + + public Entry newEntry(String pivot, HashMap props, TreeSet seq) { + return new Entry(pivot, props, seq); + } + + public void addEntry(String pivot, String attrseq) { + entries.put(pivot, attrseq); + } + + public void addEntry(Entry entry) { + entries.put(entry.pivot, entry.toString()); + } + + public Entry getEntry(String pivot) { + String struct = (String) entries.get(pivot); + if (struct == null) return null; + return new Entry(pivot, struct); + } + + public Entry removeEntry(String pivot) { + String struct = (String) entries.remove(pivot); + if (struct == null) return null; + return new Entry(pivot, struct); + } + + public class Structure { + + protected String pivot_name = null; + protected int pivot_len = -1; + protected String[] prop_names = null; + protected int[] prop_len = null, prop_pos = null; + protected String seq_name = null; + protected int seq_len = -1; + + // example: + //# Structure=,'=',,,,,,,,,,,'|',* + + public Structure(String structure) { + // parse a structure string + + // parse pivot definition: + int p = structure.indexOf(",'='"); + if (p < 0) return; + String pivot = structure.substring(0, p); + structure = structure.substring(p + 5); + Object[] a = atom(pivot); + if (a == null) return; + pivot_name = (String) a[0]; + pivot_len = ((Integer) a[1]).intValue(); + + // parse property part definition: + p = structure.indexOf(",'|'"); + if (p < 0) return; + ArrayList l = new ArrayList(); + String attr = structure.substring(0, p); + String seqs = structure.substring(p + 5); + StringTokenizer st = new StringTokenizer(attr, ","); + while (st.hasMoreTokens()) { + a = atom(st.nextToken()); + if (a == null) break; + l.add(a); + } + prop_names = new String[l.size()]; + prop_len = new int[l.size()]; + prop_pos = new int[l.size()]; + p = 0; + for (int i = 0; i < l.size(); i++) { + a = (Object[]) l.get(i); + prop_names[i] = (String) a[0]; + prop_len[i] = ((Integer) a[1]).intValue(); + prop_pos[i] = p; + p += prop_len[i]; + } + + // parse sequence definition: + a = atom(seqs); + if (a == null) return; + seq_name = (String) a[0]; + seq_len = ((Integer) a[1]).intValue(); + } + + private Object[] atom(String a) { + if (a.startsWith("<")) { + a = a.substring(1); + } else if (a.startsWith("*<")) { + a = a.substring(2); + } else return null; + if (a.endsWith(">")) { + a = a.substring(0, a.length() - 1); + } else return null; + int p = a.indexOf('-'); + if (p < 0) return null; + String name = a.substring(0, p); + try { + int x = Integer.parseInt(a.substring(p + 1)); + return new Object[]{name, new Integer(x)}; + } catch (NumberFormatException e) { + return null; + } + } + + public String toString() { + StringBuffer sb = new StringBuffer(70); + sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',"); + if (prop_names.length > 0) { + for (int i = 0; i < prop_names.length; i++) { + sb.append('<'); sb.append(prop_names[i]); sb.append('-'); sb.append(Integer.toString(prop_len[i])); sb.append(">,"); + } + } + sb.append("'|',"); + sb.append("*<"); sb.append(seq_name); sb.append('-'); sb.append(Integer.toString(seq_len)); sb.append('>'); + return sb.toString(); + } + } + + public class Entry { + String pivot; + HashMap attrs; + TreeSet seq; + + public Entry(String pivot, HashMap attrs, TreeSet seq) { + this.pivot = pivot; + this.attrs = attrs; + this.seq = seq; + } + + public Entry(String pivot, String attrseq) { + this.pivot = pivot; + attrs = new HashMap(); + seq = new TreeSet(); + for (int i = 0; i < structure.prop_names.length; i++) { + attrs.put(structure.prop_names[i], new Long(serverCodings.enhancedCoder.decodeBase64Long(attrseq.substring(structure.prop_pos[i], structure.prop_pos[i] + structure.prop_len[i])))); + } + + int p = attrseq.indexOf('|'); + attrseq = attrseq.substring(p + 1); + for (int i = 0; i < attrseq.length(); i = i + structure.seq_len) { + seq.add(attrseq.substring(i, i + structure.seq_len)); + } + } + + public HashMap getAttrs() { + return attrs; + } + + public long getAttr(String key, long dflt) { + Long i = (Long) attrs.get(key); + if (i == null) return dflt; else return i.longValue(); + } + + public void setAttr(String key, long attr) { + attrs.put(key, new Long(attr)); + } + + public TreeSet getSeq() { + return seq; + } + + public void setSeq(TreeSet seq) { + this.seq = seq; + } + + public String toString() { + // creates only the attribute field and the sequence, not the pivot + StringBuffer sb = new StringBuffer(70); + Long val; + for (int i = 0; i < structure.prop_names.length; i++) { + val = (Long) attrs.get(structure.prop_names[i]); + sb.append(serverCodings.enhancedCoder.encodeBase64LongSmart((val == null) ? 0 : val.longValue(), structure.prop_len[i])); + } + sb.append('|'); + Iterator q = seq.iterator(); + while (q.hasNext()) { + sb.append((String) q.next()); + } + return sb.toString(); + } + } + + public static void transcode(File from_file, File to_file) throws IOException { + kelondroAttrSeq crp = new kelondroAttrSeq(from_file); + //crp.toFile(new File(args[1])); + kelondroAttrSeq cro = new kelondroAttrSeq(crp.name + "/Transcoded from " + crp.file.getName(), crp.structure.toString()); + Iterator i = crp.entries.keySet().iterator(); + String key; + kelondroAttrSeq.Entry entry; + while (i.hasNext()) { + key = (String) i.next(); + entry = crp.getEntry(key); + cro.addEntry(entry); + } + cro.toFile(to_file); + } + + public static void main(String[] args) { + // java -classpath source de.anomic.kelondro.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr + try { + if ((args.length == 3) && (args[0].equals("-transcode"))) { + transcode(new File(args[1]), new File(args[2])); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + /* + Class-A File format: + + UDate : latest update timestamp of the URL (as virtual date, hours since epoch) + VDate : last visit timestamp of the URL (as virtual date, hours since epoch) + LCount : count of links to local resources + GCount : count of links to global resources + ICount : count of links to images (in document) + DCount : count of links to other documents + TLength: length of the plain text content (bytes) + WACount: total number of all words in content + WUCount: number of unique words in content (removed doubles) + Flags : Flags (0=update, 1=popularity, 2=attention, 3=vote) + + Class-a File format is an extension of Class-A plus the following attributes + FUDate : first update timestamp of the URL + FDDate : first update timestamp of the domain + LUDate : latest update timestamp of the URL + UCount : Update Counter (of 'latest update timestamp') + PCount : Popularity Counter (proxy clicks) + ACount : Attention Counter (search result clicks) + VCount : Votes + Vita : Vitality (normed number of updates per time) + */ +} diff --git a/source/de/anomic/plasma/plasmaRankingCRFile.java b/source/de/anomic/plasma/plasmaRankingCRFile.java index 3cc6c625c..7078cb7fc 100644 --- a/source/de/anomic/plasma/plasmaRankingCRFile.java +++ b/source/de/anomic/plasma/plasmaRankingCRFile.java @@ -46,322 +46,55 @@ package de.anomic.plasma; import java.io.File; -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.FileInputStream; import java.io.IOException; -import java.util.HashMap; -import java.util.TreeMap; -import java.util.TreeSet; import java.util.Iterator; -import java.util.Map; -import java.util.ArrayList; -import java.util.StringTokenizer; -import java.util.zip.GZIPInputStream; +import de.anomic.kelondro.kelondroAttrSeq; import de.anomic.server.serverCodings; import de.anomic.server.serverFileUtils; import de.anomic.tools.bitfield; -import de.anomic.server.logging.serverLog; public class plasmaRankingCRFile { - private File file; - private TreeMap entries; - private Structure structure; - private String name; - private long created; - private serverLog log; - - public plasmaRankingCRFile(File file) throws IOException { - this.log = new serverLog("RANKING"); - this.file = file; - this.structure = null; - this.created = 0; - this.name = ""; - this.entries = readCR(file); - } - - public plasmaRankingCRFile(String name, String struct) { - this.log = new serverLog("RANKING"); - this.file = null; - this.structure = new Structure(struct); - this.created = System.currentTimeMillis(); - this.name = name; - this.entries = new TreeMap(); - } - /* header.append("# Name=YaCy " + ((type.equals("crl")) ? "Local" : "Global") + " Citation Reference Ticket"); header.append((char) 13); header.append((char) 10); header.append("# Created=" + System.currentTimeMillis()); header.append((char) 13); header.append((char) 10); header.append("# Structure=,'=',,,,,,,,,,,'|',*"); header.append((char) 13); header.append((char) 10); header.append("# ---"); header.append((char) 13); header.append((char) 10); */ - - private TreeMap readCR(File file) throws IOException { - TreeMap entries = new TreeMap(); - BufferedReader br = null; - int p; - if (file.toString().endsWith(".gz")) { - br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)))); - } else { - br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); - } - String line; - String key; - while ((line = br.readLine()) != null) { - line = line.trim(); - if (line.length() == 0) continue; - if (line.startsWith("#")) { - if (line.startsWith("# Structure=")) { - structure = new Structure(line.substring(12)); - } - if (line.startsWith("# Name=")) { - name = line.substring(7); - } - if (line.startsWith("# Created=")) { - created = Long.parseLong(line.substring(10)); - } - continue; - } - if ((p = line.indexOf('=')) > 0) { - key = line.substring(0, p).trim(); - if (entries.containsKey(key)) { - log.logInfo("read CRFile " + file.toString() + ", key " + key + ": double occurrence"); - } else { - entries.put(key, line.substring(p + 1).trim()); - } - } - } - br.close(); - - return entries; - } - - public void toFile(File out) throws IOException { - // generate header - StringBuffer sb = new StringBuffer(2000); - sb.append("# Name=" + this.name); sb.append((char) 13); sb.append((char) 10); - sb.append("# Created=" + this.created); sb.append((char) 13); sb.append((char) 10); - sb.append("# Structure=" + this.structure.toString()); sb.append((char) 13); sb.append((char) 10); - sb.append("# ---"); sb.append((char) 13); sb.append((char) 10); - Iterator i = entries.entrySet().iterator(); - Map.Entry entry; - String k,v; - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - k = (String) entry.getKey(); - v = (String) entry.getValue(); - sb.append(k); sb.append('='); sb.append(v); sb.append((char) 13); sb.append((char) 10); - } - if (out.toString().endsWith(".gz")) { - serverFileUtils.writeAndZip(sb.toString().getBytes(), out); - } else { - serverFileUtils.write(sb.toString().getBytes(), out); - } - } - - public void addEntry(String referee, String attrseq) { - entries.put(referee, attrseq); - } - - public void addEntry(Entry entry) { - entries.put(entry.referee, entry.toString()); - } - - public Entry getEntry(String referee) { - String struct = (String) entries.get(referee); - if (struct == null) return null; - return new Entry(referee, struct); - } - - public Entry newEntry(String referee, HashMap props, TreeSet seq) { - return new Entry(referee, props, seq); - } - - public class Structure { - - protected String referee_name = null; - protected int referee_len = -1; - protected String[] prop_names = null; - protected int[] prop_len = null, prop_pos = null; - protected String seq_name = null; - protected int seq_len = -1; - - // example: - //# Structure=,'=',,,,,,,,,,,'|',* - public Structure(String structure) { - // parse a structure string - - // parse referee definition: - int p = structure.indexOf(",'='"); - if (p < 0) return; - String referee = structure.substring(0, p); - structure = structure.substring(p + 5); - Object[] a = atom(referee); - if (a == null) return; - referee_name = (String) a[0]; - referee_len = ((Integer) a[1]).intValue(); - - // parse property part definition: - p = structure.indexOf(",'|'"); - if (p < 0) return; - ArrayList l = new ArrayList(); - String attr = structure.substring(0, p); - String seqs = structure.substring(p + 5); - StringTokenizer st = new StringTokenizer(attr, ","); - while (st.hasMoreTokens()) { - a = atom(st.nextToken()); - if (a == null) break; - l.add(a); - } - prop_names = new String[l.size()]; - prop_len = new int[l.size()]; - prop_pos = new int[l.size()]; - p = 0; - for (int i = 0; i < l.size(); i++) { - a = (Object[]) l.get(i); - prop_names[i] = (String) a[0]; - prop_len[i] = ((Integer) a[1]).intValue(); - prop_pos[i] = p; - p += prop_len[i]; - } - - // parse sequence definition: - a = atom(seqs); - if (a == null) return; - seq_name = (String) a[0]; - seq_len = ((Integer) a[1]).intValue(); - } - - private Object[] atom(String a) { - if (a.startsWith("<")) { - a = a.substring(1); - } else if (a.startsWith("*<")) { - a = a.substring(2); - } else return null; - if (a.endsWith(">")) { - a = a.substring(0, a.length() - 1); - } else return null; - int p = a.indexOf('-'); - if (p < 0) return null; - String name = a.substring(0, p); - try { - int x = Integer.parseInt(a.substring(p + 1)); - return new Object[]{name, new Integer(x)}; - } catch (NumberFormatException e) { - return null; - } - } - - public String toString() { - StringBuffer sb = new StringBuffer(70); - sb.append('<'); sb.append(referee_name); sb.append('-'); sb.append(Integer.toString(referee_len)); sb.append(">,'=',"); - if (prop_names.length > 0) { - for (int i = 0; i < prop_names.length; i++) { - sb.append('<'); sb.append(prop_names[i]); sb.append('-'); sb.append(Integer.toString(prop_len[i])); sb.append(">,"); - } - } - sb.append("'|',"); - sb.append("*<"); sb.append(seq_name); sb.append('-'); sb.append(Integer.toString(seq_len)); sb.append('>'); - return sb.toString(); - } - } - - public class Entry { - String referee; - HashMap props; - TreeSet seq; - - public Entry(String referee, HashMap props, TreeSet seq) { - this.referee = referee; - this.props = props; - this.seq = seq; - } - - public Entry(String referee, String attrseq) { - this.referee = referee; - props = new HashMap(); - seq = new TreeSet(); - for (int i = 0; i < structure.prop_names.length; i++) { - props.put(structure.prop_names[i], new Integer((int) serverCodings.enhancedCoder.decodeBase64Long(attrseq.substring(structure.prop_pos[i], structure.prop_pos[i] + structure.prop_len[i])))); - } - - int p = attrseq.indexOf('|'); - attrseq = attrseq.substring(p + 1); - for (int i = 0; i < attrseq.length(); i = i + structure.seq_len) { - seq.add(attrseq.substring(i, i + structure.seq_len)); - } - } - - public String toString() { - // creates only the attribute field and the sequence, not the referee - StringBuffer sb = new StringBuffer(70); - Integer val; - for (int i = 0; i < structure.prop_names.length; i++) { - val = (Integer) props.get(structure.prop_names[i]); - sb.append(serverCodings.enhancedCoder.encodeBase64LongSmart((val == null) ? 0 : val.intValue(), structure.prop_len[i])); - } - sb.append('|'); - Iterator q = seq.iterator(); - while (q.hasNext()) { - sb.append((String) q.next()); - } - return sb.toString(); - } - } - - public static void transcode(File from_file, File to_file) throws IOException { - plasmaRankingCRFile crp = new plasmaRankingCRFile(from_file); - //crp.toFile(new File(args[1])); - plasmaRankingCRFile cro = new plasmaRankingCRFile(crp.name + "/Transcoded from " + crp.file.getName(), crp.structure.toString()); - Iterator i = crp.entries.keySet().iterator(); - String key; - plasmaRankingCRFile.Entry entry; - while (i.hasNext()) { - key = (String) i.next(); - entry = crp.getEntry(key); - cro.addEntry(entry); - } - cro.toFile(to_file); - } - - - private static boolean accumulate_upd(File f, plasmaRankingCRFile acc) { + private static boolean accumulate_upd(File f, kelondroAttrSeq acc) { // open file - plasmaRankingCRFile source_cr = null; + kelondroAttrSeq source_cr = null; try { - source_cr = new plasmaRankingCRFile(f); + source_cr = new kelondroAttrSeq(f); } catch (IOException e) { return false; } // put elements in accumulator file - Iterator el = source_cr.entries.keySet().iterator(); + Iterator el = source_cr.keys(); String key; - plasmaRankingCRFile.Entry new_entry, acc_entry; + kelondroAttrSeq.Entry new_entry, acc_entry; int FUDate, FDDate, LUDate, UCount, PCount, ACount, VCount, Vita; bitfield acc_flags, new_flags; while (el.hasNext()) { key = (String) el.next(); new_entry = source_cr.getEntry(key); - new_flags = new bitfield(serverCodings.enhancedCoder.encodeBase64Long((long) ((Integer) new_entry.props.get("Flags")).intValue(), 1).getBytes()); + new_flags = new bitfield(serverCodings.enhancedCoder.encodeBase64Long((long) new_entry.getAttr("Flags", 0), 1).getBytes()); // enrich information with additional values - if (acc.entries.containsKey(key)) { - acc_entry = acc.getEntry(key); - acc.entries.remove(key); // will be replaced later - FUDate = ((Integer) acc_entry.props.get("FUDate")).intValue(); - FDDate = ((Integer) acc_entry.props.get("FDDate")).intValue(); - LUDate = ((Integer) acc_entry.props.get("LUDate")).intValue(); - UCount = ((Integer) acc_entry.props.get("UCount")).intValue(); - PCount = ((Integer) acc_entry.props.get("PCount")).intValue(); - ACount = ((Integer) acc_entry.props.get("ACount")).intValue(); - VCount = ((Integer) acc_entry.props.get("VCount")).intValue(); - Vita = ((Integer) acc_entry.props.get("Vita")).intValue(); + if ((acc_entry = acc.removeEntry(key)) != null) { + FUDate = (int) acc_entry.getAttr("FUDate", 0); + FDDate = (int) acc_entry.getAttr("FDDate", 0); + LUDate = (int) acc_entry.getAttr("LUDate", 0); + UCount = (int) acc_entry.getAttr("UCount", 0); + PCount = (int) acc_entry.getAttr("PCount", 0); + ACount = (int) acc_entry.getAttr("ACount", 0); + VCount = (int) acc_entry.getAttr("VCount", 0); + Vita = (int) acc_entry.getAttr("Vita", 0); // update counters and dates - acc_entry.seq = new_entry.seq; // need to be checked + acc_entry.setSeq(new_entry.getSeq()); // need to be checked UCount++; // increase update counter PCount += (new_flags.get(1)) ? 1 : 0; @@ -369,17 +102,17 @@ public class plasmaRankingCRFile { VCount += (new_flags.get(3)) ? 1 : 0; // 'OR' the flags - acc_flags = new bitfield(serverCodings.enhancedCoder.encodeBase64Long((long) ((Integer) acc_entry.props.get("Flags")).intValue(), 1).getBytes()); + acc_flags = new bitfield(serverCodings.enhancedCoder.encodeBase64Long((long) acc_entry.getAttr("Flags", 0), 1).getBytes()); for (int i = 0; i < 6; i++) { if (new_flags.get(i)) acc_flags.set(i, true); } - acc_entry.props.put("Flags", new Integer((int) serverCodings.enhancedCoder.decodeBase64Long(new String(acc_flags.getBytes())))); + acc_entry.setAttr("Flags", (int) serverCodings.enhancedCoder.decodeBase64Long(new String(acc_flags.getBytes()))); } else { // initialize counters and dates - acc_entry = acc.newEntry(key, new_entry.props, new_entry.seq); + acc_entry = acc.newEntry(key, new_entry.getAttrs(), new_entry.getSeq()); FUDate = plasmaWordIndex.microDateHoursInt(System.currentTimeMillis()); // first update date FDDate = plasmaWordIndex.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack - LUDate = ((Integer) new_entry.props.get("VDate")).intValue(); + LUDate = (int) new_entry.getAttr("VDate", 0); UCount = 0; PCount = (new_flags.get(1)) ? 1 : 0; ACount = (new_flags.get(2)) ? 1 : 0; @@ -389,14 +122,14 @@ public class plasmaRankingCRFile { // make plausibility check? // insert into accumulator - acc_entry.props.put("FUDate", new Integer(FUDate)); - acc_entry.props.put("FDDate", new Integer(FDDate)); - acc_entry.props.put("LUDate", new Integer(LUDate)); - acc_entry.props.put("UCount", new Integer(UCount)); - acc_entry.props.put("PCount", new Integer(PCount)); - acc_entry.props.put("ACount", new Integer(ACount)); - acc_entry.props.put("VCount", new Integer(VCount)); - acc_entry.props.put("Vita", new Integer(Vita)); + acc_entry.setAttr("FUDate", (long) FUDate); + acc_entry.setAttr("FDDate", (long) FDDate); + acc_entry.setAttr("LUDate", (long) LUDate); + acc_entry.setAttr("UCount", (long) UCount); + acc_entry.setAttr("PCount", (long) PCount); + acc_entry.setAttr("ACount", (long) ACount); + acc_entry.setAttr("VCount", (long) VCount); + acc_entry.setAttr("Vita", (long) Vita); acc.addEntry(acc_entry); } @@ -422,19 +155,19 @@ public class plasmaRankingCRFile { } // open target file - plasmaRankingCRFile acc = null; + kelondroAttrSeq acc = null; if (!(to_file.exists())) { - acc = new plasmaRankingCRFile("Global Ranking Accumulator File", + acc = new kelondroAttrSeq("Global Ranking Accumulator File", ",'='," + ",,,,,,,,,," + ",,,,,,,," + "'|',*"); acc.toFile(to_file); } - acc = new plasmaRankingCRFile(to_file); + acc = new kelondroAttrSeq(to_file); // collect source files - plasmaRankingCRFile source_cr = null; + kelondroAttrSeq source_cr = null; File source_file = null; String[] files = from_dir.list(); for (int i = 0; i < files.length; i++) { @@ -470,30 +203,30 @@ public class plasmaRankingCRFile { } public static long crFileCreated(File f) throws IOException { - return (new plasmaRankingCRFile(f)).created; + return (new kelondroAttrSeq(f)).created(); } public static void main(String[] args) { - // java -classpath source de.anomic.plasma.plasmaRankingCRFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr + // java -classpath source de.anomic.plasma.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr try { - if ((args.length == 3) && (args[0].equals("-transcode"))) { - transcode(new File(args[1]), new File(args[2])); - } if ((args.length == 5) && (args[0].equals("-accumulate"))) { accumulate(new File(args[1]), new File(args[2]), new File(args[3]), new File(args[4]), new File(args[5])); } if ((args.length == 2) && (args[0].equals("-accumulate"))) { File root_path = new File(args[1]); File from_dir = new File(root_path, "DATA/RANKING/GLOBAL/014_othercr"); + File ready_dir = new File(root_path, "DATA/RANKING/GLOBAL/015_ready"); File tmp_dir = new File(root_path, "DATA/RANKING/GLOBAL/016_tmp"); File err_dir = new File(root_path, "DATA/RANKING/GLOBAL/017_err"); File acc_dir = new File(root_path, "DATA/RANKING/GLOBAL/018_acc"); - File to_file = new File(root_path, "DATA/RANKING/GLOBAL/020_accumulator/CRG-a-acc.cr.gz"); + File to_file = new File(root_path, "DATA/RANKING/GLOBAL/020_con0/CRG-a-acc.cr.gz"); + if (!(ready_dir.exists())) ready_dir.mkdirs(); if (!(tmp_dir.exists())) tmp_dir.mkdirs(); if (!(err_dir.exists())) err_dir.mkdirs(); if (!(acc_dir.exists())) acc_dir.mkdirs(); if (!(to_file.getParentFile().exists())) to_file.getParentFile().mkdirs(); - accumulate(from_dir, tmp_dir, err_dir, acc_dir, to_file); + serverFileUtils.moveAll(from_dir, ready_dir); + accumulate(ready_dir, tmp_dir, err_dir, acc_dir, to_file); } if ((args.length == 3) && (args[0].equals("-recycle"))) { File root_path = new File(args[1]);