diff --git a/bin/cr_accumulate b/bin/cr_accumulate index 968dea72b..0f12779f6 100755 --- a/bin/cr_accumulate +++ b/bin/cr_accumulate @@ -1,3 +1,3 @@ cd `dirname $0`/.. -java -classpath source:classes de.anomic.plasma.plasmaRankingCRProcess -accumulate . -java -classpath source:classes de.anomic.plasma.plasmaRankingCRProcess -recycle . 168 +java -Xms300m -Xmx900m -classpath source:classes de.anomic.plasma.plasmaRankingCRProcess -accumulate . +java -Xms300m -Xmx900m -classpath source:classes de.anomic.plasma.plasmaRankingCRProcess -recycle . 168 diff --git a/bin/cr_genrci b/bin/cr_genrci index 33614f4b0..6f40f8447 100755 --- a/bin/cr_genrci +++ b/bin/cr_genrci @@ -1,2 +1,2 @@ cd `dirname $0`/.. -java -Xms300m -Xmx900m -classpath source:classes de.anomic.plasma.plasmaRankingCRProcess -genrci . +java -server -Xms1400m -Xmx1400m -classpath source:classes de.anomic.plasma.plasmaRankingCRProcess -genrci . diff --git a/source/de/anomic/kelondro/kelondroAttrSeq.java b/source/de/anomic/kelondro/kelondroAttrSeq.java index 2b321429c..5821251c0 100644 --- a/source/de/anomic/kelondro/kelondroAttrSeq.java +++ b/source/de/anomic/kelondro/kelondroAttrSeq.java @@ -67,7 +67,7 @@ public class kelondroAttrSeq { // class objects private File file; - private TreeMap entries; + private Map entries; private Structure structure; private String name; private long created; @@ -75,20 +75,21 @@ public class kelondroAttrSeq { // optional logger protected Logger theLogger = null; - public kelondroAttrSeq(File file) throws IOException { + public kelondroAttrSeq(File file, boolean tree) throws IOException { this.file = file; this.structure = null; this.created = 0; this.name = ""; - this.entries = readAttrFile(file); + this.entries = (tree) ? 
(Map) new TreeMap() : (Map) new HashMap(); + readAttrFile(file); } - public kelondroAttrSeq(String name, String struct) { + public kelondroAttrSeq(String name, String struct, boolean tree) { this.file = null; this.structure = new Structure(struct); this.created = System.currentTimeMillis(); this.name = name; - this.entries = new TreeMap(); + this.entries = (tree) ? (Map) new TreeMap() : (Map) new HashMap(); } public void setLogger(Logger newLogger) { @@ -109,8 +110,7 @@ public class kelondroAttrSeq { this.theLogger.warning("ATTRSEQ WARNING for file " + this.file + ": " + message); } - private TreeMap readAttrFile(File file) throws IOException { - TreeMap entries = new TreeMap(); + private void readAttrFile(File file) throws IOException { BufferedReader br = null; int p; if (file.toString().endsWith(".gz")) { @@ -145,8 +145,10 @@ public class kelondroAttrSeq { } } br.close(); - - return entries; + } + + public int size() { + return entries.size(); } public long created() { @@ -162,12 +164,16 @@ public class kelondroAttrSeq { sb.append("# ---"); sb.append((char) 13); sb.append((char) 10); Iterator i = entries.entrySet().iterator(); Map.Entry entry; - String k,v; + String k; + Object v; while (i.hasNext()) { entry = (Map.Entry) i.next(); k = (String) entry.getKey(); - v = (String) entry.getValue(); - sb.append(k); sb.append('='); sb.append(v); sb.append((char) 13); sb.append((char) 10); + v = entry.getValue(); + sb.append(k); sb.append('='); + if (v instanceof String) sb.append((String) v); + if (v instanceof Entry) sb.append(((Entry) v).toString()); + sb.append((char) 13); sb.append((char) 10); } if (out.toString().endsWith(".gz")) { serverFileUtils.writeAndZip(sb.toString().getBytes(), out); @@ -188,24 +194,33 @@ public class kelondroAttrSeq { return new Entry(pivot, props, seq); } - public void addEntry(String pivot, String attrseq) { + /* + public void putEntry(String pivot, String attrseq) { entries.put(pivot, attrseq); } + */ - public void addEntry(Entry entry) 
{ - entries.put(entry.pivot, entry.toString()); + public void putEntry(Entry entry) { + if (shortmem()) + entries.put(entry.pivot, entry.toString()); + else + entries.put(entry.pivot, entry); } public Entry getEntry(String pivot) { - String struct = (String) entries.get(pivot); - if (struct == null) return null; - return new Entry(pivot, struct); + Object e = entries.get(pivot); + if (e == null) return null; + if (e instanceof String) return new Entry(pivot, (String) e); + if (e instanceof Entry) return (Entry) e; + return null; } public Entry removeEntry(String pivot) { - String struct = (String) entries.remove(pivot); - if (struct == null) return null; - return new Entry(pivot, struct); + Object e = entries.remove(pivot); + if (e == null) return null; + if (e instanceof String) return new Entry(pivot, (String) e); + if (e instanceof Entry) return (Entry) e; + return null; } public class Structure { @@ -285,7 +300,7 @@ public class kelondroAttrSeq { } public String toString() { - StringBuffer sb = new StringBuffer(70); + StringBuffer sb = new StringBuffer(100); sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',"); if (prop_names.length > 0) { for (int i = 0; i < prop_names.length; i++) { @@ -318,8 +333,7 @@ public class kelondroAttrSeq { } int p = attrseq.indexOf('|'); - attrseq = attrseq.substring(p + 1); - for (int i = 0; i < attrseq.length(); i = i + structure.seq_len) { + for (int i = p + 1; i < attrseq.length(); i = i + structure.seq_len) { seq.add(attrseq.substring(i, i + structure.seq_len)); } } @@ -351,7 +365,7 @@ public class kelondroAttrSeq { public String toString() { // creates only the attribute field and the sequence, not the pivot - StringBuffer sb = new StringBuffer(70); + StringBuffer sb = new StringBuffer(100 + structure.seq_len * seq.size()); Long val; for (int i = 0; i < structure.prop_names.length; i++) { val = (Long) attrs.get(structure.prop_names[i]); @@ -366,17 +380,27 @@ public 
class kelondroAttrSeq { } } + private static final Runtime runtime = Runtime.getRuntime(); + private static long cc = 0; /* call counter: throttles the free-memory probe in shortmem() */ + private static boolean shortmemstate = false; + private static boolean shortmem() { + if ((cc++ % 300) == 0) { /* re-sample free memory on every 300th call only; the other calls reuse the cached state */ + shortmemstate = (runtime.freeMemory() < 20000000L); + } + return shortmemstate; + } + public static void transcode(File from_file, File to_file) throws IOException { - kelondroAttrSeq crp = new kelondroAttrSeq(from_file); + kelondroAttrSeq crp = new kelondroAttrSeq(from_file, true); //crp.toFile(new File(args[1])); - kelondroAttrSeq cro = new kelondroAttrSeq(crp.name + "/Transcoded from " + crp.file.getName(), crp.structure.toString()); + kelondroAttrSeq cro = new kelondroAttrSeq(crp.name + "/Transcoded from " + crp.file.getName(), crp.structure.toString(), true); Iterator i = crp.entries.keySet().iterator(); String key; kelondroAttrSeq.Entry entry; while (i.hasNext()) { key = (String) i.next(); entry = crp.getEntry(key); - cro.addEntry(entry); + cro.putEntry(entry); } cro.toFile(to_file); } diff --git a/source/de/anomic/plasma/plasmaRankingCRProcess.java b/source/de/anomic/plasma/plasmaRankingCRProcess.java index 3284101ef..75cc301f9 100644 --- a/source/de/anomic/plasma/plasmaRankingCRProcess.java +++ b/source/de/anomic/plasma/plasmaRankingCRProcess.java @@ -67,7 +67,7 @@ public class plasmaRankingCRProcess { // open file kelondroAttrSeq source_cr = null; try { - source_cr = new kelondroAttrSeq(f); + source_cr = new kelondroAttrSeq(f, false); } catch (IOException e) { return false; } @@ -83,7 +83,7 @@ public class plasmaRankingCRProcess { new_entry = source_cr.getEntry(key); new_flags = new bitfield(serverCodings.enhancedCoder.encodeBase64Long((long) new_entry.getAttr("Flags", 0), 1).getBytes()); // enrich information with additional values - if ((acc_entry = acc.removeEntry(key)) != null) { + if ((acc_entry = acc.getEntry(key)) != null) { FUDate = (int) acc_entry.getAttr("FUDate", 0); FDDate = (int) acc_entry.getAttr("FDDate", 
0); LUDate = (int) acc_entry.getAttr("LUDate", 0); @@ -130,7 +130,7 @@ public class plasmaRankingCRProcess { acc_entry.setAttr("ACount", (long) ACount); acc_entry.setAttr("VCount", (long) VCount); acc_entry.setAttr("Vita", (long) Vita); - acc.addEntry(acc_entry); + acc.putEntry(acc_entry); } return true; @@ -161,10 +161,10 @@ public class plasmaRankingCRProcess { ",'='," + ",,,,,,,,,," + ",,,,,,,," + - "'|',*"); + "'|',*", false); acc.toFile(to_file); } - acc = new kelondroAttrSeq(to_file); + acc = new kelondroAttrSeq(to_file, false); // collect source files kelondroAttrSeq source_cr = null; @@ -202,21 +202,25 @@ public class plasmaRankingCRProcess { } - public static void genrci(File cr_in, File rci_out) throws IOException { - if (!(cr_in.exists())) return; - kelondroAttrSeq cr = new kelondroAttrSeq(cr_in); - kelondroAttrSeq rci; + public static int genrci(File cr_in, File rci_out) throws IOException { + if (!(cr_in.exists())) return 0; + final kelondroAttrSeq cr = new kelondroAttrSeq(cr_in, false); + if (rci_out.exists()) rci_out.delete(); // we want only fresh rci here (during testing) if (!(rci_out.exists())) { - rci = new kelondroAttrSeq("Global Ranking Reverse Citation Index", + kelondroAttrSeq rcix = new kelondroAttrSeq("Global Ranking Reverse Citation Index", ",'='," + "," + - "'|',*"); - rci.toFile(rci_out); + "'|',*", false); + rcix.toFile(rci_out); } - rci = new kelondroAttrSeq(rci_out); + final kelondroAttrSeq rci = new kelondroAttrSeq(rci_out, false); // loop over all referees - Iterator i = cr.keys(); + int count = 0; + int size = cr.size(); + long start = System.currentTimeMillis(); + long l; + final Iterator i = cr.keys(); String referee, anchor, anchorDom; kelondroAttrSeq.Entry cr_entry, rci_entry; long cr_UDate, rci_UDate; @@ -233,7 +237,7 @@ public class plasmaRankingCRProcess { if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6); // update domain-specific entry - rci_entry = rci.removeEntry(anchorDom); + rci_entry 
= rci.getEntry(anchorDom); if (rci_entry == null) rci_entry = rci.newEntry(anchorDom); rci_entry.addSeq(referee); @@ -242,12 +246,18 @@ public class plasmaRankingCRProcess { if (cr_UDate > rci_UDate) rci_entry.setAttr("UDate", cr_UDate); // insert entry - rci.addEntry(rci_entry); + rci.putEntry(rci_entry); + } + count++; + if ((count % 1000) == 0) { + l = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); + System.out.println("processed " + count + " citations, " + (count / l) + " per second, rci.size = " + rci.size() + ", " + ((size - count) * l / count) + " seconds remaining; mem = " + Runtime.getRuntime().freeMemory()); /* remaining time = records left * seconds elapsed / records done; multiplying first avoids dividing by a truncated rate of 0 */ } } // finished. write to file rci.toFile(rci_out); + return count; } public static void main(String[] args) { @@ -270,7 +280,11 @@ public class plasmaRankingCRProcess { if (!(acc_dir.exists())) acc_dir.mkdirs(); if (!(to_file.getParentFile().exists())) to_file.getParentFile().mkdirs(); serverFileUtils.moveAll(from_dir, ready_dir); + long start = System.currentTimeMillis(); + int files = ready_dir.list().length; accumulate(ready_dir, tmp_dir, err_dir, acc_dir, to_file); + long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); + System.out.println("Finished accumulate for " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)"); } if ((args.length == 3) && (args[0].equals("-recycle"))) { File root_path = new File(args[1]); @@ -282,12 +296,14 @@ public class plasmaRankingCRProcess { if (!(acc_dir.exists())) return; if (!(bkp_dir.exists())) bkp_dir.mkdirs(); String[] list = acc_dir.list(); + long start = System.currentTimeMillis(); + int files = list.length; long d; File f; for (int i = 0; i < list.length; i++) { f = new File(acc_dir, list[i]); try { - d = (System.currentTimeMillis() - (new kelondroAttrSeq(f)).created()) / 3600000; + d = (System.currentTimeMillis() - (new kelondroAttrSeq(f, false)).created()) / 3600000; if (d > max_age_hours) { // file is considered to 
be too old, it is not recycled System.out.println("file " + f.getName() + " is old (" + d + " hours) and not recycled, only moved to backup"); @@ -304,13 +320,18 @@ public class plasmaRankingCRProcess { f.delete(); } } + long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); + System.out.println("Finished recycling of " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)"); } if ((args.length == 2) && (args[0].equals("-genrci"))) { File root_path = new File(args[1]); File cr_file = new File(root_path, "DATA/RANKING/GLOBAL/020_con0/CRG-a-acc.cr.gz"); File rci_file = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0/RCI-0.rci.gz"); rci_file.getParentFile().mkdirs(); - genrci(cr_file, rci_file); + long start = System.currentTimeMillis(); + int count = genrci(cr_file, rci_file); + long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); + System.out.println("Finished RCI generation: " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)"); } } catch (IOException e) { e.printStackTrace(); diff --git a/source/de/anomic/server/serverCodings.java b/source/de/anomic/server/serverCodings.java index d82234209..3a81503a4 100644 --- a/source/de/anomic/server/serverCodings.java +++ b/source/de/anomic/server/serverCodings.java @@ -81,26 +81,27 @@ public final class serverCodings { public char encodeBase64Byte(byte b) { - return alpha[b]; + return (char) alpha[b]; } public byte decodeBase64Byte(char b) { return ahpla[b]; } - public String encodeBase64LongSmart(long c, int length) { if (c >= maxBase64(length)) { StringBuffer s = new StringBuffer(length); + s.setLength(length); while (length > 0) { - s.insert(0,alpha[0x3F]); - length--; + s.setCharAt(--length, alpha[0x3F]); /* overflow saturates: fill with the largest digit, exactly as the removed insert(0, alpha[0x3F]) did */ } return s.toString(); } else { return encodeBase64Long(c, length); } } + + /* public String encodeBase64Long(long c, int length) { if (length < 0) length = 0; StringBuffer s = 
new StringBuffer(length); //String s = ""; @@ -118,6 +119,18 @@ public final class serverCodings { while (s.length() < length) s.insert(0,alpha[0]); //s = alpha[0] + s; return s.toString(); } + */ + + public String encodeBase64Long(long c, int length) { + if (length < 0) length = 0; /* same guard as the implementation commented out above; a negative capacity would make the StringBuffer constructor throw */ + StringBuffer s = new StringBuffer(length); + s.setLength(length); + while (length > 0) { + s.setCharAt(--length, alpha[(byte) (c & 0x3F)]); + c >>= 6; + } + return s.toString(); + } public long decodeBase64Long(String s) { while (s.endsWith("=")) s = s.substring(0, s.length() - 1); diff --git a/source/de/anomic/server/serverMemory.java b/source/de/anomic/server/serverMemory.java index a7af9cc93..623348d43 100644 --- a/source/de/anomic/server/serverMemory.java +++ b/source/de/anomic/server/serverMemory.java @@ -51,7 +51,7 @@ public class serverMemory { private static final Runtime runtime = Runtime.getRuntime(); public static long free() { - // memory that is free without increasing of total memory takenn from os + // memory that is free without increasing of total memory taken from os return runtime.freeMemory(); }