From 94a334f128eab3abb41aadcbc3e395af9451197c Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 13 Aug 2012 11:13:53 +0200 Subject: [PATCH] another fix to the Solr metadata reading process and to the shutdown process --- .../net/yacy/kelondro/blob/HeapModifier.java | 86 +++++++++---------- .../kelondro/data/meta/URIMetadataNode.java | 28 ++++-- .../kelondro/workflow/WorkflowProcessor.java | 6 +- 3 files changed, 69 insertions(+), 51 deletions(-) diff --git a/source/net/yacy/kelondro/blob/HeapModifier.java b/source/net/yacy/kelondro/blob/HeapModifier.java index 56dec265e..37d7b8017 100644 --- a/source/net/yacy/kelondro/blob/HeapModifier.java +++ b/source/net/yacy/kelondro/blob/HeapModifier.java @@ -7,7 +7,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -38,7 +38,7 @@ import net.yacy.kelondro.util.MemoryControl; public class HeapModifier extends HeapReader implements BLOB { - + /* * This class adds a remove operation to a BLOBHeapReader. That means that a BLOBModifier can * - read elements from a BLOB @@ -57,7 +57,7 @@ public class HeapModifier extends HeapReader implements BLOB { public HeapModifier(final File heapFile, final int keylength, final ByteOrder ordering) throws IOException { super(heapFile, keylength, ordering); } - + /** * clears the content of the database * @throws IOException @@ -81,17 +81,17 @@ public class HeapModifier extends HeapReader implements BLOB { shrinkWithGapsAtEnd(); super.close(writeIDX); } - + @Override public synchronized void close() { close(true); } - + @Override public void finalize() { this.close(); } - + /** * remove a BLOB * @param key the primary key @@ -104,15 +104,15 @@ public class HeapModifier extends HeapReader implements BLOB { // pre-check before synchronization long seek = this.index.get(key); if (seek < 0) return; - + synchronized (this) { // check again if the index contains the key seek = this.index.get(key); if (seek < 0) return; - + // check consistency of the index //assert (checkKey(key, seek)) : "key compare failed; key = " + UTF8.String(key) + ", seek = " + seek; - + // access the file and read the container this.file.seek(seek); int size = this.file.readInt(); @@ -123,37 +123,37 @@ public class HeapModifier extends HeapReader implements BLOB { throw new IOException(this.heapFile.getName() + ": too long size " + size + " in record at " + seek); } super.deleteFingerprint(); - + // add entry to free array this.free.put(seek, size); - + // fill zeros to the content int l = size; byte[] fill = new byte[size]; while (l-- > 0) fill[l] = 0; this.file.write(fill, 0, size); - + // remove entry from index this.index.remove(key); - + // recursively merge gaps tryMergeNextGaps(seek, size); tryMergePreviousGap(seek); } } - + private void tryMergePreviousGap(final long thisSeek) throws IOException { // this is called after a record has been removed. That may cause that a new // empty record was surrounded by gaps. We merge with a previous gap, if this // is also empty, but don't do that recursively // If this is successful, it removes the given marker for thisSeed and // because of this, this method MUST be called AFTER tryMergeNextGaps was called. - + // first find the gap entry for the closest gap in front of the give gap SortedMap head = this.free.headMap(thisSeek); if (head.isEmpty()) return; long previousSeek = head.lastKey().longValue(); int previousSize = head.get(previousSeek).intValue(); - + // check if this is directly in front if (previousSeek + previousSize + 4 == thisSeek) { // right in front! merge the gaps @@ -166,22 +166,22 @@ public class HeapModifier extends HeapReader implements BLOB { private void tryMergeNextGaps(final long thisSeek, final int thisSize) throws IOException { // try to merge two gaps if one gap has been processed already and the position of the next record is known // if the next record is also a gap, merge these gaps and go on recursively - + // first check if next gap position is outside of file size long nextSeek = thisSeek + thisSize + 4; if (nextSeek >= this.file.length()) return; // end of recursion - + // move to next position and read record size Integer nextSize = this.free.get(nextSeek); if (nextSize == null) return; // finished, this is not a gap - + // check if the record is a gap-record assert nextSize.intValue() > 0; if (nextSize.intValue() == 0) { // a strange gap record: we can extend the thisGap with four bytes // the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap mergeGaps(thisSeek, thisSize, nextSeek, 0); - + // recursively go on tryMergeNextGaps(thisSeek, thisSize + 4); } else { @@ -194,35 +194,35 @@ public class HeapModifier extends HeapReader implements BLOB { if (t == 0) { // the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap mergeGaps(thisSeek, thisSize, nextSeek, nextSize.intValue()); - + // recursively go on tryMergeNextGaps(thisSeek, thisSize + 4 + nextSize.intValue()); } } } - + private void mergeGaps(final long seek0, final int size0, final long seek1, final int size1) throws IOException { //System.out.println("*** DEBUG-BLOBHeap " + heapFile.getName() + ": merging gap from pos " + seek0 + ", len " + size0 + " with next record of size " + size1 + " (+ 4)"); - + Integer g = this.free.remove(seek1); // g is only used for debugging assert g != null; assert g.intValue() == size1; - + // overwrite the size bytes of next records with zeros this.file.seek(seek1); this.file.writeInt(0); - + // the new size of the current gap: old size + len + 4 int newSize = size0 + 4 + size1; this.file.seek(seek0); this.file.writeInt(newSize); - + // register new gap in the free array; overwrite old gap entry g = this.free.put(seek0, newSize); assert g != null; assert g.intValue() == size0; } - + protected void shrinkWithGapsAtEnd() { // find gaps at the end of the file and shrink the file by these gaps if (this.free == null) return; @@ -249,26 +249,26 @@ public class HeapModifier extends HeapReader implements BLOB { public int replace(byte[] key, final Rewriter rewriter) throws IOException { throw new UnsupportedOperationException(); } - + @Override public int reduce(byte[] key, final Reducer reducer) throws IOException, SpaceExceededException { key = normalizeKey(key); assert key.length == this.keylength; - + // pre-check before synchronization long pos = this.index.get(key); if (pos < 0) return 0; - + synchronized (this) { long m = this.mem(); - + // check again if the index contains the key pos = this.index.get(key); if (pos < 0) return 0; - + // check consistency of the index //assert checkKey(key, pos) : "key compare failed; key = " + UTF8.String(key) + ", seek = " + pos; - + // access the file and read the container this.file.seek(pos); final int len = this.file.readInt() - this.keylength; @@ -276,16 +276,16 @@ public class HeapModifier extends HeapReader implements BLOB { if (!MemoryControl.request(len, true)) return 0; // not enough memory available for this blob } super.deleteFingerprint(); - + // read the key final byte[] keyf = new byte[this.keylength]; this.file.readFully(keyf, 0, keyf.length); - assert this.ordering == null || this.ordering.equal(key, keyf); - + assert this.ordering == null || this.ordering.equal(key, keyf) : "key = " + UTF8.String(key) + ", keyf = " + UTF8.String(keyf); + // read the blob byte[] blob = new byte[len]; this.file.readFully(blob, 0, blob.length); - + // rewrite the entry blob = reducer.rewrite(blob); int reduction = len - blob.length; @@ -295,30 +295,30 @@ public class HeapModifier extends HeapReader implements BLOB { this.file.write(blob); return 0; } - + // the new entry must be smaller than the old entry and must at least be 4 bytes smaller // because that is the space needed to write a new empty entry record at the end of the gap if (blob.length > len - 4) throw new IOException("replace of BLOB for key " + UTF8.String(key) + " failed (too large): new size = " + blob.length + ", old size = " + (len - 4)); - + // replace old content this.file.seek(pos); this.file.writeInt(blob.length + key.length); this.file.write(key); this.file.write(blob); - + // define the new empty entry final int newfreereclen = reduction - 4; assert newfreereclen >= 0; this.file.writeInt(newfreereclen); - + // fill zeros to the content int l = newfreereclen; byte[] fill = new byte[newfreereclen]; while (l-- > 0) fill[l] = 0; this.file.write(fill, 0, newfreereclen); - + // add a new free entry this.free.put(pos + 4 + blob.length + key.length, newfreereclen); - + assert mem() <= m : "m = " + m + ", mem() = " + mem(); return reduction; } diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index cd503e541..aa65a3abf 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -223,6 +223,14 @@ public class URIMetadataNode implements URIMetadata { return UTF8.getBytes((String) languages.get(0)); } + + @Override + public byte[] referrerHash() { + ArrayList referrer = getArrayList(YaCySchema.referrer_id_txt); + if (referrer == null || referrer.size() == 0) return null; + return ASCII.getBytes((String) referrer.get(0)); + } + @Override public int size() { return getInt(YaCySchema.size_i); @@ -377,19 +385,27 @@ public class URIMetadataNode implements URIMetadata { return null; core.ensureCapacity(core.length() + snippet.length() * 2); - core.insert(0, "{"); + core.insert(0, '{'); core.append(",snippet=").append(crypt.simpleEncode(snippet)); - core.append("}"); + core.append('}'); return core.toString(); //return "{" + core + ",snippet=" + crypt.simpleEncode(snippet) + "}"; } + + /** + * @return the object as String.
+ * This e.g. looks like this: + *
{hash=jmqfMk7Y3NKw,referrer=------------,mod=20050610,load=20051003,size=51666,wc=1392,cc=0,local=true,q=AEn,dt=h,lang=uk,url=b|aHR0cDovL3d3dy50cmFuc3BhcmVuY3kub3JnL3N1cnZleXMv,descr=b|S25vd2xlZGdlIENlbnRyZTogQ29ycnVwdGlvbiBTdXJ2ZXlzIGFuZCBJbmRpY2Vz}
+ */ @Override - public byte[] referrerHash() { - String[] referrer = (String[]) this.doc.getFieldValue(YaCySchema.referrer_id_txt.name()); - if (referrer == null || referrer.length == 0) return null; - return ASCII.getBytes(referrer[0]); + public String toString() { + final StringBuilder core = corePropList(); + if (core == null) return null; + core.insert(0, '{'); + core.append('}'); + return core.toString(); } @Override diff --git a/source/net/yacy/kelondro/workflow/WorkflowProcessor.java b/source/net/yacy/kelondro/workflow/WorkflowProcessor.java index 1c9a9a620..926840f97 100644 --- a/source/net/yacy/kelondro/workflow/WorkflowProcessor.java +++ b/source/net/yacy/kelondro/workflow/WorkflowProcessor.java @@ -195,8 +195,10 @@ public class WorkflowProcessor { } // wait until input queue is empty - while (this.input.size() > 0) { - try {Thread.sleep(100);} catch (InterruptedException e) {} + for (int i = 0; i < 10; i++) { + if (this.input.size() <= 0) break; + Log.logInfo("WorkflowProcess", "waiting for queue " + this.processName + " to shut down; input.size = " + this.input.size()); + try {Thread.sleep(1000);} catch (InterruptedException e) {} } // shut down executors