From e2c86a8ebaa94e7d4e262b78c765cb3a8a6918ba Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sun, 29 Oct 2023 12:49:08 +0100 Subject: [PATCH] added a ZIM cluster pointer cache --- source/org/openzim/ZIMFile.java | 19 +++++++++++++++---- source/org/openzim/ZIMReader.java | 5 +---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/source/org/openzim/ZIMFile.java b/source/org/openzim/ZIMFile.java index dd209b5e9..2dcfb2208 100644 --- a/source/org/openzim/ZIMFile.java +++ b/source/org/openzim/ZIMFile.java @@ -33,8 +33,9 @@ import java.util.List; * Proof-Reading, unclustering, refactoring, * naming adoption to https://wiki.openzim.org/wiki/ZIM_file_format, * change of Exception handling, - * extension to more attributes as defined in spec (bugfix for mime type loading) - * int/long bugfix (did reading of long values with int variables, causing negative offsets) + * extension to more attributes as defined in spec (bugfix for mime type loading), + * int/long bugfix (did reading of long values with int variables, causing negative offsets), + * added url pointer, title pointer and cluster pointer caches */ public class ZIMFile extends File { @@ -45,10 +46,10 @@ public class ZIMFile extends File { public final int header_majorVersion; public final int header_minorVersion; public final int header_entryCount; - public final int header_clusterCount; + private final int header_clusterCount; public final long header_urlPtrPos; public final long header_titlePtrPos; - public final long header_clusterPtrPos; + private final long header_clusterPtrPos; public final long header_mimeListPos; public final int header_mainPage; public final int header_layoutPage; @@ -61,6 +62,7 @@ public class ZIMFile extends File { private final String[] mimeTypeList; private final byte[] urlPtrListBlob; private final byte[] titlePtrListBlob; + private final byte[] clusterPtrListBlob; public ZIMFile(final String path) throws IOException { super(path); @@ -120,6 +122,11 @@ public class ZIMFile extends File { this.titlePtrListBlob = new byte[this.header_entryCount * 4]; mReader.seek(this.header_titlePtrPos); RandomAccessFileZIMInputStream.readFully(mReader, this.titlePtrListBlob); + + // Initialize the Cluster Pointer List + this.clusterPtrListBlob = new byte[this.header_clusterCount * 8]; + mReader.seek(this.header_clusterPtrPos); + RandomAccessFileZIMInputStream.readFully(mReader, this.clusterPtrListBlob); } public final String getMimeType(int idx) { @@ -133,4 +140,8 @@ public class ZIMFile extends File { public final int getTitlePtr(final int idx) { return RandomAccessFileZIMInputStream.toFourLittleEndianInteger(this.titlePtrListBlob, idx * 4); } + + public final long geClusterPtr(final int idx) { + return RandomAccessFileZIMInputStream.toEightLittleEndianLong(this.clusterPtrListBlob, idx * 8); + } } diff --git a/source/org/openzim/ZIMReader.java b/source/org/openzim/ZIMReader.java index 8d773d473..427b53072 100644 --- a/source/org/openzim/ZIMReader.java +++ b/source/org/openzim/ZIMReader.java @@ -215,11 +215,8 @@ public class ZIMReader { // This is now an article, so thus we can cast to ArticleEntry final ArticleEntry article = (ArticleEntry) directoryInfo; - // Move to the cluster entry in the clusterPtrPos - this.mFile.mReader.seek(this.mFile.header_clusterPtrPos + article.cluster_number * 8L); - // Read the location of the cluster - final long clusterPos = this.mFile.mReader.readEightLittleEndianBytesLong(); + final long clusterPos = this.mFile.geClusterPtr(article.cluster_number); // Move to the cluster this.mFile.mReader.seek(clusterPos);