From 1fefae9baff5ec54f2b4a7aadb47c1300c51cc30 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Fri, 27 Oct 2023 10:59:06 +0200
Subject: [PATCH] integrated the source code of an openzim file format reader.
 These are the raw format reader files with no integration in YaCy yet, which
 will maybe follow as a next step. The zim file format is documented in
 https://openzim.org and the reader code was taken from the archived,
 non-maintained repository at https://github.com/openzim/zimreader-java

---
 source/org/openzim/ArticleEntry.java          |  46 ++
 source/org/openzim/DirectoryEntry.java        |  69 +++
 .../RandomAcessFileZIMInputStream.java        | 135 ++++++
 source/org/openzim/RedirectEntry.java         |  37 ++
 source/org/openzim/Utilities.java             |  84 ++++
 source/org/openzim/ZIMFile.java               | 198 +++++++++
 source/org/openzim/ZIMReader.java             | 408 ++++++++++++++++++
 source/org/openzim/ZIMTest.java               |  44 ++
 8 files changed, 1021 insertions(+)
 create mode 100644 source/org/openzim/ArticleEntry.java
 create mode 100644 source/org/openzim/DirectoryEntry.java
 create mode 100644 source/org/openzim/RandomAcessFileZIMInputStream.java
 create mode 100644 source/org/openzim/RedirectEntry.java
 create mode 100644 source/org/openzim/Utilities.java
 create mode 100644 source/org/openzim/ZIMFile.java
 create mode 100644 source/org/openzim/ZIMReader.java
 create mode 100644 source/org/openzim/ZIMTest.java

diff --git a/source/org/openzim/ArticleEntry.java b/source/org/openzim/ArticleEntry.java
new file mode 100644
index 000000000..7eeae2e06
--- /dev/null
+++ b/source/org/openzim/ArticleEntry.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package org.openzim;
+
+public class ArticleEntry extends DirectoryEntry {
+    /** Number of the cluster that holds the blob of this entry. */
+    int clusterNumber;
+    /** Number of the blob inside that cluster. */
+    int blobnumber;
+
+    /** Creates an entry from the common directory fields plus its cluster and blob numbers. */
+    public ArticleEntry(final int mimeType, final char namespace, final int revision,
+            final int clusterNumber, final int blobNumber, final String url, final String title,
+            final int urlListindex) {
+        super(mimeType, namespace, revision, url, title, urlListindex);
+        this.blobnumber = blobNumber;
+        this.clusterNumber = clusterNumber;
+    }
+
+    /** @return the cluster number of this entry */
+    public int getClusterNumber() {
+        return clusterNumber;
+    }
+
+    /** @return the blob number of this entry */
+    public int getBlobnumber() {
+        return blobnumber;
+    }
+}
diff --git a/source/org/openzim/DirectoryEntry.java b/source/org/openzim/DirectoryEntry.java
new file mode 100644
index 000000000..92c52de41
--- /dev/null
+++ b/source/org/openzim/DirectoryEntry.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+public abstract class DirectoryEntry {
+    /** MIME type number of the entry. */
+    int mimeType;
+    /** Namespace character of the entry. */
+    char namespace;
+    /** Revision value of the entry. */
+    int revision;
+    /** URL of the entry. */
+    String url;
+    /** Title of the entry. */
+    String title;
+    /** Index handed in by the creator; presumably the position in the URL list - TODO confirm. */
+    int urlListindex;
+    /** Stores the six values unchanged; nothing is validated or copied. */
+    public DirectoryEntry(final int mimeType, final char namespace, final int revision,
+            final String url, final String title, final int index) {
+        this.urlListindex = index;
+        this.title = title;
+        this.url = url;
+        this.revision = revision;
+        this.namespace = namespace;
+        this.mimeType = mimeType;
+    }
+
+    /** @return the MIME type number of this entry */
+    public int getMimeType() {
+        return mimeType;
+    }
+    /** @return the namespace character of this entry */
+    public char getNamespace() {
+        return namespace;
+    }
+    /** @return the revision value of this entry */
+    public int getRevision() {
+        return revision;
+    }
+    /** @return the URL of this entry */
+    public String getUrl() {
+        return url;
+    }
+    /** @return the title of this entry */
+    public String getTitle() {
+        return title;
+    }
+    /** @return the index value handed to the constructor */
+    public int getUrlListindex() {
+        return urlListindex;
+    }
+}
diff --git a/source/org/openzim/RandomAcessFileZIMInputStream.java b/source/org/openzim/RandomAcessFileZIMInputStream.java
new file mode 100644
index 000000000..006dd4498
--- /dev/null
+++ b/source/org/openzim/RandomAcessFileZIMInputStream.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+
+/**
+ * This is an InputStream backed by a RandomAccessFile, so that a seekable file
+ * can also be read as a stream; specifically designed for reading a ZIM file.
+ * Ad-Hoc implementation, can be improved.
+ *
+ * @author Arunesh Mathur <aruneshmathur1990 at gmail.com>
+ */
+
+public class RandomAcessFileZIMInputStream extends InputStream {
+    private final RandomAccessFile mRAFReader;
+
+    /** File offset remembered by mark(); -1 while no mark is set. */
+    private long mMarked = -1;
+
+    /** Wraps an already opened file; every read and seek is delegated to it. */
+    public RandomAcessFileZIMInputStream(final RandomAccessFile reader) {
+        this.mRAFReader = reader;
+    }
+
+    /**
+     * Fills buffer[0..count-1] via readFully: a plain read may deliver fewer bytes and leave
+     * stale data in the buffer. An EOFException is thrown when the file ends too early.
+     */
+    private void fill(final byte[] buffer, final int count) throws IOException {
+        if (buffer.length < count) {
+            // an argument problem, not a memory problem: do not raise an Error for it
+            throw new IllegalArgumentException("buffer too small");
+        }
+        this.mRAFReader.readFully(buffer, 0, count);
+    }
+
+    /** Reads 2 bytes into buffer and decodes them as little endian. TODO: Remove the parameter buffer */
+    public int readTwoLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 2);
+        return Utilities.toTwoLittleEndianInteger(buffer);
+    }
+
+    /** Reads 4 bytes into buffer and decodes them as little endian. TODO: Remove the parameter buffer */
+    public int readFourLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 4);
+        return Utilities.toFourLittleEndianInteger(buffer);
+    }
+
+    /** Reads 8 bytes into buffer and decodes them as little endian. TODO: Remove the parameter buffer */
+    public int readEightLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 8);
+        return Utilities.toEightLittleEndianInteger(buffer);
+    }
+
+    /** Reads 16 bytes into buffer and decodes them as little endian. TODO: Remove the parameter buffer */
+    public int readSixteenLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 16);
+        return Utilities.toSixteenLittleEndianInteger(buffer);
+    }
+
+    /**
+     * Reads the bytes up to the next zero byte (which is consumed) and decodes them as UTF-8.
+     * Throws an EOFException instead of looping forever when the terminator is missing.
+     */
+    public String readString() throws IOException {
+        final java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
+        for (int b = this.mRAFReader.read(); b != 0; b = this.mRAFReader.read()) {
+            if (b < 0) {
+                throw new java.io.EOFException("end of file inside a zero-terminated string");
+            }
+            bytes.write(b);
+        }
+        return new String(bytes.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /** Reads one byte from the file; -1 signals the end of the file. */
+    @Override
+    public int read() throws IOException {
+        return this.mRAFReader.read();
+    }
+
+    /** Bulk read delegated to the file, so that stream wrappers do not fall back to byte-wise reads. */
+    @Override
+    public int read(final byte[] b, final int off, final int len) throws IOException {
+        return this.mRAFReader.read(b, off, len);
+    }
+
+    /** Returns the wrapped file. */
+    public RandomAccessFile getRandomAccessFile() {
+        return this.mRAFReader;
+    }
+
+    /** Moves the file pointer to the absolute offset pos. */
+    public void seek(final long pos) throws IOException {
+        this.mRAFReader.seek(pos);
+    }
+
+    /** Returns the current offset of the file pointer. */
+    public long getFilePointer() throws IOException {
+        return this.mRAFReader.getFilePointer();
+    }
+
+    /** Remembers the current file offset for a later reset(). */
+    public void mark() throws IOException {
+        this.mMarked = this.mRAFReader.getFilePointer();
+    }
+
+    /** Seeks back to the offset saved by mark() and clears it; does nothing without a mark. */
+    @Override
+    public void reset() throws IOException {
+        if (this.mMarked != -1) {
+            this.mRAFReader.seek(this.mMarked);
+            this.mMarked = -1;
+        }
+    }
+}
diff --git a/source/org/openzim/RedirectEntry.java b/source/org/openzim/RedirectEntry.java
new file mode 100644
index 000000000..fdbe3fba1
--- /dev/null
+++ b/source/org/openzim/RedirectEntry.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+public class RedirectEntry extends DirectoryEntry {
+    /** Redirect index handed to the constructor; presumably the index of the target entry - TODO confirm. */
+    int redirectIndex;
+
+    /** Creates a redirect from the common directory fields plus the redirect index. */
+    public RedirectEntry(final int mimeType, final char namespace, final int revision,
+            final int redirectIndex, final String url, final String title, final int urlListindex) {
+        super(mimeType, namespace, revision, url, title, urlListindex);
+        this.redirectIndex = redirectIndex;
+    }
+
+    /** @return the redirect index of this entry */
+    public int getRedirectIndex() {
+        return redirectIndex;
+    }
+
+}
diff --git a/source/org/openzim/Utilities.java b/source/org/openzim/Utilities.java
new file mode 100644
index 000000000..0de337c9c
--- /dev/null
+++ b/source/org/openzim/Utilities.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package org.openzim;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class Utilities {
+
+    /** Placeholder (TODO: Write a binary search algorithm): no search is done, the result is always -1. */
+    public static int binarySearch() {
+        return -1;
+    }
+
+    /** Decodes buffer[0..1] as an unsigned little-endian value in the range 0..65535. */
+    public static int toTwoLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 2);
+        return (buffer[0] & 0xFF) | ((buffer[1] & 0xFF) << 8);
+    }
+
+    /** Decodes buffer[0..3] as a little-endian 32 bit value; values from 2^31 on come out negative. */
+    public static int toFourLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 4);
+        return (buffer[0] & 0xFF) | ((buffer[1] & 0xFF) << 8)
+                | ((buffer[2] & 0xFF) << 16) | ((buffer[3] & 0xFF) << 24);
+    }
+
+    /**
+     * Decodes buffer[0..7] as a little-endian 64 bit value. Java only uses the low five bits of an
+     * int shift distance, so the upper bytes are checked here instead of being shifted in.
+     * @throws IOException if the value is larger than Integer.MAX_VALUE
+     */
+    public static int toEightLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 8);
+        final int low = toFourLittleEndianInteger(buffer);
+        if (low < 0 || (buffer[4] | buffer[5] | buffer[6] | buffer[7]) != 0) {
+            throw new IOException("64 bit value does not fit into an int");
+        }
+        return low;
+    }
+
+    /** Returns the low 32 bits of buffer[0..15]; the upper twelve bytes cannot be held by an int. */
+    public static int toSixteenLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 16);
+        return toFourLittleEndianInteger(buffer);
+    }
+
+    /** Skips exactly bytes bytes of stream; throws an EOFException if the stream ends before that. */
+    public static void skipFully(final InputStream stream, final long bytes) throws IOException {
+        long remaining = bytes;
+        while (remaining > 0) {
+            final long skipped = stream.skip(remaining);
+            // skip() may return 0 at any time; one probing read tells a stall from the end of the stream
+            if (skipped <= 0 && stream.read() < 0) {
+                throw new java.io.EOFException("stream ended with " + remaining + " bytes left to skip");
+            }
+            remaining -= Math.max(skipped, 1);
+        }
+    }
+
+    /** Rejects buffers shorter than needed; an IllegalArgumentException replaces the former OutOfMemoryError. */
+    private static void requireLength(final byte[] buffer, final int needed) {
+        if (buffer.length < needed) {
+            throw new IllegalArgumentException("buffer too small");
+        }
+    }
+}
diff --git a/source/org/openzim/ZIMFile.java b/source/org/openzim/ZIMFile.java
new file mode 100644
index 000000000..c86119be1
--- /dev/null
+++ b/source/org/openzim/ZIMFile.java
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author Arunesh Mathur
+ *
+ *         A ZIM file implementation that stores the Header and the MIMETypeList
+ *
+ */
+public class ZIMFile extends File {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Header values; stays null when the file is missing or cannot be opened. */
+    private Header mHeader;
+
+    /** MIME type strings in the order they are stored in the file. */
+    private List<String> mMIMETypeList; // Can be removed if not needed
+
+    /**
+     * Creates the file object and immediately reads the header and the MIME type list.
+     * A missing file is only reported on stderr; the getters of such an instance fail
+     * with a NullPointerException because no header was read.
+     *
+     * @param path path of the ZIM file
+     */
+    public ZIMFile(final String path) {
+        super(path);
+
+        try {
+            readHeader();
+        } catch (final FileNotFoundException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Reads the fixed header fields and then the MIME type list.
+     * Problems while opening or reading the file are printed to stderr and leave the
+     * fields that were not reached yet unset.
+     *
+     * @throws FileNotFoundException if the file does not exist
+     */
+    private void readHeader() throws FileNotFoundException {
+
+        // Check whether the file exists
+        if (!this.exists()) {
+            throw new FileNotFoundException(
+                    "The file that you specified was not found.");
+        }
+
+        // Scratch space for the little-endian reads; the widest field (uuid) needs 16 bytes
+        final byte[] buffer = new byte[16];
+
+        // try-with-resources closes the file handle again; it used to stay open forever
+        try (RandomAccessFile file = new RandomAccessFile(this, "r")) {
+
+            final RandomAcessFileZIMInputStream reader = new RandomAcessFileZIMInputStream(file);
+
+            // The ZIM file header
+            final Header header = new Header();
+            this.mHeader = header;
+
+            // The fields follow each other in the file, so the order of the reads matters
+            header.magicNumber = reader.readFourLittleEndianBytesValue(buffer);
+            header.version = reader.readFourLittleEndianBytesValue(buffer);
+            header.uuid = reader.readSixteenLittleEndianBytesValue(buffer);
+            header.articleCount = reader.readFourLittleEndianBytesValue(buffer);
+            header.clusterCount = reader.readFourLittleEndianBytesValue(buffer);
+            header.urlPtrPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.titlePtrPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.clusterPtrPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.mimeListPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.mainPage = reader.readFourLittleEndianBytesValue(buffer);
+            header.layoutPage = reader.readFourLittleEndianBytesValue(buffer);
+
+            // The MIME type list is located at mimeListPos. Do not rely on it following
+            // layoutPage directly: headers may carry further fields in between.
+            reader.seek(header.mimeListPos);
+
+            // Initialise the MIMETypeList: zero-terminated strings, closed by an empty one
+            this.mMIMETypeList = new ArrayList<>();
+            while (true) {
+                final StringBuilder mimeBuffer = new StringBuilder();
+                // read() yields 0 for a terminator and -1 at the end of the file; both end
+                // the string, so a truncated file can no longer keep this loop spinning
+                for (int b = reader.read(); b > 0; b = reader.read()) {
+                    mimeBuffer.append((char) b);
+                }
+                if (mimeBuffer.length() == 0) {
+                    break;
+                }
+                this.mMIMETypeList.add(mimeBuffer.toString());
+            }
+
+        } catch (final Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** @return the four version bytes of the header, decoded as one little-endian int */
+    public int getVersion() {
+        return this.mHeader.version;
+    }
+
+    /** @return the uuid header field as an int; an int cannot hold all 16 bytes of it */
+    public int getUuid() {
+        return this.mHeader.uuid;
+    }
+
+    /** @return the article count stored in the header */
+    public int getArticleCount() {
+        return this.mHeader.articleCount;
+    }
+
+    /** @return the cluster count stored in the header */
+    public int getClusterCount() {
+        return this.mHeader.clusterCount;
+    }
+
+    /** @return the urlPtrPos header field, the file offset of the URL pointer list */
+    public int getUrlPtrPos() {
+        return this.mHeader.urlPtrPos;
+    }
+
+    /** @return the titlePtrPos header field, the file offset of the title pointer list */
+    public int getTitlePtrPos() {
+        return this.mHeader.titlePtrPos;
+    }
+
+    /** @return the clusterPtrPos header field, the file offset of the cluster pointer list */
+    public int getClusterPtrPos() {
+        return this.mHeader.clusterPtrPos;
+    }
+
+    /**
+     * @return the MIME type string stored at position mimeNumber of the MIME type list
+     */
+    public String getMIMEType(final int mimeNumber) {
+        return this.mMIMETypeList.get(mimeNumber);
+    }
+
+    /** @return the mimeListPos header field, the offset at which the MIME type list starts */
+    public int getHeaderSize() {
+        return this.mHeader.mimeListPos;
+    }
+
+    /** @return the mainPage header field */
+    public int getMainPage() {
+        return this.mHeader.mainPage;
+    }
+
+    /** @return the layoutPage header field */
+    public int getLayoutPage() {
+        return this.mHeader.layoutPage;
+    }
+
+    /**
+     * Plain holder for the header fields, declared in the order readHeader fills them.
+     */
+    public class Header {
+        int magicNumber;
+        int version;
+        int uuid;
+        int articleCount;
+        int clusterCount;
+        int urlPtrPos;
+        int titlePtrPos;
+        int clusterPtrPos;
+        int mimeListPos;
+        int mainPage;
+        int layoutPage;
+    }
+
+}
diff --git a/source/org/openzim/ZIMReader.java b/source/org/openzim/ZIMReader.java
new file mode 100644
index 000000000..affd6ea6d
--- /dev/null
+++ b/source/org/openzim/ZIMReader.java
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.ByteArrayOutputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.tukaani.xz.SingleXZInputStream;
+
+/**
+ * @author Arunesh Mathur
+ *
+ *         A ZIMReader that reads data from the ZIMFile
+ *
+ */
+public class ZIMReader {
+
+    private final ZIMFile mFile;
+    private RandomAcessFileZIMInputStream mReader;
+
+    /** Opens file for reading; a failure to open it is only printed and leaves the reader unset. */
+    public ZIMReader(final ZIMFile file) {
+        this.mFile = file;
+        try {
+            this.mReader = new RandomAcessFileZIMInputStream(new RandomAccessFile(file, "r"));
+        } catch (final FileNotFoundException notFound) {
+            notFound.printStackTrace();
+        }
+    }
+
+    /**
+     * Collects the URL of every directory entry, in the order of the URL pointer list.
+     *
+     * @return one URL string per entry counted by the header's article count
+     * @throws IOException if seeking or reading the file fails
+     */
+    public List<String> getURLListByURL() throws IOException {
+
+        // Scratch buffer for the little-endian reads
+        final byte[] scratch = new byte[8];
+
+        // The list that will eventually return the list of URL's
+        final ArrayList<String> urls = new ArrayList<>();
+
+        // Start of the URL pointer list; each pointer is read as an 8 byte value
+        this.mReader.seek(this.mFile.getUrlPtrPos());
+
+        int index = 0;
+        while (index < this.mFile.getArticleCount()) {
+            // Position of directory entry number index
+            final int entryPos = this.mReader.readEightLittleEndianBytesValue(scratch);
+
+            // Remember where the pointer list continues before jumping to the entry
+            this.mReader.mark();
+            this.mReader.seek(entryPos);
+
+            // 65535 is taken as a redirect entry, whose URL starts at byte 12 instead of byte 16
+            final int mimeType = this.mReader.readTwoLittleEndianBytesValue(scratch);
+            final int urlOffset = (mimeType == 65535) ? 12 : 16;
+            this.mReader.seek(entryPos + urlOffset);
+            urls.add(this.mReader.readString());
+
+            // Back to the pointer list
+            this.mReader.reset();
+            index++;
+        }
+
+        return urls;
+    }
+
+    /**
+     * Collects the URL of every directory entry, in the order of the title pointer list.
+     * Every title pointer is resolved through the URL pointer list to find its entry.
+     *
+     * @return one URL string per entry counted by the header's article count
+     * @throws IOException if seeking or reading the file fails
+     */
+    public List<String> getURLListByTitle() throws IOException {
+
+        // Scratch buffer for the little-endian reads
+        final byte[] scratch = new byte[8];
+
+        // The list that will eventually return the list of URL's
+        final ArrayList<String> urls = new ArrayList<>();
+
+        // Looked up once, it is needed for every entry
+        final int urlPtrPos = this.mFile.getUrlPtrPos();
+
+        // Start of the title pointer list; each element is read as a 4 byte entry number
+        this.mReader.seek(this.mFile.getTitlePtrPos());
+
+        int index = 0;
+        while (index < this.mFile.getArticleCount()) {
+
+            // Number of the entry that comes next in title order
+            final int articleNumber = this.mReader.readFourLittleEndianBytesValue(scratch);
+
+            // Remember where the title pointer list continues
+            this.mReader.mark();
+
+            // Resolve the entry number through the URL pointer list (8 bytes per pointer)
+            this.mReader.seek(urlPtrPos + (8 * articleNumber));
+            final int entryPos = this.mReader.readEightLittleEndianBytesValue(scratch);
+            this.mReader.seek(entryPos);
+
+            // 65535 is taken as a redirect entry, whose URL starts at byte 12 instead of byte 16
+            final int mimeType = this.mReader.readTwoLittleEndianBytesValue(scratch);
+            final int urlOffset = (mimeType == 65535) ? 12 : 16;
+            this.mReader.seek(entryPos + urlOffset);
+            urls.add(this.mReader.readString());
+
+            // Return to the marked position
+            this.mReader.reset();
+            index++;
+        }
+
+        return urls;
+    }
+
+    // Gives the minimum required information needed for the given articleName
+    /**
+     * Binary search through the title pointer list for the entry named namespace/articleName.
+     * NOTE(review): the list is walked in title order but compared by URL - confirm both agree.
+     *
+     * @return the matching entry, or null if none matches or an entry could not be read
+     */
+    public DirectoryEntry getDirectoryInfo(String articleName, final char namespace)
+            throws IOException {
+        final String wanted = namespace + "/" + articleName;
+
+        // Title pointers are 4 bytes wide and the upper bound is inclusive, so it has to address
+        // the last pointer (count - 1). Starting at count probed one slot behind the list.
+        int beg = this.mFile.getTitlePtrPos();
+        int end = beg + (this.mFile.getArticleCount() - 1) * 4;
+        while (beg <= end) {
+            final int mid = beg + 4 * (((end - beg) / 4) / 2);
+            final DirectoryEntry entry = getDirectoryInfoAtTitlePosition(mid);
+            if (entry == null) {
+                return null;
+            }
+            final int order = wanted.compareTo(entry.getNamespace() + "/" + entry.getUrl());
+            if (order == 0) {
+                return entry;
+            } else if (order < 0) {
+                end = mid - 4;
+            } else {
+                beg = mid + 4;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the content of the blob that belongs to the named article.
+     * <p>
+     * The directory entry is looked up with getDirectoryInfo. Its cluster number selects an
+     * 8 byte pointer in the cluster pointer list, which leads to the cluster. The first byte
+     * of the cluster is the compression type: 0 and 1 are read as plain data, 4 is read
+     * through an XZ decoder. Entries that are not an ArticleEntry are not followed.
+     *
+     * @param articleName URL of the article without the namespace prefix
+     * @param namespace namespace character of the article
+     * @return the blob bytes, or null if the name is unknown, the entry is not an
+     *         ArticleEntry or the compression type is none of 0, 1 and 4
+     * @throws IOException if reading fails, the data ends early or the blob offsets are
+     *         inconsistent
+     */
+    public ByteArrayOutputStream getArticleData(final String articleName, final char namespace) throws IOException {
+
+        // search in the cache first, if not found, then call getDirectoryInfo(articleName)
+        final DirectoryEntry mainEntry = getDirectoryInfo(articleName, namespace);
+
+        // Check what kind of an entry was mainEntry: only an ArticleEntry has a blob
+        if (mainEntry == null || mainEntry.getClass() != ArticleEntry.class) {
+            return null;
+        }
+
+        // Cast to ArticleEntry
+        final ArticleEntry article = (ArticleEntry) mainEntry;
+
+        // Get the cluster and blob numbers from the article
+        final int clusterNumber = article.getClusterNumber();
+        final int blobNumber = article.getBlobnumber();
+
+        // Move to the cluster entry in the clusterPtrPos
+        this.mReader.seek(this.mFile.getClusterPtrPos() + clusterNumber * 8);
+
+        // Read the location of the cluster
+        final int clusterPos = this.mReader.readEightLittleEndianBytesValue(new byte[8]);
+
+        // Move to the cluster
+        this.mReader.seek(clusterPos);
+
+        // Read the first byte, for compression information
+        final int compressionType = this.mReader.read();
+
+        // Check the compression type that was read
+        switch (compressionType) {
+
+        // TODO: Read uncompressed data directly
+        case 0:
+        case 1:
+            return readBlobFromCluster(this.mReader, blobNumber);
+
+        // LZMA2 compressed data
+        case 4:
+            // The second argument is kept as it was. NOTE(review): the old comment called it a
+            // 40MiB dictionary size, XZ for Java documents it as a memory limit in KiB - confirm.
+            return readBlobFromCluster(new SingleXZInputStream(this.mReader, 4194304), blobNumber);
+
+        // Any other compression type is not handled
+        default:
+            return null;
+        }
+    }
+
+    /**
+     * Extracts one blob from cluster data that starts at the current position of the stream.
+     * <p>
+     * The data begins with a table of 4 byte little-endian offsets, counted from the start
+     * of that table. Entry i is the start of blob i and entry i + 1 is its end, so the first
+     * entry divided by 4 is the number of table entries.
+     * NOTE(review): 4 byte offsets are assumed for every cluster, as before - confirm.
+     *
+     * @param cluster plain or decompressing stream positioned behind the compression byte
+     * @param blobNumber index of the wanted blob
+     * @return a stream holding exactly the bytes of the blob
+     * @throws IOException if the stream ends early or the end offset lies before the start
+     */
+    private static ByteArrayOutputStream readBlobFromCluster(final java.io.InputStream cluster,
+            final int blobNumber) throws IOException {
+
+        // Scratch buffer for one 4 byte table entry
+        final byte[] word = new byte[4];
+
+        // The first four bytes are the offset of the zeroth blob
+        readExactly(cluster, word, 4);
+        final int firstOffset = Utilities.toFourLittleEndianInteger(word);
+
+        // The number of blobs
+        final int numberOfBlobs = firstOffset / 4;
+
+        // The blobNumber has to be lesser than the numberOfBlobs
+        assert blobNumber < numberOfBlobs;
+
+        // Start offset of the wanted blob
+        final int offset1;
+        if (blobNumber == 0) {
+            // The first offset is what we read earlier
+            offset1 = firstOffset;
+        } else {
+            // Jump over the table entries 1 .. blobNumber - 1
+            final int location = (blobNumber - 1) * 4;
+            Utilities.skipFully(cluster, location);
+            readExactly(cluster, word, 4);
+            offset1 = Utilities.toFourLittleEndianInteger(word);
+        }
+
+        // The following table entry is the end of the blob
+        readExactly(cluster, word, 4);
+        final int offset2 = Utilities.toFourLittleEndianInteger(word);
+
+        // Length of the blob
+        final int differenceOffset = offset2 - offset1;
+        if (differenceOffset < 0) {
+            throw new IOException("blob " + blobNumber + " ends before it starts");
+        }
+
+        // blobNumber + 2 table entries have been consumed so far; skip up to the blob start
+        Utilities.skipFully(cluster, (offset1 - 4 * (blobNumber + 2)));
+
+        // Copy the blob
+        final byte[] data = new byte[differenceOffset];
+        readExactly(cluster, data, differenceOffset);
+
+        // Hand the bytes back in the stream type the callers expect
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        baos.write(data, 0, differenceOffset);
+
+        return baos;
+    }
+
+    /**
+     * Reads exactly count bytes into target[0..count-1].
+     * <p>
+     * A single read call may return fewer bytes than requested, which decompressing
+     * streams do regularly; the former single reads could therefore hand back blobs whose
+     * tail was never filled.
+     *
+     * @throws java.io.EOFException if the stream ends before count bytes were read
+     */
+    private static void readExactly(final java.io.InputStream in, final byte[] target, final int count)
+            throws IOException {
+
+        int done = 0;
+
+        while (done < count) {
+            // read() may deliver only a part of the requested range
+            final int n = in.read(target, done, count - done);
+            if (n < 0) {
+                throw new java.io.EOFException("stream ended after " + done + " of " + count + " bytes");
+            }
+            done += n;
+        }
+    }
+
+    /**
+     * Reads the directory entry that one slot of the title pointer list refers to.
+     * <p>
+     * The four bytes at {@code position} are read as an index into the URL pointer
+     * list. The eight bytes stored in that URL pointer slot are read as the file
+     * position of the directory entry, which is then parsed field by field.
+     * <p>
+     * The index handed to the created entry is the URL pointer list index read
+     * from the title pointer slot. {@code position} itself lies in the title
+     * pointer list, so converting it with the base and slot width of the URL
+     * pointer list would not yield a meaningful index.
+     *
+     * @param position file position of a slot inside the title pointer list
+     * @return a {@link RedirectEntry} if the MIME type field of the entry is
+     *         65535, otherwise an {@link ArticleEntry}; an empty title is
+     *         replaced by the URL of the entry
+     * @throws IOException if seeking in or reading from the file fails
+     */
+    public DirectoryEntry getDirectoryInfoAtTitlePosition(final int position)
+            throws IOException {
+
+        // Scratch buffer, sized for the widest (8 byte) value read below
+        final byte[] buffer = new byte[8];
+
+        // The title pointer slot holds the entry's index in the URL pointer list
+        this.mReader.seek(position);
+        final int urlListIndex = this.mReader.readFourLittleEndianBytesValue(buffer);
+
+        // URL pointer slots are 8 bytes wide and hold the entry's file position
+        this.mReader.seek(this.mFile.getUrlPtrPos() + 8 * urlListIndex);
+        final int entryPos = this.mReader.readEightLittleEndianBytesValue(buffer);
+
+        // Go to the location of the directory entry
+        this.mReader.seek(entryPos);
+
+        // The entry starts with its two byte MIME type number
+        final int type = this.mReader.readTwoLittleEndianBytesValue(buffer);
+
+        // Ignore the parameter length
+        this.mReader.read();
+
+        // One byte namespace, followed by the four byte revision
+        final char namespace = (char) this.mReader.read();
+        final int revision = this.mReader.readFourLittleEndianBytesValue(buffer);
+
+        // A MIME type of 65535 marks a redirect; anything else is an article
+        if (type == 65535) {
+
+            // Remaining redirect fields: redirect index, URL, title
+            final int redirectIndex = this.mReader.readFourLittleEndianBytesValue(buffer);
+            final String url = this.mReader.readString();
+            String title = this.mReader.readString();
+
+            // An empty title falls back to the URL
+            if (title.isEmpty()) {
+                title = url;
+            }
+
+            return new RedirectEntry(type, namespace, revision, redirectIndex,
+                    url, title, urlListIndex);
+        } else {
+
+            // Remaining article fields: cluster number, blob number, URL, title
+            final int clusterNumber = this.mReader.readFourLittleEndianBytesValue(buffer);
+            final int blobNumber = this.mReader.readFourLittleEndianBytesValue(buffer);
+            final String url = this.mReader.readString();
+            String title = this.mReader.readString();
+
+            // An empty title falls back to the URL; parameter data is ignored
+            if (title.isEmpty()) {
+                title = url;
+            }
+
+            return new ArticleEntry(type, namespace, revision, clusterNumber,
+                    blobNumber, url, title, urlListIndex);
+        }
+    }
+
+    /** Returns the ZIMFile held in the {@code mFile} field of this reader;
+     *  the stored reference itself is handed out, no copy is made. */
+    public ZIMFile getZIMFile() { return this.mFile; }
+}
diff --git a/source/org/openzim/ZIMTest.java b/source/org/openzim/ZIMTest.java
new file mode 100644
index 000000000..6d8ed64fb
--- /dev/null
+++ b/source/org/openzim/ZIMTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.IOException;
+
+public class ZIMTest {
+    public static void main(final String[] args) {
+        if(args.length!=2) {
+            System.out.println("Usage: java ZIMTest <ZIM_FILE> <ARTICLE_NAME>");
+            System.exit(0);
+        }
+
+        // args[0] is the Zim File's location
+        final ZIMFile file = new ZIMFile(args[0]);
+
+        // Associate the Zim File with a Reader
+        final ZIMReader zReader = new ZIMReader(file);
+
+        try {
+            // args[1] is the name of the articles that is
+             // to be fetched
+            System.out.println(zReader.getArticleData(args[1],'A').toString("utf-8"));
+        } catch (final IOException e) {
+            e.printStackTrace();
+        }
+    }
+}