Integrated the source code of an openZIM file format reader. These are

the raw format reader files, not yet integrated into YaCy; that integration
may follow as a next step. The ZIM file format is documented at
https://openzim.org and the reader code was taken from the archived,
unmaintained repository at https://github.com/openzim/zimreader-java
pull/607/head
Michael Peter Christen 1 year ago
parent 4308aa5415
commit 1fefae9baf

@ -0,0 +1,46 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
/**
 * A {@link DirectoryEntry} that additionally carries a cluster number and a
 * blob number, exactly as they were handed to the constructor.
 */
public class ArticleEntry extends DirectoryEntry {

    int clusterNumber;
    int blobnumber;

    /**
     * Creates an article entry.
     *
     * @param mimeType      forwarded to {@link DirectoryEntry}
     * @param namespace     forwarded to {@link DirectoryEntry}
     * @param revision      forwarded to {@link DirectoryEntry}
     * @param clusterNumber value later returned by {@link #getClusterNumber()}
     * @param blobNumber    value later returned by {@link #getBlobnumber()}
     * @param url           forwarded to {@link DirectoryEntry}
     * @param title         forwarded to {@link DirectoryEntry}
     * @param urlListindex  forwarded to {@link DirectoryEntry}
     */
    public ArticleEntry(final int mimeType, final char namespace, final int revision,
            final int clusterNumber, final int blobNumber, final String url, final String title,
            final int urlListindex) {
        super(mimeType, namespace, revision, url, title, urlListindex);
        this.blobnumber = blobNumber;
        this.clusterNumber = clusterNumber;
    }

    /** @return the cluster number given at construction time */
    public int getClusterNumber() {
        return clusterNumber;
    }

    /** @return the blob number given at construction time */
    public int getBlobnumber() {
        return blobnumber;
    }
}

@ -0,0 +1,69 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
/**
 * Common base class of the directory entry types. It stores the six values
 * passed to its constructor and exposes each of them through a getter.
 */
public abstract class DirectoryEntry {

    int mimeType;
    char namespace;
    int revision;
    String url;
    String title;
    int urlListindex;

    /**
     * Stores all arguments unchanged.
     *
     * @param mimeType  value returned by {@link #getMimeType()}
     * @param namespace value returned by {@link #getNamespace()}
     * @param revision  value returned by {@link #getRevision()}
     * @param url       value returned by {@link #getUrl()}
     * @param title     value returned by {@link #getTitle()}
     * @param index     value returned by {@link #getUrlListindex()}
     */
    public DirectoryEntry(final int mimeType, final char namespace, final int revision,
            final String url, final String title, final int index) {
        // Textual part first, numeric part second; the assignments are independent.
        this.url = url;
        this.title = title;
        this.namespace = namespace;
        this.mimeType = mimeType;
        this.revision = revision;
        this.urlListindex = index;
    }

    /** @return the MIME type value given at construction time */
    public int getMimeType() {
        return mimeType;
    }

    /** @return the namespace character given at construction time */
    public char getNamespace() {
        return namespace;
    }

    /** @return the revision given at construction time */
    public int getRevision() {
        return revision;
    }

    /** @return the URL given at construction time */
    public String getUrl() {
        return url;
    }

    /** @return the title given at construction time */
    public String getTitle() {
        return title;
    }

    /** @return the index given at construction time */
    public int getUrlListindex() {
        return urlListindex;
    }
}

@ -0,0 +1,135 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
 * An {@link InputStream} facade over a {@link RandomAccessFile}, so that the
 * same file handle can be used both for random access (seek / file pointer)
 * and as a plain stream. It also offers helpers for the little-endian
 * integers and zero-terminated strings found in a ZIM file.
 *
 * <p>The stream does not own the file: it never closes the underlying
 * {@link RandomAccessFile}; use {@link #getRandomAccessFile()} for that.
 *
 * @author Arunesh Mathur <aruneshmathur1990 at gmail.com>
 */
public class RandomAcessFileZIMInputStream extends InputStream {

    private final RandomAccessFile mRAFReader;

    /** File pointer remembered by {@link #mark()}, or -1 when nothing is marked. */
    private long mMarked = -1;

    /**
     * @param reader the file to read from; all calls are delegated to it
     */
    public RandomAcessFileZIMInputStream(final RandomAccessFile reader) {
        this.mRAFReader = reader;
    }

    /**
     * Reads 2 bytes at the current position and decodes them as little endian.
     *
     * @param buffer scratch buffer, at least 2 bytes long
     * @return the decoded value
     * @throws IllegalArgumentException if {@code buffer} is too small
     * @throws IOException if the file ends early or cannot be read
     */
    // TODO: Remove the parameter buffer
    public int readTwoLittleEndianBytesValue(final byte[] buffer) throws IOException {
        fill(buffer, 2);
        return Utilities.toTwoLittleEndianInteger(buffer);
    }

    /**
     * Reads 4 bytes at the current position and decodes them as little endian.
     *
     * @param buffer scratch buffer, at least 4 bytes long
     * @return the decoded value
     * @throws IllegalArgumentException if {@code buffer} is too small
     * @throws IOException if the file ends early or cannot be read
     */
    // TODO: Remove the parameter buffer
    public int readFourLittleEndianBytesValue(final byte[] buffer) throws IOException {
        fill(buffer, 4);
        return Utilities.toFourLittleEndianInteger(buffer);
    }

    /**
     * Reads 8 bytes at the current position and hands them to
     * {@link Utilities#toEightLittleEndianInteger(byte[])}.
     *
     * @param buffer scratch buffer, at least 8 bytes long
     * @return the decoded value as an {@code int}
     * @throws IllegalArgumentException if {@code buffer} is too small
     * @throws IOException if the file ends early or cannot be read
     */
    // TODO: Remove the parameter buffer
    public int readEightLittleEndianBytesValue(final byte[] buffer)
            throws IOException {
        fill(buffer, 8);
        return Utilities.toEightLittleEndianInteger(buffer);
    }

    /**
     * Reads 16 bytes at the current position and hands them to
     * {@link Utilities#toSixteenLittleEndianInteger(byte[])}.
     *
     * @param buffer scratch buffer, at least 16 bytes long
     * @return the decoded value as an {@code int}
     * @throws IllegalArgumentException if {@code buffer} is too small
     * @throws IOException if the file ends early or cannot be read
     */
    // TODO: Remove the parameter buffer
    public int readSixteenLittleEndianBytesValue(final byte[] buffer)
            throws IOException {
        fill(buffer, 16);
        return Utilities.toSixteenLittleEndianInteger(buffer);
    }

    /**
     * Reads bytes from the current position up to (and consuming) the next
     * {@code '\0'} byte and returns them decoded as UTF-8.
     *
     * @return the decoded string without its terminator
     * @throws java.io.EOFException if the file ends before a terminator is seen
     * @throws IOException if the file cannot be read
     */
    public String readString() throws IOException {
        final java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
        int b = this.mRAFReader.read();
        while (b != 0) {
            if (b < 0) {
                // read() reports end of file as -1, which never equals the
                // terminator; without this check the loop would not end.
                throw new java.io.EOFException(
                        "end of file reached before the string terminator");
            }
            bytes.write(b);
            b = this.mRAFReader.read();
        }
        // Decode the collected bytes in one go so multi-byte sequences survive.
        return new String(bytes.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    }

    @Override
    public int read() throws IOException {
        return this.mRAFReader.read();
    }

    /**
     * Bulk read delegated to the file, so that streams layered on top of this
     * one do not fall back to {@link InputStream}'s byte-by-byte default.
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        return this.mRAFReader.read(b, off, len);
    }

    /** @return the wrapped file */
    public RandomAccessFile getRandomAccessFile() {
        return this.mRAFReader;
    }

    /**
     * Moves the file pointer of the wrapped file.
     *
     * @param pos absolute position to move to
     * @throws IOException if the wrapped file rejects the position
     */
    public void seek(final long pos) throws IOException {
        this.mRAFReader.seek(pos);
    }

    /**
     * @return the current file pointer of the wrapped file
     * @throws IOException if the wrapped file cannot report it
     */
    public long getFilePointer() throws IOException {
        return this.mRAFReader.getFilePointer();
    }

    /**
     * Remembers the current file pointer for a later {@link #reset()}. Note
     * that this is not an override of {@link InputStream#mark(int)}.
     *
     * @throws IOException if the wrapped file cannot report its position
     */
    public void mark() throws IOException {
        this.mMarked = this.mRAFReader.getFilePointer();
    }

    /**
     * Returns to the position remembered by {@link #mark()} and clears the
     * mark. Does nothing when no mark is set.
     */
    @Override
    public void reset() throws IOException {
        if (this.mMarked == -1) {
            return;
        }
        this.mRAFReader.seek(this.mMarked);
        this.mMarked = -1;
    }

    /**
     * Fills the first {@code count} bytes of {@code buffer} from the file,
     * failing instead of returning a partially filled buffer.
     */
    private void fill(final byte[] buffer, final int count) throws IOException {
        if (buffer.length < count) {
            throw new IllegalArgumentException("buffer too small");
        }
        // readFully loops until all bytes arrived or throws EOFException.
        this.mRAFReader.readFully(buffer, 0, count);
    }
}

@ -0,0 +1,37 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
/**
 * A {@link DirectoryEntry} that additionally carries a redirect index,
 * exactly as it was handed to the constructor.
 */
public class RedirectEntry extends DirectoryEntry {

    int redirectIndex;

    /**
     * Creates a redirect entry.
     *
     * @param mimeType      forwarded to {@link DirectoryEntry}
     * @param namespace     forwarded to {@link DirectoryEntry}
     * @param revision      forwarded to {@link DirectoryEntry}
     * @param redirectIndex value later returned by {@link #getRedirectIndex()}
     * @param url           forwarded to {@link DirectoryEntry}
     * @param title         forwarded to {@link DirectoryEntry}
     * @param urlListindex  forwarded to {@link DirectoryEntry}
     */
    public RedirectEntry(final int mimeType, final char namespace, final int revision,
            final int redirectIndex, final String url, final String title, final int urlListindex) {
        super(mimeType, namespace, revision, url, title, urlListindex);
        this.redirectIndex = redirectIndex;
    }

    /** @return the redirect index given at construction time */
    public int getRedirectIndex() {
        return redirectIndex;
    }
}

@ -0,0 +1,84 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
import java.io.IOException;
import java.io.InputStream;
/**
 * Static helpers for decoding little-endian integers from byte buffers and
 * for skipping bytes in a stream.
 */
public class Utilities {

    // TODO: Write a binary search algorithm
    public static int binarySearch() {
        return -1;
    }

    /**
     * Decodes the first 2 bytes of {@code buffer} as an unsigned little-endian value.
     *
     * @param buffer source bytes, at least 2 long
     * @return a value in the range 0..65535
     * @throws IllegalArgumentException if {@code buffer} is shorter than 2 bytes
     */
    public static int toTwoLittleEndianInteger(final byte[] buffer) throws IOException {
        requireLength(buffer, 2);
        return (buffer[0] & 0xFF) | ((buffer[1] & 0xFF) << 8);
    }

    /**
     * Decodes the first 4 bytes of {@code buffer} as a little-endian {@code int}.
     *
     * @param buffer source bytes, at least 4 long
     * @return the decoded value
     * @throws IllegalArgumentException if {@code buffer} is shorter than 4 bytes
     */
    public static int toFourLittleEndianInteger(final byte[] buffer) throws IOException {
        requireLength(buffer, 4);
        return lowFourBytes(buffer);
    }

    /**
     * Decodes the first 8 bytes of {@code buffer} as a little-endian value and
     * narrows it to {@code int}, i.e. returns its low 32 bits. Use
     * {@link #toEightLittleEndianLong(byte[])} when the full range is needed.
     *
     * @param buffer source bytes, at least 8 long
     * @return the low 32 bits of the decoded value
     * @throws IllegalArgumentException if {@code buffer} is shorter than 8 bytes
     */
    public static int toEightLittleEndianInteger(final byte[] buffer) throws IOException {
        return (int) toEightLittleEndianLong(buffer);
    }

    /**
     * Decodes the first 8 bytes of {@code buffer} as a little-endian {@code long}.
     *
     * @param buffer source bytes, at least 8 long
     * @return the decoded 64-bit value
     * @throws IllegalArgumentException if {@code buffer} is shorter than 8 bytes
     */
    public static long toEightLittleEndianLong(final byte[] buffer) throws IOException {
        requireLength(buffer, 8);
        long value = 0L;
        // Most significant byte is last in the buffer, so fold from the end.
        for (int i = 7; i >= 0; i--) {
            value = (value << 8) | (buffer[i] & 0xFFL);
        }
        return value;
    }

    /**
     * Returns the low 32 bits of the 16-byte little-endian value at the start
     * of {@code buffer}. A 128-bit value cannot be represented in an
     * {@code int}; the remaining 12 bytes are ignored.
     *
     * @param buffer source bytes, at least 16 long
     * @return the low 32 bits of the value
     * @throws IllegalArgumentException if {@code buffer} is shorter than 16 bytes
     */
    public static int toSixteenLittleEndianInteger(final byte[] buffer) throws IOException {
        requireLength(buffer, 16);
        return lowFourBytes(buffer);
    }

    /**
     * Skips exactly {@code bytes} bytes of {@code stream}. A non-positive
     * count skips nothing.
     *
     * @param stream the stream to advance
     * @param bytes  number of bytes to skip
     * @throws java.io.EOFException if the stream ends before all bytes were skipped
     * @throws IOException if the stream fails
     */
    public static void skipFully(final InputStream stream, final long bytes) throws IOException {
        long remaining = bytes;
        while (remaining > 0) {
            final long skipped = stream.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
                continue;
            }
            // skip() may return 0 without being at the end of the stream, so
            // probe with a single read to tell "no progress" from end of file.
            if (stream.read() < 0) {
                throw new java.io.EOFException(
                        "stream ended with " + remaining + " bytes left to skip");
            }
            remaining--;
        }
    }

    /** Rejects buffers that cannot hold {@code needed} bytes. */
    private static void requireLength(final byte[] buffer, final int needed) {
        if (buffer.length < needed) {
            throw new IllegalArgumentException("buffer too small");
        }
    }

    /**
     * Little-endian decode of bytes 0..3. Java masks {@code int} shift
     * distances to 0..31, so higher bytes must never be shifted into an
     * {@code int} expression; they are deliberately left out here.
     */
    private static int lowFourBytes(final byte[] buffer) {
        return (buffer[0] & 0xFF)
                | ((buffer[1] & 0xFF) << 8)
                | ((buffer[2] & 0xFF) << 16)
                | ((buffer[3] & 0xFF) << 24);
    }
}

@ -0,0 +1,198 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;
/**
 * @author Arunesh Mathur
 *
 * A ZIM file implementation that stores the Header and the MIMETypeList.
 *
 * The header is read once, in the constructor. Reading is best effort: a
 * failure is reported on standard error and leaves the fields that were not
 * reached at their defaults (zero values, empty MIME type list).
 */
public class ZIMFile extends File {

    private static final long serialVersionUID = 1L;

    // Both start out non-null so the getters stay usable after a failed read.
    private Header mHeader = new Header();
    private List<String> mMIMETypeList = new ArrayList<>(); // Can be removed if not needed

    /**
     * @param path location of the ZIM file; a missing file is reported on
     *             standard error, not thrown
     */
    public ZIMFile(final String path) {
        super(path);
        try {
            readHeader();
        } catch (final FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the fixed header fields and then the zero-terminated MIME type
     * strings, which end with an empty string.
     *
     * @throws FileNotFoundException if the file does not exist or cannot be opened
     */
    private void readHeader() throws FileNotFoundException {
        // Check whether the file exists
        if (!this.exists()) {
            throw new FileNotFoundException(
                    "The file that you specified was not found.");
        }

        final Header header = new Header();
        final List<String> mimeTypes = new ArrayList<>();
        this.mHeader = header;
        this.mMIMETypeList = mimeTypes;

        // try-with-resources: the file handle is released on every path.
        try (RandomAccessFile raf = new RandomAccessFile(this, "r")) {
            header.magicNumber = readIntLE(raf);
            header.version = readIntLE(raf);
            // The uuid field is 16 bytes wide but stored in an int: keep its
            // low 4 bytes and step over the other 12.
            header.uuid = readIntLE(raf);
            raf.readFully(new byte[12]);
            header.articleCount = readIntLE(raf);
            header.clusterCount = readIntLE(raf);
            // The 8-byte positions are narrowed to the int fields of Header.
            header.urlPtrPos = (int) readLongLE(raf);
            header.titlePtrPos = (int) readLongLE(raf);
            header.clusterPtrPos = (int) readLongLE(raf);
            final long mimeListPos = readLongLE(raf);
            header.mimeListPos = (int) mimeListPos;
            header.mainPage = readIntLE(raf);
            header.layoutPage = readIntLE(raf);

            // The MIME type list starts at mimeListPos. Headers may carry
            // further fields after layoutPage, so do not assume the list
            // begins at the current position.
            if (mimeListPos > raf.getFilePointer()) {
                raf.seek(mimeListPos);
            }

            String mimeType = readZeroTerminated(raf);
            while (!mimeType.isEmpty()) {
                mimeTypes.add(mimeType);
                mimeType = readZeroTerminated(raf);
            }
        } catch (final FileNotFoundException e) {
            throw e;
        } catch (final java.io.IOException e) {
            // Best effort, as before: report and keep what was read so far.
            e.printStackTrace();
        }
    }

    /** Reads 4 bytes and interprets them as a little-endian int. */
    private static int readIntLE(final RandomAccessFile raf) throws java.io.IOException {
        // readInt() is big endian and throws EOFException on a short file.
        return Integer.reverseBytes(raf.readInt());
    }

    /** Reads 8 bytes and interprets them as a little-endian long. */
    private static long readLongLE(final RandomAccessFile raf) throws java.io.IOException {
        return Long.reverseBytes(raf.readLong());
    }

    /** Reads bytes up to (and consuming) the next zero byte, decoded as UTF-8. */
    private static String readZeroTerminated(final RandomAccessFile raf) throws java.io.IOException {
        final java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
        int b = raf.read();
        while (b != 0) {
            if (b < 0) {
                // End of file is -1, never 0: stop instead of looping forever.
                throw new java.io.EOFException("MIME type list is not terminated");
            }
            bytes.write(b);
            b = raf.read();
        }
        return new String(bytes.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    }

    public int getVersion() {
        return this.mHeader.version;
    }

    /** @return the low 32 bits of the 16-byte uuid field */
    public int getUuid() {
        return this.mHeader.uuid;
    }

    public int getArticleCount() {
        return this.mHeader.articleCount;
    }

    public int getClusterCount() {
        return this.mHeader.clusterCount;
    }

    public int getUrlPtrPos() {
        return this.mHeader.urlPtrPos;
    }

    public int getTitlePtrPos() {
        return this.mHeader.titlePtrPos;
    }

    public int getClusterPtrPos() {
        return this.mHeader.clusterPtrPos;
    }

    /**
     * @param mimeNumber zero-based index into the MIME type list
     * @return the MIME type string at that index
     * @throws IndexOutOfBoundsException if the list has no such entry
     */
    public String getMIMEType(final int mimeNumber) {
        return this.mMIMETypeList.get(mimeNumber);
    }

    /** @return the {@code mimeListPos} header field */
    public int getHeaderSize() {
        return this.mHeader.mimeListPos;
    }

    public int getMainPage() {
        return this.mHeader.mainPage;
    }

    public int getLayoutPage() {
        return this.mHeader.layoutPage;
    }

    /** Plain holder for the header fields, in the order they are read. */
    public class Header {
        int magicNumber;
        int version;
        int uuid;
        int articleCount;
        int clusterCount;
        int urlPtrPos;
        int titlePtrPos;
        int clusterPtrPos;
        int mimeListPos;
        int mainPage;
        int layoutPage;
    }
}

@ -0,0 +1,408 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;
import org.tukaani.xz.SingleXZInputStream;
/**
 * @author Arunesh Mathur
 *
 * A ZIMReader that reads data from the ZIMFile.
 *
 * The reader keeps one open file handle for its whole lifetime; call
 * {@link #close()} when done. It is not safe for concurrent use because every
 * method moves the shared file pointer.
 */
public class ZIMReader implements java.io.Closeable {

    /** MIME type value that marks a directory entry as a redirect. */
    private static final int REDIRECT_MIME_TYPE = 65535;

    /** Offset of the URL string inside a redirect directory entry. */
    private static final int REDIRECT_URL_OFFSET = 12;

    /** Offset of the URL string inside an article directory entry. */
    private static final int ARTICLE_URL_OFFSET = 16;

    /**
     * Second argument passed to {@link SingleXZInputStream}. NOTE(review): the
     * XZ for Java documentation describes it as a memory usage limit in KiB,
     * not as a dictionary size - confirm the intended value.
     */
    private static final int XZ_MEMORY_LIMIT = 4194304;

    private final ZIMFile mFile;
    private RandomAcessFileZIMInputStream mReader;

    /**
     * Opens {@code file} for reading. If the file cannot be opened the
     * failure is printed and every later read throws
     * {@link FileNotFoundException}.
     *
     * @param file the ZIM file whose header has already been parsed
     */
    public ZIMReader(final ZIMFile file) {
        this.mFile = file;
        try {
            this.mReader = new RandomAcessFileZIMInputStream(new RandomAccessFile(
                    this.mFile, "r"));
        } catch (final FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Lists the URL of every directory entry in the order of the URL pointer list.
     *
     * @return the URLs, one per entry
     * @throws IOException if the file cannot be read
     */
    public List<String> getURLListByURL() throws IOException {
        final RandomAcessFileZIMInputStream reader = reader();
        final byte[] buffer = new byte[8];
        final List<String> urls = new ArrayList<>();
        final int entryCount = this.mFile.getArticleCount();

        // Move to the spot where URL's are listed
        reader.seek(this.mFile.getUrlPtrPos());
        for (int i = 0; i < entryCount; i++) {
            // The position of directory entry i
            final int entryPos = reader.readEightLittleEndianBytesValue(buffer);
            // Remember where the pointer list continues
            reader.mark();
            urls.add(readUrlAt(entryPos, buffer));
            reader.reset();
        }
        return urls;
    }

    /**
     * Lists the URL of every directory entry in the order of the title pointer list.
     *
     * @return the URLs, one per entry
     * @throws IOException if the file cannot be read
     */
    public List<String> getURLListByTitle() throws IOException {
        final RandomAcessFileZIMInputStream reader = reader();
        final byte[] buffer = new byte[8];
        final List<String> urls = new ArrayList<>();
        final int entryCount = this.mFile.getArticleCount();
        final long urlPtrPos = this.mFile.getUrlPtrPos();

        // Move to the spot where the title pointers are listed
        reader.seek(this.mFile.getTitlePtrPos());
        for (int i = 0; i < entryCount; i++) {
            // Each title pointer is an index into the URL pointer list
            final int urlIndex = reader.readFourLittleEndianBytesValue(buffer);
            // Remember where the title pointer list continues
            reader.mark();
            reader.seek(urlPtrPos + 8L * urlIndex);
            final int entryPos = reader.readEightLittleEndianBytesValue(buffer);
            urls.add(readUrlAt(entryPos, buffer));
            // Return to the marked position
            reader.reset();
        }
        return urls;
    }

    /**
     * Looks up the directory entry whose namespace and URL match the arguments.
     *
     * <p>The lookup is a binary search over the URL pointer list, because the
     * comparison key is {@code namespace + "/" + url}. It relies on that list
     * being sorted by namespace and URL, as the ZIM format specifies.
     *
     * @param articleName URL of the entry inside its namespace
     * @param namespace   namespace character of the entry
     * @return the matching entry, or {@code null} if there is none
     * @throws IOException if the file cannot be read
     */
    // Gives the minimum required information needed for the given articleName
    public DirectoryEntry getDirectoryInfo(String articleName, final char namespace)
            throws IOException {
        final String wanted = namespace + "/" + articleName;
        int low = 0;
        // Last valid index; with zero entries the loop is skipped entirely.
        int high = this.mFile.getArticleCount() - 1;
        while (low <= high) {
            final int mid = (low + high) >>> 1;
            final DirectoryEntry entry = getDirectoryInfoAtUrlIndex(mid);
            final int order = wanted.compareTo(entry.getNamespace() + "/" + entry.getUrl());
            if (order < 0) {
                high = mid - 1;
            } else if (order > 0) {
                low = mid + 1;
            } else {
                return entry;
            }
        }
        return null;
    }

    /**
     * Returns the raw content of an article.
     *
     * @param articleName URL of the article inside its namespace
     * @param namespace   namespace character of the article
     * @return the blob bytes, or {@code null} if the entry does not exist, is
     *         not an article (e.g. a redirect), or its cluster uses a
     *         compression type other than 0, 1 (uncompressed) or 4 (LZMA2)
     * @throws IOException if the file cannot be read or the cluster is corrupt
     */
    public ByteArrayOutputStream getArticleData(final String articleName, final char namespace) throws IOException {
        // search in the cache first, if not found, then call getDirectoryInfo(articleName)
        final DirectoryEntry mainEntry = getDirectoryInfo(articleName, namespace);
        if (!(mainEntry instanceof ArticleEntry)) {
            return null;
        }
        final ArticleEntry article = (ArticleEntry) mainEntry;
        final RandomAcessFileZIMInputStream reader = reader();
        final byte[] pointer = new byte[8];

        // Move to the cluster entry in the clusterPtrPos
        reader.seek(this.mFile.getClusterPtrPos() + 8L * article.getClusterNumber());
        // Read the location of the cluster and move there
        final int clusterPos = reader.readEightLittleEndianBytesValue(pointer);
        reader.seek(clusterPos);

        // The first byte of a cluster carries the compression information
        final int compressionType = reader.read();
        switch (compressionType) {
            case 0:
            case 1:
                // Uncompressed: the blob table follows directly in the file
                return readBlob(reader, article.getBlobnumber());
            case 4:
                // LZMA2 compressed data. The XZ stream is intentionally not
                // closed: closing it would propagate to the shared reader.
                return readBlob(new SingleXZInputStream(reader, XZ_MEMORY_LIMIT),
                        article.getBlobnumber());
            default:
                return null;
        }
    }

    /**
     * Reads the directory entry referenced by one slot of the title pointer list.
     *
     * @param position absolute file position of the title pointer slot
     * @return the entry; its URL list index is the index stored in that slot
     * @throws IOException if the file cannot be read
     */
    public DirectoryEntry getDirectoryInfoAtTitlePosition(final int position)
            throws IOException {
        final RandomAcessFileZIMInputStream reader = reader();
        final byte[] buffer = new byte[4];
        // At the appropriate position in the titlePtrPos
        reader.seek(position);
        // The slot holds the index of the entry in the URL pointer list
        return getDirectoryInfoAtUrlIndex(reader.readFourLittleEndianBytesValue(buffer));
    }

    /** @return the file this reader was created for */
    public ZIMFile getZIMFile() {
        return this.mFile;
    }

    /**
     * Closes the underlying file handle. The reader must not be used afterwards.
     */
    @Override
    public void close() throws IOException {
        if (this.mReader != null) {
            this.mReader.getRandomAccessFile().close();
        }
    }

    /** Returns the open reader or fails if the constructor could not open the file. */
    private RandomAcessFileZIMInputStream reader() throws FileNotFoundException {
        if (this.mReader == null) {
            throw new FileNotFoundException("ZIM file could not be opened: " + this.mFile);
        }
        return this.mReader;
    }

    /** Reads the URL string of the directory entry that starts at {@code entryPos}. */
    private String readUrlAt(final int entryPos, final byte[] buffer) throws IOException {
        final RandomAcessFileZIMInputStream reader = reader();
        reader.seek(entryPos);
        // Article or Redirect entry? The URL starts at a different offset.
        final int mimeType = reader.readTwoLittleEndianBytesValue(buffer);
        final int urlOffset = (mimeType == REDIRECT_MIME_TYPE)
                ? REDIRECT_URL_OFFSET : ARTICLE_URL_OFFSET;
        reader.seek((long) entryPos + urlOffset);
        return reader.readString();
    }

    /** Reads the directory entry referenced by slot {@code urlIndex} of the URL pointer list. */
    private DirectoryEntry getDirectoryInfoAtUrlIndex(final int urlIndex) throws IOException {
        final RandomAcessFileZIMInputStream reader = reader();
        final byte[] buffer = new byte[8];
        // Move to the slot in the URL pointer list
        reader.seek(this.mFile.getUrlPtrPos() + 8L * urlIndex);
        // The slot holds the file position of the directory entry
        final int entryPos = reader.readEightLittleEndianBytesValue(buffer);
        return readDirectoryEntry(entryPos, urlIndex, buffer);
    }

    /** Parses the directory entry at {@code entryPos} into an article or redirect entry. */
    private DirectoryEntry readDirectoryEntry(final int entryPos, final int urlIndex,
            final byte[] buffer) throws IOException {
        final RandomAcessFileZIMInputStream reader = reader();
        // Go to the location of the directory entry
        reader.seek(entryPos);
        final int type = reader.readTwoLittleEndianBytesValue(buffer);
        // Ignore the parameter length
        reader.read();
        final char namespace = (char) reader.read();
        final int revision = reader.readFourLittleEndianBytesValue(buffer);

        if (type == REDIRECT_MIME_TYPE) {
            final int redirectIndex = reader.readFourLittleEndianBytesValue(buffer);
            final String url = reader.readString();
            final String title = reader.readString();
            // An empty title means "same as the URL"
            return new RedirectEntry(type, namespace, revision, redirectIndex,
                    url, title.equals("") ? url : title, urlIndex);
        }

        final int clusterNumber = reader.readFourLittleEndianBytesValue(buffer);
        final int blobNumber = reader.readFourLittleEndianBytesValue(buffer);
        final String url = reader.readString();
        final String title = reader.readString();
        // Parameter data ignored
        return new ArticleEntry(type, namespace, revision, clusterNumber,
                blobNumber, url, title.equals("") ? url : title, urlIndex);
    }

    /**
     * Extracts one blob from cluster data. {@code in} must be positioned at
     * the start of the blob offset table, i.e. right after the compression byte.
     */
    private static ByteArrayOutputStream readBlob(final java.io.InputStream in,
            final int blobNumber) throws IOException {
        final byte[] word = new byte[4];

        // The first four bytes are the offset of the zeroth blob, which is
        // also the size of the offset table
        readFully(in, word, 4);
        final int firstOffset = Utilities.toFourLittleEndianInteger(word);
        final int offsetCount = firstOffset / 4;
        // Blob i needs offsets i and i + 1, so the last usable blob number is
        // offsetCount - 2.
        if (blobNumber < 0 || blobNumber >= offsetCount - 1) {
            throw new IOException("blob " + blobNumber
                    + " does not exist in a cluster with " + offsetCount + " offsets");
        }

        final int start;
        if (blobNumber == 0) {
            // The first offset is what we read earlier
            start = firstOffset;
        } else {
            Utilities.skipFully(in, 4L * (blobNumber - 1));
            readFully(in, word, 4);
            start = Utilities.toFourLittleEndianInteger(word);
        }
        readFully(in, word, 4);
        final int end = Utilities.toFourLittleEndianInteger(word);

        // 4 * (blobNumber + 2) bytes of the offset table are consumed by now
        final long gap = start - 4L * (blobNumber + 2);
        final int length = end - start;
        if (gap < 0 || length < 0) {
            throw new IOException("corrupt cluster: blob offsets " + start + ".." + end);
        }
        Utilities.skipFully(in, gap);

        final byte[] data = new byte[length];
        readFully(in, data, length);
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos.write(data, 0, length);
        return baos;
    }

    /** Fills {@code target[0..count)}; a single read() may return fewer bytes than asked for. */
    private static void readFully(final java.io.InputStream in, final byte[] target,
            final int count) throws IOException {
        int filled = 0;
        while (filled < count) {
            final int n = in.read(target, filled, count - filled);
            if (n < 0) {
                throw new java.io.EOFException("stream ended after " + filled
                        + " of " + count + " bytes");
            }
            filled += n;
        }
    }
}

@ -0,0 +1,44 @@
/*
* Copyright (C) 2011 Arunesh Mathur
*
* This file is a part of zimreader-java.
*
* zimreader-java is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3.0 as
* published by the Free Software Foundation.
*
* zimreader-java is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
*/
package org.openzim;
import java.io.IOException;
/**
 * Small command line driver: prints the content of one article of a ZIM file.
 */
public class ZIMTest {

    /**
     * @param args {@code args[0]} is the location of the ZIM file,
     *             {@code args[1]} the name of the article to fetch from
     *             namespace {@code 'A'}
     */
    public static void main(final String[] args) {
        if (args.length != 2) {
            System.out.println("Usage: java ZIMTest <ZIM_FILE> <ARTICLE_NAME>");
            System.exit(0);
        }

        // args[0] is the Zim File's location
        final ZIMFile file = new ZIMFile(args[0]);

        // Associate the Zim File with a Reader
        final ZIMReader zReader = new ZIMReader(file);

        try {
            // args[1] is the name of the articles that is
            // to be fetched
            final java.io.ByteArrayOutputStream data = zReader.getArticleData(args[1], 'A');
            if (data == null) {
                // getArticleData signals "nothing to return" with null
                System.err.println("No article data available for: " + args[1]);
                return;
            }
            System.out.println(data.toString("utf-8"));
        } catch (final IOException e) {
            e.printStackTrace();
        }
    }
}
Loading…
Cancel
Save