You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/org/json/JSONTokener.java

633 lines
21 KiB

/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This class was taken from
* https://android.googlesource.com/platform/libcore/+/refs/heads/master/json/src/main/java/org/json
* and slightly modified (by mc@yacy.net):
* - removed dependency from other libraries (i.e. android.compat.annotation)
* - fixed "statement unnecessary nested" warnings
* - added class initialization with reader object
*/
package org.json;
import java.io.IOException;
import java.io.Reader;
// Note: this class was written without inspecting the non-free org.json sourcecode.
/**
* Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
* encoded string into the corresponding object. Most clients of
* this class will use only need the {@link #JSONTokener(String) constructor}
* and {@link #nextValue} method. Example usage: <pre>
* String json = "{"
* + " \"query\": \"Pizza\", "
* + " \"locations\": [ 94043, 90210 ] "
* + "}";
*
* JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
* String query = object.getString("query");
* JSONArray locations = object.getJSONArray("locations");</pre>
*
* <p>For best interoperability and performance use JSON that complies with
* RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
* this parser is lenient, so a successful parse does not indicate that the
* input string was valid JSON. All of the following syntax errors will be
* ignored:
* <ul>
* <li>End of line comments starting with {@code //} or {@code #} and ending
* with a newline character.
* <li>C-style comments starting with {@code /*} and ending with
* {@code *}{@code /}. Such comments may not be nested.
* <li>Strings that are unquoted or {@code 'single quoted'}.
* <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
* <li>Octal integers prefixed with {@code 0}.
* <li>Array elements separated by {@code ;}.
* <li>Unnecessary array separators. These are interpreted as if null was the
* omitted value.
* <li>Key-value pairs separated by {@code =} or {@code =>}.
* <li>Key-value pairs separated by {@code ;}.
* </ul>
*
* <p>Each tokener may be used to parse a single JSON string. Instances of this
* class are not thread safe. Although this class is nonfinal, it was not
* designed for inheritance and should not be subclassed. In particular,
* self-use by overrideable methods is not specified. See <i>Effective Java</i>
* Item 17, "Design and Document or inheritance or else prohibit it" for further
* information.
*/
public class JSONTokener {
/** The input JSON. */
private final String in;
/**
* The index of the next character to be returned by {@link #next}. When
* the input is exhausted, this equals the input's length.
*/
private int pos;
public JSONTokener(Reader reader) throws IOException {
final char[] buffer = new char[2048];
final StringBuilder out = new StringBuilder();
int charsRead;
while((charsRead = reader.read(buffer, 0, buffer.length)) > 0) {
out.append(buffer, 0, charsRead);
}
String in = out.toString();
if (in != null && in.startsWith("\ufeff")) {
in = in.substring(1);
}
this.in = in;
}
/**
* @param in JSON encoded string. Null is not permitted and will yield a
* tokener that throws {@code NullPointerExceptions} when methods are
* called.
*/
public JSONTokener(String in) {
// consume an optional byte order mark (BOM) if it exists
if (in != null && in.startsWith("\ufeff")) {
in = in.substring(1);
}
this.in = in;
}
/**
* Returns the next value from the input.
*
* @return a {@link JSONObject}, {@link JSONArray}, String, Boolean,
* Integer, Long, Double or {@link JSONObject#NULL}.
* @throws JSONException if the input is malformed.
*/
public Object nextValue() throws JSONException {
int c = nextCleanInternal();
switch (c) {
case -1:
throw syntaxError("End of input");
case '{':
return readObject();
case '[':
return readArray();
case '\'':
case '"':
return nextString((char) c);
default:
pos--;
return readLiteral();
}
}
private int nextCleanInternal() throws JSONException {
while (pos < in.length()) {
int c = in.charAt(pos++);
switch (c) {
case '\t':
case ' ':
case '\n':
case '\r':
continue;
case '/':
if (pos == in.length()) {
return c;
}
char peek = in.charAt(pos);
switch (peek) {
case '*':
// skip a /* c-style comment */
pos++;
int commentEnd = in.indexOf("*/", pos);
if (commentEnd == -1) {
throw syntaxError("Unterminated comment");
}
pos = commentEnd + 2;
continue;
case '/':
// skip a // end-of-line comment
pos++;
skipToEndOfLine();
continue;
default:
return c;
}
case '#':
/*
* Skip a # hash end-of-line comment. The JSON RFC doesn't
* specify this behavior, but it's required to parse
* existing documents. See http://b/2571423.
*/
skipToEndOfLine();
continue;
default:
return c;
}
}
return -1;
}
/**
* Advances the position until after the next newline character. If the line
* is terminated by "\r\n", the '\n' must be consumed as whitespace by the
* caller.
*/
private void skipToEndOfLine() {
for (; pos < in.length(); pos++) {
char c = in.charAt(pos);
if (c == '\r' || c == '\n') {
pos++;
break;
}
}
}
/**
* Returns the string up to but not including {@code quote}, unescaping any
* character escape sequences encountered along the way. The opening quote
* should have already been read. This consumes the closing quote, but does
* not include it in the returned string.
*
* @param quote either ' or ".
*/
public String nextString(char quote) throws JSONException {
/*
* For strings that are free of escape sequences, we can just extract
* the result as a substring of the input. But if we encounter an escape
* sequence, we need to use a StringBuilder to compose the result.
*/
StringBuilder builder = null;
/* the index of the first character not yet appended to the builder. */
int start = pos;
while (pos < in.length()) {
int c = in.charAt(pos++);
if (c == quote) {
if (builder == null) {
// a new string avoids leaking memory
return new String(in.substring(start, pos - 1));
}
builder.append(in, start, pos - 1);
return builder.toString();
}
if (c == '\\') {
if (pos == in.length()) {
throw syntaxError("Unterminated escape sequence");
}
if (builder == null) {
builder = new StringBuilder();
}
builder.append(in, start, pos - 1);
builder.append(readEscapeCharacter());
start = pos;
}
}
throw syntaxError("Unterminated string");
}
/**
* Unescapes the character identified by the character or characters that
* immediately follow a backslash. The backslash '\' should have already
* been read. This supports both unicode escapes "u000A" and two-character
* escapes "\n".
*/
private char readEscapeCharacter() throws JSONException {
char escaped = in.charAt(pos++);
switch (escaped) {
case 'u':
if (pos + 4 > in.length()) {
throw syntaxError("Unterminated escape sequence");
}
String hex = in.substring(pos, pos + 4);
pos += 4;
try {
return (char) Integer.parseInt(hex, 16);
} catch (NumberFormatException nfe) {
throw syntaxError("Invalid escape sequence: " + hex);
}
case 't':
return '\t';
case 'b':
return '\b';
case 'n':
return '\n';
case 'r':
return '\r';
case 'f':
return '\f';
case '\'':
case '"':
case '\\':
default:
return escaped;
}
}
/**
* Reads a null, boolean, numeric or unquoted string literal value. Numeric
* values will be returned as an Integer, Long, or Double, in that order of
* preference.
*/
private Object readLiteral() throws JSONException {
String literal = nextToInternal("{}[]/\\:,=;# \t\f");
if (literal.length() == 0) {
throw syntaxError("Expected literal value");
} else if ("null".equalsIgnoreCase(literal)) {
return JSONObject.NULL;
} else if ("true".equalsIgnoreCase(literal)) {
return Boolean.TRUE;
} else if ("false".equalsIgnoreCase(literal)) {
return Boolean.FALSE;
}
/* try to parse as an integral type... */
if (literal.indexOf('.') == -1) {
int base = 10;
String number = literal;
if (number.startsWith("0x") || number.startsWith("0X")) {
number = number.substring(2);
base = 16;
} else if (number.startsWith("0") && number.length() > 1) {
number = number.substring(1);
base = 8;
}
try {
long longValue = Long.parseLong(number, base);
if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) {
return (int) longValue;
}
return longValue;
} catch (NumberFormatException e) {
/*
* This only happens for integral numbers greater than
* Long.MAX_VALUE, numbers in exponential form (5e-10) and
* unquoted strings. Fall through to try floating point.
*/
}
}
/* ...next try to parse as a floating point... */
try {
return Double.valueOf(literal);
} catch (NumberFormatException ignored) {
}
/* ... finally give up. We have an unquoted string */
return new String(literal); // a new string avoids leaking memory
}
/**
* Returns the string up to but not including any of the given characters or
* a newline character. This does not consume the excluded character.
*/
private String nextToInternal(String excluded) {
int start = pos;
for (; pos < in.length(); pos++) {
char c = in.charAt(pos);
if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) {
return in.substring(start, pos);
}
}
return in.substring(start);
}
/**
* Reads a sequence of key/value pairs and the trailing closing brace '}' of
* an object. The opening brace '{' should have already been read.
*/
private JSONObject readObject() throws JSONException {
JSONObject result = new JSONObject();
/* Peek to see if this is the empty object. */
int first = nextCleanInternal();
if (first == '}') {
return result;
} else if (first != -1) {
pos--;
}
while (true) {
Object name = nextValue();
if (!(name instanceof String)) {
if (name == null) {
throw syntaxError("Names cannot be null");
}
throw syntaxError("Names must be strings, but " + name
+ " is of type " + name.getClass().getName());
}
/*
* Expect the name/value separator to be either a colon ':', an
* equals sign '=', or an arrow "=>". The last two are bogus but we
* include them because that's what the original implementation did.
*/
int separator = nextCleanInternal();
if (separator != ':' && separator != '=') {
throw syntaxError("Expected ':' after " + name);
}
if (pos < in.length() && in.charAt(pos) == '>') {
pos++;
}
result.put((String) name, nextValue());
switch (nextCleanInternal()) {
case '}':
return result;
case ';':
case ',':
continue;
default:
throw syntaxError("Unterminated object");
}
}
}
/**
* Reads a sequence of values and the trailing closing brace ']' of an
* array. The opening brace '[' should have already been read. Note that
* "[]" yields an empty array, but "[,]" returns a two-element array
* equivalent to "[null,null]".
*/
private JSONArray readArray() throws JSONException {
JSONArray result = new JSONArray();
/* to cover input that ends with ",]". */
boolean hasTrailingSeparator = false;
while (true) {
switch (nextCleanInternal()) {
case -1:
throw syntaxError("Unterminated array");
case ']':
if (hasTrailingSeparator) {
result.put(null);
}
return result;
case ',':
case ';':
/* A separator without a value first means "null". */
result.put(null);
hasTrailingSeparator = true;
continue;
default:
pos--;
}
result.put(nextValue());
switch (nextCleanInternal()) {
case ']':
return result;
case ',':
case ';':
hasTrailingSeparator = true;
continue;
default:
throw syntaxError("Unterminated array");
}
}
}
/**
* Returns an exception containing the given message plus the current
* position and the entire input string.
*/
public JSONException syntaxError(String message) {
return new JSONException(message + this);
}
/**
* Returns the current position and the entire input string.
*/
@Override public String toString() {
// consistent with the original implementation
return " at character " + pos + " of " + in;
}
/*
* Legacy APIs.
*
* None of the methods below are on the critical path of parsing JSON
* documents. They exist only because they were exposed by the original
* implementation and may be used by some clients.
*/
/**
* Returns true until the input has been exhausted.
*/
public boolean more() {
return pos < in.length();
}
/**
* Returns the next available character, or the null character '\0' if all
* input has been exhausted. The return value of this method is ambiguous
* for JSON strings that contain the character '\0'.
*/
public char next() {
return pos < in.length() ? in.charAt(pos++) : '\0';
}
/**
* Returns the next available character if it equals {@code c}. Otherwise an
* exception is thrown.
*/
public char next(char c) throws JSONException {
char result = next();
if (result != c) {
throw syntaxError("Expected " + c + " but was " + result);
}
return result;
}
/**
* Returns the next character that is not whitespace and does not belong to
* a comment. If the input is exhausted before such a character can be
* found, the null character '\0' is returned. The return value of this
* method is ambiguous for JSON strings that contain the character '\0'.
*/
public char nextClean() throws JSONException {
int nextCleanInt = nextCleanInternal();
return nextCleanInt == -1 ? '\0' : (char) nextCleanInt;
}
/**
* Returns the next {@code length} characters of the input.
*
* <p>The returned string shares its backing character array with this
* tokener's input string. If a reference to the returned string may be held
* indefinitely, you should use {@code new String(result)} to copy it first
* to avoid memory leaks.
*
* @throws JSONException if the remaining input is not long enough to
* satisfy this request.
*/
public String next(int length) throws JSONException {
if (pos + length > in.length()) {
throw syntaxError(length + " is out of bounds");
}
String result = in.substring(pos, pos + length);
pos += length;
return result;
}
/**
* Returns the {@link String#trim trimmed} string holding the characters up
* to but not including the first of:
* <ul>
* <li>any character in {@code excluded}
* <li>a newline character '\n'
* <li>a carriage return '\r'
* </ul>
*
* <p>The returned string shares its backing character array with this
* tokener's input string. If a reference to the returned string may be held
* indefinitely, you should use {@code new String(result)} to copy it first
* to avoid memory leaks.
*
* @return a possibly-empty string
*/
public String nextTo(String excluded) {
if (excluded == null) {
throw new NullPointerException("excluded == null");
}
return nextToInternal(excluded).trim();
}
/**
* Equivalent to {@code nextTo(String.valueOf(excluded))}.
*/
public String nextTo(char excluded) {
return nextToInternal(String.valueOf(excluded)).trim();
}
/**
* Advances past all input up to and including the next occurrence of
* {@code thru}. If the remaining input doesn't contain {@code thru}, the
* input is exhausted.
*/
public void skipPast(String thru) {
int thruStart = in.indexOf(thru, pos);
pos = thruStart == -1 ? in.length() : (thruStart + thru.length());
}
/**
* Advances past all input up to but not including the next occurrence of
* {@code to}. If the remaining input doesn't contain {@code to}, the input
* is unchanged.
*/
public char skipTo(char to) {
int index = in.indexOf(to, pos);
if (index != -1) {
pos = index;
return to;
}
return '\0';
}
/**
* Unreads the most recent character of input. If no input characters have
* been read, the input is unchanged.
*/
public void back() {
if (--pos == -1) {
pos = 0;
}
}
/**
* Returns the integer [0..15] value for the given hex character, or -1
* for non-hex input.
*
* @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other
* character will yield a -1 result.
*/
public static int dehexchar(char hex) {
if (hex >= '0' && hex <= '9') {
return hex - '0';
} else if (hex >= 'A' && hex <= 'F') {
return hex - 'A' + 10;
} else if (hex >= 'a' && hex <= 'f') {
return hex - 'a' + 10;
} else {
return -1;
}
}
}