- the cora package now has no dependencies on other yacy packages and becomes a 'base' package (refactoring)
- cleaned up (removed special code and documentation for 27c3) - added remote search functions to be used within cora git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7420 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
0e54233408
commit
10ae8d961b
@ -0,0 +1,45 @@
|
|||||||
|
/**
|
||||||
|
* AbstractFormatter
|
||||||
|
* Copyright 2011 by Michael Peter Christen
|
||||||
|
* First released 2.1.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.date;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.TimeZone;
|
||||||
|
|
||||||
|
public abstract class AbstractFormatter implements DateFormatter {
|
||||||
|
|
||||||
|
protected static final TimeZone TZ_GMT = TimeZone.getTimeZone("GMT");
|
||||||
|
|
||||||
|
// statics
|
||||||
|
public final static long secondMillis = 1000;
|
||||||
|
public final static long minuteMillis = 60 * secondMillis;
|
||||||
|
public final static long hourMillis = 60 * minuteMillis;
|
||||||
|
public final static long dayMillis = 24 * hourMillis;
|
||||||
|
public final static long normalyearMillis = 365 * dayMillis;
|
||||||
|
public final static long leapyearMillis = 366 * dayMillis;
|
||||||
|
|
||||||
|
protected long last_time;
|
||||||
|
protected String last_format;
|
||||||
|
|
||||||
|
public abstract Date parse(String s) throws ParseException;
|
||||||
|
public abstract String format(final Date date);
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
/**
|
||||||
|
* DateFormatter
|
||||||
|
* Copyright 2011 by Michael Peter Christen
|
||||||
|
* First released 2.1.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.date;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
|
/**
 * Contract shared by all date formatters of this package: a formatter can
 * turn a text into a {@link Date} and a {@link Date} back into a text.
 */
public interface DateFormatter {

    /**
     * Converts a textual date into a {@link Date}.
     *
     * @param s the text to parse
     * @return the parsed date
     * @throws ParseException if the text cannot be interpreted as a date
     */
    Date parse(String s) throws ParseException;

    /**
     * Converts a {@link Date} into its textual form.
     *
     * @param date the date to format
     * @return the textual form of the date
     */
    String format(Date date);

}
|
@ -0,0 +1,168 @@
|
|||||||
|
/**
|
||||||
|
* GenericFormatter
|
||||||
|
* Copyright 2011 by Michael Peter Christen
|
||||||
|
* First released 2.1.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.date;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
public class GenericFormatter extends AbstractFormatter implements DateFormatter {
|
||||||
|
|
||||||
|
public static final String PATTERN_SHORT_DAY = "yyyyMMdd";
|
||||||
|
public static final String PATTERN_SHORT_SECOND = "yyyyMMddHHmmss";
|
||||||
|
public static final String PATTERN_SHORT_MILSEC = "yyyyMMddHHmmssSSS";
|
||||||
|
private static final String PATTERN_RFC1123_SHORT = "EEE, dd MMM yyyy";
|
||||||
|
private static final String PATTERN_ANSIC = "EEE MMM d HH:mm:ss yyyy";
|
||||||
|
|
||||||
|
private static final SimpleDateFormat FORMAT_SHORT_DAY = new SimpleDateFormat(PATTERN_SHORT_DAY, Locale.US);
|
||||||
|
private static final SimpleDateFormat FORMAT_SHORT_SECOND = new SimpleDateFormat(PATTERN_SHORT_SECOND, Locale.US);
|
||||||
|
private static final SimpleDateFormat FORMAT_SHORT_MILSEC = new SimpleDateFormat(PATTERN_SHORT_MILSEC, Locale.US);
|
||||||
|
private static final SimpleDateFormat FORMAT_ANSIC = new SimpleDateFormat(PATTERN_ANSIC, Locale.US);
|
||||||
|
private static final SimpleDateFormat FORMAT_RFC1123_SHORT = new SimpleDateFormat(PATTERN_RFC1123_SHORT, Locale.US);
|
||||||
|
|
||||||
|
// find out time zone and DST offset
|
||||||
|
private static Calendar thisCalendar = Calendar.getInstance();
|
||||||
|
|
||||||
|
static {
|
||||||
|
// we want GMT times on the formats as well as they don't support any timezone
|
||||||
|
FORMAT_SHORT_DAY.setTimeZone(TZ_GMT);
|
||||||
|
FORMAT_SHORT_SECOND.setTimeZone(TZ_GMT);
|
||||||
|
FORMAT_SHORT_MILSEC.setTimeZone(TZ_GMT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final GenericFormatter SHORT_DAY_FORMATTER = new GenericFormatter(FORMAT_SHORT_DAY);
|
||||||
|
public static final GenericFormatter SHORT_SECOND_FORMATTER = new GenericFormatter(FORMAT_SHORT_SECOND);
|
||||||
|
public static final GenericFormatter SHORT_MILSEC_FORMATTER = new GenericFormatter(FORMAT_SHORT_MILSEC);
|
||||||
|
public static final GenericFormatter ANSIC_FORMATTER = new GenericFormatter(FORMAT_ANSIC);
|
||||||
|
public static final GenericFormatter RFC1123_SHORT_FORMATTER = new GenericFormatter(FORMAT_RFC1123_SHORT);
|
||||||
|
|
||||||
|
private SimpleDateFormat dateFormat;
|
||||||
|
|
||||||
|
public GenericFormatter(SimpleDateFormat dateFormat) {
|
||||||
|
this.dateFormat = dateFormat;
|
||||||
|
this.last_time = 0;
|
||||||
|
this.last_format = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note: The short day format doesn't include any timezone information. This method
|
||||||
|
* transforms the date into the GMT/UTC timezone. Example: If the local system time is,
|
||||||
|
* 2007-12-18 01:15:00 +0200, then the resulting String will be "2007-12-17".
|
||||||
|
* In case you need a format with a timezon offset, use {@link #formatShortDay(TimeZone)}
|
||||||
|
* @return a String representation of the current system date in GMT using the
|
||||||
|
* short day format, e.g. "20071218".
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String format(final Date date) {
|
||||||
|
if (date == null) return "";
|
||||||
|
if (Math.abs(date.getTime() - last_time) < 1000) return last_format;
|
||||||
|
synchronized (this.dateFormat) {
|
||||||
|
last_format = this.dateFormat.format(date);
|
||||||
|
last_time = date.getTime();
|
||||||
|
}
|
||||||
|
return last_format;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a String representation of a Date in short day format assuming the date
|
||||||
|
* is aligned to the GMT/UTC timezone. An example for such a date string is "20071218".
|
||||||
|
* @see #formatShortDay()
|
||||||
|
* @throws ParseException The exception is thrown if an error occured during while parsing
|
||||||
|
* the String.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Date parse(final String timeString) throws ParseException {
|
||||||
|
synchronized (this.dateFormat) {
|
||||||
|
return this.dateFormat.parse(timeString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like {@link #parseShortSecond(String)} using additional timezone information provided in an
|
||||||
|
* offset String, like "+0100" for CET.
|
||||||
|
*/
|
||||||
|
public Date parse(final String timeString, final String UTCOffset) {
|
||||||
|
// FIXME: This method returns an incorrect date, check callers!
|
||||||
|
// ex: de.anomic.server.serverDate.parseShortSecond("20070101120000", "+0200").toGMTString()
|
||||||
|
// => 1 Jan 2007 13:00:00 GMT
|
||||||
|
if (timeString == null || timeString.length() == 0) { return new Date(); }
|
||||||
|
if (UTCOffset == null || UTCOffset.length() == 0) { return new Date(); }
|
||||||
|
try {
|
||||||
|
return new Date(this.dateFormat.parse(timeString).getTime() - UTCDiff() + UTCDiff(UTCOffset));
|
||||||
|
} catch (final java.text.ParseException e) {
|
||||||
|
//serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]");
|
||||||
|
return new Date();
|
||||||
|
} catch (final java.lang.NumberFormatException e) {
|
||||||
|
//serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]");
|
||||||
|
return new Date();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static long UTCDiff(final String diffString) {
|
||||||
|
if (diffString.length() != 5) throw new IllegalArgumentException("UTC String malformed (wrong size):" + diffString);
|
||||||
|
boolean ahead = true;
|
||||||
|
if (diffString.length() > 0 && diffString.charAt(0) == '+') ahead = true;
|
||||||
|
else if (diffString.length() > 0 && diffString.charAt(0) == '-') ahead = false;
|
||||||
|
else throw new IllegalArgumentException("UTC String malformed (wrong sign):" + diffString);
|
||||||
|
final long oh = Long.parseLong(diffString.substring(1, 3));
|
||||||
|
final long om = Long.parseLong(diffString.substring(3));
|
||||||
|
return ((ahead) ? (long) 1 : (long) -1) * (oh * AbstractFormatter.hourMillis + om * AbstractFormatter.minuteMillis);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static long UTCDiff() {
|
||||||
|
// DST_OFFSET is dependent on the time of the Calendar, so it has to be updated
|
||||||
|
// to get the correct current offset
|
||||||
|
synchronized (thisCalendar) {
|
||||||
|
thisCalendar.setTimeInMillis(System.currentTimeMillis());
|
||||||
|
final long zoneOffsetHours = thisCalendar.get(Calendar.ZONE_OFFSET);
|
||||||
|
final long DSTOffsetHours = thisCalendar.get(Calendar.DST_OFFSET);
|
||||||
|
return zoneOffsetHours + DSTOffsetHours;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String UTCDiffString() {
|
||||||
|
// we express the UTC Difference in 5 digits:
|
||||||
|
// SHHMM
|
||||||
|
// S ::= '+'|'-'
|
||||||
|
// HH ::= '00'|'01'|'02'|'03'|'04'|'05'|'06'|'07'|'08'|'09'|'10'|'11'|'12'
|
||||||
|
// MM ::= '00'|'15'|'30'|'45'
|
||||||
|
// since there are some places on earth where there is a time shift of half an hour
|
||||||
|
// we need too show also the minutes of the time shift
|
||||||
|
// Examples: http://www.timeanddate.com/library/abbreviations/timezones/
|
||||||
|
final long offsetHours = UTCDiff();
|
||||||
|
final int om = Math.abs((int) (offsetHours / AbstractFormatter.minuteMillis)) % 60;
|
||||||
|
final int oh = Math.abs((int) (offsetHours / AbstractFormatter.hourMillis));
|
||||||
|
String diff = Integer.toString(om);
|
||||||
|
if (diff.length() < 2) diff = "0" + diff;
|
||||||
|
diff = Integer.toString(oh) + diff;
|
||||||
|
if (diff.length() < 4) diff = "0" + diff;
|
||||||
|
if (offsetHours < 0) {
|
||||||
|
return "-" + diff;
|
||||||
|
}
|
||||||
|
return "+" + diff;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long correctedUTCTime() {
|
||||||
|
return System.currentTimeMillis() - UTCDiff();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,191 @@
|
|||||||
|
/**
|
||||||
|
* ISO8601
|
||||||
|
* Copyright 2011 by Michael Peter Christen
|
||||||
|
* First released 2.1.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.date;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
|
public class ISO8601Formatter extends AbstractFormatter implements DateFormatter {
|
||||||
|
|
||||||
|
/** pattern for a W3C datetime variant of a non-localized ISO8601 date */
|
||||||
|
private static final String PATTERN_ISO8601 = "yyyy-MM-dd'T'HH:mm:ss'Z'";
|
||||||
|
|
||||||
|
/** Date formatter/non-sloppy parser for W3C datetime (ISO8601) in GMT/UTC */
|
||||||
|
private static final SimpleDateFormat FORMAT_ISO8601 = new SimpleDateFormat(PATTERN_ISO8601, Locale.US);
|
||||||
|
|
||||||
|
static {
|
||||||
|
FORMAT_ISO8601.setTimeZone(TZ_GMT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final ISO8601Formatter FORMATTER = new ISO8601Formatter();
|
||||||
|
|
||||||
|
public ISO8601Formatter() {
|
||||||
|
last_time = 0;
|
||||||
|
last_format = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse dates as defined in {@linkplain http://www.w3.org/TR/NOTE-datetime}.
|
||||||
|
* This format (also specified in ISO8601) allows different "precisions".
|
||||||
|
* The following lower precision versions for the complete date
|
||||||
|
* "2007-12-19T10:20:30.567+0300" are allowed:<br>
|
||||||
|
* "2007"<br>
|
||||||
|
* "2007-12"<br>
|
||||||
|
* "2007-12-19"<br>
|
||||||
|
* "2007-12-19T10:20+0300<br>
|
||||||
|
* "2007-12-19T10:20:30+0300<br>
|
||||||
|
* "2007-12-19T10:20:30.567+0300<br>
|
||||||
|
* Additionally a timezone offset of "+0000" can be substituted as "Z".<br>
|
||||||
|
* Parsing is done in a fuzzy way. If there is an illegal character somewhere in
|
||||||
|
* the String, the date parsed so far will be returned, e.g. the input
|
||||||
|
* "2007-12-19FOO" would return a date that represents "2007-12-19".
|
||||||
|
*
|
||||||
|
* @param s
|
||||||
|
* @return
|
||||||
|
* @throws ParseException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Date parse(String s) throws ParseException {
|
||||||
|
// do some lazy checks here
|
||||||
|
s = s.trim();
|
||||||
|
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date
|
||||||
|
if (s.startsWith("{")) s = s.substring(1);
|
||||||
|
if (s.endsWith("}")) s = s.substring(0, s.length() - 1);
|
||||||
|
if (s.startsWith("[")) s = s.substring(1);
|
||||||
|
if (s.endsWith("]")) s = s.substring(0, s.length() - 1);
|
||||||
|
while (s.length() > 0 && (s.charAt(0) > '9' || s.charAt(0) < '0')) s = s.substring(1);
|
||||||
|
if (s.endsWith("--")) s = s.substring(0, s.length() - 2) + "00";
|
||||||
|
int p = s.indexOf(';'); if (p >= 0) s = s.substring(0, p); // a semicolon may be used to separate two dates from each other; then we take the first
|
||||||
|
p = s.indexOf(','); if (p >= 0) s = s.substring(0, p); // a comma may be used to separate two dates from each other; then we take the first
|
||||||
|
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date
|
||||||
|
|
||||||
|
// no go for exact parsing
|
||||||
|
final Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);
|
||||||
|
cal.clear();
|
||||||
|
|
||||||
|
// split 2007-12-19T10:20:30.789+0500 into its parts
|
||||||
|
// correct: yyyy['-'MM['-'dd['T'HH':'MM[':'ss['.'SSS]]('Z'|ZZZZZ)]]]
|
||||||
|
final StringTokenizer t = new StringTokenizer(s, "-T:.Z+", true);
|
||||||
|
if (s == null || t.countTokens() == 0)
|
||||||
|
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// year
|
||||||
|
cal.set(Calendar.YEAR, Integer.parseInt(t.nextToken()));
|
||||||
|
// month
|
||||||
|
if (t.nextToken().equals("-")) {
|
||||||
|
cal.set(Calendar.MONTH, Integer.parseInt(t.nextToken()) - 1);
|
||||||
|
} else {
|
||||||
|
return cal.getTime();
|
||||||
|
}
|
||||||
|
// day
|
||||||
|
if (t.nextToken().equals("-")) {
|
||||||
|
cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(t.nextToken()));
|
||||||
|
} else {
|
||||||
|
return cal.getTime();
|
||||||
|
}
|
||||||
|
// The standard says:
|
||||||
|
// if there is an hour there has to be a minute and a timezone token, too.
|
||||||
|
if (t.nextToken().equals("T")) {
|
||||||
|
final int hour = Integer.parseInt(t.nextToken());
|
||||||
|
// no error, got hours
|
||||||
|
int min = 0;
|
||||||
|
int sec = 0;
|
||||||
|
int msec = 0;
|
||||||
|
if (t.nextToken().equals(":")) {
|
||||||
|
min = Integer.parseInt(t.nextToken());
|
||||||
|
// no error, got minutes
|
||||||
|
// need TZ or seconds
|
||||||
|
String token = t.nextToken();
|
||||||
|
if (token.equals(":")) {
|
||||||
|
sec = Integer.parseInt(t.nextToken());
|
||||||
|
// need millisecs or TZ
|
||||||
|
token = t.nextToken();
|
||||||
|
if (token.equals(".")) {
|
||||||
|
msec = Integer.parseInt(t.nextToken());
|
||||||
|
// need TZ
|
||||||
|
token = t.nextToken();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for TZ data
|
||||||
|
int offset;
|
||||||
|
if (token.equals("Z")) {
|
||||||
|
offset = 0;
|
||||||
|
} else {
|
||||||
|
int sign = 0;
|
||||||
|
if (token.equals("+")) {
|
||||||
|
sign = 1;
|
||||||
|
} else if (token.equals("-")) {
|
||||||
|
sign = -1;
|
||||||
|
} else {
|
||||||
|
// no legal TZ offset found
|
||||||
|
return cal.getTime();
|
||||||
|
}
|
||||||
|
offset = sign * Integer.parseInt(t.nextToken()) * 10 * 3600;
|
||||||
|
}
|
||||||
|
cal.set(Calendar.ZONE_OFFSET, offset);
|
||||||
|
}
|
||||||
|
cal.set(Calendar.HOUR_OF_DAY, hour);
|
||||||
|
cal.set(Calendar.MINUTE, min);
|
||||||
|
cal.set(Calendar.SECOND, sec);
|
||||||
|
cal.set(Calendar.MILLISECOND, msec);
|
||||||
|
}
|
||||||
|
} catch (final NoSuchElementException e) {
|
||||||
|
// ignore this as it is perfectly fine to have non-complete date in this format
|
||||||
|
} catch (final Exception e) {
|
||||||
|
// catch all Exceptions and return what we parsed so far
|
||||||
|
//serverLog.logInfo("SERVER", "parseISO8601: DATE ERROR with: '" + s + "' got so far: '" + cal.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
// in case we couldn't even parse a year
|
||||||
|
if (!cal.isSet(Calendar.YEAR))
|
||||||
|
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
|
||||||
|
Date d = cal.getTime();
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a String representation of a Date using the format defined
|
||||||
|
* in ISO8601/W3C datetime
|
||||||
|
* The result will be in UTC/GMT, e.g. "2007-12-19T10:20:30Z".
|
||||||
|
*
|
||||||
|
* @param date The Date instance to transform.
|
||||||
|
* @return A fixed width (20 chars) ISO8601 date String.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public final String format(final Date date) {
|
||||||
|
if (date == null) return "";
|
||||||
|
if (Math.abs(date.getTime() - last_time) < 1000) return last_format;
|
||||||
|
synchronized (FORMAT_ISO8601) {
|
||||||
|
last_format = FORMAT_ISO8601.format(date);
|
||||||
|
last_time = date.getTime();
|
||||||
|
}
|
||||||
|
return last_format;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,74 @@
|
|||||||
|
/**
|
||||||
|
* LinkExtractor
|
||||||
|
* Copyright 2011 by Michael Peter Christen
|
||||||
|
* First released 2.01.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.protocol.http;
|
||||||
|
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.util.WeakHashMap;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.MultiProtocolURI;
|
||||||
|
|
||||||
|
public class LinkExtractor {
|
||||||
|
|
||||||
|
private static final char lb = '<', rb = '>', dquotes = '"', space = ' ';
|
||||||
|
private static final Object PRESENT = new Object();
|
||||||
|
|
||||||
|
private WeakHashMap<MultiProtocolURI, Object> links;
|
||||||
|
private Pattern blackpattern;
|
||||||
|
|
||||||
|
public LinkExtractor(Pattern blackpattern) {
|
||||||
|
this.links = new WeakHashMap<MultiProtocolURI, Object>();
|
||||||
|
this.blackpattern = blackpattern;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void scrape(String text) {
|
||||||
|
text = text.replace(lb, space).replace(rb, space).replace(dquotes, space);
|
||||||
|
int p, q, s = 0;
|
||||||
|
String u;
|
||||||
|
while (s < text.length()) {
|
||||||
|
p = Math.min(find(text, "smb://", s), Math.min(find(text, "ftp://", s), Math.min(find(text, "http://", s), find(text, "https://", s))));
|
||||||
|
if (p == Integer.MAX_VALUE) break;
|
||||||
|
q = text.indexOf(" ", p + 1);
|
||||||
|
u = text.substring(p, q < 0 ? text.length() : q);
|
||||||
|
if (u.endsWith(".")) u = u.substring(0, u.length() - 1); // remove the '.' that was appended above
|
||||||
|
s = p + 1;
|
||||||
|
if (this.blackpattern.matcher(u).matches()) continue;
|
||||||
|
try {links.put(new MultiProtocolURI(u), PRESENT);} catch (MalformedURLException e) {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return the links in the text in the order as they appear
|
||||||
|
* @return a list of urls
|
||||||
|
*/
|
||||||
|
public MultiProtocolURI[] getLinks() {
|
||||||
|
MultiProtocolURI[] urls = new MultiProtocolURI[this.links.size()];
|
||||||
|
int i = 0;
|
||||||
|
for (MultiProtocolURI uri: this.links.keySet()) urls[i++] = uri;
|
||||||
|
return urls;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int find(final String s, final String m, final int start) {
|
||||||
|
final int p = s.indexOf(m, start);
|
||||||
|
return (p < 0) ? Integer.MAX_VALUE : p;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,39 @@
|
|||||||
|
/**
|
||||||
|
* OutOfLimitsException
|
||||||
|
* Copyright 2006 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||||
|
* First released 17.01.2006 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.storage;
|
||||||
|
|
||||||
|
/**
 * Unchecked exception that signals an object size outside of the allowed limits.
 */
public class OutOfLimitsException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with the generic message "unspecific-error".
     */
    public OutOfLimitsException() {
        super("unspecific-error");
    }

    /**
     * Creates an exception for a size that exceeds a limit.
     *
     * @param expectedLimit the size limit
     * @param actualSize    the size that was found
     */
    public OutOfLimitsException(final int expectedLimit, final int actualSize) {
        super(exceedsMessage(expectedLimit, actualSize));
    }

    /**
     * Creates an exception for a size that must not be negative.
     *
     * @param actualSize the size that was found
     */
    public OutOfLimitsException(final int actualSize) {
        super(negativeMessage(actualSize));
    }

    /** builds the message for a size above the limit */
    private static String exceedsMessage(final int limit, final int size) {
        final StringBuilder msg = new StringBuilder();
        msg.append("Object size is ").append(size);
        msg.append("; it exceeds the size limit ").append(limit);
        return msg.toString();
    }

    /** builds the message for a negative size */
    private static String negativeMessage(final int size) {
        final StringBuilder msg = new StringBuilder();
        msg.append("Object size is ").append(size);
        msg.append("; must not be negative");
        return msg.toString();
    }

}
|
@ -1,473 +0,0 @@
|
|||||||
// serverDate.java
|
|
||||||
// -------------------------------------------
|
|
||||||
// (C) by Michael Peter Christen; mc@yacy.net
|
|
||||||
// (C) by by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com
|
|
||||||
// first published on http://www.anomic.de
|
|
||||||
// Frankfurt, Germany, 2005, 2007
|
|
||||||
// last major change: 14.03.2005
|
|
||||||
//
|
|
||||||
// This program is free software; you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation; either version 2 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with this program; if not, write to the Free Software
|
|
||||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
// this class is needed to replace the slow java built-in date method by a faster version
|
|
||||||
|
|
||||||
package net.yacy.kelondro.util;
|
|
||||||
|
|
||||||
import java.text.DateFormat;
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Calendar;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.StringTokenizer;
|
|
||||||
import java.util.TimeZone;
|
|
||||||
|
|
||||||
public final class DateFormatter {
|
|
||||||
|
|
||||||
/** minimal date format without time information (fixed width: 8) */
|
|
||||||
public static final String PATTERN_SHORT_DAY = "yyyyMMdd";
|
|
||||||
/** minimal date format (fixed width: 14) */
|
|
||||||
public static final String PATTERN_SHORT_SECOND = "yyyyMMddHHmmss";
|
|
||||||
/** minimal date format including milliseconds (fixed width: 17) */
|
|
||||||
public static final String PATTERN_SHORT_MILSEC = "yyyyMMddHHmmssSSS";
|
|
||||||
|
|
||||||
/** default HTTP 1.1 header date format pattern */
|
|
||||||
public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss Z"; // with numeric time zone indicator as defined in RFC5322
|
|
||||||
public static final String PATTERN_RFC1123_SHORT = "EEE, dd MMM yyyy";
|
|
||||||
|
|
||||||
/** date pattern used in older HTTP implementations */
|
|
||||||
public static final String PATTERN_ANSIC = "EEE MMM d HH:mm:ss yyyy";
|
|
||||||
/** date pattern used in older HTTP implementations */
|
|
||||||
public static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
|
|
||||||
|
|
||||||
/** pattern for a W3C datetime variant of a non-localized ISO8601 date */
|
|
||||||
public static final String PATTERN_ISO8601 = "yyyy-MM-dd'T'HH:mm:ss'Z'";
|
|
||||||
|
|
||||||
/** predefined GMT TimeZone object */
|
|
||||||
private static final TimeZone TZ_GMT = TimeZone.getTimeZone("GMT");
|
|
||||||
|
|
||||||
/** predefined non-localized Calendar object for generic GMT dates */
|
|
||||||
private static final Calendar CAL_GMT = Calendar.getInstance(TZ_GMT, Locale.US);
|
|
||||||
|
|
||||||
/** Date formatter/parser for minimal yyyyMMdd pattern */
|
|
||||||
private static final SimpleDateFormat FORMAT_SHORT_DAY = new SimpleDateFormat(PATTERN_SHORT_DAY, Locale.US);
|
|
||||||
/** Date formatter/parser for minimal yyyyMMddHHmmss pattern */
|
|
||||||
private static final SimpleDateFormat FORMAT_SHORT_SECOND = new SimpleDateFormat(PATTERN_SHORT_SECOND, Locale.US);
|
|
||||||
/** Date formatter/parser for minimal yyyyMMddHHmmssSSS pattern */
|
|
||||||
private static final SimpleDateFormat FORMAT_SHORT_MILSEC = new SimpleDateFormat(PATTERN_SHORT_MILSEC, Locale.US);
|
|
||||||
|
|
||||||
/** Date formatter/non-sloppy parser for W3C datetime (ISO8601) in GMT/UTC */
|
|
||||||
private static final SimpleDateFormat FORMAT_ISO8601 = new SimpleDateFormat(PATTERN_ISO8601, Locale.US);
|
|
||||||
|
|
||||||
/** Date formatter/parser for standard compliant HTTP header dates (RFC 1123) */
|
|
||||||
private static final SimpleDateFormat FORMAT_ANSIC = new SimpleDateFormat(PATTERN_ANSIC, Locale.US);
|
|
||||||
|
|
||||||
private static final SimpleDateFormat FORMAT_RFC1123_SHORT = new SimpleDateFormat(PATTERN_RFC1123_SHORT, Locale.US);
|
|
||||||
|
|
||||||
|
|
||||||
/** Initialization of static formats */
|
|
||||||
static {
|
|
||||||
// 2-digit dates are automatically parsed by SimpleDateFormat,
|
|
||||||
// we need to detect the real year by adding 1900 or 2000 to
|
|
||||||
// the year value starting with 1970
|
|
||||||
CAL_GMT.setTimeInMillis(0);
|
|
||||||
|
|
||||||
// we want GMT times on the SHORT formats as well as they don't support any timezone
|
|
||||||
FORMAT_SHORT_DAY.setTimeZone(TZ_GMT);
|
|
||||||
FORMAT_SHORT_SECOND.setTimeZone(TZ_GMT);
|
|
||||||
FORMAT_SHORT_MILSEC.setTimeZone(TZ_GMT);
|
|
||||||
FORMAT_ISO8601.setTimeZone(TZ_GMT);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a String representation of a Date using the format defined
|
|
||||||
* in ISO8601/W3C datetime
|
|
||||||
* The result will be in UTC/GMT, e.g. "2007-12-19T10:20:30Z".
|
|
||||||
*
|
|
||||||
* @param date The Date instance to transform.
|
|
||||||
* @return A fixed width (20 chars) ISO8601 date String.
|
|
||||||
*/
|
|
||||||
private static long FORMAT_ISO8601_last_time = 0;
|
|
||||||
private static String FORMAT_ISO8601_last_format = "";
|
|
||||||
public static final String formatISO8601(final Date date) {
|
|
||||||
if (date == null) return "";
|
|
||||||
if (Math.abs(date.getTime() - FORMAT_ISO8601_last_time) < 1000) return FORMAT_ISO8601_last_format;
|
|
||||||
FORMAT_ISO8601_last_format = format(FORMAT_ISO8601, date);
|
|
||||||
FORMAT_ISO8601_last_time = date.getTime();
|
|
||||||
return FORMAT_ISO8601_last_format;
|
|
||||||
}
|
|
||||||
public static final String formatANSIC(final Date date) {
|
|
||||||
if (date == null) return "";
|
|
||||||
return format(FORMAT_ANSIC, date);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String formatRFC1123_short(final Date date) {
|
|
||||||
if (date == null) return "";
|
|
||||||
return FORMAT_RFC1123_SHORT.format(date);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Identical to {@link #formatShortDay(Date)}, but for short second format.
|
|
||||||
*/
|
|
||||||
private static long FORMAT_SHORT_SECOND_last_time = 0;
|
|
||||||
private static String FORMAT_SHORT_SECOND_last_format = "";
|
|
||||||
public static String formatShortSecond(final Date date) {
|
|
||||||
if (date == null) return "";
|
|
||||||
if (Math.abs(date.getTime() - FORMAT_SHORT_SECOND_last_time) < 1000) return FORMAT_SHORT_SECOND_last_format;
|
|
||||||
FORMAT_SHORT_SECOND_last_format = format(FORMAT_SHORT_SECOND, date);
|
|
||||||
FORMAT_SHORT_SECOND_last_time = date.getTime();
|
|
||||||
return FORMAT_SHORT_SECOND_last_format;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String formatShortMilliSecond(final Date date) {
|
|
||||||
return format(FORMAT_SHORT_MILSEC, date);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Note: The short day format doesn't include any timezone information. This method
|
|
||||||
* transforms the date into the GMT/UTC timezone. Example: If the local system time is,
|
|
||||||
* 2007-12-18 01:15:00 +0200, then the resulting String will be "2007-12-17".
|
|
||||||
* In case you need a format with a timezon offset, use {@link #formatShortDay(TimeZone)}
|
|
||||||
* @return a String representation of the current system date in GMT using the
|
|
||||||
* short day format, e.g. "20071218".
|
|
||||||
*/
|
|
||||||
public static String formatShortDay() {
|
|
||||||
return format(FORMAT_SHORT_DAY, new Date());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @see #formatShortDay()
|
|
||||||
* @param date the Date to transform
|
|
||||||
*/
|
|
||||||
private static long FORMAT_SHORT_DAY_last_time = 0;
|
|
||||||
private static String FORMAT_SHORT_DAY_last_format = "";
|
|
||||||
public static String formatShortDay(final Date date) {
|
|
||||||
if (date == null) return "";
|
|
||||||
if (Math.abs(date.getTime() - FORMAT_SHORT_DAY_last_time) < 1000) return FORMAT_SHORT_DAY_last_format;
|
|
||||||
FORMAT_SHORT_DAY_last_format = format(FORMAT_SHORT_DAY, date);
|
|
||||||
FORMAT_SHORT_DAY_last_time = date.getTime();
|
|
||||||
return FORMAT_SHORT_DAY_last_format;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parse dates as defined in {@linkplain http://www.w3.org/TR/NOTE-datetime}.
|
|
||||||
* This format (also specified in ISO8601) allows different "precisions".
|
|
||||||
* The following lower precision versions for the complete date
|
|
||||||
* "2007-12-19T10:20:30.567+0300" are allowed:<br>
|
|
||||||
* "2007"<br>
|
|
||||||
* "2007-12"<br>
|
|
||||||
* "2007-12-19"<br>
|
|
||||||
* "2007-12-19T10:20+0300<br>
|
|
||||||
* "2007-12-19T10:20:30+0300<br>
|
|
||||||
* "2007-12-19T10:20:30.567+0300<br>
|
|
||||||
* Additionally a timezone offset of "+0000" can be substituted as "Z".<br>
|
|
||||||
* Parsing is done in a fuzzy way. If there is an illegal character somewhere in
|
|
||||||
* the String, the date parsed so far will be returned, e.g. the input
|
|
||||||
* "2007-12-19FOO" would return a date that represents "2007-12-19".
|
|
||||||
*
|
|
||||||
* @param s
|
|
||||||
* @return
|
|
||||||
* @throws ParseException
|
|
||||||
*/
|
|
||||||
public static Date parseISO8601(String s) throws ParseException {
|
|
||||||
// do some lazy checks here
|
|
||||||
s = s.trim();
|
|
||||||
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date
|
|
||||||
if (s.startsWith("{")) s = s.substring(1);
|
|
||||||
if (s.endsWith("}")) s = s.substring(0, s.length() - 1);
|
|
||||||
if (s.startsWith("[")) s = s.substring(1);
|
|
||||||
if (s.endsWith("]")) s = s.substring(0, s.length() - 1);
|
|
||||||
while (s.length() > 0 && (s.charAt(0) > '9' || s.charAt(0) < '0')) s = s.substring(1);
|
|
||||||
if (s.endsWith("--")) s = s.substring(0, s.length() - 2) + "00";
|
|
||||||
int p = s.indexOf(';'); if (p >= 0) s = s.substring(0, p); // a semicolon may be used to separate two dates from each other; then we take the first
|
|
||||||
p = s.indexOf(','); if (p >= 0) s = s.substring(0, p); // a comma may be used to separate two dates from each other; then we take the first
|
|
||||||
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date
|
|
||||||
|
|
||||||
// no go for exact parsing
|
|
||||||
final Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);
|
|
||||||
cal.clear();
|
|
||||||
|
|
||||||
// split 2007-12-19T10:20:30.789+0500 into its parts
|
|
||||||
// correct: yyyy['-'MM['-'dd['T'HH':'MM[':'ss['.'SSS]]('Z'|ZZZZZ)]]]
|
|
||||||
final StringTokenizer t = new StringTokenizer(s, "-T:.Z+", true);
|
|
||||||
if (s == null || t.countTokens() == 0)
|
|
||||||
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
|
|
||||||
|
|
||||||
try {
|
|
||||||
// year
|
|
||||||
cal.set(Calendar.YEAR, Integer.parseInt(t.nextToken()));
|
|
||||||
// month
|
|
||||||
if (t.nextToken().equals("-")) {
|
|
||||||
cal.set(Calendar.MONTH, Integer.parseInt(t.nextToken()) - 1);
|
|
||||||
} else {
|
|
||||||
return cal.getTime();
|
|
||||||
}
|
|
||||||
// day
|
|
||||||
if (t.nextToken().equals("-")) {
|
|
||||||
cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(t.nextToken()));
|
|
||||||
} else {
|
|
||||||
return cal.getTime();
|
|
||||||
}
|
|
||||||
// The standard says: if there is an hour there has to be a minute and a
|
|
||||||
// timezone token, too.
|
|
||||||
// hour
|
|
||||||
if (t.nextToken().equals("T")) {
|
|
||||||
final int hour = Integer.parseInt(t.nextToken());
|
|
||||||
// no error, got hours
|
|
||||||
int min = 0;
|
|
||||||
int sec = 0;
|
|
||||||
int msec = 0;
|
|
||||||
if (t.nextToken().equals(":")) {
|
|
||||||
min = Integer.parseInt(t.nextToken());
|
|
||||||
// no error, got minutes
|
|
||||||
// need TZ or seconds
|
|
||||||
String token = t.nextToken();
|
|
||||||
if (token.equals(":")) {
|
|
||||||
sec = Integer.parseInt(t.nextToken());
|
|
||||||
// need millisecs or TZ
|
|
||||||
token = t.nextToken();
|
|
||||||
if (token.equals(".")) {
|
|
||||||
msec = Integer.parseInt(t.nextToken());
|
|
||||||
// need TZ
|
|
||||||
token = t.nextToken();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for TZ data
|
|
||||||
int offset;
|
|
||||||
if (token.equals("Z")) {
|
|
||||||
offset = 0;
|
|
||||||
} else {
|
|
||||||
int sign = 0;
|
|
||||||
if (token.equals("+")) {
|
|
||||||
sign = 1;
|
|
||||||
} else if (token.equals("-")) {
|
|
||||||
sign = -1;
|
|
||||||
} else {
|
|
||||||
// no legal TZ offset found
|
|
||||||
return cal.getTime();
|
|
||||||
}
|
|
||||||
offset = sign * Integer.parseInt(t.nextToken()) * 10 * 3600;
|
|
||||||
}
|
|
||||||
cal.set(Calendar.ZONE_OFFSET, offset);
|
|
||||||
}
|
|
||||||
cal.set(Calendar.HOUR_OF_DAY, hour);
|
|
||||||
cal.set(Calendar.MINUTE, min);
|
|
||||||
cal.set(Calendar.SECOND, sec);
|
|
||||||
cal.set(Calendar.MILLISECOND, msec);
|
|
||||||
}
|
|
||||||
} catch (final NoSuchElementException e) {
|
|
||||||
// ignore this as it is perfectly fine to have non-complete date in this format
|
|
||||||
} catch (final Exception e) {
|
|
||||||
// catch all Exceptions and return what we parsed so far
|
|
||||||
//serverLog.logInfo("SERVER", "parseISO8601: DATE ERROR with: '" + s + "' got so far: '" + cal.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
// in case we couldn't even parse a year
|
|
||||||
if (!cal.isSet(Calendar.YEAR))
|
|
||||||
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
|
|
||||||
Date d = cal.getTime();
|
|
||||||
return d;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parse a String representation of a Date in short day format assuming the date
|
|
||||||
* is aligned to the GMT/UTC timezone. An example for such a date string is "20071218".
|
|
||||||
* @see #formatShortDay()
|
|
||||||
* @throws ParseException The exception is thrown if an error occured during while parsing
|
|
||||||
* the String.
|
|
||||||
*/
|
|
||||||
public static Date parseShortDay(final String timeString) throws ParseException {
|
|
||||||
return parse(FORMAT_SHORT_DAY, timeString);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the current date in short second format which is a fixed width (14 chars)
|
|
||||||
* String including the date and the time like "20071218233510". The result is in GMT/UTC.
|
|
||||||
* @see #formatShortDay()
|
|
||||||
*/
|
|
||||||
public static String formatShortSecond() {
|
|
||||||
return formatShortSecond(new Date());
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO check the following 2 parse methods for correct use (GMT vs. different timezone)
|
|
||||||
/**
|
|
||||||
* Like {@link #parseShortDay(String)}, but for the "short second" format which is short date
|
|
||||||
* plus a 6 digit day time value, like "20071218233510". The String should be in GMT/UTC to
|
|
||||||
* get a correct Date.
|
|
||||||
*/
|
|
||||||
public static Date parseShortSecond(final String timeString) throws ParseException {
|
|
||||||
return parse(FORMAT_SHORT_SECOND, timeString);
|
|
||||||
}
|
|
||||||
public static Date parseShortMilliSecond(final String timeString) throws ParseException {
|
|
||||||
return parse(FORMAT_SHORT_MILSEC, timeString);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Like {@link #parseShortSecond(String)} using additional timezone information provided in an
|
|
||||||
* offset String, like "+0100" for CET.
|
|
||||||
*/
|
|
||||||
public static Date parseShortSecond(final String remoteTimeString, final String remoteUTCOffset) {
|
|
||||||
// FIXME: This method returns an incorrect date, check callers!
|
|
||||||
// ex: de.anomic.server.serverDate.parseShortSecond("20070101120000", "+0200").toGMTString()
|
|
||||||
// => 1 Jan 2007 13:00:00 GMT
|
|
||||||
if (remoteTimeString == null || remoteTimeString.length() == 0) { return new Date(); }
|
|
||||||
if (remoteUTCOffset == null || remoteUTCOffset.length() == 0) { return new Date(); }
|
|
||||||
try {
|
|
||||||
return new Date(parse(FORMAT_SHORT_SECOND, remoteTimeString).getTime() - DateFormatter.UTCDiff() + DateFormatter.UTCDiff(remoteUTCOffset));
|
|
||||||
} catch (final java.text.ParseException e) {
|
|
||||||
//serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]");
|
|
||||||
return new Date();
|
|
||||||
} catch (final java.lang.NumberFormatException e) {
|
|
||||||
//serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]");
|
|
||||||
return new Date();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Format a time inteval in milliseconds into a String of the form
|
|
||||||
* X 'day'['s'] HH':'mm
|
|
||||||
*/
|
|
||||||
public static String formatInterval(final long millis) {
|
|
||||||
try {
|
|
||||||
final long mins = millis / 60000;
|
|
||||||
|
|
||||||
final StringBuilder uptime = new StringBuilder();
|
|
||||||
|
|
||||||
final int uptimeDays = (int) (Math.floor(mins/1440.0));
|
|
||||||
final int uptimeHours = (int) (Math.floor(mins/60.0)%24);
|
|
||||||
final int uptimeMins = (int) mins%60;
|
|
||||||
|
|
||||||
uptime.append(uptimeDays)
|
|
||||||
.append(((uptimeDays == 1)?" day ":" days "))
|
|
||||||
.append((uptimeHours < 10)?"0":"")
|
|
||||||
.append(uptimeHours)
|
|
||||||
.append(":")
|
|
||||||
.append((uptimeMins < 10)?"0":"")
|
|
||||||
.append(uptimeMins);
|
|
||||||
|
|
||||||
return uptime.toString();
|
|
||||||
} catch (final Exception e) {
|
|
||||||
return "unknown";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** called by all public format...(...) methods */
|
|
||||||
private static String format(final SimpleDateFormat format, final Date date) {
|
|
||||||
synchronized (format) {
|
|
||||||
return format.format(date);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** calles by all public parse...(...) methods */
|
|
||||||
private static Date parse(final SimpleDateFormat format, final String dateString) throws ParseException {
|
|
||||||
synchronized (format) {
|
|
||||||
return format.parse(dateString);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// statics
|
|
||||||
public final static long secondMillis = 1000;
|
|
||||||
public final static long minuteMillis = 60 * secondMillis;
|
|
||||||
public final static long hourMillis = 60 * minuteMillis;
|
|
||||||
public final static long dayMillis = 24 * hourMillis;
|
|
||||||
public final static long normalyearMillis = 365 * dayMillis;
|
|
||||||
public final static long leapyearMillis = 366 * dayMillis;
|
|
||||||
public final static int january = 31, normalfebruary = 28, leapfebruary = 29, march = 31,
|
|
||||||
april = 30, may = 31, june = 30, july = 31, august = 31,
|
|
||||||
september = 30, october = 31, november = 30, december = 31;
|
|
||||||
public final static int[] dimnormal = {january, normalfebruary, march, april, may, june, july, august, september, october, november, december};
|
|
||||||
public final static int[] dimleap = {january, leapfebruary, march, april, may, june, july, august, september, october, november, december};
|
|
||||||
public final static String[] wkday = {"Mon","Tue","Wed","Thu","Fri","Sat","Sun"};
|
|
||||||
//private final static String[] month = {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"};
|
|
||||||
|
|
||||||
// find out time zone and DST offset
|
|
||||||
private static Calendar thisCalendar = Calendar.getInstance();
|
|
||||||
|
|
||||||
public static String UTCDiffString() {
|
|
||||||
// we express the UTC Difference in 5 digits:
|
|
||||||
// SHHMM
|
|
||||||
// S ::= '+'|'-'
|
|
||||||
// HH ::= '00'|'01'|'02'|'03'|'04'|'05'|'06'|'07'|'08'|'09'|'10'|'11'|'12'
|
|
||||||
// MM ::= '00'|'15'|'30'|'45'
|
|
||||||
// since there are some places on earth where there is a time shift of half an hour
|
|
||||||
// we need too show also the minutes of the time shift
|
|
||||||
// Examples: http://www.timeanddate.com/library/abbreviations/timezones/
|
|
||||||
final long offsetHours = UTCDiff();
|
|
||||||
final int om = Math.abs((int) (offsetHours / minuteMillis)) % 60;
|
|
||||||
final int oh = Math.abs((int) (offsetHours / hourMillis));
|
|
||||||
String diff = Integer.toString(om);
|
|
||||||
if (diff.length() < 2) diff = "0" + diff;
|
|
||||||
diff = Integer.toString(oh) + diff;
|
|
||||||
if (diff.length() < 4) diff = "0" + diff;
|
|
||||||
if (offsetHours < 0) {
|
|
||||||
return "-" + diff;
|
|
||||||
}
|
|
||||||
return "+" + diff;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static long UTCDiff() {
|
|
||||||
// DST_OFFSET is dependent on the time of the Calendar, so it has to be updated
|
|
||||||
// to get the correct current offset
|
|
||||||
synchronized(thisCalendar) {
|
|
||||||
thisCalendar.setTimeInMillis(System.currentTimeMillis());
|
|
||||||
final long zoneOffsetHours = thisCalendar.get(Calendar.ZONE_OFFSET);
|
|
||||||
final long DSTOffsetHours = thisCalendar.get(Calendar.DST_OFFSET);
|
|
||||||
return zoneOffsetHours + DSTOffsetHours;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static long UTCDiff(final String diffString) {
|
|
||||||
if (diffString.length() != 5) throw new IllegalArgumentException("UTC String malformed (wrong size):" + diffString);
|
|
||||||
boolean ahead = true;
|
|
||||||
if (diffString.length() > 0 && diffString.charAt(0) == '+') ahead = true;
|
|
||||||
else if (diffString.length() > 0 && diffString.charAt(0) == '-') ahead = false;
|
|
||||||
else throw new IllegalArgumentException("UTC String malformed (wrong sign):" + diffString);
|
|
||||||
final long oh = Long.parseLong(diffString.substring(1, 3));
|
|
||||||
final long om = Long.parseLong(diffString.substring(3));
|
|
||||||
return ((ahead) ? (long) 1 : (long) -1) * (oh * hourMillis + om * minuteMillis);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static long correctedUTCTime() {
|
|
||||||
return System.currentTimeMillis() - UTCDiff();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static long remainingTime(final long start, final long due, final long minimum) {
|
|
||||||
if (due < 0) return -1;
|
|
||||||
final long r = due + start - System.currentTimeMillis();
|
|
||||||
if (r <= 0) return minimum;
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(final String[] args) {
|
|
||||||
//System.out.println("kelondroDate is (" + new kelondroDate().toString() + ")");
|
|
||||||
System.out.println("offset is " + (UTCDiff()/1000/60/60) + " hours, javaDate is " + new Date() + ", correctedDate is " + new Date(correctedUTCTime()));
|
|
||||||
System.out.println(" javaDate : " + formatShortSecond());
|
|
||||||
System.out.println("serverDate : " + new DateFormatter().toString());
|
|
||||||
System.out.println(" JavaDate : " + DateFormat.getDateInstance().format(new Date()));
|
|
||||||
System.out.println(" JavaDate0: " + format(FORMAT_SHORT_SECOND, new Date(0)));
|
|
||||||
System.out.println(" JavaDate0: " + DateFormat.getDateInstance().format(new Date(0)));
|
|
||||||
//String testresult;
|
|
||||||
final int cycles = 10000;
|
|
||||||
long start;
|
|
||||||
|
|
||||||
final String[] testresult = new String[10000];
|
|
||||||
start = System.currentTimeMillis();
|
|
||||||
for (int i = 0; i < cycles; i++) testresult[i] = format(FORMAT_SHORT_SECOND, new Date());
|
|
||||||
System.out.println("time for " + cycles + " calls to javaDate:" + (System.currentTimeMillis() - start) + " milliseconds");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,41 +0,0 @@
|
|||||||
// kelondroOutOfLimitsException.java
|
|
||||||
// ---------------------------------
|
|
||||||
// part of The Kelondro Database
|
|
||||||
// (C) by Michael Peter Christen; mc@yacy.net
|
|
||||||
// first published on http://www.anomic.de
|
|
||||||
// Frankfurt, Germany, 2006
|
|
||||||
// created: 17.01.2006
|
|
||||||
//
|
|
||||||
// This program is free software; you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation; either version 2 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with this program; if not, write to the Free Software
|
|
||||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
package net.yacy.kelondro.util;
|
|
||||||
|
|
||||||
/**
 * Unchecked exception that signals an object size outside of its allowed
 * limits: either above an expected limit or negative.
 */
public class kelondroOutOfLimitsException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /** Creates the exception with a generic message. */
    public kelondroOutOfLimitsException() {
        super("unspecific-error");
    }

    /**
     * Creates the exception for a size above its limit.
     *
     * @param expectedLimit the size limit that was exceeded
     * @param actualSize    the offending size
     */
    public kelondroOutOfLimitsException(final int expectedLimit, final int actualSize) {
        super(exceededMessage(expectedLimit, actualSize));
    }

    /**
     * Creates the exception for a negative size.
     *
     * @param actualSize the offending size
     */
    public kelondroOutOfLimitsException(final int actualSize) {
        super(negativeMessage(actualSize));
    }

    /** Builds the message for a size above its limit. */
    private static String exceededMessage(final int expectedLimit, final int actualSize) {
        final StringBuilder message = new StringBuilder("Object size is ");
        message.append(actualSize).append("; it exceeds the size limit ").append(expectedLimit);
        return message.toString();
    }

    /** Builds the message for a negative size. */
    private static String negativeMessage(final int actualSize) {
        final StringBuilder message = new StringBuilder("Object size is ");
        message.append(actualSize).append("; must not be negative");
        return message.toString();
    }

}
|
|
Loading…
Reference in new issue