diff --git a/htroot/CrawlStartExpert.html b/htroot/CrawlStartExpert.html
index a68125815..90b83ef59 100644
--- a/htroot/CrawlStartExpert.html
+++ b/htroot/CrawlStartExpert.html
@@ -513,7 +513,7 @@
diff --git a/htroot/CrawlStartSite.html b/htroot/CrawlStartSite.html
index dddbc4ff2..8127e7770 100644
--- a/htroot/CrawlStartSite.html
+++ b/htroot/CrawlStartSite.html
@@ -91,6 +91,7 @@
+
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 9a6e786de..8b0e39801 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -470,6 +470,8 @@ public class Crawler_p {
}
}
+ int timezoneOffset = post.getInt("timezoneOffset", 0);
+
// prepare a new crawling profile
final CrawlProfile profile;
byte[] handle;
@@ -502,7 +504,8 @@ public class Crawler_p {
cachePolicy,
collection,
agentName,
- new VocabularyScraper(vocabulary_scraper));
+ new VocabularyScraper(vocabulary_scraper),
+ timezoneOffset);
handle = ASCII.getBytes(profile.handle());
// before we fire up a new crawl, we make sure that another crawl with the same name is not running
@@ -585,7 +588,7 @@ public class Crawler_p {
try {
// check if the crawl filter works correctly
Pattern.compile(newcrawlingMustMatch);
- final ContentScraper scraper = new ContentScraper(new DigestURL(crawlingFile), 10000000, new VocabularyScraper());
+ final ContentScraper scraper = new ContentScraper(new DigestURL(crawlingFile), 10000000, new VocabularyScraper(), timezoneOffset);
final Writer writer = new TransformerWriter(null, null, scraper, null, false);
if (crawlingFile != null && crawlingFile.exists()) {
FileUtils.copy(new FileInputStream(crawlingFile), writer);
@@ -605,7 +608,7 @@ public class Crawler_p {
}
sb.crawler.putActive(handle, profile);
- sb.crawlStacker.enqueueEntriesAsynchronous(sb.peers.mySeed().hash.getBytes(), profile.handle(), hyperlinks);
+ sb.crawlStacker.enqueueEntriesAsynchronous(sb.peers.mySeed().hash.getBytes(), profile.handle(), hyperlinks, profile.timezoneOffset());
} catch (final PatternSyntaxException e) {
prop.put("info", "4"); // crawlfilter does not match url
prop.putHTML("info_newcrawlingfilter", newcrawlingMustMatch);
diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java
index 61629bb50..bb63d90a3 100644
--- a/htroot/HostBrowser.java
+++ b/htroot/HostBrowser.java
@@ -161,7 +161,8 @@ public class HostBrowser {
sb.peers.mySeed().hash.getBytes(),
url, null, load, new Date(),
sb.crawler.defaultProxyProfile.handle(),
- 0
+ 0,
+ sb.crawler.defaultProxyProfile.timezoneOffset()
));
prop.putHTML("result", reasonString == null ? ("added url to indexer: " + load) : ("not indexed url '" + load + "': " + reasonString));
if (wait) waitloop: for (int i = 0; i < 30; i++) {
diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java
index 98c8c317b..a7e13a0b8 100644
--- a/htroot/IndexControlRWIs_p.java
+++ b/htroot/IndexControlRWIs_p.java
@@ -637,11 +637,12 @@ public class IndexControlRWIs_p {
final QueryGoal qg = new QueryGoal(queryhashes, null);
final QueryParams query = new QueryParams(
qg,
- new QueryModifier(),
+ new QueryModifier(0),
Integer.MAX_VALUE,
"",
ContentDomain.ALL,
"", //lang
+ 0, //timezoneOffset
null,
CacheStrategy.IFFRESH,
1000, 0, //count, offset
diff --git a/htroot/NetworkHistory.java b/htroot/NetworkHistory.java
index ef7c329df..cc723ef89 100644
--- a/htroot/NetworkHistory.java
+++ b/htroot/NetworkHistory.java
@@ -74,7 +74,7 @@ public class NetworkHistory {
while (rowi.hasNext()) {
Row row = rowi.next();
String d = ASCII.String(row.getPK());
- Date date = GenericFormatter.SHORT_MINUTE_FORMATTER.parse(d);
+ Date date = GenericFormatter.SHORT_MINUTE_FORMATTER.parse(d, 0).getTime();
if (date.getTime() < timelimit) break;
statrow = new HashMap<>();
for (String key: columns) {
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index 106b10151..2b0b599b8 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -128,7 +128,8 @@ public class QuickCrawlLink_p {
final byte[] urlhash = crawlingStartURL.hash();
indexSegment.fulltext().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
-
+ int timezoneOffset = post.getInt("timezoneOffset", 0);
+
// create crawling profile
CrawlProfile pe = null;
try {
@@ -156,7 +157,8 @@ public class QuickCrawlLink_p {
CacheStrategy.IFFRESH,
collection,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ timezoneOffset);
sb.crawler.putActive(pe.handle().getBytes(), pe);
} catch (final Exception e) {
// darn
@@ -175,7 +177,8 @@ public class QuickCrawlLink_p {
(title==null)?"CRAWLING-ROOT":title,
new Date(),
pe.handle(),
- 0
+ 0,
+ pe.timezoneOffset()
));
// validate rejection reason
diff --git a/htroot/api/bookmarks/posts/get.java b/htroot/api/bookmarks/posts/get.java
index f95cd391c..fabc9b38b 100644
--- a/htroot/api/bookmarks/posts/get.java
+++ b/htroot/api/bookmarks/posts/get.java
@@ -39,7 +39,7 @@ public class get {
Date parsedDate = null;
try {
- parsedDate = ISO8601Formatter.FORMATTER.parse(date);
+ parsedDate = ISO8601Formatter.FORMATTER.parse(date, 0).getTime();
} catch (final ParseException e) {
parsedDate = new Date();
}
diff --git a/htroot/api/push_p.java b/htroot/api/push_p.java
index a78e1d776..84689af62 100644
--- a/htroot/api/push_p.java
+++ b/htroot/api/push_p.java
@@ -103,7 +103,8 @@ public class push_p {
"", // the name of the document to crawl
new Date(), // current date
profile.handle(), // the name of the prefetch profile. This must not be null!
- 0); // forkfactor sum of anchors of all ancestors
+ 0, // forkfactor sum of anchors of all ancestors
+ profile.timezoneOffset());
Response response = new Response(
request,
requestHeader,
diff --git a/htroot/api/timeline_p.java b/htroot/api/timeline_p.java
index 9a129edbc..b9e4991b0 100644
--- a/htroot/api/timeline_p.java
+++ b/htroot/api/timeline_p.java
@@ -75,8 +75,8 @@ public final class timeline_p {
// get a time period
Date fromDate = new Date(0);
Date toDate = new Date();
- try {fromDate = GenericFormatter.SHORT_SECOND_FORMATTER.parse(post.get("from", "20031215182700"));} catch (ParseException e) {}
- try {toDate = GenericFormatter.SHORT_SECOND_FORMATTER.parse(post.get("to", GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date())));} catch (ParseException e) {}
+ try {fromDate = GenericFormatter.SHORT_SECOND_FORMATTER.parse(post.get("from", "20031215182700"), 0).getTime();} catch (ParseException e) {}
+ try {toDate = GenericFormatter.SHORT_SECOND_FORMATTER.parse(post.get("to", GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date())), 0).getTime();} catch (ParseException e) {}
// get latest dump;
AccessTracker.dumpLog();
diff --git a/htroot/index.html b/htroot/index.html
index b92b46652..ba3a2544e 100644
--- a/htroot/index.html
+++ b/htroot/index.html
@@ -80,6 +80,7 @@
+
::
diff --git a/htroot/rct_p.java b/htroot/rct_p.java
index e32092485..4fb381ac0 100644
--- a/htroot/rct_p.java
+++ b/htroot/rct_p.java
@@ -78,7 +78,8 @@ public class rct_p {
"REMOTE-CRAWLING",
loaddate,
sb.crawler.defaultRemoteProfile.handle(),
- 0));
+ 0,
+ sb.crawler.defaultRemoteProfile.timezoneOffset()));
} else {
env.getLog().warn("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
}
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index 58dae90da..a5ce1170b 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -118,7 +118,8 @@ public final class search {
final String prefer = post.get("prefer", "");
final String contentdom = post.get("contentdom", "all");
final String filter = post.get("filter", ".*"); // a filter on the url
- QueryModifier modifier = new QueryModifier();
+ final int timezoneOffset = post.getInt("timezoneOffset", 0);
+ QueryModifier modifier = new QueryModifier(timezoneOffset);
modifier.sitehost = post.get("sitehost", ""); if (modifier.sitehost.isEmpty()) modifier.sitehost = null;
modifier.sitehash = post.get("sitehash", ""); if (modifier.sitehash.isEmpty()) modifier.sitehash = null;
modifier.author = post.get("author", ""); if (modifier.author.isEmpty()) modifier.author = null;
@@ -232,6 +233,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
+ timezoneOffset,
new HashSet(),
null, // no snippet computation
count,
@@ -297,6 +299,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
+ timezoneOffset,
new HashSet(),
null, // no snippet computation
count,
diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java
index 4b042376f..980bd276a 100644
--- a/htroot/yacy/transferURL.java
+++ b/htroot/yacy/transferURL.java
@@ -55,7 +55,7 @@ public final class transferURL {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final long start = System.currentTimeMillis();
long freshdate = 0;
- try {freshdate = GenericFormatter.SHORT_DAY_FORMATTER.parse("20061101").getTime();} catch (final ParseException e1) {}
+ try {freshdate = GenericFormatter.SHORT_DAY_FORMATTER.parse("20061101", 0).getTime().getTime();} catch (final ParseException e1) {}
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index afaf443ea..c9ba12167 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -108,6 +108,7 @@ Use the RSS search result format to add static searches to your RSS reader, if y
+
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index a27aaf109..8494ab05e 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -214,6 +214,9 @@ public class yacysearch {
prop.setOutgoingHeader(outgoingHeader);
}
+ // time zone
+ int timezoneOffset = post.getInt("timezoneOffset", 0);
+
// collect search attributes
int itemsPerPage =
@@ -359,7 +362,7 @@ public class yacysearch {
}
final RankingProfile ranking = sb.getRanking();
- final QueryModifier modifier = new QueryModifier();
+ final QueryModifier modifier = new QueryModifier(timezoneOffset);
querystring = modifier.parse(querystring);
if (modifier.sitehost != null && modifier.sitehost.length() > 0 && querystring.length() == 0) querystring = "*"; // allow to search for all documents on a host
@@ -643,6 +646,7 @@ public class yacysearch {
prefermask,
contentdom,
language,
+ timezoneOffset,
metatags,
snippetFetchStrategy,
itemsPerPage,
diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java
index f0b445056..a0f39d297 100644
--- a/htroot/yacysearchtrailer.java
+++ b/htroot/yacysearchtrailer.java
@@ -390,9 +390,9 @@ public class yacysearchtrailer {
navigatorIterator = theSearch.dateNavigator.iterator(); // this iterator is different as it iterates by the key order (which is a date order)
int i = 0, pos = 0, neg = 0;
long dx = -1;
- Date fromconstraint = theSearch.getQuery().modifier.from == null ? null : DateDetection.parseLine(theSearch.getQuery().modifier.from);
+ Date fromconstraint = theSearch.getQuery().modifier.from == null ? null : DateDetection.parseLine(theSearch.getQuery().modifier.from, theSearch.getQuery().timezoneOffset);
if (fromconstraint == null) fromconstraint = new Date(System.currentTimeMillis() - AbstractFormatter.normalyearMillis);
- Date toconstraint = theSearch.getQuery().modifier.to == null ? null : DateDetection.parseLine(theSearch.getQuery().modifier.to);
+ Date toconstraint = theSearch.getQuery().modifier.to == null ? null : DateDetection.parseLine(theSearch.getQuery().modifier.to, theSearch.getQuery().timezoneOffset);
if (toconstraint == null) toconstraint = new Date(System.currentTimeMillis() + AbstractFormatter.normalyearMillis);
while (i < QueryParams.FACETS_DATE_MAXCOUNT && navigatorIterator.hasNext()) {
name = navigatorIterator.next().trim();
diff --git a/source/net/yacy/cora/date/AbstractFormatter.java b/source/net/yacy/cora/date/AbstractFormatter.java
index 2a54df377..932fae059 100644
--- a/source/net/yacy/cora/date/AbstractFormatter.java
+++ b/source/net/yacy/cora/date/AbstractFormatter.java
@@ -25,13 +25,19 @@
package net.yacy.cora.date;
import java.text.ParseException;
+import java.util.Calendar;
import java.util.Date;
import java.util.TimeZone;
public abstract class AbstractFormatter implements DateFormatter {
- protected static final TimeZone TZ_GMT = TimeZone.getTimeZone("GMT");
-
+ public final static Calendar testCalendar = Calendar.getInstance(); // a calendar in the current time zone of the server
+ public final static Calendar UTCCalendar = Calendar.getInstance();
+ public final static TimeZone UTCtimeZone = TimeZone.getTimeZone("UTC");
+ static {
+ UTCCalendar.setTimeZone(UTCtimeZone);
+ }
+
// statics
public final static long secondMillis = 1000;
public final static long minuteMillis = 60 * secondMillis;
@@ -45,7 +51,7 @@ public abstract class AbstractFormatter implements DateFormatter {
protected String last_format;
@Override
- public abstract Date parse(String s) throws ParseException;
+ public abstract Calendar parse(String s, int timezoneOffset) throws ParseException;
@Override
public abstract String format(final Date date);
@Override
diff --git a/source/net/yacy/cora/date/DateFormatter.java b/source/net/yacy/cora/date/DateFormatter.java
index 0e1e2e787..f929534d1 100644
--- a/source/net/yacy/cora/date/DateFormatter.java
+++ b/source/net/yacy/cora/date/DateFormatter.java
@@ -25,11 +25,12 @@
package net.yacy.cora.date;
import java.text.ParseException;
+import java.util.Calendar;
import java.util.Date;
public interface DateFormatter {
- public Date parse(String s) throws ParseException;
+ public Calendar parse(String s, int timezoneOffset) throws ParseException;
public String format(final Date date);
public String format();
diff --git a/source/net/yacy/cora/date/GenericFormatter.java b/source/net/yacy/cora/date/GenericFormatter.java
index e824f383d..16c6084d2 100644
--- a/source/net/yacy/cora/date/GenericFormatter.java
+++ b/source/net/yacy/cora/date/GenericFormatter.java
@@ -30,6 +30,7 @@ import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
+import java.util.TimeZone;
import net.yacy.cora.util.NumberTools;
@@ -51,14 +52,11 @@ public class GenericFormatter extends AbstractFormatter implements DateFormatter
public static final SimpleDateFormat FORMAT_ANSIC = new SimpleDateFormat(PATTERN_ANSIC, Locale.US);
public static final SimpleDateFormat FORMAT_SIMPLE = new SimpleDateFormat(PATTERN_SIMPLE, Locale.US);
- // find out time zone and DST offset
- private static Calendar thisCalendar = Calendar.getInstance();
-
static {
// we want UTC times on the formats as they do not support any timezone
- FORMAT_SHORT_DAY.setTimeZone(TZ_GMT);
- FORMAT_SHORT_SECOND.setTimeZone(TZ_GMT);
- FORMAT_SHORT_MILSEC.setTimeZone(TZ_GMT);
+ FORMAT_SHORT_DAY.setTimeZone(UTCtimeZone);
+ FORMAT_SHORT_SECOND.setTimeZone(UTCtimeZone);
+ FORMAT_SHORT_MILSEC.setTimeZone(UTCtimeZone);
}
public static final long time_second = 1000L;
@@ -124,56 +122,55 @@ public class GenericFormatter extends AbstractFormatter implements DateFormatter
* the String.
*/
@Override
- public Date parse(final String timeString) throws ParseException {
+ public Calendar parse(final String timeString, final int timezoneOffset) throws ParseException {
synchronized (this.dateFormat) {
- return this.dateFormat.parse(timeString);
+ Calendar cal = Calendar.getInstance(UTCtimeZone);
+ cal.setTime(this.dateFormat.parse(timeString));
+ cal.add(Calendar.MINUTE, timezoneOffset); // apply the client's offset: a browser in UTC+1 reports -60, so adding it shifts a time given in UTC+1 back to the actual time at UTC
+ return cal;
}
}
-
+
/**
* Like {@link #parseShortSecond(String)} using additional timezone information provided in an
* offset String, like "+0100" for CET.
+ * @throws ParseException
*/
- public Date parse(final String timeString, final String UTCOffset) {
+ public Calendar parse(final String timeString, final String UTCOffset) throws ParseException {
// FIXME: This method returns an incorrect date, check callers!
// ex: de.anomic.server.serverDate.parseShortSecond("20070101120000", "+0200").toGMTString()
// => 1 Jan 2007 13:00:00 GMT
- if (timeString == null || timeString.isEmpty()) { return new Date(); }
- if (UTCOffset == null || UTCOffset.isEmpty()) { return new Date(); }
- try {
- synchronized (this.dateFormat) {
- return new Date(this.dateFormat.parse(timeString).getTime() - UTCDiff() + UTCDiff(UTCOffset));
- }
- } catch (final Throwable e) {
- //serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]");
- return new Date();
- }
+ if (timeString == null || timeString.isEmpty()) { return Calendar.getInstance(UTCtimeZone); }
+ if (UTCOffset == null || UTCOffset.isEmpty()) { return Calendar.getInstance(UTCtimeZone); }
+ return parse(timeString, UTCDiff(UTCOffset));
}
- private static long UTCDiff(final String diffString) {
+ private static int UTCDiff(final String diffString) {
if (diffString.length() != 5) throw new IllegalArgumentException("UTC String malformed (wrong size):" + diffString);
boolean ahead = true;
if (diffString.length() > 0 && diffString.charAt(0) == '+') ahead = true;
else if (diffString.length() > 0 && diffString.charAt(0) == '-') ahead = false;
else throw new IllegalArgumentException("UTC String malformed (wrong sign):" + diffString);
- final long oh = NumberTools.parseLongDecSubstring(diffString, 1, 3);
- final long om = NumberTools.parseLongDecSubstring(diffString, 3);
- return ((ahead) ? (long) 1 : (long) -1) * (oh * AbstractFormatter.hourMillis + om * AbstractFormatter.minuteMillis);
+ final int oh = NumberTools.parseIntDecSubstring(diffString, 1, 3);
+ final int om = NumberTools.parseIntDecSubstring(diffString, 3);
+ // return minutes, not milliseconds: parse(String, int) applies this offset via Calendar.MINUTE;
+ // the sign must be parenthesized, otherwise the ternary swallows the multiplication
+ return ((ahead) ? 1 : -1) * (oh * 60 + om);
}
-
+
+ /**
+ * get the difference of this server's time zone to UTC/GMT in milliseconds
+ * @return the local zone offset to UTC in milliseconds, including daylight saving time
+ */
private static long UTCDiff() {
// DST_OFFSET is dependent on the time of the Calendar, so it has to be updated
// to get the correct current offset
- synchronized (thisCalendar) {
- thisCalendar.setTimeInMillis(System.currentTimeMillis());
- final long zoneOffsetHours = thisCalendar.get(Calendar.ZONE_OFFSET);
- final long DSTOffsetHours = thisCalendar.get(Calendar.DST_OFFSET);
+ synchronized (testCalendar) {
+ testCalendar.setTimeInMillis(System.currentTimeMillis());
+ final long zoneOffsetHours = testCalendar.get(Calendar.ZONE_OFFSET);
+ final long DSTOffsetHours = testCalendar.get(Calendar.DST_OFFSET);
return zoneOffsetHours + DSTOffsetHours;
}
}
-
- private final static DecimalFormat D2 = new DecimalFormat("00");
-
+
public static String UTCDiffString() {
// we express the UTC Difference in 5 digits:
// SHHMM
@@ -195,11 +192,9 @@ public class GenericFormatter extends AbstractFormatter implements DateFormatter
return sb.toString();
}
- public static long correctedUTCTime() {
- return System.currentTimeMillis() - UTCDiff();
- }
+ private final static DecimalFormat D2 = new DecimalFormat("00");
- public static void main(final String[] args) {
+ public static void main(String[] args) {
System.out.println(UTCDiffString());
}
}
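A usage sketch of the reworked `parse()` contract: it now returns a UTC `Calendar` with the minute offset already applied, so existing callers chain `.getTime()` to keep receiving a `Date`. Falling back to the current date on `ParseException` mirrors what several call sites in this patch do:

```java
import java.text.ParseException;
import java.util.Date;
import net.yacy.cora.date.GenericFormatter;

// Sketch of the call pattern used throughout this patch.
public final class ShortSecondParseSketch {
    public static Date parseClientLocal(final String s, final int timezoneOffset) {
        try {
            // e.g. "20070101120000" sent by a UTC+1 browser (offset -60)
            // comes back as 2007-01-01 11:00:00 UTC
            return GenericFormatter.SHORT_SECOND_FORMATTER.parse(s, timezoneOffset).getTime();
        } catch (final ParseException e) {
            return new Date(); // the fallback used by the bookmark and blog call sites
        }
    }
}
```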
diff --git a/source/net/yacy/cora/date/ISO8601Formatter.java b/source/net/yacy/cora/date/ISO8601Formatter.java
index 27ff6f45f..e57dfbfa6 100644
--- a/source/net/yacy/cora/date/ISO8601Formatter.java
+++ b/source/net/yacy/cora/date/ISO8601Formatter.java
@@ -41,7 +41,7 @@ public class ISO8601Formatter extends AbstractFormatter implements DateFormatter
private static final SimpleDateFormat FORMAT_ISO8601 = new SimpleDateFormat(PATTERN_ISO8601, Locale.US);
static {
- FORMAT_ISO8601.setTimeZone(TZ_GMT);
+ FORMAT_ISO8601.setTimeZone(AbstractFormatter.UTCtimeZone);
}
public static final ISO8601Formatter FORMATTER = new ISO8601Formatter();
@@ -72,7 +72,7 @@ public class ISO8601Formatter extends AbstractFormatter implements DateFormatter
* @throws ParseException
*/
@Override
- public Date parse(String s) throws ParseException {
+ public Calendar parse(String s, final int timezoneOffset) throws ParseException {
// do some lazy checks here
s = s.trim();
while (!s.isEmpty() && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if the writer is not sure about the date
@@ -87,7 +87,7 @@ public class ISO8601Formatter extends AbstractFormatter implements DateFormatter
while (!s.isEmpty() && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if the writer is not sure about the date
// no go for exact parsing
- final Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);
+ final Calendar cal = Calendar.getInstance(AbstractFormatter.UTCtimeZone, Locale.US);
cal.clear();
// split 2007-12-19T10:20:30.789+0500 into its parts
@@ -103,13 +103,13 @@ public class ISO8601Formatter extends AbstractFormatter implements DateFormatter
if (t.nextToken().equals("-")) {
cal.set(Calendar.MONTH, Integer.parseInt(t.nextToken()) - 1);
} else {
- return cal.getTime();
+ return cal;
}
// day
if (t.nextToken().equals("-")) {
cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(t.nextToken()));
} else {
- return cal.getTime();
+ return cal;
}
// The standard says:
// if there is an hour there has to be a minute and a timezone token, too.
@@ -147,7 +147,7 @@ public class ISO8601Formatter extends AbstractFormatter implements DateFormatter
sign = -1;
} else {
// no legal TZ offset found
- return cal.getTime();
+ return cal;
}
offset = sign * Integer.parseInt(t.nextToken()) * 10 * 3600;
}
@@ -168,8 +168,7 @@ public class ISO8601Formatter extends AbstractFormatter implements DateFormatter
// in case we couldn't even parse a year
if (!cal.isSet(Calendar.YEAR))
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
- Date d = cal.getTime();
- return d;
+ return cal;
}
diff --git a/source/net/yacy/cora/document/feed/RSSMessage.java b/source/net/yacy/cora/document/feed/RSSMessage.java
index aea58547e..340d01e99 100644
--- a/source/net/yacy/cora/document/feed/RSSMessage.java
+++ b/source/net/yacy/cora/document/feed/RSSMessage.java
@@ -224,7 +224,7 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {
diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java
--- a/source/net/yacy/crawler/CrawlStacker.java
+++ b/source/net/yacy/crawler/CrawlStacker.java
- private void enqueueEntries(final byte[] initiator, final String profileHandle, final List<AnchorURL> hyperlinks, final boolean replace) {
+ private void enqueueEntries(
+ final byte[] initiator,
+ final String profileHandle,
+ final List<AnchorURL> hyperlinks,
+ final boolean replace,
+ final int timezoneOffset) {
if (replace) {
// delete old entries, if they exist, to force a re-load of the url (that's wanted here)
Set<String> hosthashes = new HashSet<String>();
@@ -199,7 +208,7 @@ public final class CrawlStacker {
int p = userInfo == null ? -1 : userInfo.indexOf(':');
String user = userInfo == null ? FTPClient.ANONYMOUS : userInfo.substring(0, p);
String pw = userInfo == null || p == -1 ? "anomic" : userInfo.substring(p + 1);
- enqueueEntriesFTP(initiator, profileHandle, url.getHost(), url.getPort(), user, pw, replace);
+ enqueueEntriesFTP(initiator, profileHandle, url.getHost(), url.getPort(), user, pw, replace, timezoneOffset);
} else {
// put entry on crawl stack
enqueueEntry(new Request(
@@ -209,13 +218,22 @@ public final class CrawlStacker {
url.getNameProperty(),
new Date(),
profileHandle,
- 0
+ 0,
+ timezoneOffset
));
}
}
}
- public void enqueueEntriesFTP(final byte[] initiator, final String profileHandle, final String host, final int port, final String user, final String pw, final boolean replace) {
+ public void enqueueEntriesFTP(
+ final byte[] initiator,
+ final String profileHandle,
+ final String host,
+ final int port,
+ final String user,
+ final String pw,
+ final boolean replace,
+ final int timezoneOffset) {
final CrawlQueues cq = this.nextQueue;
new Thread() {
@Override
@@ -248,7 +266,8 @@ public final class CrawlStacker {
MultiProtocolURL.unescape(entry.name),
entry.date,
profileHandle,
- 0));
+ 0,
+ timezoneOffset));
}
} catch (final IOException e1) {
ConcurrentLog.logException(e1);
@@ -272,7 +291,7 @@ public final class CrawlStacker {
"CRAWLING-ROOT",
new Date(),
pe.handle(),
- 0));
+ 0, 0));
}
/**
diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java
index 4472c59e0..fcce03c4b 100644
--- a/source/net/yacy/crawler/CrawlSwitchboard.java
+++ b/source/net/yacy/crawler/CrawlSwitchboard.java
@@ -296,7 +296,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFFRESH,
"robot_" + CRAWL_PROFILE_PROXY,
ClientIdentification.yacyProxyAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultProxyProfile.handle()),
this.defaultProxyProfile);
@@ -327,7 +328,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFFRESH,
"robot_" + CRAWL_PROFILE_REMOTE,
ClientIdentification.yacyInternetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultRemoteProfile.handle()),
this.defaultRemoteProfile);
@@ -358,7 +360,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()),
this.defaultTextSnippetLocalProfile);
@@ -389,7 +392,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()),
this.defaultTextSnippetGlobalProfile);
@@ -421,7 +425,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_GREEDY_LEARNING_TEXT,
ClientIdentification.browserAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()),
this.defaultTextSnippetGlobalProfile);
@@ -452,7 +457,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()),
this.defaultMediaSnippetLocalProfile);
@@ -483,7 +489,8 @@ public final class CrawlSwitchboard {
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()),
this.defaultMediaSnippetGlobalProfile);
@@ -514,7 +521,8 @@ public final class CrawlSwitchboard {
CacheStrategy.NOCACHE,
"robot_" + CRAWL_PROFILE_SURROGATE,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultSurrogateProfile.handle()),
this.defaultSurrogateProfile);
@@ -548,7 +556,8 @@ public final class CrawlSwitchboard {
CacheStrategy.NOCACHE,
collection,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null);
+ null,
+ 0);
this.profilesActiveCrawls.put(UTF8.getBytes(genericPushProfile.handle()), genericPushProfile);
this.defaultPushProfiles.put(collection, genericPushProfile);
return genericPushProfile;
diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java
index f90b25a7f..5a87a2f10 100644
--- a/source/net/yacy/crawler/data/CrawlProfile.java
+++ b/source/net/yacy/crawler/data/CrawlProfile.java
@@ -80,6 +80,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
public static final String CACHE_STRAGEGY = "cacheStrategy";
public static final String COLLECTIONS = "collections";
public static final String SCRAPER = "scraper";
+ public static final String TIMEZONEOFFSET = "timezoneOffset";
public static final String CRAWLER_URL_MUSTMATCH = "crawlerURLMustMatch";
public static final String CRAWLER_URL_MUSTNOTMATCH = "crawlerURLMustNotMatch";
public static final String CRAWLER_IP_MUSTMATCH = "crawlerIPMustMatch";
@@ -131,6 +132,9 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
* @param xpstopw true if parent stop words shall be ignored
* @param cacheStrategy determines if and how cache is used loading content
* @param collections a comma-separated list of tags which are attached to index entries
+ * @param userAgentName the profile name of the user agent to be used
+ * @param scraper a scraper for vocabularies
+ * @param timezoneOffset the time offset in minutes for scraped dates in text without time zone
*/
public CrawlProfile(
String name,
@@ -155,7 +159,8 @@ public class CrawlProfile extends ConcurrentHashMap implements M
final CacheStrategy cacheStrategy,
final String collections,
final String userAgentName,
- final VocabularyScraper scraper) {
+ final VocabularyScraper scraper,
+ final int timezoneOffset) {
super(40);
if (name == null || name.isEmpty()) {
throw new NullPointerException("name must not be null or empty");
@@ -155,7 +159,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
String jsonString = this.scraper.toString();
assert jsonString != null && jsonString.length() > 0 && jsonString.charAt(0) == '{' : "jsonString = " + jsonString;
put(SCRAPER, jsonString);
+ put(TIMEZONEOFFSET, timezoneOffset);
}
/**
@@ -623,6 +629,16 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
return (r.equals(Boolean.TRUE.toString()));
}
+ public int timezoneOffset() {
+ final String timezoneOffset = get(TIMEZONEOFFSET);
+ if (timezoneOffset == null) return 0;
+ try {
+ return Integer.parseInt(timezoneOffset);
+ } catch (NumberFormatException e) {
+ return 0;
+ }
+ }
+
/**
* get a recrawl date for a given age in minutes
* @param oldTimeMinutes
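Because `CrawlProfile` is backed by a string map, `put(TIMEZONEOFFSET, timezoneOffset)` stores the offset as text and the new accessor re-parses it, defaulting to 0 for profiles written before this patch. A self-contained sketch of that round trip (the class is illustrative; the key name and fallback match the patch):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative stand-in for CrawlProfile's Map<String, String> backing.
public class ProfileOffsetSketch {
    public static final String TIMEZONEOFFSET = "timezoneOffset";
    private final Map<String, String> entries = new ConcurrentHashMap<>();

    public void setTimezoneOffset(final int timezoneOffset) {
        this.entries.put(TIMEZONEOFFSET, Integer.toString(timezoneOffset));
    }

    public int timezoneOffset() {
        final String s = this.entries.get(TIMEZONEOFFSET);
        if (s == null) return 0; // legacy profile without the key
        try {
            return Integer.parseInt(s);
        } catch (final NumberFormatException e) {
            return 0; // malformed entry, same fallback as the new accessor
        }
    }
}
```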
diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java
index 62962e045..5a9b0c4a1 100644
--- a/source/net/yacy/crawler/data/CrawlQueues.java
+++ b/source/net/yacy/crawler/data/CrawlQueues.java
@@ -531,7 +531,8 @@ public class CrawlQueues {
item.getDescriptions().size() > 0 ? item.getDescriptions().get(0) : "",
loaddate,
this.sb.crawler.defaultRemoteProfile.handle(),
- 0
+ 0,
+ this.sb.crawler.defaultRemoteProfile.timezoneOffset()
));
} else {
CrawlQueues.log.warn("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
diff --git a/source/net/yacy/crawler/data/Snapshots.java b/source/net/yacy/crawler/data/Snapshots.java
index 40e5fce30..abf8f981e 100644
--- a/source/net/yacy/crawler/data/Snapshots.java
+++ b/source/net/yacy/crawler/data/Snapshots.java
@@ -359,10 +359,10 @@ public class Snapshots {
private static Date parseDate(String d) {
try {
- return GenericFormatter.SHORT_MINUTE_FORMATTER.parse(d);
+ return GenericFormatter.SHORT_MINUTE_FORMATTER.parse(d, 0).getTime();
} catch (ParseException e) {
try {
- return GenericFormatter.SHORT_DAY_FORMATTER.parse(d);
+ return GenericFormatter.SHORT_DAY_FORMATTER.parse(d, 0).getTime();
} catch (ParseException ee) {
return null;
}
diff --git a/source/net/yacy/crawler/retrieval/Request.java b/source/net/yacy/crawler/retrieval/Request.java
index 81bbaa96f..e02b2fdcb 100644
--- a/source/net/yacy/crawler/retrieval/Request.java
+++ b/source/net/yacy/crawler/retrieval/Request.java
@@ -92,7 +92,8 @@ public class Request extends WorkflowJob
private Bitfield flags;
private String statusMessage;
private int initialHash; // to provide a object hash that does not change even if the url changes because of redirection
-
+ private int timezoneOffset;
+
public Request() {
// used only to create poison entries
this.initiator = null;
@@ -106,6 +107,7 @@ public class Request extends WorkflowJob
this.statusMessage = null;
this.initialHash = 0;
this.status = 0;
+ this.timezoneOffset = 0;
}
/**
@@ -115,7 +117,7 @@ public class Request extends WorkflowJob
* @param referrerhash
*/
public Request(final DigestURL url, final byte[] referrerhash) {
- this(null, url, referrerhash, null, null, null, 0);
+ this(null, url, referrerhash, null, null, null, 0, 0);
}
/**
@@ -136,7 +138,8 @@ public class Request extends WorkflowJob
final String name,
final Date appdate,
final String profileHandle,
- final int depth) {
+ final int depth,
+ final int timezoneOffset) {
// create new entry and store it into database
assert url != null;
assert profileHandle == null || profileHandle.length() == Word.commonHashLength : profileHandle
@@ -150,6 +153,7 @@ public class Request extends WorkflowJob
this.appdate = (appdate == null) ? 0 : appdate.getTime();
this.profileHandle = profileHandle; // must not be null
this.depth = depth;
+ this.timezoneOffset = timezoneOffset;
this.flags = new Bitfield(rowdef.width(10));
this.statusMessage = "loaded(args)";
this.initialHash = url.hashCode();
@@ -271,6 +275,10 @@ public class Request extends WorkflowJob
// crawl depth where the url appeared
return this.depth;
}
+
+ public int timezoneOffset() {
+ return this.timezoneOffset;
+ }
public String profileHandle() {
// the handle of the crawl profile
diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java
index 615465199..4e1acb6ef 100644
--- a/source/net/yacy/crawler/retrieval/Response.java
+++ b/source/net/yacy/crawler/retrieval/Response.java
@@ -28,7 +28,6 @@ package net.yacy.crawler.retrieval;
import java.util.Date;
-import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@@ -260,7 +259,7 @@ public class Response {
if (docDate == null) docDate = this.responseHeader.date();
}
if (docDate == null && this.request != null) docDate = this.request.appdate();
- if (docDate == null) docDate = new Date(GenericFormatter.correctedUTCTime());
+ if (docDate == null) docDate = new Date();
return docDate;
}
@@ -372,7 +371,7 @@ public class Response {
if (date == null) return "stale_no_date_given_in_response";
try {
final long ttl = 1000 * NumberTools.parseLongDecSubstring(cacheControl, 8); // milliseconds to live
- if (GenericFormatter.correctedUTCTime() - date.getTime() > ttl) {
+ if (System.currentTimeMillis() - date.getTime() > ttl) {
//System.out.println("***not indexed because cache-control");
return "stale_expired";
}
@@ -461,8 +460,8 @@ public class Response {
if (!this.responseHeader.containsKey(HeaderFramework.LAST_MODIFIED)) { return false; }
// parse date
Date d1, d2;
- d2 = this.responseHeader.lastModified(); if (d2 == null) { d2 = new Date(GenericFormatter.correctedUTCTime()); }
- d1 = this.requestHeader.ifModifiedSince(); if (d1 == null) { d1 = new Date(GenericFormatter.correctedUTCTime()); }
+ d2 = this.responseHeader.lastModified(); if (d2 == null) { d2 = new Date(); }
+ d1 = this.requestHeader.ifModifiedSince(); if (d1 == null) { d1 = new Date(); }
// finally, we shall treat the cache as stale if the modification time is after the if-.. time
if (d2.after(d1)) { return false; }
}
@@ -501,9 +500,10 @@ public class Response {
// -expires in cached response
// the expires value gives us a very easy hint when the cache is stale
final Date expires = this.responseHeader.expires();
+ final Date now = new Date();
if (expires != null) {
// System.out.println("EXPIRES-TEST: expires=" + expires + ", NOW=" + serverDate.correctedGMTDate() + ", url=" + url);
- if (expires.before(new Date(GenericFormatter.correctedUTCTime()))) { return false; }
+ if (expires.before(now)) { return false; }
}
final Date lastModified = this.responseHeader.lastModified();
cacheControl = this.responseHeader.get(HeaderFramework.CACHE_CONTROL);
@@ -517,13 +517,13 @@ public class Response {
// file may only be treated as fresh for one more month, not more.
Date date = this.responseHeader.date();
if (lastModified != null) {
- if (date == null) { date = new Date(GenericFormatter.correctedUTCTime()); }
+ if (date == null) { date = now; }
final long age = date.getTime() - lastModified.getTime();
if (age < 0) { return false; }
// TTL (Time-To-Live) is age/10 = (d2.getTime() - d1.getTime()) / 10
// the actual living-time is serverDate.correctedGMTDate().getTime() - d2.getTime()
// therefore the cache is stale, if serverDate.correctedGMTDate().getTime() - d2.getTime() > age/10
- if (GenericFormatter.correctedUTCTime() - date.getTime() > age / 10) { return false; }
+ if (now.getTime() - date.getTime() > age / 10) { return false; }
}
// -cache-control in cached response
@@ -542,7 +542,7 @@ public class Response {
if (date == null) { return false; }
try {
final long ttl = 1000 * NumberTools.parseLongDecSubstring(cacheControl, 8); // milliseconds to live
- if (GenericFormatter.correctedUTCTime() - date.getTime() > ttl) {
+ if (now.getTime() - date.getTime() > ttl) {
return false;
}
} catch (final Exception e) {
@@ -626,12 +626,11 @@ public class Response {
// -if-modified-since in request
// if the page is fresh at the very moment we can index it
final Date ifModifiedSince = this.ifModifiedSince();
+ final Date now = new Date();
if ((ifModifiedSince != null) && (this.responseHeader.containsKey(HeaderFramework.LAST_MODIFIED))) {
// parse date
Date d = this.responseHeader.lastModified();
- if (d == null) {
- d = new Date(GenericFormatter.correctedUTCTime());
- }
+ if (d == null) d = now;
// finally, we shall treat the cache as stale if the modification time is after the if-.. time
if (d.after(ifModifiedSince)) {
//System.out.println("***not indexed because if-modified-since");
@@ -655,7 +654,7 @@ public class Response {
// sometimes, the expires date is set to the past to prevent that a page is cached
// we use that information to see if we should index it
final Date expires = this.responseHeader.expires();
- if (expires != null && expires.before(new Date(GenericFormatter.correctedUTCTime()))) {
+ if (expires != null && expires.before(now)) {
return "Stale_(Expired)";
}
@@ -688,7 +687,7 @@ public class Response {
}
try {
final long ttl = 1000 * NumberTools.parseLongDecSubstring(cacheControl,8); // milliseconds to live
- if (GenericFormatter.correctedUTCTime() - date.getTime() > ttl) {
+ if (now.getTime() - date.getTime() > ttl) {
//System.out.println("***not indexed because cache-control");
return "Stale_(expired_by_cache-control)";
}
@@ -865,7 +864,7 @@ public class Response {
final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.mime());
if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url());
try {
- return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.mime(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.depth(), this.content);
+ return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.mime(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content);
} catch (final Exception e) {
return null;
}
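With `correctedUTCTime()` removed, all freshness checks above compare against plain wall-clock time. A compact sketch of the recurring `max-age` rule, where the TTL is parsed from the digits after the 8-character `max-age=` prefix; treating an unparseable value as stale is an assumption here, since the patch's catch bodies are not shown:

```java
import java.util.Date;

// Sketch of the Cache-Control freshness rule as used after this patch.
public final class FreshnessSketch {
    public static boolean isFresh(final String cacheControl, final Date responseDate) {
        if (cacheControl == null || !cacheControl.startsWith("max-age=")) return true;
        try {
            final long ttl = 1000L * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
            return System.currentTimeMillis() - responseDate.getTime() <= ttl;
        } catch (final NumberFormatException e) {
            return false; // assumption: unparseable max-age counts as stale
        }
    }
}
```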
diff --git a/source/net/yacy/crawler/retrieval/SitemapImporter.java b/source/net/yacy/crawler/retrieval/SitemapImporter.java
index 240f8239d..b28e13f11 100644
--- a/source/net/yacy/crawler/retrieval/SitemapImporter.java
+++ b/source/net/yacy/crawler/retrieval/SitemapImporter.java
@@ -108,7 +108,8 @@ public class SitemapImporter extends Thread {
entry.url(),
entry.lastmod(new Date()),
this.crawlingProfile.handle(),
- 0
+ 0,
+ this.crawlingProfile.timezoneOffset()
));
logger.info("New URL '" + entry.url() + "' added for loading.");
}
diff --git a/source/net/yacy/data/BlogBoard.java b/source/net/yacy/data/BlogBoard.java
index c1ec79f15..f97f7c794 100644
--- a/source/net/yacy/data/BlogBoard.java
+++ b/source/net/yacy/data/BlogBoard.java
@@ -210,7 +210,7 @@ public class BlogBoard {
}
try {
- date = GenericFormatter.SHORT_SECOND_FORMATTER.parse(StrDate);
+ date = GenericFormatter.SHORT_SECOND_FORMATTER.parse(StrDate, 0).getTime();
} catch (final ParseException e1) {
date = new Date();
}
@@ -404,7 +404,7 @@ public class BlogBoard {
}
return new Date();
}
- return GenericFormatter.SHORT_SECOND_FORMATTER.parse(date);
+ return GenericFormatter.SHORT_SECOND_FORMATTER.parse(date, 0).getTime();
} catch (final ParseException ex) {
return new Date();
}
diff --git a/source/net/yacy/data/BookmarkHelper.java b/source/net/yacy/data/BookmarkHelper.java
index c10c144c1..86f17ad90 100644
--- a/source/net/yacy/data/BookmarkHelper.java
+++ b/source/net/yacy/data/BookmarkHelper.java
@@ -139,7 +139,7 @@ public class BookmarkHelper {
final Set<String> tags = ListManager.string2set(tag); // this allows multiple default tags
try {
//load the links
- final ContentScraper scraper = new ContentScraper(baseURL, 10000, new VocabularyScraper());
+ final ContentScraper scraper = new ContentScraper(baseURL, 10000, new VocabularyScraper(), 0);
//OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
final Writer writer = new TransformerWriter(null, null, scraper, null, false);
FileUtils.copy(input,writer);
@@ -232,7 +232,7 @@ public class BookmarkHelper {
Date parsedDate = null;
try {
- parsedDate = ISO8601Formatter.FORMATTER.parse(time);
+ parsedDate = ISO8601Formatter.FORMATTER.parse(time, 0).getTime();
} catch (final ParseException e) {
parsedDate = new Date();
}
diff --git a/source/net/yacy/data/ymark/YMarkAutoTagger.java b/source/net/yacy/data/ymark/YMarkAutoTagger.java
index c80ff37a3..df5a2939d 100644
--- a/source/net/yacy/data/ymark/YMarkAutoTagger.java
+++ b/source/net/yacy/data/ymark/YMarkAutoTagger.java
@@ -87,7 +87,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
}
//get words from document
- final Map<String, Word> words = new Condenser(document, null, true, true, LibraryProvider.dymLib, false, false).words();
+ final Map<String, Word> words = new Condenser(document, null, true, true, LibraryProvider.dymLib, false, false, 0).words();
// generate potential tags from document title, description and subject
final int bufferSize = document.dc_title().length() + document.dc_description().length + document.dc_subject(' ').length() + 32;
diff --git a/source/net/yacy/data/ymark/YMarkCrawlStart.java b/source/net/yacy/data/ymark/YMarkCrawlStart.java
index b14c10dc9..562a9703f 100644
--- a/source/net/yacy/data/ymark/YMarkCrawlStart.java
+++ b/source/net/yacy/data/ymark/YMarkCrawlStart.java
@@ -190,7 +190,8 @@ public class YMarkCrawlStart extends HashMap<String, String>{
CacheStrategy.IFFRESH,
"robot_" + CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA,
ClientIdentification.yacyIntranetCrawlerAgentName,
- null); // TODO: make this a default profile in CrawlSwitchboard
+ null,
+ 0); // TODO: make this a default profile in CrawlSwitchboard
sb.crawler.putActive(pe.handle().getBytes(), pe);
return sb.crawlStacker.stackCrawl(new Request(
sb.peers.mySeed().hash.getBytes(),
@@ -198,7 +199,7 @@ public class YMarkCrawlStart extends HashMap{
null,
"CRAWLING-ROOT",
new Date(),
- pe.handle(), 0
+ pe.handle(), 0, pe.timezoneOffset()
));
}
}
diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java
index 2f7c2ffb5..6cf125d17 100644
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@@ -97,7 +97,8 @@ public final class Condenser {
final boolean indexMedia,
final WordCache meaningLib,
final boolean doAutotagging,
- final boolean findDatesInContent
+ final boolean findDatesInContent,
+ final int timezoneOffset
) {
Thread.currentThread().setName("condenser-" + document.dc_identifier()); // for debugging
// if addMedia == true, then all the media links are also parsed and added to the words
@@ -123,7 +124,7 @@ public final class Condenser {
Map.Entry entry;
if (indexText) {
String text = document.getTextString();
- if (findDatesInContent) this.dates_in_content = DateDetection.parse(text);
+ if (findDatesInContent) this.dates_in_content = DateDetection.parse(text, timezoneOffset);
createCondensement(document.dc_source(), text, meaningLib, doAutotagging, scraper);
// the phrase counter:
// phrase 0 are words taken from the URL
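A call sketch against the extended constructor, matching the argument order of the eight-argument call seen in YMarkAutoTagger above; import paths are taken from this patch where visible, `Word`'s path is assumed:

```java
import java.util.Map;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.VocabularyScraper;
import net.yacy.kelondro.data.word.Word; // assumed path
import net.yacy.repository.LibraryProvider;

public final class CondenserSketch {
    // `document` and `scraper` come from the calling context
    static Map<String, Word> condense(final Document document, final VocabularyScraper scraper) {
        return new Condenser(
                document, scraper,
                true,                   // indexText
                true,                   // indexMedia
                LibraryProvider.dymLib, // meaningLib
                false,                  // doAutotagging
                true,                   // findDatesInContent -> DateDetection.parse(text, timezoneOffset)
                -60                     // timezoneOffset as a UTC+1 browser reports it
        ).words();
    }
}
```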
diff --git a/source/net/yacy/document/DateDetection.java b/source/net/yacy/document/DateDetection.java
index 9964aedfd..73662ac56 100644
--- a/source/net/yacy/document/DateDetection.java
+++ b/source/net/yacy/document/DateDetection.java
@@ -499,7 +499,7 @@ public class DateDetection {
* @param text
* @return a set of dates, ordered by time. first date in the ordered set is the oldest time.
*/
- public static LinkedHashSet<Date> parse(String text) {
+ public static LinkedHashSet<Date> parse(String text, int timezoneOffset) {
Long offset;
if ((offset = specialDayOffset.get(text)) != null) {
LinkedHashSet<Date> dates = new LinkedHashSet<>(); dates.add(new Date((System.currentTimeMillis() / AbstractFormatter.dayMillis) * AbstractFormatter.dayMillis + offset.longValue())); return dates;
@@ -513,7 +513,7 @@ public class DateDetection {
return dates;
}
- public static Date parseLine(String text) {
+ public static Date parseLine(final String text, final int timezoneOffset) {
Date d = null;
try {d = CONFORM.parse(text);} catch (ParseException e) {}
//if (d == null) try {d = GenericFormatter.FORMAT_SHORT_DAY.parse(text);} catch (ParseException e) {} // did not work well and fired for wrong formats; do not use
@@ -521,7 +521,7 @@ public class DateDetection {
if (d == null) try {d = GenericFormatter.FORMAT_ANSIC.parse(text);} catch (ParseException e) {}
if (d == null) {
- Set<Date> dd = parse(text);
+ Set<Date> dd = parse(text, timezoneOffset);
if (dd.size() >= 1) d = dd.iterator().next();
}
return d;
@@ -601,7 +601,7 @@ public class DateDetection {
};
long t = System.currentTimeMillis();
for (String s: test) {
- String parsed = parse(fill + " " + s + " " + fill).toString();
+ String parsed = parse(fill + " " + s + " " + fill, 0).toString();
System.out.println("SOURCE: " + s);
System.out.println("DATE : " + parsed);
System.out.println();
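Usage sketch for the re-signed detector: per the javadoc above, the returned set is ordered oldest-first, and an offset of 0 means detected dates are taken as UTC (whether a particular phrasing is recognized depends on the detector's patterns):

```java
import java.util.Date;
import java.util.LinkedHashSet;
import net.yacy.document.DateDetection;

public final class DateDetectionSketch {
    public static void main(String[] args) {
        // 0 = interpret content dates as UTC; a crawl started from a
        // UTC+1 browser would pass -60 instead
        LinkedHashSet<Date> dates = DateDetection.parse("the conference starts on 2014-09-21", 0);
        if (!dates.isEmpty()) System.out.println(dates.iterator().next()); // oldest date
    }
}
```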
diff --git a/source/net/yacy/document/Parser.java b/source/net/yacy/document/Parser.java
index be7b49eba..b9139340a 100644
--- a/source/net/yacy/document/Parser.java
+++ b/source/net/yacy/document/Parser.java
@@ -59,6 +59,7 @@ public interface Parser {
String mimeType,
String charset,
VocabularyScraper scraper,
+ int timezoneOffset,
InputStream source
) throws Parser.Failure, InterruptedException;
diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java
index 0898f3c35..191793ca0 100644
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@@ -167,6 +167,7 @@ public final class TextParser {
final String mimeType,
final String charset,
final VocabularyScraper scraper,
+ final int timezoneOffset,
final int depth,
final File sourceFile
) throws InterruptedException, Parser.Failure {
@@ -181,7 +182,7 @@ public final class TextParser {
throw new Parser.Failure(errorMsg, location);
}
sourceStream = new BufferedInputStream(new FileInputStream(sourceFile));
- docs = parseSource(location, mimeType, charset, scraper, depth, sourceFile.length(), sourceStream);
+ docs = parseSource(location, mimeType, charset, scraper, timezoneOffset, depth, sourceFile.length(), sourceStream);
} catch (final Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
@@ -199,6 +200,7 @@ public final class TextParser {
String mimeType,
final String charset,
final VocabularyScraper scraper,
+ final int timezoneOffset,
final int depth,
final byte[] content
) throws Parser.Failure {
@@ -214,7 +216,7 @@ public final class TextParser {
}
assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true);
- Document[] docs = parseSource(location, mimeType, idioms, charset, scraper, depth, content);
+ Document[] docs = parseSource(location, mimeType, idioms, charset, scraper, timezoneOffset, depth, content);
return docs;
}
@@ -224,6 +226,7 @@ public final class TextParser {
String mimeType,
final String charset,
final VocabularyScraper scraper,
+ final int timezoneOffset,
final int depth,
final long contentLength,
final InputStream sourceStream
@@ -244,7 +247,7 @@ public final class TextParser {
// then we use only one stream-oriented parser.
if (idioms.size() == 1 || contentLength > Integer.MAX_VALUE) {
// use a specific stream-oriented parser
- return parseSource(location, mimeType, idioms.iterator().next(), charset, scraper, sourceStream);
+ return parseSource(location, mimeType, idioms.iterator().next(), charset, scraper, timezoneOffset, sourceStream);
}
// in case that we know more parsers we first transform the content into a byte[] and use that as base
@@ -255,7 +258,7 @@ public final class TextParser {
} catch (final IOException e) {
throw new Parser.Failure(e.getMessage(), location);
}
- Document[] docs = parseSource(location, mimeType, idioms, charset, scraper, depth, b);
+ Document[] docs = parseSource(location, mimeType, idioms, charset, scraper, timezoneOffset, depth, b);
return docs;
}
@@ -266,6 +269,7 @@ public final class TextParser {
final Parser parser,
final String charset,
final VocabularyScraper scraper,
+ final int timezoneOffset,
final InputStream sourceStream
) throws Parser.Failure {
if (AbstractParser.log.isFine()) AbstractParser.log.fine("Parsing '" + location + "' from stream");
@@ -275,7 +279,7 @@ public final class TextParser {
if (AbstractParser.log.isFine()) AbstractParser.log.fine("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
try {
- final Document[] docs = parser.parse(location, mimeType, documentCharset, scraper, sourceStream);
+ final Document[] docs = parser.parse(location, mimeType, documentCharset, scraper, timezoneOffset, sourceStream);
return docs;
} catch (final Exception e) {
throw new Parser.Failure("parser failed: " + parser.getName(), location);
@@ -288,6 +292,7 @@ public final class TextParser {
final Set<Parser> parsers,
final String charset,
final VocabularyScraper scraper,
+ final int timezoneOffset,
final int depth,
final byte[] sourceArray
) throws Parser.Failure {
@@ -310,7 +315,7 @@ public final class TextParser {
bis = new ByteArrayInputStream(sourceArray);
}
try {
- docs = parser.parse(location, mimeType, documentCharset, scraper, bis);
+ docs = parser.parse(location, mimeType, documentCharset, scraper, timezoneOffset, bis);
} catch (final Parser.Failure e) {
failedParser.put(parser, e);
//log.logWarning("tried parser '" + parser.getName() + "' to parse " + location.toNormalform(true, false) + " but failed: " + e.getMessage(), e);
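Every `Parser` implementation now receives the offset, per the interface change above; the per-format diffs that follow are the mechanical signature updates. A minimal sketch of a conforming implementation (the class and its no-op behavior are illustrative only; import paths assumed where not visible in the patch):

```java
import java.io.InputStream;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.VocabularyScraper;

// Illustrative parser against the extended interface.
public class noopParser extends AbstractParser implements Parser {

    public noopParser() {
        super("noop parser sketch");
    }

    @Override
    public Document[] parse(final AnchorURL location, final String mimeType,
            final String charset, final VocabularyScraper scraper,
            final int timezoneOffset, final InputStream source)
            throws Parser.Failure, InterruptedException {
        // a real parser reads `source` and hands timezoneOffset to any date
        // parsing it does, e.g. ISO8601Formatter.FORMATTER.parse(s, timezoneOffset)
        return new Document[0];
    }
}
```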
diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java
index 5c44d3dc7..68dbd095a 100644
--- a/source/net/yacy/document/content/DCEntry.java
+++ b/source/net/yacy/document/content/DCEntry.java
@@ -107,7 +107,7 @@ public class DCEntry extends MultiMapSolrParams {
if (d == null) return null;
if (d.isEmpty()) return null;
try {
- Date x = ISO8601Formatter.FORMATTER.parse(d);
+ Date x = ISO8601Formatter.FORMATTER.parse(d, 0).getTime();
Date now = new Date();
return x.after(now) ? now : x;
} catch (final ParseException e) {
diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java
index 9e6ba1116..b9557f803 100644
--- a/source/net/yacy/document/importer/MediawikiImporter.java
+++ b/source/net/yacy/document/importer/MediawikiImporter.java
@@ -524,7 +524,7 @@ public class MediawikiImporter extends Thread implements Importer {
public void genDocument() throws Parser.Failure {
try {
this.url = new AnchorURL(this.urlStub + this.title);
- final Document[] parsed = TextParser.parseSource(this.url, "text/html", "UTF-8", new VocabularyScraper(), 1, UTF8.getBytes(this.html));
+ final Document[] parsed = TextParser.parseSource(this.url, "text/html", "UTF-8", new VocabularyScraper(), 0, 1, UTF8.getBytes(this.html));
this.document = Document.mergeDocuments(this.url, "text/html", parsed);
// the wiki parser is not able to find the proper title in the source text, so it must be set here
this.document.setTitle(this.title);
diff --git a/source/net/yacy/document/importer/ResumptionToken.java b/source/net/yacy/document/importer/ResumptionToken.java
index 785c12d26..25075410d 100644
--- a/source/net/yacy/document/importer/ResumptionToken.java
+++ b/source/net/yacy/document/importer/ResumptionToken.java
@@ -158,7 +158,7 @@ public class ResumptionToken extends TreeMap<String, String> {
final String d = get("expirationDate");
if (d == null) return null;
try {
- return ISO8601Formatter.FORMATTER.parse(d);
+ return ISO8601Formatter.FORMATTER.parse(d, 0).getTime();
} catch (final ParseException e) {
ConcurrentLog.logException(e);
return new Date();
diff --git a/source/net/yacy/document/parser/apkParser.java b/source/net/yacy/document/parser/apkParser.java
index 0eacb05f6..6df35f26d 100644
--- a/source/net/yacy/document/parser/apkParser.java
+++ b/source/net/yacy/document/parser/apkParser.java
@@ -54,7 +54,13 @@ public class apkParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
/*
* things to discover:
diff --git a/source/net/yacy/document/parser/audioTagParser.java b/source/net/yacy/document/parser/audioTagParser.java
index 73195c0a0..ed0a386aa 100644
--- a/source/net/yacy/document/parser/audioTagParser.java
+++ b/source/net/yacy/document/parser/audioTagParser.java
@@ -70,8 +70,13 @@ public class audioTagParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
String filename = location.getFileName();
diff --git a/source/net/yacy/document/parser/augment/AugmentParser.java b/source/net/yacy/document/parser/augment/AugmentParser.java
index 6b78cf0d3..aa4dcf3df 100644
--- a/source/net/yacy/document/parser/augment/AugmentParser.java
+++ b/source/net/yacy/document/parser/augment/AugmentParser.java
@@ -38,13 +38,19 @@ public class AugmentParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(AnchorURL url, String mimeType, String charset, final VocabularyScraper scraper, InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
- Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, scraper, source);
+ Document[] htmlDocs = this.rdfaParser.parse(location, mimeType, charset, scraper, timezoneOffset, source);
for (final Document doc : htmlDocs) {
/* analyze(doc, url, mimeType, charset); // enrich document text */
- parseAndAugment(doc, url, mimeType, charset); // enrich document with additional tags
+ parseAndAugment(doc, location, mimeType, charset); // enrich document with additional tags
}
return htmlDocs;
}
diff --git a/source/net/yacy/document/parser/bzipParser.java b/source/net/yacy/document/parser/bzipParser.java
index 4d2c9dd6f..4e16fbfce 100644
--- a/source/net/yacy/document/parser/bzipParser.java
+++ b/source/net/yacy/document/parser/bzipParser.java
@@ -57,8 +57,13 @@ public class bzipParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
File tempFile = null;
@@ -95,7 +100,7 @@ public class bzipParser extends AbstractParser implements Parser {
out.close();
// creating a new parser class to parse the unzipped content
- docs = TextParser.parseSource(location, null, null, scraper, 999, tempFile);
+ docs = TextParser.parseSource(location, null, null, scraper, timezoneOffset, 999, tempFile);
} catch (final Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
diff --git a/source/net/yacy/document/parser/csvParser.java b/source/net/yacy/document/parser/csvParser.java
index 717aadf2b..25bba2fff 100644
--- a/source/net/yacy/document/parser/csvParser.java
+++ b/source/net/yacy/document/parser/csvParser.java
@@ -53,7 +53,13 @@ public class csvParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(AnchorURL location, String mimeType, String charset, final VocabularyScraper scraper, InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
// construct a document using all cells of the document
// the first row is used as headline
// all lines are artificially terminated by a '.' to separate them as sentences for the condenser.
diff --git a/source/net/yacy/document/parser/docParser.java b/source/net/yacy/document/parser/docParser.java
index 6d3e74fd8..a33844382 100644
--- a/source/net/yacy/document/parser/docParser.java
+++ b/source/net/yacy/document/parser/docParser.java
@@ -59,8 +59,13 @@ public class docParser extends AbstractParser implements Parser {
@SuppressWarnings("deprecation")
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
final WordExtractor extractor;
diff --git a/source/net/yacy/document/parser/dwgParser.java b/source/net/yacy/document/parser/dwgParser.java
index 66b902eeb..25c2d29b6 100644
--- a/source/net/yacy/document/parser/dwgParser.java
+++ b/source/net/yacy/document/parser/dwgParser.java
@@ -61,7 +61,13 @@ public class dwgParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
// check memory for parser
if (!MemoryControl.request(200 * 1024 * 1024, true))
diff --git a/source/net/yacy/document/parser/genericParser.java b/source/net/yacy/document/parser/genericParser.java
index 53e6e46cb..2ff09475d 100644
--- a/source/net/yacy/document/parser/genericParser.java
+++ b/source/net/yacy/document/parser/genericParser.java
@@ -46,8 +46,13 @@ public class genericParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source1)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
String filename = location.getFileName();
final Document[] docs = new Document[]{new Document(
diff --git a/source/net/yacy/document/parser/gzipParser.java b/source/net/yacy/document/parser/gzipParser.java
index 5a57e219a..58f788f37 100644
--- a/source/net/yacy/document/parser/gzipParser.java
+++ b/source/net/yacy/document/parser/gzipParser.java
@@ -56,7 +56,13 @@ public class gzipParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
File tempFile = null;
Document[] docs = null;
@@ -80,7 +86,7 @@ public class gzipParser extends AbstractParser implements Parser {
out.close();
// creating a new parser class to parse the unzipped content
- docs = TextParser.parseSource(location, null, null, scraper, 999, tempFile);
+ docs = TextParser.parseSource(location, null, null, scraper, timezoneOffset, 999, tempFile);
} catch (final Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 244dad876..17f9362c7 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -188,6 +188,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
private AnchorURL canonical, publisher;
private final int maxLinks;
private final VocabularyScraper vocabularyScraper;
+ private final int timezoneOffset;
private int breadcrumbs;
@@ -213,7 +214,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
* @param classDetector a map from class names to vocabulary names to scrape content from the DOM with associated class name
*/
@SuppressWarnings("unchecked")
- public ContentScraper(final DigestURL root, int maxLinks, final VocabularyScraper vocabularyScraper) {
+ public ContentScraper(final DigestURL root, int maxLinks, final VocabularyScraper vocabularyScraper, int timezoneOffset) {
// the root value here will not be used to load the resource.
// it is only the reference for relative links
super(linkTags0, linkTags1);
@@ -221,6 +222,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.root = root;
this.maxLinks = maxLinks;
this.vocabularyScraper = vocabularyScraper;
+ this.timezoneOffset = timezoneOffset;
this.evaluationScores = new Evaluation();
this.rss = new SizeLimitedMap(maxLinks);
this.css = new SizeLimitedMap(maxLinks);
@@ -389,12 +391,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (content != null) {
if ("startDate".equals(itemprop)) try {
// parse ISO 8601 date
- Date startDate = ISO8601Formatter.FORMATTER.parse(content);
+ Date startDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
this.startDates.add(startDate);
} catch (ParseException e) {}
if ("endDate".equals(itemprop)) try {
// parse ISO 8601 date
- Date endDate = ISO8601Formatter.FORMATTER.parse(content);
+ Date endDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
this.endDates.add(endDate);
} catch (ParseException e) {}
}
@@ -651,7 +653,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
// start a new scraper to parse links inside this text
// parsing the content
- final ContentScraper scraper = new ContentScraper(this.root, this.maxLinks, this.vocabularyScraper);
+ final ContentScraper scraper = new ContentScraper(this.root, this.maxLinks, this.vocabularyScraper, this.timezoneOffset);
final TransformerWriter writer = new TransformerWriter(null, null, scraper, null, false);
try {
FileUtils.copy(new CharArrayReader(inlineHtml), writer);
@@ -1003,19 +1005,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
//
content = this.metas.get("date");
- if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
+ if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();} catch (ParseException e) {}
//
content = this.metas.get("dc.date");
- if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
+ if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();} catch (ParseException e) {}
//
content = this.metas.get("dc:date");
- if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
+ if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();} catch (ParseException e) {}
//
content = this.metas.get("last-modified");
- if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
+ if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();} catch (ParseException e) {}
return new Date();
}
@@ -1153,19 +1155,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
}
- public static ContentScraper parseResource(final File file, final int maxLinks) throws IOException {
+ public static ContentScraper parseResource(final File file, final int maxLinks, final int timezoneOffset) throws IOException {
// load page
final byte[] page = FileUtils.read(file);
if (page == null) throw new IOException("no content in file " + file.toString());
// scrape document to look up charset
- final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), "UTF-8", new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks);
+ final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page), "UTF-8", new VocabularyScraper(), new DigestURL("http://localhost"), null, false, maxLinks, timezoneOffset);
String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
htmlFilter.close();
if (charset == null) charset = Charset.defaultCharset().toString();
// scrape content
- final ContentScraper scraper = new ContentScraper(new DigestURL("http://localhost"), maxLinks, new VocabularyScraper());
+ final ContentScraper scraper = new ContentScraper(new DigestURL("http://localhost"), maxLinks, new VocabularyScraper(), timezoneOffset);
final Writer writer = new TransformerWriter(null, null, scraper, null, false);
FileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName(charset));
writer.close();
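ContentScraper now stores the offset and threads it into every ISO 8601 parse; the added .getTime() indicates that the new parse(String, int) returns a Calendar rather than a Date. A hedged sketch of the resulting idiom (the minute-based offset semantics, matching JavaScript's Date.getTimezoneOffset(), are an assumption, not confirmed by this excerpt):

    // Sketch of the new date idiom. Assumptions: parse(String, int) returns a
    // java.util.Calendar and still throws ParseException; timezoneOffset is the
    // minutes value a browser reports via Date.getTimezoneOffset().
    final int timezoneOffset = -120;   // hypothetical client at UTC+2
    Date lastMod;
    try {
        // a zone-less value such as "2014-12-01T10:00:00" is resolved against the
        // client offset; values with an explicit designator ("...Z", "...+01:00")
        // should be unaffected
        lastMod = ISO8601Formatter.FORMATTER.parse("2014-12-01T10:00:00", timezoneOffset).getTime();
    } catch (final ParseException e) {
        lastMod = new Date();          // the fallback used throughout this patch
    }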
diff --git a/source/net/yacy/document/parser/html/ScraperInputStream.java b/source/net/yacy/document/parser/html/ScraperInputStream.java
index b63a56cc4..ae681f97f 100644
--- a/source/net/yacy/document/parser/html/ScraperInputStream.java
+++ b/source/net/yacy/document/parser/html/ScraperInputStream.java
@@ -64,13 +64,14 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
final DigestURL rooturl,
final Transformer transformer,
final boolean passbyIfBinarySuspect,
- final int maxLinks
+ final int maxLinks,
+ final int timezoneOffset
) {
// create an input stream for buffering
this.bufferedIn = new BufferedInputStream(inStream, (int) preBufferSize);
this.bufferedIn.mark((int) preBufferSize);
- final ContentScraper scraper = new ContentScraper(rooturl, maxLinks, vocabularyScraper);
+ final ContentScraper scraper = new ContentScraper(rooturl, maxLinks, vocabularyScraper, timezoneOffset);
scraper.registerHtmlFilterEventListener(this);
try {
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index db1cf3a23..654716e63 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -87,13 +87,15 @@ public class htmlParser extends AbstractParser implements Parser {
public Document[] parse(
final AnchorURL location,
final String mimeType,
- final String documentCharset, final VocabularyScraper vocscraper,
+ final String documentCharset,
+ final VocabularyScraper vocscraper,
+ final int timezoneOffset,
final InputStream sourceStream) throws Parser.Failure, InterruptedException {
try {
// first get a document from the parsed html
Charset[] detectedcharsetcontainer = new Charset[]{null};
- final ContentScraper scraper = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, sourceStream, maxLinks);
+ final ContentScraper scraper = parseToScraper(location, documentCharset, vocscraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks);
// parseToScraper also detects/corrects/sets charset from html content tag
final Document document = transformScraper(location, mimeType, detectedcharsetcontainer[0].name(), scraper);
@@ -151,7 +153,7 @@ public class htmlParser extends AbstractParser implements Parser {
return ppd;
}
- public static ContentScraper parseToScraper(final DigestURL location, final String documentCharset, final VocabularyScraper vocabularyScraper, String input, int maxLinks) throws IOException {
+ public static ContentScraper parseToScraper(final DigestURL location, final String documentCharset, final VocabularyScraper vocabularyScraper, final int timezoneOffset, final String input, final int maxLinks) throws IOException {
Charset[] detectedcharsetcontainer = new Charset[]{null};
InputStream sourceStream;
try {
@@ -161,7 +163,7 @@ public class htmlParser extends AbstractParser implements Parser {
}
ContentScraper scraper;
try {
- scraper = parseToScraper(location, documentCharset, vocabularyScraper, detectedcharsetcontainer, sourceStream, maxLinks);
+ scraper = parseToScraper(location, documentCharset, vocabularyScraper, detectedcharsetcontainer, timezoneOffset, sourceStream, maxLinks);
} catch (Failure e) {
throw new IOException(e.getMessage());
}
@@ -173,6 +175,7 @@ public class htmlParser extends AbstractParser implements Parser {
final String documentCharset,
final VocabularyScraper vocabularyScraper,
Charset[] detectedcharsetcontainer,
+ final int timezoneOffset,
InputStream sourceStream,
final int maxLinks) throws Parser.Failure, IOException {
@@ -188,7 +191,7 @@ public class htmlParser extends AbstractParser implements Parser {
if (charset == null) {
ScraperInputStream htmlFilter = null;
try {
- htmlFilter = new ScraperInputStream(sourceStream, documentCharset, vocabularyScraper, location, null, false, maxLinks);
+ htmlFilter = new ScraperInputStream(sourceStream, documentCharset, vocabularyScraper, location, null, false, maxLinks, timezoneOffset);
sourceStream = htmlFilter;
charset = htmlFilter.detectCharset();
} catch (final IOException e1) {
@@ -222,7 +225,7 @@ public class htmlParser extends AbstractParser implements Parser {
}
// parsing the content
- final ContentScraper scraper = new ContentScraper(location, maxLinks, vocabularyScraper);
+ final ContentScraper scraper = new ContentScraper(location, maxLinks, vocabularyScraper, timezoneOffset);
final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false, Math.max(64, Math.min(4096, sourceStream.available())));
try {
FileUtils.copy(sourceStream, writer, detectedcharsetcontainer[0]);
@@ -324,7 +327,7 @@ public class htmlParser extends AbstractParser implements Parser {
try {
url = new AnchorURL(args[0]);
final byte[] content = url.get(ClientIdentification.yacyInternetCrawlerAgent, null, null);
- final Document[] document = new htmlParser().parse(url, "text/html", "utf-8", new VocabularyScraper(), new ByteArrayInputStream(content));
+ final Document[] document = new htmlParser().parse(url, "text/html", "utf-8", new VocabularyScraper(), 0, new ByteArrayInputStream(content));
final String title = document[0].dc_title();
System.out.println(title);
} catch (final MalformedURLException e) {
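For callers that already hold the page source as a String, the reworked static parseToScraper overload takes the offset directly. A usage sketch under stated assumptions (only the signature is taken from the patch; the getDate() accessor is hypothetical, used here to show where the offset ends up):

    // Usage sketch for the String-based parseToScraper overload above.
    public static Date scrapeDate() throws IOException {
        final ContentScraper scraper = htmlParser.parseToScraper(
                new DigestURL("http://example.org/"),   // hypothetical root URL
                "UTF-8",
                new VocabularyScraper(),
                0,      // timezoneOffset: 0 anchors zone-less dates to UTC
                "<html><head><meta name=\"date\" content=\"2014-12-01T10:00\"/></head></html>",
                100);   // maxLinks
        return scraper.getDate();   // assumed accessor, not part of this excerpt
    }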
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index db08ac783..4f69b7eb6 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -93,8 +93,10 @@ public class genericImageParser extends AbstractParser implements Parser {
public Document[] parse(
final AnchorURL location,
final String mimeType,
- final String documentCharset, final VocabularyScraper scraper,
- final InputStream sourceStream) throws Parser.Failure, InterruptedException {
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
ImageInfo ii = null;
String title = null;
@@ -108,7 +110,7 @@ public class genericImageParser extends AbstractParser implements Parser {
if (mimeType.equals("image/bmp") || ext.equals("bmp")) {
byte[] b;
try {
- b = FileUtils.read(sourceStream);
+ b = FileUtils.read(source);
} catch (final IOException e) {
ConcurrentLog.logException(e);
throw new Parser.Failure(e.getMessage(), location);
@@ -126,7 +128,7 @@ public class genericImageParser extends AbstractParser implements Parser {
// a tutorial is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/sampleUsage.html
byte[] b;
try {
- b = FileUtils.read(sourceStream);
+ b = FileUtils.read(source);
} catch (final IOException e) {
ConcurrentLog.logException(e);
throw new Parser.Failure(e.getMessage(), location);
@@ -182,7 +184,7 @@ public class genericImageParser extends AbstractParser implements Parser {
// just ignore
}
} else {
- ii = parseJavaImage(location, sourceStream);
+ ii = parseJavaImage(location, source);
}
final HashSet<String> languages = new HashSet<String>();
@@ -315,7 +317,7 @@ public class genericImageParser extends AbstractParser implements Parser {
AnchorURL uri;
try {
uri = new AnchorURL("http://localhost/" + image.getName());
- final Document[] document = parser.parse(uri, "image/" + MultiProtocolURL.getFileExtension(uri.getFileName()), "UTF-8", new VocabularyScraper(), new FileInputStream(image));
+ final Document[] document = parser.parse(uri, "image/" + MultiProtocolURL.getFileExtension(uri.getFileName()), "UTF-8", new VocabularyScraper(), 0, new FileInputStream(image));
System.out.println(document[0].toString());
} catch (final MalformedURLException e) {
e.printStackTrace();
diff --git a/source/net/yacy/document/parser/images/metadataImageParser.java b/source/net/yacy/document/parser/images/metadataImageParser.java
index eef448faf..04b20b948 100644
--- a/source/net/yacy/document/parser/images/metadataImageParser.java
+++ b/source/net/yacy/document/parser/images/metadataImageParser.java
@@ -87,8 +87,10 @@ public class metadataImageParser extends AbstractParser implements Parser {
public Document[] parse(
final AnchorURL location,
final String mimeType,
- final String documentCharset, final VocabularyScraper scraper,
- final InputStream sourceStream) throws Parser.Failure, InterruptedException {
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
String title = null;
String author = null;
@@ -99,7 +101,7 @@ public class metadataImageParser extends AbstractParser implements Parser {
StringBuilder imgInfotxt = new StringBuilder();
try {
- final Metadata metadata = ImageMetadataReader.readMetadata(new BufferedInputStream(sourceStream));
+ final Metadata metadata = ImageMetadataReader.readMetadata(new BufferedInputStream(source));
final Iterator<Directory> directories = metadata.getDirectories().iterator();
final HashMap<String, String> props = new HashMap<String, String>();
@@ -160,7 +162,7 @@ public class metadataImageParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
- documentCharset,
+ charset,
this,
new HashSet<String>(0), // languages
keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords
diff --git a/source/net/yacy/document/parser/linkScraperParser.java b/source/net/yacy/document/parser/linkScraperParser.java
index 4c0abbdd4..f0ccbe4d9 100644
--- a/source/net/yacy/document/parser/linkScraperParser.java
+++ b/source/net/yacy/document/parser/linkScraperParser.java
@@ -59,11 +59,16 @@ public class linkScraperParser extends AbstractParser implements Parser {
this.SUPPORTED_MIME_TYPES.add("text/sgml");
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
- Document[] htmlParserDocs = new htmlParser().parse(location, mimeType, charset, scraper, source);
+ Document[] htmlParserDocs = new htmlParser().parse(location, mimeType, charset, scraper, timezoneOffset, source);
Document htmlParserDoc = htmlParserDocs == null ? null : Document.mergeDocuments(location, mimeType, htmlParserDocs);
diff --git a/source/net/yacy/document/parser/mmParser.java b/source/net/yacy/document/parser/mmParser.java
index 0781eea3c..686b9cddb 100644
--- a/source/net/yacy/document/parser/mmParser.java
+++ b/source/net/yacy/document/parser/mmParser.java
@@ -71,8 +71,13 @@ public class mmParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException
{
final StringBuilder sb = new StringBuilder();
diff --git a/source/net/yacy/document/parser/odtParser.java b/source/net/yacy/document/parser/odtParser.java
index 588d1432d..2f574f0c0 100644
--- a/source/net/yacy/document/parser/odtParser.java
+++ b/source/net/yacy/document/parser/odtParser.java
@@ -216,7 +216,13 @@ public class odtParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
File dest = null;
try {
// creating a tempfile
diff --git a/source/net/yacy/document/parser/ooxmlParser.java b/source/net/yacy/document/parser/ooxmlParser.java
index 6535c95ed..9072938f4 100644
--- a/source/net/yacy/document/parser/ooxmlParser.java
+++ b/source/net/yacy/document/parser/ooxmlParser.java
@@ -202,7 +202,13 @@ public class ooxmlParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
File dest = null;
try {
// creating a tempfile
diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java
index 52df35bba..1a526a6f5 100644
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@@ -86,7 +86,13 @@ public class pdfParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
// check memory for parser
if (!MemoryControl.request(200 * 1024 * 1024, false))
@@ -376,7 +382,7 @@ public class pdfParser extends AbstractParser implements Parser {
final AbstractParser parser = new pdfParser();
Document document = null;
try {
- document = Document.mergeDocuments(null, "application/pdf", parser.parse(null, "application/pdf", null, new VocabularyScraper(), new FileInputStream(pdfFile)));
+ document = Document.mergeDocuments(null, "application/pdf", parser.parse(null, "application/pdf", null, new VocabularyScraper(), 0, new FileInputStream(pdfFile)));
} catch (final Parser.Failure e) {
System.err.println("Cannot parse file " + pdfFile.getAbsolutePath());
ConcurrentLog.logException(e);
diff --git a/source/net/yacy/document/parser/pptParser.java b/source/net/yacy/document/parser/pptParser.java
index 0f793b0f2..f05cf8dec 100644
--- a/source/net/yacy/document/parser/pptParser.java
+++ b/source/net/yacy/document/parser/pptParser.java
@@ -64,8 +64,13 @@ public class pptParser extends AbstractParser implements Parser {
* all extracted information about the parsed document
*/
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure,
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure,
InterruptedException {
try {
/*
diff --git a/source/net/yacy/document/parser/psParser.java b/source/net/yacy/document/parser/psParser.java
index 09cda757e..e25f6439c 100644
--- a/source/net/yacy/document/parser/psParser.java
+++ b/source/net/yacy/document/parser/psParser.java
@@ -258,8 +258,13 @@ public class psParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
File tempFile = null;
diff --git a/source/net/yacy/document/parser/rdfParser.java b/source/net/yacy/document/parser/rdfParser.java
index 6f3b6fee8..dba55415b 100644
--- a/source/net/yacy/document/parser/rdfParser.java
+++ b/source/net/yacy/document/parser/rdfParser.java
@@ -46,8 +46,13 @@ public class rdfParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL url, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Failure, InterruptedException {
@@ -60,7 +65,7 @@ public class rdfParser extends AbstractParser implements Parser {
Document doc;
String all = "rdfdatasource";
- doc = new Document(url, mimeType, charset, null, null, null, singleList(""), "",
+ doc = new Document(location, mimeType, charset, null, null, null, singleList(""), "",
"", null, new ArrayList(0), 0, 0, all, null, null, null, false, new Date());
docs.add(doc);
diff --git a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java
index 2a36f962d..f95cca2ae 100644
--- a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java
+++ b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java
@@ -48,11 +48,16 @@ public class RDFaParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(AnchorURL url, String mimeType,
- String charset, final VocabularyScraper scraper, InputStream source) throws Failure,
+ public Document[] parse(
+ final AnchorURL url,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Failure,
InterruptedException {
- Document[] htmlDocs = parseHtml(url, mimeType, charset, scraper, source);
+ Document[] htmlDocs = parseHtml(url, mimeType, charset, scraper, timezoneOffset, source);
// TODO: current hardcoded restriction: apply rdfa parser only on selected sources.
@@ -97,13 +102,18 @@ public class RDFaParser extends AbstractParser implements Parser {
return doc;
}
- private Document[] parseHtml(AnchorURL url, String mimeType,
- String charset, VocabularyScraper scraper, InputStream source) throws Failure,
+ private Document[] parseHtml(
+ final AnchorURL url,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Failure,
InterruptedException {
Document[] htmlDocs = null;
try {
- htmlDocs = this.hp.parse(url, mimeType, charset, scraper, source);
+ htmlDocs = this.hp.parse(url, mimeType, charset, scraper, timezoneOffset, source);
source.reset();
} catch (final IOException e1) {
@@ -180,7 +190,7 @@ public class RDFaParser extends AbstractParser implements Parser {
if (aReader != null) {
RDFaParser aParser = new RDFaParser();
try {
- aParser.parse(new AnchorURL(args[0]), "", "", new VocabularyScraper(), aURL.openStream());
+ aParser.parse(new AnchorURL(args[0]), "", "", new VocabularyScraper(), 0, aURL.openStream());
} catch (final FileNotFoundException e) {
e.printStackTrace();
} catch (final IOException e) {
diff --git a/source/net/yacy/document/parser/rssParser.java b/source/net/yacy/document/parser/rssParser.java
index f58a14441..7005e85fe 100644
--- a/source/net/yacy/document/parser/rssParser.java
+++ b/source/net/yacy/document/parser/rssParser.java
@@ -59,14 +59,19 @@ public class rssParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL feedurl, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Failure, InterruptedException {
RSSReader rssReader;
try {
rssReader = new RSSReader(RSSFeed.DEFAULT_MAXSIZE, source);
} catch (final IOException e) {
- throw new Parser.Failure("Load error:" + e.getMessage(), feedurl, e);
+ throw new Parser.Failure("Load error:" + e.getMessage(), location, e);
}
final RSSFeed feed = rssReader.getFeed();
diff --git a/source/net/yacy/document/parser/rtfParser.java b/source/net/yacy/document/parser/rtfParser.java
index 06d7bd5ee..e6ea7d334 100644
--- a/source/net/yacy/document/parser/rtfParser.java
+++ b/source/net/yacy/document/parser/rtfParser.java
@@ -53,8 +53,13 @@ public class rtfParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
try {
diff --git a/source/net/yacy/document/parser/sevenzipParser.java b/source/net/yacy/document/parser/sevenzipParser.java
index 5c22533aa..ddfdd8153 100644
--- a/source/net/yacy/document/parser/sevenzipParser.java
+++ b/source/net/yacy/document/parser/sevenzipParser.java
@@ -56,7 +56,12 @@ public class sevenzipParser extends AbstractParser implements Parser {
this.SUPPORTED_MIME_TYPES.add("application/x-7z-compressed");
}
- public Document parse(final AnchorURL location, final String mimeType, final String charset, final IInStream source) throws Parser.Failure, InterruptedException {
+ public Document parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final int timezoneOffset,
+ final IInStream source) throws Parser.Failure, InterruptedException {
final Document doc = new Document(
location,
mimeType,
@@ -83,7 +88,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
} catch (final IOException e) {
throw new Parser.Failure("error opening 7zip archive: " + e.getMessage(), location);
}
- final SZParserExtractCallback aec = new SZParserExtractCallback(AbstractParser.log, archive, doc, location.getFile());
+ final SZParserExtractCallback aec = new SZParserExtractCallback(AbstractParser.log, archive, doc, location.getFile(), timezoneOffset);
AbstractParser.log.fine("processing archive contents...");
try {
archive.Extract(null, -1, 0, aec);
@@ -101,16 +106,27 @@ public class sevenzipParser extends AbstractParser implements Parser {
}
}
- public Document parse(final AnchorURL location, final String mimeType, final String charset, final byte[] source) throws Parser.Failure, InterruptedException {
- return parse(location, mimeType, charset, new ByteArrayIInStream(source));
+ public Document parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final int timezoneOffset,
+ final byte[] source) throws Parser.Failure, InterruptedException {
+ return parse(location, mimeType, charset, timezoneOffset, new ByteArrayIInStream(source));
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
try {
final ByteArrayOutputStream cfos = new ByteArrayOutputStream();
FileUtils.copy(source, cfos);
- return new Document[]{parse(location, mimeType, charset, cfos.toByteArray())};
+ return new Document[]{parse(location, mimeType, charset, timezoneOffset, cfos.toByteArray())};
} catch (final IOException e) {
throw new Parser.Failure("error processing 7zip archive: " + e.getMessage(), location);
}
@@ -124,13 +140,19 @@ public class sevenzipParser extends AbstractParser implements Parser {
private ByteArrayOutputStream cfos = null;
private final Document doc;
private final String prefix;
+ private final int timezoneOffset;
- public SZParserExtractCallback(final ConcurrentLog logger, final IInArchive handler,
- final Document doc, final String prefix) {
+ public SZParserExtractCallback(
+ final ConcurrentLog logger,
+ final IInArchive handler,
+ final Document doc,
+ final String prefix,
+ final int timezoneOffset) {
super.Init(handler);
this.log = logger;
this.doc = doc;
this.prefix = prefix;
+ this.timezoneOffset = timezoneOffset;
}
@Override
@@ -172,7 +194,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
// below for reversion of the effects
final AnchorURL url = AnchorURL.newAnchor(this.doc.dc_source(), this.prefix + "/" + super.filePath);
final String mime = TextParser.mimeOf(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
- theDocs = TextParser.parseSource(url, mime, null, new VocabularyScraper(), this.doc.getDepth() + 1, this.cfos.toByteArray());
+ theDocs = TextParser.parseSource(url, mime, null, new VocabularyScraper(), this.timezoneOffset, this.doc.getDepth() + 1, this.cfos.toByteArray());
this.doc.addSubDocuments(theDocs);
}
diff --git a/source/net/yacy/document/parser/sidAudioParser.java b/source/net/yacy/document/parser/sidAudioParser.java
index 4f1cbf5c1..1eb216a3b 100644
--- a/source/net/yacy/document/parser/sidAudioParser.java
+++ b/source/net/yacy/document/parser/sidAudioParser.java
@@ -58,8 +58,13 @@ public class sidAudioParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
try {
final int available = source.available();
diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java
index ecc5eb393..11742179f 100644
--- a/source/net/yacy/document/parser/sitemapParser.java
+++ b/source/net/yacy/document/parser/sitemapParser.java
@@ -70,8 +70,13 @@ public class sitemapParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL url, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Failure, InterruptedException {
final List<Document> docs = new ArrayList<Document>();
SitemapReader sitemap = new SitemapReader(source, ClientIdentification.yacyInternetCrawlerAgent);
@@ -83,7 +88,7 @@ public class sitemapParser extends AbstractParser implements Parser {
uri = new DigestURL(item.loc);
doc = new Document(
uri,
- TextParser.mimeOf(url),
+ TextParser.mimeOf(location),
charset,
this,
null,
@@ -224,7 +229,7 @@ public class sitemapParser extends AbstractParser implements Parser {
public Date lastmod(final Date dflt) {
try {
- return ISO8601Formatter.FORMATTER.parse(this.lastmod);
+ return ISO8601Formatter.FORMATTER.parse(this.lastmod, 0).getTime();
} catch (final ParseException e) {
return dflt;
}
@@ -245,7 +250,7 @@ public class sitemapParser extends AbstractParser implements Parser {
public Date lastmod(final Date dflt) {
try {
- return ISO8601Formatter.FORMATTER.parse(this.lastmod);
+ return ISO8601Formatter.FORMATTER.parse(this.lastmod, 0).getTime();
} catch (final ParseException e) {
return dflt;
}
diff --git a/source/net/yacy/document/parser/swfParser.java b/source/net/yacy/document/parser/swfParser.java
index ac1c9c2ce..502782b3b 100644
--- a/source/net/yacy/document/parser/swfParser.java
+++ b/source/net/yacy/document/parser/swfParser.java
@@ -56,8 +56,13 @@ public class swfParser extends AbstractParser implements Parser {
* all extracted information about the parsed document
*/
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException
{
diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java
index e9bdb96bc..52a84e296 100644
--- a/source/net/yacy/document/parser/tarParser.java
+++ b/source/net/yacy/document/parser/tarParser.java
@@ -62,16 +62,22 @@ public class tarParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL url, final String mimeType, final String charset, final VocabularyScraper scraper, InputStream source) throws Parser.Failure, InterruptedException {
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ InputStream source) throws Parser.Failure, InterruptedException {
final List<Document> docacc = new ArrayList<Document>();
Document[] subDocs = null;
- final String ext = MultiProtocolURL.getFileExtension(url.getFileName());
+ final String ext = MultiProtocolURL.getFileExtension(location.getFileName());
if (ext.equals("gz") || ext.equals("tgz")) {
try {
source = new GZIPInputStream(source);
} catch (final IOException e) {
- throw new Parser.Failure("tar parser: " + e.getMessage(), url);
+ throw new Parser.Failure("tar parser: " + e.getMessage(), location);
}
}
TarEntry entry;
@@ -91,7 +97,7 @@ public class tarParser extends AbstractParser implements Parser {
try {
tmp = FileUtils.createTempFile(this.getClass(), name);
FileUtils.copy(tis, tmp, entry.getSize());
- subDocs = TextParser.parseSource(AnchorURL.newAnchor(url, "#" + name), mime, null, scraper, 999, tmp);
+ subDocs = TextParser.parseSource(AnchorURL.newAnchor(location, "#" + name), mime, null, scraper, timezoneOffset, 999, tmp);
if (subDocs == null) continue;
for (final Document d: subDocs) docacc.add(d);
} catch (final Parser.Failure e) {
diff --git a/source/net/yacy/document/parser/torrentParser.java b/source/net/yacy/document/parser/torrentParser.java
index abe9caed4..3b096ebf1 100644
--- a/source/net/yacy/document/parser/torrentParser.java
+++ b/source/net/yacy/document/parser/torrentParser.java
@@ -57,7 +57,13 @@ public class torrentParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(AnchorURL location, String mimeType, String charset, final VocabularyScraper scraper, InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
byte[] b = null;
try {
@@ -120,8 +126,8 @@ public class torrentParser extends AbstractParser implements Parser {
try {
byte[] b = FileUtils.read(new File(args[0]));
torrentParser parser = new torrentParser();
- Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, "UTF-8", new VocabularyScraper(), new ByteArrayInputStream(b));
- Condenser c = new Condenser(d[0], null, true, true, LibraryProvider.dymLib, false, false);
+ Document[] d = parser.parse(new AnchorURL("http://localhost/test.torrent"), null, "UTF-8", new VocabularyScraper(), 0, new ByteArrayInputStream(b));
+ Condenser c = new Condenser(d[0], null, true, true, LibraryProvider.dymLib, false, false, 0);
Map<String, Word> w = c.words();
for (Map.Entry<String, Word> e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText);
} catch (final IOException e) {
diff --git a/source/net/yacy/document/parser/vcfParser.java b/source/net/yacy/document/parser/vcfParser.java
index 107e89feb..f4c4120e2 100644
--- a/source/net/yacy/document/parser/vcfParser.java
+++ b/source/net/yacy/document/parser/vcfParser.java
@@ -66,7 +66,13 @@ public class vcfParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL url, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
try {
@@ -201,7 +207,7 @@ public class vcfParser extends AbstractParser implements Parser {
} else {
if (AbstractParser.log.isFinest()) AbstractParser.log.finest("Invalid data in vcf file" +
- "\n\tURL: " + url +
+ "\n\tURL: " + location +
"\n\tLine: " + line +
"\n\tLine-Nr: " + lineNr);
}
@@ -212,7 +218,7 @@ public class vcfParser extends AbstractParser implements Parser {
final byte[] text = UTF8.getBytes(parsedDataText.toString());
final List<String> descriptions = new ArrayList<String>(1); descriptions.add("vCard");
return new Document[]{new Document(
- url, // url of the source document
+ location, // url of the source document
mimeType, // the documents mime type
null, // charset
this,
@@ -234,7 +240,7 @@ public class vcfParser extends AbstractParser implements Parser {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
- throw new Parser.Failure("Unexpected error while parsing vcf resource. " + e.getMessage(),url);
+ throw new Parser.Failure("Unexpected error while parsing vcf resource. " + e.getMessage(), location);
}
}
diff --git a/source/net/yacy/document/parser/vsdParser.java b/source/net/yacy/document/parser/vsdParser.java
index 9e53f1085..16290f363 100644
--- a/source/net/yacy/document/parser/vsdParser.java
+++ b/source/net/yacy/document/parser/vsdParser.java
@@ -67,7 +67,13 @@ public class vsdParser extends AbstractParser implements Parser {
* all extracted information about the parsed document
*/
@Override
- public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
Document theDoc = null;
diff --git a/source/net/yacy/document/parser/xlsParser.java b/source/net/yacy/document/parser/xlsParser.java
index 40c925493..cf178c85e 100644
--- a/source/net/yacy/document/parser/xlsParser.java
+++ b/source/net/yacy/document/parser/xlsParser.java
@@ -68,8 +68,13 @@ public class xlsParser extends AbstractParser implements Parser {
* all extracted information about the parsed document
*/
@Override
- public Document[] parse(final AnchorURL location, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source) throws Parser.Failure,
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure,
InterruptedException {
return new XLSHSSFListener().parse(location, mimeType, charset, source);
}
diff --git a/source/net/yacy/document/parser/zipParser.java b/source/net/yacy/document/parser/zipParser.java
index 2438354f1..a924a6e03 100644
--- a/source/net/yacy/document/parser/zipParser.java
+++ b/source/net/yacy/document/parser/zipParser.java
@@ -62,12 +62,17 @@ public class zipParser extends AbstractParser implements Parser {
}
@Override
- public Document[] parse(final AnchorURL url, final String mimeType,
- final String charset, final VocabularyScraper scraper, final InputStream source)
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source)
throws Parser.Failure, InterruptedException {
// check memory for parser
if (!MemoryControl.request(200 * 1024 * 1024, false))
- throw new Parser.Failure("Not enough Memory available for zip parser: " + MemoryControl.available(), url);
+ throw new Parser.Failure("Not enough Memory available for zip parser: " + MemoryControl.available(), location);
Document[] docs = null;
final List<Document> docacc = new ArrayList<Document>();
@@ -88,9 +93,9 @@ public class zipParser extends AbstractParser implements Parser {
try {
tmp = FileUtils.createTempFile(this.getClass(), name);
FileUtils.copy(zis, tmp, entry.getSize());
- final DigestURL virtualURL = DigestURL.newURL(url, "#" + name);
+ final DigestURL virtualURL = DigestURL.newURL(location, "#" + name);
//this.log.logInfo("ZIP file parser: " + virtualURL.toNormalform(false, false));
- docs = TextParser.parseSource(new AnchorURL(virtualURL), mime, null, scraper, 999, tmp);
+ docs = TextParser.parseSource(new AnchorURL(virtualURL), mime, null, scraper, timezoneOffset, 999, tmp);
if (docs == null) continue;
for (final Document d: docs) docacc.add(d);
} catch (final Parser.Failure e) {
diff --git a/source/net/yacy/http/ProxyCacheHandler.java b/source/net/yacy/http/ProxyCacheHandler.java
index d5417b641..50a200025 100644
--- a/source/net/yacy/http/ProxyCacheHandler.java
+++ b/source/net/yacy/http/ProxyCacheHandler.java
@@ -74,7 +74,8 @@ public class ProxyCacheHandler extends AbstractRemoteHandler implements Handler
"",
cachedResponseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
- 0);
+ 0,
+ sb.crawler.defaultProxyProfile.timezoneOffset());
final Response cachedResponse = new Response(
yacyRequest,
diff --git a/source/net/yacy/http/ProxyHandler.java b/source/net/yacy/http/ProxyHandler.java
index 2658e031a..d558aac0a 100644
--- a/source/net/yacy/http/ProxyHandler.java
+++ b/source/net/yacy/http/ProxyHandler.java
@@ -180,7 +180,8 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
"",
responseHeaderLegacy.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
- 0); //sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete);
+ 0,
+ sb.crawler.defaultProxyProfile.timezoneOffset()); //sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete);
final Response yacyResponse = new Response(
yacyRequest,
null,
diff --git a/source/net/yacy/http/servlets/SolrSelectServlet.java b/source/net/yacy/http/servlets/SolrSelectServlet.java
index 36fc7aa80..bba3de81f 100644
--- a/source/net/yacy/http/servlets/SolrSelectServlet.java
+++ b/source/net/yacy/http/servlets/SolrSelectServlet.java
@@ -137,7 +137,7 @@ public class SolrSelectServlet extends HttpServlet {
if (!mmsp.getMap().containsKey(CommonParams.Q) && mmsp.getMap().containsKey(CommonParams.QUERY)) {
querystring = mmsp.get(CommonParams.QUERY, "");
mmsp.getMap().remove(CommonParams.QUERY);
- QueryModifier modifier = new QueryModifier();
+ QueryModifier modifier = new QueryModifier(0);
querystring = modifier.parse(querystring);
modifier.apply(mmsp);
QueryGoal qg = new QueryGoal(querystring);
diff --git a/source/net/yacy/kelondro/blob/ArrayStack.java b/source/net/yacy/kelondro/blob/ArrayStack.java
index bf2e1d781..f577cc119 100644
--- a/source/net/yacy/kelondro/blob/ArrayStack.java
+++ b/source/net/yacy/kelondro/blob/ArrayStack.java
@@ -172,7 +172,7 @@ public class ArrayStack implements BLOB {
f.delete();
deletions = true;
} else try {
- d = GenericFormatter.SHORT_SECOND_FORMATTER.parse(file.substring(0, 14));
+ d = GenericFormatter.SHORT_SECOND_FORMATTER.parse(file.substring(0, 14), 0).getTime();
f.renameTo(newBLOB(d));
deletions = true;
} catch (final ParseException e) {continue;}
@@ -188,7 +188,7 @@ public class ArrayStack implements BLOB {
for (final String file : files) {
if (file.length() >= 22 && file.charAt(this.prefix.length()) == '.' && file.endsWith(".blob")) {
try {
- d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18));
+ d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime();
time = d.getTime();
if (time > maxtime) maxtime = time;
} catch (final ParseException e) {continue;}
@@ -199,7 +199,7 @@ public class ArrayStack implements BLOB {
for (final String file : files) {
if (file.length() >= 22 && file.charAt(this.prefix.length()) == '.' && file.endsWith(".blob")) {
try {
- d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18));
+ d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime();
f = new File(heapLocation, file);
time = d.getTime();
try {
@@ -253,7 +253,7 @@ public class ArrayStack implements BLOB {
public synchronized void mountBLOB(final File location, final boolean full) throws IOException {
Date d;
try {
- d = my_SHORT_MILSEC_FORMATTER.parse(location.getName().substring(this.prefix.length() + 1, this.prefix.length() + 18));
+ d = my_SHORT_MILSEC_FORMATTER.parse(location.getName().substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime();
} catch (final ParseException e) {
throw new IOException("date parse problem with file " + location.toString() + ": " + e.getMessage());
}
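The same Date-to-Calendar migration runs through the kelondro storage layer. File-name timestamps get a fixed offset of 0, and the conversions follow one of two shapes (same assumption as above about the Calendar return type):

    // Mechanical migration applied across kelondro:
    //   old: Date d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name);
    try {
        // Calendar -> Date, as in ArrayStack above
        Date d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name, 0).getTime();
        // Calendar -> Date -> long, where epoch milliseconds are wanted
        // (the double getTime() seen in BEncodedHeapBag and SplitTable below)
        long t = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name, 0).getTime().getTime();
    } catch (final ParseException e) {
        // call sites either skip the entry, log, or fall back to a default
    }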
diff --git a/source/net/yacy/kelondro/blob/BEncodedHeapBag.java b/source/net/yacy/kelondro/blob/BEncodedHeapBag.java
index 3b7ae63c9..1c55cb8d3 100644
--- a/source/net/yacy/kelondro/blob/BEncodedHeapBag.java
+++ b/source/net/yacy/kelondro/blob/BEncodedHeapBag.java
@@ -95,7 +95,7 @@ public class BEncodedHeapBag extends AbstractMapStore implements MapStore {
(element.length() == this.prefix.length() + 23)) {
f = new File(this.baseDir, element);
try {
- d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(element.substring(this.prefix.length() + 1, this.prefix.length() + 18));
+ d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(element.substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime();
} catch (final ParseException e) {
ConcurrentLog.severe("BEncodedHeapBag", "", e);
continue;
@@ -203,7 +203,7 @@ public class BEncodedHeapBag extends AbstractMapStore implements MapStore {
final String name = heap.getFile().getName();
long d;
try {
- d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name.substring(this.prefix.length() + 1, this.prefix.length() + 18)).getTime();
+ d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name.substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime().getTime();
} catch (final ParseException e) {
ConcurrentLog.severe("BEncodedHeapBag", "", e);
d = 0;
diff --git a/source/net/yacy/kelondro/blob/Tables.java b/source/net/yacy/kelondro/blob/Tables.java
index 7dc399db8..bd9de5329 100644
--- a/source/net/yacy/kelondro/blob/Tables.java
+++ b/source/net/yacy/kelondro/blob/Tables.java
@@ -764,7 +764,7 @@ public class Tables implements Iterable {
final byte[] r = this.get(colname);
if (r == null) return dflt;
try {
- return my_SHORT_MILSEC_FORMATTER.parse(UTF8.String(r));
+ return my_SHORT_MILSEC_FORMATTER.parse(UTF8.String(r), 0).getTime();
} catch (final ParseException e) {
return dflt;
}
diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
index 17f2c772d..cbe6ccc52 100644
--- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
+++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java
@@ -107,17 +107,17 @@ public class URIMetadataNode extends SolrDocument {
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
- this.setField(CollectionSchema.last_modified.name(), formatter.parse(prop.getProperty("mod", "20000101")));
+ this.setField(CollectionSchema.last_modified.name(), formatter.parse(prop.getProperty("mod", "20000101"), 0).getTime());
} catch (final ParseException e) {
this.setField(CollectionSchema.last_modified.name(), new Date());
}
try {
- this.setField(CollectionSchema.load_date_dt.name(), formatter.parse(prop.getProperty("load", "20000101")));
+ this.setField(CollectionSchema.load_date_dt.name(), formatter.parse(prop.getProperty("load", "20000101"), 0).getTime());
} catch (final ParseException e) {
this.setField(CollectionSchema.load_date_dt.name(), new Date());
}
try {
- this.setField(CollectionSchema.fresh_date_dt.name(), formatter.parse(prop.getProperty("fresh", "20000101")));
+ this.setField(CollectionSchema.fresh_date_dt.name(), formatter.parse(prop.getProperty("fresh", "20000101"), 0).getTime());
} catch (final ParseException e) {
this.setField(CollectionSchema.fresh_date_dt.name(), new Date());
}
diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java
index a70c0ff1f..ca8bbf90e 100644
--- a/source/net/yacy/kelondro/table/SplitTable.java
+++ b/source/net/yacy/kelondro/table/SplitTable.java
@@ -179,7 +179,7 @@ public class SplitTable implements Index, Iterable {
(element.length() == this.prefix.length() + 24)) {
f = new File(this.path, element);
try {
- d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(element.substring(this.prefix.length() + 1, this.prefix.length() + 18));
+ d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(element.substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime();
} catch (final ParseException e) {
ConcurrentLog.severe("SplitTable", "", e);
continue;
@@ -372,7 +372,7 @@ public class SplitTable implements Index, Iterable {
final String name = new File(table.filename()).getName();
long d;
try {
- d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name.substring(this.prefix.length() + 1, this.prefix.length() + 18)).getTime();
+ d = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(name.substring(this.prefix.length() + 1, this.prefix.length() + 18), 0).getTime().getTime();
} catch (final ParseException e) {
ConcurrentLog.severe("SplitTable", "", e);
d = 0;
diff --git a/source/net/yacy/peers/NewsDB.java b/source/net/yacy/peers/NewsDB.java
index f6926d512..e2dbbde26 100644
--- a/source/net/yacy/peers/NewsDB.java
+++ b/source/net/yacy/peers/NewsDB.java
@@ -46,6 +46,8 @@ package net.yacy.peers;
import java.io.File;
import java.io.IOException;
+import java.text.ParseException;
+import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
@@ -164,10 +166,16 @@ public class NewsDB {
private Record b2r(final Row.Entry b) {
if (b == null) return null;
+ Calendar c;
+ try {
+ c = b.empty(2) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColASCII(2), 0);
+ } catch (ParseException e) {
+ c = null;
+ }
return new NewsDB.Record(
b.getPrimaryKeyASCII(),
b.getColUTF8(1),
- (b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColASCII(2), GenericFormatter.UTCDiffString()),
+ c == null ? null : c.getTime(),
(int) b.getColLong(3),
MapTools.string2map(b.getColUTF8(4), ",")
);
@@ -226,8 +234,8 @@ public class NewsDB {
public class Record {
private final String originator; // hash of originating peer
- private final Date created; // Date when news was created by originator
- private final Date received; // Date when news was received here at this peer
+ private Date created; // Date when news was created by originator
+ private Date received; // Date when news was received here at this peer
private final String category; // keyword that addresses possible actions
private int distributed; // counter that counts number of distributions of this news record
private final Map<String, String> attributes; // elements of the news for a special category
@@ -238,8 +246,16 @@ public class NewsDB {
if (this.attributes.toString().length() > NewsDB.this.attributesMaxLength) throw new IllegalArgumentException("attributes length (" + this.attributes.toString().length() + ") exceeds maximum (" + NewsDB.this.attributesMaxLength + ")");
this.category = (this.attributes.containsKey("cat")) ? this.attributes.get("cat") : "";
if (this.category.length() > NewsDB.categoryStringLength) throw new IllegalArgumentException("category length (" + this.category.length() + ") exceeds maximum (" + NewsDB.categoryStringLength + ")");
- this.received = (this.attributes.containsKey("rec")) ? my_SHORT_SECOND_FORMATTER.parse(this.attributes.get("rec"), GenericFormatter.UTCDiffString()) : new Date();
- this.created = (this.attributes.containsKey("cre")) ? my_SHORT_SECOND_FORMATTER.parse(this.attributes.get("cre"), GenericFormatter.UTCDiffString()) : new Date();
+ try {
+ this.received = (this.attributes.containsKey("rec")) ? my_SHORT_SECOND_FORMATTER.parse(this.attributes.get("rec"), 0).getTime() : new Date();
+ } catch (ParseException e) {
+ this.received = new Date();
+ }
+ try {
+ this.created = (this.attributes.containsKey("cre")) ? my_SHORT_SECOND_FORMATTER.parse(this.attributes.get("cre"), 0).getTime() : new Date();
+ } catch (ParseException e) {
+ this.created = new Date();
+ }
this.distributed = (this.attributes.containsKey("dis")) ? Integer.parseInt(this.attributes.get("dis")) : 0;
this.originator = (this.attributes.containsKey("ori")) ? this.attributes.get("ori") : "";
removeStandards();
@@ -262,7 +278,11 @@ public class NewsDB {
if (attributes.toString().length() > NewsDB.this.attributesMaxLength) throw new IllegalArgumentException("attributes length (" + attributes.toString().length() + ") exceeds maximum (" + NewsDB.this.attributesMaxLength + ")");
this.attributes = attributes;
this.received = received;
- this.created = my_SHORT_SECOND_FORMATTER.parse(id.substring(0, GenericFormatter.PATTERN_SHORT_SECOND.length()), GenericFormatter.UTCDiffString());
+ try {
+ this.created = my_SHORT_SECOND_FORMATTER.parse(id.substring(0, GenericFormatter.PATTERN_SHORT_SECOND.length()), 0).getTime();
+ } catch (ParseException e) {
+ this.created = new Date();
+ }
this.category = category;
this.distributed = distributed;
this.originator = id.substring(GenericFormatter.PATTERN_SHORT_SECOND.length());
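
Since the new parse(String, int) throws ParseException, the one-line ternaries that previously passed GenericFormatter.UTCDiffString() become try/catch blocks: b2r falls back to null for the optional date column, and the Record constructors fall back to new Date(). The repeated pattern could be captured in a helper; a sketch under the assumption that it lives inside NewsDB (parseOrNull is hypothetical, not part of this patch):

    // hypothetical helper illustrating the fallback pattern used above
    private static Date parseOrNull(final GenericFormatter formatter, final String s, final int timezoneOffset) {
        if (s == null || s.isEmpty()) return null;
        try {
            return formatter.parse(s, timezoneOffset).getTime();
        } catch (final ParseException e) {
            return null; // callers substitute their own default, e.g. new Date()
        }
    }
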
diff --git a/source/net/yacy/peers/Seed.java b/source/net/yacy/peers/Seed.java
index 64c5f9938..c7e44bf79 100644
--- a/source/net/yacy/peers/Seed.java
+++ b/source/net/yacy/peers/Seed.java
@@ -797,7 +797,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
try {
final GenericFormatter my_SHORT_SECOND_FORMATTER =
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
- final long t = my_SHORT_SECOND_FORMATTER.parse(get(Seed.LASTSEEN, "20040101000000")).getTime();
+ final long t = my_SHORT_SECOND_FORMATTER.parse(get(Seed.LASTSEEN, "20040101000000"), 0).getTime().getTime();
// getTime creates a UTC time number. But in this case java thinks, that the given
// time string is a local time, which has a local UTC offset applied.
// Therefore java subtracts the local UTC offset, to get a UTC number.
@@ -831,7 +831,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
try {
final GenericFormatter my_SHORT_SECOND_FORMATTER =
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
- b = my_SHORT_SECOND_FORMATTER.parse(get(Seed.BDATE, "20040101000000")).getTime();
+ b = my_SHORT_SECOND_FORMATTER.parse(get(Seed.BDATE, "20040101000000"), 0).getTime().getTime();
} catch (final ParseException e ) {
b = System.currentTimeMillis();
}
diff --git a/source/net/yacy/peers/graphics/WebStructureGraph.java b/source/net/yacy/peers/graphics/WebStructureGraph.java
index d6b7f3139..5c3bea554 100644
--- a/source/net/yacy/peers/graphics/WebStructureGraph.java
+++ b/source/net/yacy/peers/graphics/WebStructureGraph.java
@@ -503,7 +503,7 @@ public class WebStructureGraph {
hr =
new HostReference(
ASCII.getBytes(sentry.hosthash),
- GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date).getTime(),
+ GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date, 0).getTime().getTime(),
refhosthashandcounter.getValue().intValue());
} catch (final ParseException e ) {
continue refloop;
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 84a01a08d..1da658f65 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -112,21 +112,24 @@ public final class LoaderDispatcher {
final boolean forText,
final boolean global
) {
+ CrawlProfile profile =
+ (forText) ?
+ ((global) ?
+ this.sb.crawler.defaultTextSnippetGlobalProfile :
+ this.sb.crawler.defaultTextSnippetLocalProfile)
+ :
+ ((global) ?
+ this.sb.crawler.defaultMediaSnippetGlobalProfile :
+ this.sb.crawler.defaultMediaSnippetLocalProfile);
return new Request(
ASCII.getBytes(this.sb.peers.mySeed().hash),
url,
null,
"",
new Date(),
- (forText) ?
- ((global) ?
- this.sb.crawler.defaultTextSnippetGlobalProfile.handle() :
- this.sb.crawler.defaultTextSnippetLocalProfile.handle())
- :
- ((global) ?
- this.sb.crawler.defaultMediaSnippetGlobalProfile.handle() :
- this.sb.crawler.defaultMediaSnippetLocalProfile.handle()), // crawl profile
- 0);
+ profile.handle(),
+ 0,
+ profile.timezoneOffset());
}
public void load(final DigestURL url, final CacheStrategy cacheStratgy, final int maxFileSize, final File targetFile, BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
@@ -407,7 +410,7 @@ public final class LoaderDispatcher {
* @return a map from URLs to the anchor texts of the urls
* @throws IOException
*/
- public final Map<AnchorURL, String> loadLinks(final AnchorURL url, final CacheStrategy cacheStrategy, BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
+ public final Map<AnchorURL, String> loadLinks(final AnchorURL url, final CacheStrategy cacheStrategy, BlacklistType blacklistType, final ClientIdentification.Agent agent, final int timezoneOffset) throws IOException {
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, blacklistType, agent);
if (response == null) throw new IOException("response == null");
final ResponseHeader responseHeader = response.getResponseHeader();
@@ -418,7 +421,7 @@ public final class LoaderDispatcher {
final String supportError = TextParser.supports(url, responseHeader.mime());
if (supportError != null) throw new IOException("no parser support: " + supportError);
try {
- documents = TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), response.profile().scraper(), response.depth(), response.getContent());
+ documents = TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), response.profile().scraper(), timezoneOffset, response.depth(), response.getContent());
if (documents == null) throw new IOException("document == null");
} catch (final Exception e) {
throw new IOException("parser error: " + e.getMessage());
diff --git a/source/net/yacy/search/EventTracker.java b/source/net/yacy/search/EventTracker.java
index 2479e285c..bba8b335f 100644
--- a/source/net/yacy/search/EventTracker.java
+++ b/source/net/yacy/search/EventTracker.java
@@ -152,7 +152,7 @@ public class EventTracker {
}
public long getTime() {
if (this.time instanceof String) try {
- return GenericFormatter.SHORT_SECOND_FORMATTER.parse((String) this.time).getTime();
+ return GenericFormatter.SHORT_SECOND_FORMATTER.parse((String) this.time, 0).getTime().getTime();
} catch (ParseException e) {
return -1L;
}
@@ -162,7 +162,7 @@ public class EventTracker {
}
public Date getDate() {
if (this.time instanceof String) try {
- return GenericFormatter.SHORT_SECOND_FORMATTER.parse((String) this.time);
+ return GenericFormatter.SHORT_SECOND_FORMATTER.parse((String) this.time, 0).getTime();
} catch (ParseException e) {
return null;
}if (this.time instanceof Long) return new Date((Long) this.time);
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index ad9724d44..c1b29eb95 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -1942,7 +1942,8 @@ public final class Switchboard extends serverSwitch {
"",
surrogate.getDate(),
this.crawler.defaultSurrogateProfile.handle(),
- 0);
+ 0,
+ this.crawler.defaultSurrogateProfile.timezoneOffset());
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
final IndexingQueueEntry queueEntry =
new IndexingQueueEntry(response, new Document[] {document}, null);
@@ -2571,6 +2572,7 @@ public final class Switchboard extends serverSwitch {
response.getMimeType(),
response.getCharacterEncoding(),
response.profile().scraper(),
+ response.profile().timezoneOffset(),
response.depth(),
response.getContent());
if ( documents == null ) {
@@ -2673,7 +2675,8 @@ public final class Switchboard extends serverSwitch {
nextEntry.getValue(),
new Date(),
response.profile().handle(),
- nextdepth));
+ nextdepth,
+ response.profile().timezoneOffset()));
} catch (final MalformedURLException e ) {
ConcurrentLog.logException(e);
}
@@ -2754,7 +2757,8 @@ public final class Switchboard extends serverSwitch {
in.documents[i], in.queueEntry.profile().scraper(), in.queueEntry.profile().indexText(),
in.queueEntry.profile().indexMedia(),
LibraryProvider.dymLib, true,
- this.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.dates_in_content_dts));
+ this.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.dates_in_content_dts),
+ profile.timezoneOffset());
// update image result list statistics
// its good to do this concurrently here, because it needs a DNS lookup
@@ -3043,7 +3047,15 @@ public final class Switchboard extends serverSwitch {
int p = userInfo == null ? -1 : userInfo.indexOf(':');
String user = userInfo == null ? FTPClient.ANONYMOUS : userInfo.substring(0, p);
String pw = userInfo == null || p == -1 ? "anomic" : userInfo.substring(p + 1);
- this.crawlStacker.enqueueEntriesFTP(this.peers.mySeed().hash.getBytes(), profile.handle(), url.getHost(), url.getPort(), user, pw, false);
+ this.crawlStacker.enqueueEntriesFTP(
+ this.peers.mySeed().hash.getBytes(),
+ profile.handle(),
+ url.getHost(),
+ url.getPort(),
+ user,
+ pw,
+ false,
+ profile.timezoneOffset());
return null;
} catch (final Exception e) {
// mist
@@ -3080,7 +3092,8 @@ public final class Switchboard extends serverSwitch {
"CRAWLING-ROOT",
new Date(),
profile.handle(),
- 0
+ 0,
+ profile.timezoneOffset()
));
if (reasonString != null) return reasonString;
@@ -3134,7 +3147,7 @@ public final class Switchboard extends serverSwitch {
* @throws IOException
* @throws Parser.Failure
*/
- public void addToIndex(final Collection<DigestURL> urls, final SearchEvent searchEvent, final String heuristicName, final Map<String, Pattern> collections, boolean doublecheck) {
+ public void addToIndex(final Collection<DigestURL> urls, final SearchEvent searchEvent, final String heuristicName, final Map<String, Pattern> collections, final boolean doublecheck) {
Map<String, DigestURL> urlmap = new HashMap<String, DigestURL>();
for (DigestURL url: urls) urlmap.put(ASCII.String(url.hash()), url);
if (searchEvent != null) {
@@ -3192,7 +3205,7 @@ public final class Switchboard extends serverSwitch {
}
final Condenser condenser = new Condenser(
document, null, true, true, LibraryProvider.dymLib, true,
- Switchboard.this.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.dates_in_content_dts));
+ Switchboard.this.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.dates_in_content_dts), searchEvent.query.timezoneOffset);
ResultImages.registerImages(url, document, true);
Switchboard.this.webStructure.generateCitationReference(url, document);
storeDocumentIndex(
@@ -3546,7 +3559,7 @@ public final class Switchboard extends serverSwitch {
final Map<AnchorURL, String> links;
searchEvent.oneFeederStarted();
try {
- links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
+ links = Switchboard.this.loader.loadLinks(url, CacheStrategy.NOCACHE, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent, searchEvent.query.timezoneOffset);
if ( links != null ) {
final Iterator<AnchorURL> i = links.keySet().iterator();
while ( i.hasNext() ) {
@@ -3585,7 +3598,7 @@ public final class Switchboard extends serverSwitch {
final Map<AnchorURL, String> links;
DigestURL url;
try {
- links = Switchboard.this.loader.loadLinks(startUrl, CacheStrategy.IFFRESH, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
+ links = Switchboard.this.loader.loadLinks(startUrl, CacheStrategy.IFFRESH, BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent, 0);
if (links != null) {
if (links.size() < 1000) { // limit to 1000 to skip large index pages
final Iterator<AnchorURL> i = links.keySet().iterator();
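
Within Switchboard the offset travels with the crawl profile: every Request and Condenser created here reads it back via profile.timezoneOffset() or searchEvent.query.timezoneOffset. A sketch of the extended Request constructor as exercised in the hunks above (argument roles inferred from the surrounding call sites):

    // sketch: the timezone offset is appended as the last Request argument
    final Request request = new Request(
            sb.peers.mySeed().hash.getBytes(), // initiating peer hash
            url,                               // URL to be crawled
            null,                              // referrer hash
            "CRAWLING-ROOT",                   // entry name
            new Date(),                        // appearance date
            profile.handle(),                  // crawl profile handle
            0,                                 // crawl depth
            profile.timezoneOffset());         // new: offset stored in the profile
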
diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java
index a8ef16402..aa805c4b7 100644
--- a/source/net/yacy/search/index/DocumentIndex.java
+++ b/source/net/yacy/search/index/DocumentIndex.java
@@ -61,18 +61,27 @@ public class DocumentIndex extends Segment {
} catch (final MalformedURLException e ) {
}
}
- BlockingQueue<AnchorURL> queue; // a queue of document ID's
+ private BlockingQueue<AnchorURL> queue; // a queue of document ID's
private final Worker[] worker;
- CallbackListener callback;
+ private CallbackListener callback;
+ private int timezoneOffset;
static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
- public DocumentIndex(final File segmentPath, final File archivePath, final File collectionConfigurationPath, final File webgraphConfigurationPath, final CallbackListener callback, final int cachesize)
+ public DocumentIndex(
+ final File segmentPath,
+ final File archivePath,
+ final File collectionConfigurationPath,
+ final File webgraphConfigurationPath,
+ final CallbackListener callback,
+ final int cachesize,
+ final int timezoneOffset)
throws IOException {
super(new ConcurrentLog("DocumentIndex"), segmentPath, archivePath,
collectionConfigurationPath == null ? null : new CollectionConfiguration(collectionConfigurationPath, true),
webgraphConfigurationPath == null ? null : new WebgraphConfiguration(webgraphConfigurationPath, true)
);
+ this.timezoneOffset = timezoneOffset;
super.connectRWI(cachesize, targetFileSize * 4 - 1);
super.connectCitation(cachesize, targetFileSize * 4 - 1);
super.fulltext().connectLocalSolr();
@@ -99,7 +108,7 @@ public class DocumentIndex extends Segment {
try {
while ( (f = DocumentIndex.this.queue.take()) != poison ) {
try {
- resultRows = add(f);
+ resultRows = add(f, DocumentIndex.this.timezoneOffset);
for ( final SolrInputDocument resultRow : resultRows ) {
if ( DocumentIndex.this.callback != null ) {
if ( resultRow == null ) {
@@ -132,7 +141,7 @@ public class DocumentIndex extends Segment {
this.queue.clear();
}
- private SolrInputDocument[] add(final AnchorURL url) throws IOException {
+ private SolrInputDocument[] add(final AnchorURL url, final int timezoneOffset) throws IOException {
if ( url == null ) {
throw new IOException("file = null");
}
@@ -150,7 +159,7 @@ public class DocumentIndex extends Segment {
length = -1;
}
try {
- documents = TextParser.parseSource(url, null, null, new VocabularyScraper(), 0, length, url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null));
+ documents = TextParser.parseSource(url, null, null, new VocabularyScraper(), timezoneOffset, 0, length, url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null));
} catch (final Exception e ) {
throw new IOException("cannot parse " + url.toNormalform(false) + ": " + e.getMessage());
}
@@ -159,7 +168,7 @@ public class DocumentIndex extends Segment {
int c = 0;
for ( final Document document : documents ) {
if (document == null) continue;
- final Condenser condenser = new Condenser(document, null, true, true, LibraryProvider.dymLib, true, true);
+ final Condenser condenser = new Condenser(document, null, true, true, LibraryProvider.dymLib, true, true, 0);
rows[c++] =
super.storeDocument(
url,
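
DocumentIndex takes the offset once at construction time, stores it in a field, and applies it to every file handed to TextParser.parseSource; the Condenser created per document keeps a fixed offset of 0 here. A construction sketch with hypothetical paths (the constructor throws IOException):

    // sketch: the timezoneOffset is now the last constructor argument
    final DocumentIndex index = new DocumentIndex(
            new File("DATA/INDEX/SEGMENT"), // segmentPath (hypothetical)
            new File("DATA/ARCHIVE"),       // archivePath (hypothetical)
            null,                           // collectionConfigurationPath: none
            null,                           // webgraphConfigurationPath: none
            null,                           // CallbackListener: none
            100,                            // cachesize
            0);                             // timezoneOffset for all parsed documents
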
diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java
index b5bd460e2..236be7537 100644
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@@ -761,7 +761,7 @@ public class Segment {
}
// get the word set
Set<String> words = null;
- words = new Condenser(document, null, true, true, null, false, false).words().keySet();
+ words = new Condenser(document, null, true, true, null, false, false, 0).words().keySet();
// delete all word references
int count = 0;
diff --git a/source/net/yacy/search/query/AccessTracker.java b/source/net/yacy/search/query/AccessTracker.java
index 07d379873..b050ee4ae 100644
--- a/source/net/yacy/search/query/AccessTracker.java
+++ b/source/net/yacy/search/query/AccessTracker.java
@@ -315,7 +315,7 @@ public class AccessTracker {
byte[] b = new byte[GenericFormatter.PATTERN_SHORT_SECOND.length()];
raf.readFully(b);
try {
- return GenericFormatter.SHORT_SECOND_FORMATTER.parse(UTF8.String(b));
+ return GenericFormatter.SHORT_SECOND_FORMATTER.parse(UTF8.String(b), 0).getTime();
} catch (ParseException e) {
throw new IOException(e.getMessage());
}
@@ -326,8 +326,8 @@ public class AccessTracker {
String file = args[0];
Date from;
try {
- from = GenericFormatter.SHORT_SECOND_FORMATTER.parse(args[1]);
- Date to = GenericFormatter.SHORT_SECOND_FORMATTER.parse(args[2]);
+ from = GenericFormatter.SHORT_SECOND_FORMATTER.parse(args[1], 0).getTime();
+ Date to = GenericFormatter.SHORT_SECOND_FORMATTER.parse(args[2], 0).getTime();
List<EventTracker.Event> dump = readLog(new File(file), from, to);
for (EventTracker.Event s: dump) System.out.println(s.toString());
} catch (ParseException e) {
diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java
index e7daf4acb..0cb0f6942 100644
--- a/source/net/yacy/search/query/QueryModifier.java
+++ b/source/net/yacy/search/query/QueryModifier.java
@@ -41,8 +41,10 @@ public class QueryModifier {
private final StringBuilder modifier;
public String sitehost, sitehash, filetype, protocol, language, author, collection, on, from, to;
+ public int timezoneOffset;
- public QueryModifier() {
+ public QueryModifier(final int timezoneOffset) {
+ this.timezoneOffset = timezoneOffset;
this.sitehash = null;
this.sitehost = null;
this.filetype = null;
@@ -274,19 +276,19 @@ public class QueryModifier {
if (fq.indexOf(CollectionSchema.dates_in_content_dts.getSolrFieldName()) < 0) {
if (this.on != null && this.on.length() > 0) {
- fq.append(" AND ").append(QueryModifier.parseOnExpression(this.on));
+ fq.append(" AND ").append(QueryModifier.parseOnExpression(this.on, this.timezoneOffset));
}
if (this.from != null && this.from.length() > 0 && (this.to == null || this.to.equals("*"))) {
- fq.append(" AND ").append(QueryModifier.parseFromToExpression(this.from, null));
+ fq.append(" AND ").append(QueryModifier.parseFromToExpression(this.from, null, this.timezoneOffset));
}
if ((this.from == null || this.from.equals("*")) && this.to != null && this.to.length() > 0) {
- fq.append(" AND ").append(QueryModifier.parseFromToExpression(null, this.to));
+ fq.append(" AND ").append(QueryModifier.parseFromToExpression(null, this.to, this.timezoneOffset));
}
if (this.from != null && this.from.length() > 0 && this.to != null && this.to.length() > 0) {
- fq.append(" AND ").append(QueryModifier.parseFromToExpression(this.from, this.to));
+ fq.append(" AND ").append(QueryModifier.parseFromToExpression(this.from, this.to, this.timezoneOffset));
}
}
@@ -348,9 +350,9 @@ public class QueryModifier {
return fq.toString();
}
- public static String parseOnExpression(String onDescription) {
+ public static String parseOnExpression(final String onDescription, final int timezoneOffset) {
assert onDescription != null;
- Date onDate = DateDetection.parseLine(onDescription);
+ Date onDate = DateDetection.parseLine(onDescription, timezoneOffset);
StringBuilder filterQuery = new StringBuilder(20);
if (onDate != null) {
@SuppressWarnings({ "deprecation", "static-access" })
@@ -360,9 +362,9 @@ public class QueryModifier {
return filterQuery.toString();
}
- public static String parseFromToExpression(String from, String to) {
- Date fromDate = from == null || from.equals("*") ? null : DateDetection.parseLine(from);
- Date toDate = to == null || to.equals("*") ? null : DateDetection.parseLine(to);
+ public static String parseFromToExpression(final String from, final String to, final int timezoneOffset) {
+ Date fromDate = from == null || from.equals("*") ? null : DateDetection.parseLine(from, timezoneOffset);
+ Date toDate = to == null || to.equals("*") ? null : DateDetection.parseLine(to, timezoneOffset);
StringBuilder filterQuery = new StringBuilder(20);
if (fromDate != null && toDate != null) {
@SuppressWarnings({ "deprecation", "static-access" })
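
With the extra parameter, the on:, from: and to: query modifiers are turned into Solr filter queries relative to the user's clock instead of the server's. A usage sketch (the date literals are assumptions; accepted formats are whatever DateDetection.parseLine understands):

    // sketch: building date filter-query clauses with an explicit timezone offset
    final int timezoneOffset = 0; // e.g. submitted by the search front-end
    final String onClause = QueryModifier.parseOnExpression("2014-12-24", timezoneOffset);
    final String rangeClause = QueryModifier.parseFromToExpression("2014-12-01", "2014-12-31", timezoneOffset);
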
diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java
index d99d524b9..5adfbc0dc 100644
--- a/source/net/yacy/search/query/QueryParams.java
+++ b/source/net/yacy/search/query/QueryParams.java
@@ -70,7 +70,6 @@ import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
-import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.schema.TrieDateField;
@@ -146,6 +145,7 @@ public final class QueryParams {
public LinkedHashSet<String> facetfields;
private SolrQuery cachedQuery;
private CollectionConfiguration solrSchema;
+ public final int timezoneOffset;
public QueryParams(
final QueryGoal queryGoal,
@@ -154,6 +154,7 @@ public final class QueryParams {
final String prefer,
final ContentDomain contentdom,
final String language,
+ final int timezoneOffset,
final Collection<Tagging.Metatag> metatags,
final CacheStrategy snippetCacheStrategy,
final int itemsPerPage,
@@ -183,6 +184,7 @@ public final class QueryParams {
this.ranking = ranking;
this.maxDistance = maxDistance;
this.contentdom = contentdom;
+ this.timezoneOffset = timezoneOffset;
this.itemsPerPage = Math.min((specialRights) ? 10000 : 1000, itemsPerPage);
this.offset = Math.max(0, Math.min((specialRights) ? 10000 - this.itemsPerPage : 1000 - this.itemsPerPage, offset));
try {
@@ -527,19 +529,19 @@ public final class QueryParams {
if (this.solrSchema.contains(CollectionSchema.dates_in_content_dts)) {
if (this.modifier.on != null && this.modifier.on.length() > 0) {
- fqs.add(QueryModifier.parseOnExpression(this.modifier.on));
+ fqs.add(QueryModifier.parseOnExpression(this.modifier.on, this.timezoneOffset));
}
if (this.modifier.from != null && this.modifier.from.length() > 0 && (this.modifier.to == null || this.modifier.to.equals("*"))) {
- fqs.add(QueryModifier.parseFromToExpression(this.modifier.from, null));
+ fqs.add(QueryModifier.parseFromToExpression(this.modifier.from, null, this.timezoneOffset));
}
if ((this.modifier.from == null || this.modifier.from.equals("*")) && this.modifier.to != null && this.modifier.to.length() > 0) {
- fqs.add(QueryModifier.parseFromToExpression(null, this.modifier.to));
+ fqs.add(QueryModifier.parseFromToExpression(null, this.modifier.to, this.timezoneOffset));
}
if (this.modifier.from != null && this.modifier.from.length() > 0 && this.modifier.to != null && this.modifier.to.length() > 0) {
- fqs.add(QueryModifier.parseFromToExpression(this.modifier.from, this.modifier.to));
+ fqs.add(QueryModifier.parseFromToExpression(this.modifier.from, this.modifier.to, this.timezoneOffset));
}
}
diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java b/source/net/yacy/server/http/HTTPDProxyHandler.java
index 3463a0552..533b37dc3 100644
--- a/source/net/yacy/server/http/HTTPDProxyHandler.java
+++ b/source/net/yacy/server/http/HTTPDProxyHandler.java
@@ -358,7 +358,8 @@ public final class HTTPDProxyHandler {
"",
cachedResponseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
- 0);
+ 0,
+ sb.crawler.defaultProxyProfile.timezoneOffset());
final Response response = new Response(
request,
requestHeader,
@@ -473,8 +474,8 @@ public final class HTTPDProxyHandler {
"",
responseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
- 0);
-
+ 0,
+ sb.crawler.defaultProxyProfile.timezoneOffset());
// handle incoming cookies
handleIncomingCookies(responseHeader, host, ip);