- added geo information parsing to html parser

- extended metadata information in index with geolocalisation
- added display of location in yacydoc and ViewFile

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7629 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent efcf37a953
commit 9b25d07295

@ -4,9 +4,9 @@ port=$(grep ^port= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
pw=$(grep ^adminAccountBase64MD5= ../DATA/SETTINGS/yacy.conf |cut -d= -f2)
if which curl &>/dev/null; then
curl -s --header "Authorization: realm=$pw" "http://localhost:$port/$1" > /dev/null
curl -s --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" > /dev/null
elif which wget &>/dev/null; then
wget -q -t 1 --timeout=5 --header "Authorization: realm=$pw" "http://localhost:$port/$1" -O /dev/null
wget -q -t 1 --timeout=5 --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1" -O /dev/null
else
exit 1
fi

@ -19,7 +19,7 @@
<form action="IndexCreateWWWLocalQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
Delete Entries:
<input type="text" name="pattern" value=".*" size="20" maxlength="200" />
<input type="text" name="pattern" value=".*" size="40" maxlength="200" />
<select name="option" size="1">
<option value="5">Initiator</option>
<option value="3">Profile</option>

@ -4,6 +4,7 @@
<head>
<title>YaCy '#[clientname]#': View URL Content</title>
#%env/templates/metas.template%#
<script type="text/javascript" src="/js/highslide/highslide.js"></script>
</head>
<body>
#(display)#
@ -91,6 +92,7 @@
<dt>dc:format</dt><dd>#[format]#</dd>
<dt>dc:identifier</dt><dd>#[identifier]#</dd>
<dt>dc:source</dt><dd>#[source]#</dd>
<dt>geo:lat &amp; geo:long</dt><dd><a href="osm.png?lon=#[lon]#&lat=#[lat]#&zoom=14" onclick="return hs.expand(this)">lat=#[lat]#, lon=#[lon]#</a></dd>
</dl>
<p class="tt">#[parsedText]#</p>
</fieldset>

@ -250,6 +250,8 @@ public class ViewFile {
prop.put("viewMode_format", document.dc_format());
prop.put("viewMode_identifier", document.dc_identifier());
prop.put("viewMode_source", url.toString());
prop.put("viewMode_lat", document.lat());
prop.put("viewMode_lon", document.lon());
prop.put("viewMode_parsedText", markup(wordArray, content).replaceAll("\n", "<br />").replaceAll("\t", "&nbsp;&nbsp;&nbsp;&nbsp;"));
} else if (viewMode.equals("sentences")) {

@ -39,7 +39,8 @@ you can validate it with http://validator.w3.org/
<dt>Referrer URL</dt><dd property="yacy:referrer.url">#[yacy_referrer_url]#</dd>
<dt>Document size</dt><dd property="yacy:size">#[yacy_size]#</dd>
<dt>Number of Words</dt><dd property="yacy:words">#[yacy_words]#</dd>
<dt>Location</dt><dd><a href="/osm.png?lon=#[geo_long]#&lat=#[geo_lat]#&zoom=14" onclick="return hs.expand(this)">lat=#[geo_lat]#, lon=#[geo_long]#</a></dd>
</dl>
</fieldset>
</form>

@ -27,6 +27,7 @@
import java.net.MalformedURLException;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
@ -99,10 +100,12 @@ public class yacydoc {
prop.putXML("dc_subject", metadata.dc_subject());
prop.putXML("dc_publisher", metadata.dc_publisher());
prop.putXML("dc_contributor", "");
prop.putXML("dc_date", entry.moddate().toString());
prop.putXML("dc_date", ISO8601Formatter.FORMATTER.format(entry.moddate()));
prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", metadata.url().toNormalform(false, true));
prop.putXML("dc_language", UTF8.String(entry.language()));
prop.put("geo_lat", metadata.lat());
prop.put("geo_long", metadata.lon());
prop.put("yacy_urlhash", metadata.url().hash());
prop.putXML("yacy_loaddate", entry.loaddate().toString());

@ -6,7 +6,8 @@ this is a xml file with embedded dublin core properties
you can validate it with http://www.stg.brown.edu/service/xmlvalid/
-->
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:yacy="http://yacy.net/">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:yacy="http://yacy.net/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#">
<record>
<dc:title LANG="#[dc_language]#">#[dc_title]#</dc:title>
<dc:creator>#[dc_creator]#</dc:creator>
<dc:contributor>#[dc_contributor]#</dc:contributor>
@ -19,8 +20,14 @@ you can validate it with http://www.stg.brown.edu/service/xmlvalid/
<dc:identifier SCHEME="URL">#[dc_identifier]#</dc:identifier>
<dc:format.extent>#[yacy_size]#</dc:format.extent>
<dc:language SCHEME="ISO639-2">#[dc_language]#</dc:language>
<geo:Point>
<geo:long>#[geo_long]#</geo:long>
<geo:lat>#[geo_lat]#</geo:lat>
</geo:Point>
<yacy:loaddate>#[yacy_loaddate]#</yacy:loaddate>
<yacy:referrer.hash>yacy:urlhash:#[yacy_referrer_hash]#</yacy:referrer.hash>
<yacy:referrer.url>#[yacy_referrer_url]#</yacy:referrer.url>
<yacy:words>#[yacy_words]#</yacy:words>
</record>
</metadata>

@ -60,6 +60,7 @@
map.addLayer(searchLayer_co);
searchLayer_md = new OpenLayers.Layer.GeoRSS('GeoRSS', path_mdsearch + query, {'icon':marker_md});
map.addLayer(searchLayer_md);
document.getElementById('apilink').setAttribute('href', 'yacysearch_location.rss?query=' + query);
}
</script>
@ -72,6 +73,17 @@
::
#%env/templates/embeddedheader.template%#
#(/display)#
<div id="api">
<a href="yacysearch_location.rss" id="apilink"><img src="/env/grafics/api.png" width="60" height="40" alt="API"/></a>
<script type="text/javascript">
//<![CDATA[
document.getElementById('apilink').setAttribute('href', 'yacysearch_location.rss?' + window.location.search.substring(1));
//]]>
</script>
<span>The information that is presented on this page can also be retrieved as XML
Click the API icon to see the XML.
To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de/wiki/index.php/Dev:API">API wiki page</a>.</span>
</div>
<form class="search small" onsubmit="return false;" class="search small" accept-charset="UTF-8">
<h2>#[promoteSearchPageGreeting]#</h2>
<div class="yacylogo"><a href="#[promoteSearchPageGreeting.homepage]#" class="yacylogo"><img src="#[promoteSearchPageGreeting.smallImage]#" alt="yacysearch" /></a></div>

@ -91,7 +91,7 @@ public class yacysearch_location {
if (search_title || search_publisher || search_creator || search_subject) try {
// get a queue of search results
String rssSearchServiceURL = "http://localhost:" + sb.getConfig("port", "8090") + "/yacysearch.rss";
String rssSearchServiceURL = "http://127.0.0.1:" + sb.getConfig("port", "8090") + "/yacysearch.rss";
BlockingQueue<RSSMessage> results = new LinkedBlockingQueue<RSSMessage>();
SearchSRURSS.searchSRURSS(results, rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false, null);

@ -221,7 +221,7 @@ public final class ResultURLs {
public static void main(final String[] args) {
try {
final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/");
final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de".getBytes(), 0, 0, 0, 0, 0, 0);
final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0f, 0.0f, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de".getBytes(), 0, 0, 0, 0, 0, 0);
EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING;
System.out.println("valid test:\n=======");
// add

@ -330,6 +330,8 @@ public class Segment {
document.dc_creator(), // author
document.dc_subject(' '), // tags
document.dc_publisher(), // publisher (may be important to get location data)
document.lon(), // decimal degrees as in WGS84;
document.lat(), // if unknown both values may be 0.0f;
modDate, // modification date
loadDate, // loaded date
new Date(loadDate.getTime() + Math.max(0, loadDate.getTime() - modDate.getTime()) / 2), // freshdate, computed with Proxy-TTL formula

@ -82,4 +82,7 @@ public interface Hit {
public long getSize();
public float getLon();
public float getLat();
}

@ -54,7 +54,10 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
guid("guid"),
ttl("ttl"),
docs("docs"),
size("size,length");
size("size,length"),
lon("geo:long,geo:lon"),
lat("geo:lat");
//point("gml:pos,georss:point,coordinates");
private Set<String> keys;
@ -210,6 +213,14 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
for (String s: map.values()) sb.append(s).append(' ');
return sb.toString();
}
/**
 * Returns the longitude of this message as taken from the {@code lon} token
 * (keys {@code geo:long} / {@code geo:lon}).
 *
 * @return the parsed longitude, or {@code 0.0f} if the token is absent or its
 *         value is not a parsable floating point number
 */
public float getLon() {
    final String value = Token.lon.valueFrom(this.map, "0.0");
    try {
        return Float.parseFloat(value);
    } catch (final NumberFormatException e) {
        // the value originates from a feed; treat malformed coordinates as "unknown"
        return 0.0f;
    }
}
/**
 * Returns the latitude of this message as taken from the {@code lat} token
 * (key {@code geo:lat}).
 *
 * @return the parsed latitude, or {@code 0.0f} if the token is absent or its
 *         value is not a parsable floating point number
 */
public float getLat() {
    final String value = Token.lat.valueFrom(this.map, "0.0");
    try {
        return Float.parseFloat(value);
    } catch (final NumberFormatException e) {
        // the value originates from a feed; treat malformed coordinates as "unknown"
        return 0.0f;
    }
}
@Override
public String toString() {

@ -39,7 +39,7 @@ public class SearchHub {
private static final String[] SRURSSServicesList = {
//"http://192.168.1.51:8000/yacysearch.rss"//,
"http://localhost:8008/yacysearch.rss"//,
"http://127.0.0.1:8008/yacysearch.rss"//,
/*
"http://yacy.dyndns.org:8000/yacysearch.rss",
"http://yacy.caloulinux.net:8085/yacysearch.rss",

@ -189,6 +189,7 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
}
// send request
byte[] result = new byte[0];
try {
final LinkedHashMap<String,ContentBody> parts = new LinkedHashMap<String,ContentBody>();
parts.put("query", UTF8.StringBody(query));
@ -197,8 +198,8 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
parts.put("verify", UTF8.StringBody(verify ? "true" : "false"));
parts.put("resource", UTF8.StringBody(global ? "global" : "local"));
parts.put("nav", UTF8.StringBody("none"));
final byte[] result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
String debug = UTF8.String(result); System.out.println("*** DEBUG: " + debug);
result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (reader == null) {
throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
@ -210,6 +211,7 @@ public class SearchSRURSS extends Thread implements SearchAccumulator {
}
return feed;
} catch (final IOException e) {
String debug = UTF8.String(result); System.out.println("*** DEBUG: " + debug);
throw new IOException("cora.Search error asking peer '" + uri.getHost() + "':" + e.toString());
}
}

@ -85,10 +85,12 @@ public class Document {
private int inboundLinks, outboundLinks; // counters for inbound and outbound links, are counted after calling notifyWebStructure
private Set<String> languages;
private boolean indexingDenied;
private float lon, lat;
public Document(final MultiProtocolURI location, final String mimeType, final String charset, final Set<String> languages,
final String[] keywords, final String title, final String author, final String publisher,
final String[] sections, final String abstrct,
final float lon, final float lat,
final Object text,
final Map<MultiProtocolURI, String> anchors,
final Map<MultiProtocolURI, String> rss,
@ -102,6 +104,8 @@ public class Document {
this.creator = (author == null) ? new StringBuilder(0) : new StringBuilder(author);
this.sections = (sections == null) ? new LinkedList<String>() : Arrays.asList(sections);
this.description = (abstrct == null) ? new StringBuilder(0) : new StringBuilder(abstrct);
this.lon = lon;
this.lat = lat;
this.anchors = (anchors == null) ? new HashMap<MultiProtocolURI, String>(0) : anchors;
this.rss = (rss == null) ? new HashMap<MultiProtocolURI, String>(0) : rss;
this.images = (images == null) ? new HashMap<MultiProtocolURI, ImageEntry>() : images;
@ -359,6 +363,14 @@ dc_rights
return emaillinks;
}
/**
 * @return the current value of this document's {@code lon} field
 *         (initialised from the constructor argument of the same name)
 */
public float lon() {
    return lon;
}
/**
 * @return the current value of this document's {@code lat} field
 *         (initialised from the constructor argument of the same name)
 */
public float lat() {
    return lat;
}
private synchronized void resortLinks() {
if (this.resorted) return;
@ -655,6 +667,7 @@ dc_rights
final Map<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
final Map<MultiProtocolURI, String> rss = new HashMap<MultiProtocolURI, String>();
final Map<MultiProtocolURI, ImageEntry> images = new HashMap<MultiProtocolURI, ImageEntry>();
float lon = 0.0f, lat = 0.0f;
for (Document doc: docs) {
@ -695,6 +708,7 @@ dc_rights
anchors.putAll(doc.getAnchors());
rss.putAll(doc.getRSS());
ContentScraper.addAllImages(images, doc.getImages());
if (doc.lon() != 0.0f && doc.lat() != 0.0f) { lon = doc.lon(); lat = doc.lat(); }
}
return new Document(
location,
@ -707,6 +721,7 @@ dc_rights
publishers.toString(),
sectionTitles.toArray(new String[sectionTitles.size()]),
description.toString(),
lon, lat,
content.getBytes(),
anchors,
rss,

@ -63,7 +63,9 @@ public class DCEntry extends TreeMap<String, String> {
Date date,
String title,
String author,
String body
String body,
float lat,
float lon
) {
super((Collator) insensitiveCollator.clone());
this.put("dc:identifier", url.toNormalform(true, false));
@ -71,6 +73,8 @@ public class DCEntry extends TreeMap<String, String> {
this.put("dc:title", title);
this.put("dc:creator", author);
this.put("dc:description", body);
this.put("geo:lat", Float.toString(lat));
this.put("geo:long", Float.toString(lon));
}
/*
@ -231,6 +235,22 @@ public class DCEntry extends TreeMap<String, String> {
return t.split(";");
}
/**
 * Reads the longitude of this entry.
 * The value is looked up under {@code geo:long} first and, if that key is
 * missing, under the alternative spelling {@code geo:lon}.
 *
 * @return the parsed longitude, or {@code 0.0f} if no value is stored or the
 *         stored value is empty or not a parsable floating point number
 */
public float getLon() {
    String t = this.get("geo:long");
    // the result of the fallback lookup must be assigned, otherwise it has no effect
    if (t == null) t = this.get("geo:lon");
    t = stripCDATA(t);
    if (t == null || t.length() == 0) return 0.0f;
    try {
        return Float.parseFloat(t);
    } catch (final NumberFormatException e) {
        // malformed coordinate in the source data: report "unknown" instead of failing
        return 0.0f;
    }
}
/**
 * Reads the latitude of this entry from the {@code geo:lat} key.
 *
 * @return the parsed latitude, or {@code 0.0f} if no value is stored or the
 *         stored value is empty or not a parsable floating point number
 */
public float getLat() {
    // a second lookup of the very same key cannot yield a different result, so only one is done
    final String t = stripCDATA(this.get("geo:lat"));
    if (t == null || t.length() == 0) return 0.0f;
    try {
        return Float.parseFloat(t);
    } catch (final NumberFormatException e) {
        // malformed coordinate in the source data: report "unknown" instead of failing
        return 0.0f;
    }
}
private String stripCDATA(String s) {
if (s == null) return null;
s = s.trim();
@ -254,6 +274,7 @@ public class DCEntry extends TreeMap<String, String> {
getPublisher(),
null,
"",
getLon(), getLat(),
UTF8.getBytes(getDescription()),
null,
null,

@ -109,7 +109,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
} else if ("value".equals(tag)) {
this.buffer.setLength(0);
this.parsingValue = true;
} else if (tag.startsWith("dc:")) {
} else if (tag.startsWith("dc:") || tag.startsWith("geo:")) {
// parse dublin core attribute
this.elementName = tag;
this.parsingValue = true;
@ -142,7 +142,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
}
this.buffer.setLength(0);
this.parsingValue = false;
} else if (tag.startsWith("dc:")) {
} else if (tag.startsWith("dc:") || tag.startsWith("geo:")) {
final String value = buffer.toString().trim();
if (this.elementName != null && tag.equals(this.elementName)) {
value.replaceAll(";", ",");

@ -205,7 +205,7 @@ public class PhpBB3Dao implements Dao {
String text = xmlCleaner(rs.getString("post_text"));
String user = getUser(rs.getInt("poster_id"));
Date date = new Date(rs.getLong("post_time") * 1000L);
return new DCEntry(url, date, subject, user, text);
return new DCEntry(url, date, subject, user, text, 0.0f, 0.0f);
}
public static String xmlCleaner(String s) {

@ -71,6 +71,7 @@ public class csvParser extends AbstractParser implements Parser {
"",
null,
null,
0.0f, 0.0f,
sb.toString().getBytes(charset),
null,
null,

@ -95,6 +95,7 @@ public class docParser extends AbstractParser implements Parser {
extractor.getDocSummaryInformation().getCompany(), // publisher
null,
null,
0.0f, 0.0f,
UTF8.getBytes(contents.toString()),
null,
null,

@ -57,6 +57,7 @@ public class genericParser extends AbstractParser implements Parser {
location.getHost(),
null,
null,
0.0f, 0.0f,
location.toTokens(),
null,
null,

@ -88,6 +88,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
private List<String>[] headlines;
private CharBuffer content;
private final EventListenerList htmlFilterEventListeners;
private float lon, lat;
/**
* {@link MultiProtocolURI} to the favicon that belongs to the document
@ -114,6 +115,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
for (int i = 0; i < 4; i++) headlines[i] = new ArrayList<String>();
this.content = new CharBuffer(1024);
this.htmlFilterEventListeners = new EventListenerList();
this.lon = 0.0f;
this.lat = 0.0f;
}
public void scrapeText(final char[] newtext, final String insideTag) {
@ -485,6 +488,42 @@ public class ContentScraper extends AbstractScraper implements Scraper {
return "";
}
// parse location
// <meta NAME="ICBM" CONTENT="38.90551492, 1.454004505" />
// <meta NAME="geo.position" CONTENT="38.90551492;1.454004505" />
/**
 * Returns the longitude found in the meta tags of the scraped page.
 * The value is computed on first use: the {@code ICBM} meta entry is tried
 * first, then {@code geo.position}. A successful parse sets both the
 * {@code lat} and the {@code lon} field of this scraper.
 *
 * @return the longitude, or {@code 0.0f} if none was found or the meta
 *         content could not be parsed
 */
public float getLon() {
    if (this.lon != 0.0f) return this.lon;
    // InterContinental Ballistic Missile (abbrev. supposed to be a joke:
    // http://www.jargon.net/jargonfile/i/ICBMaddress.html), see http://geourl.org/add.html#icbm
    parseLatLonMeta(metas.get("ICBM"));
    if (this.lon != 0.0f) return this.lon;
    // http://geotags.com/geobot/add-tags.html
    parseLatLonMeta(metas.get("geo.position"));
    return this.lon;
}

/**
 * Parses a "latitude SEPARATOR longitude" string, where the separator is the
 * first ';' if present, otherwise the first ',', otherwise the first ' '.
 * On success both coordinate fields are updated; on a missing separator or an
 * unparsable number the fields are left untouched.
 *
 * @param s the meta tag content, may be {@code null}
 */
private void parseLatLonMeta(final String s) {
    if (s == null) return;
    int p = s.indexOf(';');
    if (p < 0) p = s.indexOf(',');
    if (p < 0) p = s.indexOf(' ');
    if (p <= 0) return;
    try {
        // parse both values before assigning so a failure cannot leave a half-updated pair
        final float parsedLat = Float.parseFloat(s.substring(0, p).trim());
        final float parsedLon = Float.parseFloat(s.substring(p + 1).trim());
        this.lat = parsedLat;
        this.lon = parsedLon;
    } catch (final NumberFormatException e) {
        // page-supplied content is not trustworthy; a malformed value means "no location"
    }
}
/**
 * Returns the latitude found in the meta tags of the scraped page.
 *
 * @return the value of the {@code lat} field, after triggering the meta tag
 *         parsing in {@link #getLon()} if the field is still {@code 0.0f}
 */
public float getLat() {
    if (this.lat == 0.0f) {
        // getLon() does the parsing and fills in the latitude as well
        getLon();
    }
    return this.lat;
}
/*
* (non-Javadoc)
* @see de.anomic.htmlFilter.htmlFilterScraper#close()

@ -180,6 +180,7 @@ public class htmlParser extends AbstractParser implements Parser {
scraper.getPublisher(),
sections,
scraper.getDescription(),
scraper.getLon(), scraper.getLat(),
scraper.getText(),
scraper.getAnchors(),
scraper.getRSS(),

@ -199,6 +199,7 @@ public class genericImageParser extends AbstractParser implements Parser {
location.getHost(), // Publisher
new String[]{}, // sections
description == null ? "" : description, // description
0.0f, 0.0f, // TODO parse location
UTF8.getBytes(infoString), // content text
anchors, // anchors
null,

@ -95,6 +95,7 @@ public class mmParser extends AbstractParser implements Parser {
null,
null,
null,
0.0f, 0.0f,
content,
null,
null,

@ -169,6 +169,7 @@ public class odtParser extends AbstractParser implements Parser {
"",
null,
docDescription,
0.0f, 0.0f,
contentBytes,
null,
null,

@ -154,6 +154,7 @@ public class ooxmlParser extends AbstractParser implements Parser {
"",
null,
docDescription,
0.0f, 0.0f,
contentBytes,
null,
null,

@ -172,6 +172,7 @@ public class pdfParser extends AbstractParser implements Parser {
docPublisher,
null,
null,
0.0f, 0.0f,
contentBytes,
null,
null,

@ -93,6 +93,7 @@ public class pptParser extends AbstractParser implements Parser {
pptExtractor.getDocSummaryInformation().getCompany(),
null,
null,
0.0f, 0.0f,
UTF8.getBytes(contents),
null,
null,

@ -109,6 +109,7 @@ public class psParser extends AbstractParser implements Parser {
"", // publisher
null, // sections
null, // abstract
0.0f, 0.0f,
outputFile, // fulltext
null, // anchors
null, // rss

@ -91,6 +91,8 @@ public class rssParser extends AbstractParser implements Parser {
item.getCopyright(),
new String[0],
item.getDescription(),
item.getLon(),
item.getLat(),
null,
anchors,
null,

@ -78,6 +78,7 @@ public class rtfParser extends AbstractParser implements Parser {
"", // TODO: publisher
null,
null,
0.0f, 0.0f,
UTF8.getBytes(bodyText),
null,
null,

@ -56,7 +56,23 @@ public class sevenzipParser extends AbstractParser implements Parser {
}
public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final IInStream source) throws Parser.Failure, InterruptedException {
final Document doc = new Document(location, mimeType, charset, null, null, null, null, null, null, null, (Object)null, null, null, null, false);
final Document doc = new Document(
location,
mimeType,
charset,
null,
null,
null,
null,
null,
null,
null,
0.0f, 0.0f,
(Object)null,
null,
null,
null,
false);
Handler archive;
super.log.logFine("opening 7zip archive...");
try {

@ -78,7 +78,8 @@ public class sidAudioParser extends AbstractParser implements Parser {
throw new Parser.Failure("Unable to parse SID file, unexpected version: " + version, location);
}
return new Document[]{new Document(location,
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
null,
@ -88,6 +89,7 @@ public class sidAudioParser extends AbstractParser implements Parser {
header.get("publisher"),
null,
null,
0.0f, 0.0f,
null,
null,
null,

@ -94,6 +94,7 @@ public class sitemapParser extends AbstractParser implements Parser {
"",
new String[0],
"",
0.0f, 0.0f,
null,
null,
null,

@ -117,6 +117,7 @@ public class swfParser extends AbstractParser implements Parser {
"",
sections, // an array of section headlines
abstrct, // an abstract
0.0f, 0.0f,
UTF8.getBytes(contents), // the parsed document text
anchors, // a map of extracted anchors
null,

@ -105,6 +105,7 @@ public class torrentParser extends AbstractParser implements Parser {
location.getHost(),
null,
null,
0.0f, 0.0f,
filenames.toString().getBytes(charset),
null,
null,

@ -211,6 +211,7 @@ public class vcfParser extends AbstractParser implements Parser {
"", // the publisher
sections, // an array of section headlines
"vCard", // an abstract
0.0f, 0.0f,
text, // the parsed document text
anchors, // a map of extracted anchors
null,

@ -113,6 +113,7 @@ public class vsdParser extends AbstractParser implements Parser {
"",
null, // an array of section headlines
abstrct, // an abstract
0.0f, 0.0f,
UTF8.getBytes(contents), // the parsed document text
null, // a map of extracted anchors
null,

@ -123,6 +123,7 @@ public class xlsParser extends AbstractParser implements Parser {
"", // TODO: publisher
null,
null,
0.0f, 0.0f,
UTF8.getBytes(contents),
null,
null,

@ -121,6 +121,7 @@ public class URIMetadataRow implements URIMetadata {
final String dc_creator,
final String dc_subject,
final String dc_publisher,
final float lon, final float lat, // decimal degrees as in WGS84; if unknown both values may be 0.0f;
final Date mod,
final Date load,
final Date fresh,
@ -140,7 +141,7 @@ public class URIMetadataRow implements URIMetadata {
// create new entry
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash());
this.entry.setCol(col_comp, encodeComp(url, dc_title, dc_creator, dc_subject, dc_publisher));
this.entry.setCol(col_comp, encodeComp(url, dc_title, dc_creator, dc_subject, dc_publisher, lat, lon));
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
@ -179,13 +180,21 @@ public class URIMetadataRow implements URIMetadata {
*/
}
public static byte[] encodeComp(final DigestURI url, final String dc_title, final String dc_creator, final String dc_subject, final String dc_publisher) {
/**
 * Serialises the descriptive components of an entry into one byte array.
 * The fields are written in the order url, title, creator, subject, publisher,
 * location, each followed by {@code append(10)} (presumably a line feed used
 * as field terminator - confirm against {@code CharBuffer.append(int)}).
 * The location field is written as {@code "<lat>,<lon>"}; it is left empty
 * when both coordinates are {@code 0.0f}.
 *
 * @return the bytes produced by {@code UTF8.getBytes} for the assembled string
 */
public static byte[] encodeComp(
        final DigestURI url,
        final String dc_title,
        final String dc_creator,
        final String dc_subject,
        final String dc_publisher,
        final float lat,
        final float lon) {
    final CharBuffer buf = new CharBuffer(360);
    buf.append(url.toNormalform(false, true)).append(10);
    buf.append(dc_title).append(10);
    buf.append(dc_creator).append(10);
    buf.append(dc_subject).append(10);
    buf.append(dc_publisher).append(10);
    final boolean locationUnknown = (lon == 0.0f && lat == 0.0f);
    if (!locationUnknown) {
        buf.append(Float.toString(lat)).append(',').append(Float.toString(lon));
    }
    // the terminator is written in both cases
    buf.append(10);
    return UTF8.getBytes(buf.toString());
}
@ -211,10 +220,12 @@ public class URIMetadataRow implements URIMetadata {
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash()); // FIXME potential null pointer access
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher));
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher, Float.parseFloat(lats), Float.parseFloat(lons)));
// create new formatters to make concurrency possible
GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
@ -294,6 +305,10 @@ public class URIMetadataRow implements URIMetadata {
assert (s.toString().indexOf(0) < 0);
s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
assert (s.toString().indexOf(0) < 0);
s.append(",lat=").append(metadata.lat());
assert (s.toString().indexOf(0) < 0);
s.append(",lon=").append(metadata.lon());
assert (s.toString().indexOf(0) < 0);
s.append(",mod=").append(formatter.format(moddate()));
assert (s.toString().indexOf(0) < 0);
s.append(",load=").append(formatter.format(loaddate()));
@ -371,7 +386,8 @@ public class URIMetadataRow implements URIMetadata {
(cl.size() > 1) ? UTF8.String(cl.get(1)) : "",
(cl.size() > 2) ? UTF8.String(cl.get(2)) : "",
(cl.size() > 3) ? UTF8.String(cl.get(3)) : "",
(cl.size() > 4) ? UTF8.String(cl.get(4)) : "");
(cl.size() > 4) ? UTF8.String(cl.get(4)) : "",
(cl.size() > 5) ? UTF8.String(cl.get(5)) : "");
return this.comp;
}
@ -524,8 +540,16 @@ public class URIMetadataRow implements URIMetadata {
private String urlRaw;
private byte[] urlHash;
private final String dc_title, dc_creator, dc_subject, dc_publisher;
private final String latlon; // a comma-separated tuple as "<latitude>,<longitude>" where the coordinates are given as WGS84 spatial coordinates in decimal degrees
public Components(final String urlRaw, final byte[] urlhash, final String title, final String author, final String tags, final String publisher) {
public Components(
final String urlRaw,
final byte[] urlhash,
final String title,
final String author,
final String tags,
final String publisher,
final String latlon) {
this.url = null;
this.urlRaw = urlRaw;
this.urlHash = urlhash;
@ -533,6 +557,7 @@ public class URIMetadataRow implements URIMetadata {
this.dc_creator = author;
this.dc_subject = tags;
this.dc_publisher = publisher;
this.latlon = latlon;
}
public boolean matches(Pattern matcher) {
if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
@ -555,6 +580,15 @@ public class URIMetadataRow implements URIMetadata {
public String dc_creator() { return this.dc_creator; }
public String dc_publisher() { return this.dc_publisher; }
public String dc_subject() { return this.dc_subject; }
/**
 * @return the part of the {@code latlon} tuple before the first comma parsed
 *         as float, or {@code 0.0f} if the tuple is null, empty or has no comma
 */
public float lat() {
    final String tuple = this.latlon;
    if (tuple == null || tuple.length() == 0) {
        return 0.0f;
    }
    final int comma = tuple.indexOf(',');
    if (comma < 0) {
        return 0.0f;
    }
    return Float.parseFloat(tuple.substring(0, comma));
}
/**
 * @return the part of the {@code latlon} tuple after the first comma parsed
 *         as float, or {@code 0.0f} if the tuple is null, empty or has no comma
 */
public float lon() {
    final String tuple = this.latlon;
    if (tuple == null || tuple.length() == 0) {
        return 0.0f;
    }
    final int comma = tuple.indexOf(',');
    if (comma < 0) {
        return 0.0f;
    }
    return Float.parseFloat(tuple.substring(comma + 1));
}
}
}

@ -173,6 +173,11 @@ public final class CharBuffer extends Writer {
return this;
}
/**
 * Appends a single character by passing it to {@code write(c)}.
 *
 * @param c the character to append
 * @return this buffer, so that calls can be chained
 */
public CharBuffer append(final char c) {
    this.write(c);
    return this;
}
public CharBuffer append(final String s) {
final char[] temp = new char[s.length()];
s.getChars(0, temp.length, temp, 0);

@ -310,11 +310,13 @@ public final class yacy {
// open the browser window
final boolean browserPopUpTrigger = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_TRIGGER, "true").equals("true");
if (browserPopUpTrigger) {
if (browserPopUpTrigger) try {
final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html");
//boolean properPW = (sb.getConfig("adminAccount", "").length() == 0) && (sb.getConfig(httpd.ADMIN_ACCOUNT_B64MD5, "").length() > 0);
//if (!properPW) browserPopUpPage = "ConfigBasic.html";
Browser.openBrowser((server.withSSL()?"https":"http") + "://localhost:" + serverCore.getPortNr(port) + "/" + browserPopUpPage);
} catch (RuntimeException e) {
Log.logException(e);
}
// unlock yacyTray browser popup

Loading…
Cancel
Save