- some corrections in usage of getFile() and getFileName()

- added more attributes to the json response writer, matching the yacy
servlet
pull/1/head
Michael Peter Christen 13 years ago
parent 62add1d564
commit e54ac38095

@ -201,7 +201,7 @@ public class yacysearchitem {
prop.putHTML("content_sizename", sizename(result.filesize()));
prop.putHTML("content_showSize_sizename", sizename(result.filesize()));
prop.putHTML("content_host", resultURL.getHost() == null ? "" : resultURL.getHost());
prop.putHTML("content_file", resultURL.getFile());
prop.putHTML("content_file", resultURL.getFileName());
prop.putHTML("content_path", resultURL.getPath());
prop.put("content_nl", (item == theQuery.offset) ? 0 : 1);
prop.putHTML("content_publisher", result.publisher());

@ -69,13 +69,12 @@ public class ResultImages {
if (doubleCheck.contains(url)) continue;
doubleCheck.add(url);
final String name = image.url().getFile();
boolean good = false;
if (image.width() > 120 &&
image.height() > 100 &&
image.width() < 1200 &&
image.height() < 1000 &&
name.lastIndexOf(".gif") == -1) {
!"gif".equals(image.url().getFileExtension())) {
// && ((urlString.lastIndexOf(".jpg") != -1)) ||
// ((urlString.lastIndexOf(".png") != -1)){

@ -632,16 +632,29 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
}
}
/**
 * Get the hpath together with the search field and the anchor
 * (naming follows http://www.ietf.org/rfc/rfc1738.txt).
 * If there is neither a search part nor an anchor, the result is identical to getPath;
 * this is defined according to http://docs.oracle.com/javase/1.4.2/docs/api/java/net/URL.html#getFile()
 * Delegates to getFile(boolean, boolean) with both options switched off.
 * @return the path, extended by the search part and the anchor where these are present
 */
public String getFile() {
final boolean excludeAnchor = false;   // keep a trailing '#anchor'
final boolean removeSessionID = false; // leave the search part untouched
return getFile(excludeAnchor, removeSessionID);
}
public String getFile(final boolean excludeReference, final boolean removeSessionID) {
// this is the path plus quest plus ref
// if there is no quest and no ref the result is identical to getPath
// this is defined according to http://java.sun.com/j2se/1.4.2/docs/api/java/net/URL.html#getFile()
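/**
* Get the hpath plus search field plus anchor (if wanted);
* see http://www.ietf.org/rfc/rfc1738.txt for naming.
* If there is no search and no anchor the result is identical to getPath.
* This is defined according to http://docs.oracle.com/javase/1.4.2/docs/api/java/net/URL.html#getFile()
* @param excludeAnchor if true, the '#' + anchor suffix is never appended to the result
* @param removeSessionID presumably requests that a session-id parameter is dropped from the search part (the guarding condition is not fully visible here - TODO confirm)
* @return the path, followed by the search part and the anchor as far as they exist and are wanted
*/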
public String getFile(final boolean excludeAnchor, final boolean removeSessionID) {
if (this.searchpart == null) {
if (excludeReference || this.anchor == null) return this.path;
if (excludeAnchor || this.anchor == null) return this.path;
final StringBuilder sb = new StringBuilder(120);
sb.append(this.path);
sb.append('#');
@ -654,7 +667,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) {
final int p = q.indexOf('&');
if (p < 0) {
if (excludeReference || this.anchor == null) return this.path;
if (excludeAnchor || this.anchor == null) return this.path;
final StringBuilder sb = new StringBuilder(120);
sb.append(this.path);
sb.append('#');
@ -678,7 +691,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
sb.append(this.path);
sb.append('?');
sb.append(q);
if (excludeReference || this.anchor == null) return sb.toString();
if (excludeAnchor || this.anchor == null) return sb.toString();
sb.append('#');
sb.append(this.anchor);
return sb.toString();

@ -22,11 +22,14 @@ package net.yacy.cora.services.federated.solr;
import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.services.federated.solr.OpensearchResponseWriter.ResHead;
import net.yacy.search.index.YaCySchema;
@ -50,6 +53,14 @@ import de.anomic.server.serverObjects;
*/
public class JsonResponseWriter implements QueryResponseWriter {
// Simple one-to-one matchings: YaCySchema field name -> json token.
// Fields listed here are written as a plain tag without any special handling.
private static final Map<String, String> field2tag = new HashMap<String, String>();
static {
// each row is { schema field name, json token }
final String[][] simpleMatchings = {
{YaCySchema.url_protocol_s.name(), "protocol"},
{YaCySchema.host_s.name(), "host"},
{YaCySchema.url_file_ext_s.name(), "ext"}
};
for (final String[] matching : simpleMatchings) {
field2tag.put(matching[0], matching[1]);
}
}
private String title;
public JsonResponseWriter() {
@ -109,18 +120,33 @@ public class JsonResponseWriter implements QueryResponseWriter {
List<Fieldable> fields = doc.getFields();
int fieldc = fields.size();
List<String> texts = new ArrayList<String>();
MultiProtocolURI url = null;
String description = "", title = "";
StringBuilder path = new StringBuilder(80);
for (int j = 0; j < fieldc; j++) {
Fieldable value = fields.get(j);
String fieldName = value.name();
if (YaCySchema.title.name().equals(fieldName)) {
title = value.stringValue();
texts.add(title);
// apply generic matching rule
String stag = field2tag.get(fieldName);
if (stag != null) {
solitaireTag(writer, stag, value.stringValue());
continue;
}
// some special handling here
if (YaCySchema.sku.name().equals(fieldName)) {
solitaireTag(writer, "link", value.stringValue());
String u = value.stringValue();
try {
url = new MultiProtocolURI(u);
solitaireTag(writer, "link", u);
solitaireTag(writer, "file", url.getFileName());
} catch (MalformedURLException e) {}
continue;
}
if (YaCySchema.title.name().equals(fieldName)) {
title = value.stringValue();
texts.add(title);
continue;
}
if (YaCySchema.description.name().equals(fieldName)) {
@ -133,18 +159,10 @@ public class JsonResponseWriter implements QueryResponseWriter {
solitaireTag(writer, "guid", urlhash);
continue;
}
if (YaCySchema.host_s.name().equals(fieldName)) {
solitaireTag(writer, "host", value.stringValue());
continue;
}
if (YaCySchema.url_paths_sxt.name().equals(fieldName)) {
path.append('/').append(value.stringValue());
continue;
}
if (YaCySchema.url_file_ext_s.name().equals(fieldName)) {
solitaireTag(writer, "ext", value.stringValue());
continue;
}
if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d));
@ -169,9 +187,11 @@ public class JsonResponseWriter implements QueryResponseWriter {
texts.add(value.stringValue());
continue;
}
}
// compute snippet from texts
//missing: "code","faviconCode"
}
// compute snippet from texts
solitaireTag(writer, "path", path.toString());
solitaireTag(writer, "title", title.length() == 0 ? (texts.size() == 0 ? path.toString() : texts.get(0)) : title);
List<String> snippet = urlhash == null ? null : snippets.get(urlhash);

@ -447,7 +447,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
final String href = tagopts.getProperty("href", EMPTY_STRING);
MultiProtocolURI url;
if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
final String f = url.getFile();
final String f = url.getFileName();
final int p = f.lastIndexOf('.');
final String type = (p < 0) ? EMPTY_STRING : f.substring(p + 1);
if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg") || type.equals("tiff") || type.equals("tif")) {

@ -82,7 +82,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
int p = host.indexOf('.');
final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
final Seed seed = peers.getConnected(hash);
final String filename = urlentry.url().getFile();
final String path = urlentry.url().getFile();
String address = null;
if ((seed == null) || ((address = seed.getPublicAddress()) == null)) {
// seed is not known from here
@ -90,7 +90,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
indexSegment.termIndex().remove(
Word.words2hashesHandles(Condenser.getWords(
("yacyshare " +
filename.replace('?', ' ') +
path.replace('?', ' ') +
" " +
urlentry.dc_title()), null).keySet()),
urlentry.hash());
@ -100,8 +100,8 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
indexSegment.fulltext().remove(urlentry.hash()); // clean up
throw new RuntimeException("index void");
}
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path;
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + path;
if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
}
}

Loading…
Cancel
Save