Added the protocol and the file name extension to the Solr fields, since these fields are probably facets in file search.
pull/1/head
Michael Peter Christen 13 years ago
parent e072632a54
commit 62add1d564

@ -298,11 +298,14 @@ italic_txt
#iframesscount_i #iframesscount_i
## the protocol of the url ## the protocol of the url
#url_protocol_s url_protocol_s
## all path elements in the url ## all path elements in the url
url_paths_sxt url_paths_sxt
## the file name extension
url_file_ext_s
## number of key-value pairs in search part of the url ## number of key-value pairs in search part of the url
#url_parameter_i #url_parameter_i

@ -141,6 +141,10 @@ public class JsonResponseWriter implements QueryResponseWriter {
path.append('/').append(value.stringValue()); path.append('/').append(value.stringValue());
continue; continue;
} }
if (YaCySchema.url_file_ext_s.name().equals(fieldName)) {
solitaireTag(writer, "ext", value.stringValue());
continue;
}
if (YaCySchema.last_modified.name().equals(fieldName)) { if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue())); Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d)); solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d));

@ -286,9 +286,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
} }
// path elements of link // path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) { if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths()); if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
}
if (allAttr || contains(YaCySchema.imagescount_i)) add(doc, YaCySchema.imagescount_i, md.limage()); if (allAttr || contains(YaCySchema.imagescount_i)) add(doc, YaCySchema.imagescount_i, md.limage());
if (allAttr || contains(YaCySchema.inboundlinkscount_i)) add(doc, YaCySchema.inboundlinkscount_i, md.llocal()); if (allAttr || contains(YaCySchema.inboundlinkscount_i)) add(doc, YaCySchema.inboundlinkscount_i, md.llocal());
@ -416,9 +415,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
} }
// path elements of link // path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) { if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths()); if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
}
// get list of all links; they will be shrinked by urls that appear in other fields of the solr scheme // get list of all links; they will be shrinked by urls that appear in other fields of the solr scheme
Set<MultiProtocolURI> inboundLinks = yacydoc.inboundLinks(); Set<MultiProtocolURI> inboundLinks = yacydoc.inboundLinks();
@ -860,13 +858,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash())); add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash()));
add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false)); add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
final InetAddress address = digestURI.getInetAddress(); final InetAddress address = digestURI.getInetAddress();
if (address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress()); if (contains(YaCySchema.ip_s) && address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost()); if (contains(YaCySchema.host_s) && digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());
// path elements of link // path elements of link
add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths()); if (contains(YaCySchema.url_paths_sxt)) add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths());
add(solrdoc, YaCySchema.failreason_t, failReason); if (contains(YaCySchema.url_file_ext_s)) add(solrdoc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
add(solrdoc, YaCySchema.httpstatus_i, httpstatus);
// fail reason and status
if (contains(YaCySchema.failreason_t)) add(solrdoc, YaCySchema.failreason_t, failReason);
if (contains(YaCySchema.httpstatus_i)) add(solrdoc, YaCySchema.httpstatus_i, httpstatus);
return solrdoc; return solrdoc;
} }

@ -141,6 +141,7 @@ public enum YaCySchema implements Schema {
url_protocol_s(SolrType.string, true, true, false, "the protocol of the url"), url_protocol_s(SolrType.string, true, true, false, "the protocol of the url"),
url_paths_sxt(SolrType.string, true, true, true, "all path elements in the url"), url_paths_sxt(SolrType.string, true, true, true, "all path elements in the url"),
url_file_ext_s(SolrType.string, true, true, false, "the file name extension"),
url_parameter_i(SolrType.integer, true, true, false, "number of key-value pairs in search part of the url"), url_parameter_i(SolrType.integer, true, true, false, "number of key-value pairs in search part of the url"),
url_parameter_key_sxt(SolrType.string, true, true, true, "the keys from key-value pairs in the search part of the url"), url_parameter_key_sxt(SolrType.string, true, true, true, "the keys from key-value pairs in the search part of the url"),
url_parameter_value_sxt(SolrType.string, true, true, true, "the values from key-value pairs in the search part of the url"), url_parameter_value_sxt(SolrType.string, true, true, true, "the values from key-value pairs in the search part of the url"),

Loading…
Cancel
Save