Added the protocol and the file name extension to the Solr fields, since

these fields will probably be used as facets in file search
pull/1/head
Michael Peter Christen 13 years ago
parent e072632a54
commit 62add1d564

@ -298,11 +298,14 @@ italic_txt
#iframesscount_i
## the protocol of the url
#url_protocol_s
url_protocol_s
## all path elements in the url
url_paths_sxt
## the file name extension
url_file_ext_s
## number of key-value pairs in search part of the url
#url_parameter_i

@ -141,6 +141,10 @@ public class JsonResponseWriter implements QueryResponseWriter {
path.append('/').append(value.stringValue());
continue;
}
if (YaCySchema.url_file_ext_s.name().equals(fieldName)) {
solitaireTag(writer, "ext", value.stringValue());
continue;
}
if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d));

@ -286,9 +286,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
// path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) {
add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
}
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
if (allAttr || contains(YaCySchema.imagescount_i)) add(doc, YaCySchema.imagescount_i, md.limage());
if (allAttr || contains(YaCySchema.inboundlinkscount_i)) add(doc, YaCySchema.inboundlinkscount_i, md.llocal());
@ -416,9 +415,8 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
// path elements of link
if (allAttr || contains(YaCySchema.url_paths_sxt)) {
add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
}
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
// get list of all links; they will be reduced by the urls that appear in other fields of the solr schema
Set<MultiProtocolURI> inboundLinks = yacydoc.inboundLinks();
@ -860,13 +858,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash()));
add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
final InetAddress address = digestURI.getInetAddress();
if (address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());
if (contains(YaCySchema.ip_s) && address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (contains(YaCySchema.host_s) && digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());
// path elements of link
add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths());
add(solrdoc, YaCySchema.failreason_t, failReason);
add(solrdoc, YaCySchema.httpstatus_i, httpstatus);
if (contains(YaCySchema.url_paths_sxt)) add(solrdoc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (contains(YaCySchema.url_file_ext_s)) add(solrdoc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
// fail reason and status
if (contains(YaCySchema.failreason_t)) add(solrdoc, YaCySchema.failreason_t, failReason);
if (contains(YaCySchema.httpstatus_i)) add(solrdoc, YaCySchema.httpstatus_i, httpstatus);
return solrdoc;
}

@ -141,6 +141,7 @@ public enum YaCySchema implements Schema {
url_protocol_s(SolrType.string, true, true, false, "the protocol of the url"),
url_paths_sxt(SolrType.string, true, true, true, "all path elements in the url"),
url_file_ext_s(SolrType.string, true, true, false, "the file name extension"),
url_parameter_i(SolrType.integer, true, true, false, "number of key-value pairs in search part of the url"),
url_parameter_key_sxt(SolrType.string, true, true, true, "the keys from key-value pairs in the search part of the url"),
url_parameter_value_sxt(SolrType.string, true, true, true, "the values from key-value pairs in the search part of the url"),

Loading…
Cancel
Save