getDescription() {
return getStringList(CollectionSchema.description_txt);
}
/**
 * Builds a node from a serialized property list enclosed in curly braces.
 *
 * @param propStr the serialized entry; it must be non-empty, start with '{' and end with '}'
 * @param collection handed unchanged to the URIMetadataNode constructor
 * @return the parsed node, or null if propStr is not well-formed or a
 *         kelondroException is raised while building the node (both cases are logged)
 */
public static URIMetadataNode importEntry(final String propStr, String collection) {
    final boolean wellFormed = propStr != null
            && !propStr.isEmpty()
            && propStr.charAt(0) == '{'
            && propStr.endsWith("}");
    if (!wellFormed) {
        ConcurrentLog.severe("URIMetadataNode", "importEntry: propStr is not proper: " + propStr);
        return null;
    }
    // strip the enclosing braces; what remains is the plain key=value list
    final String propertyList = propStr.substring(1, propStr.length() - 1);
    try {
        return new URIMetadataNode(MapTools.s2p(propertyList), collection);
    } catch (final kelondroException e) {
        // the property list had the wrong format
        ConcurrentLog.severe("URIMetadataNode", e.getMessage());
        return null;
    }
}
/**
 * Generates the parseable core of the transport format: a comma-separated
 * key=value list without the enclosing braces.
 *
 * @return the property list, or null if anything fails while reading the
 *         fields (the failure is logged)
 */
protected StringBuilder corePropList() {
    final StringBuilder props = new StringBuilder(300);
    // a formatter is created per call so that concurrent callers never share one
    final GenericFormatter dayFormatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
    try {
        // identity and descriptive metadata
        props.append("hash=").append(ASCII.String(this.hash()))
             .append(",url=").append(crypt.simpleEncode(this.url().toNormalform(true)))
             .append(",descr=").append(crypt.simpleEncode(this.dc_title()))
             .append(",author=").append(crypt.simpleEncode(this.dc_creator()))
             .append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(this.dc_subject())))
             .append(",publisher=").append(crypt.simpleEncode(this.dc_publisher()));

        // geo position
        props.append(",lat=").append(this.lat())
             .append(",lon=").append(this.lon());

        // dates
        props.append(",mod=").append(dayFormatter.format(this.moddate()))
             .append(",load=").append(dayFormatter.format(this.loaddate()))
             .append(",fresh=").append(dayFormatter.format(this.freshdate()));

        // a missing referrer is written as an empty value
        props.append(",referrer=").append(this.referrerHash() == null ? "" : ASCII.String(this.referrerHash()));

        // document properties
        props.append(",md5=").append(this.md5())
             .append(",size=").append(this.filesize())
             .append(",wc=").append(this.wordCount())
             .append(",dt=").append(this.doctype())
             .append(",flags=").append(this.flags().exportB64())
             .append(",lang=").append(this.language());

        // link counters
        props.append(",llocal=").append(this.llocal())
             .append(",lother=").append(this.lother())
             .append(",limage=").append(this.limage())
             .append(",laudio=").append(this.laudio())
             .append(",lvideo=").append(this.lvideo())
             .append(",lapp=").append(this.lapp());

        props.append(",score=").append(Float.toString(this.score()));

        if (this.word() != null) {
            // the word properties travel along as one encoded value
            final String wordProperties = this.word().toPropertyForm();
            props.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wordProperties));
        }
        return props;
    } catch (final Throwable e) {
        ConcurrentLog.logException(e);
        return null;
    }
}
/**
 * Serializes this entry together with a snippet, wrapped in curly braces.
 * The toString format must be completely identical to URIMetadataRow because that is used
 * to transport the data over p2p connections.
 *
 * @param snippet the snippet text written as the "snippet" property; if null,
 *        no snippet property is written and the result is that of {@link #toString()}
 * @return the serialized entry, or null if the core property list could not be generated
 */
public String toString(String snippet) {
    if (snippet == null) {
        // without a snippet there is nothing to add; avoid the NullPointerException on snippet.length()
        return this.toString();
    }
    final StringBuilder core = corePropList();
    if (core == null) {
        return null;
    }
    // reserve room up front for the snippet property that is appended below
    core.ensureCapacity(core.length() + snippet.length() * 2);
    core.insert(0, '{');
    // add information needed for remote transport
    core.append(",snippet=").append(crypt.simpleEncode(snippet));
    core.append('}');
    return core.toString();
}
/**
 * @return the object as String: the core property list wrapped in curly braces,
 * or null if the core property list could not be generated.
 * A historical example of the format (the keys actually written are those
 * appended in corePropList(), which differ from this example):
 * {hash=jmqfMk7Y3NKw,referrer=------------,mod=20050610,load=20051003,size=51666,wc=1392,cc=0,local=true,q=AEn,dt=h,lang=uk,url=b|aHR0cDovL3d3dy50cmFuc3BhcmVuY3kub3JnL3N1cnZleXMv,descr=b|S25vd2xlZGdlIENlbnRyZTogQ29ycnVwdGlvbiBTdXJ2ZXlzIGFuZCBJbmRpY2Vz}
 */
@Override
public String toString() {
    final StringBuilder props = corePropList();
    if (props == null) {
        return null;
    }
    return props.insert(0, '{').append('}').toString();
}
/**
 * Reads a single-valued integer field.
 *
 * @param field the schema field to read
 * @return the stored value narrowed to int if it is an Integer or a Long;
 *         0 if the field is absent or holds any other type
 */
private int getInt(CollectionSchema field) {
    assert !field.isMultiValued();
    assert field.getType() == SolrType.num_integer;
    final Object value = this.getFieldValue(field.getSolrFieldName());
    if (value instanceof Integer) {
        return ((Integer) value).intValue();
    }
    if (value instanceof Long) {
        return ((Long) value).intValue();
    }
    // null or an unexpected type
    return 0;
}
/**
 * Reads a single-valued date field.
 *
 * @param field the schema field to read
 * @return the epoch (new Date(0)) if the field is absent; the current time if
 *         the stored date lies in the future; otherwise the stored date
 */
private Date getDate(CollectionSchema field) {
    assert !field.isMultiValued();
    assert field.getType() == SolrType.date;
    final Date stored = (Date) this.getFieldValue(field.getSolrFieldName());
    if (stored == null) {
        return new Date(0);
    }
    final Date now = new Date();
    if (stored.after(now)) {
        // dates in the future are clamped to "now"
        return now;
    }
    return stored;
}
/**
 * Reads a multi-valued date field.
 *
 * @param field the schema field to read
 * @return the stored dates as an array; an empty array if the field is absent;
 *         a one-element array if the field holds a single Date instead of a list
 *         (the same scalar handling as getStringList and getIntList)
 */
private Date[] getDates(CollectionSchema field) {
    assert field.isMultiValued();
    assert field.getType() == SolrType.date;
    final Object value = this.getFieldValue(field.getSolrFieldName());
    if (value == null) {
        return new Date[0];
    }
    if (value instanceof Date) {
        return new Date[] {(Date) value};
    }
    // a wildcard list needs no unchecked cast; toArray(Date[]) still yields a Date[]
    final List<?> dates = (List<?>) value;
    return dates.toArray(new Date[dates.size()]);
}
/**
 * Reads a single-valued text field.
 *
 * @param field the schema field to read
 * @return the stored string; the first element if the value is an ArrayList;
 *         the empty string if the field is absent or the list is empty
 */
private String getString(CollectionSchema field) {
    assert !field.isMultiValued();
    assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
    final Object value = this.getFieldValue(field.getSolrFieldName());
    if (value == null) {
        return "";
    }
    if (value instanceof ArrayList) {
        // the field unexpectedly carries a list: use its first element
        final ArrayList<?> values = (ArrayList<?>) value;
        return values.isEmpty() ? "" : (String) values.get(0);
    }
    return (String) value;
}
/**
 * Reads a multi-valued text field.
 *
 * @param field the schema field to read
 * @return the stored list itself (not a copy) if the value is an ArrayList;
 *         a new one-element list if the value is a single String;
 *         a new empty list if the field is absent
 */
private ArrayList<String> getStringList(CollectionSchema field) {
    assert field.isMultiValued();
    assert field.getType() == SolrType.string || field.getType() == SolrType.text_general;
    final Object value = this.getFieldValue(field.getSolrFieldName());
    if (value == null) {
        return new ArrayList<String>(0);
    }
    if (value instanceof ArrayList) {
        // the element type cannot be checked at runtime; the schema assertions above are the only safeguard
        @SuppressWarnings("unchecked")
        final ArrayList<String> stored = (ArrayList<String>) value;
        return stored;
    }
    final ArrayList<String> single = new ArrayList<String>(1);
    single.add((String) value);
    return single;
}
/**
 * Reads a multi-valued integer field.
 *
 * @param field the schema field to read
 * @return the stored list itself (not a copy) if the value is an ArrayList;
 *         a new one-element list if the value is a single Integer;
 *         a new empty list if the field is absent
 */
private ArrayList<Integer> getIntList(CollectionSchema field) {
    assert field.isMultiValued();
    assert field.getType() == SolrType.num_integer;
    final Object value = this.getFieldValue(field.getSolrFieldName());
    if (value == null) {
        return new ArrayList<Integer>(0);
    }
    if (value instanceof ArrayList) {
        // the element type cannot be checked at runtime; the schema assertions above are the only safeguard
        @SuppressWarnings("unchecked")
        final ArrayList<Integer> stored = (ArrayList<Integer>) value;
        return stored;
    }
    final ArrayList<Integer> single = new ArrayList<Integer>(1);
    single.add((Integer) value);
    return single;
}
// --- implementation for use as search result ----------
/**
 * Initializes some variables only needed for search results
 * and eliminates underlying fields not needed for search results.
 *
 * ! never put this back to the index because of the reduced content fields
 *
 * For hosts ending in ".yacyh" the part between the first dot and that suffix
 * is converted with Seed.hexHash2b64Hash and looked up in peers. If the peer is
 * connected and has a public address, alternative_urlstring and
 * alternative_urlname are set. Otherwise the entry is removed from the index
 * and both alternative fields stay null.
 *
 * @param indexSegment the index segment from which entries of unknown peers are removed
 * @param peers the seed database used to look up the peer of a ".yacyh" host
 * @param textSnippet the snippet to attach to this result entry
 * @return this entry
 */
public URIMetadataNode makeResultEntry(
        final Segment indexSegment,
        SeedDB peers,
        final TextSnippet textSnippet) {
    this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
    this.alternative_urlstring = null;
    this.alternative_urlname = null;
    this.textSnippet = textSnippet;

    final String host = this.url().getHost();
    if (host == null || !host.endsWith(".yacyh")) {
        return this;
    }

    // translate host into current IP
    final int firstDot = host.indexOf('.');
    final int suffixStart = host.length() - 6; // index of the '.' in ".yacyh"
    if (firstDot >= suffixStart) {
        // the only dot is the one of the suffix: there is no hash part to extract,
        // and substring(firstDot + 1, suffixStart) would throw
        return this;
    }
    final String hash = Seed.hexHash2b64Hash(host.substring(firstDot + 1, suffixStart));
    final Seed seed = peers.getConnected(hash);
    final String path = this.url().getFile();
    final String address = (seed == null) ? null : seed.getPublicAddress(seed.getIP());

    if (address == null) {
        // seed is not known from here
        try {
            if (indexSegment.termIndex() != null) indexSegment.termIndex().remove(
                Word.words2hashesHandles(Tokenizer.getWords(
                    ("yacyshare " +
                     path.replace('?', ' ') +
                     " " +
                     this.dc_title()), null).keySet()),
                     this.hash());
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
        indexSegment.fulltext().remove(this.hash()); // clean up
        // stop here: seed may be null and address is null, so no alternative URL can be built
        return this;
    }

    this.alternative_urlstring = "http://" + address + "/" + host.substring(0, firstDot) + path;
    String displayName = "http://" + seed.getName() + ".yacy" + path;
    final int queryStart = displayName.indexOf('?');
    if (queryStart > 0) {
        // the display name is shown without the query part
        displayName = displayName.substring(0, queryStart);
    }
    this.alternative_urlname = displayName;
    return this;
}
/**
 * used for search result entry
 *
 * @return alternative_urlstring if it is set; otherwise the normal form of the url.
 *         If pdfParser.individualPages is enabled and the file extension is "pdf",
 *         the last occurrence of the page property (together with the character
 *         in front of it) is rewritten to a "#page=" fragment.
 */
public String urlstring() {
    if (this.alternative_urlstring != null) {
        return this.alternative_urlstring;
    }
    final boolean splitPdf = pdfParser.individualPages
            && "pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase());
    final String normalform = this.url().toNormalform(true);
    if (!splitPdf) {
        return normalform;
    }
    // for pdf links we rewrite the url
    // this is a special treatment of pdf files which can be split into subpages
    final String pageprop = pdfParser.individualPagePropertyname;
    final int propStart = normalform.lastIndexOf(pageprop + "=");
    if (propStart <= 0) {
        return normalform;
    }
    final String beforeProp = normalform.substring(0, propStart - 1);
    final String pageValue = normalform.substring(propStart + pageprop.length() + 1);
    return beforeProp + "#page=" + pageValue;
}
/**
 * used for search result entry
 *
 * @return alternative_urlname if it is set, otherwise the unescaped urlstring()
 */
public String urlname() {
    if (this.alternative_urlname != null) {
        return this.alternative_urlname;
    }
    return MultiProtocolURL.unescape(urlstring());
}
/**
 * used for search result entry
 *
 * @return dc_title() if it is not empty; otherwise the file name of the url,
 *         or the empty string if there is no url. The file name itself may be
 *         empty (e.g. "www.host.com/"), in which case the title stays empty.
 */
public String title() {
    final String docTitle = this.dc_title();
    if (!docTitle.isEmpty()) {
        return docTitle;
    }
    // title is empty: fall back to the file name
    return this.url() == null ? "" : this.url().getFileName();
}
/**
 * used for search result entry
 *
 * @return the value of the textSnippet field; may be null
 */
public TextSnippet textSnippet() {
    return textSnippet;
}
/**
 * used for search result entry
 *
 * @return the result of datesInContent()
 */
public Date[] events() {
    return datesInContent();
}
/**
 * used for search result entry
 *
 * @return true if a text snippet is attached and its error code does not report a failure
 */
public boolean hasTextSnippet() {
    if (this.textSnippet == null) {
        return false;
    }
    return !this.textSnippet.getErrorCode().fail();
}
/**
 * used for search result entry
 *
 * @return the transport resource: toString(String) with the raw snippet line
 *         if a snippet is attached and exists, otherwise the plain toString()
 */
public String resource() {
    // generate transport resource
    final boolean snippetAvailable = this.textSnippet != null && this.textSnippet.exists();
    return snippetAvailable
            ? this.toString(this.textSnippet.getLineRaw())
            : this.toString();
}
/**
 * @return the hash code of this entry's url
 */
@Override
public int hashCode() {
    return url().hashCode();
}
}