by instantiation from URIMetadataNode, by eliminating differentiation of ResultEntry/URIMetadataNode. - moved remaining ResultEntry functionality to URIMetadataNode - for 1:1 functionality added a function makeResultEntry() - removed ResultEntry - refactored related code Main difference is after makeResultEntry the text_t content is removed and alternative title/url strings for display are calculated. Main difference left is, that… pull/8/head
parent
29c4aa3991
commit
000dde9511
@ -1,117 +0,0 @@
|
|||||||
// BinSearch.java
|
|
||||||
// -----------------------
|
|
||||||
// part of The Kelondro Database
|
|
||||||
// (C) by Michael Peter Christen; mc@yacy.net
|
|
||||||
// first published on http://www.anomic.de
|
|
||||||
// Frankfurt, Germany, 2005
|
|
||||||
// created 22.11.2005
|
|
||||||
//
|
|
||||||
// $LastChangedDate$
|
|
||||||
// $LastChangedRevision$
|
|
||||||
// $LastChangedBy$
|
|
||||||
//
|
|
||||||
// This program is free software; you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation; either version 2 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with this program; if not, write to the Free Software
|
|
||||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
package net.yacy.kelondro.index;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import net.yacy.cora.order.ByteOrder;
|
|
||||||
import net.yacy.cora.order.NaturalOrder;
|
|
||||||
|
|
||||||
|
|
||||||
public final class BinSearch {
|
|
||||||
|
|
||||||
private final byte[] chunks;
|
|
||||||
private final int chunksize;
|
|
||||||
private final int count;
|
|
||||||
private static final ByteOrder objectOrder = new NaturalOrder(true); // the natural order is much faster than the b64Order
|
|
||||||
|
|
||||||
public BinSearch(final byte[] chunks, final int chunksize) {
|
|
||||||
this.chunks = chunks;
|
|
||||||
this.chunksize = chunksize;
|
|
||||||
this.count = chunks.length / chunksize;
|
|
||||||
}
|
|
||||||
|
|
||||||
public BinSearch(final List<byte[]> chunkList, final int chunksize) {
|
|
||||||
byte[][] chunksa = new byte[chunkList.size()][];
|
|
||||||
chunksa = chunkList.toArray(chunksa);
|
|
||||||
Arrays.sort(chunksa, objectOrder);
|
|
||||||
this.chunks = new byte[chunkList.size() * chunksize];
|
|
||||||
for (int i = 0; i < chunksa.length; i++) System.arraycopy(chunksa[i], 0, this.chunks, i * chunksize, chunksize);
|
|
||||||
this.chunksize = chunksize;
|
|
||||||
this.count = chunks.length / chunksize;
|
|
||||||
assert this.count == chunkList.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public final boolean contains(final byte[] t) {
|
|
||||||
return contains(t, 0, this.count);
|
|
||||||
}
|
|
||||||
|
|
||||||
private final boolean contains(final byte[] t, int beginPos, int endPos) {
|
|
||||||
// the endPos is exclusive, beginPos is inclusive
|
|
||||||
// this method is synchronized to make the use of the buffer possible
|
|
||||||
assert t.length == this.chunksize;
|
|
||||||
while (true) {
|
|
||||||
if (beginPos >= endPos) return false;
|
|
||||||
final int pivot = (beginPos + endPos) / 2;
|
|
||||||
if ((pivot < 0) || (pivot >= this.count)) return false;
|
|
||||||
assert this.chunksize == t.length;
|
|
||||||
final int c = objectOrder.compare(this.chunks, pivot * this.chunksize, t, 0, this.chunksize);
|
|
||||||
if (c == 0) return true;
|
|
||||||
if (c < 0) /* buffer < t */ {beginPos = pivot + 1; continue;}
|
|
||||||
if (c > 0) /* buffer > t */ {endPos = pivot; continue;}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public final int size() {
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
public final byte[] get(final int element) {
|
|
||||||
final byte[] a = new byte[chunksize];
|
|
||||||
System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
public final byte[] get(final int element, byte[] a) {
|
|
||||||
assert a.length == chunksize;
|
|
||||||
System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
public final void write(File f) throws IOException {
|
|
||||||
FileOutputStream os = new FileOutputStream(f);
|
|
||||||
os.write(this.chunks);
|
|
||||||
os.flush();
|
|
||||||
os.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(final String[] args) {
|
|
||||||
final String s = "4CEvsI8FRczRBo_ApRCkwfEbFLn1pIFXg39QGMgj5RHM6HpIMJq67QX3M5iQYr_LyI_5aGDaa_bYbRgJ9XnQjpmq6QkOoGWAoEaihRqhV3kItLFHjRtqauUR";
|
|
||||||
final BinSearch bs = new BinSearch(s.getBytes(), 6);
|
|
||||||
for (int i = 0; i + 6 <= s.length(); i = i + 6) {
|
|
||||||
System.out.println(s.substring(i, i + 6) + ":" + ((bs.contains(s.substring(i, i + 6).getBytes())) ? "drin" : "draussen"));
|
|
||||||
}
|
|
||||||
for (int i = 0; i + 7 <= s.length(); i = i + 6) {
|
|
||||||
System.out.println(s.substring(i + 1, i + 7) + ":" + ((bs.contains(s.substring(i + 1, i + 7).getBytes())) ? "drin" : "draussen"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,166 +0,0 @@
|
|||||||
// ResultEntry.java
|
|
||||||
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
|
||||||
// first published 10.10.2005 on http://yacy.net
|
|
||||||
//
|
|
||||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
|
||||||
//
|
|
||||||
// $LastChangedDate$
|
|
||||||
// $LastChangedRevision$
|
|
||||||
// $LastChangedBy$
|
|
||||||
//
|
|
||||||
// LICENSE
|
|
||||||
//
|
|
||||||
// This program is free software; you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation; either version 2 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU General Public License
|
|
||||||
// along with this program; if not, write to the Free Software
|
|
||||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
package net.yacy.search.snippet;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Date;
|
|
||||||
|
|
||||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
|
||||||
import net.yacy.cora.order.Base64Order;
|
|
||||||
import net.yacy.cora.util.ByteArray;
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
|
||||||
import net.yacy.document.Condenser;
|
|
||||||
import net.yacy.document.parser.pdfParser;
|
|
||||||
import net.yacy.kelondro.data.meta.URIMetadataNode;
|
|
||||||
import net.yacy.kelondro.data.word.Word;
|
|
||||||
import net.yacy.peers.Seed;
|
|
||||||
import net.yacy.peers.SeedDB;
|
|
||||||
import net.yacy.search.index.Segment;
|
|
||||||
import net.yacy.search.schema.CollectionSchema;
|
|
||||||
|
|
||||||
|
|
||||||
public class ResultEntry extends URIMetadataNode implements Comparable<ResultEntry>, Comparator<ResultEntry> {
|
|
||||||
|
|
||||||
private static final long serialVersionUID = -256046934741561978L;
|
|
||||||
// payload objects
|
|
||||||
private String alternative_urlstring;
|
|
||||||
private String alternative_urlname;
|
|
||||||
private final TextSnippet textSnippet;
|
|
||||||
private final Segment indexSegment;
|
|
||||||
|
|
||||||
public ResultEntry(final URIMetadataNode urlentry,
|
|
||||||
final Segment indexSegment,
|
|
||||||
SeedDB peers,
|
|
||||||
final TextSnippet textSnippet) {
|
|
||||||
super(urlentry);
|
|
||||||
this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
|
|
||||||
this.indexSegment = indexSegment;
|
|
||||||
this.alternative_urlstring = null;
|
|
||||||
this.alternative_urlname = null;
|
|
||||||
this.textSnippet = textSnippet;
|
|
||||||
final String host = urlentry.url().getHost();
|
|
||||||
if (host != null && host.endsWith(".yacyh")) {
|
|
||||||
// translate host into current IP
|
|
||||||
int p = host.indexOf('.');
|
|
||||||
final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
|
|
||||||
final Seed seed = peers.getConnected(hash);
|
|
||||||
final String path = urlentry.url().getFile();
|
|
||||||
String address = null;
|
|
||||||
if ((seed == null) || ((address = seed.getPublicAddress(seed.getIP())) == null)) {
|
|
||||||
// seed is not known from here
|
|
||||||
try {
|
|
||||||
if (indexSegment.termIndex() != null) indexSegment.termIndex().remove(
|
|
||||||
Word.words2hashesHandles(Condenser.getWords(
|
|
||||||
("yacyshare " +
|
|
||||||
path.replace('?', ' ') +
|
|
||||||
" " +
|
|
||||||
urlentry.dc_title()), null).keySet()),
|
|
||||||
urlentry.hash());
|
|
||||||
} catch (final IOException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
}
|
|
||||||
indexSegment.fulltext().remove(urlentry.hash()); // clean up
|
|
||||||
throw new RuntimeException("index void");
|
|
||||||
}
|
|
||||||
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path;
|
|
||||||
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + path;
|
|
||||||
if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
if (this.hashCache == Integer.MIN_VALUE) {
|
|
||||||
this.hashCache = ByteArray.hashCode(this.hash());
|
|
||||||
}
|
|
||||||
return this.hashCache;
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public boolean equals(final Object obj) {
|
|
||||||
if (this == obj) return true;
|
|
||||||
if (obj == null) return false;
|
|
||||||
if (!(obj instanceof ResultEntry)) return false;
|
|
||||||
ResultEntry other = (ResultEntry) obj;
|
|
||||||
return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
|
|
||||||
}
|
|
||||||
|
|
||||||
public String urlstring() {
|
|
||||||
if (this.alternative_urlstring != null) return this.alternative_urlstring;
|
|
||||||
|
|
||||||
if (!pdfParser.individualPages) return this.url().toNormalform(true);
|
|
||||||
if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
|
|
||||||
// for pdf links we rewrite the url
|
|
||||||
// this is a special treatment of pdf files which can be splitted into subpages
|
|
||||||
String pageprop = pdfParser.individualPagePropertyname;
|
|
||||||
String resultUrlstring = this.url().toNormalform(true);
|
|
||||||
int p = resultUrlstring.lastIndexOf(pageprop + "=");
|
|
||||||
if (p > 0) {
|
|
||||||
return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1);
|
|
||||||
}
|
|
||||||
return resultUrlstring;
|
|
||||||
}
|
|
||||||
public String urlname() {
|
|
||||||
return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname;
|
|
||||||
}
|
|
||||||
public String title() {
|
|
||||||
String titlestr = this.dc_title();
|
|
||||||
// if title is empty use filename as title
|
|
||||||
if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" )
|
|
||||||
titlestr = this.url() != null ? this.url().getFileName() : "";
|
|
||||||
}
|
|
||||||
return titlestr;
|
|
||||||
}
|
|
||||||
public TextSnippet textSnippet() {
|
|
||||||
return this.textSnippet;
|
|
||||||
}
|
|
||||||
public Date[] events() {
|
|
||||||
return this.datesInContent();
|
|
||||||
}
|
|
||||||
public int referencesCount() {
|
|
||||||
// urlCitationIndex index might be null (= configuration option)
|
|
||||||
return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.hash()) : 0;
|
|
||||||
}
|
|
||||||
public boolean hasTextSnippet() {
|
|
||||||
return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
|
|
||||||
}
|
|
||||||
public String resource() {
|
|
||||||
// generate transport resource
|
|
||||||
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
|
|
||||||
return this.toString();
|
|
||||||
}
|
|
||||||
return this.toString(this.textSnippet.getLineRaw());
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public int compareTo(ResultEntry o) {
|
|
||||||
return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public int compare(ResultEntry o1, ResultEntry o2) {
|
|
||||||
return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in new issue