by instantiation from URIMetadataNode, by eliminating the differentiation of ResultEntry/URIMetadataNode. - moved remaining ResultEntry functionality to URIMetadataNode - for 1:1 functionality added a function makeResultEntry() - removed ResultEntry - refactored related code Main difference is that after makeResultEntry the text_t content is removed and alternative title/url strings for display are calculated. Main difference left is, that … pull/8/head
parent
29c4aa3991
commit
000dde9511
@ -1,117 +0,0 @@
|
||||
// BinSearch.java
|
||||
// -----------------------
|
||||
// part of The Kelondro Database
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://www.anomic.de
|
||||
// Frankfurt, Germany, 2005
|
||||
// created 22.11.2005
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.kelondro.index;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.order.ByteOrder;
|
||||
import net.yacy.cora.order.NaturalOrder;
|
||||
|
||||
|
||||
public final class BinSearch {
|
||||
|
||||
private final byte[] chunks;
|
||||
private final int chunksize;
|
||||
private final int count;
|
||||
private static final ByteOrder objectOrder = new NaturalOrder(true); // the natural order is much faster than the b64Order
|
||||
|
||||
public BinSearch(final byte[] chunks, final int chunksize) {
|
||||
this.chunks = chunks;
|
||||
this.chunksize = chunksize;
|
||||
this.count = chunks.length / chunksize;
|
||||
}
|
||||
|
||||
public BinSearch(final List<byte[]> chunkList, final int chunksize) {
|
||||
byte[][] chunksa = new byte[chunkList.size()][];
|
||||
chunksa = chunkList.toArray(chunksa);
|
||||
Arrays.sort(chunksa, objectOrder);
|
||||
this.chunks = new byte[chunkList.size() * chunksize];
|
||||
for (int i = 0; i < chunksa.length; i++) System.arraycopy(chunksa[i], 0, this.chunks, i * chunksize, chunksize);
|
||||
this.chunksize = chunksize;
|
||||
this.count = chunks.length / chunksize;
|
||||
assert this.count == chunkList.size();
|
||||
}
|
||||
|
||||
public final boolean contains(final byte[] t) {
|
||||
return contains(t, 0, this.count);
|
||||
}
|
||||
|
||||
private final boolean contains(final byte[] t, int beginPos, int endPos) {
|
||||
// the endPos is exclusive, beginPos is inclusive
|
||||
// this method is synchronized to make the use of the buffer possible
|
||||
assert t.length == this.chunksize;
|
||||
while (true) {
|
||||
if (beginPos >= endPos) return false;
|
||||
final int pivot = (beginPos + endPos) / 2;
|
||||
if ((pivot < 0) || (pivot >= this.count)) return false;
|
||||
assert this.chunksize == t.length;
|
||||
final int c = objectOrder.compare(this.chunks, pivot * this.chunksize, t, 0, this.chunksize);
|
||||
if (c == 0) return true;
|
||||
if (c < 0) /* buffer < t */ {beginPos = pivot + 1; continue;}
|
||||
if (c > 0) /* buffer > t */ {endPos = pivot; continue;}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public final int size() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public final byte[] get(final int element) {
|
||||
final byte[] a = new byte[chunksize];
|
||||
System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize);
|
||||
return a;
|
||||
}
|
||||
|
||||
public final byte[] get(final int element, byte[] a) {
|
||||
assert a.length == chunksize;
|
||||
System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize);
|
||||
return a;
|
||||
}
|
||||
|
||||
public final void write(File f) throws IOException {
|
||||
FileOutputStream os = new FileOutputStream(f);
|
||||
os.write(this.chunks);
|
||||
os.flush();
|
||||
os.close();
|
||||
}
|
||||
|
||||
public static void main(final String[] args) {
|
||||
final String s = "4CEvsI8FRczRBo_ApRCkwfEbFLn1pIFXg39QGMgj5RHM6HpIMJq67QX3M5iQYr_LyI_5aGDaa_bYbRgJ9XnQjpmq6QkOoGWAoEaihRqhV3kItLFHjRtqauUR";
|
||||
final BinSearch bs = new BinSearch(s.getBytes(), 6);
|
||||
for (int i = 0; i + 6 <= s.length(); i = i + 6) {
|
||||
System.out.println(s.substring(i, i + 6) + ":" + ((bs.contains(s.substring(i, i + 6).getBytes())) ? "drin" : "draussen"));
|
||||
}
|
||||
for (int i = 0; i + 7 <= s.length(); i = i + 6) {
|
||||
System.out.println(s.substring(i + 1, i + 7) + ":" + ((bs.contains(s.substring(i + 1, i + 7).getBytes())) ? "drin" : "draussen"));
|
||||
}
|
||||
}
|
||||
}
|
@ -1,166 +0,0 @@
|
||||
// ResultEntry.java
|
||||
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 10.10.2005 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.search.snippet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||
import net.yacy.cora.order.Base64Order;
|
||||
import net.yacy.cora.util.ByteArray;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.parser.pdfParser;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataNode;
|
||||
import net.yacy.kelondro.data.word.Word;
|
||||
import net.yacy.peers.Seed;
|
||||
import net.yacy.peers.SeedDB;
|
||||
import net.yacy.search.index.Segment;
|
||||
import net.yacy.search.schema.CollectionSchema;
|
||||
|
||||
|
||||
public class ResultEntry extends URIMetadataNode implements Comparable<ResultEntry>, Comparator<ResultEntry> {
|
||||
|
||||
private static final long serialVersionUID = -256046934741561978L;
|
||||
// payload objects
|
||||
private String alternative_urlstring;
|
||||
private String alternative_urlname;
|
||||
private final TextSnippet textSnippet;
|
||||
private final Segment indexSegment;
|
||||
|
||||
public ResultEntry(final URIMetadataNode urlentry,
|
||||
final Segment indexSegment,
|
||||
SeedDB peers,
|
||||
final TextSnippet textSnippet) {
|
||||
super(urlentry);
|
||||
this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
|
||||
this.indexSegment = indexSegment;
|
||||
this.alternative_urlstring = null;
|
||||
this.alternative_urlname = null;
|
||||
this.textSnippet = textSnippet;
|
||||
final String host = urlentry.url().getHost();
|
||||
if (host != null && host.endsWith(".yacyh")) {
|
||||
// translate host into current IP
|
||||
int p = host.indexOf('.');
|
||||
final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
|
||||
final Seed seed = peers.getConnected(hash);
|
||||
final String path = urlentry.url().getFile();
|
||||
String address = null;
|
||||
if ((seed == null) || ((address = seed.getPublicAddress(seed.getIP())) == null)) {
|
||||
// seed is not known from here
|
||||
try {
|
||||
if (indexSegment.termIndex() != null) indexSegment.termIndex().remove(
|
||||
Word.words2hashesHandles(Condenser.getWords(
|
||||
("yacyshare " +
|
||||
path.replace('?', ' ') +
|
||||
" " +
|
||||
urlentry.dc_title()), null).keySet()),
|
||||
urlentry.hash());
|
||||
} catch (final IOException e) {
|
||||
ConcurrentLog.logException(e);
|
||||
}
|
||||
indexSegment.fulltext().remove(urlentry.hash()); // clean up
|
||||
throw new RuntimeException("index void");
|
||||
}
|
||||
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path;
|
||||
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + path;
|
||||
if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
|
||||
}
|
||||
}
|
||||
private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (this.hashCache == Integer.MIN_VALUE) {
|
||||
this.hashCache = ByteArray.hashCode(this.hash());
|
||||
}
|
||||
return this.hashCache;
|
||||
}
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) return true;
|
||||
if (obj == null) return false;
|
||||
if (!(obj instanceof ResultEntry)) return false;
|
||||
ResultEntry other = (ResultEntry) obj;
|
||||
return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
|
||||
}
|
||||
|
||||
public String urlstring() {
|
||||
if (this.alternative_urlstring != null) return this.alternative_urlstring;
|
||||
|
||||
if (!pdfParser.individualPages) return this.url().toNormalform(true);
|
||||
if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
|
||||
// for pdf links we rewrite the url
|
||||
// this is a special treatment of pdf files which can be splitted into subpages
|
||||
String pageprop = pdfParser.individualPagePropertyname;
|
||||
String resultUrlstring = this.url().toNormalform(true);
|
||||
int p = resultUrlstring.lastIndexOf(pageprop + "=");
|
||||
if (p > 0) {
|
||||
return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1);
|
||||
}
|
||||
return resultUrlstring;
|
||||
}
|
||||
public String urlname() {
|
||||
return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname;
|
||||
}
|
||||
public String title() {
|
||||
String titlestr = this.dc_title();
|
||||
// if title is empty use filename as title
|
||||
if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" )
|
||||
titlestr = this.url() != null ? this.url().getFileName() : "";
|
||||
}
|
||||
return titlestr;
|
||||
}
|
||||
public TextSnippet textSnippet() {
|
||||
return this.textSnippet;
|
||||
}
|
||||
public Date[] events() {
|
||||
return this.datesInContent();
|
||||
}
|
||||
public int referencesCount() {
|
||||
// urlCitationIndex index might be null (= configuration option)
|
||||
return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.hash()) : 0;
|
||||
}
|
||||
public boolean hasTextSnippet() {
|
||||
return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
|
||||
}
|
||||
public String resource() {
|
||||
// generate transport resource
|
||||
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
|
||||
return this.toString();
|
||||
}
|
||||
return this.toString(this.textSnippet.getLineRaw());
|
||||
}
|
||||
@Override
|
||||
public int compareTo(ResultEntry o) {
|
||||
return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
|
||||
}
|
||||
@Override
|
||||
public int compare(ResultEntry o1, ResultEntry o2) {
|
||||
return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
|
||||
}
|
||||
}
|
Loading…
Reference in new issue