You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
710 lines
34 KiB
710 lines
34 KiB
// IndexControlRWIs_p.java
|
|
// -----------------------
|
|
// (C) 2004-2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
|
// first published 2004 on http://yacy.net
|
|
//
|
|
// This is a part of YaCy, a peer-to-peer based web search engine
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// LICENSE
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
import java.io.IOException;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Collection;
|
|
import java.util.Date;
|
|
import java.util.HashSet;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.regex.PatternSyntaxException;
|
|
|
|
import net.yacy.cora.date.GenericFormatter;
|
|
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
|
import net.yacy.cora.document.encoding.ASCII;
|
|
import net.yacy.cora.document.id.DigestURL;
|
|
import net.yacy.cora.document.id.MultiProtocolURL;
|
|
import net.yacy.cora.document.id.Punycode.PunycodeException;
|
|
import net.yacy.cora.federate.yacy.CacheStrategy;
|
|
import net.yacy.cora.protocol.ClientIdentification;
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
import net.yacy.cora.storage.HandleSet;
|
|
import net.yacy.cora.util.ConcurrentLog;
|
|
import net.yacy.cora.util.SpaceExceededException;
|
|
import net.yacy.data.ListManager;
|
|
import net.yacy.data.TransactionManager;
|
|
import net.yacy.document.Tokenizer;
|
|
import net.yacy.kelondro.data.meta.URIMetadataNode;
|
|
import net.yacy.kelondro.data.word.Word;
|
|
import net.yacy.kelondro.data.word.WordReference;
|
|
import net.yacy.kelondro.data.word.WordReferenceRow;
|
|
import net.yacy.kelondro.index.RowHandleSet;
|
|
import net.yacy.kelondro.rwi.Reference;
|
|
import net.yacy.kelondro.rwi.ReferenceContainer;
|
|
import net.yacy.kelondro.rwi.ReferenceContainerCache;
|
|
import net.yacy.kelondro.util.Bitfield;
|
|
import net.yacy.kelondro.util.FileUtils;
|
|
import net.yacy.peers.DHTSelection;
|
|
import net.yacy.peers.Protocol;
|
|
import net.yacy.peers.Seed;
|
|
import net.yacy.repository.Blacklist;
|
|
import net.yacy.repository.Blacklist.BlacklistType;
|
|
import net.yacy.repository.BlacklistHostAndPath;
|
|
import net.yacy.search.Switchboard;
|
|
import net.yacy.search.SwitchboardConstants;
|
|
import net.yacy.search.index.Segment;
|
|
import net.yacy.search.query.QueryGoal;
|
|
import net.yacy.search.query.QueryModifier;
|
|
import net.yacy.search.query.QueryParams;
|
|
import net.yacy.search.query.SearchEvent;
|
|
import net.yacy.search.query.SearchEventCache;
|
|
import net.yacy.server.serverObjects;
|
|
import net.yacy.server.serverSwitch;
|
|
|
|
public class IndexControlRWIs_p {
|
|
|
|
private static final String APP_NAME = "IndexControlRWIs_p";
|
|
|
|
private final static String errmsg = "not possible to compute word from hash";
|
|
|
|
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
|
// return variable that accumulates replacements
|
|
final Switchboard sb = (Switchboard) env;
|
|
final serverObjects prop = new serverObjects();
|
|
|
|
// set default values
|
|
prop.putHTML("keystring", "");
|
|
prop.put("keyhash", "");
|
|
prop.put("result", "");
|
|
final boolean limitationsEnabled = (post == null || post.containsKey("maxReferencesLimit"));
|
|
prop.put("limitations", limitationsEnabled ? 1 : 0);
|
|
if(limitationsEnabled) {
|
|
/* Acquire a transaction token for the next available POST form submission */
|
|
prop.put("limitations_" + TransactionManager.TRANSACTION_TOKEN_PARAM, TransactionManager.getTransactionToken(header));
|
|
}
|
|
|
|
// switch off all optional forms/lists
|
|
prop.put("searchresult", 0);
|
|
prop.put("keyhashsimilar", 0);
|
|
prop.put("genUrlList", 0);
|
|
|
|
// clean up all search events
|
|
SearchEventCache.cleanupEvents(true);
|
|
|
|
Segment segment = sb.index;
|
|
|
|
if ( post != null ) {
|
|
ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", ClientIdentification.yacyInternetCrawlerAgentName));
|
|
final String keystring = post.get("keystring", "").trim();
|
|
byte[] keyhash = post.get("keyhash", "").trim().getBytes();
|
|
if (keystring.length() > 0 && !keystring.contains(errmsg)) {
|
|
keyhash = Word.word2hash(keystring);
|
|
}
|
|
prop.putHTML("keystring", keystring);
|
|
prop.putHTML("keyhash", ASCII.String(keyhash));
|
|
|
|
// read values from checkboxes
|
|
final String[] urls = post.getAll("urlhx.*");
|
|
HandleSet urlb =
|
|
new RowHandleSet(
|
|
Word.commonHashLength,
|
|
Word.commonHashOrder,
|
|
urls.length);
|
|
if ( urls != null ) {
|
|
for ( final String s : urls ) {
|
|
try {
|
|
urlb.put(s.getBytes());
|
|
} catch (final SpaceExceededException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
}
|
|
final boolean delurl = post.containsKey("delurl");
|
|
final boolean delurlref = post.containsKey("delurlref");
|
|
|
|
if ( post.containsKey("keystringsearch") ) {
|
|
prop.put("keyhash", keyhash);
|
|
final SearchEvent theSearch = genSearchresult(prop, sb, keyhash, null);
|
|
if (theSearch.local_rwi_available.get() == 0) {
|
|
prop.put("searchresult", 1);
|
|
prop.putHTML("searchresult_word", keystring);
|
|
}
|
|
}
|
|
|
|
if ( post.containsKey("keyhashsearch") ) {
|
|
if ( keystring.isEmpty() || !Arrays.equals(Word.word2hash(keystring), keyhash) ) {
|
|
prop.put("keystring", "<" + errmsg + ">");
|
|
}
|
|
final SearchEvent theSearch = genSearchresult(prop, sb, keyhash, null);
|
|
if (theSearch.local_rwi_available.get() == 0) {
|
|
prop.put("searchresult", 2);
|
|
prop.putHTML("searchresult_wordhash", ASCII.String(keyhash));
|
|
}
|
|
}
|
|
|
|
// set reference limitation
|
|
if ( post.containsKey("maxReferencesLimit") ) {
|
|
/* Check the transaction is valid */
|
|
TransactionManager.checkPostTransaction(header, post);
|
|
|
|
if ( post.get("maxReferencesRadio", "").equals("on") ) {
|
|
ReferenceContainer.maxReferences = post.getInt("maxReferences", 0);
|
|
} else {
|
|
ReferenceContainer.maxReferences = 0;
|
|
}
|
|
sb.setConfig("index.maxReferences", ReferenceContainer.maxReferences);
|
|
}
|
|
|
|
// delete word
|
|
if ( post.containsKey("keyhashdeleteall") ) {
|
|
/* Check the transaction is valid */
|
|
TransactionManager.checkPostTransaction(header, post);
|
|
|
|
try {
|
|
if ( delurl || delurlref ) {
|
|
// generate urlx: an array of url hashes to be deleted
|
|
ReferenceContainer<WordReference> index = null;
|
|
index = segment.termIndex().get(keyhash, null);
|
|
final Iterator<WordReference> en = index.entries();
|
|
urlb =
|
|
new RowHandleSet(
|
|
Word.commonHashLength,
|
|
Word.commonHashOrder,
|
|
index.size());
|
|
while ( en.hasNext() ) {
|
|
try {
|
|
urlb.put(en.next().urlhash());
|
|
} catch (final SpaceExceededException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
index = null;
|
|
}
|
|
if ( delurlref ) {
|
|
segment.removeAllUrlReferences(urlb, sb.loader, agent, CacheStrategy.IFEXIST);
|
|
}
|
|
// delete the word first because that is much faster than the deletion of the urls from the url database
|
|
segment.termIndex().delete(keyhash);
|
|
// now delete all urls if demanded
|
|
if ( delurl || delurlref ) {
|
|
for ( final byte[] b : urlb ) {
|
|
sb.urlRemove(segment, b);
|
|
}
|
|
}
|
|
post.remove("keyhashdeleteall");
|
|
post.put("urllist", "generated");
|
|
} catch (final IOException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
|
|
// delete selected URLs
|
|
if ( post.containsKey("keyhashdelete") ) {
|
|
try {
|
|
if ( delurlref ) {
|
|
segment.removeAllUrlReferences(urlb, sb.loader, agent, CacheStrategy.IFEXIST);
|
|
}
|
|
if ( delurl || delurlref ) {
|
|
for ( final byte[] b : urlb ) {
|
|
sb.urlRemove(segment, b);
|
|
}
|
|
}
|
|
final HandleSet urlHashes =
|
|
new RowHandleSet(
|
|
Word.commonHashLength,
|
|
Word.commonHashOrder,
|
|
0);
|
|
for ( final byte[] b : urlb ) {
|
|
try {
|
|
urlHashes.put(b);
|
|
} catch (final SpaceExceededException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
segment.termIndex().remove(keyhash, urlHashes);
|
|
// this shall lead to a presentation of the list; so handle that the remaining program
|
|
// thinks that it was called for a list presentation
|
|
post.remove("keyhashdelete");
|
|
post.put("urllist", "generated");
|
|
} catch (final IOException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
|
|
if ( post.containsKey("urllist") ) {
|
|
if ( keystring.isEmpty() || !Arrays.equals(Word.word2hash(keystring), keyhash) ) {
|
|
prop.put("keystring", "<" + errmsg + ">");
|
|
}
|
|
final Bitfield flags = compileFlags(post);
|
|
final int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
|
|
final SearchEvent theSearch = genSearchresult(prop, sb, keyhash, flags);
|
|
genURLList(prop, keyhash, keystring, theSearch, flags, count);
|
|
}
|
|
|
|
// transfer to other peer
|
|
if ( post.containsKey("keyhashtransfer") ) {
|
|
/* Check the transaction is valid */
|
|
TransactionManager.checkPostTransaction(header, post);
|
|
|
|
try {
|
|
if ( keystring.isEmpty() || !Arrays.equals(Word.word2hash(keystring), keyhash) ) {
|
|
prop.put("keystring", "<" + errmsg + ">");
|
|
}
|
|
|
|
// find host & peer
|
|
String host = post.get("host", ""); // get host from input field
|
|
Seed seed = null;
|
|
if ( host.length() != 0 ) {
|
|
if ( host.length() == 12 ) {
|
|
// the host string is !likely! a peer hash (or peer name with 12 chars)
|
|
seed = sb.peers.getConnected(host); // check for seed.hash
|
|
if (seed == null) seed = sb.peers.lookupByName(host); // check for peer name
|
|
} else {
|
|
// the host string can be a host name
|
|
seed = sb.peers.lookupByName(host);
|
|
}
|
|
} else {
|
|
host = post.get("hostHash", ""); // if input field is empty, get from select box
|
|
seed = sb.peers.getConnected(host);
|
|
}
|
|
|
|
if (seed != null) { // if no seed found skip transfer
|
|
// prepare index
|
|
ReferenceContainer<WordReference> index;
|
|
final long starttime = System.currentTimeMillis();
|
|
index = segment.termIndex().get(keyhash, null);
|
|
// built urlCache
|
|
final Iterator<WordReference> urlIter = index.entries();
|
|
final HandleSet knownURLs =
|
|
new RowHandleSet(
|
|
WordReferenceRow.urlEntryRow.primaryKeyLength,
|
|
WordReferenceRow.urlEntryRow.objectOrder,
|
|
index.size());
|
|
final HandleSet unknownURLEntries =
|
|
new RowHandleSet(
|
|
WordReferenceRow.urlEntryRow.primaryKeyLength,
|
|
WordReferenceRow.urlEntryRow.objectOrder,
|
|
index.size());
|
|
Reference iEntry;
|
|
while (urlIter.hasNext()) {
|
|
iEntry = urlIter.next();
|
|
boolean exists = segment.fulltext().exists(ASCII.String(iEntry.urlhash()));
|
|
if (exists) {
|
|
try {
|
|
knownURLs.put(iEntry.urlhash());
|
|
} catch (final SpaceExceededException e) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
} else {
|
|
try {
|
|
unknownURLEntries.put(iEntry.urlhash());
|
|
} catch (final SpaceExceededException e) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
urlIter.remove();
|
|
}
|
|
}
|
|
|
|
// make an indexContainerCache
|
|
final ReferenceContainerCache<WordReference> icc =
|
|
new ReferenceContainerCache<WordReference>(
|
|
Segment.wordReferenceFactory,
|
|
Segment.wordOrder,
|
|
Word.commonHashLength);
|
|
try {
|
|
icc.add(index);
|
|
} catch (final SpaceExceededException e) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
|
|
// transport to other peer
|
|
final boolean gzipBody = sb.getConfigBool("indexControl.gzipBody", false);
|
|
final int timeout = (int) sb.getConfigLong("indexControl.timeout", 60000);
|
|
final String error = Protocol.transferIndex(sb, seed, icc, knownURLs, segment, gzipBody, timeout);
|
|
prop.put("result", (error == null) ? ("Successfully transferred "
|
|
+ knownURLs.size()
|
|
+ " words in "
|
|
+ ((System.currentTimeMillis() - starttime) / 1000)
|
|
+ " seconds, "
|
|
+ unknownURLEntries.size() + " URL not found") : "error: " + error);
|
|
index = null;
|
|
} else {
|
|
prop.put("result", "Peer " + host + " not found");
|
|
}
|
|
} catch (final IOException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
|
|
// generate list (if RWI connected, otherwise NPE)
|
|
if (sb.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) {
|
|
if ( post.containsKey("keyhashsimilar") ) {
|
|
try {
|
|
final Iterator<ReferenceContainer<WordReference>> containerIt =
|
|
segment.termIndex().referenceContainer(keyhash, true, false, 256, false).iterator();
|
|
ReferenceContainer<WordReference> container;
|
|
|
|
int i = 0, rows = 0, cols = 0;
|
|
prop.put("keyhashsimilar", "1");
|
|
while ( containerIt.hasNext() && i < 256 ) {
|
|
container = containerIt.next();
|
|
prop.put(
|
|
"keyhashsimilar_rows_" + rows + "_cols_" + cols + "_wordHash",
|
|
container.getTermHash());
|
|
cols++;
|
|
if ( cols == 8 ) {
|
|
prop.put("keyhashsimilar_rows_" + rows + "_cols", cols);
|
|
cols = 0;
|
|
rows++;
|
|
}
|
|
i++;
|
|
}
|
|
prop.put("keyhashsimilar_rows_" + rows + "_cols", cols);
|
|
prop.put("keyhashsimilar_rows", rows + 1);
|
|
prop.put("result", "");
|
|
} catch (final IOException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
} else {
|
|
prop.put("result", "RWI index not connected (see Index Sources & Targets -> <a href='IndexFederated_p.html'>Peer-to-Peer Operation</a>)");
|
|
}
|
|
|
|
if ( post.containsKey("blacklist") ) {
|
|
final String blacklist = post.get("blacklist", "");
|
|
final HandleSet urlHashes =
|
|
new RowHandleSet(
|
|
Word.commonHashLength,
|
|
Word.commonHashOrder,
|
|
urlb.size());
|
|
if ( post.containsKey("blacklisturls") ) {
|
|
DigestURL url;
|
|
/* Prepare the new blacklist items list to add then them in one operation for better performance */
|
|
final Collection<BlacklistHostAndPath> items = new ArrayList<>();
|
|
for ( final byte[] b : urlb ) {
|
|
try {
|
|
urlHashes.put(b);
|
|
} catch (final SpaceExceededException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
try {
|
|
String u = segment.fulltext().getURL(ASCII.String(b));
|
|
url = u == null ? null : new DigestURL(u);
|
|
segment.fulltext().remove(b);
|
|
if ( url != null ) {
|
|
items.add(new BlacklistHostAndPath(url.getHost(), url.getFile()));
|
|
}
|
|
} catch (IOException e1) {
|
|
ConcurrentLog.logException(e1);
|
|
}
|
|
}
|
|
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
|
|
if ( ListManager.listSetContains(
|
|
supportedBlacklistType + ".BlackLists",
|
|
blacklist) ) {
|
|
try {
|
|
Switchboard.urlBlacklist.add(supportedBlacklistType,
|
|
blacklist, items);
|
|
} catch (PunycodeException e) {
|
|
ConcurrentLog.warn(APP_NAME,
|
|
"Unable to add blacklist entries to blacklist "
|
|
+ supportedBlacklistType, e);
|
|
}
|
|
}
|
|
}
|
|
SearchEventCache.cleanupEvents(true);
|
|
}
|
|
|
|
if ( post.containsKey("blacklistdomains") ) {
|
|
DigestURL url;
|
|
for ( final byte[] b : urlb ) {
|
|
try {
|
|
urlHashes.put(b);
|
|
} catch (final SpaceExceededException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
try {
|
|
String u = segment.fulltext().getURL(ASCII.String(b));
|
|
url = u == null ? null : new DigestURL(u);
|
|
segment.fulltext().remove(b);
|
|
if ( url != null ) {
|
|
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
|
|
if ( ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklist) ) {
|
|
try {
|
|
Switchboard.urlBlacklist.add(
|
|
supportedBlacklistType,
|
|
blacklist,
|
|
url.getHost(),
|
|
".*");
|
|
} catch (final PunycodeException | PatternSyntaxException e) {
|
|
ConcurrentLog.warn(APP_NAME,
|
|
"Unable to add blacklist entry to blacklist "
|
|
+ supportedBlacklistType, e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (IOException e1) {
|
|
ConcurrentLog.logException(e1);
|
|
}
|
|
}
|
|
}
|
|
try {
|
|
segment.termIndex().remove(keyhash, urlHashes);
|
|
} catch (final IOException e ) {
|
|
ConcurrentLog.logException(e);
|
|
}
|
|
}
|
|
|
|
if ( prop.getInt("searchresult", 0) == 3 ) {
|
|
/* Acquire a transaction token for the next available POST form submissions */
|
|
prop.put("searchresult_" + TransactionManager.TRANSACTION_TOKEN_PARAM, TransactionManager.getTransactionToken(header));
|
|
|
|
listHosts(prop, keyhash, sb);
|
|
}
|
|
}
|
|
|
|
// insert constants
|
|
prop.putNum("wcount", segment.RWICount());
|
|
prop.put("limitations_maxReferencesRadioChecked", ReferenceContainer.maxReferences > 0 ? 1 : 0);
|
|
prop.put("limitations_maxReferences", ReferenceContainer.maxReferences > 0 ? ReferenceContainer.maxReferences : 100000);
|
|
|
|
// return rewrite properties
|
|
return prop;
|
|
}
|
|
|
|
public static void genURLList(
|
|
final serverObjects prop,
|
|
final byte[] keyhash,
|
|
final String keystring,
|
|
final SearchEvent theSearch,
|
|
final Bitfield flags,
|
|
final int maxlines) {
|
|
// search for a word hash and generate a list of url links
|
|
final String keyhashs = ASCII.String(keyhash);
|
|
prop.put("genUrlList_keyHash", keyhashs);
|
|
|
|
if (theSearch.local_rwi_stored.get() == 0) {
|
|
prop.put("genUrlList", 1);
|
|
prop.put("genUrlList_count", 0);
|
|
prop.put("searchresult", 2);
|
|
} else {
|
|
prop.put("genUrlList", 2);
|
|
prop.put("searchresult", 3);
|
|
prop.put("genUrlList_flags", (flags == null) ? "" : flags.exportB64());
|
|
prop.put("genUrlList_lines", maxlines);
|
|
int i = 0;
|
|
DigestURL url;
|
|
URIMetadataNode entry;
|
|
String us;
|
|
long rn = Long.MIN_VALUE;
|
|
while (!theSearch.rwiIsEmpty() && (entry = theSearch.pullOneFilteredFromRWI(false)) != null) {
|
|
url = entry.url();
|
|
if ( url == null ) {
|
|
continue;
|
|
}
|
|
us = url.toNormalform(true);
|
|
if ( rn == Long.MIN_VALUE ) {
|
|
rn = entry.score();
|
|
}
|
|
prop.put("genUrlList_urlList_" + i + "_urlExists", "1");
|
|
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxCount", i);
|
|
prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlhxValue", entry.word().urlhash());
|
|
prop.putHTML("genUrlList_urlList_" + i + "_urlExists_keyString", keystring);
|
|
prop.put("genUrlList_urlList_" + i + "_urlExists_keyHash", keyhashs);
|
|
prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlString", us);
|
|
prop.put("genUrlList_urlList_" + i + "_urlExists_urlStringShort",
|
|
(us.length() > 40) ? (us.substring(0, 20) + "<br>" + us.substring(20, 40) + "...") : ((us.length() > 30) ? (us.substring(0, 20) + "<br>" + us.substring(20)) : us));
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_ranking", entry.score() - rn);
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_domlength", DigestURL.domLengthEstimation(entry.hash()));
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_tf", 1000.0 * entry.word().termFrequency());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_authority", (theSearch.getOrder() == null) ? -1 : theSearch.getOrder().authority(ASCII.String(entry.hash(), 6, 6)));
|
|
prop.put("genUrlList_urlList_" + i + "_urlExists_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(entry.word().lastModified())));
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_wordsintitle", entry.word().wordsintitle());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_wordsintext", entry.word().wordsintext());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_phrasesintext", entry.word().phrasesintext());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_llocal", entry.word().llocal());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_lother", entry.word().lother());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_hitcount", entry.word().hitcount());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_pos", entry.word().posintext());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_phrase", entry.word().posofphrase());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_posinphrase", entry.word().posinphrase());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_urlcomps", entry.word().urlcomps());
|
|
prop.putNum("genUrlList_urlList_" + i + "_urlExists_urllength", entry.word().urllength());
|
|
prop.put(
|
|
"genUrlList_urlList_" + i + "_urlExists_props",
|
|
((entry.word().flags().get(Tokenizer.flag_cat_indexof)) ? "appears on index page, " : "")
|
|
+ ((entry.word().flags().get(Tokenizer.flag_cat_hasimage)) ? "contains images, " : "")
|
|
+ ((entry.word().flags().get(Tokenizer.flag_cat_hasaudio)) ? "contains audio, " : "")
|
|
+ ((entry.word().flags().get(Tokenizer.flag_cat_hasvideo)) ? "contains video, " : "")
|
|
+ ((entry.word().flags().get(Tokenizer.flag_cat_hasapp)) ? "contains applications, " : "")
|
|
+ ((entry.word().flags().get(WordReferenceRow.flag_app_dc_identifier)) ? "appears in url, " : "")
|
|
+ ((entry.word().flags().get(WordReferenceRow.flag_app_dc_title)) ? "appears in title, " : "")
|
|
+ ((entry.word().flags().get(WordReferenceRow.flag_app_dc_creator)) ? "appears in author, " : "")
|
|
+ ((entry.word().flags().get(WordReferenceRow.flag_app_dc_subject)) ? "appears in subject, " : "")
|
|
+ ((entry.word().flags().get(WordReferenceRow.flag_app_dc_description)) ? "appears in description, " : "")
|
|
+ ((entry.word().flags().get(WordReferenceRow.flag_app_emphasized)) ? "appears emphasized, " : ""));
|
|
if ( Switchboard.urlBlacklist.isListed(BlacklistType.DHT, url) ) {
|
|
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1");
|
|
}
|
|
i++;
|
|
if ( (maxlines >= 0) && (i >= maxlines) ) {
|
|
break;
|
|
}
|
|
}
|
|
prop.put("genUrlList_urlList", i);
|
|
prop.putHTML("genUrlList_keyString", keystring);
|
|
prop.put("genUrlList_count", i);
|
|
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER));
|
|
}
|
|
}
|
|
|
|
public static void putBlacklists(final serverObjects prop, final List<String> lists) {
|
|
prop.put("genUrlList_blacklists", lists.size());
|
|
int i = 0;
|
|
for ( final String list : lists ) {
|
|
prop.put("genUrlList_blacklists_" + i++ + "_name", list);
|
|
}
|
|
}
|
|
|
|
public static Bitfield compileFlags(final serverObjects post) {
|
|
final Bitfield b = new Bitfield(4);
|
|
if ( post.get("allurl", "").equals("on") ) {
|
|
return null;
|
|
}
|
|
if ( post.get("flags") != null ) {
|
|
if ( post.get("flags", "").isEmpty() ) {
|
|
return null;
|
|
}
|
|
return new Bitfield(4, post.get("flags"));
|
|
}
|
|
if ( post.get("description", "").equals("on") ) {
|
|
b.set(WordReferenceRow.flag_app_dc_description, true);
|
|
}
|
|
if ( post.get("title", "").equals("on") ) {
|
|
b.set(WordReferenceRow.flag_app_dc_title, true);
|
|
}
|
|
if ( post.get("creator", "").equals("on") ) {
|
|
b.set(WordReferenceRow.flag_app_dc_creator, true);
|
|
}
|
|
if ( post.get("subject", "").equals("on") ) {
|
|
b.set(WordReferenceRow.flag_app_dc_subject, true);
|
|
}
|
|
if ( post.get("url", "").equals("on") ) {
|
|
b.set(WordReferenceRow.flag_app_dc_identifier, true);
|
|
}
|
|
if ( post.get("emphasized", "").equals("on") ) {
|
|
b.set(WordReferenceRow.flag_app_emphasized, true);
|
|
}
|
|
if ( post.get("image", "").equals("on") ) {
|
|
b.set(Tokenizer.flag_cat_hasimage, true);
|
|
}
|
|
if ( post.get("audio", "").equals("on") ) {
|
|
b.set(Tokenizer.flag_cat_hasaudio, true);
|
|
}
|
|
if ( post.get("video", "").equals("on") ) {
|
|
b.set(Tokenizer.flag_cat_hasvideo, true);
|
|
}
|
|
if ( post.get("app", "").equals("on") ) {
|
|
b.set(Tokenizer.flag_cat_hasapp, true);
|
|
}
|
|
if ( post.get("indexof", "").equals("on") ) {
|
|
b.set(Tokenizer.flag_cat_indexof, true);
|
|
}
|
|
return b;
|
|
}
|
|
|
|
public static void listHosts(final serverObjects prop, final byte[] startHash, final Switchboard sb) {
|
|
// list known hosts
|
|
Seed seed;
|
|
int hc = 0;
|
|
prop.put("searchresult_keyhash", startHash);
|
|
final Iterator<Seed> e =
|
|
DHTSelection.getAcceptRemoteIndexSeeds(sb.peers, startHash, sb.peers.sizeConnected(), true);
|
|
while ( e.hasNext() ) {
|
|
seed = e.next();
|
|
if ( seed != null ) {
|
|
prop.put("searchresult_hosts_" + hc + "_hosthash", seed.hash);
|
|
prop.putHTML(
|
|
"searchresult_hosts_" + hc + "_hostname",
|
|
seed.hash + " " + seed.get(Seed.NAME, "nameless"));
|
|
hc++;
|
|
}
|
|
}
|
|
prop.put("searchresult_hosts", hc);
|
|
}
|
|
|
|
public static SearchEvent genSearchresult(
|
|
final serverObjects prop,
|
|
final Switchboard sb,
|
|
final byte[] keyhash,
|
|
final Bitfield filter) {
|
|
|
|
final HandleSet queryhashes = QueryParams.hashes2Set(ASCII.String(keyhash));
|
|
final QueryGoal qg = new QueryGoal(queryhashes, null);
|
|
final QueryParams query = new QueryParams(
|
|
qg,
|
|
new QueryModifier(0),
|
|
Integer.MAX_VALUE,
|
|
"",
|
|
ContentDomain.ALL,
|
|
"", //lang
|
|
0, //timezoneOffset
|
|
null,
|
|
CacheStrategy.IFFRESH,
|
|
1000, 0, //count, offset
|
|
".*", //urlmask
|
|
null,
|
|
null,
|
|
QueryParams.Searchdom.LOCAL,
|
|
filter,
|
|
false,
|
|
null,
|
|
MultiProtocolURL.TLD_any_zone_filter,
|
|
"",
|
|
false,
|
|
sb.index,
|
|
sb.getRanking(),
|
|
"",//userAgent
|
|
0.0d, 0.0d, 0.0d,
|
|
new HashSet<>());
|
|
final SearchEvent theSearch = SearchEventCache.getEvent(query, sb.peers, sb.tables, null, false, sb.loader, Integer.MAX_VALUE, Long.MAX_VALUE);
|
|
if (theSearch.rwiProcess != null && theSearch.rwiProcess.isAlive()) try {theSearch.rwiProcess.join();} catch (final InterruptedException e) {}
|
|
if (theSearch.local_rwi_available.get() == 0) {
|
|
prop.put("searchresult", 2);
|
|
prop.put("searchresult_wordhash", keyhash);
|
|
} else {
|
|
prop.put("searchresult", 3);
|
|
prop.put("searchresult_allurl", theSearch.local_rwi_available.get());
|
|
prop.put("searchresult_description", theSearch.flagCount()[WordReferenceRow.flag_app_dc_description]);
|
|
prop.put("searchresult_title", theSearch.flagCount()[WordReferenceRow.flag_app_dc_title]);
|
|
prop.put("searchresult_creator", theSearch.flagCount()[WordReferenceRow.flag_app_dc_creator]);
|
|
prop.put("searchresult_subject", theSearch.flagCount()[WordReferenceRow.flag_app_dc_subject]);
|
|
prop.put("searchresult_url", theSearch.flagCount()[WordReferenceRow.flag_app_dc_identifier]);
|
|
prop.put("searchresult_emphasized", theSearch.flagCount()[WordReferenceRow.flag_app_emphasized]);
|
|
prop.put("searchresult_image", theSearch.flagCount()[Tokenizer.flag_cat_hasimage]);
|
|
prop.put("searchresult_audio", theSearch.flagCount()[Tokenizer.flag_cat_hasaudio]);
|
|
prop.put("searchresult_video", theSearch.flagCount()[Tokenizer.flag_cat_hasvideo]);
|
|
prop.put("searchresult_app", theSearch.flagCount()[Tokenizer.flag_cat_hasapp]);
|
|
prop.put("searchresult_indexof", theSearch.flagCount()[Tokenizer.flag_cat_indexof]);
|
|
}
|
|
return theSearch;
|
|
}
|
|
}
|