Tried to generate a bit less 'noise' towards the DNS server

Also included: fewer processes in snippet fetch, to reduce load during
search on small computers
pull/1/head
Michael Peter Christen 13 years ago
parent acf8d521a2
commit f294f2e295

@ -131,7 +131,7 @@ public class IndexControlURLs_p {
String urlhash = post.get("urlhash", "").trim();
if (urlhash.length() == 0 && urlstring.length() > 0) {
try {
urlhash = ASCII.String(new DigestURI(urlstring, null).hash());
urlhash = ASCII.String(new DigestURI(urlstring).hash());
} catch (final MalformedURLException e) {
}
}

@ -115,7 +115,7 @@ public class yacysearchitem {
final int port = resultURL.getPort();
DigestURI faviconURL = null;
if ((fileType == FileType.HTML || fileType == FileType.JSON) && !sb.isIntranetMode() && !resultURL.isLocal()) try {
if ((fileType == FileType.HTML || fileType == FileType.JSON) && !sb.isIntranetMode()) try {
faviconURL = new DigestURI(resultURL.getProtocol() + "://" + resultURL.getHost() + ((port != -1) ? (":" + port) : "") + "/favicon.ico");
} catch (final MalformedURLException e1) {
Log.logException(e1);

@ -211,7 +211,7 @@ public final class CrawlStacker {
u = u + "/index.html";
}
try {
final byte[] uh = new DigestURI(u, null).hash();
final byte[] uh = new DigestURI(u).hash();
this.indexSegment.urlMetadata().remove(uh);
this.nextQueue.noticeURL.removeByURLHash(uh);
this.nextQueue.errorURL.remove(uh);

@ -9,7 +9,7 @@
// $LastChangedBy: apfelmaennchen $
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -37,35 +37,35 @@ public class YMarkUtil {
public final static String TAGS_SEPARATOR = ",";
public final static String FOLDERS_SEPARATOR = "/";
public final static String EMPTY_STRING = new String();
/**
* convenience function to generate url hashes for YMark bookmarks
* @param url a string representation of a valid url
* @return a byte[] hash for the input URL string
* @throws MalformedURLException
* @see net.yacy.kelondro.data.meta.DigestURI.DigestURI(String url, byte[] hash).hash()
* @see net.yacy.kelondro.data.meta.DigestURI.DigestURI(String url, byte[] hash).hash()
*/
public final static byte[] getBookmarkId(String url) throws MalformedURLException {
return (new DigestURI(url, null)).hash();
return (new DigestURI(url)).hash();
}
/**
* convenience function to generate word hashes for YMark tags and folders
* @param key a tag or folder name
* @param key a tag or folder name
* @return a byte[] hash for the input string
* @see net.yacy.kelondro.data.word.Word.word2hash(final String word)
*/
public final static byte[] getKeyId(final String key) {
    // Lower-case with a fixed locale. The no-argument toLowerCase() applies the
    // JVM default locale, so under e.g. a Turkish locale "I" becomes a dotless i
    // and the same tag or folder name would hash to a different id depending on
    // the machine it runs on.
    final String normalizedKey = key.toLowerCase(java.util.Locale.ENGLISH);
    return Word.word2hash(normalizedKey);
}
/**
 * Serializes a set of keys into a byte array.
 * The set is first turned into a single string by {@link #keySetToString(HashSet)};
 * that string is then encoded using the platform's default charset.
 * @param urlSet the keys to serialize
 * @return the encoded bytes of the joined key string
 */
public final static byte[] keySetToBytes(final HashSet<String> urlSet) {
    final String joinedKeys = keySetToString(urlSet);
    return joinedKeys.getBytes();
}
public final static String keySetToString(final HashSet<String> urlSet) {
final Iterator<String> urlIter = urlSet.iterator();
final
final
StringBuilder urls = new StringBuilder(urlSet.size()*20);
while(urlIter.hasNext()) {
urls.append(TAGS_SEPARATOR);
@ -74,22 +74,22 @@ public class YMarkUtil {
urls.deleteCharAt(0);
return urls.toString();
}
public final static HashSet<String> keysStringToSet(final String keysString) {
HashSet<String> keySet = new HashSet<String>();
final String[] keyArray = keysString.split(TAGS_SEPARATOR);
final String[] keyArray = keysString.split(TAGS_SEPARATOR);
for (final String key : keyArray) {
keySet.add(key);
}
return keySet;
}
/**
 * Cleans a tags string, using {@link #EMPTY_STRING} as the default value.
 * Delegates to the two-argument overload.
 * @param tagsString the raw tags string
 * @return the result of {@code cleanTagsString(tagsString, EMPTY_STRING)}
 */
public final static String cleanTagsString(final String tagsString) {
    final String fallback = EMPTY_STRING;
    return cleanTagsString(tagsString, fallback);
}
public final static String cleanTagsString(final String tagsString, final String dflt) {
StringBuilder ts = new StringBuilder(tagsString);
public final static String cleanTagsString(final String tagsString, final String dflt) {
StringBuilder ts = new StringBuilder(tagsString);
if(ts.length() == 0)
return dflt;
// get rid of double commas and space characters following a comma
@ -108,15 +108,15 @@ public class YMarkUtil {
ts.deleteCharAt(ts.length()-1);
return ts.toString();
}
/**
 * Cleans a folders string, using {@link #EMPTY_STRING} as the default value.
 * Delegates to the two-argument overload.
 * @param foldersString the raw folders string
 * @return the result of {@code cleanFoldersString(foldersString, EMPTY_STRING)}
 */
public final static String cleanFoldersString(final String foldersString) {
    final String fallback = EMPTY_STRING;
    return cleanFoldersString(foldersString, fallback);
}
public final static String cleanFoldersString(final String foldersString, final String dflt) {
public final static String cleanFoldersString(final String foldersString, final String dflt) {
if(foldersString.isEmpty())
return dflt;
StringBuilder fs = new StringBuilder(cleanTagsString(foldersString));
StringBuilder fs = new StringBuilder(cleanTagsString(foldersString));
if(fs.length() == 0)
return YMarkEntry.BOOKMARK.FOLDERS.deflt();
for (int i = 0; i < fs.length()-1; i++) {
@ -134,5 +134,5 @@ public class YMarkUtil {
fs.deleteCharAt(fs.length()-1);
}
return fs.toString();
}
}
}

@ -116,7 +116,8 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
* DigestURI from URI string
*/
public DigestURI(final String url) throws MalformedURLException {
this(url, null);
super(url);
this.hash = null;
}
/**

@ -172,7 +172,7 @@ public class URIMetadataRow implements URIMetadata {
// TODO to be implemented
return null;
}
private void encodeDate(final int col, final Date d) {
// calculates the number of days since 1.1.1970 and returns this as 4-byte array
// 86400000 is the number of milliseconds in one day

@ -157,9 +157,6 @@ public class WebStructureGraph
final Document document,
final Condenser condenser) {
// generate citation reference
if ( url.isLocal() ) {
return; // we do this only for global urls
}
final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
final Iterator<MultiProtocolURI> it = hl.keySet().iterator();
final HashSet<MultiProtocolURI> globalRefURLs = new HashSet<MultiProtocolURI>();

@ -2422,7 +2422,7 @@ public final class Switchboard extends serverSwitch
// send the documents to solr
for ( final Document doc : in.documents ) {
try {
final String id = UTF8.String(new DigestURI(doc.dc_identifier(), null).hash());
final String id = UTF8.String(new DigestURI(doc.dc_identifier()).hash());
final String iquh = UTF8.String(in.queueEntry.url().hash());
if ( !id.equals(iquh) ) {
this.log.logWarning("condenseDocument consistency check doc="

@ -53,6 +53,8 @@ import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrField;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet;
@ -62,11 +64,11 @@ import org.apache.solr.common.SolrDocumentList;
import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrField;
public class SnippetProcess {
private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2);
// input values
final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query;
@ -126,7 +128,7 @@ public class SnippetProcess {
// start worker threads to fetch urls and snippets
this.workerThreads = null;
deployWorker(Math.min(10, query.itemsPerPage), query.neededResults());
deployWorker(Math.min(SNIPPET_WORKER_THREADS, query.itemsPerPage), query.neededResults());
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false);
}
@ -193,8 +195,8 @@ public class SnippetProcess {
// deploy worker to get more results
if (!anyWorkerAlive()) {
final int neededInclPrefetch = this.query.neededResults() + ((MemoryControl.available() > 100 * 1024 * 1024) ? this.query.itemsPerPage : 0);
deployWorker(Math.min(20, this.query.itemsPerPage), neededInclPrefetch);
final int neededInclPrefetch = this.query.neededResults() + ((MemoryControl.available() > 100 * 1024 * 1024 && SNIPPET_WORKER_THREADS >= 8) ? this.query.itemsPerPage : 0);
deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), neededInclPrefetch);
}
try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}

Loading…
Cancel
Save