more generics

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4341 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent 94f21d9403
commit 45339c3db5
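
The whole commit applies one mechanical pattern: raw collection types (Iterator, Map.Entry, TreeMap, ArrayList, Enumeration) become parameterized types, which moves type errors from runtime to compile time and removes a cast at every element access. A minimal before/after sketch of the pattern, using hypothetical names rather than code from this commit:

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class GenericsPattern {

    // before: a raw Iterator forces an unchecked cast at every step
    static void rawLoop(Map anchors) {
        Iterator i = anchors.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry entry = (Map.Entry) i.next();
            String u = (String) entry.getKey(); // a wrong type fails only at runtime
            System.out.println(u);
        }
    }

    // after: the compiler checks the element type, no casts needed
    static void genericLoop(Map<String, String> anchors) {
        Iterator<Map.Entry<String, String>> i = anchors.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry<String, String> entry = i.next();
            System.out.println(entry.getKey());
        }
    }

    public static void main(String[] args) {
        Map<String, String> anchors = new HashMap<String, String>();
        anchors.put("http://example.net/", "example anchor");
        rawLoop(anchors);
        genericLoop(anchors);
    }
}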

@@ -178,7 +178,7 @@ public final class search {
 yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
 // make event
-plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, sb.wordIndex, null, true, abstractSet);
+plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, sb.wordIndex, null, true);
 urlRetrievalAllTime = theSearch.getURLRetrievalTime();
 snippetComputationAllTime = theSearch.getSnippetComputationTime();

@@ -293,7 +293,7 @@ public class yacysearch {
 theQuery.setOffset(0); // in case that this is a new search, always start without a offset
 offset = 0;
 }
-plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, ranking, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null);
+plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, ranking, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false);
 // generate result object
 serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");

@@ -660,7 +660,7 @@ public final class plasmaCondenser {
 return ("$%&/()=\"$%&/()=`^+*~#'-_:;,|<>[]\\".indexOf(c) >= 0);
 }
-public static Enumeration wordTokenizer(String s, String charset, int minLength) {
+public static Enumeration<StringBuffer> wordTokenizer(String s, String charset, int minLength) {
 try {
 return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes()), charset, minLength);
 } catch (Exception e) {
@@ -668,10 +668,10 @@ public final class plasmaCondenser {
 }
 }
-public static class sievedWordsEnum implements Enumeration {
+public static class sievedWordsEnum implements Enumeration<StringBuffer> {
 // this enumeration removes all words that contain either wrong characters or are too short
-Object buffer = null;
+StringBuffer buffer = null;
 unsievedWordsEnum e;
 int ml;
@@ -685,7 +685,7 @@ public final class plasmaCondenser {
 e.pre(x);
 }
-private Object nextElement0() {
+private StringBuffer nextElement0() {
 StringBuffer s;
 char c;
 loop: while (e.hasMoreElements()) {
@@ -709,8 +709,8 @@ public final class plasmaCondenser {
 return buffer != null;
 }
-public Object nextElement() {
-Object r = buffer;
+public StringBuffer nextElement() {
+StringBuffer r = buffer;
 buffer = nextElement0();
 return r;
 }
@@ -794,7 +794,7 @@ public final class plasmaCondenser {
 }
 }
-public static class sentencesFromInputStreamEnum implements Iterator {
+public static class sentencesFromInputStreamEnum implements Iterator<StringBuffer> {
 // read sentences from a given input stream
 // this enumerates StringBuffer objects
@@ -836,7 +836,7 @@ public final class plasmaCondenser {
 return buffer != null;
 }
-public Object next() {
+public StringBuffer next() {
 if (buffer == null) {
 return null;
 } else {
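
For context on the sievedWordsEnum changes above: the class keeps a one-element look-ahead, where nextElement0() pre-fetches the next surviving word so that hasMoreElements() can answer without consuming input, and the new Enumeration<StringBuffer> typing lets callers drop their (StringBuffer) casts. A simplified, self-contained sketch of that pattern (a stand-in, not the YaCy class itself):

import java.util.Enumeration;
import java.util.NoSuchElementException;

class BufferedWordsEnum implements Enumeration<StringBuffer> {
    private final String[] words; // stands in for the wrapped unsievedWordsEnum
    private int index = 0;
    private StringBuffer buffer;  // the pre-fetched element; null once exhausted

    BufferedWordsEnum(String[] words) {
        this.words = words;
        this.buffer = nextElement0(); // prime the look-ahead
    }

    // skip entries that are sieved out (here simply: too short)
    private StringBuffer nextElement0() {
        while (index < words.length) {
            String w = words[index++];
            if (w.length() >= 2) return new StringBuffer(w);
        }
        return null;
    }

    public boolean hasMoreElements() {
        return buffer != null;
    }

    public StringBuffer nextElement() {
        if (buffer == null) throw new NoSuchElementException();
        StringBuffer r = buffer;
        buffer = nextElement0();
        return r;
    }

    public static void main(String[] args) {
        BufferedWordsEnum e = new BufferedWordsEnum(new String[] { "a", "word", "of", "yacy" });
        while (e.hasMoreElements()) System.out.println(e.nextElement());
    }
}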

@@ -77,7 +77,7 @@ public final class plasmaCrawlStacker extends Thread {
 // keys for different database types
 public static final int QUEUE_DB_TYPE_RAM = 0;
 public static final int QUEUE_DB_TYPE_TREE = 1;
-public static final int QUEUE_DB_TYPE_FLEX = 2;
+public static final int QUEUE_DB_TYPE_ECO = 2;
 final serverLog log = new serverLog("STACKCRAWL");
@@ -282,7 +282,7 @@ public final class plasmaCrawlStacker extends Thread {
 if (this.dbtype == QUEUE_DB_TYPE_RAM) {
 // do nothing..
 }
-if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
+if (this.dbtype == QUEUE_DB_TYPE_ECO) {
 new File(cacheStacksPath, stackfile).delete();
 //kelondroFlexWidthArray.delete(cacheStacksPath, stackfile);
 }
@@ -298,7 +298,7 @@ public final class plasmaCrawlStacker extends Thread {
 if (this.dbtype == QUEUE_DB_TYPE_RAM) {
 this.urlEntryCache = new kelondroRowSet(plasmaCrawlEntry.rowdef, 0);
 }
-if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
+if (this.dbtype == QUEUE_DB_TYPE_ECO) {
 cacheStacksPath.mkdirs();
 File f = new File(cacheStacksPath, stackfile);
 try {

@@ -229,7 +229,7 @@ public class plasmaParserDocument {
 return -1;
 }
-public Iterator getSentences(boolean pre) {
+public Iterator<StringBuffer> getSentences(boolean pre) {
 if (this.text == null) return null;
 plasmaCondenser.sentencesFromInputStreamEnum e = plasmaCondenser.sentencesFromInputStream(getText(), this.charset);
 e.pre(pre);
@@ -248,8 +248,8 @@ public class plasmaParserDocument {
 if (hs.size() == 0) return "";
 // generate a new list
 StringBuffer sb = new StringBuffer(this.keywords.size() * 6);
-Iterator i = hs.iterator();
-while (i.hasNext()) sb.append((String) i.next()).append(separator);
+Iterator<String> i = hs.iterator();
+while (i.hasNext()) sb.append(i.next()).append(separator);
 return sb.substring(0, sb.length() - 1);
 }
@@ -303,24 +303,23 @@ public class plasmaParserDocument {
 private synchronized void resortLinks() {
 // extract hyperlinks, medialinks and emaillinks from anchorlinks
-Iterator i;
 yacyURL url;
 String u;
 int extpos, qpos;
 String ext = null;
-i = anchors.entrySet().iterator();
+Iterator<Map.Entry<String, String>> i = anchors.entrySet().iterator();
 hyperlinks = new HashMap<String, String>();
 videolinks = new HashMap<String, String>();
 audiolinks = new HashMap<String, String>();
 applinks = new HashMap<String, String>();
 emaillinks = new HashMap<String, String>();
 TreeSet<htmlFilterImageEntry> collectedImages = new TreeSet<htmlFilterImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
-Map.Entry entry;
+Map.Entry<String, String> entry;
 while (i.hasNext()) {
-entry = (Map.Entry) i.next();
-u = (String) entry.getKey();
+entry = i.next();
+u = entry.getKey();
 if ((u != null) && (u.startsWith("mailto:"))) {
-emaillinks.put(u.substring(7), (String)entry.getValue());
+emaillinks.put(u.substring(7), entry.getValue());
 } else {
 extpos = u.lastIndexOf(".");
 if (extpos > 0) {
@@ -350,10 +349,10 @@ public class plasmaParserDocument {
 }
 // add image links that we collected from the anchors to the image map
-i = collectedImages.iterator();
+Iterator<htmlFilterImageEntry> j = collectedImages.iterator();
 htmlFilterImageEntry iEntry;
-while (i.hasNext()) {
-iEntry = (htmlFilterImageEntry) i.next();
+while (j.hasNext()) {
+iEntry = (htmlFilterImageEntry) j.next();
 if (!images.contains(iEntry)) images.add(iEntry);
 }
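
Note on the hunk above: a second iterator variable j is introduced because i is now bound to Map.Entry<String, String> and can no longer be reused to walk the TreeSet<htmlFilterImageEntry>; the (htmlFilterImageEntry) cast on j.next() is kept in the committed code even though the typed iterator makes it redundant.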

@@ -41,6 +41,7 @@ import de.anomic.index.indexRWIEntry;
 import de.anomic.index.indexURLEntry;
 import de.anomic.kelondro.kelondroBitfield;
 import de.anomic.kelondro.kelondroMSetTools;
+import de.anomic.plasma.plasmaSnippetCache.MediaSnippet;
 import de.anomic.server.serverProfiling;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacyCore;
@@ -70,7 +71,7 @@ public final class plasmaSearchEvent {
 private Map<String, TreeMap<String, String>> rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
 private yacySearch[] primarySearchThreads, secondarySearchThreads;
 private Thread localSearchThread;
-private TreeMap preselectedPeerHashes;
+private TreeMap<String, String> preselectedPeerHashes;
 //private Object[] references;
 public TreeMap<String, String> IAResults;
 public TreeMap<String, Integer> IACount;
@@ -79,16 +80,16 @@ public final class plasmaSearchEvent {
 private resultWorker[] workerThreads;
 private ArrayList<ResultEntry> resultList;
 //private int resultListLock; // a pointer that shows that all elements below this pointer are fixed and may not be changed again
-private HashMap failedURLs; // a mapping from a urlhash to a fail reason string
-TreeSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
+private HashMap<String, String> failedURLs; // a mapping from a urlhash to a fail reason string
+TreeSet<String> snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
 private long urlRetrievalAllTime;
 private long snippetComputationAllTime;
+@SuppressWarnings("unchecked")
 private plasmaSearchEvent(plasmaSearchQuery query,
 plasmaWordIndex wordIndex,
-TreeMap preselectedPeerHashes,
-boolean generateAbstracts,
-TreeSet abstractSet) {
+TreeMap<String, String> preselectedPeerHashes,
+boolean generateAbstracts) {
 this.eventTime = System.currentTimeMillis(); // for lifetime check
 this.wordIndex = wordIndex;
 this.query = query;
@@ -104,14 +105,14 @@ public final class plasmaSearchEvent {
 this.urlRetrievalAllTime = 0;
 this.snippetComputationAllTime = 0;
 this.workerThreads = null;
-this.resultList = new ArrayList(10); // this is the result set which is filled up with search results, enriched with snippets
+this.resultList = new ArrayList<ResultEntry>(10); // this is the result set which is filled up with search results, enriched with snippets
 //this.resultListLock = 0; // no locked elements until now
-this.failedURLs = new HashMap(); // a map of urls to reason strings where a worker thread tried to work on, but failed.
+this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed.
 // snippets do not need to match with the complete query hashes,
 // only with the query minus the stopwords which had not been used for the search
-final TreeSet filtered = kelondroMSetTools.joinConstructive(query.queryHashes, plasmaSwitchboard.stopwords);
-this.snippetFetchWordHashes = (TreeSet) query.queryHashes.clone();
+final TreeSet<String> filtered = kelondroMSetTools.joinConstructive(query.queryHashes, plasmaSwitchboard.stopwords);
+this.snippetFetchWordHashes = (TreeSet<String>) query.queryHashes.clone();
 if ((filtered != null) && (filtered.size() > 0)) {
 kelondroMSetTools.excludeDestructive(this.snippetFetchWordHashes, plasmaSwitchboard.stopwords);
 }
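
The @SuppressWarnings("unchecked") added to the constructor covers the clone() call above: TreeSet.clone() is declared to return Object, and because generics are erased at runtime the downcast to TreeSet<String> cannot be verified, only warned about. A standalone illustration:

import java.util.TreeSet;

public class CloneCastSketch {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        TreeSet<String> queryHashes = new TreeSet<String>();
        queryHashes.add("abcdefghijkl");
        // TreeSet.clone() returns Object; the downcast to a parameterized
        // type is erased at runtime, so the compiler can only warn --
        // hence the annotation on the enclosing method.
        TreeSet<String> copy = (TreeSet<String>) queryHashes.clone();
        System.out.println(copy);
    }
}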
@@ -163,15 +164,15 @@ public final class plasmaSearchEvent {
 if (generateAbstracts) {
 // compute index abstracts
 long timer = System.currentTimeMillis();
-Iterator ci = this.rankedCache.searchContainerMaps()[0].entrySet().iterator();
-Map.Entry entry;
+Iterator<Map.Entry<String, indexContainer>> ci = this.rankedCache.searchContainerMaps()[0].entrySet().iterator();
+Map.Entry<String, indexContainer> entry;
 int maxcount = -1;
 double mindhtdistance = 1.1, d;
 String wordhash;
 while (ci.hasNext()) {
-entry = (Map.Entry) ci.next();
-wordhash = (String) entry.getKey();
-indexContainer container = (indexContainer) entry.getValue();
+entry = ci.next();
+wordhash = entry.getKey();
+indexContainer container = entry.getValue();
 assert (container.getWordHash().equals(wordhash));
 if (container.size() > maxcount) {
 IAmaxcounthash = wordhash;
@@ -256,13 +257,13 @@ public final class plasmaSearchEvent {
 public static void cleanupEvents(boolean all) {
 // remove old events in the event cache
-Iterator i = lastEvents.entrySet().iterator();
+Iterator<plasmaSearchEvent> i = lastEvents.values().iterator();
 plasmaSearchEvent cleanEvent;
 while (i.hasNext()) {
-cleanEvent = (plasmaSearchEvent) ((Map.Entry) i.next()).getValue();
+cleanEvent = i.next();
 if ((all) || (cleanEvent.eventTime + eventLifetime < System.currentTimeMillis())) {
 // execute deletion of failed words
-Set removeWords = cleanEvent.query.queryHashes;
+Set<String> removeWords = cleanEvent.query.queryHashes;
 removeWords.addAll(cleanEvent.query.excludeHashes);
 cleanEvent.wordIndex.removeEntriesMultiple(removeWords, cleanEvent.failedURLs.keySet());
 serverLog.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id(true) + ", removed " + cleanEvent.failedURLs.size() + " URL references on " + removeWords.size() + " words");
@@ -315,7 +316,7 @@ public final class plasmaSearchEvent {
 if ((query.constraint != null) &&
 (query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
 (!(comp.title().startsWith("Index of")))) {
-final Iterator wi = query.queryHashes.iterator();
+final Iterator<String> wi = query.queryHashes.iterator();
 while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
 registerFailure(page.hash(), "index-of constraint not fullfilled");
 return null;
@@ -366,7 +367,7 @@ public final class plasmaSearchEvent {
 } else {
 // attach media information
 startTime = System.currentTimeMillis();
-ArrayList mediaSnippets = plasmaSnippetCache.retrieveMediaSnippets(comp.url(), snippetFetchWordHashes, query.contentdom, (snippetFetchMode == 2), 6000);
+ArrayList<MediaSnippet> mediaSnippets = plasmaSnippetCache.retrieveMediaSnippets(comp.url(), snippetFetchWordHashes, query.contentdom, (snippetFetchMode == 2), 6000);
 long snippetComputationTime = System.currentTimeMillis() - startTime;
 serverLog.logInfo("SEARCH_EVENT", "media snippet load time for " + comp.url() + ": " + snippetComputationTime);
@@ -447,13 +448,12 @@ public final class plasmaSearchEvent {
 public static plasmaSearchEvent getEvent(plasmaSearchQuery query,
 plasmaSearchRankingProfile ranking,
 plasmaWordIndex wordIndex,
-TreeMap preselectedPeerHashes,
-boolean generateAbstracts,
-TreeSet abstractSet) {
+TreeMap<String, String> preselectedPeerHashes,
+boolean generateAbstracts) {
 synchronized (lastEvents) {
 plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id(false));
 if (event == null) {
-event = new plasmaSearchEvent(query, wordIndex, preselectedPeerHashes, generateAbstracts, abstractSet);
+event = new plasmaSearchEvent(query, wordIndex, preselectedPeerHashes, generateAbstracts);
 } else {
 //re-new the event time for this event, so it is not deleted next time too early
 event.eventTime = System.currentTimeMillis();
@@ -634,23 +634,23 @@ public final class plasmaSearchEvent {
 System.out.println("DEBUG-INDEXABSTRACT: hash " + (String) entry.getKey() + ": " + ((query.queryHashes.contains((String) entry.getKey())) ? "NEEDED" : "NOT NEEDED") + "; " + ((TreeMap) entry.getValue()).size() + " entries");
 }
 */
-TreeMap abstractJoin = (rcAbstracts.size() == query.queryHashes.size()) ? kelondroMSetTools.joinConstructive(rcAbstracts.values(), true) : new TreeMap();
+TreeMap<String, String> abstractJoin = (rcAbstracts.size() == query.queryHashes.size()) ? kelondroMSetTools.joinConstructive(rcAbstracts.values(), true) : new TreeMap<String, String>();
 if (abstractJoin.size() == 0) {
 //System.out.println("DEBUG-INDEXABSTRACT: no success using index abstracts from remote peers");
 } else {
 //System.out.println("DEBUG-INDEXABSTRACT: index abstracts delivered " + abstractJoin.size() + " additional results for secondary search");
 // generate query for secondary search
-TreeMap secondarySearchURLs = new TreeMap(); // a (peerhash:urlhash-liststring) mapping
-Iterator i1 = abstractJoin.entrySet().iterator();
-Map.Entry entry1;
+TreeMap<String, String> secondarySearchURLs = new TreeMap<String, String>(); // a (peerhash:urlhash-liststring) mapping
+Iterator<Map.Entry<String, String>> i1 = abstractJoin.entrySet().iterator();
+Map.Entry<String, String> entry1;
 String url, urls, peer, peers;
 String mypeerhash = yacyCore.seedDB.mySeed().hash;
 boolean mypeerinvolved = false;
 int mypeercount;
 while (i1.hasNext()) {
-entry1 = (Map.Entry) i1.next();
-url = (String) entry1.getKey();
-peers = (String) entry1.getValue();
+entry1 = i1.next();
+url = entry1.getKey();
+peers = entry1.getValue();
 //System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
 mypeercount = 0;
 for (int j = 0; j < peers.length(); j = j + 12) {
@@ -670,8 +670,8 @@ public final class plasmaSearchEvent {
 secondarySearchThreads = new yacySearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()];
 int c = 0;
 while (i1.hasNext()) {
-entry1 = (Map.Entry) i1.next();
-peer = (String) entry1.getKey();
+entry1 = i1.next();
+peer = entry1.getKey();
 if (peer.equals(mypeerhash)) continue; // we dont need to ask ourself
 urls = (String) entry1.getValue();
 words = wordsFromPeer(peer, urls);
@@ -686,17 +686,17 @@ public final class plasmaSearchEvent {
 }
 private String wordsFromPeer(String peerhash, String urls) {
-Map.Entry entry;
+Map.Entry<String, TreeMap<String, String>> entry;
 String word, peerlist, url, wordlist = "";
-TreeMap urlPeerlist;
+TreeMap<String, String> urlPeerlist;
 int p;
 boolean hasURL;
 synchronized (rcAbstracts) {
-Iterator i = rcAbstracts.entrySet().iterator();
+Iterator<Map.Entry<String, TreeMap<String, String>>> i = rcAbstracts.entrySet().iterator();
 while (i.hasNext()) {
-entry = (Map.Entry) i.next();
-word = (String) entry.getKey();
-urlPeerlist = (TreeMap) entry.getValue();
+entry = i.next();
+word = entry.getKey();
+urlPeerlist = entry.getValue();
 hasURL = true;
 for (int j = 0; j < urls.length(); j = j + 12) {
 url = urls.substring(j, j + 12);

@@ -224,6 +224,7 @@ public plasmaSearchQuery(
 return kelondroMSetTools.anymatch(wordhashes, keyhashes);
 }
+@SuppressWarnings("unchecked")
 public static TreeSet<String>[] cleanQuery(String querystring) {
 // returns two sets: a query set and a exclude set
 if ((querystring == null) || (querystring.length() == 0)) return new TreeSet[]{new TreeSet<String>(kelondroNaturalOrder.naturalComparator), new TreeSet<String>(kelondroNaturalOrder.naturalComparator)};
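
Here the raw new TreeSet[]{...} must stay: Java forbids instantiating arrays of a parameterized type ("generic array creation"), so the array is created raw and the added @SuppressWarnings("unchecked") silences the unavoidable warning. A standalone illustration of the restriction:

import java.util.TreeSet;

public class GenericArraySketch {
    @SuppressWarnings("unchecked")
    static TreeSet<String>[] twoSets() {
        // new TreeSet<String>[2] would be a compile error ("generic array
        // creation"), so the array is created raw and the assignment to the
        // parameterized array type is unchecked.
        TreeSet<String>[] sets = new TreeSet[2];
        sets[0] = new TreeSet<String>();
        sets[1] = new TreeSet<String>();
        return sets;
    }

    public static void main(String[] args) {
        TreeSet<String>[] sets = twoSets();
        sets[0].add("query");
        sets[1].add("exclude");
        System.out.println(sets[0] + " / " + sets[1]);
    }
}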

@@ -64,8 +64,8 @@ import de.anomic.kelondro.kelondroMSetTools;
 import de.anomic.plasma.cache.IResourceInfo;
 import de.anomic.plasma.parser.ParserException;
 import de.anomic.server.logging.serverLog;
-import de.anomic.yacy.yacySearch;
 import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySearch;
 import de.anomic.yacy.yacyURL;

 public class plasmaSnippetCache {
@@ -623,14 +623,14 @@ public class plasmaSnippetCache {
 }
 }
-public static ArrayList retrieveMediaSnippets(yacyURL url, Set queryhashes, int mediatype, boolean fetchOnline, int timeout) {
+public static ArrayList<MediaSnippet> retrieveMediaSnippets(yacyURL url, Set<String> queryhashes, int mediatype, boolean fetchOnline, int timeout) {
 if (queryhashes.size() == 0) {
 serverLog.logFine("snippet fetch", "no query hashes given for url " + url);
-return new ArrayList();
+return new ArrayList<MediaSnippet>();
 }
 plasmaParserDocument document = retrieveDocument(url, fetchOnline, timeout, false);
-ArrayList a = new ArrayList();
+ArrayList<MediaSnippet> a = new ArrayList<MediaSnippet>();
 if (document != null) {
 if ((mediatype == plasmaSearchQuery.CONTENTDOM_ALL) || (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO)) a.addAll(computeMediaSnippets(document, queryhashes, plasmaSearchQuery.CONTENTDOM_AUDIO));
 if ((mediatype == plasmaSearchQuery.CONTENTDOM_ALL) || (mediatype == plasmaSearchQuery.CONTENTDOM_VIDEO)) a.addAll(computeMediaSnippets(document, queryhashes, plasmaSearchQuery.CONTENTDOM_VIDEO));
@@ -640,24 +640,24 @@ public class plasmaSnippetCache {
 return a;
 }
-public static ArrayList computeMediaSnippets(plasmaParserDocument document, Set queryhashes, int mediatype) {
-if (document == null) return new ArrayList();
-Map media = null;
+public static ArrayList<MediaSnippet> computeMediaSnippets(plasmaParserDocument document, Set<String> queryhashes, int mediatype) {
+if (document == null) return new ArrayList<MediaSnippet>();
+Map<String, String> media = null;
 if (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO) media = document.getAudiolinks();
 else if (mediatype == plasmaSearchQuery.CONTENTDOM_VIDEO) media = document.getVideolinks();
 else if (mediatype == plasmaSearchQuery.CONTENTDOM_APP) media = document.getApplinks();
 if (media == null) return null;
-Iterator i = media.entrySet().iterator();
-Map.Entry entry;
+Iterator<Map.Entry<String, String>> i = media.entrySet().iterator();
+Map.Entry<String, String> entry;
 String url, desc;
-Set s;
-ArrayList result = new ArrayList();
+Set<String> s;
+ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
 while (i.hasNext()) {
-entry = (Map.Entry) i.next();
-url = (String) entry.getKey();
-desc = (String) entry.getValue();
+entry = i.next();
+url = entry.getKey();
+desc = entry.getValue();
 s = removeAppearanceHashes(url, queryhashes);
 if (s.size() == 0) {
 result.add(new MediaSnippet(mediatype, url, desc, null));
@@ -672,17 +672,17 @@ public class plasmaSnippetCache {
 return result;
 }
-public static ArrayList computeImageSnippets(plasmaParserDocument document, Set queryhashes) {
-TreeSet images = document.getImages();
-Iterator i = images.iterator();
+public static ArrayList<MediaSnippet> computeImageSnippets(plasmaParserDocument document, Set<String> queryhashes) {
+TreeSet<htmlFilterImageEntry> images = document.getImages();
+Iterator<htmlFilterImageEntry> i = images.iterator();
 htmlFilterImageEntry ientry;
 String url, desc;
-Set s;
-ArrayList result = new ArrayList();
+Set<String> s;
+ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
 while (i.hasNext()) {
-ientry = (htmlFilterImageEntry) i.next();
+ientry = i.next();
 url = ientry.url().toNormalform(true, true);
 desc = ientry.alt();
 s = removeAppearanceHashes(url, queryhashes);
@@ -699,17 +699,17 @@ public class plasmaSnippetCache {
 return result;
 }
-private static Set removeAppearanceHashes(String sentence, Set queryhashes) {
+private static Set<String> removeAppearanceHashes(String sentence, Set<String> queryhashes) {
 // remove all hashes that appear in the sentence
 if (sentence == null) return queryhashes;
-HashMap hs = hashSentence(sentence);
-Iterator j = queryhashes.iterator();
+HashMap<String, Integer> hs = hashSentence(sentence);
+Iterator<String> j = queryhashes.iterator();
 String hash;
 Integer pos;
-Set remaininghashes = new HashSet();
+Set<String> remaininghashes = new HashSet<String>();
 while (j.hasNext()) {
-hash = (String) j.next();
-pos = (Integer) hs.get(hash);
+hash = j.next();
+pos = hs.get(hash);
 if (pos == null) {
 remaininghashes.add(new String(hash));
 }
@@ -717,15 +717,15 @@ public class plasmaSnippetCache {
 return remaininghashes;
 }
-private static HashMap hashSentence(String sentence) {
+private static HashMap<String, Integer> hashSentence(String sentence) {
 // generates a word-wordPos mapping
-HashMap map = new HashMap();
-Enumeration words = plasmaCondenser.wordTokenizer(sentence, "UTF-8", 0);
+HashMap<String, Integer> map = new HashMap<String, Integer>();
+Enumeration<StringBuffer> words = plasmaCondenser.wordTokenizer(sentence, "UTF-8", 0);
 int pos = 0;
 StringBuffer word;
 String hash;
 while (words.hasMoreElements()) {
-word = (StringBuffer) words.nextElement();
+word = words.nextElement();
 hash = plasmaCondenser.word2hash(new String(word));
 if (!map.containsKey(hash)) map.put(hash, new Integer(pos)); // dont overwrite old values, that leads to too far word distances
 pos += word.length() + 1;

@@ -238,7 +238,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 public double lastrequestedQueries = 0d;
 public int totalPPM = 0;
 public double totalQPM = 0d;
-public TreeMap clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used
+public TreeMap<String, String> clusterhashes; // map of peerhash(String)/alternative-local-address as ip:port or only ip (String) or null if address in seed should be used
 public boolean acceptLocalURLs, acceptGlobalURLs;
 public URLLicense licensedURLs;
 public Timer moreMemory;
