diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java index 1093285ae..d825be9b4 100644 --- a/htroot/CacheAdmin_p.java +++ b/htroot/CacheAdmin_p.java @@ -175,12 +175,12 @@ public class CacheAdmin_p { formatAnchor(prop, document.getAudiolinks(), "audio"); formatAnchor(prop, document.getVideolinks(), "video"); formatAnchor(prop, document.getApplinks(), "apps"); - formatAnchor(prop, document.getEmaillinks(), "email"); + formatEmail(prop, document.getEmaillinks(), "email"); prop.putHTML("info_type_text", new String(scraper.getText())); i = 0; - final Iterator sentences = document.getSentences(false); + final Iterator sentences = document.getSentences(false); if (sentences != null) while (sentences.hasNext()) { prop.putHTML("info_type_lines_" + i + "_line", @@ -217,8 +217,8 @@ public class CacheAdmin_p { prop.put("info_empty", "1"); } else { prop.put("info_empty", "0"); - final TreeSet dList = new TreeSet(); - final TreeSet fList = new TreeSet(); + final TreeSet dList = new TreeSet(); + final TreeSet fList = new TreeSet(); int size = list.length - 1, i = size; for (; i >= 0 ; i--) { // Rueckwaerts ist schneller if (new File(dir, list[i]).isDirectory()) @@ -227,7 +227,7 @@ public class CacheAdmin_p { fList.add(list[i]); } - Iterator iter = dList.iterator(); + Iterator iter = dList.iterator(); i = 0; prop.put("info_treeFolders", dList.size()); while (iter.hasNext()) { @@ -257,33 +257,33 @@ public class CacheAdmin_p { return prop; } - private static void formatHeader(serverObjects prop, Map header) { + private static void formatHeader(serverObjects prop, Map header) { if (header == null) { prop.put("info_header", "0"); } else { prop.put("info_header", "1"); int i = 0; - final Iterator iter = header.entrySet().iterator(); - Map.Entry entry; + final Iterator> iter = header.entrySet().iterator(); + Map.Entry entry; while (iter.hasNext()) { - entry = (Map.Entry) iter.next(); - prop.put("info_header_line_" + i + "_property", (String) entry.getKey()); - prop.put("info_header_line_" + i + "_value", (String) entry.getValue()); + entry = iter.next(); + prop.put("info_header_line_" + i + "_property", entry.getKey()); + prop.put("info_header_line_" + i + "_value", entry.getValue()); i++; } prop.put("info_header_line", i); } } - private static void formatAnchor(serverObjects prop, Map anchor, String extension) { - final Iterator iter = anchor.entrySet().iterator(); + private static void formatAnchor(serverObjects prop, Map anchor, String extension) { + final Iterator> iter = anchor.entrySet().iterator(); String descr; - Map.Entry entry; + Map.Entry entry; prop.put("info_type_use." + extension + "_" + extension, anchor.size()); int i = 0; while (iter.hasNext()) { - entry = (Map.Entry) iter.next(); - descr = ((String) entry.getValue()).trim(); + entry = iter.next(); + descr = entry.getValue().trim(); if (descr.length() == 0) { descr = "-"; } prop.put("info_type_use." + extension + "_" + extension + "_" + i + "_name", de.anomic.data.htmlTools.encodeUnicode2html(descr.replaceAll("\n", "").trim(), true)); @@ -293,14 +293,33 @@ public class CacheAdmin_p { } prop.put("info_type_use." + extension, (i == 0) ? 0 : 1); } + + private static void formatEmail(serverObjects prop, Map anchor, String extension) { + final Iterator> iter = anchor.entrySet().iterator(); + String descr; + Map.Entry entry; + prop.put("info_type_use." + extension + "_" + extension, anchor.size()); + int i = 0; + while (iter.hasNext()) { + entry = iter.next(); + descr = entry.getValue().trim(); + if (descr.length() == 0) { descr = "-"; } + prop.put("info_type_use." + extension + "_" + extension + "_" + i + "_name", + de.anomic.data.htmlTools.encodeUnicode2html(descr.replaceAll("\n", "").trim(), true)); + prop.put("info_type_use." + extension + "_" + extension + "_" + i + "_link", + de.anomic.data.htmlTools.encodeUnicode2html(entry.getKey().toString(), true)); + i++; + } + prop.put("info_type_use." + extension, (i == 0) ? 0 : 1); + } - private static void formatImageAnchor(serverObjects prop, TreeSet anchor) { - final Iterator iter = anchor.iterator(); + private static void formatImageAnchor(serverObjects prop, TreeSet anchor) { + final Iterator iter = anchor.iterator(); htmlFilterImageEntry ie; prop.put("info_type_use.images_images", anchor.size()); int i = 0; while (iter.hasNext()) { - ie = (htmlFilterImageEntry) iter.next(); + ie = iter.next(); prop.putHTML("info_type_use.images_images_" + i + "_name", ie.alt().replaceAll("\n", "").trim()); prop.putHTML("info_type_use.images_images_" + i + "_link", de.anomic.data.htmlTools.encodeUnicode2html(ie.url().toNormalform(false, true), false)); diff --git a/htroot/ScreenSaver.java b/htroot/ScreenSaver.java deleted file mode 100644 index 61eb1ac39..000000000 --- a/htroot/ScreenSaver.java +++ /dev/null @@ -1,151 +0,0 @@ -// ScreenSaver.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2006 -// -// This File is contributed by Martin Thelian -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -//done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. - - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintWriter; -import java.util.ArrayList; - -import de.anomic.http.httpHeader; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaSwitchboardQueue; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeed; - -public class ScreenSaver { - - /** - * Generates a proxy-autoconfig-file (application/x-ns-proxy-autoconfig) - * See: Proxy Auto-Config File Format - * @param header the complete HTTP header of the request - * @param post any arguments for this servlet, the request carried with (GET as well as POST) - * @param env the serverSwitch object holding all runtime-data - * @return the rewrite-properties for the template - */ - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { - - plasmaSwitchboard sb = (plasmaSwitchboard)env; - boolean localCrawlStarted = false; - boolean remoteTriggeredCrawlStarted = false; - boolean globalCrawlTriggerStarted = false; - try { - InputStream input = (InputStream) header.get("INPUTSTREAM"); - OutputStream output = (OutputStream) header.get("OUTPUTSTREAM"); - - String line = null; - BufferedReader inputReader = new BufferedReader(new InputStreamReader(input)); - PrintWriter outputWriter = new PrintWriter(output); - while ((line = inputReader.readLine()) != null) { - yacyCore.peerActions.updateMySeed(); - if (line.equals("")) { - continue; - } else if (line.startsWith("PPM")) { - String currentPPM = yacyCore.seedDB.mySeed().get(yacySeed.ISPEED, "-1"); - outputWriter.println(currentPPM); - } else if (line.startsWith("LINKS")) { - String currentLinks = yacyCore.seedDB.mySeed().get(yacySeed.LCOUNT, "-1"); - outputWriter.println(currentLinks); - } else if (line.startsWith("WORDS")) { - String currentWords = yacyCore.seedDB.mySeed().get(yacySeed.ICOUNT, "-1"); - outputWriter.println(currentWords); - } else if (line.equals("CURRENTURL")) { - String currentURL = ""; - ArrayList entryList = new ArrayList(); - synchronized (sb.indexingTasksInProcess) { - if (sb.indexingTasksInProcess.size() > 0) { - entryList.addAll(sb.indexingTasksInProcess.values()); - } - } - if (entryList.size() > 0) { - plasmaSwitchboardQueue.Entry pcentry = (plasmaSwitchboardQueue.Entry) entryList.get(0); - currentURL = pcentry.url().toString(); - } - - outputWriter.println(currentURL); - } else if (line.equals("CONTINUECRAWLING")) { - if (sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL)) { - localCrawlStarted = true; - sb.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); - } - if (sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { - remoteTriggeredCrawlStarted = true; - sb.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - } - if (sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_CRAWL_LOADER)) { - globalCrawlTriggerStarted = true; - sb.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_CRAWL_LOADER); - } - } else if (line.equals("EXIT")) { - outputWriter.println("OK"); - outputWriter.flush(); - return null; - } else { - outputWriter.println("Unknown command"); - } - outputWriter.flush(); - } - - return null; - } catch (Exception e) { - return null; - } finally { - if (localCrawlStarted) { - sb.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); - } - if (remoteTriggeredCrawlStarted) { - sb.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - } - if (globalCrawlTriggerStarted) { - sb.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_CRAWL_LOADER); - } - } - } - -} diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java index 0aa929405..fd88932d4 100644 --- a/source/de/anomic/http/httpHeader.java +++ b/source/de/anomic/http/httpHeader.java @@ -75,7 +75,7 @@ import de.anomic.server.serverDate; import de.anomic.yacy.yacyURL; -public final class httpHeader extends TreeMap implements Map { +public final class httpHeader extends TreeMap implements Map { private static final long serialVersionUID = 17L; @@ -239,8 +239,8 @@ public final class httpHeader extends TreeMap implements Map Proxy */ public static final String CONNECTION_PROP_CLIENT_REQUEST_HEADER = "CLIENT_REQUEST_HEADER"; @@ -302,7 +302,7 @@ public final class httpHeader extends TreeMap implements Map implements Map implements Map> it = this.entrySet().iterator(); + Iterator> it = this.entrySet().iterator(); while(it.hasNext()) { - Map.Entry e = it.next(); + Map.Entry e = it.next(); //System.out.println(""+e.getKey()+" : "+e.getValue()); if(e.getKey().equals("Cookie")) { diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index 118edafa9..4c90d83af 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -530,8 +530,8 @@ public final class httpdFileHandler { // call rewrite-class requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP")); requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path); - requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body); - requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out); + //requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body); + //requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out); httpd.sendRespondHeader(conProp, out, httpVersion, 200, null); diff --git a/source/de/anomic/index/indexRWIEntryOrder.java b/source/de/anomic/index/indexRWIEntryOrder.java index 6f7ccbdd0..bf3dd6728 100644 --- a/source/de/anomic/index/indexRWIEntryOrder.java +++ b/source/de/anomic/index/indexRWIEntryOrder.java @@ -114,6 +114,10 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder imp return cardinal(new indexRWIRowEntry(key)); } + public long tf(indexRWIEntry t) { + return (t.hitcount() - min.hitcount()) * (1 + max.wordsintext() - min.wordsintext()) / (1 + max.hitcount() - min.hitcount()) / (t.wordsintext() - min.wordsintext()); + } + public long cardinal(indexRWIEntry t) { //return Long.MAX_VALUE - preRanking(ranking, iEntry, this.entryMin, this.entryMax, this.searchWords); // the normalizedEntry must be a normalized indexEntry diff --git a/source/de/anomic/kelondro/kelondroMergeIterator.java b/source/de/anomic/kelondro/kelondroMergeIterator.java index 3dc27827c..138290e59 100644 --- a/source/de/anomic/kelondro/kelondroMergeIterator.java +++ b/source/de/anomic/kelondro/kelondroMergeIterator.java @@ -147,7 +147,6 @@ public class kelondroMergeIterator implements kelondroCloneableIterator { throw new java.lang.UnsupportedOperationException("merge does not support remove"); } - @SuppressWarnings("unchecked") public static kelondroCloneableIterator cascade(Set> /*of*/ iterators, kelondroOrder c, Method merger, boolean up) { // this extends the ability to combine two iterators // to the abiliy of combining a set of iterators diff --git a/source/de/anomic/plasma/cache/ResourceInfoFactory.java b/source/de/anomic/plasma/cache/ResourceInfoFactory.java index 1af06c977..a4a091142 100644 --- a/source/de/anomic/plasma/cache/ResourceInfoFactory.java +++ b/source/de/anomic/plasma/cache/ResourceInfoFactory.java @@ -57,7 +57,7 @@ import de.anomic.yacy.yacyURL; public class ResourceInfoFactory { public IResourceInfo buildResourceInfoObj( yacyURL resourceURL, - Map resourceMetadata + Map resourceMetadata ) throws UnsupportedProtocolException, IllegalAccessException { String protocString = resourceURL.getProtocol(); @@ -70,10 +70,10 @@ public class ResourceInfoFactory { try { // loading class by name - final Class moduleClass = Class.forName(className); + final Class moduleClass = Class.forName(className); // getting the constructor - final Constructor classConstructor = moduleClass.getConstructor( new Class[] { + final Constructor classConstructor = moduleClass.getConstructor( new Class[] { yacyURL.class, Map.class } ); diff --git a/source/de/anomic/plasma/cache/ftp/ResourceInfo.java b/source/de/anomic/plasma/cache/ftp/ResourceInfo.java index e9dae385a..5ee849eaf 100644 --- a/source/de/anomic/plasma/cache/ftp/ResourceInfo.java +++ b/source/de/anomic/plasma/cache/ftp/ResourceInfo.java @@ -58,44 +58,44 @@ public class ResourceInfo implements IResourceInfo { public static final String MIMETYPE = "mimetype"; public static final String MODIFICATION_DATE = "modificationDate"; - public static final String REFERER = "referer"; - private yacyURL url; - private HashMap propertyMap; + private yacyURL objectURL, refererURL; + private HashMap propertyMap; /** * Constructor used by the {@link ResourceInfoFactory} * @param objectURL * @param objectInfo */ - public ResourceInfo(yacyURL objectURL, Map objectInfo) { + public ResourceInfo(yacyURL objectURL, Map objectInfo) { if (objectURL == null) throw new NullPointerException(); if (objectInfo == null) throw new NullPointerException(); // generating the url hash - this.url = objectURL; + this.objectURL = objectURL; + this.refererURL = null; // create the http header object - this.propertyMap = new HashMap(objectInfo); + this.propertyMap = new HashMap(objectInfo); } public ResourceInfo(yacyURL objectURL, yacyURL refererUrl, String mimeType, Date fileDate) { if (objectURL == null) throw new NullPointerException(); // generating the url hash - this.url = objectURL; + this.objectURL = objectURL; // create the http header object - this.propertyMap = new HashMap(); + this.propertyMap = new HashMap(); if (refererUrl != null) - this.propertyMap.put(REFERER, refererUrl); + this.refererURL = refererUrl; if (mimeType != null) this.propertyMap.put(MIMETYPE, mimeType); if (fileDate != null) this.propertyMap.put(MODIFICATION_DATE, Long.toString(fileDate.getTime())); } - public Map getMap() { + public Map getMap() { return this.propertyMap; } @@ -109,11 +109,11 @@ public class ResourceInfo implements IResourceInfo { } public yacyURL getRefererUrl() { - return (this.propertyMap == null) ? null : ((yacyURL) this.propertyMap.get(REFERER)); + return this.refererURL; } public yacyURL getUrl() { - return this.url; + return this.objectURL; } public Date ifModifiedSince() { diff --git a/source/de/anomic/plasma/cache/http/ResourceInfo.java b/source/de/anomic/plasma/cache/http/ResourceInfo.java index c942cd2a5..963fa37db 100644 --- a/source/de/anomic/plasma/cache/http/ResourceInfo.java +++ b/source/de/anomic/plasma/cache/http/ResourceInfo.java @@ -68,7 +68,7 @@ public class ResourceInfo implements IResourceInfo { * @param objectURL * @param objectInfo */ - public ResourceInfo(yacyURL objectURL, Map objectInfo) { + public ResourceInfo(yacyURL objectURL, Map objectInfo) { if (objectURL == null) throw new NullPointerException(); if (objectInfo == null) throw new NullPointerException(); @@ -90,7 +90,7 @@ public class ResourceInfo implements IResourceInfo { this.responseHeader = responseHeaders; } - public Map getMap() { + public Map getMap() { return this.responseHeader; } diff --git a/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java b/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java index de6574a7b..f29e3a053 100644 --- a/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java +++ b/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java @@ -38,14 +38,14 @@ public final class plasmaProtocolLoader { private plasmaSwitchboard sb; private serverLog log; - private HashSet supportedProtocols; + private HashSet supportedProtocols; private plasmaHTTPLoader httpLoader; private plasmaFTPLoader ftpLoader; public plasmaProtocolLoader(plasmaSwitchboard sb, serverLog log) { this.sb = sb; this.log = log; - this.supportedProtocols = new HashSet(Arrays.asList(new String[]{"http","https","ftp"})); + this.supportedProtocols = new HashSet(Arrays.asList(new String[]{"http","https","ftp"})); // initiate loader objects httpLoader = new plasmaHTTPLoader(sb, log); @@ -57,8 +57,9 @@ public final class plasmaProtocolLoader { return this.supportedProtocols.contains(protocol.trim().toLowerCase()); } - public HashSet getSupportedProtocols() { - return (HashSet) this.supportedProtocols.clone(); + @SuppressWarnings("unchecked") + public HashSet getSupportedProtocols() { + return (HashSet) this.supportedProtocols.clone(); } public plasmaHTCache.Entry load(plasmaCrawlEntry entry, String parserMode) { diff --git a/source/de/anomic/plasma/plasmaSearchAPI.java b/source/de/anomic/plasma/plasmaSearchAPI.java index ea04217d5..6b0607c60 100644 --- a/source/de/anomic/plasma/plasmaSearchAPI.java +++ b/source/de/anomic/plasma/plasmaSearchAPI.java @@ -149,7 +149,7 @@ public class plasmaSearchAPI { prop.putNum("genUrlList_urlList_"+i+"_urlExists_ranking", (entry.ranking() - rn)); prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", yacyURL.domLengthEstimation(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", plasmaSearchRankingProcess.ybr(entry.hash())); - prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", ranked.getOrder().authority(entry.hash())); + prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash())); prop.put("genUrlList_urlList_"+i+"_urlExists_date", serverDate.formatShortDay(new Date(entry.word().lastModified()))); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext()); diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index b56f80ff3..4a74d286d 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -96,7 +96,7 @@ public final class plasmaSearchRankingProcess { this.localSearchContainerMaps = wordIndex.localSearchContainers(query, null); serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.COLLECTION, this.localSearchContainerMaps[0].size(), System.currentTimeMillis() - timer)); - // join and exlcude the local result + // join and exclude the local result timer = System.currentTimeMillis(); indexContainer index = (this.localSearchContainerMaps == null) ?