diff --git a/htroot/QuickCrawlLink_p.html b/htroot/QuickCrawlLink_p.html new file mode 100644 index 000000000..5a6f6a108 --- /dev/null +++ b/htroot/QuickCrawlLink_p.html @@ -0,0 +1,69 @@ + + + YaCy '#[clientname]#': Quick Crawl Link + #[metas]# + + + + + +
+ + + + + + +
+ + + + +
#[clientname]#: Quick Crawl Link
+
+
+
+

+ +#(mode)# + + + + + + +
+Quickly adding Bookmarks: +

Simply drag and drop the link shown below to your Browsers Toolbar/Link-Bar. +If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.

+
 Crawl with YaCy
+ +:: + + + + + + + + + + + + + +
Title:#[title]#
Link:#[url]#
Status: + #(status)# + URL successfully added to Crawler Queue + :: + Malformed URL + :: + Unable to create new crawling profile for URL: #[error]# + :: + Unable to add URL to crawler queue: #[error]# + #(/status)# +
+#(/mode)# + + + \ No newline at end of file diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java new file mode 100644 index 000000000..ceca581cf --- /dev/null +++ b/htroot/QuickCrawlLink_p.java @@ -0,0 +1,188 @@ +//QuickCrawlLink_p.java +//----------------------- +//part of the AnomicHTTPD caching proxy +//(C) by Michael Peter Christen; mc@anomic.de +//first published on http://www.anomic.de +//Frankfurt, Germany, 2004 +// +//This file was contributed by Martin Thelian +//$LastChangedDate$ +//$LastChangedBy$ +//$LastChangedRevision$ +// +//This program is free software; you can redistribute it and/or modify +//it under the terms of the GNU General Public License as published by +//the Free Software Foundation; either version 2 of the License, or +//(at your option) any later version. +// +//This program is distributed in the hope that it will be useful, +//but WITHOUT ANY WARRANTY; without even the implied warranty of +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//GNU General Public License for more details. +// +//You should have received a copy of the GNU General Public License +//along with this program; if not, write to the Free Software +//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//Using this software in any meaning (reading, learning, copying, compiling, +//running) means that you agree that the Author(s) is (are) not responsible +//for cost, loss of data or any harm that may be caused directly or indirectly +//by usage of this softare or this documentation. The usage of this software +//is on your own risk. The installation and usage (starting/running) of this +//software may allow other people or application to access your computer and +//any attached devices and is highly dependent on the configuration of the +//software which must be done by the user of the software; the author(s) is +//(are) also not responsible for proper configuration and usage of the +//software, even if provoked by documentation provided together with +//the software. +// +//Any changes to this file according to the GPL as documented in the file +//gpl.txt aside this file in the shipment you received can be done to the +//lines that follows this copyright notice here, but changes must not be +//done inside the copyright notive above. A re-distribution must contain +//the intact and unchanged copyright notice. +//Contributions and changes to the program code must be marked as such. + +//You must compile this file with +//javac -classpath .:../classes IndexCreate_p.java +//if the shell's current path is HTROOT + + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Date; + +import de.anomic.htmlFilter.htmlFilterContentScraper; +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaCrawlProfile; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaURL; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyCore; + +public class QuickCrawlLink_p { + + /** + * Example Javascript to call this servlet: + * javascript:w = window.open('http://user:pwd@localhost:8080/QuickCrawlLink_p.html?localIndexing=on&crawlingQ=on&xdstopw=on&title=' + escape(document.title) + '&url=' + location.href,'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no'); w.focus(); + * @param header + * @param post + * @param env + * @return + */ + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + + serverObjects prop = new serverObjects(); + plasmaSwitchboard switchboard = (plasmaSwitchboard)env; + + if (post == null) { + // send back usage example + prop.put("mode",0); + + // getting the http host header + String hostSocket = (String) header.get(httpHeader.CONNECTION_PROP_HOST); + + String host = hostSocket; + int port = 80, pos = hostSocket.indexOf(":"); + if (pos != -1) { + port = Integer.parseInt(hostSocket.substring(pos + 1)); + host = hostSocket.substring(0, pos); + } + + prop.put("mode_host", "localhost"); + prop.put("mode_port", Integer.toString(port)); + + return prop; + } + prop.put("mode",1); + + // getting the URL + String crawlingStart = post.get("url",null); + + // getting the browser title + String title = post.get("title",null); + + // getting other parameters if set + String crawlingFilter = post.get("crawlingFilter", ".*"); + int CrawlingDepth = Integer.parseInt(post.get("crawlingDepth", "0")); + boolean crawlDynamic = post.get("crawlingQ", "").equals("on"); + boolean storeHTCache = post.get("storeHTCache", "").equals("on"); + boolean localIndexing = post.get("localIndexing", "").equals("on"); + boolean remoteIndexing = post.get("crawlOrder", "").equals("on"); + boolean xsstopw = post.get("xsstopw", "").equals("on"); + boolean xdstopw = post.get("xdstopw", "").equals("on"); + boolean xpstopw = post.get("xpstopw", "").equals("on"); + + prop.put("mode_url",(crawlingStart==null)?"unknown":crawlingStart); + prop.put("mode_title",(title==null)?"unknown":title); + + if (crawlingStart != null) { + crawlingStart = crawlingStart.trim(); + crawlingStart = htmlFilterContentScraper.urlNormalform(null, crawlingStart); + + // check if url is proper + URL crawlingStartURL = null; + try { + crawlingStartURL = new URL(crawlingStart); + } catch (MalformedURLException e) { + prop.put("mode_status", 1); + return prop; + } + + String urlhash = plasmaURL.urlHash(crawlingStart); + switchboard.urlPool.loadedURL.remove(urlhash); + switchboard.urlPool.noticeURL.remove(urlhash); + switchboard.urlPool.errorURL.remove(urlhash); + + // create crawling profile + plasmaCrawlProfile.entry pe = null; + try { + pe = switchboard.profiles.newEntry( + crawlingStartURL.getHost(), + crawlingStart, + crawlingFilter, + crawlingFilter, + CrawlingDepth, + CrawlingDepth, + crawlDynamic, + storeHTCache, + true, + localIndexing, + remoteIndexing, + xsstopw, + xdstopw, + xpstopw + ); + } catch (Exception e) { + // mist + prop.put("mode_status", 2);//Error with url + prop.put("mode_status_error", e.getMessage()); + return prop; + } + + // stack URL + String reasonString = switchboard.sbStackCrawlThread.stackCrawl( + crawlingStart, + null, + yacyCore.seedDB.mySeed.hash, + (title==null)?"CRAWLING-ROOT":title, + new Date(), + 0, + pe + ); + + // validate rejection reason + if (reasonString == null) { + prop.put("mode_status", 0);//start msg + } else { + prop.put("mode_status", 3);//start msg + prop.put("mode_status_error", reasonString); + } + + } + + return prop; + } + +} diff --git a/htroot/env/grafics/addlink.gif b/htroot/env/grafics/addlink.gif new file mode 100644 index 000000000..ef9c2db40 Binary files /dev/null and b/htroot/env/grafics/addlink.gif differ diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 89100dcbc..4b0a40eef 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -338,9 +338,16 @@ public final class yacyClient { } } - public static int search(String wordhashes, boolean global, yacySeed targetPeer, - plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, - plasmaURLPattern blacklist, plasmaSnippetCache snippets, plasmaSearchProfile profile) { + public static int search( + String wordhashes, + boolean global, + yacySeed targetPeer, + plasmaCrawlLURL urlManager, + plasmaWordIndexEntity entityCache, + plasmaURLPattern blacklist, + plasmaSnippetCache snippets, + plasmaSearchProfile profile + ) { // send a search request to peer with remote Hash // this mainly converts the words into word hashes