From a44bcd322e837098a5a8c46c58aad713c24e1264 Mon Sep 17 00:00:00 2001 From: luccioman Date: Wed, 17 Aug 2016 15:15:31 +0200 Subject: [PATCH 1/4] Copy image resources contained in donation iframe. Also refactored the related code : - extracted it in a method - catch Exception and log it, rather than Throwable which is bad practice --- source/net/yacy/yacy.java | 81 ++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 4e26f6dc4..f86a56bd0 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -39,13 +39,22 @@ import java.nio.channels.FileChannel; import java.nio.channels.FileLock; import java.util.Properties; import java.util.concurrent.Semaphore; + +import com.google.common.io.Files; + import net.yacy.cora.date.GenericFormatter; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.yacy.CacheStrategy; +import net.yacy.cora.order.Digest; import net.yacy.cora.protocol.ClientIdentification; +import net.yacy.cora.protocol.ConnectionInfo; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.TimeoutRequest; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.sorting.Array; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.crawler.retrieval.Response; +import net.yacy.document.Document; import net.yacy.gui.YaCyApp; import net.yacy.gui.framework.Browser; import net.yacy.http.Jetty9HttpServerImpl; @@ -54,17 +63,11 @@ import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.Formatter; import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.OS; +import net.yacy.peers.Seed; import net.yacy.peers.operation.yacyBuildProperties; import net.yacy.peers.operation.yacyRelease; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import com.google.common.io.Files; -import net.yacy.cora.document.id.DigestURL; -import 
net.yacy.cora.federate.yacy.CacheStrategy; -import net.yacy.cora.order.Digest; -import net.yacy.cora.protocol.ConnectionInfo; -import net.yacy.crawler.retrieval.Response; -import net.yacy.peers.Seed; import net.yacy.server.serverSwitch; import net.yacy.utils.translation.TranslatorXliff; @@ -265,22 +268,7 @@ public final class yacy { //final File htTemplatePath = new File(homePath, sb.getConfig("htTemplatePath","htdocs")); // copy the donate iframe (better to copy this once here instead of doing this in an actual iframe in the search result) - final File wwwEnvPath = new File(htDocsPath, "env"); - mkdirIfNeseccary(wwwEnvPath); - final String iframesource = sb.getConfig("donation.iframesource", ""); - final String iframetarget = sb.getConfig("donation.iframetarget", ""); - final File iframefile = new File(htDocsPath, iframetarget); - if (!iframefile.exists()) new Thread() { - @Override - public void run() { - final ClientIdentification.Agent agent = ClientIdentification.getAgent(ClientIdentification.yacyInternetCrawlerAgentName); - Response response; - try { - response = sb.loader == null ? null : sb.loader.load(sb.loader.request(new DigestURL(iframesource), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); - if (response != null) FileUtils.copy(response.getContent(), iframefile); - } catch (Throwable e) {} - } - }.start(); + importDonationIFrame(sb, htDocsPath); // create default notifier picture File notifierFile = new File(htDocsPath, "notifier.gif"); @@ -421,6 +409,53 @@ public final class yacy { } catch (final Exception e) {} // was once stopped by de.anomic.net.ftpc$sm.checkExit(ftpc.java:1790) } + /** + * Concurrently import the donation iframe content to serve it directly from this peer. 
+ * @param switchBoard the SwitchBoard instance + */ + private static void importDonationIFrame(final Switchboard switchBoard, final File htDocsDirectory) { + final File wwwEnvPath = new File(htDocsDirectory, "env"); + mkdirIfNeseccary(wwwEnvPath); + final String iframesource = switchBoard.getConfig("donation.iframesource", ""); + final String iframetarget = switchBoard.getConfig("donation.iframetarget", ""); + final File iframefile = new File(htDocsDirectory, iframetarget); + if (!iframefile.exists()) new Thread() { + @Override + public void run() { + final ClientIdentification.Agent agent = ClientIdentification.getAgent(ClientIdentification.yacyInternetCrawlerAgentName); + Response documentResponse; + try { + /* Load the donation html frame content */ + documentResponse = sb.loader == null ? null : sb.loader.load(sb.loader.request(new DigestURL(iframesource), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); + if (documentResponse != null) { + Document[] documents = documentResponse.parse(); + if(documents != null && documents.length > 0 && documents[0] != null) { + Document donateDocument = documents[0]; + String donateDocContent = new String(documentResponse.getContent(), donateDocument.getCharset()); + /* Load image resources contained in the page */ + if(donateDocument.getImages() != null) { + for(DigestURL imgURL : donateDocument.getImages().keySet()) { + Response response = sb.loader.load(sb.loader.request(imgURL, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); + if (response != null) { + String imgFileName = imgURL.getFileName(); + /* Store each image to this peer custom directory */ + FileUtils.copy(response.getContent(), new File(iframefile.getParentFile(), imgFileName)); + + /* Transform the original image URL to a relative one */ + donateDocContent = donateDocContent.replace(imgURL.getURL().toString(), imgFileName); + } + } + } + FileUtils.copy(donateDocContent.getBytes(donateDocument.getCharset()), 
iframefile); + } + } + } catch (Exception e) { + ConcurrentLog.warn("STARTUP", "Could not retrieve donation frame content.", e); + } + } + }.start(); + } + /** * @param f */ From 46b88365489374ce238500cf52fe32fb5519e7dc Mon Sep 17 00:00:00 2001 From: luccioman Date: Wed, 17 Aug 2016 15:15:31 +0200 Subject: [PATCH 2/4] Copy image resources contained in donation iframe. Handle eventual images loading errors. --- source/net/yacy/yacy.java | 87 ++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 4e26f6dc4..4dcbb98f3 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -39,13 +39,22 @@ import java.nio.channels.FileChannel; import java.nio.channels.FileLock; import java.util.Properties; import java.util.concurrent.Semaphore; + +import com.google.common.io.Files; + import net.yacy.cora.date.GenericFormatter; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.yacy.CacheStrategy; +import net.yacy.cora.order.Digest; import net.yacy.cora.protocol.ClientIdentification; +import net.yacy.cora.protocol.ConnectionInfo; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.TimeoutRequest; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.sorting.Array; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.crawler.retrieval.Response; +import net.yacy.document.Document; import net.yacy.gui.YaCyApp; import net.yacy.gui.framework.Browser; import net.yacy.http.Jetty9HttpServerImpl; @@ -54,17 +63,11 @@ import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.Formatter; import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.OS; +import net.yacy.peers.Seed; import net.yacy.peers.operation.yacyBuildProperties; import net.yacy.peers.operation.yacyRelease; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import 
com.google.common.io.Files; -import net.yacy.cora.document.id.DigestURL; -import net.yacy.cora.federate.yacy.CacheStrategy; -import net.yacy.cora.order.Digest; -import net.yacy.cora.protocol.ConnectionInfo; -import net.yacy.crawler.retrieval.Response; -import net.yacy.peers.Seed; import net.yacy.server.serverSwitch; import net.yacy.utils.translation.TranslatorXliff; @@ -265,22 +268,7 @@ public final class yacy { //final File htTemplatePath = new File(homePath, sb.getConfig("htTemplatePath","htdocs")); // copy the donate iframe (better to copy this once here instead of doing this in an actual iframe in the search result) - final File wwwEnvPath = new File(htDocsPath, "env"); - mkdirIfNeseccary(wwwEnvPath); - final String iframesource = sb.getConfig("donation.iframesource", ""); - final String iframetarget = sb.getConfig("donation.iframetarget", ""); - final File iframefile = new File(htDocsPath, iframetarget); - if (!iframefile.exists()) new Thread() { - @Override - public void run() { - final ClientIdentification.Agent agent = ClientIdentification.getAgent(ClientIdentification.yacyInternetCrawlerAgentName); - Response response; - try { - response = sb.loader == null ? null : sb.loader.load(sb.loader.request(new DigestURL(iframesource), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); - if (response != null) FileUtils.copy(response.getContent(), iframefile); - } catch (Throwable e) {} - } - }.start(); + importDonationIFrame(sb, htDocsPath); // create default notifier picture File notifierFile = new File(htDocsPath, "notifier.gif"); @@ -421,6 +409,59 @@ public final class yacy { } catch (final Exception e) {} // was once stopped by de.anomic.net.ftpc$sm.checkExit(ftpc.java:1790) } + /** + * Concurrently import the donation iframe content to serve it directly from this peer. + * @param switchBoard the SwitchBoard instance. Must not be null. + * @param htDocsDirectory the custom htdocs directory. Must not be null. 
+ */ + private static void importDonationIFrame(final Switchboard switchBoard, final File htDocsDirectory) { + final File wwwEnvPath = new File(htDocsDirectory, "env"); + mkdirIfNeseccary(wwwEnvPath); + final String iframesource = switchBoard.getConfig("donation.iframesource", ""); + final String iframetarget = switchBoard.getConfig("donation.iframetarget", ""); + final File iframefile = new File(htDocsDirectory, iframetarget); + if (!iframefile.exists()) new Thread() { + @Override + public void run() { + final ClientIdentification.Agent agent = ClientIdentification.getAgent(ClientIdentification.yacyInternetCrawlerAgentName); + Response documentResponse; + try { + /* Load the donation html frame content */ + documentResponse = switchBoard.loader == null ? null : switchBoard.loader.load(switchBoard.loader.request(new DigestURL(iframesource), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); + if (documentResponse != null) { + Document[] documents = documentResponse.parse(); + if(documents != null && documents.length > 0 && documents[0] != null) { + Document donateDocument = documents[0]; + String donateDocContent = new String(documentResponse.getContent(), donateDocument.getCharset()); + /* Load image resources contained in the page */ + if(donateDocument.getImages() != null) { + for(DigestURL imgURL : donateDocument.getImages().keySet()) { + try { + Response response = switchBoard.loader.load(switchBoard.loader.request(imgURL, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, agent); + if (response != null) { + String imgFileName = imgURL.getFileName(); + /* Store each image in the same directory as the iframe target file */ + FileUtils.copy(response.getContent(), new File(iframefile.getParentFile(), imgFileName)); + + /* Transform the original image URL to a relative one */ + donateDocContent = donateDocContent.replace(imgURL.getURL().toString(), imgFileName); + } + } catch(IOException e) { + /* Failing to load one image should not 
stop the whole task */ + ConcurrentLog.warn("STARTUP", "Donation frame retrieval : could not get an image resource.", e); + } + } + } + FileUtils.copy(donateDocContent.getBytes(donateDocument.getCharset()), iframefile); + } + } + } catch (Exception e) { + ConcurrentLog.warn("STARTUP", "Could not retrieve donation frame content.", e); + } + } + }.start(); + } + /** * @param f */ From 4743ade66a8b27402e0a0dd0927b257d2e40141b Mon Sep 17 00:00:00 2001 From: luccioman Date: Wed, 17 Aug 2016 16:13:46 +0200 Subject: [PATCH 3/4] Search comparison : prefer https URLs Also disabled currently not working URLs. --- htroot/compare_yacy.java | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/htroot/compare_yacy.java b/htroot/compare_yacy.java index 427c55424..956f16af0 100644 --- a/htroot/compare_yacy.java +++ b/htroot/compare_yacy.java @@ -43,19 +43,19 @@ public class compare_yacy { searchengines.put("YaCy (local)", "yacysearch.html?display=2&resource=local&query="); //searchengines.put("google.com", "https://www.google.com/#q="); searchengines.put("startpage.com", "https://startpage.com/do/search?cat=web&query="); - searchengines.put("bing.com", "http://www.bing.com/search?q="); - searchengines.put("metager.de", "http://www.metager.de/meta/cgi-bin/meta.ger1?eingabe="); - searchengines.put("metager2.de (web)", "http://www.metager2.de/search.php?ses=web&q="); - searchengines.put("metager2.de (international)", "http://www.metager2.de/search.php?ses=international&q="); - searchengines.put("yahoo.com", "http://search.yahoo.com/search?p="); - //searchengines.put("romso.de", "http://romso.de/?q="); // no serach service 2016-01-02 - searchengines.put("Wikipedia English", "http://en.wikipedia.org/wiki/"); - searchengines.put("Wikipedia Deutsch", "http://de.wikipedia.org/wiki/"); - searchengines.put("Sciencenet", "http://sciencenet.fzk.de:8080/yacysearch.html?verify=true&resource=global&nav=all&display=2&meanCount=5&query="); + 
searchengines.put("bing.com", "https://www.bing.com/search?q="); + searchengines.put("metager.de", "https://www.metager.de/meta/cgi-bin/meta.ger1?eingabe="); + searchengines.put("metager2.de (web)", "https://www.metager2.de/search.php?ses=web&q="); + searchengines.put("metager2.de (international)", "https://www.metager2.de/search.php?ses=international&q="); + //searchengines.put("yahoo.com", "https://search.yahoo.com/search?p="); // no search service in iframe 2016-08-17 : "Load denied by X-Frame-Options: does not permit cross-origin framing." + //searchengines.put("romso.de", "http://romso.de/?q="); // no search service 2016-01-02 + searchengines.put("Wikipedia English", "https://en.wikipedia.org/wiki/"); + searchengines.put("Wikipedia Deutsch", "https://de.wikipedia.org/wiki/"); + //searchengines.put("Sciencenet", "http://sciencenet.fzk.de:8080/yacysearch.html?verify=true&resource=global&nav=all&display=2&meanCount=5&query="); // no search service 2016-08-17 //searchengines.put("dbpedia", "http://dbpedia.neofonie.de/browse/~:"); // no search service 2016-01-02 - searchengines.put("wolfram alpha", "http://www.wolframalpha.com/input/?i="); - searchengines.put("OAIster@OCLC", "http://oaister.worldcat.org/search?q="); - searchengines.put("oai.yacy.net", "http://oai.yacy.net/yacysearch.html?verify=true&resource=local&nav=all&display=2&meanCount=5&query="); + searchengines.put("wolfram alpha", "https://www.wolframalpha.com/input/?i="); + searchengines.put("OAIster@OCLC", "https://oaister.worldcat.org/search?q="); + //searchengines.put("oai.yacy.net", "http://oai.yacy.net/yacysearch.html?verify=true&resource=local&nav=all&display=2&meanCount=5&query="); // no search service 2016-08-17 } public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { From f84e584d74c6d6e2fff916b5a3ad8e5849d2ac81 Mon Sep 17 00:00:00 2001 From: luccioman Date: Wed, 17 Aug 2016 17:05:40 +0200 Subject: [PATCH 4/4] ConfigPortal : fixed 
mixed-content security issue with https We now use the same protocol as the one used to display the config page, so that when using https the content is no longer blocked by the browser's mixed-content detection. --- htroot/ConfigPortal.html | 6 +++--- htroot/ConfigPortal.java | 23 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html index 516e4b07c..1d4134824 100644 --- a/htroot/ConfigPortal.html +++ b/htroot/ConfigPortal.html @@ -146,7 +146,7 @@ This would look like: