From 37859dfc85f7cf30dfc31bd0f1a06bfa19a232fe Mon Sep 17 00:00:00 2001 From: sixcooler Date: Thu, 26 Dec 2013 22:01:01 +0100 Subject: [PATCH 1/5] missing entrys for: 'updated poi-3.9 / poi-scratchpad-3.9' --- .classpath | 4 ++-- nbproject/project.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.classpath b/.classpath index 7cf627838..165f0c9a8 100644 --- a/.classpath +++ b/.classpath @@ -26,8 +26,8 @@ - - + + diff --git a/nbproject/project.xml b/nbproject/project.xml index 7fe048e51..504d2584b 100644 --- a/nbproject/project.xml +++ b/nbproject/project.xml @@ -73,7 +73,7 @@ source htroot - lib/activation.jar;lib/apache-mime4j-0.6.jar;lib/arq-2.8.7.jar;lib/bcmail-jdk15-145.jar;lib/bcprov-jdk15-145.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.4.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-httpclient-3.1.jar;lib/commons-io-2.1.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.8.2.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-13.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.1.jar;lib/httpcore-4.3.jar;lib/httpmime-4.3.1.jar;lib/icu4j-core.jar;lib/iri-0.8.jar;lib/J7Zip-modified.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.2.jar;lib/jena-2.6.4.jar;lib/jsch-0.1.42.jar;lib/json-simple-1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-1.2.17.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.5.0.jar;lib/lucene-analyzers-phonetic-4.5.0.jar;lib/lucene-core-4.5.0.jar;lib/lucene-misc-4.5.0.jar;lib/lucene-spatial-4.5.0.jar;lib/metadata-extractor-2.6.2.jar;lib/mysql-connector-java-5.1.12-bin.jar;lib/pdfbox-1.8.2.jar;lib/poi-3.6-20091214.jar;lib/poi-scratchpad-3.6-20091214.jar;lib/sax-2.0.1.jar;lib/servlet-api-2.5-20081211.jar;lib/slf4j-api-1.7.2.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.5.0.jar;lib/solr-solrj-4.5.0.jar;lib/spatial4j-0.3.jar;lib/webcat-0.1-swf.jar;lib/wstx-asl-3.2.7.jar;
lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.5.jar + lib/activation.jar;lib/apache-mime4j-0.6.jar;lib/arq-2.8.7.jar;lib/bcmail-jdk15-145.jar;lib/bcprov-jdk15-145.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.4.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-httpclient-3.1.jar;lib/commons-io-2.1.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.8.2.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-13.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.1.jar;lib/httpcore-4.3.jar;lib/httpmime-4.3.1.jar;lib/icu4j-core.jar;lib/iri-0.8.jar;lib/J7Zip-modified.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.2.jar;lib/jena-2.6.4.jar;lib/jsch-0.1.42.jar;lib/json-simple-1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-1.2.17.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.5.0.jar;lib/lucene-analyzers-phonetic-4.5.0.jar;lib/lucene-core-4.5.0.jar;lib/lucene-misc-4.5.0.jar;lib/lucene-spatial-4.5.0.jar;lib/metadata-extractor-2.6.2.jar;lib/mysql-connector-java-5.1.12-bin.jar;lib/pdfbox-1.8.2.jar;lib/poi-3.9-20121203.jar;lib/poi-scratchpad-3.9-20121203.jar;lib/sax-2.0.1.jar;lib/servlet-api-2.5-20081211.jar;lib/slf4j-api-1.7.2.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.5.0.jar;lib/solr-solrj-4.5.0.jar;lib/spatial4j-0.3.jar;lib/webcat-0.1-swf.jar;lib/wstx-asl-3.2.7.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.5.jar 1.6 From 8954b2d25f5778e85bd79167874b661dbdfff23b Mon Sep 17 00:00:00 2001 From: sixcooler Date: Thu, 26 Dec 2013 22:23:40 +0100 Subject: [PATCH 2/5] removed classpathentry to 'remove obsolete htroot/solr htroot/gsa YaCy-servlets' --- .classpath | 2 -- 1 file changed, 2 deletions(-) diff --git a/.classpath b/.classpath index 165f0c9a8..e249e21fc 100644 --- a/.classpath +++ b/.classpath @@ -10,8 +10,6 @@ - - From 77aeb288a247e314b2ab7c8e2026dd3b2ec7a9d2 Mon Sep 17 00:00:00 2001 From: Michael 
Peter Christen Date: Thu, 26 Dec 2013 23:26:21 +0100 Subject: [PATCH 3/5] suppress deprecation warning (for now); TODO: find alternatives --- source/net/yacy/document/parser/docParser.java | 1 + 1 file changed, 1 insertion(+) diff --git a/source/net/yacy/document/parser/docParser.java b/source/net/yacy/document/parser/docParser.java index 301f3ce1a..8c0263ac3 100644 --- a/source/net/yacy/document/parser/docParser.java +++ b/source/net/yacy/document/parser/docParser.java @@ -53,6 +53,7 @@ public class docParser extends AbstractParser implements Parser { this.SUPPORTED_MIME_TYPES.add("application/x-msword"); } + @SuppressWarnings("deprecation") @Override public Document[] parse(final AnchorURL location, final String mimeType, final String charset, final InputStream source) From 667a6adddb24b2a0a32a7ed6887cf5dfbe60f063 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 26 Dec 2013 23:59:04 +0100 Subject: [PATCH 4/5] - use default files from yacy.init property "defaultFiles" if no jetty-configuration is given for default files. - fix a problem with default paths if no path is given (i.e. http://localhost:8090 instead of http://localhost:8090/). 
Without this patch the path was resolved automatically to http://localhost:8090// --- source/net/yacy/http/servlets/YaCyDefaultServlet.java | 4 ++-- source/net/yacy/server/http/HTTPDFileHandler.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/http/servlets/YaCyDefaultServlet.java b/source/net/yacy/http/servlets/YaCyDefaultServlet.java index fd7172ad3..bb6ddd146 100644 --- a/source/net/yacy/http/servlets/YaCyDefaultServlet.java +++ b/source/net/yacy/http/servlets/YaCyDefaultServlet.java @@ -142,7 +142,7 @@ public class YaCyDefaultServlet extends HttpServlet { _mimeTypes = new MimeTypes(); String tmpstr = this.getServletContext().getInitParameter("welcomeFile"); if (tmpstr == null) { - _welcomes = new String[]{"index.html", "welcome.html"}; // set a default welcome file name + _welcomes = HTTPDFileHandler.defaultFiles; } else { _welcomes = new String[]{tmpstr,"index.html"}; } @@ -305,7 +305,7 @@ public class YaCyDefaultServlet extends HttpServlet { } else { // resource is directory String welcome; - if (!endsWithSlash || (pathInContext.length() == 1)) { + if (!endsWithSlash) { StringBuffer buf = request.getRequestURL(); synchronized (buf) { int param = buf.lastIndexOf(";"); diff --git a/source/net/yacy/server/http/HTTPDFileHandler.java b/source/net/yacy/server/http/HTTPDFileHandler.java index 2241ec56a..8652f37d4 100644 --- a/source/net/yacy/server/http/HTTPDFileHandler.java +++ b/source/net/yacy/server/http/HTTPDFileHandler.java @@ -73,7 +73,7 @@ public final class HTTPDFileHandler { private static File htRootPath = null; public static File htDocsPath = null; - private static String[] defaultFiles = null; + public static String[] defaultFiles = null; private static File htDefaultPath = null; private static File htLocalePath = null; public static String indexForward = ""; From 25f9c350335cfd9c221e3f72e3e8e3a0a31c1a54 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 27 Dec 2013 00:34:55 +0100 Subject: [PATCH 
5/5] add patch to prevent naive search mistakes, such as the use of regular expressions, from producing no results. When a '*' is followed by a dot or any other expression, that expression is now used as a filetype search. --- htroot/yacysearch.java | 7 +++- .../net/yacy/search/query/QueryModifier.java | 36 +++++++++++-------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index adb16d9e5..f2a577ef3 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -370,7 +370,12 @@ public class yacysearch { int stp = querystring.indexOf('*'); if (stp >= 0) { - querystring = querystring.substring(0, stp) + Segment.catchallString + querystring.substring(stp + 1); + // if the star appears as a single entry, use the catchallstring + if (querystring.length() == 1) { + querystring = Segment.catchallString; + } else { + querystring = querystring.replace('*', ' ').replaceAll(" ", " "); + } } if ( querystring.indexOf("/near", 0) >= 0 ) { querystring = querystring.replace("/near", ""); diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java index 362ece108..d17871348 100644 --- a/source/net/yacy/search/query/QueryModifier.java +++ b/source/net/yacy/search/query/QueryModifier.java @@ -27,6 +27,7 @@ import org.apache.solr.common.params.MultiMapSolrParams; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; +import net.yacy.search.index.Segment; import net.yacy.search.schema.CollectionSchema; import net.yacy.server.serverObjects; @@ -74,21 +75,11 @@ public class QueryModifier { add("/file"); } + // parse 'common search mistakes' like guessed regular expressions + querystring = filetypeParser(querystring, "*"); + // parse filetype - final int ftp = querystring.indexOf("filetype:", 0); - if ( ftp >= 0 ) { - int ftb = querystring.indexOf(' ', ftp); - if ( ftb == -1 ) { - ftb = querystring.length(); - } - filetype = 
querystring.substring(ftp + 9, ftb); - querystring = querystring.replace("filetype:" + filetype, ""); - while ( !filetype.isEmpty() && filetype.charAt(0) == '.' ) { - filetype = filetype.substring(1); - } - add("filetype:" + filetype); - if (filetype.isEmpty()) filetype = null; - } + querystring = filetypeParser(querystring, "filetype:"); // parse site final int sp = querystring.indexOf("site:", 0); @@ -141,6 +132,23 @@ public class QueryModifier { return querystring.trim(); } + private String filetypeParser(String querystring, final String filetypePrefix) { + final int ftp = querystring.indexOf(filetypePrefix, 0); + if ( ftp >= 0 ) { + int ftb = querystring.indexOf(' ', ftp); + if ( ftb < 0 ) ftb = querystring.length(); + filetype = querystring.substring(ftp + filetypePrefix.length(), ftb); + querystring = querystring.replace(filetypePrefix + filetype, ""); + while ( !filetype.isEmpty() && filetype.charAt(0) == '.' ) { + filetype = filetype.substring(1); + } + add("filetype:" + filetype); + if (filetype.isEmpty()) filetype = null; + if (querystring.length() == 0) querystring = "*"; + } + return querystring; + } + public void add(String m) { if (modifier.length() > 0 && modifier.charAt(modifier.length() - 1) != ' ' && m != null && m.length() > 0) modifier.append(' '); if (m != null) modifier.append(m);