commit df51e4ef07
Binary file not shown.
@@ -1,7 +0,0 @@
This package is part of WebCAT (http://webcat.sourceforge.net/)

WebCAT was developed at the XLDB group of the Department of Informatics of the Faculty of Sciences of the University of Lisbon in Portugal.

WebCAT was written by Bruno Martins.

WebCAT is released under the BSD License. (http://www.opensource.org/licenses/bsd-license.php)
@@ -1,113 +0,0 @@
//swfParser.java
//------------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2005
//
//this file is contributed by Marc Nause
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package net.yacy.document.parser;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import net.yacy.cora.document.id.DigestURL;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.ContentScraper;
import pt.tumba.parser.swf.SWF2HTML;

public class swfParser extends AbstractParser implements Parser {

    public swfParser() {
        super("Adobe Flash Parser");
        this.SUPPORTED_EXTENSIONS.add("swf");
        this.SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash");
        this.SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash2-preview");
        this.SUPPORTED_MIME_TYPES.add("application/futuresplash");
        this.SUPPORTED_MIME_TYPES.add("image/vnd.rn-realflash");
    }

    /*
     * parses the source document and returns a Document containing
     * all extracted information about the parsed document
     */
    @Override
    public Document[] parse(
            final DigestURL location,
            final String mimeType,
            final String charset,
            final VocabularyScraper scraper,
            final int timezoneOffset,
            final InputStream source)
            throws Parser.Failure, InterruptedException
    {

        try {
            final SWF2HTML swf2html = new SWF2HTML();
            String contents = "";
            try {
                // convert the SWF stream to HTML, then scrape it like any HTML document;
                // scraperObject is a field inherited from AbstractParser
                contents = swf2html.convertSWFToHTML(source);
                scraperObject = htmlParser.parseToScraper(location, charset, scraper, timezoneOffset, contents, 100);
            } catch (final NegativeArraySizeException e) {
                throw new Parser.Failure(e.getMessage(), location);
            } catch (final IOException e) {
                throw new Parser.Failure(e.getMessage(), location);
            } catch (final Exception e) {
                throw new Parser.Failure(e.getMessage(), location);
            }

            // as the result of parsing, this method must return a Document object
            ContentScraper htmlscraper = (ContentScraper) this.scraperObject; // shortcut to access ContentScraper methods
            return new Document[]{new Document(
                    location, // url of the source document
                    mimeType, // the document's mime type
                    StandardCharsets.UTF_8.name(), // charset of the document text
                    this,
                    htmlscraper.getContentLanguages(),
                    htmlscraper.getKeywords(),
                    htmlscraper.getTitles(),
                    htmlscraper.getAuthor(),
                    htmlscraper.getPublisher(),
                    null, // sections
                    htmlscraper.getDescriptions(),
                    htmlscraper.getLon(), htmlscraper.getLat(),
                    htmlscraper.getText(),
                    htmlscraper.getAnchors(),
                    htmlscraper.getRSS(),
                    null, // images
                    false,
                    htmlscraper.getDate())};
        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;

            // if an unexpected error occurs, just raise a new Parser.Failure
            final String errorMsg = "Unable to parse the swf document '" + location + "':" + e.getMessage();
            //AbstractParser.log.logSevere(errorMsg);
            throw new Parser.Failure(errorMsg, location);
        }
    }

}
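For context, this removed parser converted SWF content to HTML via SWF2HTML and then reused the htmlParser scraper. A minimal sketch of how it could be invoked directly follows; the sample URL, the local file name, the helper class name, and the null/zero arguments are illustrative assumptions, not part of the deleted code:

import java.io.FileInputStream;
import java.io.InputStream;

import net.yacy.cora.document.id.DigestURL;
import net.yacy.document.Document;
import net.yacy.document.parser.swfParser;

public class SwfParserSketch {
    public static void main(String[] args) throws Exception {
        final swfParser parser = new swfParser();
        final DigestURL location = new DigestURL("http://example.org/sample.swf"); // hypothetical source URL
        try (InputStream source = new FileInputStream("sample.swf")) {             // hypothetical local copy
            final Document[] docs = parser.parse(
                    location,
                    "application/x-shockwave-flash", // one of the supported mime types
                    "UTF-8",                         // charset hint
                    null,                            // no VocabularyScraper
                    0,                               // timezone offset
                    source);
            System.out.println(docs[0].dc_title()); // title extracted from the converted HTML
        }
    }
}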
@@ -0,0 +1,198 @@
package net.yacy.search.index;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Tokenizer;
import net.yacy.document.VocabularyScraper;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.rwi.TermSearch;
import net.yacy.kelondro.util.Bitfield;
import static net.yacy.search.index.Segment.catchallWord;
import net.yacy.search.query.QueryGoal;
import org.junit.AfterClass;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Test;

public class SegmentTest {

    static Segment index;

    /**
     * Set up the RWI index.
     *
     * @throws IOException
     */
    @BeforeClass
    public static void setUpClass() throws IOException {
        // set up an index segment
        index = new Segment(new ConcurrentLog("SegmentTest"),
                new File("test/DATA/INDEX/webportal/SEGMENTS"),
                new File("test/DATA/INDEX/webportal/ARCHIVE"),
                null, null);

        // connect the RWI index
        index.connectRWI(10, 1024);
    }

    @AfterClass
    public static void tearDownClass() {
        index.close();
        ConcurrentLog.shutdown();
    }

    /**
     * Test of the clear method (for RWI) of class Segment.
     */
    @Test
    public void testClear() throws MalformedURLException, IOException, SpaceExceededException {
        DigestURL url = new DigestURL("http://test.org/test.html");
        int urlComps = MultiProtocolURL.urlComps(url.toNormalform(true)).length;
        int urlLength = url.toNormalform(true).length();

        byte[] termHash = Word.word2hash("test");
        Word word = new Word(1, 1, 1);
        word.flags = new Bitfield(4); // flags must not be null

        WordReferenceRow ientry = new WordReferenceRow(
                url.hash(), urlLength, urlComps, 0, 1, 1,
                System.currentTimeMillis(), System.currentTimeMillis(),
                UTF8.getBytes("en"), Response.DT_TEXT, 0, 0);
        ientry.setWord(word);

        // add a dummy Word and WordReference
        index.termIndex.add(termHash, ientry);

        // check the index count
        long cnt = index.RWICount();
        assertTrue(cnt > 0);

        index.clear();

        // check the index count after clear
        cnt = index.RWICount();
        assertTrue(cnt == 0);
    }

    /**
     * Helper to store a text to the RWI index. This was derived from the
     * Segment.storeDocument() procedure.
     *
     * @param text of the document
     * @throws IOException
     * @throws SpaceExceededException
     */
    private void storeTestDocTextToTermIndex(DigestURL url, String text) throws IOException, SpaceExceededException {

        // set a pseudo url for the simulated test document
        final String urlNormalform = url.toNormalform(true);
        String dc_title = "Test Document";
        // STORE PAGE INDEX INTO WORD INDEX DB
        // create a word prototype which is re-used for all entries
        if (index.termIndex != null) {
            final int outlinksSame = 0;
            final int outlinksOther = 0;
            final int urlLength = urlNormalform.length();
            final int urlComps = MultiProtocolURL.urlComps(url.toNormalform(false)).length;
            final int wordsintitle = CommonPattern.SPACES.split(dc_title).length; // same calculation as for CollectionSchema.title_words_val

            WordCache meaningLib = new WordCache(null);
            boolean doAutotagging = false;
            VocabularyScraper scraper = null;

            // tokenize the text into words and their in-text properties
            Tokenizer t = new Tokenizer(url, text, meaningLib, doAutotagging, scraper);

            // create a WordReference template
            final WordReferenceRow ientry = new WordReferenceRow(
                    url.hash(), urlLength, urlComps, wordsintitle,
                    t.RESULT_NUMB_WORDS, t.RESULT_NUMB_SENTENCES,
                    System.currentTimeMillis(), System.currentTimeMillis(),
                    UTF8.getBytes("en"), Response.DT_TEXT,
                    outlinksSame, outlinksOther);

            // add the words to the RWI index
            Word wprop;
            byte[] wordhash;
            String word;
            for (Map.Entry<String, Word> wentry : t.words().entrySet()) {
                word = wentry.getKey();
                wprop = wentry.getValue();
                assert (wprop.flags != null);
                ientry.setWord(wprop);
                wordhash = Word.word2hash(word);
                index.termIndex.add(wordhash, ientry);
            }
        }
    }

    /**
     * Simulates a multi-word query against the RWI termIndex.
     *
     * @throws SpaceExceededException
     * @throws MalformedURLException
     * @throws IOException
     */
    @Test
    public void testQuery_MultiWordQuery() throws SpaceExceededException, MalformedURLException, IOException {

        // create one test url with this text in the RWI index
        DigestURL url = new DigestURL("http://test.org/test.html");
        storeTestDocTextToTermIndex(url, "One Two Three Four Five. This is a test text. One two three for five");

        // create a query to get the search word hash sets
        QueryGoal qg = new QueryGoal("five test ");
        HandleSet queryHashes = qg.getIncludeHashes();
        HandleSet excludeHashes = qg.getExcludeHashes();
        HandleSet urlselection = null;
        ReferenceFactory<WordReference> termFactory = Segment.wordReferenceFactory;

        // do the search
        TermSearch<WordReference> result = index.termIndex.query(queryHashes, excludeHashes, urlselection, termFactory, Integer.MAX_VALUE);

        // get the joined results
        ReferenceContainer<WordReference> wc = result.joined();

        // we should now have one result (stored to the index above)
        assertTrue("test url hash in result set", wc.has(url.hash()));

        // the returned WordReference is expected to be a joined reference with the properties used in ranking
        Iterator<WordReference> it = wc.entries();
        System.out.println("-----------------");

        // currently the results are not as expected for a multi-word query
        while (it.hasNext()) {
            WordReference r = it.next();
            // position of the search word in the text (first occurrence)
            System.out.println("posintext=" + r.positions() + " (expected=5)");
            // min position of a search word in the text
            System.out.println("minposition=" + r.minposition() + " (expected=5)");
            // max position of a search word in the text
            System.out.println("maxposition=" + r.maxposition() + " (expected=8)");
            // for a multi-word query the distance is expected to be the avg of the search word positions in the text
            System.out.println("distance=" + r.distance() + " (expected=3)");
            // occurrence of the search words in the text
            System.out.println("hitcount=" + r.hitcount() + " (expected=2)");
        }
        System.out.println("-----------------");
    }

}
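The new test class uses JUnit 4, as the org.junit imports show. A minimal sketch for running the suite outside an IDE, assuming JUnit 4 is on the classpath (the runner class name is illustrative):

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;

public class RunSegmentTest {
    public static void main(String[] args) {
        // run the test class programmatically
        Result result = JUnitCore.runClasses(net.yacy.search.index.SegmentTest.class);
        // report any failed assertions
        for (Failure failure : result.getFailures()) {
            System.out.println(failure.toString());
        }
        System.out.println("success: " + result.wasSuccessful());
    }
}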