calculated ranking parameterpull/88/head
parent
3cc2af8f92
commit
20a1b29ed3
@ -0,0 +1,100 @@
|
|||||||
|
/**
|
||||||
|
* ReferenceContainerTest
|
||||||
|
* part of YaCy
|
||||||
|
* Copyright 2016 by reger24; https://github.com/reger24
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package net.yacy.kelondro.rwi;
|
||||||
|
|
||||||
|
import java.util.Queue;
|
||||||
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
import net.yacy.cora.document.id.DigestURL;
|
||||||
|
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||||
|
import net.yacy.crawler.retrieval.Response;
|
||||||
|
import net.yacy.kelondro.data.word.Word;
|
||||||
|
import net.yacy.kelondro.data.word.WordReference;
|
||||||
|
import net.yacy.kelondro.data.word.WordReferenceFactory;
|
||||||
|
import net.yacy.kelondro.data.word.WordReferenceVars;
|
||||||
|
import net.yacy.kelondro.util.Bitfield;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for ReferenceContainer class.
|
||||||
|
*
|
||||||
|
* @author reger24
|
||||||
|
*/
|
||||||
|
public class ReferenceContainerTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of add method, of class ReferenceContainer. this also demonstrates a
|
||||||
|
* issue with word.distance() used in ranking
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testAdd() throws Exception {
|
||||||
|
ReferenceFactory<WordReference> wordReferenceFactory = new WordReferenceFactory();
|
||||||
|
byte[] termHash = Word.word2hash("test");
|
||||||
|
|
||||||
|
ReferenceContainer<WordReference> rc = new ReferenceContainer(wordReferenceFactory, termHash);
|
||||||
|
|
||||||
|
// prepare a WordReference to be added to the container
|
||||||
|
DigestURL url = new DigestURL("http://test.org/test.html");
|
||||||
|
int urlComps = MultiProtocolURL.urlComps(url.toNormalform(true)).length;
|
||||||
|
int urlLength = url.toNormalform(true).length();
|
||||||
|
|
||||||
|
Queue<Integer> positions = new LinkedBlockingQueue<Integer>();
|
||||||
|
positions.add(10);
|
||||||
|
|
||||||
|
WordReferenceVars wentry = new WordReferenceVars(
|
||||||
|
url.hash(),
|
||||||
|
urlLength, // byte-length of complete URL
|
||||||
|
urlComps, // number of path components
|
||||||
|
0, // length of description/length (longer are better?)
|
||||||
|
1, // how often appears this word in the text
|
||||||
|
1, // total number of words
|
||||||
|
1, // total number of phrases
|
||||||
|
1, // first position of word in text
|
||||||
|
positions, // positions of words that are joined into the reference
|
||||||
|
1, // position of word in its phrase
|
||||||
|
1, // number of the phrase where word appears
|
||||||
|
0, // last-modified time of the document where word appears
|
||||||
|
"en", // (guessed) language of document
|
||||||
|
Response.DT_TEXT, // type of document
|
||||||
|
0, // outlinks to same domain
|
||||||
|
0, // outlinks to other domain
|
||||||
|
new Bitfield(4), // attributes to the url and to the word according the url
|
||||||
|
0.0d
|
||||||
|
);
|
||||||
|
|
||||||
|
rc.add(wentry); // add the ref
|
||||||
|
|
||||||
|
assertTrue("size after add", rc.size() > 0);
|
||||||
|
|
||||||
|
WordReference wc = rc.getReference(url.hash()); // retrieve the ref
|
||||||
|
|
||||||
|
assertNotNull("getReference failed", wc);
|
||||||
|
|
||||||
|
// TODO: ReferenceContainer used for rwi results. As it distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix
|
||||||
|
System.out.println("-----------------------------------------------------------");
|
||||||
|
System.out.println("WordReference (word distance) before add to container: " + wentry.distance());
|
||||||
|
System.out.println("WordReference (word distance) after get from container: " + wc.distance());
|
||||||
|
System.out.println("-----------------------------------------------------------");
|
||||||
|
assertEquals("distance()", wentry.distance(), wc.distance());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in new issue