/**
 * ReferenceContainerTest
 * part of YaCy
 * Copyright 2016 by reger24; https://github.com/reger24
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program in the file lgpl21.txt
 * If not, see <http://www.gnu.org/licenses/>.
 */
package net.yacy.kelondro.rwi ;
import java.util.Queue ;
import java.util.concurrent.LinkedBlockingQueue ;
import net.yacy.cora.document.id.DigestURL ;
import net.yacy.cora.document.id.MultiProtocolURL ;
import net.yacy.crawler.retrieval.Response ;
import net.yacy.kelondro.data.word.Word ;
import net.yacy.kelondro.data.word.WordReference ;
import net.yacy.kelondro.data.word.WordReferenceFactory ;
import net.yacy.kelondro.data.word.WordReferenceVars ;
import net.yacy.kelondro.util.Bitfield ;
import static org.junit.Assert.assertEquals ;
import static org.junit.Assert.assertNotNull ;
import static org.junit.Assert.assertTrue ;
import org.junit.Test ;
/**
 * Unit tests for ReferenceContainer class.
 *
 * @author reger24
 */
public class ReferenceContainerTest {
/ * *
* Test of add method , of class ReferenceContainer . this also demonstrates a
* issue with word . distance ( ) used in ranking
* /
@Test
public void testAdd ( ) throws Exception {
ReferenceFactory < WordReference > wordReferenceFactory = new WordReferenceFactory ( ) ;
byte [ ] termHash = Word . word2hash ( "test" ) ;
ReferenceContainer < WordReference > rc = new ReferenceContainer ( wordReferenceFactory , termHash ) ;
// prepare a WordReference to be added to the container
DigestURL url = new DigestURL ( "http://test.org/test.html" ) ;
int urlComps = MultiProtocolURL . urlComps ( url . toNormalform ( true ) ) . length ;
int urlLength = url . toNormalform ( true ) . length ( ) ;
Queue < Integer > positions = new LinkedBlockingQueue < Integer > ( ) ;
positions . add ( 10 ) ;
WordReferenceVars wentry = new WordReferenceVars (
url . hash ( ) ,
urlLength , // byte-length of complete URL
urlComps , // number of path components
0 , // length of description/length (longer are better?)
1 , // how often appears this word in the text
1 , // total number of words
1 , // total number of phrases
1 , // first position of word in text
positions , // positions of words that are joined into the reference
1 , // position of word in its phrase
1 , // number of the phrase where word appears
0 , // last-modified time of the document where word appears
"en" , // (guessed) language of document
Response . DT_TEXT , // type of document
0 , // outlinks to same domain
0 , // outlinks to other domain
new Bitfield ( 4 ) , // attributes to the url and to the word according the url
0.0d
) ;
rc . add ( wentry ) ; // add the ref
assertTrue ( "size after add" , rc . size ( ) > 0 ) ;
WordReference wc = rc . getReference ( url . hash ( ) ) ; // retrieve the ref
assertNotNull ( "getReference failed" , wc ) ;
// TODO: ReferenceContainer used for rwi results. As distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix
System . out . println ( "-----------------------------------------------------------" ) ;
System . out . println ( "WordReference (word distance) before add to container: " + wentry . distance ( ) ) ;
System . out . println ( "WordReference (word distance) after get from container: " + wc . distance ( ) ) ;
System . out . println ( "-----------------------------------------------------------" ) ;
assertEquals ( "distance()" , wentry . distance ( ) , wc . distance ( ) ) ;
}
}