Fixed references count when multiple links target the same domain name in one document.pull/105/head
parent
f793d97e56
commit
86adfef30f
@ -0,0 +1,263 @@
|
||||
// WebStructureGraphTest.java
|
||||
// Copyright 2017 by luccioman; https://github.com/luccioman
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.peers.graphics;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.peers.graphics.WebStructureGraph.LearnObject;
|
||||
import net.yacy.peers.graphics.WebStructureGraph.StructureEntry;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link WebStructureGraph}
|
||||
*
|
||||
* @author luccioman
|
||||
*
|
||||
*/
|
||||
public class WebStructureGraphTest {
|
||||
|
||||
/**
|
||||
* Most basic out going references unit test
|
||||
*/
|
||||
@Test
|
||||
public void testOutgoingReferences() throws MalformedURLException {
|
||||
WebStructureGraph graph = new WebStructureGraph(null);
|
||||
try {
|
||||
final DigestURL source = new DigestURL("http://source.net/index.html");
|
||||
final String sourceHash = source.hosthash();
|
||||
final Set<DigestURL> targets = new HashSet<>();
|
||||
|
||||
final DigestURL target = new DigestURL("http://target.com/index.html");
|
||||
final String targetHash = target.hosthash();
|
||||
targets.add(target);
|
||||
|
||||
LearnObject lro = new LearnObject(source, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
/* Check that reference from the exact source URL is retrieved from structure */
|
||||
StructureEntry outRefs = graph.outgoingReferences(sourceHash);
|
||||
|
||||
Assert.assertNotNull(outRefs);
|
||||
Assert.assertEquals("source.net", outRefs.hostname);
|
||||
Assert.assertNotNull(outRefs.references);
|
||||
Assert.assertEquals(1, outRefs.references.size());
|
||||
Assert.assertEquals(Integer.valueOf(1), outRefs.references.get(targetHash));
|
||||
|
||||
/* Check that reference from the host name URL is retrieved from structure */
|
||||
outRefs = graph.outgoingReferences(new DigestURL("http://source.net").hosthash());
|
||||
|
||||
Assert.assertNotNull(outRefs);
|
||||
Assert.assertEquals("source.net", outRefs.hostname);
|
||||
Assert.assertNotNull(outRefs.references);
|
||||
Assert.assertEquals(1, outRefs.references.size());
|
||||
Assert.assertEquals(Integer.valueOf(1), outRefs.references.get(targetHash));
|
||||
|
||||
} finally {
|
||||
graph.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Out going references from one source document to different resources on the same target host
|
||||
*/
|
||||
@Test
|
||||
public void testOutgoingFromOneToMultipleSameTargeHost() throws MalformedURLException {
|
||||
WebStructureGraph graph = new WebStructureGraph(null);
|
||||
try {
|
||||
final DigestURL source = new DigestURL("http://source.net/index.html");
|
||||
final String sourceHash = source.hosthash();
|
||||
final Set<DigestURL> targets = new HashSet<>();
|
||||
|
||||
final DigestURL indexTarget = new DigestURL("http://target.com/index.html");
|
||||
targets.add(indexTarget);
|
||||
|
||||
final DigestURL pathTarget = new DigestURL("http://target.com/path/doc.html");
|
||||
targets.add(pathTarget);
|
||||
|
||||
final DigestURL queryTarget = new DigestURL("http://target.com/path/query?param=value");
|
||||
targets.add(queryTarget);
|
||||
|
||||
LearnObject lro = new LearnObject(source, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
/* Check that accumulated references from the host name URL is retrieved from structure */
|
||||
StructureEntry outRefs = graph.outgoingReferences(sourceHash);
|
||||
|
||||
Assert.assertNotNull(outRefs);
|
||||
Assert.assertEquals("source.net", outRefs.hostname);
|
||||
Assert.assertNotNull(outRefs.references);
|
||||
/* One accumulated host target reference */
|
||||
Assert.assertEquals(1, outRefs.references.size());
|
||||
/* 3 accumulated links to that target host */
|
||||
Assert.assertEquals(Integer.valueOf(3), outRefs.references.get(indexTarget.hosthash()));
|
||||
|
||||
} finally {
|
||||
graph.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Most basic incoming references unit test
|
||||
*/
|
||||
@Test
|
||||
public void testIncomingReferences() throws MalformedURLException {
|
||||
|
||||
WebStructureGraph graph = new WebStructureGraph(null);
|
||||
try {
|
||||
final DigestURL source = new DigestURL("http://source.net/index.html");
|
||||
final String sourceHash = source.hosthash();
|
||||
final Set<DigestURL> targets = new HashSet<>();
|
||||
|
||||
final DigestURL target = new DigestURL("http://target.com/index.html");
|
||||
final String targetHash = target.hosthash();
|
||||
targets.add(target);
|
||||
|
||||
LearnObject lro = new LearnObject(source, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
/* Check that reference to the exact target URL is retrieved from structure */
|
||||
StructureEntry inRefs = graph.incomingReferences(targetHash);
|
||||
|
||||
Assert.assertNotNull(inRefs);
|
||||
Assert.assertEquals("target.com", inRefs.hostname);
|
||||
Assert.assertNotNull(inRefs.references);
|
||||
Assert.assertEquals(1, inRefs.references.size());
|
||||
Assert.assertEquals(Integer.valueOf(1), inRefs.references.get(sourceHash));
|
||||
|
||||
/* Check that reference to the host name target URL is retrieved from structure */
|
||||
inRefs = graph.incomingReferences(new DigestURL("http://target.com").hosthash());
|
||||
|
||||
Assert.assertNotNull(inRefs);
|
||||
Assert.assertEquals("target.com", inRefs.hostname);
|
||||
Assert.assertNotNull(inRefs.references);
|
||||
Assert.assertEquals(1, inRefs.references.size());
|
||||
Assert.assertEquals(Integer.valueOf(1), inRefs.references.get(sourceHash));
|
||||
|
||||
} finally {
|
||||
graph.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Incoming references from multiple sources on the same host to one target URL
|
||||
*/
|
||||
@Test
|
||||
public void testIncomingReferencesFromMultipleSourcesOnOneHost() throws MalformedURLException {
|
||||
|
||||
WebStructureGraph graph = new WebStructureGraph(null);
|
||||
try {
|
||||
final DigestURL indexSource = new DigestURL("http://source.net/index.html");
|
||||
final String sourceHash = indexSource.hosthash();
|
||||
Set<DigestURL> targets = new HashSet<>();
|
||||
|
||||
final DigestURL target = new DigestURL("http://target.com/index.html");
|
||||
final String targetHash = target.hosthash();
|
||||
targets.add(target);
|
||||
|
||||
LearnObject lro = new LearnObject(indexSource, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
final DigestURL pathSource = new DigestURL("http://source.net/path/doc.html");
|
||||
targets = new HashSet<>();
|
||||
targets.add(target);
|
||||
|
||||
lro = new LearnObject(pathSource, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
final DigestURL querySource = new DigestURL("http://source.net/query?param=value");
|
||||
targets = new HashSet<>();
|
||||
targets.add(target);
|
||||
|
||||
lro = new LearnObject(querySource, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
/* Check that reference to the exact target URL is retrieved from structure */
|
||||
StructureEntry inRefs = graph.incomingReferences(targetHash);
|
||||
|
||||
Assert.assertNotNull(inRefs);
|
||||
Assert.assertEquals("target.com", inRefs.hostname);
|
||||
Assert.assertNotNull(inRefs.references);
|
||||
/* One accumulated host source reference */
|
||||
Assert.assertEquals(1, inRefs.references.size());
|
||||
/* 3 accumulated links from that host */
|
||||
Assert.assertEquals(Integer.valueOf(3), inRefs.references.get(sourceHash));
|
||||
|
||||
} finally {
|
||||
graph.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple performance measurements with a test structure filled to its limits.
|
||||
*/
|
||||
public static void main(String args[]) throws MalformedURLException {
|
||||
WebStructureGraph graph = new WebStructureGraph(null);
|
||||
try {
|
||||
long beginTime = System.nanoTime();
|
||||
/* Generate maxhosts structure entries */
|
||||
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
|
||||
final DigestURL source = new DigestURL("http://source" + i + ".net/index.html");
|
||||
final Set<DigestURL> targets = new HashSet<>();
|
||||
|
||||
/* Generate maxref targets */
|
||||
for(int j = 0; j < WebStructureGraph.maxref; j++) {
|
||||
final DigestURL target = new DigestURL("http://target" + String.valueOf(j) + ".com/index.html");
|
||||
targets.add(target);
|
||||
}
|
||||
|
||||
LearnObject lro = new LearnObject(source, targets);
|
||||
graph.learnrefs(lro);
|
||||
}
|
||||
long endTime = System.nanoTime();
|
||||
System.out.println("testPerfs test structure initialisation time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
||||
|
||||
beginTime = System.nanoTime();
|
||||
/* Loop and look for incoming references on each sample generated target */
|
||||
for(int j = 0; j < WebStructureGraph.maxref; j++) {
|
||||
String targetHash = new DigestURL("http://target" + j + ".com/index.html").hosthash();
|
||||
graph.incomingReferences(targetHash);
|
||||
}
|
||||
endTime = System.nanoTime();
|
||||
System.out.println("testPerfs incomingReferences running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
||||
|
||||
beginTime = System.nanoTime();
|
||||
/* Loop and look for outgoing references on each sample generated source */
|
||||
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
|
||||
String sourceHash = new DigestURL("http://source" + i + ".net/index.html").hosthash();
|
||||
graph.outgoingReferences(sourceHash);
|
||||
}
|
||||
endTime = System.nanoTime();
|
||||
System.out.println("testPerfs outgoingReferences running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
||||
|
||||
} finally {
|
||||
graph.close();
|
||||
ConcurrentLog.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue