You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/test/java/net/yacy/peers/graphics/WebStructureGraphTest.java

373 lines
13 KiB

// WebStructureGraphTest.java
// Copyright 2017 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.peers.graphics;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.junit.Assert;
import org.junit.Test;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.peers.graphics.WebStructureGraph.LearnObject;
import net.yacy.peers.graphics.WebStructureGraph.StructureEntry;
/**
* Unit tests for {@link WebStructureGraph}
*
* @author luccioman
*
*/
public class WebStructureGraphTest {
/**
* Most basic out going references unit test
*/
@Test
public void testOutgoingReferences() throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
final DigestURL source = new DigestURL("http://source.net/index.html");
final String sourceHash = source.hosthash();
final Set<DigestURL> targets = new HashSet<>();
final DigestURL target = new DigestURL("http://target.com/index.html");
final String targetHash = target.hosthash();
targets.add(target);
LearnObject lro = new LearnObject(source, targets);
graph.learnrefs(lro);
/* Check that reference from the exact source URL is retrieved from structure */
StructureEntry outRefs = graph.outgoingReferences(sourceHash);
Assert.assertNotNull(outRefs);
Assert.assertEquals("source.net", outRefs.hostname);
Assert.assertNotNull(outRefs.references);
Assert.assertEquals(1, outRefs.references.size());
Assert.assertEquals(Integer.valueOf(1), outRefs.references.get(targetHash));
/* Check that reference from the host name URL is retrieved from structure */
outRefs = graph.outgoingReferences(new DigestURL("http://source.net").hosthash());
Assert.assertNotNull(outRefs);
Assert.assertEquals("source.net", outRefs.hostname);
Assert.assertNotNull(outRefs.references);
Assert.assertEquals(1, outRefs.references.size());
Assert.assertEquals(Integer.valueOf(1), outRefs.references.get(targetHash));
} finally {
graph.close();
}
}
/**
* Out going references from one source document to different resources on the same target host
*/
@Test
public void testOutgoingFromOneToMultipleSameTargeHost() throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
final DigestURL source = new DigestURL("http://source.net/index.html");
final String sourceHash = source.hosthash();
final Set<DigestURL> targets = new HashSet<>();
final DigestURL indexTarget = new DigestURL("http://target.com/index.html");
targets.add(indexTarget);
final DigestURL pathTarget = new DigestURL("http://target.com/path/doc.html");
targets.add(pathTarget);
final DigestURL queryTarget = new DigestURL("http://target.com/path/query?param=value");
targets.add(queryTarget);
LearnObject lro = new LearnObject(source, targets);
graph.learnrefs(lro);
/* Check that accumulated references from the host name URL is retrieved from structure */
StructureEntry outRefs = graph.outgoingReferences(sourceHash);
Assert.assertNotNull(outRefs);
Assert.assertEquals("source.net", outRefs.hostname);
Assert.assertNotNull(outRefs.references);
/* One accumulated host target reference */
Assert.assertEquals(1, outRefs.references.size());
/* 3 accumulated links to that target host */
Assert.assertEquals(Integer.valueOf(3), outRefs.references.get(indexTarget.hosthash()));
} finally {
graph.close();
}
}
/**
* Out going references by host name
*/
@Test
public void outgoingReferencesByHostName() throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
final DigestURL httpSource = new DigestURL("http://source.net/index.html");
Set<DigestURL> targets = new HashSet<>();
final DigestURL indexTarget = new DigestURL("http://target.com/index.html");
targets.add(indexTarget);
LearnObject lro = new LearnObject(httpSource, targets);
graph.learnrefs(lro);
final DigestURL httpsSource = new DigestURL("https://source.net/index.html");
targets = new HashSet<>();
final DigestURL pathTarget = new DigestURL("http://target.com/path");
targets.add(pathTarget);
lro = new LearnObject(httpsSource, targets);
graph.learnrefs(lro);
final DigestURL otherPortSource = new DigestURL("https://source.net:8080/index.html");
targets = new HashSet<>();
final DigestURL queryTarget = new DigestURL("http://target.com/query?param=value");
targets.add(queryTarget);
lro = new LearnObject(otherPortSource, targets);
graph.learnrefs(lro);
/* Check that accumulated references from the host name is retrieved from structure */
Map<String, Integer> outRefs = graph.outgoingReferencesByHostName("source.net");
Assert.assertNotNull(outRefs);
Assert.assertEquals(1, outRefs.size());
Assert.assertEquals(new DigestURL("http://target.com").hosthash(), outRefs.keySet().iterator().next());
Assert.assertEquals(Integer.valueOf(3), outRefs.values().iterator().next());
/* Check that accumulated references from unknown host name is empty */
outRefs = graph.outgoingReferencesByHostName("test.net");
Assert.assertNotNull(outRefs);
Assert.assertTrue(outRefs.isEmpty());
} finally {
graph.close();
}
}
/**
* Most basic incoming references unit test
*/
@Test
public void testIncomingReferences() throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
final DigestURL source = new DigestURL("http://source.net/index.html");
final String sourceHash = source.hosthash();
final Set<DigestURL> targets = new HashSet<>();
final DigestURL target = new DigestURL("http://target.com/index.html");
final String targetHash = target.hosthash();
targets.add(target);
LearnObject lro = new LearnObject(source, targets);
graph.learnrefs(lro);
/* Check that reference to the exact target URL is retrieved from structure */
StructureEntry inRefs = graph.incomingReferences(targetHash);
Assert.assertNotNull(inRefs);
Assert.assertEquals("target.com", inRefs.hostname);
Assert.assertNotNull(inRefs.references);
Assert.assertEquals(1, inRefs.references.size());
Assert.assertEquals(Integer.valueOf(1), inRefs.references.get(sourceHash));
/* Check that reference to the host name target URL is retrieved from structure */
inRefs = graph.incomingReferences(new DigestURL("http://target.com").hosthash());
Assert.assertNotNull(inRefs);
Assert.assertEquals("target.com", inRefs.hostname);
Assert.assertNotNull(inRefs.references);
Assert.assertEquals(1, inRefs.references.size());
Assert.assertEquals(Integer.valueOf(1), inRefs.references.get(sourceHash));
} finally {
graph.close();
}
}
/**
* Incoming references from multiple sources on the same host to one target URL
*/
@Test
public void testIncomingReferencesFromMultipleSourcesOnOneHost() throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
final DigestURL indexSource = new DigestURL("http://source.net/index.html");
final String sourceHash = indexSource.hosthash();
Set<DigestURL> targets = new HashSet<>();
final DigestURL target = new DigestURL("http://target.com/index.html");
final String targetHash = target.hosthash();
targets.add(target);
LearnObject lro = new LearnObject(indexSource, targets);
graph.learnrefs(lro);
final DigestURL pathSource = new DigestURL("http://source.net/path/doc.html");
targets = new HashSet<>();
targets.add(target);
lro = new LearnObject(pathSource, targets);
graph.learnrefs(lro);
final DigestURL querySource = new DigestURL("http://source.net/query?param=value");
targets = new HashSet<>();
targets.add(target);
lro = new LearnObject(querySource, targets);
graph.learnrefs(lro);
/* Check that reference to the exact target URL is retrieved from structure */
StructureEntry inRefs = graph.incomingReferences(targetHash);
Assert.assertNotNull(inRefs);
Assert.assertEquals("target.com", inRefs.hostname);
Assert.assertNotNull(inRefs.references);
/* One accumulated host source reference */
Assert.assertEquals(1, inRefs.references.size());
/* 3 accumulated links from that host */
Assert.assertEquals(Integer.valueOf(3), inRefs.references.get(sourceHash));
} finally {
graph.close();
}
}
/**
* Incoming references from multiple sources on the same host to one target
* URL accumulated between old and new structure
*/
@Test
public void testIncomingReferencesFromNewAndOld() throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
final DigestURL indexSource = new DigestURL("http://source.net/index.html");
final String sourceHash = indexSource.hosthash();
Set<DigestURL> targets = new HashSet<>();
final DigestURL target = new DigestURL("http://target.com/index.html");
final String targetHash = target.hosthash();
targets.add(target);
LearnObject lro = new LearnObject(indexSource, targets);
graph.learnrefs(lro);
/* Backup learned reference to the old structure */
graph.joinOldNew();
final DigestURL pathSource = new DigestURL("http://source.net/path/doc.html");
targets = new HashSet<>();
targets.add(target);
lro = new LearnObject(pathSource, targets);
graph.learnrefs(lro);
final DigestURL querySource = new DigestURL("http://source.net/query?param=value");
targets = new HashSet<>();
targets.add(target);
lro = new LearnObject(querySource, targets);
graph.learnrefs(lro);
/* Check that reference to the exact target URL is retrieved from structure */
StructureEntry inRefs = graph.incomingReferences(targetHash);
Assert.assertNotNull(inRefs);
Assert.assertEquals("target.com", inRefs.hostname);
Assert.assertNotNull(inRefs.references);
/* One accumulated host source reference */
Assert.assertEquals(1, inRefs.references.size());
/* 3 accumulated links from that host */
Assert.assertEquals(Integer.valueOf(3), inRefs.references.get(sourceHash));
} finally {
graph.close();
}
}
/**
* Simple performance measurements with a test structure filled to its limits.
*/
public static void main(String args[]) throws MalformedURLException {
WebStructureGraph graph = new WebStructureGraph(null);
try {
long beginTime = System.nanoTime();
/* Generate maxhosts structure entries */
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
final DigestURL source = new DigestURL("http://source" + i + ".net/index.html");
final Set<DigestURL> targets = new HashSet<>();
/* Generate maxref targets */
for(int j = 0; j < WebStructureGraph.maxref; j++) {
final DigestURL target = new DigestURL("http://target" + String.valueOf(j) + ".com/index.html");
targets.add(target);
}
LearnObject lro = new LearnObject(source, targets);
graph.learnrefs(lro);
}
long endTime = System.nanoTime();
System.out.println("testPerfs test structure initialisation time : " + ((endTime - beginTime) / 1000000000) + " seconds");
beginTime = System.nanoTime();
/* Loop and look for incoming references on each sample generated target */
for(int j = 0; j < WebStructureGraph.maxref; j++) {
String targetHash = new DigestURL("http://target" + j + ".com/index.html").hosthash();
graph.incomingReferences(targetHash);
}
endTime = System.nanoTime();
System.out.println("testPerfs incomingReferences running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
beginTime = System.nanoTime();
/* Loop and look for outgoing references on each sample generated source */
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
String sourceHash = new DigestURL("http://source" + i + ".net/index.html").hosthash();
graph.outgoingReferences(sourceHash);
}
endTime = System.nanoTime();
System.out.println("testPerfs outgoingReferences running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
beginTime = System.nanoTime();
/* Loop and look for host hashes from host name on each sample generated source */
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
graph.hostName2HostHashes("source" + i + ".net");
}
endTime = System.nanoTime();
System.out.println("testPerfs hostName2HostHashes running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
} finally {
graph.close();
ConcurrentLog.shutdown();
}
}
}