|
|
|
// WebStructureGraphTest.java
|
|
|
|
// Copyright 2017 by luccioman; https://github.com/luccioman
|
|
|
|
//
|
|
|
|
// This is a part of YaCy, a peer-to-peer based web search engine
|
|
|
|
//
|
|
|
|
// LICENSE
|
|
|
|
//
|
|
|
|
// This program is free software; you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with this program; if not, write to the Free Software
|
|
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
|
|
|
package net.yacy.peers.graphics;
|
|
|
|
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.util.HashSet;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Set;
|
|
|
|
|
|
|
|
import org.junit.Assert;
|
|
|
|
import org.junit.Test;
|
|
|
|
|
|
|
|
import net.yacy.cora.document.id.DigestURL;
|
|
|
|
import net.yacy.cora.util.ConcurrentLog;
|
|
|
|
import net.yacy.peers.graphics.WebStructureGraph.LearnObject;
|
|
|
|
import net.yacy.peers.graphics.WebStructureGraph.StructureEntry;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Unit tests for {@link WebStructureGraph}
|
|
|
|
*
|
|
|
|
* @author luccioman
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
public class WebStructureGraphTest {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Most basic out going references unit test
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void testOutgoingReferences() throws MalformedURLException {
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
final DigestURL source = new DigestURL("http://source.net/index.html");
|
|
|
|
final String sourceHash = source.hosthash();
|
|
|
|
final Set<DigestURL> targets = new HashSet<>();
|
|
|
|
|
|
|
|
final DigestURL target = new DigestURL("http://target.com/index.html");
|
|
|
|
final String targetHash = target.hosthash();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
LearnObject lro = new LearnObject(source, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Check that reference from the exact source URL is retrieved from structure */
|
|
|
|
StructureEntry outRefs = graph.outgoingReferences(sourceHash);
|
|
|
|
|
|
|
|
Assert.assertNotNull(outRefs);
|
|
|
|
Assert.assertEquals("source.net", outRefs.hostname);
|
|
|
|
Assert.assertNotNull(outRefs.references);
|
|
|
|
Assert.assertEquals(1, outRefs.references.size());
|
|
|
|
Assert.assertEquals(Integer.valueOf(1), outRefs.references.get(targetHash));
|
|
|
|
|
|
|
|
/* Check that reference from the host name URL is retrieved from structure */
|
|
|
|
outRefs = graph.outgoingReferences(new DigestURL("http://source.net").hosthash());
|
|
|
|
|
|
|
|
Assert.assertNotNull(outRefs);
|
|
|
|
Assert.assertEquals("source.net", outRefs.hostname);
|
|
|
|
Assert.assertNotNull(outRefs.references);
|
|
|
|
Assert.assertEquals(1, outRefs.references.size());
|
|
|
|
Assert.assertEquals(Integer.valueOf(1), outRefs.references.get(targetHash));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Out going references from one source document to different resources on the same target host
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void testOutgoingFromOneToMultipleSameTargeHost() throws MalformedURLException {
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
final DigestURL source = new DigestURL("http://source.net/index.html");
|
|
|
|
final String sourceHash = source.hosthash();
|
|
|
|
final Set<DigestURL> targets = new HashSet<>();
|
|
|
|
|
|
|
|
final DigestURL indexTarget = new DigestURL("http://target.com/index.html");
|
|
|
|
targets.add(indexTarget);
|
|
|
|
|
|
|
|
final DigestURL pathTarget = new DigestURL("http://target.com/path/doc.html");
|
|
|
|
targets.add(pathTarget);
|
|
|
|
|
|
|
|
final DigestURL queryTarget = new DigestURL("http://target.com/path/query?param=value");
|
|
|
|
targets.add(queryTarget);
|
|
|
|
|
|
|
|
LearnObject lro = new LearnObject(source, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Check that accumulated references from the host name URL is retrieved from structure */
|
|
|
|
StructureEntry outRefs = graph.outgoingReferences(sourceHash);
|
|
|
|
|
|
|
|
Assert.assertNotNull(outRefs);
|
|
|
|
Assert.assertEquals("source.net", outRefs.hostname);
|
|
|
|
Assert.assertNotNull(outRefs.references);
|
|
|
|
/* One accumulated host target reference */
|
|
|
|
Assert.assertEquals(1, outRefs.references.size());
|
|
|
|
/* 3 accumulated links to that target host */
|
|
|
|
Assert.assertEquals(Integer.valueOf(3), outRefs.references.get(indexTarget.hosthash()));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Out going references by host name
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void outgoingReferencesByHostName() throws MalformedURLException {
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
final DigestURL httpSource = new DigestURL("http://source.net/index.html");
|
|
|
|
Set<DigestURL> targets = new HashSet<>();
|
|
|
|
final DigestURL indexTarget = new DigestURL("http://target.com/index.html");
|
|
|
|
targets.add(indexTarget);
|
|
|
|
LearnObject lro = new LearnObject(httpSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
final DigestURL httpsSource = new DigestURL("https://source.net/index.html");
|
|
|
|
targets = new HashSet<>();
|
|
|
|
final DigestURL pathTarget = new DigestURL("http://target.com/path");
|
|
|
|
targets.add(pathTarget);
|
|
|
|
lro = new LearnObject(httpsSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
final DigestURL otherPortSource = new DigestURL("https://source.net:8080/index.html");
|
|
|
|
targets = new HashSet<>();
|
|
|
|
final DigestURL queryTarget = new DigestURL("http://target.com/query?param=value");
|
|
|
|
targets.add(queryTarget);
|
|
|
|
lro = new LearnObject(otherPortSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Check that accumulated references from the host name is retrieved from structure */
|
|
|
|
Map<String, Integer> outRefs = graph.outgoingReferencesByHostName("source.net");
|
|
|
|
|
|
|
|
Assert.assertNotNull(outRefs);
|
|
|
|
Assert.assertEquals(1, outRefs.size());
|
|
|
|
Assert.assertEquals(new DigestURL("http://target.com").hosthash(), outRefs.keySet().iterator().next());
|
|
|
|
Assert.assertEquals(Integer.valueOf(3), outRefs.values().iterator().next());
|
|
|
|
|
|
|
|
/* Check that accumulated references from unknown host name is empty */
|
|
|
|
outRefs = graph.outgoingReferencesByHostName("test.net");
|
|
|
|
|
|
|
|
Assert.assertNotNull(outRefs);
|
|
|
|
Assert.assertTrue(outRefs.isEmpty());
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Most basic incoming references unit test
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void testIncomingReferences() throws MalformedURLException {
|
|
|
|
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
final DigestURL source = new DigestURL("http://source.net/index.html");
|
|
|
|
final String sourceHash = source.hosthash();
|
|
|
|
final Set<DigestURL> targets = new HashSet<>();
|
|
|
|
|
|
|
|
final DigestURL target = new DigestURL("http://target.com/index.html");
|
|
|
|
final String targetHash = target.hosthash();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
LearnObject lro = new LearnObject(source, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Check that reference to the exact target URL is retrieved from structure */
|
|
|
|
StructureEntry inRefs = graph.incomingReferences(targetHash);
|
|
|
|
|
|
|
|
Assert.assertNotNull(inRefs);
|
|
|
|
Assert.assertEquals("target.com", inRefs.hostname);
|
|
|
|
Assert.assertNotNull(inRefs.references);
|
|
|
|
Assert.assertEquals(1, inRefs.references.size());
|
|
|
|
Assert.assertEquals(Integer.valueOf(1), inRefs.references.get(sourceHash));
|
|
|
|
|
|
|
|
/* Check that reference to the host name target URL is retrieved from structure */
|
|
|
|
inRefs = graph.incomingReferences(new DigestURL("http://target.com").hosthash());
|
|
|
|
|
|
|
|
Assert.assertNotNull(inRefs);
|
|
|
|
Assert.assertEquals("target.com", inRefs.hostname);
|
|
|
|
Assert.assertNotNull(inRefs.references);
|
|
|
|
Assert.assertEquals(1, inRefs.references.size());
|
|
|
|
Assert.assertEquals(Integer.valueOf(1), inRefs.references.get(sourceHash));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Incoming references from multiple sources on the same host to one target URL
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void testIncomingReferencesFromMultipleSourcesOnOneHost() throws MalformedURLException {
|
|
|
|
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
final DigestURL indexSource = new DigestURL("http://source.net/index.html");
|
|
|
|
final String sourceHash = indexSource.hosthash();
|
|
|
|
Set<DigestURL> targets = new HashSet<>();
|
|
|
|
|
|
|
|
final DigestURL target = new DigestURL("http://target.com/index.html");
|
|
|
|
final String targetHash = target.hosthash();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
LearnObject lro = new LearnObject(indexSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
final DigestURL pathSource = new DigestURL("http://source.net/path/doc.html");
|
|
|
|
targets = new HashSet<>();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
lro = new LearnObject(pathSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
final DigestURL querySource = new DigestURL("http://source.net/query?param=value");
|
|
|
|
targets = new HashSet<>();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
lro = new LearnObject(querySource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Check that reference to the exact target URL is retrieved from structure */
|
|
|
|
StructureEntry inRefs = graph.incomingReferences(targetHash);
|
|
|
|
|
|
|
|
Assert.assertNotNull(inRefs);
|
|
|
|
Assert.assertEquals("target.com", inRefs.hostname);
|
|
|
|
Assert.assertNotNull(inRefs.references);
|
|
|
|
/* One accumulated host source reference */
|
|
|
|
Assert.assertEquals(1, inRefs.references.size());
|
|
|
|
/* 3 accumulated links from that host */
|
|
|
|
Assert.assertEquals(Integer.valueOf(3), inRefs.references.get(sourceHash));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Incoming references from multiple sources on the same host to one target
|
|
|
|
* URL accumulated between old and new structure
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void testIncomingReferencesFromNewAndOld() throws MalformedURLException {
|
|
|
|
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
final DigestURL indexSource = new DigestURL("http://source.net/index.html");
|
|
|
|
final String sourceHash = indexSource.hosthash();
|
|
|
|
Set<DigestURL> targets = new HashSet<>();
|
|
|
|
|
|
|
|
final DigestURL target = new DigestURL("http://target.com/index.html");
|
|
|
|
final String targetHash = target.hosthash();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
LearnObject lro = new LearnObject(indexSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Backup learned reference to the old structure */
|
|
|
|
graph.joinOldNew();
|
|
|
|
|
|
|
|
final DigestURL pathSource = new DigestURL("http://source.net/path/doc.html");
|
|
|
|
targets = new HashSet<>();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
lro = new LearnObject(pathSource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
final DigestURL querySource = new DigestURL("http://source.net/query?param=value");
|
|
|
|
targets = new HashSet<>();
|
|
|
|
targets.add(target);
|
|
|
|
|
|
|
|
lro = new LearnObject(querySource, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
|
|
|
|
/* Check that reference to the exact target URL is retrieved from structure */
|
|
|
|
StructureEntry inRefs = graph.incomingReferences(targetHash);
|
|
|
|
|
|
|
|
Assert.assertNotNull(inRefs);
|
|
|
|
Assert.assertEquals("target.com", inRefs.hostname);
|
|
|
|
Assert.assertNotNull(inRefs.references);
|
|
|
|
/* One accumulated host source reference */
|
|
|
|
Assert.assertEquals(1, inRefs.references.size());
|
|
|
|
/* 3 accumulated links from that host */
|
|
|
|
Assert.assertEquals(Integer.valueOf(3), inRefs.references.get(sourceHash));
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Simple performance measurements with a test structure filled to its limits.
|
|
|
|
*/
|
|
|
|
public static void main(String args[]) throws MalformedURLException {
|
|
|
|
WebStructureGraph graph = new WebStructureGraph(null);
|
|
|
|
try {
|
|
|
|
long beginTime = System.nanoTime();
|
|
|
|
/* Generate maxhosts structure entries */
|
|
|
|
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
|
|
|
|
final DigestURL source = new DigestURL("http://source" + i + ".net/index.html");
|
|
|
|
final Set<DigestURL> targets = new HashSet<>();
|
|
|
|
|
|
|
|
/* Generate maxref targets */
|
|
|
|
for(int j = 0; j < WebStructureGraph.maxref; j++) {
|
|
|
|
final DigestURL target = new DigestURL("http://target" + String.valueOf(j) + ".com/index.html");
|
|
|
|
targets.add(target);
|
|
|
|
}
|
|
|
|
|
|
|
|
LearnObject lro = new LearnObject(source, targets);
|
|
|
|
graph.learnrefs(lro);
|
|
|
|
}
|
|
|
|
long endTime = System.nanoTime();
|
|
|
|
System.out.println("testPerfs test structure initialisation time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
|
|
|
|
|
|
|
beginTime = System.nanoTime();
|
|
|
|
/* Loop and look for incoming references on each sample generated target */
|
|
|
|
for(int j = 0; j < WebStructureGraph.maxref; j++) {
|
|
|
|
String targetHash = new DigestURL("http://target" + j + ".com/index.html").hosthash();
|
|
|
|
graph.incomingReferences(targetHash);
|
|
|
|
}
|
|
|
|
endTime = System.nanoTime();
|
|
|
|
System.out.println("testPerfs incomingReferences running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
|
|
|
|
|
|
|
beginTime = System.nanoTime();
|
|
|
|
/* Loop and look for outgoing references on each sample generated source */
|
|
|
|
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
|
|
|
|
String sourceHash = new DigestURL("http://source" + i + ".net/index.html").hosthash();
|
|
|
|
graph.outgoingReferences(sourceHash);
|
|
|
|
}
|
|
|
|
endTime = System.nanoTime();
|
|
|
|
System.out.println("testPerfs outgoingReferences running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
|
|
|
|
|
|
|
beginTime = System.nanoTime();
|
|
|
|
/* Loop and look for host hashes from host name on each sample generated source */
|
|
|
|
for(int i = 0; i < WebStructureGraph.maxhosts; i++) {
|
|
|
|
graph.hostName2HostHashes("source" + i + ".net");
|
|
|
|
}
|
|
|
|
endTime = System.nanoTime();
|
|
|
|
System.out.println("testPerfs hostName2HostHashes running time : " + ((endTime - beginTime) / 1000000000) + " seconds");
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
graph.close();
|
|
|
|
ConcurrentLog.shutdown();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|