https://github.com/ptnplanet/Java-Naive-Bayes-Classifier and modified inside the loklak.org project. After optimization in loklak it was inserted into the net.yacy.cora.bayes package. It shall be used to create custom search navigation filters. The original copyright notice was copied from the README.md at https://github.com/ptnplanet/Java-Naive-Bayes-Classifier/blob/master/README.md. The original package domain was de.daslaboratorium.machinelearning.classifier.
pull/9/merge
parent
1bced1ae60
commit
1ccbf739b1
@ -0,0 +1,154 @@
|
||||
/*
|
||||
* The MIT License (MIT)
|
||||
* ------------------
|
||||
*
|
||||
* Copyright (c) 2012-2014 Philipp Nolte
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This software was taken from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier
|
||||
* and inserted into the loklak class hierarchy to be enhanced and extended
|
||||
* by @0rb1t3r. After optimization in loklak it was inserted into the net.yacy.cora.bayes
|
||||
* package. It shall be used to create custom search navigation filters.
|
||||
* The original copyright notice was copied from the README.md
|
||||
* from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier/blob/master/README.md
|
||||
* The original package domain was de.daslaboratorium.machinelearning.classifier
|
||||
*/
|
||||
|
||||
package net.yacy.cora.bayes;
|
||||
|
||||
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* A concrete implementation of the abstract Classifier class. The Bayes
|
||||
* classifier implements a naive Bayes approach to classifying a given set of
|
||||
* features: classify(feat1,...,featN) = argmax(P(cat)*PROD(P(featI|cat)
|
||||
*
|
||||
* @author Philipp Nolte
|
||||
*
|
||||
* @see http://en.wikipedia.org/wiki/Naive_Bayes_classifier
|
||||
*
|
||||
* @param <T> The feature class.
|
||||
* @param <K> The category class.
|
||||
*/
|
||||
public class BayesClassifier<T, K> extends Classifier<T, K> {
|
||||
|
||||
/**
|
||||
* Calculates the product of all feature probabilities: PROD(P(featI|cat)
|
||||
*
|
||||
* @param features The set of features to use.
|
||||
* @param category The category to test for.
|
||||
* @return The product of all feature probabilities.
|
||||
*/
|
||||
private float featuresProbabilityProduct(Collection<T> features,
|
||||
K category) {
|
||||
float product = 1.0f;
|
||||
for (T feature : features)
|
||||
product *= this.featureWeighedAverage(feature, category);
|
||||
return product;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the probability that the features can be classified as the
|
||||
* category given.
|
||||
*
|
||||
* @param features The set of features to use.
|
||||
* @param category The category to test for.
|
||||
* @return The probability that the features can be classified as the
|
||||
* category.
|
||||
*/
|
||||
private float categoryProbability(Collection<T> features, K category) {
|
||||
return ((float) this.categoryCount(category)
|
||||
/ (float) this.getCategoriesTotal())
|
||||
* featuresProbabilityProduct(features, category);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a sorted <code>Set</code> of probabilities that the given set
|
||||
* of features is classified as the available categories.
|
||||
*
|
||||
* @param features The set of features to use.
|
||||
* @return A sorted <code>Set</code> of category-probability-entries.
|
||||
*/
|
||||
private SortedSet<Classification<T, K>> categoryProbabilities(
|
||||
Collection<T> features) {
|
||||
|
||||
/*
|
||||
* Sort the set according to the possibilities. Because we have to sort
|
||||
* by the mapped value and not by the mapped key, we can not use a
|
||||
* sorted tree (TreeMap) and we have to use a set-entry approach to
|
||||
* achieve the desired functionality. A custom comparator is therefore
|
||||
* needed.
|
||||
*/
|
||||
SortedSet<Classification<T, K>> probabilities =
|
||||
new TreeSet<Classification<T, K>>(
|
||||
new Comparator<Classification<T, K>>() {
|
||||
|
||||
@Override
|
||||
public int compare(Classification<T, K> o1,
|
||||
Classification<T, K> o2) {
|
||||
int toReturn = Float.compare(
|
||||
o1.getProbability(), o2.getProbability());
|
||||
if ((toReturn == 0)
|
||||
&& !o1.getCategory().equals(o2.getCategory()))
|
||||
toReturn = -1;
|
||||
return toReturn;
|
||||
}
|
||||
});
|
||||
|
||||
for (K category : this.getCategories())
|
||||
probabilities.add(new Classification<T, K>(
|
||||
features, category,
|
||||
this.categoryProbability(features, category)));
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies the given set of features.
|
||||
*
|
||||
* @return The category the set of features is classified as.
|
||||
*/
|
||||
@Override
|
||||
public Classification<T, K> classify(Collection<T> features) {
|
||||
SortedSet<Classification<T, K>> probabilites =
|
||||
this.categoryProbabilities(features);
|
||||
|
||||
if (probabilites.size() > 0) {
|
||||
return probabilites.last();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies the given set of features. and return the full details of the
|
||||
* classification.
|
||||
*
|
||||
* @return The set of categories the set of features is classified as.
|
||||
*/
|
||||
public Collection<Classification<T, K>> classifyDetailed(
|
||||
Collection<T> features) {
|
||||
return this.categoryProbabilities(features);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* The MIT License (MIT)
|
||||
* ------------------
|
||||
*
|
||||
* Copyright (c) 2012-2014 Philipp Nolte
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This software was taken from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier
|
||||
* and inserted into the loklak class hierarchy to be enhanced and extended
|
||||
* by @0rb1t3r. After optimization in loklak it was inserted into the net.yacy.cora.bayes
|
||||
* package. It shall be used to create custom search navigation filters.
|
||||
* The original copyright notice was copied from the README.md
|
||||
* from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier/blob/master/README.md
|
||||
* The original package domain was de.daslaboratorium.machinelearning.classifier
|
||||
*/
|
||||
|
||||
package net.yacy.cora.bayes;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * A basic, immutable wrapper reflecting a classification. It stores the
 * featureset, the category it was classified as and the probability assigned
 * to that pairing.
 *
 * <p>The featureset collection is stored and returned by reference; it is not
 * copied.
 *
 * @author Philipp Nolte
 *
 * @param <T> The feature class.
 * @param <K> The category class.
 */
public class Classification<T, K> {

    /**
     * The classified featureset.
     */
    private final Collection<T> featureset;

    /**
     * The category as which the featureset was classified.
     */
    private final K category;

    /**
     * The probability that the featureset belongs to the given category.
     */
    private final float probability;

    /**
     * Constructs a new Classification with the parameters given and a default
     * probability of 1.
     *
     * @param featureset The featureset.
     * @param category The category.
     */
    public Classification(Collection<T> featureset, K category) {
        this(featureset, category, 1.0f);
    }

    /**
     * Constructs a new Classification with the parameters given.
     *
     * @param featureset The featureset.
     * @param category The category.
     * @param probability The probability.
     */
    public Classification(Collection<T> featureset, K category, float probability) {
        this.featureset = featureset;
        this.category = category;
        this.probability = probability;
    }

    /**
     * Retrieves the featureset classified.
     *
     * @return The featureset, as passed to the constructor.
     */
    public Collection<T> getFeatureset() {
        return this.featureset;
    }

    /**
     * Retrieves the classification's probability.
     *
     * @return The probability that the featureset belongs to the category.
     */
    public float getProbability() {
        return this.probability;
    }

    /**
     * Retrieves the category the featureset was classified as.
     *
     * @return The category.
     */
    public K getCategory() {
        return this.category;
    }

    /**
     * Renders category, probability and featureset in the form
     * {@code Classification [category=..., probability=..., featureset=...]}.
     *
     * @return The textual representation of this classification.
     */
    @Override
    public String toString() {
        return "Classification [category=" + this.category
                + ", probability=" + this.probability
                + ", featureset=" + this.featureset
                + "]";
    }

}
|
Loading…
Reference in new issue