From d27adc2b92f077fb249000b5e2ea4a4ef318f15b Mon Sep 17 00:00:00 2001 From: luccioman Date: Mon, 5 Dec 2016 18:12:21 +0100 Subject: [PATCH] Fixed language detector initialization and NullPointerException cases. NullPointerException occurred when using an Identificator instance which encountered an error in its constructor. This error could be caused by a missing "langdetect" folder in the current folder of the main process, or by simultaneous first calls to the constructor, concurrently initializing the DetectorFactory.langlist. Fixes Mantis issue 714 (http://mantis.tokeek.de/view.php?id=714) --- .../yacy/document/language/Identificator.java | 50 +++++++++++-------- source/net/yacy/search/Switchboard.java | 10 ++++ 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/source/net/yacy/document/language/Identificator.java b/source/net/yacy/document/language/Identificator.java index 6528f0182..a9628788e 100644 --- a/source/net/yacy/document/language/Identificator.java +++ b/source/net/yacy/document/language/Identificator.java @@ -24,8 +24,8 @@ package net.yacy.document.language; -import java.io.File; import java.util.ArrayList; + import com.cybozu.labs.langdetect.Detector; import com.cybozu.labs.langdetect.DetectorFactory; import com.cybozu.labs.langdetect.LangDetectException; @@ -41,9 +41,11 @@ public final class Identificator { private Detector detector; private Language language; + /** + * Default constructor. Requires the DetectorFactory language profiles to be loaded before. 
+ */ public Identificator() { try { - if(DetectorFactory.getLangList().isEmpty()) DetectorFactory.loadProfile(new File("langdetect").toString()); this.detector = DetectorFactory.create(); } catch (LangDetectException e) { ConcurrentLog.logException(e); @@ -56,33 +58,37 @@ public final class Identificator { * @param word */ public void add(final String word) { - if (word == null) return; + if (word == null || this.detector == null) { + return; + } this.detector.append(" " + word); // detector internally caches text up to maxtextlen = default = 10000 chars } /** * Get the detected language with highest probability * if detection probability is above 0.3 (30%) - * Underlaying detector differentiates zh-cn and zh-tw, these are returned as zh here. + * Underlying detector differentiates zh-cn and zh-tw, these are returned as zh here. * @return 2 char language code (ISO 639-1) */ public String getLanguage() { - try { - ArrayList probabilities = this.detector.getProbabilities(); - if(probabilities.isEmpty()) return null; - this.language = this.detector.getProbabilities().get(0); - } catch (LangDetectException e) { - // this contains mostly the message "no features in text" - //ConcurrentLog.logException(e); - return null; - } - // Return language only if probability is higher than 30% to account for missing language profiles - if (this.language.prob > 0.3) { - if (this.language.lang.length() == 2) - return this.language.lang; - else - return this.language.lang.substring(0,2); - } + if(this.detector != null) { + try { + ArrayList probabilities = this.detector.getProbabilities(); + if(probabilities.isEmpty()) return null; + this.language = this.detector.getProbabilities().get(0); + } catch (LangDetectException e) { + // this contains mostly the message "no features in text" + //ConcurrentLog.logException(e); + return null; + } + // Return language only if probability is higher than 30% to account for missing language profiles + if (this.language.prob > 0.3) { + if 
(this.language.lang.length() == 2) { + return this.language.lang; + } + return this.language.lang.substring(0,2); + } + } return null; @@ -95,8 +101,8 @@ public final class Identificator { public double getProbability() { if (language != null) { return language.prob; - } else - return 0.0; + } + return 0.0; } } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 274f2a6fd..b55fe3340 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -217,6 +217,8 @@ import net.yacy.utils.crypt; import net.yacy.utils.upnp.UPnP; import net.yacy.visualization.CircleTool; +import com.cybozu.labs.langdetect.DetectorFactory; +import com.cybozu.labs.langdetect.LangDetectException; import com.google.common.io.Files; @@ -410,6 +412,14 @@ public final class Switchboard extends serverSwitch { ProbabilisticClassifier.initialize(Switchboard.this.classificationPath); } }.start(); + + // init the language detector + this.log.config("Loading language profiles"); + try { + DetectorFactory.loadProfile(new File(appPath, "langdetect").toString()); + } catch (LangDetectException e) { + ConcurrentLog.logException(e); + } // init global host name cache Domains.init(new File(this.workPath, "globalhosts.list"));