package com.aliasi.test.unit.spell;

import com.aliasi.lm.CompiledNGramProcessLM;
import com.aliasi.lm.NGramProcessLM;
import com.aliasi.spell.CompiledSpellChecker;
import com.aliasi.spell.FixedWeightEditDistance;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Strings;
import com.aliasi.xml.XHtmlWriter;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import junit.framework.Assert;
import org.junit.Test;

/* loaded from: input_file:com/aliasi/test/unit/spell/CompiledSpellCheckerTest.class */
/**
 * Unit tests for {@link CompiledSpellChecker#didYouMean(String)}.
 *
 * <p>Each test trains a character n-gram language model on a single
 * repeated phrase, compiles it, and wraps it in a spell checker whose
 * edit distance assigns the same weight to every non-match edit. The
 * assertions then pin the exact correction returned for a set of queries
 * under different checker configurations.
 */
public class CompiledSpellCheckerTest {

    /** N-gram length passed to every {@link NGramProcessLM} built in this class. */
    private static final int NGRAM_LENGTH = 5;

    /**
     * Checks the correction of a misspelled final token ("SCEDULE") in a
     * three-token query, once with the minimum token length to correct set
     * to 2 and once with it set to 3. The expected correction is the same
     * in both configurations.
     */
    @Test
    public void testShortTokenMidQuery() throws ClassNotFoundException, IOException {
        CompiledNGramProcessLM lm = compileModel(" FINANCE ACT SCHEDULE ", 9999);
        FixedWeightEditDistance editDistance = uniformEditDistance(-1.0d);
        HashSet<String> tokens = tokenSet("FINANCE", "ACT", "SCHEDULE");

        CompiledSpellChecker checker = new CompiledSpellChecker(lm, editDistance, tokens);
        checker.setFirstCharEditCost(-1.0d);
        checker.setSecondCharEditCost(-1.0d);
        checker.setNBest(32);
        checker.setKnownTokenEditCost(-1.0d);
        checker.setNumConsecutiveInsertionsAllowed(1);
        checker.setAllowDelete(true);
        checker.setAllowInsert(true);
        checker.setAllowMatch(true);
        checker.setAllowSubstitute(true);
        checker.setAllowTranspose(true);

        checker.setMinimumTokenLengthToCorrect(2);
        assertCorrection(checker, "FINANCE ACT SCEDULE", "FINANCE ACT SCHEDULE");

        checker.setMinimumTokenLengthToCorrect(3);
        assertCorrection(checker, "FINANCE ACT SCEDULE", "FINANCE ACT SCHEDULE");
    }

    /**
     * Checks that the two-character query "ac" is returned unchanged when
     * the only known token is "ab" and the minimum token length to correct
     * is 2.
     */
    @Test
    public void testShortToken() throws ClassNotFoundException, IOException {
        CompiledNGramProcessLM lm = compileModel(" ab ", 999);
        FixedWeightEditDistance editDistance = uniformEditDistance(-1.0d);
        HashSet<String> tokens = tokenSet("ab");

        CompiledSpellChecker checker = new CompiledSpellChecker(lm, editDistance, tokens);
        checker.setMinimumTokenLengthToCorrect(2);
        assertCorrection(checker, "ac", "ac");
    }

    /**
     * Exercises a checker whose only known token is "Smith" against a range
     * of misspellings, then against the same queries after changing the
     * do-not-edit token set, the minimum token length to correct, and the
     * first-character edit cost.
     *
     * <p>The checker is mutated step by step, so the statements below are
     * order dependent.
     */
    @Test
    public void testTwo() throws ClassNotFoundException, IOException {
        CompiledNGramProcessLM lm = compileModel(" Smith ", 9999);
        FixedWeightEditDistance editDistance = uniformEditDistance(-2.0d);
        HashSet<String> tokens = tokenSet("Smith");
        CompiledSpellChecker checker = new CompiledSpellChecker(lm, editDistance, tokens);

        // Default configuration: every variant is expected to come back as "Smith".
        assertCorrection(checker, "Smythe", "Smith");
        assertCorrection(checker, "mith", "Smith");
        assertCorrection(checker, "Tmith", "Smith");
        assertCorrection(checker, "mSith", "Smith");
        assertCorrection(checker, "Stith", "Smith");
        assertCorrection(checker, "Skth", "Smith");
        assertCorrection(checker, "mith Smith", "Smith Smith");
        assertCorrection(checker, "Smith mith", "Smith Smith");
        assertCorrection(checker, "SmithSmith", "Smith Smith");
        assertCorrection(checker, "Smi th", "Smith");

        // With "mith" registered as do-not-edit, it is expected back verbatim.
        HashSet<String> doNotEdit = tokenSet("mith");
        checker.setDoNotEditTokens(doNotEdit);
        assertCorrection(checker, "mith", "mith");
        assertCorrection(checker, "Smith mith", "Smith mith");

        // Minimum length 3: the two-character pieces are expected unchanged.
        checker.setMinimumTokenLengthToCorrect(3);
        assertCorrection(checker, "Sm th", "Sm th");

        // Minimum length 1: the same query is expected to become "Smith".
        checker.setMinimumTokenLengthToCorrect(1);
        assertCorrection(checker, "Sm th", "Smith");

        // Register both fragments of the query as do-not-edit tokens.
        doNotEdit.add("Sm");
        doNotEdit.add("th");
        checker.setDoNotEditTokens(doNotEdit);
        assertCorrection(checker, "Sm th", "Sm th");

        // Clear the do-not-edit set and make first-character edits very costly:
        // queries that differ from "Smith" at the first character are expected unchanged.
        checker.setDoNotEditTokens(Collections.<String>emptySet());
        checker.setFirstCharEditCost(-1000.0d);
        assertCorrection(checker, "mith", "mith");
        assertCorrection(checker, "Tmith", "Tmith");

        // A fresh checker over the same model but with an empty token set.
        CompiledSpellChecker emptyTokenChecker =
                new CompiledSpellChecker(lm, editDistance, new HashSet<String>());
        assertCorrection(emptyTokenChecker, "Smth", "Smth");
    }

    /**
     * Asserts that the checker's {@code didYouMean} result for a query equals
     * the expected string.
     *
     * @param compiledSpellChecker checker under test
     * @param query text submitted to the checker
     * @param expected correction the checker must return
     */
    void assertCorrection(CompiledSpellChecker compiledSpellChecker, String query, String expected) {
        Assert.assertEquals(expected, compiledSpellChecker.didYouMean(query));
    }

    /**
     * Debugging aid: prints a label followed by the log2 estimate that the
     * checker's language model assigns to a text padded with a single space
     * on each side.
     *
     * @param label prefix written at the start of the output line
     * @param text text whose estimate is printed
     * @param compiledSpellChecker checker supplying the language model
     */
    void displayPs(String label, String text, CompiledSpellChecker compiledSpellChecker) {
        String padded = Strings.SINGLE_SPACE_STRING + text + Strings.SINGLE_SPACE_STRING;
        double log2Estimate = compiledSpellChecker.languageModel().log2Estimate(padded);
        System.out.println(label + Strings.SINGLE_SPACE_STRING + " log2 P(" + text + ")=" + log2Estimate);
    }

    /**
     * Trains a fresh n-gram model by presenting the same text repeatedly and
     * returns its compiled form.
     *
     * @param trainingText text passed to each {@code train} call
     * @param trainingPasses number of times the text is presented
     * @return the compiled language model
     */
    private static CompiledNGramProcessLM compileModel(String trainingText, int trainingPasses)
            throws ClassNotFoundException, IOException {
        NGramProcessLM lm = new NGramProcessLM(NGRAM_LENGTH);
        for (int pass = 0; pass < trainingPasses; pass++) {
            lm.train(trainingText);
        }
        return (CompiledNGramProcessLM) AbstractExternalizable.compile(lm);
    }

    /**
     * Builds an edit distance whose first weight is {@code 0.0} and whose
     * remaining four weights all equal the given value.
     *
     * @param editWeight weight used for the last four constructor arguments
     * @return the configured edit distance
     */
    private static FixedWeightEditDistance uniformEditDistance(double editWeight) {
        return new FixedWeightEditDistance(0.0d, editWeight, editWeight, editWeight, editWeight);
    }

    /**
     * Creates a mutable set holding the given tokens.
     *
     * @param tokens tokens to place in the set
     * @return a new {@code HashSet} containing exactly the given tokens
     */
    private static HashSet<String> tokenSet(String... tokens) {
        HashSet<String> result = new HashSet<String>();
        for (String token : tokens) {
            result.add(token);
        }
        return result;
    }
}
