package edu.stanford.nlp.tagger.maxent;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.InDataStreamFile;
import edu.stanford.nlp.io.OutDataStreamFile;
import edu.stanford.nlp.io.PrintFile;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.maxent.CGRunner;
import edu.stanford.nlp.maxent.Problem;
import edu.stanford.nlp.maxent.iis.LambdaSolve;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.objectbank.ReaderIteratorFactory;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.ListProcessor;
import edu.stanford.nlp.process.Morphology;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TransformXML;
import edu.stanford.nlp.process.WhitespaceTokenizer;
import edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.util.DataFilePaths;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.XMLUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.lang.reflect.InvocationTargetException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import weka.classifiers.lazy.kstar.KStarConstants;
import weka.core.xml.XMLSerialization;

/* loaded from: input_file:edu/stanford/nlp/tagger/maxent/MaxentTagger.class */
public class MaxentTagger implements Function<List<? extends HasWord>, ArrayList<TaggedWord>>, ListProcessor<List<? extends HasWord>, ArrayList<TaggedWord>> {
    // ---- Model location constants ----
    // TAGGER_HOME and DEFAULT_NLP_GROUP_MODEL_PATH are blank finals: they have no initializer
    // here, so they must be assigned in a static initializer elsewhere in this class.
    public static final String BASE_TAGGER_HOME = "$NLP_DATA_HOME/data/pos-tagger/wsj3t0-18-left3words";
    public static final String TAGGER_HOME;
    public static final String DEFAULT_NLP_GROUP_MODEL_PATH;
    public static final String DEFAULT_DISTRIBUTION_PATH = "models/left3words-wsj-0-18.tagger";
    // Word dictionary; isRare() consults it through dict.sum(word).
    final Dictionary dict;
    // Tag set; created in init() from the configured language or open/closed class tags.
    TTags tags;
    // Handed to the LambdaSolveTagger constructor during training.
    byte[][] fnumArr;
    // The trained model; prob.lambda holds the values written by saveModel() and printed by dumpModel().
    LambdaSolveTagger prob;
    // Maps each feature key to an Integer that dumpModel() uses as an index into prob.lambda.
    HashMap<FeatureKey, Integer> fAssociations;
    // Two extractor sets; the second is built by ExtractorFramesRare, presumably for rare words.
    Extractors extractors;
    Extractors extractorsRare;
    // Built from the tag set at the end of init().
    AmbiguityClasses ambClasses;
    final boolean alltags = false;
    // Passed to tags.save() when the model is written.
    final HashMap<String, HashSet<String>> tagTokens;
    // ---- Compile-time defaults for the instance thresholds/flags declared further down ----
    static final int RARE_WORD_THRESH = 5;
    static final int MIN_FEATURE_THRESH = 5;
    static final int CUR_WORD_MIN_FEATURE_THRESH = 2;
    static final int RARE_WORD_MIN_FEATURE_THRESH = 10;
    static final int VERY_COMMON_WORD_THRESH = 250;
    static final boolean OCCURRING_TAGS_ONLY = false;
    static final boolean POSSIBLE_TAGS_ONLY = false;
    // Set in init(): 1.0 for lang "english", otherwise a floor value, unless the config overrides it.
    double defaultScore;
    // Largest left/right context over both extractor sets; computed in readExtractors(InputStream).
    int leftContext;
    int rightContext;
    // Configuration captured by init(); may be null.
    TaggerConfig config;
    // ---- Per-instance thresholds and flags; init() overwrites them from a non-null config ----
    private int rareWordThresh;
    int minFeatureThresh;
    int curWordMinFeatureThresh;
    int rareWordMinFeatureThresh;
    int veryCommonWordThresh;
    // Two ints stored in the model file right after the config section (see saveModel/readModelAndInit).
    int xSize;
    int ySize;
    boolean occuringTagsOnly;
    boolean possibleTagsOnly;
    // Guards init() so that it runs at most once per instance.
    private boolean initted;
    static final boolean VERBOSE = false;
    // NOTE(review): javac normally generates this flag itself for `assert` statements; its explicit
    // declaration here looks like a decompilation artifact -- confirm against the original sources.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:edu/stanford/nlp/tagger/maxent/MaxentTagger$TaggerWrapper.class */
    /**
     * Adapts a {@link MaxentTagger} to a {@code Function<String, String>}: raw text goes in,
     * tagged text formatted according to the configured output style comes out.
     *
     * <p>The wrapper keeps a running sentence counter that is used as the sentence id in the
     * XML output styles, so a single instance numbers sentences consecutively across calls.
     */
    public static class TaggerWrapper implements Function<String, String> {
        private final TaggerConfig config;
        private final MaxentTagger tagger;
        private TokenizerFactory<? extends HasWord> tokenizerFactory;
        /** Id given to the next sentence emitted in an XML output style. */
        private int sentNum;
        private final boolean tokenize;
        private final boolean outputVerbosity;
        private final boolean outputLemmas;
        private final PlainTextDocumentReaderAndWriter.OutputStyle outputStyle;
        private final String tagSeparator;
        /** Only created when lemmas are requested; {@code null} otherwise. */
        private final Morphology morpha;

        /**
         * Creates a wrapper with the built-in defaults (see the two-argument constructor).
         *
         * @param maxentTagger the tagger to delegate to
         */
        protected TaggerWrapper(MaxentTagger maxentTagger) {
            this(null, maxentTagger);
        }

        /**
         * Creates a wrapper configured from {@code taggerConfig}.
         *
         * <p>With a {@code null} config the wrapper tokenizes with a default PTB word tokenizer,
         * emits slash-tag output, produces no lemmas and uses a {@code null} tag separator.
         *
         * @param taggerConfig configuration to read the output options from, or {@code null}
         * @param maxentTagger the tagger to delegate to
         */
        public TaggerWrapper(TaggerConfig taggerConfig, MaxentTagger maxentTagger) {
            this.config = taggerConfig;
            this.tagger = maxentTagger;
            if (taggerConfig == null) {
                this.tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newWordTokenizerFactory("");
                this.outputStyle = PlainTextDocumentReaderAndWriter.OutputStyle.SLASH_TAGS;
                this.outputVerbosity = false;
                this.outputLemmas = false;
                this.morpha = null;
                this.tokenize = true;
                this.tagSeparator = null;
                return;
            }
            try {
                this.tokenizerFactory = MaxentTagger.chooseTokenizerFactory(taggerConfig.getTokenize(), taggerConfig.getTokenizerFactory(), taggerConfig.getTokenizerOptions(), taggerConfig.getTokenizerInvertible());
            } catch (Exception e) {
                // Best effort: report the failure and fall back to the PTB word tokenizer.
                System.err.println("Error in tokenizer factory instantiation for class: " + taggerConfig.getTokenizerFactory());
                e.printStackTrace();
                this.tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newWordTokenizerFactory(taggerConfig.getTokenizerOptions());
            }
            this.outputStyle = PlainTextDocumentReaderAndWriter.OutputStyle.fromShortName(taggerConfig.getOutputFormat());
            this.outputVerbosity = taggerConfig.getOutputVerbosity();
            this.outputLemmas = taggerConfig.getOutputLemmas();
            this.morpha = this.outputLemmas ? new Morphology() : null;
            this.tokenize = taggerConfig.getTokenize();
            this.tagSeparator = taggerConfig.getTagSeparator();
        }

        /**
         * Tags {@code str} and renders the result in the configured output style.
         *
         * <p>When tokenization is enabled the text is split into sentences by
         * {@link MaxentTagger#tokenizeText}; otherwise the whole input is treated as one
         * sentence whose tokens are separated by whitespace.
         *
         * @param str the text to tag
         * @return the tagged sentences, concatenated
         * @throws IllegalArgumentException if the configured output style is not handled here
         */
        @Override // edu.stanford.nlp.util.Function
        public String apply(String str) {
            StringBuilder sb = new StringBuilder();
            TestSentence testSentence = new TestSentence(this.tagger);
            // Typed as List<List<HasWord>> to match what tokenizeText returns and what
            // lemmatize/getTsvWords/getXMLWords accept.
            List<List<HasWord>> sentences;
            if (this.tokenize) {
                sentences = MaxentTagger.tokenizeText(new StringReader(str), this.tokenizerFactory);
            } else {
                sentences = new ArrayList<>();
                sentences.add(Sentence.toWordList((List<String>) Arrays.asList(str.split("\\s+"))));
            }
            for (List<HasWord> sentence : sentences) {
                ArrayList<TaggedWord> tagged = testSentence.tagSentence(sentence);
                if (this.outputLemmas) {
                    MaxentTagger.lemmatize(tagged, sentence, this.morpha);
                }
                switch (this.outputStyle) {
                    case TSV:
                        sb.append(MaxentTagger.getTsvWords(this.outputVerbosity, this.outputLemmas, tagged, sentence));
                        break;
                    case XML:
                    case INLINE_XML:
                        // The post-increment hands out the current id and advances the counter.
                        sb.append(MaxentTagger.getXMLWords(tagged, this.sentNum++, sentence, this.outputLemmas));
                        break;
                    case SLASH_TAGS:
                        sb.append(Sentence.listToString(tagged, false, this.tagSeparator)).append(' ');
                        break;
                    default:
                        throw new IllegalArgumentException("Unsupported output style " + this.outputStyle);
                }
            }
            return sb.toString();
        }
    }

    /**
     * Creates an empty tagger with the built-in default thresholds and no model loaded.
     * {@link #init(TaggerConfig)} still has to run before the instance is usable.
     */
    public MaxentTagger() {
        this.dict = new Dictionary();
        this.fAssociations = new HashMap<>();
        // alltags is deliberately not assigned here: it is a final field that already has an
        // initializer at its declaration, so a second assignment does not compile.
        this.tagTokens = new HashMap<>();
        this.rareWordThresh = RARE_WORD_THRESH;
        this.minFeatureThresh = MIN_FEATURE_THRESH;
        this.curWordMinFeatureThresh = CUR_WORD_MIN_FEATURE_THRESH;
        this.rareWordMinFeatureThresh = RARE_WORD_MIN_FEATURE_THRESH;
        this.veryCommonWordThresh = VERY_COMMON_WORD_THRESH;
        this.occuringTagsOnly = OCCURRING_TAGS_ONLY;
        this.possibleTagsOnly = POSSIBLE_TAGS_ONLY;
        this.initted = false;
    }

    /**
     * Loads a tagger from {@code str}, using a configuration built from the arguments
     * {@code "-model", str} and printing loading progress.
     *
     * @param str the model location
     * @throws IOException if the model cannot be read
     * @throws ClassNotFoundException if a serialized class in the model cannot be resolved
     */
    public MaxentTagger(String str) throws IOException, ClassNotFoundException {
        this(str, new TaggerConfig("-model", str), true);
    }

    /**
     * Loads a tagger from {@code str} with the given configuration, printing loading progress.
     *
     * @param str the model location
     * @param taggerConfig the configuration to initialize the tagger with
     * @throws IOException if the model cannot be read
     * @throws ClassNotFoundException if a serialized class in the model cannot be resolved
     */
    public MaxentTagger(String str, TaggerConfig taggerConfig) throws IOException, ClassNotFoundException {
        this(str, taggerConfig, true);
    }

    /**
     * Loads a tagger from {@code str} with the given configuration.
     *
     * @param str the model location
     * @param taggerConfig the configuration to initialize the tagger with
     * @param z whether to print a timing message while the model is read
     * @throws IOException if the model cannot be read
     * @throws ClassNotFoundException if a serialized class in the model cannot be resolved
     */
    public MaxentTagger(String str, TaggerConfig taggerConfig, boolean z) throws IOException, ClassNotFoundException {
        this.dict = new Dictionary();
        this.fAssociations = new HashMap<>();
        // alltags is deliberately not assigned here: it is a final field that already has an
        // initializer at its declaration, so a second assignment does not compile.
        this.tagTokens = new HashMap<>();
        this.rareWordThresh = RARE_WORD_THRESH;
        this.minFeatureThresh = MIN_FEATURE_THRESH;
        this.curWordMinFeatureThresh = CUR_WORD_MIN_FEATURE_THRESH;
        this.rareWordMinFeatureThresh = RARE_WORD_MIN_FEATURE_THRESH;
        this.veryCommonWordThresh = VERY_COMMON_WORD_THRESH;
        this.occuringTagsOnly = OCCURRING_TAGS_ONLY;
        this.possibleTagsOnly = POSSIBLE_TAGS_ONLY;
        this.initted = false;
        readModelAndInit(taggerConfig, str, z);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the model held in {@code prob}, exposed through its {@link LambdaSolve} type.
     *
     * @return the current model; {@code null} until a model has been loaded or trained
     */
    public LambdaSolve getLambdaSolve() {
        return this.prob;
    }

    /**
     * Initializes the tag set, thresholds, default score and (in training mode or without a
     * config) the feature extractors. Does nothing if this instance was already initialized.
     *
     * <p>With a {@code null} config the tagger assumes language {@code "english"}, architecture
     * {@code "left3words"} and no explicit open/closed class tags.
     *
     * @param taggerConfig the configuration to read settings from, or {@code null} for defaults
     * @throws RuntimeException if more than one of language, open-class tags and closed-class
     *         tags is specified in the config
     */
    void init(TaggerConfig taggerConfig) {
        if (this.initted) {
            return;
        }
        this.config = taggerConfig;

        final String lang;
        final String arch;
        final String[] openClassTags;
        final String[] closedClassTags;
        if (taggerConfig == null) {
            lang = "english";
            arch = "left3words";
            openClassTags = StringUtils.EMPTY_STRING_ARRAY;
            closedClassTags = StringUtils.EMPTY_STRING_ARRAY;
        } else {
            lang = taggerConfig.getLang();
            arch = taggerConfig.getArch();
            openClassTags = taggerConfig.getOpenClassTags();
            closedClassTags = taggerConfig.getClosedClassTags();
            // The three ways of defining the tag classes are mutually exclusive.
            if ((openClassTags.length > 0 && !lang.equals("")) || ((closedClassTags.length > 0 && !lang.equals("")) || (closedClassTags.length > 0 && openClassTags.length > 0))) {
                throw new RuntimeException("At least two of lang (\"" + lang + "\"), openClassTags (length " + openClassTags.length + ": " + Arrays.toString(openClassTags) + "),and closedClassTags (length " + closedClassTags.length + ": " + Arrays.toString(closedClassTags) + ") specified---you must choose one!");
            }
            if (openClassTags.length == 0 && lang.equals("") && closedClassTags.length == 0 && !taggerConfig.getLearnClosedClassTags()) {
                System.err.println("warning: no language set, no open-class tags specified, and no closed-class tags specified; assuming ALL tags are open class tags");
            }
        }

        if (openClassTags.length > 0) {
            this.tags = new TTags();
            this.tags.setOpenClassTags(openClassTags);
        } else if (closedClassTags.length > 0) {
            this.tags = new TTags();
            this.tags.setClosedClassTags(closedClassTags);
        } else {
            this.tags = new TTags(lang);
        }

        // Plain 0.0 literals are used instead of weka's KStarConstants.FLOOR (which is 0.0):
        // the tagger has no business depending on an unrelated classifier's constant.
        this.defaultScore = lang.equals("english") ? 1.0d : 0.0d;
        if (taggerConfig != null) {
            this.rareWordThresh = taggerConfig.getRareWordThresh();
            this.minFeatureThresh = taggerConfig.getMinFeatureThresh();
            this.curWordMinFeatureThresh = taggerConfig.getCurWordMinFeatureThresh();
            this.rareWordMinFeatureThresh = taggerConfig.getRareWordMinFeatureThresh();
            this.veryCommonWordThresh = taggerConfig.getVeryCommonWordThresh();
            this.occuringTagsOnly = taggerConfig.occuringTagsOnly();
            this.possibleTagsOnly = taggerConfig.possibleTagsOnly();
            // A negative configured score means "not set": keep the language-based default.
            if (taggerConfig.getDefaultScore() >= 0.0d) {
                this.defaultScore = taggerConfig.getDefaultScore();
            }
        }

        // Extractors are only built here for training (or without a config); when a model
        // is loaded they are read from the model stream instead.
        if (taggerConfig == null || taggerConfig.getMode() == TaggerConfig.Mode.TRAIN) {
            this.extractors = new Extractors(ExtractorFrames.getExtractorFrames(arch));
            this.extractorsRare = new Extractors(ExtractorFramesRare.getExtractorFramesRare(arch, this.tags));
            setExtractorsGlobal();
        }
        this.ambClasses = new AmbiguityClasses(this.tags);
        this.initted = true;
    }

    /**
     * Picks a tokenizer factory from this tagger's own configuration by delegating to the
     * static overload. Requires {@code config} to be non-null.
     *
     * @return the tokenizer factory selected by the configuration
     * @throws ClassNotFoundException if the configured factory class cannot be loaded
     * @throws NoSuchMethodException if the factory class lacks a {@code newTokenizerFactory} method
     * @throws IllegalAccessException if the factory method is not accessible
     * @throws InvocationTargetException if the factory method itself throws
     */
    protected TokenizerFactory<? extends HasWord> chooseTokenizerFactory() throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        return chooseTokenizerFactory(this.config.getTokenize(), this.config.getTokenizerFactory(), this.config.getTokenizerOptions(), this.config.getTokenizerInvertible());
    }

    /**
     * Selects a tokenizer factory.
     *
     * <ul>
     *   <li>No tokenization requested: a whitespace tokenizer factory.</li>
     *   <li>A non-blank factory class name: the result of reflectively calling that class's
     *       no-argument {@code newTokenizerFactory} method.</li>
     *   <li>Otherwise a PTB tokenizer factory, producing CoreLabels with
     *       {@code invertible=true} added to the options when invertibility is requested.</li>
     * </ul>
     *
     * @param z whether the input should be tokenized at all
     * @param str fully qualified name of a custom factory class; blank for the PTB default
     * @param str2 tokenizer options string
     * @param z2 whether an invertible tokenizer is requested
     * @return the selected tokenizer factory
     * @throws ClassNotFoundException if the custom factory class cannot be loaded
     * @throws NoSuchMethodException if it has no {@code newTokenizerFactory()} method
     * @throws IllegalAccessException if that method is not accessible
     * @throws InvocationTargetException if that method throws
     */
    protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean z, String str, String str2, boolean z2) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        if (!z) {
            return WhitespaceTokenizer.factory();
        }
        final String factoryClassName = str.trim();
        if (factoryClassName.length() != 0) {
            // NOTE(review): the options string is passed as the invocation receiver, not as an
            // argument; for a static factory method it is therefore ignored -- confirm intent.
            return (TokenizerFactory) Class.forName(factoryClassName).getMethod("newTokenizerFactory", new Class[0]).invoke(str2, new Object[0]);
        }
        if (!z2) {
            return PTBTokenizer.PTBTokenizerFactory.newWordTokenizerFactory(str2);
        }
        // NOTE(review): String.matches anchors the whole string, so only options that END in
        // "invertible=true" are recognized; otherwise the flag is appended (possibly twice).
        final String invertibleOptions;
        if (str2.equals("")) {
            invertibleOptions = "invertible=true";
        } else {
            invertibleOptions = str2.matches("(^|.*,)invertible=true") ? str2 : str2 + ",invertible=true";
        }
        return PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(invertibleOptions);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Looks up the number associated with a feature key.
     *
     * @param featureKey the key to look up
     * @return the associated number, or {@code -1} if the key is unknown
     */
    public int getNum(FeatureKey featureKey) {
        final Integer index = this.fAssociations.get(featureKey);
        return index != null ? index.intValue() : -1;
    }

    /**
     * Serializes both extractor sets to {@code outputStream} and logs them to stderr.
     *
     * <p>The object stream is flushed but deliberately not closed, because the caller keeps
     * writing to the underlying stream afterwards.
     *
     * @param outputStream the stream to append the serialized extractors to
     * @throws IOException if serialization fails
     */
    private void saveExtractors(OutputStream outputStream) throws IOException {
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(outputStream);
        System.err.println(this.extractors.toString() + "\nrare" + this.extractorsRare.toString());
        objectOutputStream.writeObject(this.extractors);
        objectOutputStream.writeObject(this.extractorsRare);
        // Push any bytes still buffered inside the ObjectOutputStream to the underlying stream
        // before the caller continues writing to that stream directly.
        objectOutputStream.flush();
    }

    /**
     * Reads both extractor sets from the file {@code str}.
     *
     * @param str path of the file holding the serialized extractors
     * @throws IOException if the file cannot be opened or read
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readExtractors(String str) throws IOException, ClassNotFoundException {
        // try-with-resources closes the file even when deserialization fails.
        try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(str))) {
            readExtractors(bufferedInputStream);
        }
    }

    /**
     * Deserializes the regular and rare extractor sets (in that order) from
     * {@code inputStream}, initializes their types, records the widest left and right
     * context over both sets, and registers this tagger as their global holder.
     *
     * <p>NOTE(review): this uses Java native deserialization; only load models from
     * trusted sources.
     *
     * @param inputStream the stream positioned at the serialized extractors
     * @throws IOException if reading fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readExtractors(InputStream inputStream) throws IOException, ClassNotFoundException {
        final ObjectInputStream in = new ObjectInputStream(inputStream);
        this.extractors = (Extractors) in.readObject();
        this.extractorsRare = (Extractors) in.readObject();
        this.extractors.initTypes();
        this.extractorsRare.initTypes();
        this.leftContext = Math.max(this.extractors.leftContext(), this.extractorsRare.leftContext());
        this.rightContext = Math.max(this.extractors.rightContext(), this.extractorsRare.rightContext());
        setExtractorsGlobal();
    }

    /** Registers this tagger as the global holder of both extractor sets. */
    private void setExtractorsGlobal() {
        this.extractors.setGlobalHolder(this);
        this.extractorsRare.setGlobalHolder(this);
    }

    /**
     * Writes the complete model to the single file {@code str}: config, sizes, dictionary,
     * tags, extractors, feature associations and finally the lambdas.
     *
     * <p>I/O failures are reported on stderr and otherwise swallowed (best effort).
     *
     * @param str path of the model file to write
     * @param taggerConfig the configuration stored at the head of the file
     */
    protected void saveModel(String str, TaggerConfig taggerConfig) {
        try {
            OutDataStreamFile outDataStreamFile = new OutDataStreamFile(str);
            try {
                taggerConfig.saveConfig(outDataStreamFile);
                outDataStreamFile.writeInt(this.xSize);
                outDataStreamFile.writeInt(this.ySize);
                this.dict.save(outDataStreamFile);
                this.tags.save(outDataStreamFile, this.tagTokens);
                saveExtractors(outDataStreamFile);
                outDataStreamFile.writeInt(this.fAssociations.size());
                for (Map.Entry<FeatureKey, Integer> entry : this.fAssociations.entrySet()) {
                    // Each association is stored as its number followed by the key.
                    outDataStreamFile.writeInt(entry.getValue().intValue());
                    entry.getKey().save(outDataStreamFile);
                }
                LambdaSolve.save_lambdas(outDataStreamFile, this.prob.lambda);
            } finally {
                // Release the file handle even when one of the writes above fails.
                outDataStreamFile.close();
            }
        } catch (IOException e) {
            System.err.println("Error saving tagger to file " + str);
            e.printStackTrace();
        }
    }

    /**
     * Converts a tagger stored as several files into the single-file format.
     *
     * <p>The holder file {@code str} supplies the sizes and feature associations; the
     * dictionary, tags, extractors and model are read from {@code str + ".dict"},
     * {@code ".tags"}, {@code ".ex"} and {@code ".prob"} respectively.
     *
     * @param str path of the holder file of the multi-file tagger
     * @param str2 path of the single-file model to write
     * @param taggerConfig configuration used to initialize the tagger and stored in the output
     * @return always {@code true}
     * @throws ClassNotFoundException if a serialized extractor class cannot be resolved
     * @throws IOException if one of the input files cannot be read
     * @throws FileNotFoundException if one of the input files does not exist
     */
    private static boolean convertMultifileTagger(String str, String str2, TaggerConfig taggerConfig) throws ClassNotFoundException, IOException, FileNotFoundException {
        InDataStreamFile inDataStreamFile = new InDataStreamFile(str);
        try {
            MaxentTagger maxentTagger = new MaxentTagger();
            maxentTagger.init(taggerConfig);
            maxentTagger.xSize = inDataStreamFile.readInt();
            maxentTagger.ySize = inDataStreamFile.readInt();
            maxentTagger.dict.read(str + ".dict");
            maxentTagger.tags.read(str + ".tags");
            maxentTagger.readExtractors(str + ".ex");
            maxentTagger.dict.setAmbClasses(maxentTagger.ambClasses, maxentTagger.veryCommonWordThresh, maxentTagger.tags);
            // Per-extractor feature counts; not used further in this method, but the indexing
            // still rejects a feature key whose extractor number is out of range.
            int[] featuresPerExtractor = new int[maxentTagger.extractors.getSize() + maxentTagger.extractorsRare.getSize()];
            int featureCount = inDataStreamFile.readInt();
            for (int i = 0; i < featureCount; i++) {
                int featureNumber = inDataStreamFile.readInt();
                FeatureKey featureKey = new FeatureKey();
                featureKey.read(inDataStreamFile);
                featuresPerExtractor[featureKey.num]++;
                maxentTagger.fAssociations.put(featureKey, Integer.valueOf(featureNumber));
            }
            maxentTagger.prob = new LambdaSolveTagger(str + ".prob");
            maxentTagger.saveModel(str2, taggerConfig);
        } finally {
            // Release the holder file even when the conversion fails midway.
            inDataStreamFile.close();
        }
        return true;
    }

    /**
     * Opens the model at {@code str} through the configuration and loads it.
     *
     * @param taggerConfig configuration used to open the model stream and initialize the tagger
     * @param str the model location
     * @param z whether to print a timing message while loading
     * @throws IOException if the model cannot be opened or read
     * @throws ClassNotFoundException if a serialized class in the model cannot be resolved
     */
    protected void readModelAndInit(TaggerConfig taggerConfig, String str, boolean z) throws IOException, ClassNotFoundException {
        // try-with-resources closes the model stream even when loading fails.
        try (DataInputStream taggerDataInputStream = taggerConfig.getTaggerDataInputStream(str)) {
            readModelAndInit(taggerConfig, taggerDataInputStream, z);
        }
    }

    /**
     * Initializes the tagger from {@code taggerConfig} and then reads the model from
     * {@code dataInputStream}, in file order: stored config, sizes, dictionary, tags,
     * extractors, feature associations and the model itself. The stream is not closed.
     *
     * @param taggerConfig configuration used for initialization (and for the progress message)
     * @param dataInputStream stream positioned at the start of a single-file model
     * @param z whether to print a timing message while loading
     * @throws IOException if reading fails
     * @throws ClassNotFoundException if a serialized class in the model cannot be resolved
     */
    protected void readModelAndInit(TaggerConfig taggerConfig, DataInputStream dataInputStream, boolean z) throws IOException, ClassNotFoundException {
        final Timing timer = new Timing();
        if (z) {
            timer.doing("Reading POS tagger model from " + taggerConfig.getModel());
        }
        init(taggerConfig);

        // The config stored in the file is read to advance the stream; its value is discarded.
        TaggerConfig.readConfig(dataInputStream);
        this.xSize = dataInputStream.readInt();
        this.ySize = dataInputStream.readInt();
        this.dict.read(dataInputStream);
        this.tags.read(dataInputStream);
        readExtractors(dataInputStream);
        this.dict.setAmbClasses(this.ambClasses, this.veryCommonWordThresh, this.tags);

        final int[] featuresPerExtractor = new int[this.extractors.getSize() + this.extractorsRare.getSize()];
        final int featureCount = dataInputStream.readInt();
        this.fAssociations = new HashMap<>(featureCount * 2);
        for (int read = 0; read < featureCount; read++) {
            // Each association is stored as its number followed by the key.
            final int featureNumber = dataInputStream.readInt();
            final FeatureKey key = new FeatureKey();
            key.read(dataInputStream);
            featuresPerExtractor[key.num]++;
            this.fAssociations.put(key, Integer.valueOf(featureNumber));
        }

        this.prob = new LambdaSolveTagger(dataInputStream);
        if (z) {
            timer.done();
        }
    }

    /**
     * Prints one line per feature association in the form {@code key: lambda}.
     *
     * @param printStream the stream to print to
     */
    protected void dumpModel(PrintStream printStream) {
        // Decompiled form of an assert: with assertions enabled, every feature must have
        // exactly one lambda.
        if (!$assertionsDisabled) {
            if (this.fAssociations.size() != this.prob.lambda.length) {
                throw new AssertionError();
            }
        }
        for (Map.Entry<FeatureKey, Integer> association : this.fAssociations.entrySet()) {
            final FeatureKey key = association.getKey();
            final int lambdaIndex = association.getValue().intValue();
            printStream.println(key + ": " + this.prob.lambda[lambdaIndex]);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Tells whether a word counts as rare, i.e. whether {@code dict.sum(str)} is below the
     * rare-word threshold.
     *
     * @param str the word to check
     * @return {@code true} if the word is below the rare-word threshold
     */
    public boolean isRare(String str) {
        return this.dict.sum(str) < this.rareWordThresh;
    }

    /**
     * Returns the tag set used by this tagger.
     *
     * @return the tag set; {@code null} before {@code init} has run
     */
    public TTags getTags() {
        return this.tags;
    }

    /**
     * Tags text that is already tokenized: the input is split on runs of whitespace and
     * treated as a single sentence.
     *
     * @param str whitespace-separated tokens
     * @return the tagged sentence as rendered by {@code TestSentence.getTaggedNice()}
     */
    public String tagTokenizedString(String str) {
        final String[] tokens = str.split("\\s+");
        final List<String> tokenList = Arrays.asList(tokens);
        final ArrayList<Word> sentence = Sentence.toUntaggedList(tokenList);
        final TestSentence sentenceTagger = new TestSentence(this);
        sentenceTagger.tagSentence(sentence);
        return sentenceTagger.getTaggedNice();
    }

    /**
     * Tags raw text using a {@link TaggerWrapper} with default settings (no configuration).
     *
     * @param str the text to tag
     * @return the tagged text
     */
    public String tagString(String str) {
        return new TaggerWrapper(this).apply(str);
    }

    /**
     * Tags one sentence; the {@code Function} view of the tagger.
     *
     * @param list the words of the sentence
     * @return the tagged words
     */
    @Override // edu.stanford.nlp.util.Function
    public ArrayList<TaggedWord> apply(List<? extends HasWord> list) {
        return new TestSentence(this).tagSentence(list);
    }

    /**
     * Tags every sentence of {@code list}, reusing one {@code TestSentence} for all of them.
     *
     * @param list the sentences to tag
     * @return the tagged sentences, in input order
     */
    @Override // edu.stanford.nlp.process.ListProcessor
    public List<ArrayList<TaggedWord>> process(List<? extends List<? extends HasWord>> list) {
        final List<ArrayList<TaggedWord>> taggedSentences = new ArrayList<>();
        final TestSentence sentenceTagger = new TestSentence(this);
        for (List<? extends HasWord> sentence : list) {
            taggedSentences.add(sentenceTagger.tagSentence(sentence));
        }
        return taggedSentences;
    }

    /**
     * Tags one sentence using a fresh {@code TestSentence}.
     *
     * @param list the words of the sentence
     * @return the tagged words
     */
    public ArrayList<TaggedWord> tagSentence(List<? extends HasWord> list) {
        return new TestSentence(this).tagSentence(list);
    }

    /**
     * Tags a sentence of CoreLabels in place: each label's tag is set to the tag the tagger
     * produced for the word at the same position.
     *
     * @param list the sentence to tag; modified in place
     * @throws AssertionError if the tagger returns a different number of words than it was given
     */
    public void tagCoreLabels(List<CoreLabel> list) {
        final ArrayList<TaggedWord> tagged = tagSentence(list);
        if (tagged.size() != list.size()) {
            throw new AssertionError("Tagged word list not the same length as the original sentence");
        }
        // Both lists have the same length here, so they can be walked in lock-step.
        final Iterator<TaggedWord> taggedWords = tagged.iterator();
        for (CoreLabel label : list) {
            label.setTag(taggedWords.next().tag());
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Copies each tag from {@code list} onto the CoreLabel at the same position of
     * {@code list2} and passes that label to {@code morphology.stem}. Tokens that are not
     * CoreLabels are skipped.
     *
     * @param list the tagged words, parallel to {@code list2}
     * @param list2 the original tokens; CoreLabels among them are modified in place
     * @param morphology the stemmer to apply; must be non-null if any token is a CoreLabel
     */
    public static void lemmatize(List<TaggedWord> list, List<HasWord> list2, Morphology morphology) {
        for (int position = 0; position < list2.size(); position++) {
            final HasWord token = list2.get(position);
            if (!(token instanceof CoreLabel)) {
                continue;
            }
            final CoreLabel label = (CoreLabel) token;
            label.setTag(list.get(position).tag());
            morphology.stem(label);
        }
    }

    /**
     * Splits the text from {@code reader} into tokenized sentences using the
     * {@code DocumentPreprocessor}'s default tokenizer.
     *
     * @param reader the text to tokenize
     * @return the sentences, each a list of words
     */
    public static List<List<HasWord>> tokenizeText(Reader reader) {
        return tokenizeText(reader, null);
    }

    /**
     * Splits the text from {@code reader} into tokenized sentences.
     *
     * @param reader the text to tokenize
     * @param tokenizerFactory the tokenizer factory to use, or {@code null} to keep the
     *        {@code DocumentPreprocessor}'s default
     * @return the sentences, each a list of words
     */
    protected static List<List<HasWord>> tokenizeText(Reader reader, TokenizerFactory<? extends HasWord> tokenizerFactory) {
        final DocumentPreprocessor preprocessor = new DocumentPreprocessor(reader);
        if (tokenizerFactory != null) {
            preprocessor.setTokenizerFactory(tokenizerFactory);
        }
        final List<List<HasWord>> sentences = new ArrayList<>();
        for (Iterator<List<HasWord>> cursor = preprocessor.iterator(); cursor.hasNext(); ) {
            sentences.add(cursor.next());
        }
        return sentences;
    }

    /**
     * Converts the multi-file tagger named by the config's model (holder file
     * {@code model + ".holder"}) into a single-file model written to the config's file.
     * Any failure is reported on stderr and swallowed.
     *
     * @param taggerConfig configuration naming the source model and the destination file
     */
    private static void convertToSingleFileFormat(TaggerConfig taggerConfig) {
        try {
            taggerConfig.dump();
            final String holderPath = taggerConfig.getModel() + ".holder";
            final String targetPath = taggerConfig.getFile();
            convertMultifileTagger(holderPath, targetPath, taggerConfig);
        } catch (Exception e) {
            System.err.println("An error occurred while converting to the new tagger format.");
            e.printStackTrace();
        }
    }

    /**
     * Loads the tagger stored in the config's file and prints its configuration and all
     * feature weights to stdout. Any failure only prints a stack trace.
     *
     * @param taggerConfig configuration naming the serialized tagger to dump
     */
    private static void dumpModel(TaggerConfig taggerConfig) {
        try {
            final String modelFile = taggerConfig.getFile();
            // 'false': load quietly, without the timing message.
            final MaxentTagger loaded = new MaxentTagger(modelFile, taggerConfig, false);
            System.out.println("Serialized tagger built with config:");
            taggerConfig.dump(System.out);
            loaded.dumpModel(System.out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the configured model, runs it through a {@code TestClassifier}, and reports
     * tagging speed and accuracy. Any failure is reported on stderr and swallowed.
     *
     * @param taggerConfig configuration naming the model and the test settings
     */
    private static void runTest(TaggerConfig taggerConfig) {
        if (taggerConfig.getVerbose()) {
            System.err.println("## tagger testing invoked at " + new Date() + " with arguments:");
            taggerConfig.dump();
        }
        try {
            final MaxentTagger tagger = new MaxentTagger(taggerConfig.getModel(), taggerConfig);
            // The timer covers only the construction of the TestClassifier.
            final Timing timer = new Timing();
            final TestClassifier classifier = new TestClassifier(taggerConfig, tagger);
            final long elapsed = timer.stop();
            printErrWordsPerSec(elapsed, classifier.getNumWords());
            classifier.printModelAndAccuracy(taggerConfig, tagger);
        } catch (Exception e) {
            System.err.println("An error occurred while testing the tagger.");
            e.printStackTrace();
        }
    }

    /**
     * Trains a new tagger as described by {@code taggerConfig} and saves it to the
     * configured model path.
     *
     * <p>The optimizer is chosen by the config's search setting: {@code "owlqn"},
     * {@code "cg"}, the value of {@code TaggerConfig.SEARCH}, or improved iterative scaling
     * for anything else.
     *
     * @param taggerConfig the training configuration
     * @throws IOException if the training data cannot be read
     */
    private static void trainAndSaveModel(TaggerConfig taggerConfig) throws IOException {
        final String modelPath = taggerConfig.getModel();
        final MaxentTagger tagger = new MaxentTagger();
        tagger.init(taggerConfig);

        final TaggerExperiments samples = new TaggerExperiments(taggerConfig, tagger);
        final TaggerFeatures features = samples.getTaggerFeatures();
        System.err.println("Samples from " + taggerConfig.getFile());
        System.err.println("Number of features: " + features.size());

        final Problem problem = new Problem(samples, features);
        final LambdaSolveTagger solver = new LambdaSolveTagger(problem, 1.0E-4d, 1.0E-5d, tagger.fnumArr);
        tagger.prob = solver;

        if (taggerConfig.getSearch().equals("owlqn")) {
            final CGRunner runner = new CGRunner(solver, taggerConfig.getModel(), taggerConfig.getSigmaSquared());
            runner.solveL1(taggerConfig.getRegL1());
        } else if (taggerConfig.getSearch().equals("cg")) {
            final CGRunner runner = new CGRunner(solver, taggerConfig.getModel(), taggerConfig.getSigmaSquared());
            runner.solveCG();
        } else if (taggerConfig.getSearch().equals(TaggerConfig.SEARCH)) {
            final CGRunner runner = new CGRunner(solver, taggerConfig.getModel(), taggerConfig.getSigmaSquared());
            runner.solveQN();
        } else {
            solver.improvedIterative(taggerConfig.getIterations());
        }

        System.err.println(solver.checkCorrectness()
                ? "Model is correct [empirical expec = model expec]"
                : "Model is not correct");
        tagger.saveModel(modelPath, taggerConfig);
    }

    /**
     * Runs a complete training session: logs the invocation, records the configuration in
     * {@code model + ".props"}, then trains and saves the model.
     *
     * @param taggerConfig the training configuration
     * @throws Exception whatever the training failed with, after a message on stderr
     */
    private static void runTraining(TaggerConfig taggerConfig) throws Exception {
        Date date = new Date();
        System.err.println("## tagger training invoked at " + date + " with arguments:");
        taggerConfig.dump();
        Timing timing = new Timing();
        try {
            PrintFile printFile = new PrintFile(taggerConfig.getModel() + ".props");
            try {
                printFile.println("## tagger training invoked at " + date + " with arguments:");
                taggerConfig.dump(printFile);
            } finally {
                // Close the props file even when dumping the configuration fails.
                printFile.close();
            }
            trainAndSaveModel(taggerConfig);
            timing.done("Training POS tagger");
        } catch (Exception e) {
            System.err.println("An error occurred while training a new tagger.");
            throw e;
        }
    }

    /**
     * Prints the tagging throughput to stderr.
     *
     * @param j elapsed time in milliseconds
     * @param i number of words tagged
     */
    private static void printErrWordsPerSec(long j, int i) {
        final double seconds = j / 1000.0d;
        final String wordsPerSecond = new DecimalFormat("0.00").format(i / seconds);
        System.err.println("Tagged " + i + " words at " + wordsPerSecond + " words per second.");
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Renders a tagged sentence as a {@code <sentence>} element containing one
     * {@code <word>} element per tagged word, with {@code wid} and {@code pos} attributes and,
     * optionally, a {@code lemma} attribute.
     *
     * @param arrayList the tagged words
     * @param i the sentence id written to the {@code id} attribute
     * @param list the original tokens, parallel to {@code arrayList}; may be {@code null}
     * @param z whether to emit lemmas (only done when the original tokens are CoreLabels
     *        that carry a lemma)
     * @return the XML fragment, terminated by a newline
     */
    public static String getXMLWords(ArrayList<TaggedWord> arrayList, int i, List<HasWord> list, boolean z) {
        // Lemmas can only come from CoreLabels; the first token decides for the sentence.
        boolean hasCoreLabels = list != null && list.size() > 0 && (list.get(0) instanceof CoreLabel);
        StringBuilder sb = new StringBuilder();
        sb.append("<sentence id=\"").append(i).append("\">\n");
        int size = arrayList.size();
        for (int wordIndex = 0; wordIndex < size; wordIndex++) {
            TaggedWord taggedWord = arrayList.get(wordIndex);
            sb.append("  <word wid=\"").append(wordIndex).append("\" pos=\"").append(XMLUtils.escapeAttributeXML(taggedWord.tag())).append("\"");
            if (z && hasCoreLabels) {
                HasWord original = list.get(wordIndex);
                if (original instanceof CoreLabel) {
                    String lemma = ((CoreLabel) original).lemma();
                    if (lemma != null) {
                        // The lemma is an attribute value, so it gets the same attribute
                        // escaping as the pos attribute above.
                        sb.append(" lemma=\"").append(XMLUtils.escapeAttributeXML(lemma)).append("\"");
                    }
                }
            }
            sb.append(">").append(XMLUtils.escapeElementXML(taggedWord.word())).append("</word>\n");
        }
        sb.append("</sentence>\n");
        return sb.toString();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Renders a tagged sentence as tab-separated lines followed by one blank line.
     *
     * <p>In verbose mode, when the original tokens are CoreLabels, each line holds: word,
     * the label's {@code current()} value, lemma (only if requested), tag, begin position
     * and end position. Otherwise each line holds just word and tag.
     *
     * @param z whether verbose output is requested
     * @param z2 whether to include the lemma column in verbose output
     * @param arrayList the tagged words
     * @param list the original tokens, parallel to {@code arrayList}; may be {@code null}
     * @return the TSV block for the sentence
     * @throws IllegalArgumentException if verbose output is requested and the two lists
     *         differ in length
     */
    public static String getTsvWords(boolean z, boolean z2, ArrayList<TaggedWord> arrayList, List<HasWord> list) {
        StringBuilder sb = new StringBuilder();
        if (z && list != null) {
            if (arrayList.size() != list.size()) {
                throw new IllegalArgumentException("expected tagged and original sentences of the same length");
            }
            // An empty sentence has no first token to inspect; it falls through to the plain
            // format below, which then emits just the blank separator line.
            if (!list.isEmpty() && list.get(0) instanceof CoreLabel) {
                int size = list.size();
                for (int i = 0; i < size; i++) {
                    TaggedWord taggedWord = arrayList.get(i);
                    CoreLabel coreLabel = (CoreLabel) list.get(i);
                    sb.append(taggedWord.word());
                    sb.append("\t");
                    sb.append(coreLabel.current());
                    sb.append("\t");
                    if (z2) {
                        sb.append(coreLabel.lemma());
                        sb.append("\t");
                    }
                    sb.append(taggedWord.tag());
                    sb.append("\t");
                    sb.append(coreLabel.beginPosition());
                    sb.append("\t");
                    sb.append(coreLabel.endPosition());
                    sb.append("\n");
                }
                sb.append('\n');
                return sb.toString();
            }
        }
        // Plain format: word<TAB>tag per line.
        for (TaggedWord taggedWord : arrayList) {
            sb.append(taggedWord.word()).append('\t').append(taggedWord.tag()).append('\n');
        }
        sb.append('\n');
        return sb.toString();
    }

    /**
     * Writes one tagged sentence to {@code writer} in the XML format of {@code getXMLWords}.
     *
     * @param writer the destination
     * @param arrayList the tagged words
     * @param i the sentence id
     * @param list the original tokens; may be {@code null}
     * @param z whether to emit lemmas
     * @throws RuntimeIOException if writing fails (after logging the sentence to stderr)
     */
    private static void writeXMLSentence(Writer writer, ArrayList<TaggedWord> arrayList, int i, List<HasWord> list, boolean z) {
        final String xml = getXMLWords(arrayList, i, list, z);
        try {
            writer.write(xml);
        } catch (IOException e) {
            System.err.println("Error writing sentence " + i + ": " + Sentence.listToString(arrayList));
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Runs the XML read from {@code inputStream} through {@code TransformXML}, passing a
     * {@link TaggerWrapper} built from this tagger's configuration as the transformation for
     * the elements named in {@code strArr}, and writes the result to {@code writer}.
     *
     * @param inputStream the XML input
     * @param writer the destination for the transformed XML
     * @param strArr names of the XML elements passed to the transformer
     */
    public void tagFromXML(InputStream inputStream, Writer writer, String... strArr) {
        new TransformXML().transformXML(strArr, new TaggerWrapper(this.config, this), inputStream, writer, new TransformXML.SAXInterface());
    }

    /**
     * Runs the XML transformation over {@code reader}, using a
     * {@code TaggerWrapper} built from this tagger and its config, and writes
     * the result to {@code writer}.
     *
     * @param reader source of the XML document
     * @param writer destination for the transformed document
     * @param strArr the XML tag names handed to {@code TransformXML.transformXML}
     */
    public void tagFromXML(Reader reader, Writer writer, String... strArr) {
        TransformXML transformer = new TransformXML();
        TaggerWrapper wrapper = new TaggerWrapper(this.config, this);
        TransformXML.SAXInterface saxHandler = new TransformXML.SAXInterface();
        transformer.transformXML(strArr, wrapper, reader, writer, saxHandler);
    }

    /**
     * Tags the XML input file named by the config and writes the result to the
     * configured output file, or to {@code System.out} when the configured
     * output file name is empty.
     *
     * <p>I/O failures are reported on {@code System.err} and not rethrown.
     * Both streams are closed on every exit path.
     */
    private void tagFromXML() {
        BufferedInputStream input = null;
        Writer output = null;
        try {
            input = new BufferedInputStream(new FileInputStream(this.config.getFile()));
            String outputFile = this.config.getOutputFile();
            if (outputFile.length() > 0) {
                output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), this.config.getEncoding()));
            } else {
                // NOTE(review): stdout is written with the platform default encoding here,
                // unlike the file branch which uses the configured encoding - confirm intent.
                output = new PrintWriter(System.out);
            }
            tagFromXML(input, output, this.config.getXMLInput());
        } catch (FileNotFoundException e) {
            // Must precede the IOException handler: FileNotFoundException is a subclass of
            // IOException and would otherwise never reach this more specific message.
            System.err.println("Input file not found: " + this.config.getFile());
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("tagFromXML: mysterious IO Exception");
            e.printStackTrace();
        } finally {
            // Single cleanup point for normal completion, handled errors and unchecked exceptions.
            IOUtils.closeIgnoringExceptions(input);
            IOUtils.closeIgnoringExceptions(output);
        }
    }

    /**
     * Builds a tagger from the model named in {@code taggerConfig} and runs it.
     * When the config is verbose, first prints an invocation banner with the
     * current date to {@code System.err} and dumps the config.
     *
     * @param taggerConfig configuration supplying the model and run options
     */
    private static void runTagger(TaggerConfig taggerConfig) throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        boolean verbose = taggerConfig.getVerbose();
        if (verbose) {
            String banner = "## tagger invoked at " + new Date() + " with arguments:";
            System.err.println(banner);
            taggerConfig.dump();
        }
        MaxentTagger tagger = new MaxentTagger(taggerConfig.getModel(), taggerConfig);
        tagger.runTagger();
    }

    /**
     * Runs the tagger as directed by this tagger's config.
     *
     * <p>If the config lists XML tags to process (anything other than a single
     * entry equal to {@code XMLSerialization.ATT_NULL}), the work is delegated to
     * {@link #tagFromXML()}. Otherwise text is read from standard input or from
     * the configured file and tagged output goes to the configured output file,
     * or to {@code System.out} when the output file name is empty.
     *
     * <p>The output writer is always closed on exit, as is the input reader when
     * it was opened from the configured file. A reader over {@code System.in} is
     * left open.
     *
     * @throws IOException if opening, reading or writing a stream fails
     */
    private void runTagger() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        String[] xmlTags = this.config.getXMLInput();
        if (xmlTags.length > 0 && (xmlTags.length > 1 || !xmlTags[0].equals(XMLSerialization.ATT_NULL))) {
            tagFromXML();
            return;
        }
        BufferedWriter output = null;
        // Only set when the reader was opened here from a file, so that System.in is never closed.
        BufferedReader fileInput = null;
        try {
            String outputFile = this.config.getOutputFile();
            if (outputFile.length() > 0) {
                output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), this.config.getEncoding()));
            } else {
                output = new BufferedWriter(new OutputStreamWriter(System.out, this.config.getEncoding()));
            }
            boolean useStdin = this.config.useStdin();
            BufferedReader input;
            if (useStdin) {
                System.err.println("Type some text to tag, then EOF.");
                System.err.println("  (For EOF, use Return, Ctrl-D on Unix; Enter, Ctrl-Z, Enter on Windows.)");
                // NOTE(review): stdin is decoded with the platform default charset, not the
                // configured encoding used for file input and for output - confirm intent.
                input = new BufferedReader(new InputStreamReader(System.in));
            } else {
                fileInput = IOUtils.readReaderFromString(this.config.getFile(), this.config.getEncoding());
                input = fileInput;
            }
            runTagger(input, output, this.config.getTagInside(), useStdin);
        } finally {
            if (output != null) {
                IOUtils.closeIgnoringExceptions(output);
            }
            if (fileInput != null) {
                IOUtils.closeIgnoringExceptions(fileInput);
            }
        }
    }

    /**
     * Reads text from {@code bufferedReader}, tags it, and writes the result to
     * {@code bufferedWriter} in the output format named by the config.
     *
     * <p>Three input modes are supported outside SGML mode:
     * <ul>
     *   <li>if {@code str} is non-empty, the input is read as XML and {@code str}
     *       is set as the element delimiter;</li>
     *   <li>otherwise, if {@code z} is true, the input is read one line at a time,
     *       output is flushed after every sentence, and processing stops when
     *       {@code readLine()} returns {@code null};</li>
     *   <li>otherwise the whole reader is processed in one pass using the
     *       configured sentence delimiter.</li>
     * </ul>
     * When the config's SGML flag is set, documents are read and written through a
     * {@code PlainTextDocumentReaderAndWriter} instead.
     *
     * <p>On completion the writer is flushed (not closed) and the elapsed time and
     * word count are passed to {@code printErrWordsPerSec}.
     *
     * @param bufferedReader source of the text to tag
     * @param bufferedWriter destination for tagged output
     * @param str            XML element name to tag inside; {@code null} is treated as empty
     * @param z              whether to read line by line; ignored when {@code str} is non-empty
     * @throws IOException              if reading or writing fails
     * @throws IllegalArgumentException if the configured output style is not handled here
     */
    public void runTagger(BufferedReader bufferedReader, BufferedWriter bufferedWriter, String str, boolean z) throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        Timing timing = new Timing();
        String sentenceDelimiter = this.config.getSentenceDelimiter();
        TokenizerFactory<? extends HasWord> tokenizerFactory = chooseTokenizerFactory();
        int numWords = 0;
        int numSentences = 0;
        // A null element name means "no element restriction"; normalise it once so
        // later length checks cannot dereference null.
        String tagInside = (str == null) ? "" : str;
        // Line-at-a-time mode is only used when no XML element was requested.
        boolean lineByLine = z && tagInside.length() == 0;
        PlainTextDocumentReaderAndWriter.OutputStyle outputStyle = PlainTextDocumentReaderAndWriter.OutputStyle.fromShortName(this.config.getOutputFormat());
        boolean outputVerbosity = this.config.getOutputVerbosity();
        boolean outputLemmas = this.config.getOutputLemmas();
        Morphology morphology = outputLemmas ? new Morphology() : null;
        do {
            if (this.config.getSGML()) {
                PlainTextDocumentReaderAndWriter readerAndWriter = new PlainTextDocumentReaderAndWriter();
                ObjectBank documents = new ObjectBank(new ReaderIteratorFactory(bufferedReader), readerAndWriter);
                PrintWriter printWriter = new PrintWriter(bufferedWriter);
                Iterator documentIterator = documents.iterator();
                while (documentIterator.hasNext()) {
                    List document = (List) documentIterator.next();
                    ArrayList tokens = new ArrayList(document);
                    numWords += tokens.size();
                    ArrayList<TaggedWord> tagged = tagSentence(tokens);
                    // Copy each predicted tag onto the corresponding input label.
                    Iterator labelIterator = document.iterator();
                    Iterator<TaggedWord> taggedIterator = tagged.iterator();
                    while (taggedIterator.hasNext()) {
                        ((CoreLabel) labelIterator.next()).set(CoreAnnotations.AnswerAnnotation.class, taggedIterator.next().tag());
                    }
                    readerAndWriter.printAnswers(document, printWriter, outputStyle, true);
                }
            } else {
                DocumentPreprocessor documentPreprocessor;
                if (tagInside.length() > 0) {
                    documentPreprocessor = new DocumentPreprocessor(bufferedReader, DocumentPreprocessor.DocType.XML);
                    documentPreprocessor.setElementDelimiter(tagInside);
                } else if (lineByLine) {
                    String line = bufferedReader.readLine();
                    if (line == null) {
                        // End of input: leave the loop so the final flush and statistics
                        // below run exactly once, rather than wrapping a null line.
                        break;
                    }
                    documentPreprocessor = new DocumentPreprocessor(new BufferedReader(new StringReader(line)));
                } else {
                    documentPreprocessor = new DocumentPreprocessor(bufferedReader);
                    documentPreprocessor.setSentenceDelimiter(sentenceDelimiter);
                }
                documentPreprocessor.setTokenizerFactory(tokenizerFactory);
                documentPreprocessor.setEncoding(this.config.getEncoding());
                Iterator<List<HasWord>> sentenceIterator = documentPreprocessor.iterator();
                while (sentenceIterator.hasNext()) {
                    List<HasWord> sentence = sentenceIterator.next();
                    numWords += sentence.size();
                    ArrayList<TaggedWord> taggedSentence = tagSentence(sentence);
                    if (outputLemmas) {
                        lemmatize(taggedSentence, sentence, morphology);
                    }
                    switch (outputStyle) {
                        case TSV:
                            bufferedWriter.write(getTsvWords(outputVerbosity, outputLemmas, taggedSentence, sentence));
                            break;
                        case XML:
                        case INLINE_XML:
                            writeXMLSentence(bufferedWriter, taggedSentence, numSentences, sentence, outputLemmas);
                            break;
                        case SLASH_TAGS:
                            bufferedWriter.write(Sentence.listToString(taggedSentence, false, this.config.getTagSeparator()));
                            bufferedWriter.newLine();
                            break;
                        default:
                            throw new IllegalArgumentException("Unsupported output style " + outputStyle);
                    }
                    if (lineByLine) {
                        // Flush after every sentence so line-by-line output appears promptly.
                        bufferedWriter.newLine();
                        bufferedWriter.flush();
                    }
                    numSentences++;
                }
            }
        } while (lineByLine);
        bufferedWriter.flush();
        printErrWordsPerSec(timing.stop(), numWords);
    }

    /**
     * Command-line entry point. Builds a {@code TaggerConfig} from the arguments
     * and dispatches on its mode: train, tag, test, convert or dump. Any other
     * mode prints a message to {@code System.err} and does nothing.
     *
     * @param strArr command-line arguments passed to the {@code TaggerConfig} constructor
     * @throws Exception if the selected operation fails
     */
    public static void main(String[] strArr) throws Exception {
        TaggerConfig taggerConfig = new TaggerConfig(strArr);
        TaggerConfig.Mode mode = taggerConfig.getMode();
        if (mode == TaggerConfig.Mode.TRAIN) {
            runTraining(taggerConfig);
        } else if (mode == TaggerConfig.Mode.TAG) {
            runTagger(taggerConfig);
        } else if (mode == TaggerConfig.Mode.TEST) {
            runTest(taggerConfig);
        } else if (mode == TaggerConfig.Mode.CONVERT) {
            convertToSingleFileFormat(taggerConfig);
        } else if (mode == TaggerConfig.Mode.DUMP) {
            dumpModel(taggerConfig);
        } else {
            System.err.println("Impossible: nothing to do. None of train, tag, test, or convert was specified.");
        }
    }

    static {
        $assertionsDisabled = !MaxentTagger.class.desiredAssertionStatus();
        TAGGER_HOME = DataFilePaths.convert(BASE_TAGGER_HOME);
        DEFAULT_NLP_GROUP_MODEL_PATH = new File(TAGGER_HOME, "left3words-wsj-0-18.tagger").getPath();
    }
}
