package com.hankcs.hanlp.model.hmm;

import com.baidu.ocr.sdk.utils.LogUtil;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.model.perceptron.tagset.CWSTagSet;
import com.hankcs.hanlp.model.perceptron.tagset.TagSet;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/* loaded from: classes.dex */
/**
 * Character-tagging word segmenter backed by a hidden Markov model.
 *
 * <p>For training, each word of a sentence is expanded into per-character
 * {@code {character, tag}} pairs using the tags {@code B}, {@code M}, {@code E}
 * and {@code S}. For prediction, a new word is started at every character whose
 * predicted tag equals {@code tagSet.B} or {@code tagSet.S}.
 */
public class HMMSegmenter extends HMMTrainer implements Segmenter {
    /** Tag set whose {@code B} and {@code S} ids mark the first character of a word. */
    CWSTagSet tagSet;

    /** Creates a segmenter using the superclass's no-argument construction. */
    public HMMSegmenter() {
        this.tagSet = new CWSTagSet();
    }

    /**
     * Creates a segmenter on top of the given model.
     *
     * @param hiddenMarkovModel model handed to the superclass constructor
     */
    public HMMSegmenter(HiddenMarkovModel hiddenMarkovModel) {
        super(hiddenMarkovModel);
        this.tagSet = new CWSTagSet();
    }

    /**
     * Expands a sentence into a sequence of {@code {character, tag}} pairs.
     *
     * <p>A one-character word yields a single {@code "S"} pair. A longer word yields
     * {@code "B"} for its first character, {@code "M"} for every interior character
     * and {@code "E"} for its last character. Words whose converted text is empty
     * contribute nothing.
     *
     * @param sentence sentence whose simple word list is expanded
     * @return the pairs for all words, in sentence order
     */
    @Override // com.hankcs.hanlp.model.hmm.HMMTrainer
    protected List<String[]> convertToSequence(Sentence sentence) {
        List<String[]> sequence = new LinkedList<String[]>();
        for (Word word : sentence.toSimpleWordList()) {
            String normalized = CharTable.convert(word.value);
            int wordLength = normalized.length();
            if (wordLength == 0) {
                // An empty word has no characters to tag; substring(0, 1) would throw.
                continue;
            }
            if (wordLength == 1) {
                sequence.add(new String[]{normalized, "S"});
                continue;
            }
            sequence.add(new String[]{normalized.substring(0, 1), "B"});
            for (int i = 1; i < wordLength - 1; i++) {
                sequence.add(new String[]{normalized.substring(i, i + 1), "M"});
            }
            // NOTE(review): this tag used to be read from com.baidu.ocr.sdk.utils.LogUtil.E,
            // presumably a decompiler substitution for the literal "E" - confirm the values match.
            sequence.add(new String[]{normalized.substring(wordLength - 1), "E"});
        }
        return sequence;
    }

    /**
     * @return the tag set instance created in the constructor
     */
    @Override // com.hankcs.hanlp.model.hmm.HMMTrainer
    protected TagSet getTagSet() {
        return this.tagSet;
    }

    /**
     * Segments {@code str}, using {@link CharTable#convert} to produce the text that is
     * looked up in the vocabulary.
     *
     * @param str text to segment
     * @return the words in order of appearance; empty when {@code str} is empty
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.Segmenter
    public List<String> segment(String str) {
        List<String> words = new LinkedList<String>();
        segment(str, CharTable.convert(str), words);
        return words;
    }

    /**
     * Segments {@code str} and appends the resulting words to {@code list}.
     *
     * <p>Vocabulary ids are looked up from the characters of {@code str2}, while the
     * emitted words are cut from {@code str}; {@code str2} is indexed with positions
     * of {@code str}, so it must be at least as long.
     *
     * @param str  original text; nothing is appended when it is empty
     * @param str2 text whose characters are looked up in the vocabulary
     * @param list receives the words in order of appearance
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.Segmenter
    public void segment(String str, String str2, List<String> list) {
        int length = str.length();
        if (length == 0) {
            // Nothing to tag; the unconditional charAt(0) below would throw.
            return;
        }
        int[] observations = new int[length];
        for (int i = 0; i < length; i++) {
            observations[i] = this.vocabulary.idOf(str2.substring(i, i + 1));
        }
        int[] tags = new int[length];
        this.model.predict(observations, tags);

        // The first character always opens the first word, whatever its predicted tag.
        StringBuilder current = new StringBuilder();
        current.append(str.charAt(0));
        for (int i = 1; i < length; i++) {
            if (tags[i] == this.tagSet.B || tags[i] == this.tagSet.S) {
                // B and S both mean "a word starts here": flush the word built so far.
                list.add(current.toString());
                current.setLength(0);
            }
            current.append(str.charAt(i));
        }
        if (current.length() != 0) {
            list.add(current.toString());
        }
    }

    /**
     * Wraps this segmenter in a {@link Segment} whose sentence segmentation delegates to
     * {@link #segment(String)}.
     *
     * @return the result of calling {@code enableCustomDictionary(false)} on the adapter;
     *         every produced {@link Term} is constructed with a {@code null} second argument
     */
    public Segment toSegment() {
        return new Segment() { // from class: com.hankcs.hanlp.model.hmm.HMMSegmenter.1
            @Override // com.hankcs.hanlp.seg.Segment
            protected List<Term> segSentence(char[] cArr) {
                List<Term> terms = new LinkedList<Term>();
                for (String word : HMMSegmenter.this.segment(new String(cArr))) {
                    terms.add(new Term(word, null));
                }
                return terms;
            }
        }.enableCustomDictionary(false);
    }
}
