/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.fsm;

import com.infomata.data.DataFile;
import com.infomata.data.DataFormat;
import com.infomata.data.DataRow;
import com.infomata.data.TabFormat;
import edu.ucla.fsm.Alphabet;
import edu.ucla.fsm.Corpus;
import edu.ucla.fsm.FeatureMatrixReader;
import edu.ucla.util.Counter;
import edu.ucla.util.IntArrayComparator;
import java.io.File;
import java.util.Arrays;
import java.util.Comparator;
import java.util.TreeMap;

/**
 * Reads a word-list file into a {@link Corpus}.
 *
 * <p>The file is read through a {@link DataFile} configured with {@link TabFormat}.
 * The first column of every non-empty row is a word written as segment names
 * separated by single spaces; an optional second column holds an integer count
 * for that word.
 */
public class CorpusReader {
    /**
     * Initial capacity of the reusable buffer that holds one encoded word.
     * Words that need more room make the buffer grow on demand.
     */
    static int MAXIMUM_WORD_SIZE = 100;

    /** When greater than 9, every encoded word and its count are printed to standard output. */
    static int verbosity = 0;

    /**
     * Reads a corpus without adding anything to the counts found in the file.
     * Equivalent to {@code apply(fileName, alphabet, 0, storeData)}.
     *
     * @param fileName  path of the corpus file
     * @param alphabet  alphabet used to translate segment names into indices
     * @param storeData whether the per-word counts are kept in {@code corpus.data}
     * @return the populated corpus
     * @throws Exception if the underlying reader fails
     */
    public static Corpus apply(String fileName, Alphabet alphabet, boolean storeData) throws Exception {
        return CorpusReader.apply(fileName, alphabet, 0, storeData);
    }

    /**
     * Reads a corpus file and accumulates its counts.
     *
     * <p>Each word is encoded as an {@code int} array: index 0 at both ends
     * (presumably a word-boundary symbol -- confirm against {@link Alphabet}) with
     * the position of each segment in {@code alphabet.segment_names} in between.
     * A missing or unparseable count column defaults to 1. A positive count is
     * increased by {@code countIncrement}, and {@code corpus.updateCounts} is only
     * called for counts that are positive afterwards. The per-word map and the
     * total size are updated with the count regardless of its sign.
     *
     * <p>An unknown segment name prints an error to standard output and terminates
     * the JVM with exit status 1.
     *
     * @param fileName       path of the corpus file
     * @param alphabet       alphabet used to translate segment names into indices
     * @param countIncrement value added to every positive count read from the file
     * @param storeData      whether the per-word counts are kept in {@code corpus.data};
     *                       also forwarded to the {@link Corpus} constructor
     * @return the populated corpus, with {@code size} set to the sum of all counts
     * @throws Exception if the underlying reader fails
     */
    public static Corpus apply(String fileName, Alphabet alphabet, int countIncrement, boolean storeData) throws Exception {
        // "8859_1" is handed to the reader factory; presumably the character encoding.
        DataFile dataFile = DataFile.createReader("8859_1");
        dataFile.setDataFormat(new TabFormat());
        Corpus corpus = new Corpus(alphabet.number_of_segments, storeData);
        TreeMap<int[], Counter> wordCounts = storeData
                ? new TreeMap<int[], Counter>((Comparator<int[]>)new IntArrayComparator())
                : null;
        int[] buffer = new int[MAXIMUM_WORD_SIZE];
        int totalCount = 0;
        try {
            dataFile.open(new File(fileName));
            for (DataRow row = dataFile.next(); row != null; row = dataFile.next()) {
                if (row.size() == 0) {
                    continue;
                }
                String[] segments = row.getString(0).split(" ");
                int wordLength = segments.length;
                int encodedLength = wordLength + 2;

                // The buffer used to be a fixed MAXIMUM_WORD_SIZE ints, so longer words
                // overflowed it. Grow it instead; rows that fit are handled exactly as before.
                if (encodedLength > buffer.length) {
                    buffer = new int[Math.max(encodedLength, 2 * buffer.length)];
                }

                buffer[0] = 0;
                for (int i = 0; i < wordLength; ++i) {
                    int segmentIndex = alphabet.segment_names.indexOf(segments[i]);
                    if (segmentIndex == -1) {
                        System.out.println("Error: cannot find an index for segment XXX" + segments[i] + "XXX");
                        System.exit(1);
                    }
                    buffer[i + 1] = segmentIndex;
                }
                buffer[wordLength + 1] = 0;

                int count = 1;
                if (row.size() >= 2) {
                    try {
                        count = Integer.parseInt(row.getString(1));
                    }
                    catch (Exception ignored) {
                        // Deliberate best effort: an unreadable count column means "seen once".
                        count = 1;
                    }
                }
                if (count > 0) {
                    count += countIncrement;
                }
                if (count > 0) {
                    corpus.updateCounts(buffer, 0, encodedLength, count);
                }

                if (storeData) {
                    // The map needs its own exact-length copy because the buffer is reused.
                    int[] word = new int[encodedLength];
                    System.arraycopy(buffer, 0, word, 0, encodedLength);
                    Counter existing = wordCounts.get(word);
                    if (existing != null) {
                        existing.value += count;
                    } else {
                        wordCounts.put(word, new Counter(count));
                    }
                }

                totalCount += count;
                if (verbosity > 9) {
                    System.out.println(Arrays.toString(Arrays.copyOf(buffer, encodedLength)) + "\t" + count);
                }
            }
        }
        finally {
            dataFile.close();
        }
        if (storeData) {
            corpus.data = wordCounts;
        }
        corpus.size = totalCount;
        return corpus;
    }

    /**
     * Command-line entry point: reads an alphabet via {@link FeatureMatrixReader} from the
     * first argument and a corpus from the second, then prints every stored word with
     * its counter, the number of types, the number of tokens and the empirical length
     * distribution.
     *
     * @param args {@code args[0]} is passed to {@code FeatureMatrixReader.apply},
     *             {@code args[1]} is the corpus file
     * @throws Exception if reading either file fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("usage: CorpusReader <feature-matrix-file> <corpus-file>");
            System.exit(1);
        }
        Alphabet alphabet = FeatureMatrixReader.apply(args[0]);
        Corpus corpus = CorpusReader.apply(args[1], alphabet, 0, true);
        for (int[] word : corpus.data.keySet()) {
            System.out.println(Arrays.toString(word) + " (" + corpus.data.get(word) + ")");
        }
        System.out.println("types: " + corpus.data.size());
        System.out.println("tokens: " + corpus.size);
        System.out.println("empirical length distrib: " + Arrays.toString(corpus.empiricalLengthDistrib));
    }
}

