/*
 * Decompiled with CFR 0.152.
 */
package iitb.cfilt.cpost.tokenizer;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.text.BreakIterator;
import java.util.Vector;

/**
 * Splits text into word tokens using the word boundaries reported by
 * {@link BreakIterator#getWordInstance()}, discarding pieces that consist
 * only of filtered characters (see {@link #isGarbage(String)}).
 *
 * <p>Input can be a single string ({@link #tokeniseSentence(String)}) or a
 * file that is decoded as UTF-8 and processed line by line
 * ({@link #tokenise()}).
 */
public class Tokeniser {
    /** Characters that are filtered out: space, '!', ']', ',' and '-'. */
    private static final String GARBAGE_CHARS = " !],-";

    /** Tokens collected by the most recent call to {@link #tokenise()}. */
    private Vector<String> tokens;

    /** Path of the file read by {@link #tokenise()}; may be {@code null}. */
    private String inFileName;

    /** Creates a tokeniser with no input file configured. */
    public Tokeniser() {
    }

    /**
     * Creates a tokeniser bound to an input file.
     *
     * @param inFile path of the file to read in {@link #tokenise()}
     */
    public Tokeniser(String inFile) {
        this.inFileName = inFile;
    }

    /**
     * Remembers {@code inFile} as the input file and tokenises it.
     *
     * @param inFile path of the file to read
     * @return the result of {@link #tokenise()} for that file
     */
    public Vector<String> tokenise(String inFile) {
        this.inFileName = inFile;
        return this.tokenise();
    }

    /**
     * Reads the configured input file as UTF-8 and tokenises every line.
     *
     * <p>If a read error occurs part-way through, a message is printed and
     * the tokens collected up to that point are returned.
     *
     * <p>NOTE(review): a missing file still terminates the JVM via
     * {@code System.exit(-1)}, as before; kept for backward compatibility.
     *
     * @return the tokens of all lines in file order, or {@code null} when no
     *         file name is configured or the reader cannot be created
     */
    public Vector<String> tokenise() {
        this.tokens = new Vector<String>();
        if (this.inFileName == null) {
            System.out.println("Input File Name not initialized");
            return null;
        }

        BufferedReader reader;
        try {
            reader = new BufferedReader(
                    new InputStreamReader((InputStream) new FileInputStream(this.inFileName), "UTF8"));
        } catch (UnsupportedEncodingException e) {
            System.out.println("Unsupported encoding:" + this.inFileName);
            // Previously execution fell through and dereferenced a null reader.
            return null;
        } catch (FileNotFoundException e) {
            System.out.println("File not found:" + this.inFileName);
            System.exit(-1);
            // Only reached if the exit call does not terminate the JVM.
            return null;
        }

        try {
            String line = reader.readLine();
            while (line != null) {
                this.tokens.addAll(this.tokeniseSentence(line));
                line = reader.readLine();
            }
        } catch (IOException e) {
            // Stop at the first failure: retrying with the stale line would
            // re-add the same tokens forever.
            System.out.println("Can't read from file" + this.inFileName);
        } finally {
            try {
                reader.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails; tokens are already collected.
            }
        }
        return this.tokens;
    }

    /**
     * Splits one string at word boundaries and keeps the non-garbage pieces.
     *
     * @param input text to split; {@code null} is treated as empty
     * @return the kept pieces in order of appearance (never {@code null})
     */
    public Vector<String> tokeniseSentence(String input) {
        Vector<String> words = new Vector<String>();
        if (input == null) {
            return words;
        }
        BreakIterator boundary = BreakIterator.getWordInstance();
        boundary.setText(input);
        int start = boundary.first();
        for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) {
            String word = input.substring(start, end);
            if (!this.isGarbage(word)) {
                words.add(word);
            }
            start = end;
        }
        return words;
    }

    /**
     * Tells whether a piece of text should be dropped instead of kept as a token.
     *
     * <p>A piece is garbage when every character is one of space, '!', ']',
     * ',' or '-'. The empty string and {@code null} are garbage as well.
     *
     * @param word candidate token
     * @return {@code true} if the piece must be discarded
     */
    public boolean isGarbage(String word) {
        if (word == null) {
            return true;
        }
        for (int i = 0; i < word.length(); i++) {
            if (GARBAGE_CHARS.indexOf(word.charAt(i)) < 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Small manual demo: prints a fixed reference line, then the tokens of a
     * sample sentence, then the number 10.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("20   21 ! 5D ] 29 ) 23 # 24 $ 25 % 26 & 27 ' 28 ( 2A * 2B +1 2C , 2D - 2E . 2F / 3A * 3B +");
        Tokeniser tkn = new Tokeniser();
        Vector<String> words = tkn.tokeniseSentence("\u092f\u0939\u0940 \u0928\u0939\u0940\u0902, \u0926\u094b\u0928\u094b\u0902 \u0915\u0940 \u0907\u0938 \u092e\u0941\u0932\u093e\u0915\u093e\u0924 \u0915\u0940 \u091a\u0930\u094d\u091a\u093e \u092c\u094d\u0932\u0949\u0917 \u092e\u0947\u0902 \u092d\u0940 \u0915\u093e\u092b\u0940 \u091c\u093c\u094b\u0930\u094b\u0902 \u092a\u0930 \u0939\u0948 ");
        // The token dump used to be printed inside tokeniseSentence itself.
        for (int k = 0; k < words.size(); k++) {
            System.out.print("\"" + words.get(k) + "\" + ");
        }
        int i = 10;
        System.out.println(Integer.toString(i));
    }
}

