/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.PTB2TextLexer;
import edu.stanford.nlp.process.PTBLexer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Tokenizer that pulls its tokens from a {@link PTBLexer} and builds them
 * with a {@link LexedTokenFactory}.
 *
 * <p>The lexer may emit tokens whose word is {@code "*CR*"}; the command-line
 * driver in {@link #main(String[])} renders such a token as a line break.
 * Unless the tokenizer is constructed with {@code tokenizeCRs == true},
 * these tokens are dropped and never returned to the caller.
 *
 * <p>The class also offers {@code ptb2Text} helpers that run text through a
 * {@link PTB2TextLexer}, and a small command-line driver.
 */
public class PTBTokenizer
extends AbstractTokenizer {
    /** Word carried by the tokens that are filtered unless {@code tokenizeCRs} is set. */
    private static final String CR_TOKEN = "*CR*";

    /** Usage message printed by {@link #main(String[])}. */
    private static final String USAGE = "usage: java edu.stanford.nlp.process.PTBTokenizer [-nl/-preserveLines/-ioFileList] filename";

    /** When false, tokens whose word is {@code "*CR*"} are skipped by {@link #getNext()}. */
    private final boolean tokenizeCRs;
    PTBLexer lexer;
    LexedTokenFactory tokenFactory;

    /**
     * Creates a tokenizer over {@code r} that drops {@code "*CR*"} tokens and
     * builds tokens with a {@link WordTokenFactory}.
     *
     * @param r the reader to tokenize
     */
    public PTBTokenizer(Reader r) {
        this(r, false);
    }

    /**
     * Creates a tokenizer over {@code r} that builds tokens with a
     * {@link WordTokenFactory}.
     *
     * @param r the reader to tokenize
     * @param tokenizeCRs whether {@code "*CR*"} tokens are returned (true) or skipped (false)
     */
    public PTBTokenizer(Reader r, boolean tokenizeCRs) {
        this(r, tokenizeCRs, new WordTokenFactory());
    }

    /**
     * Creates a tokenizer over {@code r}.
     *
     * @param r the reader to tokenize
     * @param tokenizeCRs whether {@code "*CR*"} tokens are returned (true) or skipped (false)
     * @param tokenFactory factory handed to the underlying {@link PTBLexer}
     */
    public PTBTokenizer(Reader r, boolean tokenizeCRs, LexedTokenFactory tokenFactory) {
        this.tokenizeCRs = tokenizeCRs;
        this.tokenFactory = tokenFactory;
        this.setSource(r);
    }

    /**
     * Fetches the next token from the lexer.
     *
     * @return the next token, or {@code null} when there is no lexer, the
     *         lexer has no more tokens, or the lexer failed
     */
    protected Object getNext() {
        if (this.lexer == null) {
            return null;
        }
        try {
            Object token = this.lexer.next();
            if (!this.tokenizeCRs) {
                // Explicit null/type checks: a null token (end of input) or a
                // token that is not a HasWord ends the skipping loop and is
                // returned as-is, instead of relying on a caught
                // NullPointerException/ClassCastException.
                while (token instanceof HasWord && CR_TOKEN.equals(((HasWord)token).word())) {
                    token = this.lexer.next();
                }
            }
            return token;
        }
        catch (Exception e) {
            // Best effort, as before: a failing lexer is treated as end of
            // input. Returning null (rather than whatever token was last
            // read) guarantees a skipped "*CR*" token can never leak out.
            this.nextToken = null;
            return null;
        }
    }

    /**
     * Replaces the underlying lexer with a new one reading from {@code r},
     * using this tokenizer's token factory.
     *
     * @param r the new reader to tokenize
     */
    public void setSource(Reader r) {
        this.lexer = new PTBLexer(r, this.tokenFactory);
    }

    /**
     * Runs {@code ptbText} through a {@link PTB2TextLexer} and concatenates
     * every token the lexer returns.
     *
     * @param ptbText the text to convert
     * @return the concatenation of the lexer's output tokens; if the lexer
     *         throws an {@link IOException} the stack trace is printed and
     *         the text collected so far is returned
     */
    public static String ptb2Text(String ptbText) {
        StringBuilder text = new StringBuilder(ptbText.length());
        PTB2TextLexer lexer = new PTB2TextLexer(new StringReader(ptbText));
        try {
            String token;
            while ((token = lexer.next()) != null) {
                text.append(token);
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return text.toString();
    }

    /**
     * Joins the given words with {@link StringUtils#join} and converts the
     * result with {@link #ptb2Text(String)}. Elements that are {@link Word}
     * instances contribute their {@code word()}; all other elements are
     * passed to the join unchanged.
     *
     * <p>The supplied list is not modified.
     *
     * @param ptbWords the words to convert
     * @return the converted text
     */
    public static String ptb2Text(List ptbWords) {
        // Work on a copy so the caller's list keeps its Word objects and
        // unmodifiable lists are accepted.
        List words = new ArrayList(ptbWords.size());
        for (Iterator it = ptbWords.iterator(); it.hasNext(); ) {
            Object element = it.next();
            words.add(element instanceof Word ? ((Word)element).word() : element);
        }
        return PTBTokenizer.ptb2Text(StringUtils.join(words));
    }

    /**
     * @return a new {@link PTBTokenizerFactory} with default settings
     */
    public static TokenizerFactory factory() {
        return new PTBTokenizerFactory();
    }

    /**
     * Command-line driver: tokenizes each named file and prints the tokens.
     *
     * <p>Options (all must precede the file names):
     * <ul>
     * <li>{@code -nl}: also emit {@code "*CR*"} tokens</li>
     * <li>{@code -preserveLines}: print tokens space-separated, starting a
     *     new output line at each {@code "*CR*"} token (implies {@code -nl})</li>
     * <li>{@code -ioFileList}: the named files are lists whose lines hold an
     *     input file name and an output file name separated by whitespace</li>
     * <li>{@code -charset NAME}: character encoding (default {@code utf-8})
     *     used to read files and to write output files</li>
     * <li>{@code -parseInside REGEX}: only print tokens found between tokens
     *     matching {@code <REGEX>} and {@code </REGEX>}</li>
     * </ul>
     * Without {@code -ioFileList}, tokens are written to standard output.
     *
     * @param args command-line arguments
     * @throws IOException if a file cannot be read or written, or a file
     *         list contains a malformed line
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println(USAGE);
            return;
        }
        int i = 0;
        String charset = "utf-8";
        Pattern parseInsideBegin = null;
        Pattern parseInsideEnd = null;
        boolean tokenizeNL = false;
        boolean preserveLines = false;
        boolean inputOutputFileList = false;
        // Bounds check first: the arguments may consist of options only, and
        // startsWith (unlike charAt(0)) is safe for an empty argument.
        while (i < args.length && args[i].startsWith("-")) {
            if ("-nl".equals(args[i])) {
                tokenizeNL = true;
            } else if ("-preserveLines".equals(args[i])) {
                preserveLines = true;
                tokenizeNL = true;
            } else if ("-ioFileList".equals(args[i])) {
                inputOutputFileList = true;
            } else if ("-charset".equals(args[i]) && i < args.length - 1) {
                charset = args[++i];
            } else if ("-parseInside".equals(args[i]) && i < args.length - 1) {
                ++i;
                try {
                    parseInsideBegin = Pattern.compile("<(?:" + args[i] + ")>");
                    parseInsideEnd = Pattern.compile("</(?:" + args[i] + ")>");
                }
                catch (IllegalArgumentException e) {
                    // Pattern.compile reports bad syntax with a subclass of
                    // IllegalArgumentException. The option is still ignored,
                    // but no longer silently.
                    System.err.println("Ignoring invalid -parseInside pattern: " + args[i]);
                    parseInsideBegin = null;
                    parseInsideEnd = null;
                }
            } else {
                System.err.println("Unknown option: " + args[i]);
            }
            ++i;
        }
        if (i >= args.length) {
            // Only options were given; there is nothing to tokenize.
            System.err.println(USAGE);
            return;
        }
        List<String> inputFileList = new ArrayList<String>();
        List<String> outputFileList = null;
        if (inputOutputFileList) {
            outputFileList = new ArrayList<String>();
            for (int j = i; j < args.length; ++j) {
                PTBTokenizer.readFileList(args[j], charset, inputFileList, outputFileList);
            }
        } else {
            for (int j = i; j < args.length; ++j) {
                inputFileList.add(args[j]);
            }
        }
        for (int j = 0; j < inputFileList.size(); ++j) {
            String outputFile = outputFileList == null ? null : outputFileList.get(j);
            PTBTokenizer.tokenizeFile(inputFileList.get(j), outputFile, charset, tokenizeNL,
                    preserveLines, parseInsideBegin, parseInsideEnd);
        }
    }

    /**
     * Reads a list file whose non-blank lines each hold an input file name
     * and an output file name separated by whitespace, appending the names
     * to {@code inputs} and {@code outputs} respectively.
     */
    private static void readFileList(String listFile, String charset, List<String> inputs, List<String> outputs) throws IOException {
        InputStream in = new FileInputStream(listFile);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                ++lineNumber;
                // Trim first: split would otherwise produce an empty leading
                // field for a line that starts with whitespace.
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                String[] fields = trimmed.split("\\s+");
                if (fields.length < 2) {
                    throw new IOException(listFile + ":" + lineNumber
                            + ": expected an input and an output file name but found: " + line);
                }
                inputs.add(fields[0]);
                outputs.add(fields[1]);
            }
        }
        finally {
            in.close();
        }
    }

    /**
     * Tokenizes one file and prints its tokens to {@code outputFile}, or to
     * standard output when {@code outputFile} is null. All streams opened
     * here are closed before returning, even on failure.
     */
    private static void tokenizeFile(String inputFile, String outputFile, String charset, boolean tokenizeNL,
            boolean preserveLines, Pattern parseInsideBegin, Pattern parseInsideEnd) throws IOException {
        // The input is opened before the output so that a missing input file
        // does not create or truncate the output file.
        InputStream in = new FileInputStream(inputFile);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            boolean toFile = outputFile != null;
            PrintStream out = System.out;
            if (toFile) {
                // Write the file in the same encoding the input was read
                // with, rather than the platform default.
                out = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile)), false, charset);
            }
            try {
                PTBTokenizer.printTokens(new PTBTokenizer(reader, tokenizeNL), out, preserveLines,
                        parseInsideBegin, parseInsideEnd);
            }
            finally {
                if (toFile) {
                    out.close();
                } else {
                    out.flush();
                }
            }
            // PrintStream swallows write failures; surface them for files.
            if (toFile && out.checkError()) {
                throw new IOException("Error writing tokens to " + outputFile);
            }
        }
        finally {
            in.close();
        }
    }

    /**
     * Prints the tokenizer's tokens to {@code out}: one per line, or, when
     * {@code preserveLines} is set, space-separated with a line break at
     * each {@code "*CR*"} token. When the parse-inside patterns are non-null,
     * only tokens between a begin-matching and an end-matching token are
     * printed, and the matching tokens themselves are not.
     */
    private static void printTokens(PTBTokenizer tokenizer, PrintStream out, boolean preserveLines,
            Pattern parseInsideBegin, Pattern parseInsideEnd) {
        boolean printing = parseInsideBegin == null;
        boolean beginLine = true;
        while (tokenizer.hasNext()) {
            String str = tokenizer.next().toString();
            if (parseInsideBegin != null && parseInsideBegin.matcher(str).matches()) {
                printing = true;
                continue;
            }
            if (parseInsideEnd != null && parseInsideEnd.matcher(str).matches()) {
                printing = false;
                continue;
            }
            if (!printing) {
                continue;
            }
            if (!preserveLines) {
                out.println(str);
                continue;
            }
            if (CR_TOKEN.equals(str)) {
                beginLine = true;
                out.println("");
                continue;
            }
            if (!beginLine) {
                out.print(" ");
            }
            out.print(str);
            beginLine = false;
        }
    }

    /**
     * Factory producing {@link PTBTokenizer} instances that share one
     * {@code tokenizeCRs} setting.
     */
    public static class PTBTokenizerFactory
    implements TokenizerFactory {
        /** Passed to every tokenizer this factory creates. */
        protected boolean tokenizeCRs;

        /** Creates a factory whose tokenizers skip {@code "*CR*"} tokens. */
        public PTBTokenizerFactory() {
            this(false);
        }

        /**
         * @param tokenizeCRs whether created tokenizers return {@code "*CR*"} tokens
         */
        public PTBTokenizerFactory(boolean tokenizeCRs) {
            this.tokenizeCRs = tokenizeCRs;
        }

        /**
         * @param r the reader to tokenize
         * @return a new tokenizer over {@code r}, viewed as an iterator
         */
        public Iterator getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        /**
         * @param r the reader to tokenize
         * @return a new {@link PTBTokenizer} over {@code r}
         */
        public Tokenizer getTokenizer(Reader r) {
            return new PTBTokenizer(r, this.tokenizeCRs);
        }
    }
}

