/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.SentenceProcessor;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.AbstractEval;
import edu.stanford.nlp.parser.lexparser.BiLexPCFGParser;
import edu.stanford.nlp.parser.lexparser.BinaryGrammar;
import edu.stanford.nlp.parser.lexparser.BinaryGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.CollinsPuncTransformer;
import edu.stanford.nlp.parser.lexparser.Debinarizer;
import edu.stanford.nlp.parser.lexparser.DependencyGrammar;
import edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.EvalB;
import edu.stanford.nlp.parser.lexparser.ExhaustiveDependencyParser;
import edu.stanford.nlp.parser.lexparser.ExhaustivePCFGParser;
import edu.stanford.nlp.parser.lexparser.LabeledConstituentEval;
import edu.stanford.nlp.parser.lexparser.LeftHeadFinder;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.MLEDependencyGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.N5BiLexPCFGParser;
import edu.stanford.nlp.parser.lexparser.NodePruner;
import edu.stanford.nlp.parser.lexparser.NullGrammarProjection;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.ParentAnnotationStats;
import edu.stanford.nlp.parser.lexparser.ParserData;
import edu.stanford.nlp.parser.lexparser.ProjectionScorer;
import edu.stanford.nlp.parser.lexparser.Test;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.parser.lexparser.TransformTreeDependency;
import edu.stanford.nlp.parser.lexparser.TreeAnnotator;
import edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.TwinScorer;
import edu.stanford.nlp.parser.lexparser.UnaryGrammar;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeLengthComparator;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Numberer;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Timing;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command-line driver that trains a parser from one range of treebank files and
 * evaluates it on another.
 *
 * <p>{@link #main} reads the training and test trees, binarizes them, extracts
 * a PCFG and/or a dependency grammar plus a lexicon (depending on
 * {@code op.doPCFG} / {@code op.doDep}), optionally serializes the result, then
 * parses every sufficiently short test tree and prints evaluation figures.
 *
 * <p>{@link #setOptionFlag} interprets the parser-related command-line flags and
 * stores them in the supplied {@link Options} object and in the static fields of
 * {@code Train} and {@code Test}.
 *
 * <p>The class only has static members and cannot be instantiated.
 */
public class FactoredParser {
    /**
     * Program entry point.
     *
     * <p>Options handled directly here: {@code -path dir},
     * {@code -train low high}, {@code -test low high}, {@code -serialize file},
     * {@code -tLPP className} and {@code -encoding name}. Everything else is
     * offered first to {@link #setOptionFlag} and then to the language-specific
     * {@code TreebankLangParserParams.setOptionFlag}; an option neither of them
     * consumes is reported on stderr and skipped. Argument processing stops at
     * the first argument that does not start with {@code -}.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        TreebankLangParserParams tlpParams = new EnglishTreebankParserParams();
        Options op = new Options(tlpParams);
        System.out.println("Currently " + new Date());
        System.out.print("Invoked with arguments:");
        for (int i = 0; i < args.length; ++i) {
            System.out.print(" " + args[i]);
        }
        System.out.println();

        // Defaults; each can be overridden on the command line.
        String path = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";
        int trainLow = 200;
        int trainHigh = 2199;
        int testLow = 2200;
        int testHigh = 2219;
        String serializeFile = null;

        // ---- Command-line processing ----
        int i = 0;
        while (i < args.length && args[i].startsWith("-")) {
            if (args[i].equalsIgnoreCase("-path") && i + 1 < args.length) {
                path = args[i + 1];
                i += 2;
                continue;
            }
            if (args[i].equalsIgnoreCase("-train") && i + 2 < args.length) {
                trainLow = Integer.parseInt(args[i + 1]);
                trainHigh = Integer.parseInt(args[i + 2]);
                i += 3;
                continue;
            }
            if (args[i].equalsIgnoreCase("-test") && i + 2 < args.length) {
                testLow = Integer.parseInt(args[i + 1]);
                testHigh = Integer.parseInt(args[i + 2]);
                i += 3;
                continue;
            }
            if (args[i].equalsIgnoreCase("-serialize") && i + 1 < args.length) {
                serializeFile = args[i + 1];
                i += 2;
                continue;
            }
            if (args[i].equalsIgnoreCase("-tLPP") && i + 1 < args.length) {
                // On failure the previously selected parameters stay in effect.
                try {
                    op.tlpParams = tlpParams = (TreebankLangParserParams)Class.forName(args[i + 1]).newInstance();
                }
                catch (ClassNotFoundException e) {
                    System.err.println("Class not found: " + args[i + 1]);
                }
                catch (InstantiationException e) {
                    System.err.println("Couldn't instantiate: " + args[i + 1] + ": " + e.toString());
                }
                catch (IllegalAccessException e) {
                    System.err.println("illegal access" + e);
                }
                i += 2;
                continue;
            }
            // The bounds check keeps a trailing "-encoding" from indexing past
            // the end of args; it then falls through and is reported below.
            if (args[i].equals("-encoding") && i + 1 < args.length) {
                tlpParams.setInputEncoding(args[i + 1]);
                tlpParams.setOutputEncoding(args[i + 1]);
                i += 2;
                continue;
            }
            // Both setOptionFlag methods return the index of the next
            // unprocessed argument, or the index they were given when they did
            // not consume the option.
            int j = FactoredParser.setOptionFlag(op, args, i);
            if (j == i) {
                j = tlpParams.setOptionFlag(args, i);
            }
            if (j == i) {
                System.err.println("Unknown option ignored: " + args[i]);
                j = i + 1;
            }
            i = j;
        }

        TreebankLanguagePack tlp = tlpParams.treebankLanguagePack();
        Train.sisterSplitters = new HashSet<String>(Arrays.asList(tlpParams.sisterSplitters()));
        PrintWriter pw = tlpParams.pw();
        Test.display();
        Train.display();
        op.display();
        tlpParams.display();

        // ---- Load the treebanks ----
        MemoryTreebank trainTreebank = tlpParams.memoryTreebank();
        MemoryTreebank testTreebank = tlpParams.testMemoryTreebank();
        Timing.startTime();
        System.err.print("Reading trees...");
        testTreebank.loadPath(path, (FileFilter)new NumberRangeFileFilter(testLow, testHigh, true));
        if (Test.increasingLength) {
            Collections.sort(testTreebank, new TreeLengthComparator());
        }
        trainTreebank.loadPath(path, (FileFilter)new NumberRangeFileFilter(trainLow, trainHigh, true));
        Timing.tick("done.");

        // ---- Annotate and binarize ----
        System.err.print("Binarizing trees...");
        TreeAnnotatorAndBinarizer binarizer;
        if (Train.leftToRight) {
            binarizer = new TreeAnnotatorAndBinarizer(tlpParams.headFinder(), new LeftHeadFinder(), tlpParams, op.forceCNF, !Train.outsideFactor(), true);
        } else {
            binarizer = new TreeAnnotatorAndBinarizer(tlpParams, op.forceCNF, !Train.outsideFactor(), true);
        }
        // Only created (and only used) when Train.collinsPunc is set.
        CollinsPuncTransformer collinsPuncTransformer = null;
        if (Train.collinsPunc) {
            collinsPuncTransformer = new CollinsPuncTransformer(tlp);
        }
        Debinarizer debinarizer = new Debinarizer(tlp, op.forceCNF);
        ArrayList<Tree> binaryTrainTrees = new ArrayList<Tree>();
        if (Train.selectiveSplit) {
            Train.splitters = ParentAnnotationStats.getSplitCategories(trainTreebank, Train.tagSelectiveSplit, 0, Train.selectiveSplitCutOff, Train.tagSelectiveSplitCutOff, tlpParams.treebankLanguagePack());
            if (Train.deleteSplitters != null) {
                // Drop every splitter that equals a requested deletion, or
                // shares its basic category when the deletion is itself a
                // basic category.
                ArrayList<String> deleted = new ArrayList<String>();
                for (String del : Train.deleteSplitters) {
                    String baseDel = tlp.basicCategory(del);
                    boolean checkBasic = del.equals(baseDel);
                    Iterator it = Train.splitters.iterator();
                    while (it.hasNext()) {
                        String elem = (String)it.next();
                        String baseElem = tlp.basicCategory(elem);
                        boolean delStr = checkBasic && baseElem.equals(baseDel) || elem.equals(del);
                        if (delStr) {
                            it.remove();
                            deleted.add(elem);
                        }
                    }
                }
                System.err.println("Removed from vertical splitters: " + deleted);
            }
        }
        if (Train.selectivePostSplit) {
            TreeAnnotator myTransformer = new TreeAnnotator(tlpParams.headFinder(), tlpParams);
            Treebank annotatedTB = ((Treebank)trainTreebank).transform(myTransformer);
            Train.postSplitters = ParentAnnotationStats.getSplitCategories(annotatedTB, true, 0, Train.selectivePostSplitCutOff, Train.tagSelectivePostSplitCutOff, tlpParams.treebankLanguagePack());
        }
        if (Train.hSelSplit) {
            // Extra pass over the training trees with selective splitting
            // switched off; the transformed trees are discarded, so this pass
            // is run only for its effect on the binarizer
            // (NOTE(review): presumably it collects split statistics - confirm).
            binarizer.setDoSelectiveSplit(false);
            for (Tree tree : trainTreebank) {
                if (Train.collinsPunc) {
                    tree = collinsPuncTransformer.transformTree(tree);
                }
                tree = binarizer.transformTree(tree);
            }
            binarizer.setDoSelectiveSplit(true);
        }
        for (Tree tree : trainTreebank) {
            if (Train.collinsPunc) {
                tree = collinsPuncTransformer.transformTree(tree);
            }
            tree = binarizer.transformTree(tree);
            binaryTrainTrees.add(tree);
        }
        if (Test.verbose) {
            binarizer.dumpStats();
        }
        // binaryTestTrees is index-aligned with testTreebank.
        ArrayList<Tree> binaryTestTrees = new ArrayList<Tree>();
        for (Tree tree : testTreebank) {
            if (Train.collinsPunc) {
                tree = collinsPuncTransformer.transformTree(tree);
            }
            tree = binarizer.transformTree(tree);
            binaryTestTrees.add(tree);
        }
        Timing.tick("done.");

        // ---- Extract grammars and lexicon ----
        BinaryGrammar bg = null;
        UnaryGrammar ug = null;
        DependencyGrammar dg = null;
        Lexicon lex = null;
        BinaryGrammarExtractor bgExtractor = new BinaryGrammarExtractor();
        MLEDependencyGrammarExtractor dgExtractor = new MLEDependencyGrammarExtractor(op);
        if (op.doPCFG) {
            System.err.print("Extracting PCFG...");
            Pair bgug = null;
            if (Train.cheatPCFG) {
                // "Cheating" mode: the grammar is extracted from training and
                // test trees together.
                ArrayList<Tree> allTrees = new ArrayList<Tree>(binaryTrainTrees);
                allTrees.addAll(binaryTestTrees);
                bgug = (Pair)bgExtractor.extract(allTrees);
            } else {
                bgug = (Pair)bgExtractor.extract(binaryTrainTrees);
            }
            bg = (BinaryGrammar)bgug.second;
            bg.splitRules();
            ug = (UnaryGrammar)bgug.first;
            ug.purgeRules();
            Timing.tick("done.");
        }
        System.err.print("Extracting Lexicon...");
        lex = tlpParams.lex(op.lexOptions);
        lex.train(binaryTrainTrees);
        Timing.tick("done.");
        if (op.doDep) {
            System.err.print("Extracting Dependencies...");
            // The binarized training trees are not used after this point.
            binaryTrainTrees.clear();
            // The result must be stored in dg: it is tuned just below and later
            // handed to the serializer and the dependency parsers.
            dg = (DependencyGrammar)dgExtractor.extract(((AbstractCollection)trainTreebank).iterator(), new TransformTreeDependency(tlpParams, true));
            Timing.tick("done.");
            System.out.print("Tuning Dependency Model...");
            dg.tune(binaryTestTrees);
            Timing.tick("done.");
        }
        BinaryGrammar boundBG = bg;
        UnaryGrammar boundUG = ug;
        NullGrammarProjection gp = new NullGrammarProjection(bg, ug);
        if (serializeFile != null) {
            System.err.print("Serializing parser...");
            LexicalizedParser.saveParserDataToSerialized(new ParserData(lex, bg, ug, dg, Numberer.getNumberers(), op), serializeFile);
            Timing.tick("done.");
        }

        // ---- Build the parsers that the selected mode needs ----
        ExhaustivePCFGParser parser = null;
        if (op.doPCFG) {
            parser = new ExhaustivePCFGParser(boundBG, boundUG, lex, op);
        }
        ExhaustiveDependencyParser dparser = op.doDep ? new ExhaustiveDependencyParser(dg, lex, op) : null;
        TwinScorer scorer = op.doPCFG ? new TwinScorer(new ProjectionScorer(parser, gp), dparser) : null;
        BiLexPCFGParser bparser = null;
        if (op.doPCFG && op.doDep) {
            bparser = Test.useN5 ? new N5BiLexPCFGParser(scorer, parser, dparser, bg, ug, dg, lex, op, gp) : new BiLexPCFGParser(scorer, parser, dparser, bg, ug, dg, lex, op, gp);
        }

        // ---- Evaluation accumulators ----
        LabeledConstituentEval pcfgPE = new LabeledConstituentEval("pcfg  PE", tlp);
        LabeledConstituentEval comboPE = new LabeledConstituentEval("combo PE", tlp);
        LabeledConstituentEval.CBEval pcfgCB = new LabeledConstituentEval.CBEval("pcfg  CB", tlp);
        AbstractEval.TaggingEval pcfgTE = new AbstractEval.TaggingEval("pcfg  TE");
        AbstractEval.TaggingEval comboTE = new AbstractEval.TaggingEval("combo TE");
        AbstractEval.TaggingEval pcfgTEnoPunct = new AbstractEval.TaggingEval("pcfg nopunct TE");
        AbstractEval.TaggingEval comboTEnoPunct = new AbstractEval.TaggingEval("combo nopunct TE");
        AbstractEval.TaggingEval depTE = new AbstractEval.TaggingEval("depnd TE");
        AbstractEval.DependencyEval depDE = new AbstractEval.DependencyEval("depnd DE", tlp.punctuationWordAcceptFilter());
        AbstractEval.DependencyEval comboDE = new AbstractEval.DependencyEval("combo DE", tlp.punctuationWordAcceptFilter());
        if (Test.evalb) {
            EvalB.initEVALBfiles(tlpParams);
        }

        // The tagger class is loaded reflectively; if that fails for any
        // reason, parsing continues without pre-tagging.
        SentenceProcessor tagger = null;
        if (Test.preTag) {
            try {
                Class[] argsClass = new Class[]{String.class};
                Object[] arguments = new Object[]{"/u/nlp/data/tagger.params/wsj0-21.holder"};
                tagger = (SentenceProcessor)Class.forName("edu.stanford.nlp.process.SentenceTagger").getConstructor(argsClass).newInstance(arguments);
            }
            catch (Exception e) {
                System.err.println(e);
                System.err.println("Warning: No pretagging of sentences will be done.");
            }
        }

        // ---- Parse and score every test tree that is short enough ----
        int ttSize = testTreebank.size();
        for (int tNum = 0; tNum < ttSize; ++tNum) {
            Tree tree = testTreebank.get(tNum);
            int testTreeLen = tree.yield().size();
            if (testTreeLen > Test.maxLength) {
                continue;
            }
            Tree binaryTree = (Tree)binaryTestTrees.get(tNum);
            System.out.println("-------------------------------------");
            System.out.println("Number: " + (tNum + 1));
            System.out.println("Length: " + testTreeLen);
            long timeMil1 = System.currentTimeMillis();
            Timing.tick("Starting parse.");
            if (op.doPCFG) {
                if (Test.forceTags) {
                    if (tagger != null) {
                        // Tag the sentence without its last token, then append
                        // the ".$." word again before parsing.
                        parser.parse(FactoredParser.addLast(tagger.processSentence(FactoredParser.cutLast(FactoredParser.wordify(binaryTree.yield())))));
                    } else {
                        parser.parse(FactoredParser.cleanTags(binaryTree.taggedYield(), tlp));
                    }
                } else {
                    parser.parse(binaryTree.yield());
                }
            }
            if (op.doDep) {
                dparser.parse(binaryTree.yield());
            }
            boolean bothPassed = false;
            if (op.doPCFG && op.doDep) {
                bothPassed = bparser.parse(binaryTree.yield());
            }
            long timeMil2 = System.currentTimeMillis();
            long elapsed = timeMil2 - timeMil1;
            // Elapsed time in seconds, truncated to one decimal place.
            System.err.println("Time: " + (double)((int)(elapsed / 100L)) / 10.0 + " sec.");

            // tree2b / tree2: binarized and debinarized PCFG parse.
            Tree tree2b = null;
            Tree tree2 = null;
            if (op.doPCFG) {
                tree2b = parser.getBestParse();
                tree2 = debinarizer.transformTree(tree2b);
            }
            // tree3 / tree3db: dependency parse and its debinarized form.
            Tree tree3 = null;
            Tree tree3db = null;
            if (op.doDep) {
                tree3 = dparser.getBestParse();
                tree3db = debinarizer.transformTree(tree3);
                tree3.pennPrint(pw);
            }
            // tree4: combined parse, falling back to the PCFG parse whenever
            // the combined parser produced nothing usable.
            Tree tree4 = null;
            if (op.doPCFG && op.doDep) {
                try {
                    tree4 = bparser.getBestParse();
                    if (tree4 == null) {
                        tree4 = tree2b;
                    }
                }
                catch (NullPointerException e) {
                    System.err.println("Blocked, using PCFG parse!");
                    tree4 = tree2b;
                }
            }
            if (op.doPCFG && !bothPassed) {
                tree4 = tree2b;
            }
            if (op.doDep) {
                depDE.evaluate(tree3, binaryTree, pw);
                ((AbstractEval)depTE).evaluate(tree3db, tree, pw);
            }
            TreeTransformer tc = tlpParams.collinizer();
            TreeTransformer tcEvalb = tlpParams.collinizerEvalb();
            Tree tree4b = null;
            if (op.doPCFG) {
                pcfgPE.evaluate(tc.transformTree(tree2), tc.transformTree(tree), pw);
                ((AbstractEval)pcfgCB).evaluate(tc.transformTree(tree2), tc.transformTree(tree), pw);
                if (op.doDep) {
                    comboDE.evaluate(bothPassed ? tree4 : tree3, binaryTree, pw);
                    // Keep the binarized combined tree for scoring below.
                    tree4b = tree4;
                    tree4 = debinarizer.transformTree(tree4);
                    if (op.nodePrune) {
                        NodePruner np = new NodePruner(parser, debinarizer);
                        tree4 = np.prune(tree4);
                    }
                    comboPE.evaluate(tc.transformTree(tree4), tc.transformTree(tree), pw);
                }
                ((AbstractEval)pcfgTE).evaluate(tcEvalb.transformTree(tree2), tcEvalb.transformTree(tree), pw);
                ((AbstractEval)pcfgTEnoPunct).evaluate(tc.transformTree(tree2), tc.transformTree(tree), pw);
                if (op.doDep) {
                    ((AbstractEval)comboTE).evaluate(tcEvalb.transformTree(tree4), tcEvalb.transformTree(tree), pw);
                    ((AbstractEval)comboTEnoPunct).evaluate(tc.transformTree(tree4), tc.transformTree(tree), pw);
                }
                System.out.println("PCFG only: " + parser.scoreBinarizedTree(tree2b, 0));
                tree2.pennPrint(pw);
                if (op.doDep) {
                    System.out.println("Combo: " + parser.scoreBinarizedTree(tree4b, 0));
                    tree4.pennPrint(pw);
                }
                System.out.println("Correct:" + parser.scoreBinarizedTree(binaryTree, 0));
                tree.pennPrint(pw);
            }
            if (Test.evalb) {
                // Write the gold tree next to the best available guess.
                if (op.doPCFG && op.doDep) {
                    EvalB.writeEVALBline(tcEvalb.transformTree(tree), tcEvalb.transformTree(tree4));
                } else if (op.doPCFG) {
                    EvalB.writeEVALBline(tcEvalb.transformTree(tree), tcEvalb.transformTree(tree2));
                } else if (op.doDep) {
                    EvalB.writeEVALBline(tcEvalb.transformTree(tree), tcEvalb.transformTree(tree3db));
                }
            }
        }
        if (Test.evalb) {
            EvalB.closeEVALBfiles();
        }

        // ---- Summary ----
        if (op.doPCFG) {
            System.out.print("ParseEval for PCFG: ");
            pcfgPE.display(false, pw);
            System.out.println("Grammar size: " + Numberer.getGlobalNumberer("states").total());
            ((AbstractEval)pcfgCB).display(false, pw);
            if (op.doDep) {
                System.out.print("ParseEval for Combo: ");
                comboPE.display(false, pw);
            }
            System.out.print("TagEval for PCFG: ");
            pcfgTE.display(false, pw);
            System.out.print("TagEval for PCFG excluding punct.: ");
            pcfgTEnoPunct.display(false, pw);
            if (op.doDep) {
                System.out.print("TagEval for Combo: ");
                comboTE.display(false, pw);
                System.out.print("TagEval for Combo excluding punct.: ");
                comboTEnoPunct.display(false, pw);
            }
        }
        if (op.doDep) {
            System.out.print("TagEval for Dependency: ");
            depTE.display(false, pw);
            System.out.print("DepEval for Dependency: ");
            depDE.display(false, pw);
        }
        if (op.doPCFG && op.doDep) {
            System.out.print("DepEval for Combo: ");
            comboDE.display(false, pw);
        }
    }

    /**
     * Interprets the option at {@code args[i]} and records it in {@code op} or
     * in the static fields of {@code Train} / {@code Test}.
     *
     * <p>Option names are matched case-insensitively. An option that takes a
     * value is only recognised when that value is actually present in
     * {@code args}; otherwise it is treated like an unknown option.
     *
     * @param op   options object that receives the per-parser settings
     * @param args the complete argument array
     * @param i    index of the option to interpret; must be a valid index
     * @return the index of the first argument that was not consumed, i.e.
     *         {@code i + 1} for a plain flag and {@code i + 2} for an option
     *         with a value; {@code i} itself if the option was not recognised
     * @throws NumberFormatException if a numeric option value cannot be parsed
     */
    public static int setOptionFlag(Options op, String[] args, int i) {
        if (args[i].equalsIgnoreCase("-PCFG")) {
            op.doDep = false;
            op.doPCFG = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-dep")) {
            op.doDep = true;
            op.doPCFG = false;
            ++i;
        } else if (args[i].equalsIgnoreCase("-factored")) {
            op.doDep = true;
            op.doPCFG = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-noRecoveryTagging")) {
            Test.noRecoveryTagging = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-maxLength") && i + 1 < args.length) {
            Test.maxLength = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-MAX_ITEMS") && i + 1 < args.length) {
            Test.MAX_ITEMS = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-iterativeCKY")) {
            Test.iterativeCKY = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-vMarkov") && i + 1 < args.length) {
            // Order <= 1: neither flag; 2: PA only; >= 3: PA and gPA.
            int order = Integer.parseInt(args[i + 1]);
            Train.PA = order >= 2;
            Train.gPA = order >= 3;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-vSelSplitCutOff") && i + 1 < args.length) {
            Train.selectiveSplitCutOff = Double.parseDouble(args[i + 1]);
            Train.selectiveSplit = Train.selectiveSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-vSelPostSplitCutOff") && i + 1 < args.length) {
            Train.selectivePostSplitCutOff = Double.parseDouble(args[i + 1]);
            Train.selectivePostSplit = Train.selectivePostSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-deleteSplitters") && i + 1 < args.length) {
            // Comma-separated list; spaces around the commas are ignored.
            String[] toDel = args[i + 1].split(" *, *");
            Train.deleteSplitters = new HashSet<String>(Arrays.asList(toDel));
            i += 2;
        } else if (args[i].equalsIgnoreCase("-postSplitWithBaseCategory")) {
            Train.postSplitWithBaseCategory = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-vPostMarkov") && i + 1 < args.length) {
            // Same order encoding as -vMarkov, for the "post" flags.
            int order = Integer.parseInt(args[i + 1]);
            Train.postPA = order >= 2;
            Train.postGPA = order >= 3;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-hMarkov") && i + 1 < args.length) {
            // A negative order switches markov factoring off and leaves the
            // stored order untouched.
            int order = Integer.parseInt(args[i + 1]);
            if (order >= 0) {
                Train.markovOrder = order;
                Train.markovFactor = true;
            } else {
                Train.markovFactor = false;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-depWeight") && i + 1 < args.length) {
            Test.depWeight = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-smoothTagsThresh") && i + 1 < args.length) {
            op.lexOptions.smoothInUnknownsThreshold = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unseenSmooth") && i + 1 < args.length) {
            Test.unseenSmooth = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-fractionBeforeUnseenCounting") && i + 1 < args.length) {
            Train.fractionBeforeUnseenCounting = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-hSelSplitThresh") && i + 1 < args.length) {
            Train.HSEL_CUT = Integer.parseInt(args[i + 1]);
            Train.hSelSplit = Train.HSEL_CUT > 0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-tagPA")) {
            Train.tagPA = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-tagSelSplitCutOff") && i + 1 < args.length) {
            Train.tagSelectiveSplitCutOff = Double.parseDouble(args[i + 1]);
            Train.tagSelectiveSplit = Train.tagSelectiveSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-tagSelPostSplitCutOff") && i + 1 < args.length) {
            Train.tagSelectivePostSplitCutOff = Double.parseDouble(args[i + 1]);
            Train.tagSelectivePostSplit = Train.tagSelectivePostSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-noTagSplit")) {
            Train.noTagSplit = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-uwm") && i + 1 < args.length) {
            op.lexOptions.useUnknownWordSignatures = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-openClassThreshold") && i + 1 < args.length) {
            Train.openClassTypesThreshold = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-leaveItAll")) {
            Train.leaveItAll = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-unary")) {
            Train.markUnary = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-unary2")) {
            Train.markUnary2 = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-unaryTags")) {
            Train.markUnaryTags = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-mutate")) {
            op.lexOptions.smartMutation = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-useUnicodeType")) {
            op.lexOptions.useUnicodeType = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-rightRec")) {
            Train.rightRec = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-noRightRec")) {
            Train.rightRec = false;
            ++i;
        } else if (args[i].equalsIgnoreCase("-preTag")) {
            Test.preTag = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-forceTags")) {
            Test.forceTags = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-scTags")) {
            op.dcTags = false;
            ++i;
        } else if (args[i].equalsIgnoreCase("-dcTags")) {
            op.dcTags = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-evalb")) {
            Test.evalb = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-v") || args[i].equalsIgnoreCase("-verbose")) {
            Test.verbose = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-outputFilesDirectory") && i + 1 < args.length) {
            Test.outputFilesDirectory = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-outputFilesExtension") && i + 1 < args.length) {
            Test.outputFilesExtension = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-writeOutputFiles")) {
            Test.writeOutputFiles = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-printAllBestParses")) {
            Test.printAllBestParses = true;
            ++i;
        } else if ((args[i].equalsIgnoreCase("-outputTreeFormat") || args[i].equalsIgnoreCase("-outputFormat")) && i + 1 < args.length) {
            Test.outputFormat = args[i + 1];
            i += 2;
        } else if ((args[i].equalsIgnoreCase("-outputTreeFormatOptions") || args[i].equalsIgnoreCase("-outputFormatOptions")) && i + 1 < args.length) {
            Test.outputFormatOptions = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-addMissingFinalPunctuation")) {
            Test.addMissingFinalPunctuation = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-flexiTag")) {
            op.flexiTag = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-lexiTag")) {
            op.flexiTag = false;
            ++i;
        } else if (args[i].equalsIgnoreCase("-compactGrammar") && i + 1 < args.length) {
            Train.compactGrammar = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-markFinalStates") && i + 1 < args.length) {
            Train.markFinalStates = args[i + 1].equalsIgnoreCase("true");
            i += 2;
        } else if (args[i].equalsIgnoreCase("-leftToRight") && i + 1 < args.length) {
            // Case-insensitive, like the other true/false options.
            Train.leftToRight = args[i + 1].equalsIgnoreCase("true");
            i += 2;
        } else if (args[i].equalsIgnoreCase("-cnf")) {
            op.forceCNF = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-nodePrune") && i + 1 < args.length) {
            op.nodePrune = args[i + 1].equalsIgnoreCase("true");
            i += 2;
        } else if (args[i].equalsIgnoreCase("-acl03pcfg")) {
            FactoredParser.applyPcfgPreset(op, true, 2, true);
            ++i;
        } else if (args[i].equalsIgnoreCase("-goodPCFG")) {
            // Same settings as -acl03pcfg plus a fixed set of deleted splitters.
            FactoredParser.applyPcfgPreset(op, true, 2, true);
            String[] delSplit = new String[]{"-deleteSplitters", "VP^NP,VP^VP,VP^SINV,VP^SQ"};
            if (FactoredParser.setOptionFlag(op, delSplit, 0) != 2) {
                System.err.println("Error processing deleteSplitters");
            }
            ++i;
        } else if (args[i].equalsIgnoreCase("-linguisticPCFG")) {
            FactoredParser.applyPcfgPreset(op, false, 5, false);
            ++i;
        } else if (args[i].equalsIgnoreCase("-ijcai03")) {
            FactoredParser.applyFactoredPreset(op, 2);
            ++i;
        } else if (args[i].equalsIgnoreCase("-goodFactored")) {
            FactoredParser.applyFactoredPreset(op, 5);
            ++i;
        } else if (args[i].equalsIgnoreCase("-chineseFactored")) {
            op.dcTags = false;
            op.lexOptions.useUnicodeType = true;
            Train.markovOrder = 2;
            Train.hSelSplit = true;
            Train.markovFactor = true;
            Train.HSEL_CUT = 50;
            ++i;
        } else if (args[i].equalsIgnoreCase("-chinesePCFG")) {
            op.doDep = false;
            op.doPCFG = true;
            op.dcTags = false;
            ++i;
        } else if (args[i].equalsIgnoreCase("-printTT")) {
            Train.printTreeTransformations = true;
            ++i;
        } else if (args[i].equalsIgnoreCase("-printAnnotated") && i + 1 < args.length) {
            try {
                Train.printAnnotatedPW = op.tlpParams.pw(new FileOutputStream(args[i + 1]));
            }
            catch (IOException ioe) {
                // Best effort: report the problem and carry on without the file.
                System.err.println("Warning: cannot open " + args[i + 1] + " for -printAnnotated output: " + ioe);
                Train.printAnnotatedPW = null;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printBinarized") && i + 1 < args.length) {
            try {
                Train.printBinarizedPW = op.tlpParams.pw(new FileOutputStream(args[i + 1]));
            }
            catch (IOException ioe) {
                // Best effort: report the problem and carry on without the file.
                System.err.println("Warning: cannot open " + args[i + 1] + " for -printBinarized output: " + ioe);
                Train.printBinarizedPW = null;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printStates")) {
            Train.printStates = true;
            ++i;
        }
        return i;
    }

    /**
     * Applies the settings shared by the PCFG-only presets
     * ({@code -acl03pcfg}, {@code -goodPCFG}, {@code -linguisticPCFG}).
     *
     * @param op                    options object to update
     * @param rightRec              value for {@code Train.rightRec}
     * @param unknownWordSignatures value for {@code op.lexOptions.useUnknownWordSignatures}
     * @param flexiTag              value for {@code op.flexiTag}
     */
    private static void applyPcfgPreset(Options op, boolean rightRec, int unknownWordSignatures, boolean flexiTag) {
        op.doDep = false;
        op.doPCFG = true;
        Train.markUnary = true;
        Train.PA = true;
        Train.gPA = false;
        Train.tagPA = true;
        Train.tagSelectiveSplit = false;
        Train.rightRec = rightRec;
        Train.selectiveSplit = true;
        Train.selectiveSplitCutOff = 400.0;
        Train.markovFactor = true;
        Train.markovOrder = 2;
        Train.hSelSplit = true;
        op.lexOptions.useUnknownWordSignatures = unknownWordSignatures;
        op.flexiTag = flexiTag;
        op.dcTags = false;
    }

    /**
     * Applies the settings shared by the factored presets
     * ({@code -ijcai03}, {@code -goodFactored}).
     *
     * @param op                    options object to update
     * @param unknownWordSignatures value for {@code op.lexOptions.useUnknownWordSignatures}
     */
    private static void applyFactoredPreset(Options op, int unknownWordSignatures) {
        op.doDep = true;
        op.doPCFG = true;
        Train.markUnary = false;
        Train.PA = true;
        Train.gPA = false;
        Train.tagPA = false;
        Train.tagSelectiveSplit = false;
        Train.rightRec = false;
        Train.selectiveSplit = true;
        Train.selectiveSplitCutOff = 300.0;
        Train.markovFactor = true;
        Train.markovOrder = 2;
        Train.hSelSplit = true;
        Train.compactGrammar = 0;
        op.lexOptions.useUnknownWordSignatures = unknownWordSignatures;
        op.flexiTag = false;
        op.dcTags = true;
    }

    /**
     * Returns a copy of a tagged-word list in which every tag is replaced by
     * its basic category.
     *
     * @param twList list whose elements are {@link TaggedWord}s
     * @param tlp    language pack used to compute the basic category of a tag
     * @return a new list of new {@link TaggedWord}s, in the same order
     */
    private static List<TaggedWord> cleanTags(List twList, TreebankLanguagePack tlp) {
        int sz = twList.size();
        ArrayList<TaggedWord> l = new ArrayList<TaggedWord>(sz);
        for (int i = 0; i < sz; ++i) {
            TaggedWord tw = (TaggedWord)twList.get(i);
            l.add(new TaggedWord(tw.word(), tlp.basicCategory(tw.tag())));
        }
        return l;
    }

    /**
     * Builds a sentence of {@link Word}s from the string form of each list
     * element.
     *
     * @param wList source items; only their {@code toString()} value is used
     * @return a new sentence with one word per item
     */
    private static Sentence wordify(List wList) {
        Sentence s = new Sentence();
        for (int i = 0; i < wList.size(); ++i) {
            s.add(new Word(wList.get(i).toString()));
        }
        return s;
    }

    /**
     * Returns a copy of the sentence without its last element.
     *
     * @param s the sentence to shorten; an empty sentence yields an empty copy
     * @return a new sentence holding all but the last element of {@code s}
     */
    private static Sentence cutLast(Sentence s) {
        // Clamp at zero so an empty sentence does not produce subList(0, -1).
        return new Sentence(s.subList(0, Math.max(0, s.size() - 1)));
    }

    /**
     * Returns a copy of the sentence with the word {@code ".$."} appended
     * (NOTE(review): presumably the parser's end-of-sentence symbol - confirm).
     *
     * @param s the sentence to extend; it is not modified
     * @return a new sentence ending in the extra word
     */
    private static Sentence addLast(Sentence s) {
        Sentence s2 = new Sentence(s);
        s2.add(new Word(".$."));
        return s2;
    }

    /** Not instantiable: the class only has static members. */
    private FactoredParser() {
    }
}

