/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.trees.Tree;
import java.io.Serializable;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

public class GermanUnknownWordModel
implements Serializable {
    private static final String encoding = "UTF-8";
    private static final boolean useFirst = true;
    private static final boolean useEnd = true;
    private static final boolean useGT = false;
    private static final String unknown = "UNK";
    private static final String numberMatch = "[0-9]+\\.?[0-9]*";
    private Map tagHash = new HashMap();
    private Set seenEnd = new HashSet();
    private Map unknownGT = new HashMap();
    private static final long serialVersionUID = 221L;

    public double score(IntTaggedWord itw) {
        return this.score(itw.toTaggedWord());
    }

    public double score(TaggedWord tw) {
        double logProb;
        String word = tw.word().toString();
        String tag = tw.tag().toString();
        if (word.matches(numberMatch)) {
            logProb = tag.equals("CARD") ? 0.0 : Double.NEGATIVE_INFINITY;
        } else {
            Counter wordProbs;
            String end = word.substring(0, 1);
            if (!this.seenEnd.contains(end)) {
                end = unknown;
            }
            logProb = (wordProbs = (Counter)this.tagHash.get(tag)) == null ? Double.NEGATIVE_INFINITY : (wordProbs.keySet().contains(end) ? wordProbs.getCount(end) : wordProbs.getCount(unknown));
        }
        return logProb;
    }

    private double scoreGT(String tag) {
        double logProb = this.unknownGT.containsKey(tag) ? (Double)this.unknownGT.get(tag) : Double.NEGATIVE_INFINITY;
        return logProb;
    }

    public void train(Collection trees) {
        System.out.println("Including first letter for unknown words.");
        System.out.println("treating unknown word as the average of their equivalents by identity of last three letters.");
        this.trainUnknownGT(trees);
        HashMap c = new HashMap();
        Counter<String> tc = new Counter<String>();
        for (Tree t : trees) {
            Sentence words = t.taggedYield();
            for (TaggedWord tw : words) {
                String word = tw.word();
                int n = word.length() - 1;
                String subString = "";
                subString = subString + tw.word().substring(0, 1);
                subString = subString + tw.word().substring(n - 3 > 0 ? n - 3 : 0, n);
                String tag = tw.tag();
                if (!c.containsKey(tag)) {
                    c.put(tag, new Counter());
                }
                ((Counter)c.get(tag)).incrementCount(subString);
                tc.incrementCount(tag);
                this.seenEnd.add(subString);
            }
        }
        for (String tag : c.keySet()) {
            Counter wc = (Counter)c.get(tag);
            if (!this.tagHash.containsKey(tag)) {
                this.tagHash.put(tag, new Counter());
            }
            tc.incrementCount(tag);
            wc.setCount(unknown, 1.0);
            for (String end : wc.keySet()) {
                double prob = Math.log(wc.getCount(end) / tc.getCount(tag));
                ((Counter)this.tagHash.get(tag)).setCount(end, prob);
            }
        }
    }

    private void trainUnknownGT(Collection trees) {
        Counter<TaggedWord> twCount = new Counter<TaggedWord>();
        Counter<WordTag> wtCount = new Counter<WordTag>();
        Counter<String> tagCount = new Counter<String>();
        Counter<String> r1 = new Counter<String>();
        Counter<String> r0 = new Counter<String>();
        HashSet<String> seenWords = new HashSet<String>();
        int tokens = 0;
        for (Tree t : trees) {
            Sentence words = t.taggedYield();
            Iterator j = words.iterator();
            while (j.hasNext()) {
                ++tokens;
                TaggedWord tw = (TaggedWord)j.next();
                WordTag wt = GermanUnknownWordModel.toWordTag(tw);
                String word = wt.word();
                String tag = wt.tag();
                wtCount.incrementCount(wt);
                twCount.incrementCount(tw);
                tagCount.incrementCount(tag);
                boolean alreadySeen = seenWords.add(word);
            }
        }
        System.out.println("Total tokens: " + tokens);
        System.out.println("Total WordTag types: " + wtCount.keySet().size());
        System.out.println("Total TaggedWord types: " + twCount.keySet().size());
        System.out.println("Total tag types: " + tagCount.keySet().size());
        System.out.println("Total word types: " + seenWords.size());
        for (WordTag wt : wtCount.keySet()) {
            if (wtCount.getCount(wt) != 1.0) continue;
            r1.incrementCount(wt.tag());
        }
        for (String tag : tagCount.keySet()) {
            for (String word : seenWords) {
                WordTag wt = new WordTag(word, tag);
                if (wtCount.keySet().contains(wt)) continue;
                r0.incrementCount(tag);
            }
        }
        for (String tag : tagCount.keySet()) {
            double logprob = Math.log(r1.getCount(tag) / (tagCount.getCount(tag) * r0.getCount(tag)));
            this.unknownGT.put(tag, new Double(logprob));
        }
    }

    public static void main(String[] args) {
        System.out.println("Testing tagged word");
        Counter<TaggedWord> c = new Counter<TaggedWord>();
        TaggedWord tw1 = new TaggedWord("w", "t");
        c.incrementCount(tw1);
        TaggedWord tw2 = new TaggedWord("w", "t2");
        System.out.println(c.keySet().contains(tw2));
        System.out.println(tw1.equals(tw2));
        WordTag wt1 = GermanUnknownWordModel.toWordTag(tw1);
        WordTag wt2 = GermanUnknownWordModel.toWordTag(tw2);
        WordTag wt3 = new WordTag("w", "t2");
        System.out.println(wt1.equals(wt2));
        System.out.println(wt2.equals(wt3));
    }

    private static WordTag toWordTag(TaggedWord tw) {
        return new WordTag(tw.word(), tw.tag());
    }
}

