public:t-malv-15-3:4
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| public:t-malv-15-3:4 [2015/09/10 22:34] – [5. Working with Bigram scores] orvark | public:t-malv-15-3:4 [2024/04/29 13:33] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 95: | Line 95: | ||
| FYI: a normal Python dictionary throws a '' | FYI: a normal Python dictionary throws a '' | ||
| + | |||
| + | ===== Solutions ===== | ||
| + | |||
| + | <code python> | ||
| + | import nltk | ||
| + | from nltk.collocations import * | ||
| + | from nltk.metrics import BigramAssocMeasures, | ||
| + | from nltk.corpus import brown, stopwords | ||
| + | |||
| + | #1 | ||
| + | |||
| + | bam = BigramAssocMeasures | ||
| + | |||
| + | corpus = brown.words() | ||
| + | |||
| + | finder = BigramCollocationFinder.from_words(corpus) | ||
| + | |||
| + | word_filter = lambda w: len(w) < 3 or w.lower() in stopwords.words(' | ||
| + | #def word_filter(w): | ||
| + | |||
| + | |||
| + | finder.apply_freq_filter(2) | ||
| + | finder.apply_word_filter(word_filter) | ||
| + | |||
| + | print(finder.nbest(bam.raw_freq, | ||
| + | |||
| + | |||
| + | finder_win3 = BigramCollocationFinder.from_words(corpus, | ||
| + | finder_win3.apply_freq_filter(2) | ||
| + | finder_win3.apply_word_filter(word_filter) | ||
| + | print(finder_win3.nbest(bam.raw_freq, | ||
| + | |||
| + | |||
| + | tam = TrigramAssocMeasures | ||
| + | |||
| + | finder_tri = TrigramCollocationFinder.from_words(corpus) | ||
| + | finder_tri.apply_freq_filter(2) | ||
| + | finder_tri.apply_word_filter(word_filter) | ||
| + | print(finder_tri.nbest(tam.raw_freq, | ||
| + | |||
| + | #2 | ||
| + | |||
| + | # Pointwise mutual information | ||
| + | print(finder.nbest(bam.pmi, | ||
| + | # Log-likelihood ratio | ||
| + | print(finder.nbest(bam.likelihood_ratio, | ||
| + | # Mutual information likelihood, a variant of mutual information | ||
| + | print(finder.nbest(bam.mi_like, | ||
| + | # Chi squared test | ||
| + | print(finder.nbest(bam.chi_sq, | ||
| + | # Student's t-test | ||
| + | print(finder.nbest(bam.student_t, | ||
| + | |||
| + | #3 | ||
| + | |||
| + | tagged_corpus = brown.tagged_words(tagset=' | ||
| + | |||
| + | finder_tagged = BigramCollocationFinder.from_words(tagged_corpus) | ||
| + | print(finder_tagged.nbest(bam.raw_freq, | ||
| + | |||
| + | finder_tags = BigramCollocationFinder.from_words(t for w, t in tagged_corpus) | ||
| + | print(finder_tags.nbest(bam.raw_freq, | ||
| + | </code> | ||
/var/www/cadia.ru.is/wiki/data/attic/public/t-malv-15-3/4.1441924484.txt.gz · Last modified: 2024/04/29 13:32 (external edit)