public:t-malv-15-3:4
Differences
This shows you the differences between two versions of the page.
Both sides previous revision / Previous revision / Next revision | Previous revision | ||
public:t-malv-15-3:4 [2015/09/10 22:34] – [5. Working with Bigram scores] orvark | public:t-malv-15-3:4 [2024/04/29 13:33] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 95: | Line 95: | ||
FYI: a normal Python dictionary throws a '' | FYI: a normal Python dictionary throws a '' | ||
+ | |||
+ | ===== Solutions ===== | ||
+ | |||
+ | <code python> | ||
+ | import nltk | ||
+ | from nltk.collocations import * | ||
+ | from nltk.metrics import BigramAssocMeasures, | ||
+ | from nltk.corpus import brown, stopwords | ||
+ | |||
+ | #1 | ||
+ | |||
+ | bam = BigramAssocMeasures | ||
+ | |||
+ | corpus = brown.words() | ||
+ | |||
+ | finder = BigramCollocationFinder.from_words(corpus) | ||
+ | |||
+ | word_filter = lambda w: len(w) < 3 or w.lower() in stopwords.words(' | ||
+ | #def word_filter(w): | ||
+ | |||
+ | |||
+ | finder.apply_freq_filter(2) | ||
+ | finder.apply_word_filter(word_filter) | ||
+ | |||
+ | print(finder.nbest(bam.raw_freq, | ||
+ | |||
+ | |||
+ | finder_win3 = BigramCollocationFinder.from_words(corpus, | ||
+ | finder_win3.apply_freq_filter(2) | ||
+ | finder_win3.apply_word_filter(word_filter) | ||
+ | print(finder_win3.nbest(bam.raw_freq, | ||
+ | |||
+ | |||
+ | tam = TrigramAssocMeasures | ||
+ | |||
+ | finder_tri = TrigramCollocationFinder.from_words(corpus) | ||
+ | finder_tri.apply_freq_filter(2) | ||
+ | finder_tri.apply_word_filter(word_filter) | ||
+ | print(finder_tri.nbest(tam.raw_freq, | ||
+ | |||
+ | #2 | ||
+ | |||
+ | # Pointwise mutual information | ||
+ | print(finder.nbest(bam.pmi, | ||
+ | # Log-likelihood ratio | ||
+ | print(finder.nbest(bam.likelihood_ratio, | ||
+ | # Mutual information likelihood, an MI variant | ||
+ | print(finder.nbest(bam.mi_like, | ||
+ | # Chi squared test | ||
+ | print(finder.nbest(bam.chi_sq, | ||
+ | # Student's t-test | ||
+ | print(finder.nbest(bam.student_t, | ||
+ | |||
+ | #3 | ||
+ | |||
+ | tagged_corpus = brown.tagged_words(tagset=' | ||
+ | |||
+ | finder_tagged = BigramCollocationFinder.from_words(tagged_corpus) | ||
+ | print(finder_tagged.nbest(bam.raw_freq, | ||
+ | |||
+ | finder_tags = BigramCollocationFinder.from_words(t for w, t in tagged_corpus) | ||
+ | print(finder_tags.nbest(bam.raw_freq, | ||
+ | </code> | ||
/var/www/cadia.ru.is/wiki/data/attic/public/t-malv-15-3/4.1441924484.txt.gz · Last modified: 2024/04/29 13:32 (external edit)