;-------------------------------------------------------------------------------------------------
; Ch02.17, Naive Bayesian Classifier 'Spam Filter'
;          with Laplacian Correction
;
; PCM 2015/01/25
;-------------------------------------------------------------------------------------------------
; Experiment configuration and the word-count tables the generative model is built from.
;
(define no_of_samples 20000)            ; how many times take-a-sample is repeated
(define no_of_vars 1)                   ; number of queried variables: Spam

; Exact posterior used as the reference for the sample-based estimate.
; Exactly one value must be active, and it has to belong to the same evidence row
; that is active in the condition of take-a-sample (currently Tab 2.3 row 4).
(define math_expect
  ;0.999995          ; P(spam | money, ...),                    Bess.&al, Bayes.Prog.,2014,p.33,Tab 2.3 row 1
  ;0.996073          ; P(spam | next, money, ...),              Bess.&al, Bayes.Prog.,2014,p.33,Tab 2.3 row 2
  ;0.996073          ; P(spam | next, money, you, ...),         Bess.&al, Bayes.Prog.,2014,p.33,Tab 2.3 row 3
  0.00134393         ; P(spam | next, programming, money, ...), Bess.&al, Bayes.Prog.,2014,p.33,Tab 2.3 row 4
  ;0.999984          ; P(spam | fortune, next, money, ...),     Bess.&al, Bayes.Prog.,2014,p.33,Tab 2.3 row 5
  ;0.502648976369882 ; P(Spam=true|programming, money)          Bess.&al, Bayes.Prog.,2014, assignment 7
  )
;-------------------------------------------------------------------------------------------------
; Bessiere et al. 2014, p. 33, Table 2.1
;
; Word index used by the model: 0 = Fortune, 1 = Next, 2 = Programming, 3 = Money, 4 = You
;
(define a_f 250)                        ; a_f = a(f) = #(Spam=false)
(define a_t 750)                        ; a_t = a(t) = #(Spam=true)
; Word 2 (Programming) given Spam=false is 250, not 150:
;   (1 + 250) / (2 + 250) = 0.996032, the value the model lists for
;   P(Programming=true | Spam=false); with 150 the exact posterior for row 4 would be
;   about 0.0022 instead of the configured math_expect 0.00134393.
(define a_if_s '(0 125 250 0 125))      ; a_if_s = (..., a(i,f), ...) = (..., #(Word(i)=true|Spam=false), ...)
(define a_it_s '(375 0 0 750 375))      ; a_it_s = (..., a(i,t), ...) = (..., #(Word(i)=true|Spam=true) , ...)
;
; Laplacian Correction, Bess.& al, Bayes.Prog.,2014,p.27
;
; Turns a conditional count into a smoothed probability:
;   a_i = number of cases in which the word is present within the class
;   a   = number of cases in the class
; Adding 1 to the numerator and 2 to the denominator keeps the result away from 0 and 1.
(define (cond_freq->cond_p a_i a)
  (/ (+ 1 a_i) (+ 2 a)))                          ; P(Leaf(i)=true | Root)
;
;-------------------------------------------------------------------------------------------------
;
; Mean of component `nth` taken over all samples (each sample is a list).
; The third parameter is accepted but not used by the body.
(define (expected_value nth samples no_of_samples)
  (let ((column (map (lambda (one_sample) (list-ref one_sample nth)) samples)))
    (mean column)))

; List of component means. Indices run from (no_of_vars - vars) upwards, so the
; caller is expected to pass the global no_of_vars as `vars` to start at index 0.
(define (expected_values samples vars)
  (if (not (= vars 0))
      (let ((sample_count (length samples)))
        (cons (expected_value (- no_of_vars vars) samples sample_count)
              (expected_values samples (- vars 1))))
      '()))

; Draws the 0/1 indicator of word `word_index` given the class `spam` (0 or 1).
; The flip probability is the Laplace-corrected frequency taken from a_if_s / a_f
; for spam = 0 and from a_it_s / a_t for spam = 1.
;
; Hard-coded probabilities that were listed as alternatives to the computed ones:
;   word            Spam=0        Spam=1
;   0 Fortune       0.00396825    0.5
;   1 Next          0.5           0.00132979
;   2 Programming   0.996032      0.00132979
;   3 Money         0.00396825    0.99867
;   4 You           0.5           0.5
; Note: 0.996032 = (1 + 250) / (2 + 250), i.e. it presupposes a count of 250 for
; word 2 in a_if_s.
(define (draw_word word_index spam)
  (let ((word_count  (list-ref (if (= spam 0) a_if_s a_it_s) word_index))
        (class_count (if (= spam 0) a_f a_t)))
    (if (flip (cond_freq->cond_p word_count class_count)) 1 0)))

; One conditioned draw from the naive Bayes model: returns (list Spam) for a
; run of the generative model that satisfies the active evidence below.
(define (take-a-sample)
  (rejection-query

    ; generative model
    (define Spam        (if (flip 0.75) 1 0))
    (define Fortune     (draw_word 0 Spam))       ; word 0
    (define Next        (draw_word 1 Spam))       ; word 1
    (define Programming (draw_word 2 Spam))       ; word 2
    (define Money       (draw_word 3 Spam))       ; word 3
    (define You         (draw_word 4 Spam))       ; word 4

    ; sampled value
    (list Spam)

    ; condition expression, observational evidence, constraint
    ; (activate exactly one row and the matching value of math_expect)
    (and
      ; Bessiere & al, Bayes.Prog.,2014,p.33,Tab 2.3 row 1
      ; (= Money 1)
      ; (= Fortune 0) (= Next 0) (= Programming 0) (= Money 1) (= You 0)

      ; Bessiere & al, Bayes.Prog.,2014,p.33,Tab 2.3 row 2
      ; (= Next 1) (= Money 1)
      ; (= Fortune 0) (= Next 1) (= Programming 0) (= Money 1) (= You 0)

      ; Bessiere & al, Bayes.Prog.,2014,p.33,Tab 2.3 row 3
      ; (= Next 1) (= Money 1) (= You 1)
      ; (= Fortune 0) (= Next 1) (= Programming 0) (= Money 1) (= You 1)

      ; Bessiere & al, Bayes.Prog.,2014,p.33,Tab 2.3 row 4
      ; (= Next 1) (= Programming 1) (= Money 1)
      (= Fortune 0) (= Next 1) (= Programming 1) (= Money 1) (= You 0)

      ; Bessiere & al, Bayes.Prog.,2014,p.33,Tab 2.3 row 5
      ; (= Fortune 1) (= Next 1) (= Money 1)
      ; (= Fortune 1) (= Next 1) (= Programming 0) (= Money 1) (= You 0)

      ; Bessiere & al, Bayes.Prog.,2014,p.33,assignment 7
      ; (= Programming 1) (= Money 1)
      ; (= Fortune 0) (= Next 0) (= Programming 1) (= Money 1) (= You 0)
      )))

; Driver: prints the banner, draws no_of_samples conditioned samples, plots their
; histogram, and reports the sample mean next to math_expect together with the
; absolute deviation and the elapsed time.
; let* is used only to force the steps to run in order; the bindings that hold
; the results of display / hist are never read.
(define (my_return)
  (let* ((t_begin   (get-time))
         (banner_a  (display "Ch02.17, Naiive Bayesian Classifier 'SpamFilter' *** CHURCH-code by PCM 2015/01/25 ***"))
         (banner_b  (display " with Laplacian correction "))
         (rule      (display "--------------------------------------------------------------------------------------"))
         (size_note (display "sample size = " no_of_samples))
         (draws     (repeat no_of_samples take-a-sample))
         (plot      (hist draws "smpl-b.est.of P(Spam=t | ... )"))
         (estimate  (first (expected_values draws no_of_vars)))
         (exact_note (display "by ...."
                              "parameter theta E[Spam | ... ] = " math_expect))
         (est_note  (display "by CHURCH's approximate rejection sampling:"
                             "sample-based estimator E[Spam | ... ] = " estimate))
         (dev_note  (display "|deviation| = " (abs (- math_expect estimate))))
         (t_end     (get-time)))
    ; the difference of the two get-time readings is divided by 1000 and labelled as seconds
    (display "computation time in sec =" (/ (- t_end t_begin) 1000))))

(my_return)
;-------------------------------------------------------------------------------------------------