#!/usr/bin/perl use strict; my %probabilities = (); my %alphas = (); my %betas = (); my @states = ("C", "V"); my @characters = ("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " "); my $hmm_string = ""; my $use_corpus = 0; my $print_probabilities = 0; my $longest_probability = 0; eval { process_options(); }; die $@ if $@; eval { assign_initial_probabilities(); }; die $@ if $@; eval { print_initial_probabilities(); } if ($print_probabilities); die $@ if $@; eval { assign_transition_probabilities(); }; die $@ if $@; eval { print_transition_probabilities(); } if ($print_probabilities); die $@ if $@; eval { assign_emission_probabilities(); }; die $@ if $@; eval { print_emission_probabilities(); } if ($print_probabilities); die $@ if $@; eval { calculate_probabilities(); }; die $@ if $@; sub process_options { for (my $i = 0; $i <= $#ARGV; $i++) { if ($ARGV[$i] =~ /^(\-p|\-\-print\-probabilities)$/) { $print_probabilities = 1; } elsif ($ARGV[$i] =~ /^(\-h|\-\-help)$/) { eval { print_help(); }; die $@ if $@; exit(0); } } if ($ARGV[0] =~ /^\-{1,2}\w+/ || $ARGV[0] =~ /^ *$/) { print "\nThe first argument must be a string, or the path to a file containing strings\n"; exit(0); } $hmm_string = $ARGV[0]; } sub assign_initial_probabilities { $longest_probability = 0; my $random_number_sum = 0; # assign a non-zero random number value as the initial probability for each state foreach my $state (@states) { my $state_random_number = rand; while ($state_random_number == 0) { $state_random_number = rand; } $probabilities{initial}{$state} = $state_random_number; $random_number_sum += $state_random_number; } # normalize the above-assigned probabilities so they sum to one (form a distribution) foreach my $state (keys %{$probabilities{initial}}) { $probabilities{initial}{$state} /= $random_number_sum; $longest_probability = length($probabilities{initial}{$state}) if (length($probabilities{initial}{$state}) > $longest_probability); } } sub assign_transition_probabilities { $longest_probability = 0; foreach my $state (@states) { my $random_number_sum = 0; # assign a non-zero random number value as the initial probability for each transition foreach my $next_state (@states) { my $transition_random_number = rand; while ($transition_random_number == 0) { $transition_random_number = rand; } $probabilities{transition}{$state}{$next_state} = $transition_random_number; $random_number_sum += $transition_random_number; } # normalize the above-assigned probabilities so they sum to one (form a distribution) foreach my $next_state (keys %{$probabilities{transition}{$state}}) { $probabilities{transition}{$state}{$next_state} /= $random_number_sum; $longest_probability = length($probabilities{transition}{$state}{$next_state}) if (length($probabilities{transition}{$state}{$next_state}) > $longest_probability); } } } sub assign_emission_probabilities { $longest_probability = 0; foreach my $state (@states) { my $random_number_sum = 0; foreach my $next_state (@states) { foreach my $character (@characters) { # assign a non-zero random number value as the emission probability for each character at each transition my $emission_random_number = rand; while ($emission_random_number == 0) { $emission_random_number = rand; } $probabilities{emission}{$state}{$next_state}{$character} = $emission_random_number; $random_number_sum += $emission_random_number; } } # normalize the above-assigned probabilities so they sum to one (form a distribution) foreach my $next_state (keys %{$probabilities{emission}{$state}}) { foreach my $character (keys %{$probabilities{emission}{$state}{$next_state}}) { $probabilities{emission}{$state}{$next_state}{$character} /= $random_number_sum; $longest_probability = length($probabilities{emission}{$state}{$next_state}{$character}) if (length($probabilities{emission}{$state}{$next_state}{$character}) > $longest_probability); } } } } sub calculate_probabilities { my $line = 1; if (-f $hmm_string) { open(HMM, "<$hmm_string") || die "$! [" . $hmm_string . "]\n"; while () { chomp; tr/A-Z/a-z/; s/[^a-z ]//g; print "\n" . get_character_string("\_", 24) . "HMM String " . $line . get_character_string("\_", 24) . "\n"; eval { calculate_forward_probability($_); }; die $@ if $@; eval { calculate_backward_probability($_); }; die $@ if $@; print "\n"; $line++; } close(HMM); print "\n"; } else { print "\n"; eval { calculate_forward_probability($hmm_string); }; die $@ if $@; eval { calculate_backward_probability($hmm_string); }; die $@ if $@; print "\n\n"; } } sub calculate_forward_probability { my $string = shift; my $string_length = length($string); my $total_probability = 0; undef %alphas; print "FORWARD PROBABILITIES (ALPHA)\n"; print "Time=1\n"; # set alpha_i(1) = pi(i) foreach my $state (@states) { $alphas{$state}{0} = $probabilities{initial}{$state}; print "Pr[State(1)=" . $state . "]=" . $alphas{$state}{0} . "\n"; } for (my $time = 1; $time <= $string_length; $time++) { my $character = lc(substr($string, ($time - 1), 1)); print "Obs(" . $time . ")='" . $character . "'\n\n"; print "Time=" . ($time + 1) . "\n"; foreach my $state (@states) { foreach my $previous_state (@states) { $alphas{$state}{$time} += $alphas{$previous_state}{($time - 1)} * $probabilities{transition}{$previous_state}{$state} * $probabilities{emission}{$previous_state}{$state}{$character}; } $total_probability += $alphas{$state}{$time} if ($time == ($string_length)); my $observation_string = ""; if ($time == 1) { $observation_string = " \&\& Obs(1)"; } elsif ($time == 2) { $observation_string = " \&\& Obs(1),Obs(2)"; } elsif ($time > 2) { $observation_string = " \&\& Obs(1)\,\.\.\.\,Obs(" . $time . ")"; } print "Pr[State(" . ($time + 1) . ")=" . $state . $observation_string . "]=" . $alphas{$state}{$time} . "\n"; } } print "\nPr[Observation|Model]=" . $total_probability . "\n\n"; } sub calculate_backward_probability { my $string = shift; my $string_length = length($string); my $total_probability = 0; undef %betas; print "\nBACKWARD PROBABILITIES (BETA)\n"; print "Time=" . ($string_length + 1) . "\n"; # set beta_i(T+1) = 1 foreach my $state (@states) { $betas{$state}{$string_length} = 1; print "Pr[State(" . ($string_length + 1) . ")=" . $state . "]=1\n"; } for (my $time = ($string_length - 1); $time >= 0; $time--) { my $character = lc(substr($string, $time, 1)); print "Obs(" . ($time + 1) . ")='" . $character . "'\n\n"; print "Time=" . ($time + 1) . "\n"; foreach my $state (@states) { foreach my $next_state (@states) { $betas{$state}{$time} += $betas{$next_state}{($time + 1)} * $probabilities{transition}{$state}{$next_state} * $probabilities{emission}{$state}{$next_state}{$character}; } $total_probability += ($betas{$state}{$time} * $probabilities{initial}{$state}) if ($time == 0); my $observation_string = ""; if ($time == ($string_length - 1)) { $observation_string = " \&\& Obs(" . ($time + 1) . ")"; } elsif ($time == ($string_length - 2)) { $observation_string = " \&\& Obs(" . ($time + 1) . "),Obs(" . $string_length . ")"; } elsif ($time < ($string_length - 2)) { $observation_string = " \&\& Obs(" . ($time + 1) .")\,\.\.\.\,Obs(" . $string_length . ")"; } print "Pr[State(" . ($time + 1) . ")=" . $state . $observation_string . "]=" . $betas{$state}{$time} . "\n"; } } print "\nPr[Observation|Model]=" . $total_probability . "\n"; } sub print_initial_probabilities { print "\nINITIAL STATE PROBABILITIES\n"; print "+" . get_character_string("\-", $longest_probability + 8) . "+\n"; foreach my $state (sort { $a cmp $b } keys %{$probabilities{initial}}) { print sprintf("%-" . ($longest_probability + 8) . "s", "| Pr[" . $state . "]=" . $probabilities{initial}{$state}) . " |\n"; } print "+" . get_character_string("\-", $longest_probability + 8) . "+\n"; } sub print_transition_probabilities { print "\nINITIAL TRANSITION PROBABILITIES\n"; print "+" . get_character_string("\-", $longest_probability + 11) . "+\n"; foreach my $state (sort { $a cmp $b } keys %{$probabilities{transition}}) { foreach my $next_state (sort { $a cmp $b } keys %{$probabilities{transition}{$state}}) { print sprintf("%-" . ($longest_probability + 11) . "s", "| Pr[" . $state . "->" . $next_state . "]=" . $probabilities{transition}{$state}{$next_state}) . " |\n"; } } print "+" . get_character_string("\-", $longest_probability + 11) . "+\n"; } sub print_emission_probabilities { print "\nINITIAL EMISSION PROBABILITIES\n"; print "+" . get_character_string("\-", $longest_probability + 13) . "+\n"; foreach my $state (sort { $a cmp $b } keys %{$probabilities{emission}}) { foreach my $next_state (sort { $a cmp $b } keys %{$probabilities{emission}{$state}}) { foreach my $character (sort { $a cmp $b } keys %{$probabilities{emission}{$state}{$next_state}}) { print sprintf("%-" . ($longest_probability + 13) . "s", "| Pr[" . $state . "-" . $character . "->" . $next_state . "]=" . $probabilities{emission}{$state}{$next_state}{$character}) . " |\n"; } } } print "+" . get_character_string("\-", $longest_probability + 13) . "+\n\n"; } sub get_character_string { my ($character, $length) = @_; my $character_string = ""; for (my $i = 0; $i < $length; $i++) { $character_string .= $character; } return $character_string; } sub print_help { print <