#!/usr/bin/perl -w

# ./train_model.pl train_data

use strict;
use File::Basename;

# Locate the helper script that lives next to this program.  check_prog()
# exits with status 1 if it is missing or not executable.
my $fix_sp_model = check_prog("fix_sp_model.pl");

if (@ARGV != 1) {
    print STDERR "Usage: train_model.pl <training data dir>\n";
    exit 2;
}

my ($train_data) = @ARGV;

if (! -d $train_data) {
    print STDERR "Training data directory '$train_data' not found\n";
    exit 2;
}

# Working directory.  It is not created here: reestimate() reads
# $tmpdir/train_config and the initial model files in $tmpdir/hmm1 from it.
my $tmpdir = "tmp";

# Always start from an empty output directory.
my $outdir = "final";
run("rm -rf '$outdir'");
mkdir($outdir) or die "Cannot create output directory '$outdir': $!\n";

# Index of the newest model directory ($tmpdir/hmm<N>); advanced by new_hmm().
my $current_hmm = 1;

# Write the list of training feature files, one path per line.
# NOTE: glob splits its pattern on whitespace, so a training directory whose
# name contains spaces will not be matched correctly.
my $train_scp = "$tmpdir/train.scp";
open(my $train_fh, '>', $train_scp)
    or die "Cannot open '$train_scp' for writing: $!\n";
foreach my $mfc_file (glob "$train_data/*.mfc") {
    print $train_fh "$mfc_file\n";
}
# Buffered write errors only surface at close, so check it.
close($train_fh) or die "Cannot close '$train_scp': $!\n";

# Re-estimate the models three times using the training data.

reestimate($train_scp);
reestimate($train_scp);
reestimate($train_scp);

# Fix the silence model by introducing a new short pause
# model tied to the existing silence model, and making the existing
# silence model more robust by adding backwards transitions to it.

print "Fixing the silence model...\n";

my ($indir1, $outdir1) = new_hmm();

run("$fix_sp_model '$outdir1' '$indir1/hmmdefs' '$indir1/hmmlist'");

# Re-estimate two more times.

reestimate($train_scp);
reestimate($train_scp);


# FIXME: do some more stuff here?


# We now have a set of monophone models which can be used for
# recognition.

my $monophone_hmm = current_hmm_dir();
run("cp -a '$monophone_hmm' '$outdir/hmm_mono'");


# FIXME: build triphone models?

print "\n";
print "Done.\n";
print "\n";
print "Monophone model:\n";
print "HMMLIST = $outdir/hmm_mono/hmmlist\n";
print "MMF0    = $outdir/hmm_mono/hmmdefs\n";
print "MMF1    = \n";

# Resolve the path of a helper program expected to live in the same
# directory as this script ($0).
#
# Takes the helper's file name and returns it prefixed with the script's
# directory.  If the resulting path is not executable, prints a hint to
# STDERR and exits with status 1.
sub check_prog {
    my ($helper_name) = @_;

    # The second element returned by fileparse is the directory part of $0.
    my (undef, $script_dir) = fileparse($0, qr/\.[^.]*/);

    my $helper_path;
    if ($script_dir eq "") {
        $helper_path = $helper_name;
    }
    elsif ($script_dir =~ /\/$/) {
        # Directory already ends in a slash; do not add another one.
        $helper_path = $script_dir . $helper_name;
    }
    else {
        $helper_path = $script_dir . "/" . $helper_name;
    }

    unless (-x $helper_path) {
        print STDERR "$helper_path is not an executable file.\n";
        print STDERR "Maybe you need to run 'make'?\n";
        exit 1;
    }

    return $helper_path;
}

# Return the path of the model directory for the current value of the
# file-level counter $current_hmm, i.e. "$tmpdir/hmm<N>".
sub current_hmm_dir {
    my $dir_name = "hmm" . $current_hmm;
    return $tmpdir . "/" . $dir_name;
}

# Advance to the next model directory.
#
# Increments the file-level counter $current_hmm and makes sure the
# directory for the new index exists.
#
# Returns the list (previous model directory, new model directory).
# Dies if the new directory does not exist and cannot be created.
sub new_hmm {
    my $prev_dir = current_hmm_dir();
    $current_hmm++;
    my $next_dir = current_hmm_dir();

    # A directory left over from an earlier run is reused as-is; only a
    # genuine failure to create a missing directory is fatal.
    if (! -d $next_dir) {
        mkdir($next_dir)
            or die "Cannot create model directory '$next_dir': $!\n";
    }

    return ($prev_dir, $next_dir);
}

# reestimate(file list file)
#
# Run one re-estimation pass, producing a new model directory from the
# current one (see new_hmm()).
#
# $file_list is the path of a file listing the training files.  If that
# file is empty, the current hmmdefs is simply copied to the new directory;
# otherwise HERest32 is run with $tmpdir/train_config as its -C file,
# $train_data as -L, the file list as -S, the current hmmdefs as -H and the
# new directory as -M.  In both cases hmmlist is copied across unchanged.
#
# Dies if the file list cannot be stat'ed or if any command fails.
sub reestimate {
    print "Re-estimating...\n";
    my ($file_list) = @_;

    # Element 7 of stat's result is the size in bytes.  stat returns an
    # empty list on failure, which must not be mistaken for an empty file.
    my $list_size = (stat($file_list))[7];
    defined $list_size
        or die "Cannot stat file list '$file_list': $!\n";

    my ($src_dir, $dst_dir) = new_hmm();

    if ($list_size == 0) {
        print STDERR "No files to train on, just copying.\n";
        run("cp '$src_dir/hmmdefs' '$dst_dir/hmmdefs'");
    }
    else {
        run("HERest32 -C '$tmpdir/train_config' -t 250.0 150.0 1000.0"
            . " -L '$train_data' -S '$file_list' -H '$src_dir/hmmdefs'"
            . " -M '$dst_dir' '$src_dir/hmmlist'");
    }

    run("cp '$src_dir/hmmlist' '$dst_dir/hmmlist'");
}

# Echo a shell command to STDOUT, then execute it with system().
#
# Returns 1 when the command exits with status 0.  Otherwise dies with a
# message starting with "Failed: " that names the command and says whether
# it could not be started, was killed by a signal, or exited non-zero.
sub run {
    my $command = shift;
    print "$command\n";

    my $wait_status = system($command);
    return 1 if $wait_status == 0;

    # system() returns -1 when the program could not be started at all;
    # $! then holds the reason.
    if ($wait_status == -1) {
        die "Failed: could not execute '$command': $!\n";
    }

    # The low 7 bits of the wait status hold the terminating signal, if any;
    # the exit code is in the high byte.
    my $signal = $wait_status & 127;
    if ($signal) {
        die "Failed: '$command' died with signal $signal\n";
    }

    my $exit_code = $wait_status >> 8;
    die "Failed: '$command' exited with status $exit_code\n";
}
