#!/usr/bin/perl -w

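# Usage: ./bootstrap_hmms.pl train_data
#
# For every label listed in tmp/monophones, runs HInit32 to create an
# initial model in tmp/hmm0 and HRest32 to train it into tmp/hmm1, then
# concatenates the per-label models into tmp/hmm1/hmmdefs.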

use strict;
use File::Basename;

# Exactly one command-line argument is accepted: the training data directory.
unless (scalar(@ARGV) == 1) {
    print STDERR "Usage: bootstrap_hmms.pl <training data dir>\n";
    exit 2;
}

my $train_data = $ARGV[0];

# Refuse to continue when the argument does not name an existing directory.
unless (-d $train_data) {
    print STDERR "Training data directory '$train_data' not found\n";
    exit 2;
}

# Working directory: the inputs (monophones, train_config, train.scp, proto)
# are read from here and the hmmN model directories are created inside it.
my $tmpdir = "tmp";

# Index of the HMM directory currently in use; new_hmm() increments it.
my $current_hmm = 0;

# new_hmm() returns (input dir, output dir) but creates only the output
# directory, so the input directory for the first step is created here.
my ($hmm0,$hmm1) = new_hmm();
if (! -d $hmm0) {
    # Fail immediately with the OS error instead of letting a later command
    # trip over the missing directory.
    mkdir $hmm0 or die "Cannot create directory '$hmm0': $!";
}

# Model labels, one per line of the monophone list.
my @labels = ();

my $monophones = "$tmpdir/monophones";
# Three-argument open with a lexical handle; a missing or unreadable list is
# reported here instead of silently producing an empty label set.
open(my $hmmlist, '<', $monophones)
    or die "Cannot open '$monophones': $!";
while (my $line = <$hmmlist>) {
    chomp $line;
    # A blank line would become an empty label, which later turns into the
    # bare directory path '$hmm1/' in the cat command.
    next if $line eq '';
    push @labels, $line;
}
close $hmmlist;

# With no labels the final "cat > hmmdefs" would have no file arguments and
# would block reading standard input, so stop early instead.
die "No labels found in '$tmpdir/monophones'\n" unless @labels;

# Quote a string for the shell: wrap it in single quotes and escape any
# embedded single quote as '\''. Plain strings come out as 'string'.
my $shell_quote = sub {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
};

# The training data directory comes from the command line, so it may contain
# spaces or shell metacharacters; always pass it quoted.
my $train_data_arg = $shell_quote->($train_data);

# Both model directories get a copy of the label list as 'hmmlist'.
run("cp '$tmpdir/monophones' '$hmm0/hmmlist'");

# Step 1: HInit32 writes one initial model per label into $hmm0.
foreach my $label (@labels) {
    my $label_arg = $shell_quote->($label);
    print "Creating initial model for $label...\n";
    run("HInit32 -C $tmpdir/train_config -S $tmpdir/train.scp -M $hmm0 -l $label_arg -o $label_arg -L $train_data_arg $tmpdir/proto");
}

run("cp '$hmm0/hmmlist' '$hmm1/hmmlist'");

# Step 2: HRest32 reads each model from $hmm0 and writes the result to $hmm1.
foreach my $label (@labels) {
    my $label_arg = $shell_quote->($label);
    my $model_arg = $shell_quote->("$hmm0/$label");
    print "Training model for $label...\n";
    run("HRest32 -C $tmpdir/train_config -S $tmpdir/train.scp -M $hmm1 -l $label_arg -L $train_data_arg $model_arg");
}

# Step 3: concatenate the per-label model files from $hmm1, in label order,
# into a single hmmdefs file.
my $cat_cmd = join ' ', 'cat', map { $shell_quote->("$hmm1/$_") } @labels;
run("$cat_cmd > $hmm1/hmmdefs");

# Return the path of the HMM directory for the current value of the
# file-level $current_hmm counter, i.e. "<tmpdir>/hmm<N>".
sub current_hmm_dir {
    my $dir_name = 'hmm' . $current_hmm;
    return join('/', $tmpdir, $dir_name);
}

# Advance to the next HMM directory and make sure it exists.
#
# Increments the file-level $current_hmm counter, so every call moves one
# step forward.
#
# Returns the list ($indir, $outdir): the directory for the counter value
# before the call and the directory for the value after it. Only $outdir is
# created here; $indir is left to the caller.
#
# Dies if $outdir does not exist and cannot be created.
sub new_hmm {
    my $indir = current_hmm_dir();
    $current_hmm++;
    my $outdir = current_hmm_dir();
    # An already existing directory is reused as-is; any other mkdir failure
    # (missing parent, no permission) is reported with the OS error.
    if (! -d $outdir) {
        mkdir $outdir or die "Cannot create directory '$outdir': $!";
    }
    return ($indir, $outdir);
}

# Echo a shell command to standard output, execute it, and abort the script
# if it does not succeed.
#
# Parameters:
#   $command - command line passed to system() as a single string.
#
# Returns 1 on success.
#
# Dies with a message that names the command and states whether it could not
# be started, was terminated by a signal, or exited with a non-zero code.
sub run {
    my ($command) = @_;
    print "$command\n";

    my $status = system($command);
    return 1 if $status == 0;

    # system() returns the raw wait status: -1 means the program could not be
    # started (reason in $!), the low 7 bits hold a terminating signal, and
    # the exit code sits in the high byte.
    if ($status == -1) {
        die "Failed to execute '$command': $!";
    }
    if ($status & 127) {
        die sprintf("Failed: '%s' died with signal %d", $command, $status & 127);
    }
    die sprintf("Failed: '%s' returned exit code %d", $command, $status >> 8);
}
