#!/usr/bin/perl -w

# Creates a list of all the phones used in the training data.
# ./list_data_phones.pl train_data

use strict;
use File::Basename;

# Exactly one command-line argument is expected: the training data directory.
unless (scalar(@ARGV) == 1) {
    print STDERR "Usage: list_data_phones.pl <training data dir>\n";
    exit 2;
}

my $train_data = $ARGV[0];

# Stop early when the argument does not name an existing directory.
unless (-d $train_data) {
    print STDERR "Training data directory '$train_data' not found\n";
    exit 2;
}

# Gather every *.lab file located directly inside the training data directory.
# The directory is read with opendir/readdir rather than glob because glob
# splits its pattern on whitespace and expands metacharacters, which yields
# the wrong file list when the directory name contains such characters.
# Names starting with a dot are skipped, exactly as a "*.lab" glob would do.
my $dirh;
if (!opendir($dirh, $train_data)) {
    print STDERR "Cannot read training data directory '$train_data': $!\n";
    exit 2;
}
my @labelfiles = map { "$train_data/$_" }
                 sort grep { !/\A\./ && /\.lab\z/ } readdir $dirh;
closedir $dirh;

# Record each distinct label seen in the label files.  A line of the form
# "<start> <end> <label> [anything else]" contributes only <label>; a line
# that does not start with two integer fields is taken verbatim as the label.
my %monophones = ();
foreach my $labf (@labelfiles) {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode, and a failure is reported instead of silently
    # producing an incomplete phone list.
    my $labfh;
    if (!open($labfh, '<', $labf)) {
        print STDERR "Cannot open label file '$labf': $!\n";
        exit 2;
    }
    while (my $line = <$labfh>) {
        chomp $line;

        # A blank line carries no label; without this check it would be
        # recorded as an empty phone and printed as an empty output line.
        next if $line =~ /^\s*$/;

        $line =~ s/^\d+\s+\d+\s+(\S+).*$/$1/;
        $monophones{$line} = 1;
    }
    close $labfh;
}

# Print the distinct labels in sorted (string) order, one per line.
my @labels = sort (keys %monophones);

foreach my $phone (@labels) {
    print "$phone\n";
}
