hunk ./Makefile 5 - ghc -threaded --make -package nuance-recognizer -package nlsml -package realspeak -package alsa -package gf -o $@ $< /usr/local/ScanSoft/RealSpeak_4.0/api/lib/lhstts.so + ghc -threaded --make -package utf8-string -package nuance-recognizer -package nlsml -package realspeak -package alsa -package gf -o $@ $< /usr/local/ScanSoft/RealSpeak_4.0/api/lib/lhstts.so hunk ./UTF8.hs 1 ----------------------------------------------------------------------- --- | --- Module : UTF8 --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/04/21 16:23:42 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.5 $ --- --- From the Char module supplied with HBC. --- code by Thomas Hallgren (Jul 10 1999) ------------------------------------------------------------------------------ - -module UTF8 (decodeUTF8, encodeUTF8) where - --- | Take a Unicode string and encode it as a string --- with the UTF8 method. -decodeUTF8 :: String -> String -decodeUTF8 "" = "" -decodeUTF8 (c:cs) | c < '\x80' = c : decodeUTF8 cs -decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' && - '\x80' <= c' && c' <= '\xbf' = - toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs -decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' && - '\x80' <= c' && c' <= '\xbf' && - '\x80' <= c'' && c'' <= '\xbf' = - toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs -decodeUTF8 s = s ---- AR workaround 22/6/2006 -----decodeUTF8 _ = error "UniChar.decodeUTF8: bad data" - -encodeUTF8 :: String -> String -encodeUTF8 "" = "" -encodeUTF8 (c:cs) = - if c > '\x0000' && c < '\x0080' then - c : encodeUTF8 cs - else if c < toEnum 0x0800 then - let i = fromEnum c - in toEnum (0xc0 + i `div` 0x40) : - toEnum (0x80 + i `mod` 0x40) : - encodeUTF8 cs - else - let i = fromEnum c - in toEnum (0xe0 + i `div` 0x1000) : - toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : - toEnum (0x80 + i `mod` 0x40) : - encodeUTF8 cs rmfile ./UTF8.hs hunk ./translator.hs 1 -import UTF8 - hunk ./translator.hs 10 +import Codec.Binary.UTF8.String as UTF8 + hunk ./translator.hs 83 - printResults rs = sequence [printf "%3d %s\n" c (encodeUTF8 r) | (c,r) <- rs] + printResults rs = sequence [printf "%3d %s\n" c (UTF8.encodeString r) | (c,r) <- rs]