0
点赞
收藏
分享

微信扫一扫

一个基于 Windows Vista Speech API 5.3 以及 WPF 技术的语音识别代码



本人小试牛刀,试验了一下用 C# (.NET 3.0) 的 WPF 技术开发了一个语音识别程序。



Window1.xaml.cs


using System;

using System.Collections.Generic;

using System.Text;

using System.Windows;

using System.Windows.Controls;

using System.Windows.Data;

using System.Windows.Documents;

using System.Windows.Input;

using System.Windows.Media;

using System.Windows.Media.Imaging;

using System.Windows.Shapes;

using System.Reflection;

using System.Windows.Threading;

using System.IO;

using System.Xml;

using System.Collections.ObjectModel;

using System.ComponentModel;

using System.Speech.Recognition;

using System.Speech.Recognition.SrgsGrammar;

using System.Speech.Synthesis;

namespace speechReco

{

    /// <summary>

    /// Interaction logic for Window1.xaml

    /// </summary>

    public partial class Window1 : System.Windows.Window

    {

        private SpeechRecognizer sharedRecognizer;

        private SpeechRecognitionEngine appRecognizer;

        private SrgsDocument sdCmnrules;

        public Window1()

        {

            InitializeComponent();

            sharedRecognizer = new SpeechRecognizer();

            sharedRecognizer.AudioLevelUpdated += new EventHandler<AudioLevelUpdatedEventArgs>(sharedRecognizer_AudioLevelUpdated);

            sharedRecognizer.AudioSignalProblemOccurred += new EventHandler<AudioSignalProblemOccurredEventArgs>(sharedRecognizer_AudioSignalProblemOccurred);

            sharedRecognizer.AudioStateChanged += new EventHandler<AudioStateChangedEventArgs>(sharedRecognizer_AudioStateChanged);

            sharedRecognizer.EmulateRecognizeCompleted += new EventHandler<EmulateRecognizeCompletedEventArgs>(sharedRecognizer_EmulateRecognizeCompleted);

            sharedRecognizer.LoadGrammarCompleted += new EventHandler<LoadGrammarCompletedEventArgs>(sharedRecognizer_LoadGrammarCompleted);

            sharedRecognizer.RecognizerUpdateReached += new EventHandler<RecognizerUpdateReachedEventArgs>(sharedRecognizer_RecognizerUpdateReached);

            sharedRecognizer.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(sharedRecognizer_SpeechDetected);

            sharedRecognizer.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(sharedRecognizer_SpeechHypothesized);

            sharedRecognizer.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(sharedRecognizer_SpeechRecognitionRejected);

            sharedRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(sharedRecognizer_SpeechRecognized);

            sharedRecognizer.StateChanged += new EventHandler<System.Speech.Recognition.StateChangedEventArgs>(sharedRecognizer_StateChanged);

            //load SRGS library

            byte[] ba = speechReco.Properties.Resources.cmnrules;

            MemoryStream ms = new MemoryStream(ba);

            ms.Position = 0;

            XmlReader xr = XmlReader.Create(ms);

            sdCmnrules = new SrgsDocument(xr);

            //populate ComboBox

            foreach(SrgsRule rule in sdCmnrules.Rules)

            {

                if (rule.Scope == SrgsRuleScope.Public)

                {

                    cbRules.Items.Add(rule.Id);

                }

            }

            //default to integer rule

            cbRules.SelectedValue = "integer";

            cbRules.SelectionChanged += new SelectionChangedEventHandler(cbRules_SelectionChanged);

            this.btnSharedColor.Click += new RoutedEventHandler(btnSharedColor_Click);

            this.btnInProcColor.Click += new RoutedEventHandler(btnInProcColor_Click);

            this.btnTapDictation.PreviewMouseLeftButtonDown += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonDown);

            this.btnTapDictation.PreviewMouseLeftButtonUp += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonUp);

            this.btnSrgs.Click += new RoutedEventHandler(btnSrgs_Click);

            this.btnAdvGrammarBuilder.Click += new RoutedEventHandler(btnAdvGrammarBuilder_Click);

            this.btnWavFile.Click += new RoutedEventHandler(btnWavFile_Click);

            this.btnSynthPhonemes.Click += new RoutedEventHandler(btnSynthPhonemes_Click);

            this.btnEnable.Click += new RoutedEventHandler(btnEnable_Click);

            this.btnDisable.Click += new RoutedEventHandler(btnDisable_Click);

            this.btnUnload.Click += new RoutedEventHandler(btnUnload_Click);

            this.btnEmulate.Click += new RoutedEventHandler(btnEmulate_Click);

        }

        void btnEmulate_Click(object sender, RoutedEventArgs e)

        {

            //sharedRecognizer.EmulateRecognize("green");

            sharedRecognizer.EmulateRecognizeAsync("green");

            //sharedRecognizer.EmulateRecognize("stop listening");

        }

        void btnUnload_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.UnloadAllGrammars();

        }

        void btnDisable_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = false;

        }

        void btnEnable_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

        }

        string recoPhonemes;

        void btnSynthPhonemes_Click(object sender, RoutedEventArgs e)

        {

            //this is a trick to figure out phonemes used by synthesis engine


            //txt to wav

            MemoryStream audioStream = new MemoryStream();

            SpeechSynthesizer synth = new SpeechSynthesizer();

            synth.SetOutputToWaveStream(audioStream);

            PromptBuilder pb = new PromptBuilder();

            pb.AppendBreak(PromptBreak.ExtraSmall); //'e' wont be recognized if this is large, or non-existent?

            synth.Speak(pb);

            string textToSpeak = this.txtSynthTxt.Text.Trim();

            synth.Speak(textToSpeak);

            //synth.Speak(pb);

            synth.SetOutputToNull();

            audioStream.Position = 0;

            //now wav to txt (for reco phonemes)

            recoPhonemes = String.Empty;

            GrammarBuilder gb = new GrammarBuilder(textToSpeak);

            Grammar g = new Grammar(gb); //TODO the hard letters to recognize are 'g' and 'e'

            SpeechRecognitionEngine reco = new SpeechRecognitionEngine();

            reco.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(reco_SpeechHypothesized);

            reco.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(reco_SpeechRecognitionRejected);

            reco.UnloadAllGrammars(); //only use the one word grammar

            reco.LoadGrammar(g);

            reco.SetInputToWaveStream(audioStream);

            RecognitionResult rr = reco.Recognize();

            reco.SetInputToNull();

            if (rr != null)

            {

                recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);

            }

            txtRecoPho.Text = recoPhonemes;

        }

        /// <summary>
        /// Fallback for the phoneme round trip in btnSynthPhonemes_Click:
        /// even a rejected result still carries word/pronunciation data, so
        /// capture it rather than losing the phonemes.
        /// </summary>
        void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        /// <summary>
        /// Captures the phonemes of intermediate hypotheses during the
        /// btnSynthPhonemes_Click round trip; a later final result (or a
        /// rejection) overwrites this value.
        /// </summary>
        void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        void btnWavFile_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = false;

            appRecognizer = new SpeechRecognitionEngine();

            appRecognizer.SetInputToWaveFile("spoken.wav");

            appRecognizer.LoadGrammar(new DictationGrammar());

            RecognitionResult rr = appRecognizer.Recognize();

            appRecognizer.SetInputToNull();

            if (rr == null)

            {

                MessageBox.Show("null result?");

            }

            else

            {

                //NOTE in-process recognir cannot send feedback to microphone bar

                //SpeechUI.SendTextFeedback(rr, rr.Text, true);

                //show phoneme result

                string phonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);

                txtRecoPho.Text = phonemes;

                //show text result

                MessageBox.Show(rr.Text);               

            }

            appRecognizer.Dispose();

        }

        /// <summary>
        /// Which textual form of a RecognizedWordUnit to extract in
        /// StringFromWordArray.
        /// </summary>
        public enum WordType
        {
            Text,
            Normalized = Text, // alias: same underlying value as Text
            Lexical,
            Pronunciation
        }

        public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)

        {

            string text = "";

            foreach (RecognizedWordUnit word in words)

            {

                string wordText = "";

                if (type == WordType.Text || type == WordType.Normalized)

                {

                    wordText = word.Text;

                }

                else if (type == WordType.Lexical)

                {

                    wordText = word.LexicalForm;

                }

                else if (type == WordType.Pronunciation)

                {

                    wordText = word.Pronunciation;

                }

                else

                {

                    throw new InvalidEnumArgumentException(String.Format("[0}: is not a valid input", type));

                }

                //Use display attribute

                if ((word.DisplayAttributes & DisplayAttributes.OneTrailingSpace) != 0)

                {

                    wordText += " ";

                }

                if ((word.DisplayAttributes & DisplayAttributes.TwoTrailingSpaces) != 0)

                {

                    wordText += "  ";

                }

                if ((word.DisplayAttributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)

                {

                    wordText = wordText.TrimStart();

                }

                if ((word.DisplayAttributes & DisplayAttributes.ZeroTrailingSpaces) != 0)

                {

                    wordText = wordText.TrimEnd();

                }

                text += wordText;

            }

            return text;

        }

        void btnAdvGrammarBuilder_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

            sharedRecognizer.UnloadAllGrammars();

            //from ​​​http://msdn.microsoft.com/msdnmag/issues/06/01/speechinWindowsVista/#S5​​

            //[I'd like] a [<size>] [<crust>] [<topping>] pizza [please]

            //build the core set of choices

            Choices sizes = new Choices("small", "regular", "large");

            Choices crusts = new Choices("thin crust", "thick crust");

            Choices toppings = new Choices("vegetarian", "pepperoni", "cheese");

            SemanticResultKey srkSize = new SemanticResultKey("size", sizes.ToGrammarBuilder());

            SemanticResultKey srkCrust = new SemanticResultKey("crust", crusts.ToGrammarBuilder());

            SemanticResultKey srkTopping = new SemanticResultKey("topping", toppings.ToGrammarBuilder());

            SemanticResultValue srvSize = new SemanticResultValue(srkSize, "regular");

            SemanticResultValue srvCrust = new SemanticResultValue(srkCrust, "thick crust");

            //build the permutations of choices...

            //choose all three

            GrammarBuilder sizeCrustTopping = new GrammarBuilder();

            //sizeCrustTopping.AppendChoices(sizes, "size");

            //sizeCrustTopping.AppendChoices(crusts, "crust");

            //sizeCrustTopping.AppendChoices(toppings, "topping");

            sizeCrustTopping.Append(srkSize);

            sizeCrustTopping.Append(srkCrust);

            sizeCrustTopping.Append(srkTopping);

            //choose size and topping, and assume thick crust

            GrammarBuilder sizeAndTopping = new GrammarBuilder();

            //sizeAndTopping.AppendChoices(sizes, "size");

            //sizeAndTopping.AppendChoices(toppings, "topping");

            //sizeAndTopping.AppendResultKeyValue("crust", "thick crust");

            sizeAndTopping.Append(srkSize);

            sizeAndTopping.Append(srkTopping);

            //TODO how to set default semantic value for "crust"?

            //sizeAndTopping.Append(srvCrust);

            //sizeAndTopping.Append(new SemanticResultValue(crusts.ToGrammarBuilder(), "thick crust"));

            //sizeAndTopping.Append(new SemanticResultValue("crust", "thick crust"));

            //sizeAndTopping.Append(new SemanticResultValue("thick crust"));

            //sizeAndTopping.Append(new SemanticResultKey("crust", "thick crust"));

            //choose topping only, and assume the rest

            GrammarBuilder toppingOnly = new GrammarBuilder();

            //toppingOnly.AppendChoices(toppings, "topping");

            //toppingOnly.AppendResultKeyValue("size", "regular");

            //toppingOnly.AppendResultKeyValue("crust", "thick crust");

            toppingOnly.Append(srkTopping);

            //TODO how to set default semantic value for "size" and "crust"?

            //toppingOnly.Append(srvSize);

            //toppingOnly.Append(srvCrust);

            //toppingOnly.Append(new SemanticResultKey("size", "regular"));

            //toppingOnly.Append(new SemanticResultKey("crust", "thick crust"));

            //assemble the permutations

            Choices permutations = new Choices();

            permutations.Add(sizeCrustTopping);

            permutations.Add(sizeAndTopping);

            permutations.Add(toppingOnly);

            //now build the complete pattern...

            GrammarBuilder pizzaRequest = new GrammarBuilder();

            //pre-amble "[I'd like] a"

            pizzaRequest.Append(new Choices("I'd like a", "a"));

            //permutations "[<size>] [<crust>] [<topping>]"

            pizzaRequest.Append(permutations);

            //post-amble "pizza [please]"

            pizzaRequest.Append(new Choices("pizza", "pizza please"));

            string debug = pizzaRequest.DebugShowPhrases;

            //create the pizza grammar

            Grammar pizzaGrammar = new Grammar(pizzaRequest);

            //attach the event handler

            pizzaGrammar.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(pizzaGrammar_SpeechRecognized);

            //load the grammar into the recognizer

            sharedRecognizer.LoadGrammar(pizzaGrammar);

        }

        void pizzaGrammar_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            StringBuilder resultString = new StringBuilder();

            resultString.Append("Raw text result: ");

            resultString.AppendLine(e.Result.Text);

            resultString.Append("Size: ");

            resultString.AppendLine(e.Result.Semantics["size"].Value.ToString());

            resultString.Append("Crust: ");

            resultString.AppendLine(e.Result.Semantics["crust"].Value.ToString());

            resultString.Append("Topping: ");

            resultString.AppendLine(

                e.Result.Semantics["topping"].Value.ToString());

            MessageBox.Show(resultString.ToString());

        }

        /// <summary>
        /// Placeholder for reacting to rule selection changes; the selected
        /// rule is read directly in btnSrgs_Click, so nothing is needed here
        /// yet.
        /// </summary>
        void cbRules_SelectionChanged(object sender, SelectionChangedEventArgs e)
        {
            //TODO
        }

        void btnSrgs_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

            sharedRecognizer.UnloadAllGrammars();

            string ruleName = (string) cbRules.SelectedValue;

            //SrgsRule rule = sdCmnrules.Rules[ruleName];

            Grammar grammarSrgs = new Grammar(sdCmnrules, ruleName);

            grammarSrgs.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarSrgs_SpeechRecognized);

            sharedRecognizer.LoadGrammar(grammarSrgs);

            MessageBox.Show("listening for user input based on the selected rule : " + ruleName);

        }

        void grammarSrgs_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            //send text to microphone bar

            SpeechUI.SendTextFeedback(e.Result, e.Result.Text, true);

            //send actual numeric value to TextBox on form

            if (e.Result.Semantics.Value != null)

            {

                this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Semantics.Value.ToString());

            }

        }

        void btnTapDictation_PreviewMouseLeftButtonDown(object sender, MouseButtonEventArgs e)

        {

            sharedRecognizer.Enabled = false;

            dictationResult = String.Empty;

            appRecognizer = new SpeechRecognitionEngine();

            appRecognizer.SetInputToDefaultAudioDevice();

            appRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(appRecognizer_SpeechRecognized);

            DictationGrammar dg;

            if (cbSpelling.IsChecked == false)

            {

                dg = new DictationGrammar();

            }

            else

            {

                dg = new DictationGrammar("grammar:dictation#spelling");

            }

            appRecognizer.LoadGrammar(dg);

            appRecognizer.RecognizeAsync(RecognizeMode.Multiple);

        }

        // Accumulated transcript for the current tap-to-talk dictation session.
        string dictationResult;

        /// <summary>
        /// Appends each dictation result to the running transcript and shows
        /// it in txtReco.
        /// </summary>
        void appRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //on UI thread
            dictationResult += e.Result.Text;
            txtReco.Text = dictationResult;
        }

        void btnTapDictation_PreviewMouseLeftButtonUp(object sender, MouseButtonEventArgs e)

        {

            appRecognizer.RecognizeAsyncStop();

            appRecognizer.Dispose();

        }

        void btnInProcColor_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = false;

            Choices cColor = GetColorChoices();

            GrammarBuilder gb = new GrammarBuilder(cColor);

            Grammar grammarColors = new Grammar(gb);

            grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);

            appRecognizer = new SpeechRecognitionEngine();

            appRecognizer.SetInputToDefaultAudioDevice();

            appRecognizer.LoadGrammar(grammarColors);

            appRecognizer.LoadGrammar(new DictationGrammar());

            appRecognizer.RecognizeAsync(RecognizeMode.Multiple);

            MessageBox.Show("listening for you to say a color (e.g. Green)");

        }

        private Choices GetColorChoices()

        {

            //build a grammar list of colors

            Choices cColor = new Choices();

            Type t = typeof(Colors);

            MemberInfo[] mia = t.GetMembers(BindingFlags.Public | BindingFlags.Static);

            foreach (MemberInfo mi in mia)

            {

                if (mi.Name.StartsWith("get_") == true)

                    continue;

                cColor.Add(mi.Name);

            }

            return cColor;

        }

        void btnSharedColor_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

            sharedRecognizer.UnloadAllGrammars();

            Choices cColor = GetColorChoices();

            GrammarBuilder gb = new GrammarBuilder(cColor);

            Grammar grammarColors = new Grammar(gb);

            grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);

            sharedRecognizer.LoadGrammar(grammarColors);

            MessageBox.Show("listening for you to say a color (e.g. Green)");

        }

        void grammarColors_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            //not on UI thread

            //txtReco.Text = e.Result.Text;

            //need to use Dispatcher to get back on UI thread

            //TODO cannot convert from 'anonymous method' to 'System.Delegate' ... WTF?

            //this.Dispatcher.Invoke(DispatcherPriority.Render,

            //    delegate()

            //    {

            //        txtReco.Text = e.Result.Text;

            //    });

            //http://romanski.livejournal.com/1761.html

            this.Dispatcher.Invoke(DispatcherPriority.Render,

            (System.Windows.Forms.MethodInvoker) delegate

            {

                txtReco.Text = e.Result.Text;

            });


            //this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Text);

        }


        // Delegate type used to marshal recognized text onto the UI thread
        // via Dispatcher.Invoke.
        delegate void UpdateTxtRecoDelegate(string arg);

        /// <summary>
        /// Dispatcher target: writes the supplied text into the txtReco box.
        /// </summary>
        public void UpdateTextReco(string arg)
        {
            txtReco.Text = arg;
        }

        #region SHARED_RECOGNIZER_EVENTS

        void sharedRecognizer_StateChanged(object sender, System.Speech.Recognition.StateChangedEventArgs e)

        {

            System.Console.WriteLine("StateChanged : " + e.RecognizerState.ToString());

        }

        void sharedRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            //on UI thread

            System.Console.WriteLine("SpeechRecognized : " + e.Result.Text);

            //txtReco.Text = e.Result.Text;

        }

        void sharedRecognizer_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)

        {

            System.Console.WriteLine("SpeechRecognitionRejected : " + e.Result.Text);

        }

        void sharedRecognizer_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)

        {

            System.Console.WriteLine("SpeechHypothesized : " + e.Result.Text);

        }

        void sharedRecognizer_SpeechDetected(object sender, SpeechDetectedEventArgs e)

        {

            System.Console.WriteLine("SpeechDetected : " + e.AudioPosition.TotalMilliseconds.ToString());

        }

        void sharedRecognizer_RecognizerUpdateReached(object sender, RecognizerUpdateReachedEventArgs e)

        {

            System.Console.WriteLine("RecognizerUpdateReached : " + e.AudioPosition.TotalMilliseconds.ToString());

        }

        void sharedRecognizer_LoadGrammarCompleted(object sender, LoadGrammarCompletedEventArgs e)

        {

            System.Console.WriteLine("LoadGrammarCompleted : " + e.Grammar.Name);

        }

        void sharedRecognizer_EmulateRecognizeCompleted(object sender, EmulateRecognizeCompletedEventArgs e)

        {

            if (e.Result != null)

            {

                System.Console.WriteLine("EmulateRecognizeCompleted : " + e.Result.Text);

            }

            else

            {

                System.Console.WriteLine("EmulateRecognizeCompleted : null result");

            }

        }

        void sharedRecognizer_AudioStateChanged(object sender, AudioStateChangedEventArgs e)

        {

            System.Console.WriteLine("AudioStateChanged : " + e.AudioState.ToString());

        }

        void sharedRecognizer_AudioSignalProblemOccurred(object sender, AudioSignalProblemOccurredEventArgs e)

        {

            System.Console.WriteLine("AudioSignalProblemOccurred : " + e.AudioSignalProblem.ToString());

        }

        /// <summary>
        /// Intentionally quiet: the log line is left commented out,
        /// presumably because level updates would flood the console.
        /// </summary>
        void sharedRecognizer_AudioLevelUpdated(object sender, AudioLevelUpdatedEventArgs e)
        {
            //System.Console.WriteLine("AudioLevelUpdated : " + e.AudioLevel.ToString());
        }

        #endregion

    }

}

需要的留下Email,我给大家发


举报

相关推荐

0 条评论