0
点赞
收藏
分享

微信扫一扫

一个基于 Windows Vista Speech API 5.3 以及 WPF 技术的语音识别代码



本人小试牛刀,试验了一下用 C# (.NET 3.0) 的 WPF 技术开发了一个语音识别程序。



Window1.xaml.cs


using System;

using System.Collections.Generic;

using System.Text;

using System.Windows;

using System.Windows.Controls;

using System.Windows.Data;

using System.Windows.Documents;

using System.Windows.Input;

using System.Windows.Media;

using System.Windows.Media.Imaging;

using System.Windows.Shapes;

using System.Reflection;

using System.Windows.Threading;

using System.IO;

using System.Xml;

using System.Collections.ObjectModel;

using System.ComponentModel;

using System.Speech.Recognition;

using System.Speech.Recognition.SrgsGrammar;

using System.Speech.Synthesis;

namespace speechReco

{

    /// <summary>

    /// Interaction logic for Window1.xaml

    /// </summary>

    public partial class Window1 : System.Windows.Window

    {

        private SpeechRecognizer sharedRecognizer;

        private SpeechRecognitionEngine appRecognizer;

        private SrgsDocument sdCmnrules;

        public Window1()

        {

            InitializeComponent();

            sharedRecognizer = new SpeechRecognizer();

            sharedRecognizer.AudioLevelUpdated += new EventHandler<AudioLevelUpdatedEventArgs>(sharedRecognizer_AudioLevelUpdated);

            sharedRecognizer.AudioSignalProblemOccurred += new EventHandler<AudioSignalProblemOccurredEventArgs>(sharedRecognizer_AudioSignalProblemOccurred);

            sharedRecognizer.AudioStateChanged += new EventHandler<AudioStateChangedEventArgs>(sharedRecognizer_AudioStateChanged);

            sharedRecognizer.EmulateRecognizeCompleted += new EventHandler<EmulateRecognizeCompletedEventArgs>(sharedRecognizer_EmulateRecognizeCompleted);

            sharedRecognizer.LoadGrammarCompleted += new EventHandler<LoadGrammarCompletedEventArgs>(sharedRecognizer_LoadGrammarCompleted);

            sharedRecognizer.RecognizerUpdateReached += new EventHandler<RecognizerUpdateReachedEventArgs>(sharedRecognizer_RecognizerUpdateReached);

            sharedRecognizer.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(sharedRecognizer_SpeechDetected);

            sharedRecognizer.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(sharedRecognizer_SpeechHypothesized);

            sharedRecognizer.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(sharedRecognizer_SpeechRecognitionRejected);

            sharedRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(sharedRecognizer_SpeechRecognized);

            sharedRecognizer.StateChanged += new EventHandler<System.Speech.Recognition.StateChangedEventArgs>(sharedRecognizer_StateChanged);

            //load SRGS library

            byte[] ba = speechReco.Properties.Resources.cmnrules;

            MemoryStream ms = new MemoryStream(ba);

            ms.Position = 0;

            XmlReader xr = XmlReader.Create(ms);

            sdCmnrules = new SrgsDocument(xr);

            //populate ComboBox

            foreach(SrgsRule rule in sdCmnrules.Rules)

            {

                if (rule.Scope == SrgsRuleScope.Public)

                {

                    cbRules.Items.Add(rule.Id);

                }

            }

            //default to integer rule

            cbRules.SelectedValue = "integer";

            cbRules.SelectionChanged += new SelectionChangedEventHandler(cbRules_SelectionChanged);

            this.btnSharedColor.Click += new RoutedEventHandler(btnSharedColor_Click);

            this.btnInProcColor.Click += new RoutedEventHandler(btnInProcColor_Click);

            this.btnTapDictation.PreviewMouseLeftButtonDown += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonDown);

            this.btnTapDictation.PreviewMouseLeftButtonUp += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonUp);

            this.btnSrgs.Click += new RoutedEventHandler(btnSrgs_Click);

            this.btnAdvGrammarBuilder.Click += new RoutedEventHandler(btnAdvGrammarBuilder_Click);

            this.btnWavFile.Click += new RoutedEventHandler(btnWavFile_Click);

            this.btnSynthPhonemes.Click += new RoutedEventHandler(btnSynthPhonemes_Click);

            this.btnEnable.Click += new RoutedEventHandler(btnEnable_Click);

            this.btnDisable.Click += new RoutedEventHandler(btnDisable_Click);

            this.btnUnload.Click += new RoutedEventHandler(btnUnload_Click);

            this.btnEmulate.Click += new RoutedEventHandler(btnEmulate_Click);

        }

        void btnEmulate_Click(object sender, RoutedEventArgs e)

        {

            //sharedRecognizer.EmulateRecognize("green");

            sharedRecognizer.EmulateRecognizeAsync("green");

            //sharedRecognizer.EmulateRecognize("stop listening");

        }

        void btnUnload_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.UnloadAllGrammars();

        }

        void btnDisable_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = false;

        }

        void btnEnable_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

        }

        string recoPhonemes;

        void btnSynthPhonemes_Click(object sender, RoutedEventArgs e)

        {

            //this is a trick to figure out phonemes used by synthesis engine


            //txt to wav

            MemoryStream audioStream = new MemoryStream();

            SpeechSynthesizer synth = new SpeechSynthesizer();

            synth.SetOutputToWaveStream(audioStream);

            PromptBuilder pb = new PromptBuilder();

            pb.AppendBreak(PromptBreak.ExtraSmall); //'e' wont be recognized if this is large, or non-existent?

            synth.Speak(pb);

            string textToSpeak = this.txtSynthTxt.Text.Trim();

            synth.Speak(textToSpeak);

            //synth.Speak(pb);

            synth.SetOutputToNull();

            audioStream.Position = 0;

            //now wav to txt (for reco phonemes)

            recoPhonemes = String.Empty;

            GrammarBuilder gb = new GrammarBuilder(textToSpeak);

            Grammar g = new Grammar(gb); //TODO the hard letters to recognize are 'g' and 'e'

            SpeechRecognitionEngine reco = new SpeechRecognitionEngine();

            reco.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(reco_SpeechHypothesized);

            reco.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(reco_SpeechRecognitionRejected);

            reco.UnloadAllGrammars(); //only use the one word grammar

            reco.LoadGrammar(g);

            reco.SetInputToWaveStream(audioStream);

            RecognitionResult rr = reco.Recognize();

            reco.SetInputToNull();

            if (rr != null)

            {

                recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);

            }

            txtRecoPho.Text = recoPhonemes;

        }

        /// <summary>
        /// Fallback for the phoneme round trip in btnSynthPhonemes_Click:
        /// even a rejected result still carries word/pronunciation data, so
        /// capture it rather than losing the phonemes.
        /// </summary>
        void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        /// <summary>
        /// Captures the phonemes of intermediate hypotheses during the
        /// btnSynthPhonemes_Click round trip; a later final result (or a
        /// rejection) overwrites this value.
        /// </summary>
        void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        void btnWavFile_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = false;

            appRecognizer = new SpeechRecognitionEngine();

            appRecognizer.SetInputToWaveFile("spoken.wav");

            appRecognizer.LoadGrammar(new DictationGrammar());

            RecognitionResult rr = appRecognizer.Recognize();

            appRecognizer.SetInputToNull();

            if (rr == null)

            {

                MessageBox.Show("null result?");

            }

            else

            {

                //NOTE in-process recognir cannot send feedback to microphone bar

                //SpeechUI.SendTextFeedback(rr, rr.Text, true);

                //show phoneme result

                string phonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);

                txtRecoPho.Text = phonemes;

                //show text result

                MessageBox.Show(rr.Text);               

            }

            appRecognizer.Dispose();

        }

        /// <summary>
        /// Which textual form of a RecognizedWordUnit to extract in
        /// StringFromWordArray.
        /// </summary>
        public enum WordType
        {
            Text,
            Normalized = Text, // alias: same underlying value as Text
            Lexical,
            Pronunciation
        }

        public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)

        {

            string text = "";

            foreach (RecognizedWordUnit word in words)

            {

                string wordText = "";

                if (type == WordType.Text || type == WordType.Normalized)

                {

                    wordText = word.Text;

                }

                else if (type == WordType.Lexical)

                {

                    wordText = word.LexicalForm;

                }

                else if (type == WordType.Pronunciation)

                {

                    wordText = word.Pronunciation;

                }

                else

                {

                    throw new InvalidEnumArgumentException(String.Format("[0}: is not a valid input", type));

                }

                //Use display attribute

                if ((word.DisplayAttributes & DisplayAttributes.OneTrailingSpace) != 0)

                {

                    wordText += " ";

                }

                if ((word.DisplayAttributes & DisplayAttributes.TwoTrailingSpaces) != 0)

                {

                    wordText += "  ";

                }

                if ((word.DisplayAttributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)

                {

                    wordText = wordText.TrimStart();

                }

                if ((word.DisplayAttributes & DisplayAttributes.ZeroTrailingSpaces) != 0)

                {

                    wordText = wordText.TrimEnd();

                }

                text += wordText;

            }

            return text;

        }

        void btnAdvGrammarBuilder_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

            sharedRecognizer.UnloadAllGrammars();

            //from ​​​http://msdn.microsoft.com/msdnmag/issues/06/01/speechinWindowsVista/#S5​​

            //[I'd like] a [<size>] [<crust>] [<topping>] pizza [please]

            //build the core set of choices

            Choices sizes = new Choices("small", "regular", "large");

            Choices crusts = new Choices("thin crust", "thick crust");

            Choices toppings = new Choices("vegetarian", "pepperoni", "cheese");

            SemanticResultKey srkSize = new SemanticResultKey("size", sizes.ToGrammarBuilder());

            SemanticResultKey srkCrust = new SemanticResultKey("crust", crusts.ToGrammarBuilder());

            SemanticResultKey srkTopping = new SemanticResultKey("topping", toppings.ToGrammarBuilder());

            SemanticResultValue srvSize = new SemanticResultValue(srkSize, "regular");

            SemanticResultValue srvCrust = new SemanticResultValue(srkCrust, "thick crust");

            //build the permutations of choices...

            //choose all three

            GrammarBuilder sizeCrustTopping = new GrammarBuilder();

            //sizeCrustTopping.AppendChoices(sizes, "size");

            //sizeCrustTopping.AppendChoices(crusts, "crust");

            //sizeCrustTopping.AppendChoices(toppings, "topping");

            sizeCrustTopping.Append(srkSize);

            sizeCrustTopping.Append(srkCrust);

            sizeCrustTopping.Append(srkTopping);

            //choose size and topping, and assume thick crust

            GrammarBuilder sizeAndTopping = new GrammarBuilder();

            //sizeAndTopping.AppendChoices(sizes, "size");

            //sizeAndTopping.AppendChoices(toppings, "topping");

            //sizeAndTopping.AppendResultKeyValue("crust", "thick crust");

            sizeAndTopping.Append(srkSize);

            sizeAndTopping.Append(srkTopping);

            //TODO how to set default semantic value for "crust"?

            //sizeAndTopping.Append(srvCrust);

            //sizeAndTopping.Append(new SemanticResultValue(crusts.ToGrammarBuilder(), "thick crust"));

            //sizeAndTopping.Append(new SemanticResultValue("crust", "thick crust"));

            //sizeAndTopping.Append(new SemanticResultValue("thick crust"));

            //sizeAndTopping.Append(new SemanticResultKey("crust", "thick crust"));

            //choose topping only, and assume the rest

            GrammarBuilder toppingOnly = new GrammarBuilder();

            //toppingOnly.AppendChoices(toppings, "topping");

            //toppingOnly.AppendResultKeyValue("size", "regular");

            //toppingOnly.AppendResultKeyValue("crust", "thick crust");

            toppingOnly.Append(srkTopping);

            //TODO how to set default semantic value for "size" and "crust"?

            //toppingOnly.Append(srvSize);

            //toppingOnly.Append(srvCrust);

            //toppingOnly.Append(new SemanticResultKey("size", "regular"));

            //toppingOnly.Append(new SemanticResultKey("crust", "thick crust"));

            //assemble the permutations

            Choices permutations = new Choices();

            permutations.Add(sizeCrustTopping);

            permutations.Add(sizeAndTopping);

            permutations.Add(toppingOnly);

            //now build the complete pattern...

            GrammarBuilder pizzaRequest = new GrammarBuilder();

            //pre-amble "[I'd like] a"

            pizzaRequest.Append(new Choices("I'd like a", "a"));

            //permutations "[<size>] [<crust>] [<topping>]"

            pizzaRequest.Append(permutations);

            //post-amble "pizza [please]"

            pizzaRequest.Append(new Choices("pizza", "pizza please"));

            string debug = pizzaRequest.DebugShowPhrases;

            //create the pizza grammar

            Grammar pizzaGrammar = new Grammar(pizzaRequest);

            //attach the event handler

            pizzaGrammar.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(pizzaGrammar_SpeechRecognized);

            //load the grammar into the recognizer

            sharedRecognizer.LoadGrammar(pizzaGrammar);

        }

        void pizzaGrammar_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            StringBuilder resultString = new StringBuilder();

            resultString.Append("Raw text result: ");

            resultString.AppendLine(e.Result.Text);

            resultString.Append("Size: ");

            resultString.AppendLine(e.Result.Semantics["size"].Value.ToString());

            resultString.Append("Crust: ");

            resultString.AppendLine(e.Result.Semantics["crust"].Value.ToString());

            resultString.Append("Topping: ");

            resultString.AppendLine(

                e.Result.Semantics["topping"].Value.ToString());

            MessageBox.Show(resultString.ToString());

        }

        /// <summary>
        /// Placeholder for reacting to rule selection changes; the selected
        /// rule is read directly in btnSrgs_Click, so nothing is needed here
        /// yet.
        /// </summary>
        void cbRules_SelectionChanged(object sender, SelectionChangedEventArgs e)
        {
            //TODO
        }

        void btnSrgs_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

            sharedRecognizer.UnloadAllGrammars();

            string ruleName = (string) cbRules.SelectedValue;

            //SrgsRule rule = sdCmnrules.Rules[ruleName];

            Grammar grammarSrgs = new Grammar(sdCmnrules, ruleName);

            grammarSrgs.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarSrgs_SpeechRecognized);

            sharedRecognizer.LoadGrammar(grammarSrgs);

            MessageBox.Show("listening for user input based on the selected rule : " + ruleName);

        }

        void grammarSrgs_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            //send text to microphone bar

            SpeechUI.SendTextFeedback(e.Result, e.Result.Text, true);

            //send actual numeric value to TextBox on form

            if (e.Result.Semantics.Value != null)

            {

                this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Semantics.Value.ToString());

            }

        }

        void btnTapDictation_PreviewMouseLeftButtonDown(object sender, MouseButtonEventArgs e)

        {

            sharedRecognizer.Enabled = false;

            dictationResult = String.Empty;

            appRecognizer = new SpeechRecognitionEngine();

            appRecognizer.SetInputToDefaultAudioDevice();

            appRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(appRecognizer_SpeechRecognized);

            DictationGrammar dg;

            if (cbSpelling.IsChecked == false)

            {

                dg = new DictationGrammar();

            }

            else

            {

                dg = new DictationGrammar("grammar:dictation#spelling");

            }

            appRecognizer.LoadGrammar(dg);

            appRecognizer.RecognizeAsync(RecognizeMode.Multiple);

        }

        // Accumulated transcript for the current tap-to-talk dictation session.
        string dictationResult;

        /// <summary>
        /// Appends each dictation result to the running transcript and shows
        /// it in txtReco.
        /// </summary>
        void appRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //on UI thread
            dictationResult += e.Result.Text;
            txtReco.Text = dictationResult;
        }

        void btnTapDictation_PreviewMouseLeftButtonUp(object sender, MouseButtonEventArgs e)

        {

            appRecognizer.RecognizeAsyncStop();

            appRecognizer.Dispose();

        }

        void btnInProcColor_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = false;

            Choices cColor = GetColorChoices();

            GrammarBuilder gb = new GrammarBuilder(cColor);

            Grammar grammarColors = new Grammar(gb);

            grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);

            appRecognizer = new SpeechRecognitionEngine();

            appRecognizer.SetInputToDefaultAudioDevice();

            appRecognizer.LoadGrammar(grammarColors);

            appRecognizer.LoadGrammar(new DictationGrammar());

            appRecognizer.RecognizeAsync(RecognizeMode.Multiple);

            MessageBox.Show("listening for you to say a color (e.g. Green)");

        }

        private Choices GetColorChoices()

        {

            //build a grammar list of colors

            Choices cColor = new Choices();

            Type t = typeof(Colors);

            MemberInfo[] mia = t.GetMembers(BindingFlags.Public | BindingFlags.Static);

            foreach (MemberInfo mi in mia)

            {

                if (mi.Name.StartsWith("get_") == true)

                    continue;

                cColor.Add(mi.Name);

            }

            return cColor;

        }

        void btnSharedColor_Click(object sender, RoutedEventArgs e)

        {

            sharedRecognizer.Enabled = true;

            sharedRecognizer.UnloadAllGrammars();

            Choices cColor = GetColorChoices();

            GrammarBuilder gb = new GrammarBuilder(cColor);

            Grammar grammarColors = new Grammar(gb);

            grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);

            sharedRecognizer.LoadGrammar(grammarColors);

            MessageBox.Show("listening for you to say a color (e.g. Green)");

        }

        void grammarColors_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            //not on UI thread

            //txtReco.Text = e.Result.Text;

            //need to use Dispatcher to get back on UI thread

            //TODO cannot convert from 'anonymous method' to 'System.Delegate' ... WTF?

            //this.Dispatcher.Invoke(DispatcherPriority.Render,

            //    delegate()

            //    {

            //        txtReco.Text = e.Result.Text;

            //    });

            //http://romanski.livejournal.com/1761.html

            this.Dispatcher.Invoke(DispatcherPriority.Render,

            (System.Windows.Forms.MethodInvoker) delegate

            {

                txtReco.Text = e.Result.Text;

            });


            //this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Text);

        }


        // Delegate type used to marshal recognized text onto the UI thread
        // via Dispatcher.Invoke.
        delegate void UpdateTxtRecoDelegate(string arg);

        /// <summary>
        /// Dispatcher target: writes the supplied text into the txtReco box.
        /// </summary>
        public void UpdateTextReco(string arg)
        {
            txtReco.Text = arg;
        }

        #region SHARED_RECOGNIZER_EVENTS

        void sharedRecognizer_StateChanged(object sender, System.Speech.Recognition.StateChangedEventArgs e)

        {

            System.Console.WriteLine("StateChanged : " + e.RecognizerState.ToString());

        }

        void sharedRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)

        {

            //on UI thread

            System.Console.WriteLine("SpeechRecognized : " + e.Result.Text);

            //txtReco.Text = e.Result.Text;

        }

        void sharedRecognizer_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)

        {

            System.Console.WriteLine("SpeechRecognitionRejected : " + e.Result.Text);

        }

        void sharedRecognizer_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)

        {

            System.Console.WriteLine("SpeechHypothesized : " + e.Result.Text);

        }

        void sharedRecognizer_SpeechDetected(object sender, SpeechDetectedEventArgs e)

        {

            System.Console.WriteLine("SpeechDetected : " + e.AudioPosition.TotalMilliseconds.ToString());

        }

        void sharedRecognizer_RecognizerUpdateReached(object sender, RecognizerUpdateReachedEventArgs e)

        {

            System.Console.WriteLine("RecognizerUpdateReached : " + e.AudioPosition.TotalMilliseconds.ToString());

        }

        void sharedRecognizer_LoadGrammarCompleted(object sender, LoadGrammarCompletedEventArgs e)

        {

            System.Console.WriteLine("LoadGrammarCompleted : " + e.Grammar.Name);

        }

        void sharedRecognizer_EmulateRecognizeCompleted(object sender, EmulateRecognizeCompletedEventArgs e)

        {

            if (e.Result != null)

            {

                System.Console.WriteLine("EmulateRecognizeCompleted : " + e.Result.Text);

            }

            else

            {

                System.Console.WriteLine("EmulateRecognizeCompleted : null result");

            }

        }

        void sharedRecognizer_AudioStateChanged(object sender, AudioStateChangedEventArgs e)

        {

            System.Console.WriteLine("AudioStateChanged : " + e.AudioState.ToString());

        }

        void sharedRecognizer_AudioSignalProblemOccurred(object sender, AudioSignalProblemOccurredEventArgs e)

        {

            System.Console.WriteLine("AudioSignalProblemOccurred : " + e.AudioSignalProblem.ToString());

        }

        /// <summary>
        /// Intentionally quiet: the log line is left commented out,
        /// presumably because level updates would flood the console.
        /// </summary>
        void sharedRecognizer_AudioLevelUpdated(object sender, AudioLevelUpdatedEventArgs e)
        {
            //System.Console.WriteLine("AudioLevelUpdated : " + e.AudioLevel.ToString());
        }

        #endregion

    }

}

需要的留下Email,我给大家发


举报

相关推荐

0 条评论