1 module speech.windows.sapi;
2 
3 import speech.windows.ObjIdl;
4 
5 import core.sys.windows.windows;
6 import std.c.windows.com;
7 import std.bitmanip;
8 
// Pointer aliases standing in for the C++ reference typedefs of the COM headers.
alias IID* REFIID;
alias CLSID* REFCLSID;
alias GUID* REFGUID;

// Identifiers whose storage is provided outside this module (extern).
// D module-level variables are thread-local by default, whereas C globals
// live in ordinary global storage; __gshared makes these declarations refer
// to the process-wide C symbols instead of thread-local ones.
extern(C) extern __gshared CLSID CLSID_SpVoice;
extern(C) extern __gshared IID IID_ISpVoice;

extern(C) extern __gshared CLSID CLSID_SpObjectTokenCategory;
extern(C) extern __gshared IID IID_ISpObjectTokenCategory;
18 
// Wave audio format descriptor mirroring the Win32 WAVEFORMATEX structure.
// The Windows headers declare it with 1-byte packing, giving an 18-byte
// struct; without align(1) D rounds the size up to 20 bytes, which breaks
// sizeof-based allocation and arrays of this type.
align(1) struct WAVEFORMATEX
{
align(1):
    WORD    wFormatTag;        /* format type */
    WORD    nChannels;         /* number of channels (i.e. mono, stereo...) */
    DWORD   nSamplesPerSec;    /* sample rate */
    DWORD   nAvgBytesPerSec;   /* for buffer estimation */
    WORD    nBlockAlign;       /* block size of data */
    WORD    wBitsPerSample;    /* Number of bits per sample of mono data */
    WORD    cbSize;            /* The count in bytes of the size of
                                  extra information (after cbSize) */
}
30 
// Language identifier, a 16-bit value.
alias WORD LANGID;

// A single phone id is one wide character.
alias WCHAR SPPHONEID;
// Mutable NULL-terminated SPPHONEID string.
alias LPWSTR PSPPHONEID;
// Read-only NULL-terminated SPPHONEID string.
alias LPCWSTR PCSPPHONEID;
36 
//--- DataKey locations
// Note the gap in the numbering: SPDKL_CurrentConfig is 5, not 3.
enum
{
    SPDKL_DefaultLocation = 0,
    SPDKL_CurrentUser     = 1,
    SPDKL_LocalMachine    = 2,
    SPDKL_CurrentConfig   = 5
}
alias typeof(SPDKL_DefaultLocation) SPDATAKEYLOCATION;
46 
//--- TokenUI constants (UI type names)
enum SPDUI_EngineProperties      = `EngineProperties`;
enum SPDUI_AddRemoveWord         = `AddRemoveWord`;
enum SPDUI_UserTraining          = `UserTraining`;
enum SPDUI_MicTraining           = `MicTraining`;
enum SPDUI_RecoProfileProperties = `RecoProfileProperties`;
enum SPDUI_AudioProperties       = `AudioProperties`;
enum SPDUI_AudioVolume           = `AudioVolume`;
enum SPDUI_UserEnrollment        = `UserEnrollment`;
enum SPDUI_ShareData             = `ShareData`;

// Introduced with Vista; engines may still expose it on earlier systems.
enum SPDUI_Tutorial              = `Tutorial`;
60 
//--- Data formats - these are used by the CSpStreamFormat class in sphelper.h
// Apart from the first two members every value is assigned implicitly, so the
// declaration order below determines the numeric values and must be kept.
enum
{
    SPSF_Default = -1,
    SPSF_NoAssignedFormat = 0,  // Similar to GUID_NULL
    SPSF_Text,
    SPSF_NonStandardFormat,     // Non-SAPI standard format with no WAVEFORMATEX description
    SPSF_ExtendedAudioFormat,   // Non-SAPI standard format but has WAVEFORMATEX description

    // Standard PCM wave formats
    SPSF_8kHz8BitMono,
    SPSF_8kHz8BitStereo,
    SPSF_8kHz16BitMono,
    SPSF_8kHz16BitStereo,
    SPSF_11kHz8BitMono,
    SPSF_11kHz8BitStereo,
    SPSF_11kHz16BitMono,
    SPSF_11kHz16BitStereo,
    SPSF_12kHz8BitMono,
    SPSF_12kHz8BitStereo,
    SPSF_12kHz16BitMono,
    SPSF_12kHz16BitStereo,
    SPSF_16kHz8BitMono,
    SPSF_16kHz8BitStereo,
    SPSF_16kHz16BitMono,
    SPSF_16kHz16BitStereo,
    SPSF_22kHz8BitMono,
    SPSF_22kHz8BitStereo,
    SPSF_22kHz16BitMono,
    SPSF_22kHz16BitStereo,
    SPSF_24kHz8BitMono,
    SPSF_24kHz8BitStereo,
    SPSF_24kHz16BitMono,
    SPSF_24kHz16BitStereo,
    SPSF_32kHz8BitMono,
    SPSF_32kHz8BitStereo,
    SPSF_32kHz16BitMono,
    SPSF_32kHz16BitStereo,
    SPSF_44kHz8BitMono,
    SPSF_44kHz8BitStereo,
    SPSF_44kHz16BitMono,
    SPSF_44kHz16BitStereo,
    SPSF_48kHz8BitMono,
    SPSF_48kHz8BitStereo,
    SPSF_48kHz16BitMono,
    SPSF_48kHz16BitStereo,

    // TrueSpeech format
    SPSF_TrueSpeech_8kHz1BitMono,

    // A-Law formats
    SPSF_CCITT_ALaw_8kHzMono,
    SPSF_CCITT_ALaw_8kHzStereo,
    SPSF_CCITT_ALaw_11kHzMono,
    SPSF_CCITT_ALaw_11kHzStereo,
    SPSF_CCITT_ALaw_22kHzMono,
    SPSF_CCITT_ALaw_22kHzStereo,
    SPSF_CCITT_ALaw_44kHzMono,
    SPSF_CCITT_ALaw_44kHzStereo,

    // u-Law formats
    SPSF_CCITT_uLaw_8kHzMono,
    SPSF_CCITT_uLaw_8kHzStereo,
    SPSF_CCITT_uLaw_11kHzMono,
    SPSF_CCITT_uLaw_11kHzStereo,
    SPSF_CCITT_uLaw_22kHzMono,
    SPSF_CCITT_uLaw_22kHzStereo,
    SPSF_CCITT_uLaw_44kHzMono,
    SPSF_CCITT_uLaw_44kHzStereo,

    // ADPCM formats
    SPSF_ADPCM_8kHzMono,
    SPSF_ADPCM_8kHzStereo,
    SPSF_ADPCM_11kHzMono,
    SPSF_ADPCM_11kHzStereo,
    SPSF_ADPCM_22kHzMono,
    SPSF_ADPCM_22kHzStereo,
    SPSF_ADPCM_44kHzMono,
    SPSF_ADPCM_44kHzStereo,

    // GSM 6.10 formats
    SPSF_GSM610_8kHzMono,
    SPSF_GSM610_11kHzMono,
    SPSF_GSM610_22kHzMono,
    SPSF_GSM610_44kHzMono,

    // Count of the non-negative format values above
    SPSF_NUM_FORMATS
}
alias typeof(SPSF_Default) SPSTREAMFORMAT;
143 
// Data-format GUIDs whose storage is provided outside this module (extern).
// __gshared is required so that D refers to the process-wide C symbols
// rather than treating the variables as thread-local.
extern(C) extern __gshared GUID SPDFID_Text;
extern(C) extern __gshared GUID SPDFID_WaveFormatEx;
146 
//--- Registry roots under which speech settings are stored
enum SPREG_USER_ROOT          = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech`;
enum SPREG_LOCAL_MACHINE_ROOT = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech`;

//--- Category ids for speech resource management
enum SPCAT_AUDIOOUT        = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioOutput`;
enum SPCAT_AUDIOIN         = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioInput`;
enum SPCAT_VOICES          = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices`;
enum SPCAT_RECOGNIZERS     = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Recognizers`;
enum SPCAT_APPLEXICONS     = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AppLexicons`;
enum SPCAT_PHONECONVERTERS = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\PhoneConverters`;
enum SPCAT_RECOPROFILES    = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech\RecoProfiles`;

//--- Specific token ids of interest
// The two MMAudio ids deliberately end with a backslash.
enum SPMMSYS_AUDIO_IN_TOKEN_ID       = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioInput\TokenEnums\MMAudioIn\`;
enum SPMMSYS_AUDIO_OUT_TOKEN_ID      = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioOutput\TokenEnums\MMAudioOut\`;
enum SPCURRENT_USER_LEXICON_TOKEN_ID = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech\CurrentUserLexicon`;
164 
165 /+ TODO
166 #if _SAPI_BUILD_VER >= 0x053
167 // Shortcuts only supported on Vista and above
168 enum SPCURRENT_USER_SHORTCUT_TOKEN_ID = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech\CurrentUserShortcut`;
169 #endif // _SAPI_BUILD_VER >= 0x053
170 +/
171 
//--- Standard value and sub-key names found on object tokens
enum SPTOKENVALUE_CLSID    = `CLSID`;
enum SPTOKENKEY_FILES      = `Files`;
enum SPTOKENKEY_UI         = `UI`;
enum SPTOKENKEY_ATTRIBUTES = `Attributes`;
177 
178 /+ TODO
179 #if _SAPI_BUILD_VER >= 0x053
180 enum SPTOKENKEY_RETAINEDAUDIO = `SecondsPerRetainedAudioEvent`;
181 #endif // _SAPI_BUILD_VER >= 0x053
182 +/
183 
//--- Standard voice category values
enum SPVOICECATEGORY_TTSRATE = `DefaultTTSRate`;

//--- Standard SR engine property names
enum SPPROP_RESOURCE_USAGE              = `ResourceUsage`;
enum SPPROP_HIGH_CONFIDENCE_THRESHOLD   = `HighConfidenceThreshold`;
enum SPPROP_NORMAL_CONFIDENCE_THRESHOLD = `NormalConfidenceThreshold`;
enum SPPROP_LOW_CONFIDENCE_THRESHOLD    = `LowConfidenceThreshold`;
enum SPPROP_RESPONSE_SPEED              = `ResponseSpeed`;
enum SPPROP_COMPLEX_RESPONSE_SPEED      = `ComplexResponseSpeed`;
enum SPPROP_ADAPTATION_ON               = `AdaptationOn`;

// Added for Vista; engines running on earlier systems may support them too.
enum SPPROP_PERSISTED_BACKGROUND_ADAPTATION     = `PersistedBackgroundAdaptation`;
enum SPPROP_PERSISTED_LANGUAGE_MODEL_ADAPTATION = `PersistedLanguageModelAdaptation`;
enum SPPROP_UX_IS_LISTENING                     = `UXIsListening`;

//--- Standard SAPI recognition topics
enum SPTOPIC_SPELLING = `Spelling`;

// CFG wildcard token
enum SPWILDCARD = `...`;

// CFG dictation tokens
enum SPDICTATION    = `*`;
enum SPINFDICTATION = `*+`;
210 
211 /+ TODO
212 #if _SAPI_BUILD_VER >= 0x053
213 // Registry key that stores a list of object token CLSIDs marked as safe to instantiate from HKCU
214 enum SPREG_SAFE_USER_TOKENS = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\UserTokens`;
215 #endif // _SAPI_BUILD_VER >= 0x053
216 +/
217 
// CFG confidence scores (signed 8-bit values)
enum byte SP_LOW_CONFIDENCE    = -1;
enum byte SP_NORMAL_CONFIDENCE = 0;
enum byte SP_HIGH_CONFIDENCE   = +1;

// CFG default weight.
// The initializer is the integer 1 rather than 1.0: MIDL does not support
// floating point on the right-hand side, and 1.0 produced unexpected results
// in the generated type library.
enum float DEFAULT_WEIGHT = 1;

// Upper bounds for lexicon word and pronunciation lengths
enum ULONG SP_MAX_WORD_LENGTH = 128;
enum ULONG SP_MAX_PRON_LENGTH = 384;
231 
232 /+ TODO
233 #if _SAPI_BUILD_VER >= 0x053
234 // Flag used in EmulateRecognitionEx to indicate re-sending an existing result
235 enum ULONG SP_EMULATE_RESULT = 0x40000000;
236 #endif // _SAPI_BUILD_VER >= 0x053
237 +/
238 
239 
240 //--- ISpNotifyCallback -----------------------------------------------------
241 
// Callback interface accepted by ISpNotifySource.SetNotifyCallbackInterface.
// NOTE(review): its only member, NotifyCallback, is commented out below, so
// this declaration currently exposes no methods at all - TODO confirm how
// implementers are expected to provide the callback before relying on it.
extern(C++) interface ISpNotifyCallback
{
	/+
	HRESULT STDMETHODCALLTYPE NotifyCallback(
		WPARAM wParam,
		LPARAM lParam);
	+/
}

// Function-pointer callback type (Windows calling convention) accepted by
// ISpNotifySource.SetNotifyCallbackFunction.
alias extern(Windows) void function(WPARAM wParam, LPARAM lParam) SPNOTIFYCALLBACK;
252 
//--- ISpNotifySource -------------------------------------------------------
// COM interface offering several ways to register for notifications.
// The declaration order defines the vtable slots and must not be changed.
interface ISpNotifySource : IUnknown
{
    HRESULT SetNotifySink(ISpNotifySink pNotifySink);

    HRESULT SetNotifyWindowMessage(HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam);

    HRESULT SetNotifyCallbackFunction(SPNOTIFYCALLBACK pfnCallback, WPARAM wParam, LPARAM lParam);

    HRESULT SetNotifyCallbackInterface(ISpNotifyCallback pSpCallback, WPARAM wParam, LPARAM lParam);

    HRESULT SetNotifyWin32Event();

    HRESULT WaitForNotifyEvent(DWORD dwMilliseconds);

    // Unlike the other members this one returns a HANDLE, not an HRESULT.
    HANDLE GetNotifyEventHandle();
}
274 
//--- ISpNotifySink ---------------------------------------------------------
// COM interface with a single parameterless Notify method; it is the type
// accepted by ISpNotifySource.SetNotifySink.
interface ISpNotifySink : IUnknown
{
    HRESULT Notify();
}
280 
//--- ISpDataKey ------------------------------------------------------------
// COM interface for reading and writing named values and sub-keys.
// The declaration order defines the vtable slots and must not be changed.
interface ISpDataKey : IUnknown
{
    // Raw binary values
    HRESULT SetData(LPCWSTR pszValueName, ULONG cbData, const BYTE* pData);
    HRESULT GetData(LPCWSTR pszValueName, ULONG* pcbData, BYTE* pData);

    // String values
    HRESULT SetStringValue(LPCWSTR pszValueName, LPCWSTR pszValue);
    HRESULT GetStringValue(LPCWSTR pszValueName, LPWSTR* ppszValue);

    // DWORD values
    HRESULT SetDWORD(LPCWSTR pszValueName, DWORD dwValue);
    HRESULT GetDWORD(LPCWSTR pszValueName, DWORD* pdwValue);

    // Sub-key management
    HRESULT OpenKey(LPCWSTR pszSubKeyName, ISpDataKey* ppSubKey);
    HRESULT CreateKey(LPCWSTR pszSubKey, ISpDataKey* ppSubKey);
    HRESULT DeleteKey(LPCWSTR pszSubKey);
    HRESULT DeleteValue(LPCWSTR pszValueName);

    // Enumeration by index
    HRESULT EnumKeys(ULONG Index, LPWSTR* ppszSubKeyName);
    HRESULT EnumValues(ULONG Index, LPWSTR* ppszValueName);
}
297 
//--- ISpRegDataKey ---------------------------------------------------------
// Extends ISpDataKey with one method that takes a registry key handle and a
// read-only flag.
interface ISpRegDataKey : ISpDataKey
{
    HRESULT SetKey(HKEY hkey, BOOL fReadOnly);
}
303 
//--- ISpObjectTokenCategory ------------------------------------------------
// Extends ISpDataKey with category-level operations. The declaration order
// defines the vtable slots and must not be changed.
interface ISpObjectTokenCategory : ISpDataKey
{
    HRESULT SetId(LPCWSTR pszCategoryId, BOOL fCreateIfNotExist);
    HRESULT GetId(LPWSTR* ppszCoMemCategoryId);
    HRESULT GetDataKey(SPDATAKEYLOCATION spdkl, ISpDataKey* ppDataKey);

    HRESULT EnumTokens(LPCWSTR pzsReqAttribs, LPCWSTR pszOptAttribs, IEnumSpObjectTokens* ppEnum);

    HRESULT SetDefaultTokenId(LPCWSTR pszTokenId);
    HRESULT GetDefaultTokenId(LPWSTR* ppszCoMemTokenId);
}
319 
//--- ISpObjectToken --------------------------------------------------------
// Extends ISpDataKey with token-level operations. The declaration order
// defines the vtable slots and must not be changed.
interface ISpObjectToken : ISpDataKey
{
    HRESULT SetId(LPCWSTR pszCategoryId, LPCWSTR pszTokenId, BOOL fCreateIfNotExist);
    HRESULT GetId(LPWSTR* ppszCoMemTokenId);
    HRESULT GetCategory(ISpObjectTokenCategory* ppTokenCategory);

    HRESULT CreateInstance(IUnknown pUnkOuter, DWORD dwClsContext, REFIID riid, void** ppvObject);

    // nFolder: same as SHGetFolderPath -- if non-zero, must set CSIDL_FLAG_CREATE
    HRESULT GetStorageFileName(REFCLSID clsidCaller,
                               LPCWSTR pszValueName,
                               LPCWSTR pszFileNameSpecifier,
                               ULONG nFolder,
                               LPWSTR* ppszFilePath);
    HRESULT RemoveStorageFileName(REFCLSID clsidCaller, LPCWSTR pszKeyName, BOOL fDeleteFile);

    HRESULT Remove(const CLSID* pclsidCaller);

    HRESULT IsUISupported(LPCWSTR pszTypeOfUI,
                          void* pvExtraData,
                          ULONG cbExtraData,
                          IUnknown punkObject,
                          BOOL* pfSupported);
    HRESULT DisplayUI(HWND hwndParent,
                      LPCWSTR pszTitle,
                      LPCWSTR pszTypeOfUI,
                      void* pvExtraData,
                      ULONG cbExtraData,
                      IUnknown punkObject);
    HRESULT MatchesAttributes(LPCWSTR pszAttributes, BOOL* pfMatches);
}
363 
// Extends ISpObjectToken with a single initialisation method that takes a
// category id, a token id and an ISpDataKey.
interface ISpObjectTokenInit : ISpObjectToken
{
    HRESULT InitFromDataKey(LPCWSTR pszCategoryId, LPCWSTR pszTokenId, ISpDataKey pDataKey);
}
371 
//--- IEnumSpObjectTokens ---------------------------------------------------
// Enumerator over speech object tokens. The declaration order defines the
// vtable slots and must not be changed.
interface IEnumSpObjectTokens : IUnknown
{
    HRESULT Next(ULONG celt, ISpObjectToken* pelt, ULONG* pceltFetched);
    HRESULT Skip(ULONG celt);

    HRESULT Reset();
    HRESULT Clone(IEnumSpObjectTokens* ppEnum);

    HRESULT Item(ULONG Index, ISpObjectToken* ppToken);

    HRESULT GetCount(ULONG* pCount);
}
389 
//--- ISpEventSource --------------------------------------------------------
// Tags describing what kind of value an event's lParam carries.
enum
{
    SPET_LPARAM_IS_UNDEFINED = 0,
    SPET_LPARAM_IS_TOKEN,
    SPET_LPARAM_IS_OBJECT,
    SPET_LPARAM_IS_POINTER,
    SPET_LPARAM_IS_STRING,
}
alias typeof(SPET_LPARAM_IS_UNDEFINED) SPEVENTLPARAMTYPE;
400 
// Event identifiers. TTS events occupy SPEI_MIN_TTS..SPEI_MAX_TTS and
// speech-recognition events occupy SPEI_MIN_SR..SPEI_MAX_SR.
enum
{
    SPEI_UNDEFINED              = 0,

    //--- TTS engine
    SPEI_START_INPUT_STREAM     = 1,
    SPEI_END_INPUT_STREAM       = 2,
    SPEI_VOICE_CHANGE           = 3,   // LPARAM_IS_TOKEN
    SPEI_TTS_BOOKMARK           = 4,   // LPARAM_IS_STRING
    SPEI_WORD_BOUNDARY          = 5,
    SPEI_PHONEME                = 6,
    SPEI_SENTENCE_BOUNDARY      = 7,
    SPEI_VISEME                 = 8,
    SPEI_TTS_AUDIO_LEVEL        = 9,   // wParam contains current output audio level

    SPEI_TTS_PRIVATE            = 15,  //--- Engine vendors use this reserved value.

    SPEI_MIN_TTS                = 1,
    SPEI_MAX_TTS                = 15,

    //--- Speech Recognition
    SPEI_END_SR_STREAM          = 34,  // LPARAM contains HRESULT, WPARAM contains flags (SPESF_xxx)
    SPEI_SOUND_START            = 35,
    SPEI_SOUND_END              = 36,
    SPEI_PHRASE_START           = 37,
    SPEI_RECOGNITION            = 38,
    SPEI_HYPOTHESIS             = 39,
    SPEI_SR_BOOKMARK            = 40,
    SPEI_PROPERTY_NUM_CHANGE    = 41,  // LPARAM points to a string, WPARAM is the attrib value
    SPEI_PROPERTY_STRING_CHANGE = 42,  // LPARAM pointer to buffer.  Two concatenated null terminated strings.
    SPEI_FALSE_RECOGNITION      = 43,  // apparent speech with no valid recognition
    SPEI_INTERFERENCE           = 44,  // LPARAM is any combination of SPINTERFERENCE flags
    SPEI_REQUEST_UI             = 45,  // LPARAM is string.
    SPEI_RECO_STATE_CHANGE      = 46,  // wParam contains new reco state
    SPEI_ADAPTATION             = 47,  // we are now ready to accept the adaptation buffer
    SPEI_START_SR_STREAM        = 48,
    SPEI_RECO_OTHER_CONTEXT     = 49,  // Phrase finished and recognized, but for other context
    SPEI_SR_AUDIO_LEVEL         = 50,  // wParam contains current input audio level
    // TODO (_SAPI_BUILD_VER >= 0x053), not enabled in this binding:
    //   SPEI_SR_RETAINEDAUDIO    = 51,
    SPEI_SR_PRIVATE             = 52,  // Engine vendors use this reserved value.
    // TODO (_SAPI_BUILD_VER >= 0x053), not enabled in this binding:
    //   SPEI_ACTIVE_CATEGORY_CHANGED = 53, // _SAPI_BUILD_VER >= 0x054; WPARAM and LPARAM are null.
    //   SPEI_RESERVED4           = 53, // otherwise; reserved for system use.
    //   SPEI_RESERVED5           = 54, // Reserved for system use.
    //   SPEI_RESERVED6           = 55, // Reserved for system use.

    SPEI_MIN_SR                 = 34,
    // The header defines SPEI_MAX_SR for every SAPI version (52 in SAPI 5.1,
    // 55 in SAPI 5.3). Since the 5.3-only events above are disabled here, the
    // 5.1 value is used so that the SR range has an upper bound, matching
    // SPEI_MIN_TTS/SPEI_MAX_TTS. Switch to 55 when the 5.3 events are enabled.
    SPEI_MAX_SR                 = 52,

    SPEI_RESERVED1              = 30,  // do not use
    SPEI_RESERVED2              = 33,  // do not use
    SPEI_RESERVED3              = 63   // do not use
}
alias typeof(SPEI_UNDEFINED) SPEVENTENUM;
471 
472 //cpp_quote("#define SPFEI_FLAGCHECK ( (1ui64 << SPEI_RESERVED1) | (1ui64 << SPEI_RESERVED2) )")
473 
474 //cpp_quote("#define SPFEI_ALL_TTS_EVENTS (0x000000000000FFFEui64 | SPFEI_FLAGCHECK)")
475 //cpp_quote("#define SPFEI_ALL_SR_EVENTS  (0x001FFFFC00000000ui64 | SPFEI_FLAGCHECK)")
476 //cpp_quote("#define SPFEI_ALL_EVENTS      0xEFFFFFFFFFFFFFFFui64")
477 
478 // The SPFEI macro converts an SPEVENTENUM event value into a 64-bit value.
479 // Multiple values can then be OR-ed together and passed to SetInterest.
480 //cpp_quote("#define SPFEI(SPEI_ord) ((1ui64 << SPEI_ord) | SPFEI_FLAGCHECK)")
481 
// Event record as filled in by ISpEventSource.GetEvents.
struct SPEVENT
{
    // Two 16-bit fields (32 bits in total) generated by std.bitmanip.bitfields.
    mixin(bitfields!(SPEVENTENUM,       "eEventId",    16,
                     SPEVENTLPARAMTYPE, "elParamType", 16));

    ULONG     ulStreamNum;
    ULONGLONG ullAudioStreamOffset;
    WPARAM    wParam;
    LPARAM    lParam;
}
494 
// Same leading fields as SPEVENT, but with the two parameters stored as
// fixed 32-bit integers (ULONG / LONG).
struct SPSERIALIZEDEVENT
{
    // Two 16-bit fields (32 bits in total) generated by std.bitmanip.bitfields.
    mixin(bitfields!(SPEVENTENUM,       "eEventId",    16,
                     SPEVENTLPARAMTYPE, "elParamType", 16));

    ULONG     ulStreamNum;
    ULONGLONG ullAudioStreamOffset;
    ULONG     SerializedwParam;
    LONG      SerializedlParam;
}
507 
// Same leading fields as SPEVENT, but with the two parameters stored as
// fixed 64-bit integers (ULONGLONG / LONGLONG).
struct SPSERIALIZEDEVENT64
{
    // Two 16-bit fields (32 bits in total) generated by std.bitmanip.bitfields.
    mixin(bitfields!(SPEVENTENUM,       "eEventId",    16,
                     SPEVENTLPARAMTYPE, "elParamType", 16));

    ULONG     ulStreamNum;
    ULONGLONG ullAudioStreamOffset;
    ULONGLONG SerializedwParam;
    LONGLONG  SerializedlParam;
}
520 
521 /+
522 #if _SAPI_BUILD_VER >= 0x053
523 cpp_quote("#if 0")
524 typedef [restricted, hidden] struct SPEVENTEX
525 {
526     WORD        eEventId;      //SPEVENTENUM
527     WORD        elParamType;   //SPEVENTLPARAMTYPE
528     ULONG       ulStreamNum;        // Input stream number this event is associated with
529     ULONGLONG   ullAudioStreamOffset;
530     WPARAM      wParam;
531     LPARAM      lParam;
532     ULONGLONG   ullAudioTimeOffset;
533 } SPEVENTEX;
534 
535 cpp_quote("#else")
536 cpp_quote("typedef struct SPEVENTEX")
537 cpp_quote("{")
538 cpp_quote("    SPEVENTENUM        eEventId : 16;")
539 cpp_quote("    SPEVENTLPARAMTYPE  elParamType : 16;")
540 cpp_quote("    ULONG       ulStreamNum;")
541 cpp_quote("    ULONGLONG   ullAudioStreamOffset;")
542 cpp_quote("    WPARAM      wParam;")
543 cpp_quote("    LPARAM      lParam;")
544 cpp_quote("    ULONGLONG   ullAudioTimeOffset;")
545 cpp_quote("} SPEVENTEX;")
546 cpp_quote("#endif")
547 #endif // _SAPI_BUILD_VER >= 0x053
548 +/
549 
//--- Types of interference
// Values are consecutive, starting at 0.
enum
{
    SPINTERFERENCE_NONE = 0,
    SPINTERFERENCE_NOISE,
    SPINTERFERENCE_NOSIGNAL,
    SPINTERFERENCE_TOOLOUD,
    SPINTERFERENCE_TOOQUIET,
    SPINTERFERENCE_TOOFAST,
    SPINTERFERENCE_TOOSLOW
}
alias typeof(SPINTERFERENCE_NONE) SPINTERFERENCE;
562 
//--- Flags for END_SR_STREAM event (in WPARAM)
enum
{
    SPESF_NONE            = 0,
    SPESF_STREAM_RELEASED = (1 << 0)
    // TODO (_SAPI_BUILD_VER >= 0x053), not enabled in this binding:
    //   , SPESF_EMULATED = (1 << 1)
}
alias typeof(SPESF_NONE) SPENDSRSTREAMFLAGS;
575 
//--- Viseme features
// The initializers use plain int literals: in D the suffix L denotes a 64-bit
// long, which would make the members - and therefore the SPVFEATURE alias -
// 8 bytes wide instead of the 4-byte C enum they mirror.
enum
{
    SPVFEATURE_STRESSED = (1 << 0),
    SPVFEATURE_EMPHASIS = (1 << 1)
}
alias typeof(SPVFEATURE_STRESSED) SPVFEATURE;
583 
584 
//--- Viseme event groups
// Values are consecutive, starting at 0. The trailing comments give English
// example phonemes for each viseme.
enum
{
    SP_VISEME_0 = 0,  // Silence
    SP_VISEME_1,      // AE, AX, AH
    SP_VISEME_2,      // AA
    SP_VISEME_3,      // AO
    SP_VISEME_4,      // EY, EH, UH
    SP_VISEME_5,      // ER
    SP_VISEME_6,      // y, IY, IH, IX
    SP_VISEME_7,      // w, UW
    SP_VISEME_8,      // OW
    SP_VISEME_9,      // AW
    SP_VISEME_10,     // OY
    SP_VISEME_11,     // AY
    SP_VISEME_12,     // h
    SP_VISEME_13,     // r
    SP_VISEME_14,     // l
    SP_VISEME_15,     // s, z
    SP_VISEME_16,     // SH, CH, JH, ZH
    SP_VISEME_17,     // TH, DH
    SP_VISEME_18,     // f, v
    SP_VISEME_19,     // d, t, n
    SP_VISEME_20,     // k, g, NG
    SP_VISEME_21,     // p, b, m
}
alias typeof(SP_VISEME_0) SPVISEMES;
614 
// Record filled in by ISpEventSource.GetInfo: two 64-bit interest masks
// followed by a 32-bit count.
struct SPEVENTSOURCEINFO
{
    ULONGLONG ullEventInterest;
    ULONGLONG ullQueuedInterest;
    ULONG     ulCount;
}
621 
// Extends ISpNotifySource with event-interest selection and event retrieval.
// The declaration order defines the vtable slots and must not be changed.
interface ISpEventSource : ISpNotifySource
{
    // It is necessary to use the SPFEI macro to convert the
    // SPEVENTENUM values into ULONGLONG values.
    HRESULT SetInterest(ULONGLONG ullEventInterest, ULONGLONG ullQueuedInterest);

    HRESULT GetEvents(ULONG ulCount, SPEVENT* pEventArray, ULONG* pulFetched);

    HRESULT GetInfo(SPEVENTSOURCEINFO* pInfo);
}
637 
//--- ISpStreamFormat -------------------------------------------------------
// IStream extended with one method that reports a format GUID and a
// WAVEFORMATEX pointer through its two out-parameters.
interface ISpStreamFormat : IStream
{
    HRESULT GetFormat(GUID* pguidFormatId, WAVEFORMATEX** ppCoMemWaveFormatEx);
}
643 
// File open modes taken by ISpStream.BindToFile; values are consecutive,
// starting at 0.
enum
{
    SPFM_OPEN_READONLY,   // Open existing file, read-only
    SPFM_OPEN_READWRITE,  // (Not supported for wav files) Open existing file, read-write
    SPFM_CREATE,          // (Not supported for wav files) Open file if exists, else create if does not exist (opens read-write)
    SPFM_CREATE_ALWAYS,   // Create file even if file exists.  Destroys old file.
    SPFM_NUM_MODES        // Used for limit checking
}
alias typeof(SPFM_OPEN_READONLY) SPFILEMODE;
653 
//--- ISpStream -------------------------------------------------------------
// ISpStreamFormat extended with base-stream and file-binding methods.
// The declaration order defines the vtable slots and must not be changed.
interface ISpStream : ISpStreamFormat
{
    HRESULT SetBaseStream(IStream pStream,
                          REFGUID rguidFormat,
                          const WAVEFORMATEX* pWaveFormatEx);
    HRESULT GetBaseStream(IStream* ppStream);
    HRESULT BindToFile(LPCWSTR pszFileName,
                       SPFILEMODE eMode,
                       const GUID* pFormatId,
                       const WAVEFORMATEX* pWaveFormatEx,
                       ULONGLONG ullEventInterest);
    HRESULT Close();
}
665 
//--- ISpVoice --------------------------------------------------------------
//  These structures maintain the absolute state of the voice relative to
//  the voice's baseline XML state.
//
//  The fields are declared as LONG (32-bit): the C header uses `long`, which
//  is 32 bits on Windows, whereas D's `long` is 64 bits and would double the
//  size of the structure.
struct SPVPITCH
{
    LONG MiddleAdj;
    LONG RangeAdj;
}
674 
// Action codes stored in SPVSTATE.eAction; values are consecutive, starting
// at 0.
enum
{
    SPVA_Speak = 0,
    SPVA_Silence,
    SPVA_Pronounce,
    SPVA_Bookmark,
    SPVA_SpellOut,
    SPVA_Section,
    SPVA_ParseUnknownTag
}
alias typeof(SPVA_Speak) SPVACTIONS;
686 
// Three read-only wide-string pointers; used as the Context field of SPVSTATE.
struct SPVCONTEXT
{
    LPCWSTR pCategory;
    LPCWSTR pBefore;
    LPCWSTR pAfter;
}
693 
// Voice state record.
// EmphAdj and RateAdj are declared as LONG (32-bit): the C header uses
// `long`, which is 32 bits on Windows, whereas D's `long` is 64 bits and
// would shift the offset of every field that follows.
struct SPVSTATE
{
    //--- Action
    SPVACTIONS      eAction;

    //--- Running state values
    LANGID          LangID;
    WORD            wReserved;
    LONG            EmphAdj;
    LONG            RateAdj;
    ULONG           Volume;
    SPVPITCH        PitchAdj;
    ULONG           SilenceMSecs;
    SPPHONEID*      pPhoneIds;      // NULL terminated array of phone ids
    SPPARTOFSPEECH  ePartOfSpeech;
    SPVCONTEXT      Context;
}
711 
// Run-state flags (stored in SPVOICESTATUS.dwRunningState).
// The initializers use plain int literals: in D the suffix L denotes a 64-bit
// long, which would make SPRUNSTATE 8 bytes wide instead of the 4-byte C enum
// it mirrors.
enum
{
    SPRS_DONE        = (1 << 0),  // The voice is done rendering all queued phrases
    SPRS_IS_SPEAKING = (1 << 1)   // The SpVoice currently has the audio queue claimed
}
alias typeof(SPRS_DONE) SPRUNSTATE;
718 
// Lower and upper bounds for the voice volume and rate settings.
enum
{
    SPMIN_VOLUME = 0,
    SPMAX_VOLUME = 100,
    SPMIN_RATE   = -10,
    SPMAX_RATE   = 10
}
alias typeof(SPMIN_VOLUME) SPVLIMITS;
727 
// Voice priorities, as taken by ISpVoice.SetPriority / GetPriority.
// All members are plain ints so that they share the type of the SPVPRIORITY
// alias; with the L suffix the two non-zero members were 64-bit longs.
enum
{
    SPVPRI_NORMAL = 0,
    SPVPRI_ALERT  = (1 << 0),
    SPVPRI_OVER   = (1 << 1)
}
alias typeof(SPVPRI_NORMAL) SPVPRIORITY;
735 
// Status record filled in by ISpVoice.GetStatus.
struct SPVOICESTATUS
{
    ULONG      ulCurrentStream;     // Current stream being rendered
    ULONG      ulLastStreamQueued;  // Number of the last stream queued
    HRESULT    hrLastResult;        // Result of last speak
    DWORD      dwRunningState;      // SPRUNSTATE
    ULONG      ulInputWordPos;      // Input position of current word being rendered
    ULONG      ulInputWordLen;      // Length of current word being rendered
    ULONG      ulInputSentPos;      // Input position of current sentence being rendered
    ULONG      ulInputSentLen;      // Length of current sentence being rendered
    LONG       lBookmarkId;         // Current bookmark converted to a long integer
    SPPHONEID  PhonemeId;           // Current phoneme id
    SPVISEMES  VisemeId;            // Current viseme
    DWORD      dwReserved1;         // Reserved for future expansion
    DWORD      dwReserved2;         // Reserved for future expansion
}
752 
// Flags for the dwFlags argument of ISpVoice.Speak / SpeakStream.
// All members are plain ints so that they share the type of the SPEAKFLAGS
// alias; with the L suffix (a 64-bit long in D) the individual flags had a
// different type from SPF_DEFAULT.
enum
{
    //--- SpVoice flags
    SPF_DEFAULT          = 0,         // Synchronous, no purge, xml auto detect
    SPF_ASYNC            = (1 << 0),  // Asynchronous call
    SPF_PURGEBEFORESPEAK = (1 << 1),  // Purge current data prior to speaking this
    SPF_IS_FILENAME      = (1 << 2),  // The string passed to Speak() is a file name
    SPF_IS_XML           = (1 << 3),  // The input text will be parsed for XML markup
    SPF_IS_NOT_XML       = (1 << 4),  // The input text will not be parsed for XML markup
    SPF_PERSIST_XML      = (1 << 5),  // Persists XML global state changes

    //--- Normalizer flags
    SPF_NLP_SPEAK_PUNC   = (1 << 6),  // The normalization processor should speak the punctuation

    //#if _SAPI_BUILD_VER >= 0x053
    //--- TTS Format
    SPF_PARSE_SAPI       = (1 << 7),  // Force XML parsing as MS SAPI
    SPF_PARSE_SSML       = (1 << 8),  // Force XML parsing as W3C SSML
    SPF_PARSE_AUTODETECT = 0,         // No set flag in bits 7 or 8 results in autodetection
    //#endif // _SAPI_BUILD_VER >= 0x053

    //--- Masks
    SPF_NLP_MASK         = (SPF_NLP_SPEAK_PUNC),

    /+ TODO
    #if _SAPI_BUILD_VER >= 0x053
    SPF_PARSE_MASK       = (SPF_PARSE_SAPI|SPF_PARSE_SSML),
    SPF_VOICE_MASK       = (SPF_ASYNC|SPF_PURGEBEFORESPEAK|SPF_IS_FILENAME|SPF_IS_XML|SPF_IS_NOT_XML|SPF_NLP_MASK|SPF_PERSIST_XML|SPF_PARSE_MASK),
    #else
    SPF_VOICE_MASK       = (SPF_ASYNC|SPF_PURGEBEFORESPEAK|SPF_IS_FILENAME|SPF_IS_XML|SPF_IS_NOT_XML|SPF_NLP_MASK|SPF_PERSIST_XML),
    #endif // _SAPI_BUILD_VER >= 0x053

    SPF_UNUSED_FLAGS     = ~(SPF_VOICE_MASK)
    +/
}
alias typeof(SPF_DEFAULT) SPEAKFLAGS;
789 
// Binding of the SAPI ISpVoice COM interface. The declaration order defines
// the vtable slots and must not be changed.
//
// Integer parameters that are `long` in the C header are declared as LONG
// (32-bit) here: D's `long` is 64 bits, which would give Skip, SetRate and
// GetRate a different argument size from the one the COM object expects.
interface ISpVoice : ISpEventSource
{
    HRESULT SetOutput(IUnknown pUnkOutput, BOOL fAllowFormatChanges);
    HRESULT GetOutputObjectToken(ISpObjectToken* ppObjectToken);
    HRESULT GetOutputStream(ISpStreamFormat* ppStream);

    HRESULT Pause();
    HRESULT Resume();

    HRESULT SetVoice(ISpObjectToken pToken);
    HRESULT GetVoice(ISpObjectToken* ppToken);

    HRESULT Speak(LPCWSTR pwcs, DWORD dwFlags, ULONG* pulStreamNumber);
    // pStream: if not ISpStreamFormat supported then SPDFID_Text assumed
    HRESULT SpeakStream(IStream pStream, DWORD dwFlags, ULONG* pulStreamNumber);

    HRESULT GetStatus(SPVOICESTATUS* pStatus, LPWSTR* ppszLastBookmark);

    HRESULT Skip(LPCWSTR pItemType, LONG lNumItems, ULONG* pulNumSkipped);

    HRESULT SetPriority(SPVPRIORITY ePriority);
    HRESULT GetPriority(SPVPRIORITY* pePriority);

    HRESULT SetAlertBoundary(SPEVENTENUM eBoundary);
    HRESULT GetAlertBoundary(SPEVENTENUM* peBoundary);

    HRESULT SetRate(LONG RateAdjust);
    HRESULT GetRate(LONG* pRateAdjust);

    HRESULT SetVolume(USHORT usVolume);
    HRESULT GetVolume(USHORT* pusVolume);

    HRESULT WaitUntilDone(ULONG msTimeout);

    HRESULT SetSyncSpeakTimeout(ULONG msTimeout);
    HRESULT GetSyncSpeakTimeout(ULONG* pmsTimeout);

    // Unlike the other members this one returns a HANDLE, not an HRESULT.
    HANDLE SpeakCompleteEvent();

    HRESULT IsUISupported(LPCWSTR pszTypeOfUI,
                          void* pvExtraData,
                          ULONG cbExtraData,
                          BOOL* pfSupported);
    HRESULT DisplayUI(HWND hwndParent,
                      LPCWSTR pszTitle,
                      LPCWSTR pszTypeOfUI,
                      void* pvExtraData,
                      ULONG cbExtraData);
}
849 
//--- ISpLexicon ------------------------------------------------------------
enum
{
    //--- SAPI5 public POS category values (bits 28-31)
    SPPS_NotOverriden = -1,
    SPPS_Unknown      = 0,
    SPPS_Noun         = 0x1000,
    SPPS_Verb         = 0x2000,
    SPPS_Modifier     = 0x3000,
    SPPS_Function     = 0x4000,
    SPPS_Interjection = 0x5000
    // TODO (_SAPI_BUILD_VER >= 0x053), not enabled in this binding:
    //   ,
    //   SPPS_Noncontent   = 0x6000,
    //   SPPS_LMA          = 0x7000,    // Words learned through LMA
    //   SPPS_SuppressWord = 0xF000,    // Special flag to indicate this word should not be recognized
}
// NOTE(review): the enum-derived alias below is disabled and SPPARTOFSPEECH
// is a plain uint instead; the reason is not recorded here - TODO confirm.
//alias typeof(SPPS_NotOverriden) SPPARTOFSPEECH;
alias uint SPPARTOFSPEECH;