module speech.windows.sapi;

import speech.windows.ObjIdl;

import core.sys.windows.windows;
import std.c.windows.com;
import std.bitmanip;

// Pointer-style stand-ins for the C++ reference typedefs used in COM signatures.
alias IID* REFIID;
alias CLSID* REFCLSID;
alias GUID* REFGUID;

// Class/interface identifiers resolved at link time.
// NOTE(review): D2 module-level variables are thread-local by default; if these
// symbols are defined by a C import library they probably need `__gshared`
// (or `const`). Left unchanged because the defining side is not visible here.
extern(C) extern CLSID CLSID_SpVoice;
extern(C) extern IID IID_ISpVoice;

extern(C) extern CLSID CLSID_SpObjectTokenCategory;
extern(C) extern IID IID_ISpObjectTokenCategory;

// Wave format header. The Windows SDK declares this structure with 1-byte
// packing, giving it a size of 18 bytes; without align(1) D would pad it to 20.
align(1) struct WAVEFORMATEX
{
align(1):
    WORD  wFormatTag;      /* format type */
    WORD  nChannels;       /* number of channels (i.e. mono, stereo...) */
    DWORD nSamplesPerSec;  /* sample rate */
    DWORD nAvgBytesPerSec; /* for buffer estimation */
    WORD  nBlockAlign;     /* block size of data */
    WORD  wBitsPerSample;  /* Number of bits per sample of mono data */
    WORD  cbSize;          /* The count in bytes of the size of
                              extra information (after cbSize) */
}

alias WORD LANGID;

alias WCHAR SPPHONEID;
alias LPWSTR PSPPHONEID;   // Use this with NULL-terminated SPPHONEID strings. This gives the proper SAL annotation.
alias LPCWSTR PCSPPHONEID; // Use this with const NULL-terminated SPPHONEID strings. This gives the proper SAL annotation.

//--- DataKey locations
enum
{
    SPDKL_DefaultLocation = 0,
    SPDKL_CurrentUser = 1,
    SPDKL_LocalMachine = 2,
    SPDKL_CurrentConfig = 5
}
alias typeof(SPDKL_DefaultLocation) SPDATAKEYLOCATION;

//--- TokenUI constants
enum SPDUI_EngineProperties = `EngineProperties`;
enum SPDUI_AddRemoveWord = `AddRemoveWord`;
enum SPDUI_UserTraining = `UserTraining`;
enum SPDUI_MicTraining = `MicTraining`;
enum SPDUI_RecoProfileProperties = `RecoProfileProperties`;
enum SPDUI_AudioProperties = `AudioProperties`;
enum SPDUI_AudioVolume = `AudioVolume`;
enum SPDUI_UserEnrollment = `UserEnrollment`;
enum SPDUI_ShareData = `ShareData`;

// New for Vista. Nothing prevents use downlevel if an engine exposes them.
enum SPDUI_Tutorial = `Tutorial`;

//--- Data formats - these are used by the CSpStreamFormat class in sphelper.h
enum
{
    SPSF_Default = -1,
    SPSF_NoAssignedFormat = 0, // Similar to GUID_NULL
    SPSF_Text,
    SPSF_NonStandardFormat,    // Non-SAPI standard format with no WAVEFORMATEX description
    SPSF_ExtendedAudioFormat,  // Non-SAPI standard format but has WAVEFORMATEX description
    // Standard PCM wave formats
    SPSF_8kHz8BitMono,
    SPSF_8kHz8BitStereo,
    SPSF_8kHz16BitMono,
    SPSF_8kHz16BitStereo,
    SPSF_11kHz8BitMono,
    SPSF_11kHz8BitStereo,
    SPSF_11kHz16BitMono,
    SPSF_11kHz16BitStereo,
    SPSF_12kHz8BitMono,
    SPSF_12kHz8BitStereo,
    SPSF_12kHz16BitMono,
    SPSF_12kHz16BitStereo,
    SPSF_16kHz8BitMono,
    SPSF_16kHz8BitStereo,
    SPSF_16kHz16BitMono,
    SPSF_16kHz16BitStereo,
    SPSF_22kHz8BitMono,
    SPSF_22kHz8BitStereo,
    SPSF_22kHz16BitMono,
    SPSF_22kHz16BitStereo,
    SPSF_24kHz8BitMono,
    SPSF_24kHz8BitStereo,
    SPSF_24kHz16BitMono,
    SPSF_24kHz16BitStereo,
    SPSF_32kHz8BitMono,
    SPSF_32kHz8BitStereo,
    SPSF_32kHz16BitMono,
    SPSF_32kHz16BitStereo,
    SPSF_44kHz8BitMono,
    SPSF_44kHz8BitStereo,
    SPSF_44kHz16BitMono,
    SPSF_44kHz16BitStereo,
    SPSF_48kHz8BitMono,
    SPSF_48kHz8BitStereo,
    SPSF_48kHz16BitMono,
    SPSF_48kHz16BitStereo,
    // TrueSpeech format
    SPSF_TrueSpeech_8kHz1BitMono,
    // A-Law formats
    SPSF_CCITT_ALaw_8kHzMono,
    SPSF_CCITT_ALaw_8kHzStereo,
    SPSF_CCITT_ALaw_11kHzMono,
    SPSF_CCITT_ALaw_11kHzStereo,
    SPSF_CCITT_ALaw_22kHzMono,
    SPSF_CCITT_ALaw_22kHzStereo,
    SPSF_CCITT_ALaw_44kHzMono,
    SPSF_CCITT_ALaw_44kHzStereo,
    // u-Law formats
    SPSF_CCITT_uLaw_8kHzMono,
    SPSF_CCITT_uLaw_8kHzStereo,
    SPSF_CCITT_uLaw_11kHzMono,
    SPSF_CCITT_uLaw_11kHzStereo,
    SPSF_CCITT_uLaw_22kHzMono,
    SPSF_CCITT_uLaw_22kHzStereo,
    SPSF_CCITT_uLaw_44kHzMono,
    SPSF_CCITT_uLaw_44kHzStereo,
    // ADPCM formats
    SPSF_ADPCM_8kHzMono,
    SPSF_ADPCM_8kHzStereo,
    SPSF_ADPCM_11kHzMono,
    SPSF_ADPCM_11kHzStereo,
    SPSF_ADPCM_22kHzMono,
    SPSF_ADPCM_22kHzStereo,
    SPSF_ADPCM_44kHzMono,
    SPSF_ADPCM_44kHzStereo,
    // GSM 6.10 formats
    SPSF_GSM610_8kHzMono,
    SPSF_GSM610_11kHzMono,
    SPSF_GSM610_22kHzMono,
    SPSF_GSM610_44kHzMono,
    SPSF_NUM_FORMATS
}
alias typeof(SPSF_Default) SPSTREAMFORMAT;

extern(C) extern GUID SPDFID_Text;
extern(C) extern GUID SPDFID_WaveFormatEx;

//--- Root of registry entries for speech use
enum SPREG_USER_ROOT = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech`;
enum SPREG_LOCAL_MACHINE_ROOT = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech`;

//--- Categories for speech resource management
enum SPCAT_AUDIOOUT = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioOutput`;
enum SPCAT_AUDIOIN = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioInput`;
enum SPCAT_VOICES = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices`;
enum SPCAT_RECOGNIZERS = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Recognizers`;
enum SPCAT_APPLEXICONS = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AppLexicons`;
enum SPCAT_PHONECONVERTERS = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\PhoneConverters`;
enum SPCAT_RECOPROFILES = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech\RecoProfiles`;

//--- Specific token ids of interest
enum SPMMSYS_AUDIO_IN_TOKEN_ID = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioInput\TokenEnums\MMAudioIn\`;
enum SPMMSYS_AUDIO_OUT_TOKEN_ID = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioOutput\TokenEnums\MMAudioOut\`;
enum SPCURRENT_USER_LEXICON_TOKEN_ID = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech\CurrentUserLexicon`;

/+ TODO
#if _SAPI_BUILD_VER >= 0x053
// Shortcuts only supported on Vista and above
enum SPCURRENT_USER_SHORTCUT_TOKEN_ID = `HKEY_CURRENT_USER\SOFTWARE\Microsoft\Speech\CurrentUserShortcut`;
#endif // _SAPI_BUILD_VER >= 0x053
+/

//--- Standard token values
enum SPTOKENVALUE_CLSID = `CLSID`;
enum SPTOKENKEY_FILES = `Files`;
enum SPTOKENKEY_UI = `UI`;
enum SPTOKENKEY_ATTRIBUTES = `Attributes`;

/+ TODO
#if _SAPI_BUILD_VER >= 0x053
enum SPTOKENKEY_RETAINEDAUDIO = `SecondsPerRetainedAudioEvent`;
#endif // _SAPI_BUILD_VER >= 0x053
+/

//--- Standard voice category values
enum SPVOICECATEGORY_TTSRATE = `DefaultTTSRate`;

//--- Standard SR Engine properties
enum SPPROP_RESOURCE_USAGE = `ResourceUsage`;
enum SPPROP_HIGH_CONFIDENCE_THRESHOLD = `HighConfidenceThreshold`;
enum SPPROP_NORMAL_CONFIDENCE_THRESHOLD = `NormalConfidenceThreshold`;
enum SPPROP_LOW_CONFIDENCE_THRESHOLD = `LowConfidenceThreshold`;
enum SPPROP_RESPONSE_SPEED = `ResponseSpeed`;
enum SPPROP_COMPLEX_RESPONSE_SPEED = `ComplexResponseSpeed`;
enum SPPROP_ADAPTATION_ON = `AdaptationOn`;

// new for Vista, but nothing prevents engines that run downlevel from supporting these
enum SPPROP_PERSISTED_BACKGROUND_ADAPTATION = `PersistedBackgroundAdaptation`;
enum SPPROP_PERSISTED_LANGUAGE_MODEL_ADAPTATION = `PersistedLanguageModelAdaptation`;
enum SPPROP_UX_IS_LISTENING = `UXIsListening`;

//--- Standard SAPI Recognition Topics
enum SPTOPIC_SPELLING = `Spelling`;

// CFG Wildcard token
enum SPWILDCARD = `...`;

// CFG Dictation token
enum SPDICTATION = `*`;
enum SPINFDICTATION = `*+`;

/+ TODO
#if _SAPI_BUILD_VER >= 0x053
// Registry key that stores a list of object token CLSIDs marked as safe to instantiate from HKCU
enum SPREG_SAFE_USER_TOKENS = `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\UserTokens`;
#endif // _SAPI_BUILD_VER >= 0x053
+/

// CFG confidence scores
enum byte SP_LOW_CONFIDENCE = -1;
enum byte SP_NORMAL_CONFIDENCE = 0;
enum byte SP_HIGH_CONFIDENCE = +1;

// CFG default weight
// MIDL does not support floating point in the RHS.
// Thus, using 1.0 instead of 1 resulted in unexpected behavior in the resulting type library.
enum float DEFAULT_WEIGHT = 1;

// Lexicon word and pronunciation limits
enum ULONG SP_MAX_WORD_LENGTH = 128;
enum ULONG SP_MAX_PRON_LENGTH = 384;

/+ TODO
#if _SAPI_BUILD_VER >= 0x053
// Flag used in EmulateRecognitionEx to indicate re-sending an existing result
enum ULONG SP_EMULATE_RESULT = 0x40000000;
#endif // _SAPI_BUILD_VER >= 0x053
+/


//--- ISpNotifyCallback -----------------------------------------------------

// NOTE(review): the single NotifyCallback method is still commented out, so this
// interface currently declares no virtual functions. Port it before passing an
// implementation to SetNotifyCallbackInterface.
extern(C++) interface ISpNotifyCallback
{
    /+
    HRESULT STDMETHODCALLTYPE NotifyCallback(
        WPARAM wParam,
        LPARAM lParam);
    +/
}

// Plain function callback accepted by ISpNotifySource.SetNotifyCallbackFunction.
alias extern(Windows) void function(WPARAM wParam, LPARAM lParam) SPNOTIFYCALLBACK;

//--- ISpNotifySource -------------------------------------------------------
interface ISpNotifySource : IUnknown
{
    HRESULT SetNotifySink(ISpNotifySink pNotifySink);
    HRESULT SetNotifyWindowMessage(
        HWND hWnd,
        UINT Msg,
        WPARAM wParam,
        LPARAM lParam);
    HRESULT SetNotifyCallbackFunction(
        SPNOTIFYCALLBACK pfnCallback,
        WPARAM wParam,
        LPARAM lParam);
    HRESULT SetNotifyCallbackInterface(
        ISpNotifyCallback pSpCallback,
        WPARAM wParam,
        LPARAM lParam);
    HRESULT SetNotifyWin32Event();
    HRESULT WaitForNotifyEvent(DWORD dwMilliseconds);
    HANDLE GetNotifyEventHandle();
}

//--- ISpNotifySink ---------------------------------------------------------
interface ISpNotifySink : IUnknown
{
    HRESULT Notify();
}

//--- ISpDataKey ------------------------------------------------------------
// Out-parameters that return an interface are written as `Interface *` because a
// D interface reference is already a pointer (i.e. this is `Interface **` in C++).
interface ISpDataKey : IUnknown
{
    HRESULT SetData(LPCWSTR pszValueName, ULONG cbData, const BYTE * pData);
    HRESULT GetData(LPCWSTR pszValueName, ULONG * pcbData, BYTE * pData);
    HRESULT SetStringValue(LPCWSTR pszValueName, LPCWSTR pszValue);
    HRESULT GetStringValue(LPCWSTR pszValueName, LPWSTR * ppszValue);
    HRESULT SetDWORD(LPCWSTR pszValueName, DWORD dwValue);
    HRESULT GetDWORD(LPCWSTR pszValueName, DWORD * pdwValue);
    HRESULT OpenKey(LPCWSTR pszSubKeyName, ISpDataKey * ppSubKey);
    HRESULT CreateKey(LPCWSTR pszSubKey, ISpDataKey * ppSubKey);
    HRESULT DeleteKey(LPCWSTR pszSubKey);
    HRESULT DeleteValue(LPCWSTR pszValueName);
    HRESULT EnumKeys(ULONG Index, LPWSTR * ppszSubKeyName);
    HRESULT EnumValues(ULONG Index, LPWSTR * ppszValueName);
}

//--- ISpRegDataKey ---------------------------------------------------------
interface ISpRegDataKey : ISpDataKey
{
    HRESULT SetKey(HKEY hkey, BOOL fReadOnly);
}

//--- ISpObjectTokenCategory ------------------------------------------------
interface ISpObjectTokenCategory : ISpDataKey
{
    HRESULT SetId(LPCWSTR pszCategoryId, BOOL fCreateIfNotExist);
    HRESULT GetId(LPWSTR * ppszCoMemCategoryId);
    HRESULT GetDataKey(SPDATAKEYLOCATION spdkl, ISpDataKey * ppDataKey);

    HRESULT EnumTokens(
        LPCWSTR pzsReqAttribs,
        LPCWSTR pszOptAttribs,
        IEnumSpObjectTokens * ppEnum);

    HRESULT SetDefaultTokenId(LPCWSTR pszTokenId);
    HRESULT GetDefaultTokenId(LPWSTR * ppszCoMemTokenId);
}

//--- ISpObjectToken --------------------------------------------------------
interface ISpObjectToken : ISpDataKey
{
    HRESULT SetId(LPCWSTR pszCategoryId, LPCWSTR pszTokenId, BOOL fCreateIfNotExist);
    HRESULT GetId(LPWSTR * ppszCoMemTokenId);
    HRESULT GetCategory(ISpObjectTokenCategory * ppTokenCategory);

    HRESULT CreateInstance(
        IUnknown pUnkOuter,
        DWORD dwClsContext,
        REFIID riid,
        void ** ppvObject);

    HRESULT GetStorageFileName(
        REFCLSID clsidCaller,
        LPCWSTR pszValueName,
        LPCWSTR pszFileNameSpecifier,
        ULONG nFolder, // Same as SHGetFolderPath -- If non-zero, must set CSIDL_FLAG_CREATE
        LPWSTR * ppszFilePath);
    HRESULT RemoveStorageFileName(
        REFCLSID clsidCaller,
        LPCWSTR pszKeyName,
        BOOL fDeleteFile);

    HRESULT Remove(const CLSID * pclsidCaller);

    HRESULT IsUISupported(
        LPCWSTR pszTypeOfUI,
        void * pvExtraData,
        ULONG cbExtraData,
        IUnknown punkObject,
        BOOL * pfSupported);
    HRESULT DisplayUI(
        HWND hwndParent,
        LPCWSTR pszTitle,
        LPCWSTR pszTypeOfUI,
        void * pvExtraData,
        ULONG cbExtraData,
        IUnknown punkObject);
    HRESULT MatchesAttributes(
        LPCWSTR pszAttributes,
        BOOL * pfMatches);
}

interface ISpObjectTokenInit : ISpObjectToken
{
    HRESULT InitFromDataKey(
        LPCWSTR pszCategoryId,
        LPCWSTR pszTokenId,
        ISpDataKey pDataKey);
}

//--- IEnumSpObjectTokens ---------------------------------------------------
// This interface is used to enumerate speech object tokens

interface IEnumSpObjectTokens : IUnknown
{
    HRESULT Next(ULONG celt,
                 ISpObjectToken * pelt,
                 ULONG * pceltFetched);
    HRESULT Skip(ULONG celt);

    HRESULT Reset();
    HRESULT Clone(IEnumSpObjectTokens * ppEnum);

    HRESULT Item(ULONG Index, ISpObjectToken * ppToken);

    HRESULT GetCount(ULONG * pCount);
}

//--- ISpEventSource --------------------------------------------------------
enum
{
    SPET_LPARAM_IS_UNDEFINED = 0,
    SPET_LPARAM_IS_TOKEN,
    SPET_LPARAM_IS_OBJECT,
    SPET_LPARAM_IS_POINTER,
    SPET_LPARAM_IS_STRING,
}
alias typeof(SPET_LPARAM_IS_UNDEFINED) SPEVENTLPARAMTYPE;

enum
{
    SPEI_UNDEFINED = 0,

    //--- TTS engine
    SPEI_START_INPUT_STREAM = 1,
    SPEI_END_INPUT_STREAM = 2,
    SPEI_VOICE_CHANGE = 3,    // LPARAM_IS_TOKEN
    SPEI_TTS_BOOKMARK = 4,    // LPARAM_IS_STRING
    SPEI_WORD_BOUNDARY = 5,
    SPEI_PHONEME = 6,
    SPEI_SENTENCE_BOUNDARY = 7,
    SPEI_VISEME = 8,
    SPEI_TTS_AUDIO_LEVEL = 9, // wParam contains current output audio level

    SPEI_TTS_PRIVATE = 15,    //--- Engine vendors use this reserved value.

    SPEI_MIN_TTS = 1,
    SPEI_MAX_TTS = 15,

    //--- Speech Recognition
    SPEI_END_SR_STREAM = 34,          // LPARAM contains HRESULT, WPARAM contains flags (SPESF_xxx)
    SPEI_SOUND_START = 35,
    SPEI_SOUND_END = 36,
    SPEI_PHRASE_START = 37,
    SPEI_RECOGNITION = 38,
    SPEI_HYPOTHESIS = 39,
    SPEI_SR_BOOKMARK = 40,
    SPEI_PROPERTY_NUM_CHANGE = 41,    // LPARAM points to a string, WPARAM is the attrib value
    SPEI_PROPERTY_STRING_CHANGE = 42, // LPARAM pointer to buffer. Two concatenated null terminated strings.
    SPEI_FALSE_RECOGNITION = 43,      // apparent speech with no valid recognition
    SPEI_INTERFERENCE = 44,           // LPARAM is any combination of SPINTERFERENCE flags
    SPEI_REQUEST_UI = 45,             // LPARAM is string.
    SPEI_RECO_STATE_CHANGE = 46,      // wParam contains new reco state
    SPEI_ADAPTATION = 47,             // we are now ready to accept the adaptation buffer
    SPEI_START_SR_STREAM = 48,
    SPEI_RECO_OTHER_CONTEXT = 49,     // Phrase finished and recognized, but for other context
    SPEI_SR_AUDIO_LEVEL = 50,         // wParam contains current input audio level
    /+
    #if _SAPI_BUILD_VER >= 0x053
    SPEI_SR_RETAINEDAUDIO = 51,
    #endif // _SAPI_BUILD_VER >= 0x053
    +/
    SPEI_SR_PRIVATE = 52,             // Engine vendors use this reserved value.
    /+
    #if _SAPI_BUILD_VER >= 0x053
    #if _SAPI_BUILD_VER >= 0x054
    SPEI_ACTIVE_CATEGORY_CHANGED = 53, // WPARAM and LPARAM are null.
    #else // _SAPI_BUILD_VER >= 0x054
    SPEI_RESERVED4 = 53, // Reserved for system use.
    #endif // _SAPI_BUILD_VER >= 0x054
    SPEI_RESERVED5 = 54, // Reserved for system use.
    SPEI_RESERVED6 = 55, // Reserved for system use.
    #endif // _SAPI_BUILD_VER >= 0x053
    +/

    SPEI_MIN_SR = 34,
    // TODO: SPEI_MAX_SR is not defined yet; its value depends on the SAPI version.
    /+
    #if _SAPI_BUILD_VER >= 0x053
    SPEI_MAX_SR = 55, // Value in SAPI 5.3
    #else
    SPEI_MAX_SR = 52, // Value in SAPI 5.1
    #endif // _SAPI_BUILD_VER >= 0x053
    +/

    SPEI_RESERVED1 = 30, // do not use
    SPEI_RESERVED2 = 33, // do not use
    SPEI_RESERVED3 = 63  // do not use
}
alias typeof(SPEI_UNDEFINED) SPEVENTENUM;

//cpp_quote("#define SPFEI_FLAGCHECK ( (1ui64 << SPEI_RESERVED1) | (1ui64 << SPEI_RESERVED2) )")

//cpp_quote("#define SPFEI_ALL_TTS_EVENTS (0x000000000000FFFEui64 | SPFEI_FLAGCHECK)")
//cpp_quote("#define SPFEI_ALL_SR_EVENTS (0x001FFFFC00000000ui64 | SPFEI_FLAGCHECK)")
//cpp_quote("#define SPFEI_ALL_EVENTS 0xEFFFFFFFFFFFFFFFui64")

// The SPFEI macro converts an SPEVENTENUM event value into a 64-bit value.
// Multiple values can then be OR-ed together and passed to SetInterest.
//cpp_quote("#define SPFEI(SPEI_ord) ((1ui64 << SPEI_ord) | SPFEI_FLAGCHECK)")

// Event record; eEventId and elParamType are two 16-bit fields packed into the
// leading 32 bits, mirroring the C bit-field declaration.
struct SPEVENT
{
    mixin(bitfields!(
        SPEVENTENUM, "eEventId", 16,
        SPEVENTLPARAMTYPE, "elParamType", 16
    ));

    ULONG ulStreamNum;
    ULONGLONG ullAudioStreamOffset;
    WPARAM wParam;
    LPARAM lParam;
}

// Same leading fields as SPEVENT, with wParam/lParam stored as fixed 32-bit values.
struct SPSERIALIZEDEVENT
{
    mixin(bitfields!(
        SPEVENTENUM, "eEventId", 16,
        SPEVENTLPARAMTYPE, "elParamType", 16
    ));

    ULONG ulStreamNum;
    ULONGLONG ullAudioStreamOffset;
    ULONG SerializedwParam;
    LONG SerializedlParam;
}

// Same leading fields as SPEVENT, with wParam/lParam stored as fixed 64-bit values.
struct SPSERIALIZEDEVENT64
{
    mixin(bitfields!(
        SPEVENTENUM, "eEventId", 16,
        SPEVENTLPARAMTYPE, "elParamType", 16
    ));

    ULONG ulStreamNum;
    ULONGLONG ullAudioStreamOffset;
    ULONGLONG SerializedwParam;
    LONGLONG SerializedlParam;
}

/+
#if _SAPI_BUILD_VER >= 0x053
cpp_quote("#if 0")
typedef [restricted, hidden] struct SPEVENTEX
{
    WORD eEventId; //SPEVENTENUM
    WORD elParamType; //SPEVENTLPARAMTYPE
    ULONG ulStreamNum; // Input stream number this event is associated with
    ULONGLONG ullAudioStreamOffset;
    WPARAM wParam;
    LPARAM lParam;
    ULONGLONG ullAudioTimeOffset;
} SPEVENTEX;

cpp_quote("#else")
cpp_quote("typedef struct SPEVENTEX")
cpp_quote("{")
cpp_quote(" SPEVENTENUM eEventId : 16;")
cpp_quote(" SPEVENTLPARAMTYPE elParamType : 16;")
cpp_quote(" ULONG ulStreamNum;")
cpp_quote(" ULONGLONG ullAudioStreamOffset;")
cpp_quote(" WPARAM wParam;")
cpp_quote(" LPARAM lParam;")
cpp_quote(" ULONGLONG ullAudioTimeOffset;")
cpp_quote("} SPEVENTEX;")
cpp_quote("#endif")
#endif // _SAPI_BUILD_VER >= 0x053
+/

//--- Types of interference
enum
{
    SPINTERFERENCE_NONE = 0,
    SPINTERFERENCE_NOISE,
    SPINTERFERENCE_NOSIGNAL,
    SPINTERFERENCE_TOOLOUD,
    SPINTERFERENCE_TOOQUIET,
    SPINTERFERENCE_TOOFAST,
    SPINTERFERENCE_TOOSLOW
}
alias typeof(SPINTERFERENCE_NONE) SPINTERFERENCE;

//--- Flags for END_SR_STREAM event (in WPARAM)
enum
{
    SPESF_NONE = 0,
    SPESF_STREAM_RELEASED = (1 << 0)
    /+ TODO
    #if _SAPI_BUILD_VER >= 0x053
    , SPESF_EMULATED = (1 << 1)
    #endif // _SAPI_BUILD_VER >= 0x053
    +/
}
alias typeof(SPESF_NONE) SPENDSRSTREAMFLAGS;

//--- Viseme features
// The C header writes these as `1L << n`; C `long` is 32-bit on Windows, so the
// D equivalent is a plain int literal (a D `1L` would make the enum 64-bit).
enum
{
    SPVFEATURE_STRESSED = (1 << 0),
    SPVFEATURE_EMPHASIS = (1 << 1)
}
alias typeof(SPVFEATURE_STRESSED) SPVFEATURE;


//--- Viseme event groups
enum
{
    // English examples
    //------------------
    SP_VISEME_0 = 0, // Silence
    SP_VISEME_1,     // AE, AX, AH
    SP_VISEME_2,     // AA
    SP_VISEME_3,     // AO
    SP_VISEME_4,     // EY, EH, UH
    SP_VISEME_5,     // ER
    SP_VISEME_6,     // y, IY, IH, IX
    SP_VISEME_7,     // w, UW
    SP_VISEME_8,     // OW
    SP_VISEME_9,     // AW
    SP_VISEME_10,    // OY
    SP_VISEME_11,    // AY
    SP_VISEME_12,    // h
    SP_VISEME_13,    // r
    SP_VISEME_14,    // l
    SP_VISEME_15,    // s, z
    SP_VISEME_16,    // SH, CH, JH, ZH
    SP_VISEME_17,    // TH, DH
    SP_VISEME_18,    // f, v
    SP_VISEME_19,    // d, t, n
    SP_VISEME_20,    // k, g, NG
    SP_VISEME_21,    // p, b, m
}
alias typeof(SP_VISEME_0) SPVISEMES;

struct SPEVENTSOURCEINFO
{
    ULONGLONG ullEventInterest;
    ULONGLONG ullQueuedInterest;
    ULONG ulCount;
}

interface ISpEventSource : ISpNotifySource
{
    // It is necessary to use the SPFEI macro to convert the
    // SPEVENTENUM values into ULONGLONG values.
    HRESULT SetInterest(
        ULONGLONG ullEventInterest,
        ULONGLONG ullQueuedInterest);

    HRESULT GetEvents(
        ULONG ulCount,
        SPEVENT * pEventArray,
        ULONG * pulFetched);

    HRESULT GetInfo(SPEVENTSOURCEINFO * pInfo);
}

//--- ISpStreamFormat -------------------------------------------------------
interface ISpStreamFormat : IStream
{
    HRESULT GetFormat(GUID * pguidFormatId, WAVEFORMATEX ** ppCoMemWaveFormatEx);
}

enum
{
    SPFM_OPEN_READONLY,  // Open existing file, read-only
    SPFM_OPEN_READWRITE, // (Not supported for wav files) Open existing file, read-write
    SPFM_CREATE,         // (Not supported for wav files) Open file if exists, else create if does not exist (opens read-write)
    SPFM_CREATE_ALWAYS,  // Create file even if file exists. Destroys old file.
    SPFM_NUM_MODES       // Used for limit checking
}
alias typeof(SPFM_OPEN_READONLY) SPFILEMODE;

//--- ISpStream -------------------------------------------------------------
interface ISpStream : ISpStreamFormat
{
    HRESULT SetBaseStream(IStream pStream, REFGUID rguidFormat, const WAVEFORMATEX * pWaveFormatEx);
    HRESULT GetBaseStream(IStream * ppStream);
    HRESULT BindToFile(LPCWSTR pszFileName, SPFILEMODE eMode,
                       const GUID * pFormatId,
                       const WAVEFORMATEX * pWaveFormatEx,
                       ULONGLONG ullEventInterest);
    HRESULT Close();
}

//--- ISpVoice --------------------------------------------------------------
// These structures maintain the absolute state of the voice relative to
// the voice's baseline XML state.
// Fields declared `long` in the C header are LONG (32-bit) here: D's own `long`
// is 64-bit and would change the structure layout.
struct SPVPITCH
{
    LONG MiddleAdj;
    LONG RangeAdj;
}

enum
{
    SPVA_Speak = 0,
    SPVA_Silence,
    SPVA_Pronounce,
    SPVA_Bookmark,
    SPVA_SpellOut,
    SPVA_Section,
    SPVA_ParseUnknownTag
}
alias typeof(SPVA_Speak) SPVACTIONS;

struct SPVCONTEXT
{
    LPCWSTR pCategory;
    LPCWSTR pBefore;
    LPCWSTR pAfter;
}

struct SPVSTATE
{
    //--- Action
    SPVACTIONS eAction;

    //--- Running state values
    LANGID LangID;
    WORD wReserved;
    LONG EmphAdj;  // C `long` (32-bit)
    LONG RateAdj;  // C `long` (32-bit)
    ULONG Volume;
    SPVPITCH PitchAdj;
    ULONG SilenceMSecs;
    SPPHONEID * pPhoneIds; // NULL terminated array of phone ids
    SPPARTOFSPEECH ePartOfSpeech;
    SPVCONTEXT Context;
}

enum
{
    SPRS_DONE = (1 << 0),       // The voice is done rendering all queued phrases
    SPRS_IS_SPEAKING = (1 << 1) // The SpVoice currently has the audio queue claimed
}
alias typeof(SPRS_DONE) SPRUNSTATE;

enum
{
    SPMIN_VOLUME = 0,
    SPMAX_VOLUME = 100,
    SPMIN_RATE = -10,
    SPMAX_RATE = 10
}
alias typeof(SPMIN_VOLUME) SPVLIMITS;

enum
{
    SPVPRI_NORMAL = 0,
    SPVPRI_ALERT = (1 << 0),
    SPVPRI_OVER = (1 << 1)
}
alias typeof(SPVPRI_NORMAL) SPVPRIORITY;

struct SPVOICESTATUS
{
    ULONG ulCurrentStream;    // Current stream being rendered
    ULONG ulLastStreamQueued; // Number of the last stream queued
    HRESULT hrLastResult;     // Result of last speak
    DWORD dwRunningState;     // SPRUNSTATE
    ULONG ulInputWordPos;     // Input position of current word being rendered
    ULONG ulInputWordLen;     // Length of current word being rendered
    ULONG ulInputSentPos;     // Input position of current sentence being rendered
    ULONG ulInputSentLen;     // Length of current sentence being rendered
    LONG lBookmarkId;         // Current bookmark converted to a long integer
    SPPHONEID PhonemeId;      // Current phoneme id
    SPVISEMES VisemeId;       // Current viseme
    DWORD dwReserved1;        // Reserved for future expansion
    DWORD dwReserved2;        // Reserved for future expansion
}

enum
{
    //--- SpVoice flags
    SPF_DEFAULT = 0,                 // Synchronous, no purge, xml auto detect
    SPF_ASYNC = (1 << 0),            // Asynchronous call
    SPF_PURGEBEFORESPEAK = (1 << 1), // Purge current data prior to speaking this
    SPF_IS_FILENAME = (1 << 2),      // The string passed to Speak() is a file name
    SPF_IS_XML = (1 << 3),           // The input text will be parsed for XML markup
    SPF_IS_NOT_XML = (1 << 4),       // The input text will not be parsed for XML markup
    SPF_PERSIST_XML = (1 << 5),      // Persists XML global state changes

    //--- Normalizer flags
    SPF_NLP_SPEAK_PUNC = (1 << 6),   // The normalization processor should speak the punctuation

    //#if _SAPI_BUILD_VER >= 0x053
    //--- TTS Format
    SPF_PARSE_SAPI = (1 << 7),       // Force XML parsing as MS SAPI
    SPF_PARSE_SSML = (1 << 8),       // Force XML parsing as W3C SSML
    SPF_PARSE_AUTODETECT = 0,        // No set flag in bits 7 or 8 results in autodetection
    //#endif // _SAPI_BUILD_VER >= 0x053

    //--- Masks
    SPF_NLP_MASK = (SPF_NLP_SPEAK_PUNC),

    /+ TODO
    #if _SAPI_BUILD_VER >= 0x053
    SPF_PARSE_MASK = (SPF_PARSE_SAPI|SPF_PARSE_SSML),
    SPF_VOICE_MASK = (SPF_ASYNC|SPF_PURGEBEFORESPEAK|SPF_IS_FILENAME|SPF_IS_XML|SPF_IS_NOT_XML|SPF_NLP_MASK|SPF_PERSIST_XML|SPF_PARSE_MASK),
    #else
    SPF_VOICE_MASK = (SPF_ASYNC|SPF_PURGEBEFORESPEAK|SPF_IS_FILENAME|SPF_IS_XML|SPF_IS_NOT_XML|SPF_NLP_MASK|SPF_PERSIST_XML),
    #endif // _SAPI_BUILD_VER >= 0x053

    SPF_UNUSED_FLAGS = ~(SPF_VOICE_MASK)
    +/
}
alias typeof(SPF_DEFAULT) SPEAKFLAGS;

// Text-to-speech voice. Parameters declared `long` in the C header are LONG here
// so that each argument occupies 32 bits, as the COM method expects.
interface ISpVoice : ISpEventSource
{
    HRESULT SetOutput(IUnknown pUnkOutput, BOOL fAllowFormatChanges);
    HRESULT GetOutputObjectToken(ISpObjectToken * ppObjectToken);
    HRESULT GetOutputStream(ISpStreamFormat * ppStream);

    HRESULT Pause();
    HRESULT Resume();

    HRESULT SetVoice(ISpObjectToken pToken);
    HRESULT GetVoice(ISpObjectToken * ppToken);

    HRESULT Speak(
        LPCWSTR pwcs,
        DWORD dwFlags,
        ULONG * pulStreamNumber);
    HRESULT SpeakStream(
        IStream pStream, // If not ISpStreamFormat supported then SPDFID_Text assumed
        DWORD dwFlags,
        ULONG * pulStreamNumber);

    HRESULT GetStatus(
        SPVOICESTATUS * pStatus,
        LPWSTR * ppszLastBookmark);

    HRESULT Skip(LPCWSTR pItemType, LONG lNumItems, ULONG * pulNumSkipped);

    HRESULT SetPriority(SPVPRIORITY ePriority);
    HRESULT GetPriority(SPVPRIORITY * pePriority);

    HRESULT SetAlertBoundary(SPEVENTENUM eBoundary);
    HRESULT GetAlertBoundary(SPEVENTENUM * peBoundary);

    HRESULT SetRate(LONG RateAdjust);
    HRESULT GetRate(LONG * pRateAdjust);

    HRESULT SetVolume(USHORT usVolume);
    HRESULT GetVolume(USHORT * pusVolume);

    HRESULT WaitUntilDone(ULONG msTimeout);

    HRESULT SetSyncSpeakTimeout(ULONG msTimeout);
    HRESULT GetSyncSpeakTimeout(ULONG * pmsTimeout);

    HANDLE SpeakCompleteEvent();

    HRESULT IsUISupported(
        LPCWSTR pszTypeOfUI,
        void * pvExtraData,
        ULONG cbExtraData,
        BOOL * pfSupported);
    HRESULT DisplayUI(
        HWND hwndParent,
        LPCWSTR pszTitle,
        LPCWSTR pszTypeOfUI,
        void * pvExtraData,
        ULONG cbExtraData);
}

//--- ISpLexicon ------------------------------------------------------------
enum
{
    //--- SAPI5 public POS category values (bits 28-31)
    SPPS_NotOverriden = -1,
    SPPS_Unknown = 0,
    SPPS_Noun = 0x1000,
    SPPS_Verb = 0x2000,
    SPPS_Modifier = 0x3000,
    SPPS_Function = 0x4000,
    SPPS_Interjection = 0x5000
    /+ TODO
    #if _SAPI_BUILD_VER >= 0x053
    ,
    SPPS_Noncontent = 0x6000,
    SPPS_LMA = 0x7000, // Words learned through LMA
    SPPS_SuppressWord = 0xF000, // Special flag to indicate this word should not be recognized
    #endif // _SAPI_BUILD_VER >= 0x053
    +/
}
// TODO: the typeof-based alias below is disabled and SPPARTOFSPEECH is declared
// as uint instead; the reason was not recorded by the original author.
//alias typeof(SPPS_NotOverriden) SPPARTOFSPEECH;
alias uint SPPARTOFSPEECH;