/***************************************************************************
 *   Copyright (C) 2005 to 2012 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/


/*************************************************************/
/* This is the header file for the library version of espeak */
/*                                                           */
/*************************************************************/
module speech.espeak.espeak;

import core.stdc.stdio;
import core.stdc.stddef;

extern(C):

enum ESPEAK_API_REVISION = 9;

/*
Revision 2
   Added parameter "options" to espeak_Initialize()

Revision 3
   Added espeakWORDGAP to  espeak_PARAMETER

Revision 4
   Added flags parameter to espeak_CompileDictionary()

Revision 5
   Added espeakCHARS_16BIT

Revision 6
   Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL

Revision 7  24.Dec.2011
   Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics.
   Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names.

Revision 8  26.Apr.2013
   Added function espeak_TextToPhonemes().

Revision 9  30.May.2013
   Changed function espeak_TextToPhonemes().

*/
         /********************/
         /*  Initialization  */
         /********************/

// values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute
enum espeakRATE_MINIMUM = 80;
enum espeakRATE_MAXIMUM = 450;
enum espeakRATE_NORMAL  = 175;

enum {
    espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
    espeakEVENT_WORD = 1,            // Start of word
    espeakEVENT_SENTENCE = 2,        // Start of sentence
    espeakEVENT_MARK = 3,            // Mark
    espeakEVENT_PLAY = 4,            // Audio element
    espeakEVENT_END = 5,             // End of sentence or clause
    espeakEVENT_MSG_TERMINATED = 6,  // End of message
    espeakEVENT_PHONEME = 7,         // Phoneme, if enabled in espeak_Initialize()
    espeakEVENT_SAMPLERATE = 8       // internal use, set sample rate
}

alias espeak_EVENT_TYPE = int;

// One entry of the event list passed to the synth callback.
// Note: in this binding the trailing union is anonymous, so its members are
// accessed directly on the event (event.number, event.name, event.string).
struct espeak_EVENT {
    espeak_EVENT_TYPE type;
    uint unique_identifier; // message identifier (or 0 for key or character)
    int text_position;      // the number of characters from the start of the text
    int length;             // word length, in characters (for espeakEVENT_WORD)
    int audio_position;     // the time in mS within the generated speech output data
    int sample;             // sample id (internal use)
    void* user_data;        // pointer supplied by the calling program
    union {
        int number;         // used for WORD and SENTENCE events.
        const(char)* name;  // used for MARK and PLAY events.  UTF8 string
        char[8] string;     // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes.
    }
}
/*
   When a message is supplied to espeak_synth, the request is buffered and espeak_synth returns. When the message is really processed, the callback function will be repeatedly called.


   In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).

   In PLAYBACK mode, the callback function is called as soon as an event happens.

   For example suppose that the following message is supplied to espeak_Synth:
   "hello, hello."


   * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :

   ** Block 1:
   <audio data> +
   List of events: SENTENCE + WORD + LIST_TERMINATED

   ** Block 2:
   <audio data> +
   List of events: WORD + END + LIST_TERMINATED

   ** Block 3:
   no audio data
   List of events: MSG_TERMINATED + LIST_TERMINATED


   * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:

   ** SENTENCE
   ** WORD (call when the sounds are actually played)
   ** WORD
   ** END (call when the end of sentence is actually played.)
   ** MSG_TERMINATED


   The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
   So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.

   A MARK event indicates a <mark> element in the text.
   A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
*/
enum {
    POS_CHARACTER = 1,
    POS_WORD,
    POS_SENTENCE
}

alias espeak_POSITION_TYPE = int;

enum {
    /* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
    AUDIO_OUTPUT_PLAYBACK,

    /* RETRIEVAL mode: supplies audio data and events to the calling program */
    AUDIO_OUTPUT_RETRIEVAL,

    /* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
    AUDIO_OUTPUT_SYNCHRONOUS,

    /* Synchronous playback */
    AUDIO_OUTPUT_SYNCH_PLAYBACK
}

alias espeak_AUDIO_OUTPUT = int;

enum {
    EE_OK=0,
    EE_INTERNAL_ERROR=-1,
    EE_BUFFER_FULL=1,
    EE_NOT_FOUND=2
}

alias espeak_ERROR = int;

enum espeakINITIALIZE_PHONEME_EVENTS = 0x0001;
enum espeakINITIALIZE_PHONEME_IPA    = 0x0002;
enum espeakINITIALIZE_DONT_EXIT      = 0x8000;

int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const(char)* path, int options);
/* Must be called before any synthesis functions are called.
   output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.

   buflength:  The length in mS of sound buffers passed to the SynthCallback function.
            Value=0 gives a default of 200mS.
            This parameter is only used for AUDIO_OUTPUT_RETRIEVAL and AUDIO_OUTPUT_SYNCHRONOUS modes.

   path: The directory which contains the espeak-data directory, or NULL for the default location.

   options: bit 0:  1=allow espeakEVENT_PHONEME events.
            bit 1:  1= espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
            bit 15: 1=don't exit if espeak_data is not found (used for --help)

   Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
*/
alias t_espeak_callback = int function(short*, int, espeak_EVENT*);

void espeak_SetSynthCallback(t_espeak_callback SynthCallback);
/* Must be called before any synthesis functions are called.
   This specifies a function in the calling program which is called when a buffer of
   speech sound data has been produced.


   The callback function is of the form:

int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);

   wav:  is the speech sound data which has been produced.
      NULL indicates that the synthesis has been completed.

   numsamples: is the number of entries in wav.  This number may vary, may be less than
      the value implied by the buflength parameter given in espeak_Initialize, and may
      sometimes be zero (which does NOT indicate end of synthesis).

   events: an array of espeak_EVENT items which indicate word and sentence events, and
      also the occurrence of <mark> and <audio> elements within the text.  The list of
      events is terminated by an event of type = 0.


   Callback returns: 0=continue synthesis,  1=abort synthesis.
*/

alias t_espeak_uri_callback = int function(int, const(char)*, const(char)*);

void espeak_SetUriCallback(t_espeak_uri_callback UriCallback);
/* This function may be called before synthesis functions are used, in order to deal with
   <audio> tags.  It specifies a callback function which is called when an <audio> element is
   encountered and allows the calling program to indicate whether the sound file which
   is specified in the <audio> element is available and is to be played.

   The callback function is of the form:

int UriCallback(int type, const char *uri, const char *base);

   type:  type of callback event.  Currently only 1= <audio> element

   uri:   the "src" attribute from the <audio> element

   base:  the "xml:base" attribute (if any) from the <speak> element

   Return: 1=don't play the sound, but speak the text alternative.
           0=place a PLAY event in the event list at the point where the <audio> element
             occurs.  The calling program can then play the sound at that point.
*/


         /********************/
         /*    Synthesis     */
         /********************/


enum espeakCHARS_AUTO  = 0;
enum espeakCHARS_UTF8  = 1;
enum espeakCHARS_8BIT  = 2;
enum espeakCHARS_WCHAR = 3;
enum espeakCHARS_16BIT = 4;

enum espeakSSML          = 0x10;
enum espeakPHONEMES      = 0x100;
enum espeakENDPAUSE      = 0x1000;
enum espeakKEEP_NAMEDATA = 0x2000;

espeak_ERROR espeak_Synth(const(void)* text,
    size_t size,
    uint position,
    espeak_POSITION_TYPE position_type,
    uint end_position,
    uint flags,
    uint* unique_identifier,
    void* user_data);
/* Synthesize speech for the specified text.  The speech sound data is passed to the calling
   program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.

   text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
      wide characters (wchar_t), or UTF8 encoding.  Which of these is determined by the "flags"
      parameter.

   size: Equal to (or greater than) the size of the text data, in bytes.  This is used in order
      to allocate internal storage space for the text.  This value is not used for
      AUDIO_OUTPUT_SYNCHRONOUS mode.

   position:  The position in the text where speaking starts.  Zero indicates speak from the
      start of the text.

   position_type:  Determines whether "position" is a number of characters, words, or sentences.
      Values: POS_CHARACTER, POS_WORD, POS_SENTENCE

   end_position:  If set, this gives a character position at which speaking will stop.  A value
      of zero indicates no end position.

   flags:  These may be OR'd together:
      Type of character codes, one of:
         espeakCHARS_UTF8     UTF8 encoding
         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
         espeakCHARS_WCHAR    Wide characters (wchar_t)
         espeakCHARS_16BIT    16 bit characters.

      espeakSSML   Elements within < > are treated as SSML elements, or if not recognised are ignored.

      espeakPHONEMES  Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).

      espeakENDPAUSE  If set then a sentence pause is added at the end of the text.  If not set then
         this pause is suppressed.

   unique_identifier: This must be either NULL, or point to an integer variable to
      which eSpeak writes a message identifier number.
      eSpeak includes this number in espeak_EVENT messages which are the result of
      this call of espeak_Synth().

   user_data: a pointer (or NULL) which will be passed to the callback function in
      espeak_EVENT messages.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Synth_Mark(const(void)* text,
    size_t size,
    const(char)* index_mark,
    uint end_position,
    uint flags,
    uint* unique_identifier,
    void* user_data);
/* Synthesize speech for the specified text.  Similar to espeak_Synth() but the start position is
   specified by the name of a <mark> element in the text.

   index_mark:  The "name" attribute of a <mark> element within the text which specified the
      point at which synthesis starts.  UTF8 string.

   For the other parameters, see espeak_Synth()

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Key(const(char)* key_name);
/* Speak the name of a keyboard key.
   If key_name is a single character, it speaks the name of the character.
   Otherwise, it speaks key_name as a text string.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Char(wchar_t character);
/* Speak the name of the given character

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/




         /***********************/
         /*  Speech Parameters  */
         /***********************/

enum {
    espeakSILENCE=0, /* internal use */
    espeakRATE=1,
    espeakVOLUME=2,
    espeakPITCH=3,
    espeakRANGE=4,
    espeakPUNCTUATION=5,
    espeakCAPITALS=6,
    espeakWORDGAP=7,
    espeakOPTIONS=8,   // reserved for misc. options.  not yet used
    espeakINTONATION=9,

    espeakRESERVED1=10,
    espeakRESERVED2=11,
    espeakEMPHASIS,   /* internal use */
    espeakLINELENGTH, /* internal use */
    espeakVOICETYPE,  // internal, 1=mbrola
    N_SPEECH_PARAM    /* last enum */
}

alias espeak_PARAMETER = int;

enum {
    espeakPUNCT_NONE=0,
    espeakPUNCT_ALL=1,
    espeakPUNCT_SOME=2
}

alias espeak_PUNCT_TYPE = int;

espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
/* Sets the value of the specified parameter.
   relative=0   Sets the absolute value of the parameter.
   relative=1   Sets a relative value of the parameter.

   parameter:
      espeakRATE:    speaking speed in words per minute.  Values 80 to 450.

      espeakVOLUME:  volume in range 0-200 or more.
                     0=silence, 100=normal full volume, greater values may produce amplitude compression or distortion

      espeakPITCH:   base pitch, range 0-100.  50=normal

      espeakRANGE:   pitch range, range 0-100. 0=monotone, 50=normal

      espeakPUNCTUATION:  which punctuation characters to announce:
         value in espeak_PUNCT_TYPE (none, all, some),
         see espeak_SetPunctuationList() to specify which characters are announced.

      espeakCAPITALS: announce capital letters by:
         0=none,
         1=sound icon,
         2=spelling,
         3 or higher, by raising pitch.  This value gives the amount in Hz by which the pitch
            of a word is raised to indicate it has a capital letter.

      espeakWORDGAP:  pause between words, units of 10mS (at the default speed)

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

int espeak_GetParameter(espeak_PARAMETER parameter, int current);
/* current=0  Returns the default value of the specified parameter.
   current=1  Returns the current value of the specified parameter, as set by SetParameter()
*/

espeak_ERROR espeak_SetPunctuationList(const(wchar_t)* punctlist);
/* Specifies a list of punctuation characters whose names are to be spoken when the
   value of the Punctuation parameter is set to "some".

   punctlist:  A list of character codes, terminated by a zero character.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

void espeak_SetPhonemeTrace(int value, FILE *stream);
/* Controls the output of phoneme symbols for the text
   value=0  No phoneme output (default)
   value=1  Output the translated phoneme symbols for the text
   value=2  as (1), but also output a trace of how the translation was done (matching rules and list entries)
   value=3  as (1), but produces IPA rather than ascii phoneme names

   stream   output stream for the phoneme symbols (and trace).  If stream=NULL then it uses stdout.
*/

const(char)* espeak_TextToPhonemes(const(void)** textptr, int textmode, int phonememode);
/* Translates text into phonemes.  Call espeak_SetVoiceByName() first, to select a language.

   It returns a pointer to a character string which contains the phonemes for the text up to
   end of a sentence, or comma, semicolon, colon, or similar punctuation.

   textptr: The address of a pointer to the input text which is terminated by a zero character.
      On return, the pointer has been advanced past the text which has been translated, or else set
      to NULL to indicate that the end of the text has been reached.

   textmode: Type of character codes, one of:
         espeakCHARS_UTF8     UTF8 encoding
         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
         espeakCHARS_WCHAR    Wide characters (wchar_t)
         espeakCHARS_16BIT    16 bit characters.

   phonememode: bits0-3:
      0= just phonemes.
      1= include ties (U+361) for phoneme names of more than one letter.
      2= include zero-width-joiner for phoneme names of more than one letter.
      3= separate phonemes with underscore characters.

      bits 4-7:
      0= eSpeak's ascii phoneme names.
      1= International Phonetic Alphabet (as UTF-8 characters).
*/

void espeak_CompileDictionary(const(char)* path, FILE *log, int flags);
/* Compile pronunciation dictionary for a language which corresponds to the currently
   selected voice.  The required voice should be selected before calling this function.

   path:  The directory which contains the language's '_rules' and '_list' files.
          'path' should end with a path separator character ('/').
   log:   Stream for error reports and statistics information. If log=NULL then stderr will be used.

   flags:  Bit 0: include source line information for debug purposes (This is displayed with the
          -X command line option).
*/
         /***********************/
         /*   Voice Selection   */
         /***********************/


// voice table
struct espeak_VOICE {
    immutable(char)* name;      // a given name for this voice. UTF8 string.
    immutable(char)* languages; // list of pairs of (byte) priority + (string) language (and dialect qualifier)
    const(char)* identifier;    // the filename for this voice within espeak-data/voices

    // The four byte-sized fields below are explicitly zero-initialised.
    // D default-initialises `char` to 0xFF, so without these initialisers a
    // default-constructed espeak_VOICE would hold 255 instead of the
    // documented "0 = not specified" when used as a selection criterion.
    char gender = 0;            // 0=none 1=male, 2=female,
    char age = 0;               // 0=not specified, or age in years
    char variant = 0;           // only used when passed as a parameter to espeak_SetVoiceByProperties
    char xx1 = 0;               // for internal use
    int score;                  // for internal use
    void *spare;                // for internal use
}

/* Note: The espeak_VOICE structure is used for two purposes:
  1.  To return the details of the available voices.
  2.  As a parameter to  espeak_SetVoiceByProperties() in order to specify selection criteria.

   In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
   may be used, each language name in the list is terminated by a zero byte and is also preceded by
   a single byte which gives a "priority" number.  The list of languages is terminated by an
   additional zero byte.

   A language name consists of a language code, optionally followed by one or more qualifier (dialect)
   names separated by hyphens (eg. "en-uk").  A voice might, for example, have languages "en-uk" and
   "en".  Even without "en" listed, voice would still be selected for the "en" language (because
   "en-uk" is related) but at a lower priority.

   The priority byte indicates how the voice is preferred for the language. A low number indicates a
   more preferred voice, a higher number indicates a less preferred voice.

   In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
   priority byte.
*/

const(espeak_VOICE)** espeak_ListVoices(espeak_VOICE *voice_spec);
/* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
   The list is terminated by a NULL pointer

   If voice_spec is NULL then all voices are listed.
   If voice_spec is given, then only the voices which are compatible with the voice_spec
   are listed, and they are listed in preference order.
*/

espeak_ERROR espeak_SetVoiceByName(const(char)* name);
/* Searches for a voice with a matching "name" field.  Language is not considered.
   "name" is a UTF8 string.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
/* An espeak_VOICE structure is used to pass criteria to select a voice.  Any of the following
   fields may be set:

   name     NULL, or a voice name

   languages  NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"

   gender   0=not specified, 1=male, 2=female

   age      0=not specified, or an age in years

   variant  After a list of candidates is produced, scored and sorted, "variant" is used to index
            that list and choose a voice.
            variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
*/

espeak_VOICE *espeak_GetCurrentVoice();
/* Returns the espeak_VOICE data for the currently selected voice.
   This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
*/

espeak_ERROR espeak_Cancel();
/* Immediately stops synthesis and audio output of the current text. When this
   function returns, the audio output is fully stopped and the synthesizer is ready to
   synthesize a new message.

   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/

int espeak_IsPlaying();
/* Returns 1 if audio is played, 0 otherwise.
*/

espeak_ERROR espeak_Synchronize();
/* This function returns when all data have been spoken.
   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Terminate();
/* last function to be called.
   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/

const(char)* espeak_Info(const(char)** path_data);
/* Returns the version number string.
   path_data  returns the path to espeak_data
*/