speech.espeak.synthesis source code

1 module speech.espeak.synthesis;
2 
3 import std.range.primitives : isInputRange, ElementType;
4 import std.traits : isSomeChar;
5 
6 import deimos.portaudio;
7 
8 import speech.espeak.espeak;
9 import speech.audio.portaudio;
10 
11 private immutable int hzSampleRate;
12 
/**
 * Audio callback handed to eSpeak via espeak_SetSynthCallback.
 *
 * Forwards a buffer of synthesized samples to the PortAudio stream that is
 * stored in the event's `user_data` field.
 *
 * Params:
 *   wav        = sample buffer; may be null, in which case nothing is written
 *   numSamples = number of samples in `wav`; nothing is written when it is 0
 *   events     = event record whose `user_data` holds the target PaStream*
 *
 * Returns: 0 in every case.
 */
private extern(C) int synthCallback(short* wav, int numSamples, espeak_EVENT* events)
{
	// Nothing to play for this invocation.
	if(wav is null || numSamples == 0)
		return 0;

	// The output stream travels through eSpeak as opaque user data.
	auto output = cast(PaStream*)events.user_data;

	// The write result is deliberately not inspected here
	// (open question from the original author: handle PaOutputUnderflowed?).
	Pa_WriteStream(output, wav, numSamples);
	return 0;
}
22 
/**
 * Module constructor: initializes eSpeak in retrieval mode, records the value
 * it returns as the module-wide sample rate, and installs synthCallback as the
 * synthesis callback.
 *
 * NOTE(review): the value returned by espeak_Initialize is stored without
 * being checked for an error indicator - confirm whether that is intended.
 */
shared static this()
{
	immutable sampleRate = espeak_Initialize(AUDIO_OUTPUT_RETRIEVAL, 0, null, 0);
	hzSampleRate = sampleRate;

	espeak_SetSynthCallback(&synthCallback);
}
28 
/// Module destructor: shuts eSpeak down again by calling espeak_Terminate.
shared static ~this()
{
	espeak_Terminate();
}
33 
/**
 * Text-to-speech front end: text is synthesized by eSpeak and the resulting
 * samples are played through a PortAudio output stream owned by this struct.
 *
 * Instances are obtained through the `create` functions; the constructor is
 * private.
 *
 * NOTE(review): nothing in this struct closes the stream (there is no
 * Pa_CloseStream call) - confirm whether the stream is released elsewhere.
 */
struct Synthesizer
{
	private:
	// Output stream; handed to eSpeak as user data so that the synthesis
	// callback can write samples to it.
	PaStream* stream = null;

	// Opens a mono, 16-bit integer output stream on `outputDevice` at the
	// module-wide sample rate. paEnforce is applied to the format check and to
	// the open call.
	this(Device outputDevice)
	{
		PaStreamParameters params;
		params.device = outputDevice.index;
		params.channelCount = 1;
		params.sampleFormat = paInt16;
		// These are *output* parameters, so the device's output latency is the
		// relevant one (the input latency was used here before).
		params.suggestedLatency = outputDevice.info.defaultLowOutputLatency;

		paEnforce(Pa_IsFormatSupported(null, &params, hzSampleRate));

		paEnforce(Pa_OpenStream(&stream,
				null,
				&params,
				hzSampleRate,
				paFramesPerBufferUnspecified,
				paNoFlag,
				null,
				null));
	}

	// Submits `text` (UTF-8) to eSpeak with `extraFlags` OR-ed into the flags
	// and returns the identifier eSpeak assigned to the request.
	uint synth(in char[] text, uint extraFlags)
	{
		import std.string : toStringz;

		// A D slice carries no terminator, but eSpeak is given a buffer of
		// text.length + 1 bytes. Make a NUL-terminated copy so the extra byte
		// is a real terminator instead of whatever follows the slice in
		// memory. This also covers empty slices, whose pointer may be null.
		auto terminated = toStringz(text);

		uint identifier;
		// The copy is terminated, so no end position is needed (0). The old
		// code passed the byte length as a character position, which overshoots
		// for multi-byte UTF-8 text.
		espeak_Synth(terminated, text.length + 1, 0, POS_CHARACTER, 0, espeakCHARS_UTF8 | extraFlags, &identifier, stream);
		return identifier;
	}

	public:
	/// Create a new speech synthesis interface using the system default voice.
	static Synthesizer create()
	{
		return Synthesizer(defaultOutputDevice);
	}

	/// Create a new speech synthesis interface that plays on `outputDevice`.
	static Synthesizer create(Device outputDevice)
	{
		return Synthesizer(outputDevice);
	}

	/// Speak `text` (submitted with the espeakSSML flag) and block until
	/// espeak_Synchronize returns. The stream is started before and stopped
	/// after synthesis.
	void speak(in char[] text)
	{
		Pa_StartStream(stream);
		// Same cleanup style as the range overload below.
		scope(exit) Pa_StopStream(stream);

		synth(text, espeakSSML);
		espeak_Synchronize();
	}

	// TODO: handle SSML properly
	/// Speak the characters of `range`. The input is split into chunks by
	/// bufferSpeech using a 1024-char stack buffer; each chunk is synthesized
	/// (without the espeakSSML flag) and waited for in turn.
	void speak(Range)(Range range)
		if(isInputRange!Range && isSomeChar!(ElementType!Range))
	{
		import speech.buffer : bufferSpeech;

		Pa_StartStream(stream);
		scope(exit) Pa_StopStream(stream);

		char[1024] buffer = void;

		foreach(chunk; bufferSpeech(range, buffer[]))
		{
			synth(chunk, 0);
			espeak_Synchronize();
		}
	}

	/// Alias of `speak`, so the synthesizer can be used wherever a `put`
	/// member is expected.
	alias put = speak;

	/// Submit `text` (with the espeakSSML flag) without starting or stopping
	/// the stream and without waiting for synthesis to finish.
	void queue(in char[] text)
	{
		synth(text, espeakSSML);
	}

	/// Select `newVoice` by name. A Voice that converts to false is ignored.
	void voice(Voice newVoice) @property
	{
		if(newVoice)
			espeak_SetVoiceByName(newVoice.voice.name);
	}

	/// The voice reported by espeak_GetCurrentVoice.
	Voice voice() @property
	{
		return Voice(espeak_GetCurrentVoice());
	}

	/// Set eSpeak's espeakVOLUME parameter to `newVolume`.
	void volume(uint newVolume) @property
	{
		espeak_SetParameter(espeakVOLUME, newVolume, 0);
	}

	/// Read eSpeak's espeakVOLUME parameter.
	uint volume() @property
	{
		return espeak_GetParameter(espeakVOLUME, 1);
	}

	/// Set eSpeak's espeakRATE parameter to `newRate`.
	void rate(int newRate) @property
	{
		espeak_SetParameter(espeakRATE, newRate, 0);
	}

	/// Read eSpeak's espeakRATE parameter.
	int rate() @property
	{
		return espeak_GetParameter(espeakRATE, 1);
	}
}
142 
/// Thin handle around a pointer to an eSpeak voice descriptor.
struct Voice
{
	private const(espeak_VOICE)* voice = null;

	/// A Voice converts to `true` exactly when it wraps a non-null descriptor.
	bool opCast(T : bool)()
	{
		return voice !is null;
	}

	/// Name of the voice, read from the descriptor's C string.
	/// Must only be called on a Voice that converts to `true`.
	string name() @property
	{
		import std.string : fromStringz;

		assert(this);
		return fromStringz(voice.name);
	}

	/// Language of the voice: the part of the language spec before the first
	/// "-" (the dialect is ignored for now).
	/// ISO 639-1 2-letter code, with fallback to ISO 639-3 3-letter code.
	/// Must only be called on a Voice that converts to `true`.
	string language() @property
	{
		import std.algorithm.searching : findSplit;
		import std.string : fromStringz;

		assert(this);
		// The first byte of `languages` is a priority value (unused here), so
		// the language spec itself starts one byte in.
		return fromStringz(voice.languages + 1).findSplit("-")[0];
	}
}
171 
/// Returns a forward range of Voice values over the null-terminated pointer
/// array obtained from `espeak_ListVoices(null)`.
auto voiceList()
{
	struct Result
	{
		// Cursor into the pointer array; a null entry marks the end.
		private const(espeak_VOICE)** list;

		bool empty() @property
		{
			return *list is null;
		}

		Voice front() @property
		{
			assert(!empty);
			return Voice(*list);
		}

		void popFront() @property
		{
			assert(!empty);
			list++;
		}

		// Copying the cursor is enough to get an independent iteration point.
		Result save() @property
		{
			return this;
		}
	}

	auto voices = espeak_ListVoices(null);
	return Result(voices);
}
203