1 module speech.windows.synthesis;
2 
3 import std.conv;
4 import std.traits;
5 import std.range.primitives;
6 import std.utf;
7 
8 import core.sys.windows.windows;
9 import core.sys.windows.com;
10 
11 import speech.windows.sapi;
12 import speech.windows.sphelper;
13 import speech.windows.comref;
14 import speech.windows.localeinfo;
15 
/**
 * Returns a slice over the null-terminated UTF-16 string at `cStr`.
 *
 * The slice aliases the original memory and excludes the terminator.
 * A null pointer yields a null slice instead of being dereferenced.
 *
 * Params:
 *     cStr = pointer to a null-terminated sequence of `wchar`, or null
 * Returns: `cStr[0 .. n]` where `n` is the index of the first `'\0'`,
 *          or `null` when `cStr` is null.
 */
private inout(wchar)[] fromStringz(inout(wchar)* cStr) @system pure nothrow @nogc
{
	if(cStr is null)
		return null;

	// Scan for the terminator directly rather than through C's wcslen:
	// wcslen is not pure, which is what kept this function from being pure.
	size_t length = 0;
	while(cStr[length] != '\0')
		++length;

	return cStr[0 .. length];
}
21 
version(speech4d_manualcominit)
{
	// With this version identifier set, this module performs no COM
	// initialization or teardown of its own.
}
else
{
	/// Set by the module constructor: true when the matching destructor
	/// has to call CoUninitialize.
	bool shouldUninitialize;

	/// Initializes COM (multithreaded) when the module constructor runs.
	///
	/// RPC_E_CHANGED_MODE is tolerated and simply means no balancing
	/// CoUninitialize call is made later; any other failure HRESULT is
	/// thrown as a COMException.
	static this()
	{
		immutable HRESULT status = CoInitializeEx(null, COINIT_MULTITHREADED);
		immutable bool modeAlreadySet = status == RPC_E_CHANGED_MODE;

		if(!modeAlreadySet && status < 0)
			throw new COMException(status);

		shouldUninitialize = !modeAlreadySet;
	}

	/// Balances the CoInitializeEx call made by the constructor, if any.
	static ~this()
	{
		if(!shouldUninitialize)
			return;

		CoUninitialize();
	}
}
42 
/**
 * Thin wrapper around a SAPI `ISpVoice` instance.
 *
 * Every operation forwards to the underlying COM object and passes the
 * returned HRESULT through `coEnforce`.
 */
struct Synthesizer
{
	private:
	CoReference!ISpVoice synth;

	public:
	/// Create a new speech synthesis interface using the system default voice.
	static Synthesizer create()
	{
		return Synthesizer(CoReference!ISpVoice(&CLSID_SpVoice, &IID_ISpVoice));
	}

	/**
	 * Speaks the characters of `range` using `SPF_DEFAULT`.
	 *
	 * The input is fed through `bufferSpeech` with a 512-`wchar` stack
	 * buffer, and each null-terminated chunk it yields is passed to
	 * `ISpVoice.Speak` in turn.
	 *
	 * Params:
	 *     range = input range of any character type
	 */
	void speak(Range)(Range range)
		if(isInputRange!Range && isSomeChar!(ElementType!Range))
	{
		import speech.buffer;

		wchar[512] buffer;

		// Disabled fast path for short wchar slices, kept for reference.
		version(none) static if(is(Range : const(wchar)[]))
		{
			if(range.length < 512)
			{
				buffer[0 .. range.length] = range[];
				buffer[range.length] = '\0';
				coEnforce(synth.Speak(buffer.ptr, SPF_DEFAULT, null));
				return;
			}
		}

		foreach(chunk; range.bufferSpeech!(NullTerminate.yes)(buffer[]))
		{
			coEnforce(synth.Speak(chunk.ptr, SPF_DEFAULT, null));
		}
	}

	/// Output-range style alias for `speak`.
	alias put = speak;

	/// Converts `text` to a null-terminated UTF-16 string and hands it to `queuez`.
	void queue(in char[] text)
	{
		queuez(toUTFz!(const(wchar)*)(text));
	}

	/// ditto
	void queue(in wchar[] text)
	{
		queuez(toUTFz!(const(wchar)*)(text));
	}

	/**
	 * Passes an already null-terminated UTF-16 string to `ISpVoice.Speak`
	 * with `SPF_ASYNC`.
	 */
	void queuez(in wchar* text)
	{
		coEnforce(synth.Speak(text, SPF_ASYNC, null));
	}

	/// Sets the voice used by this synthesizer.
	void voice(Voice newVoice) @property
	{
		coEnforce(synth.SetVoice(newVoice.cpVoiceToken));
	}

	/// Returns the voice currently used by this synthesizer.
	Voice voice() @property
	{
		ISpObjectToken voiceToken;
		coEnforce(synth.GetVoice(&voiceToken));
		return Voice(CoReference!ISpObjectToken(voiceToken));
	}

	/**
	 * Sets the output volume.
	 *
	 * SAPI takes the volume as a 16-bit value. Values that do not fit are
	 * saturated to `USHORT.max` instead of being truncated, so an
	 * out-of-range request (e.g. 65_586) can no longer wrap around into
	 * an unrelated small volume (50).
	 */
	void volume(uint newVolume) @property
	{
		immutable USHORT sapiVolume =
			newVolume > USHORT.max ? USHORT.max : cast(USHORT)newVolume;
		coEnforce(synth.SetVolume(sapiVolume));
	}

	/// Returns the current output volume as reported by `ISpVoice.GetVolume`.
	uint volume() @property
	{
		USHORT vol;
		coEnforce(synth.GetVolume(&vol));
		return vol;
	}

	/// Sets the speaking rate via `ISpVoice.SetRate`.
	void rate(int newRate) @property
	{
		coEnforce(synth.SetRate(cast(long)newRate));
	}

	/// Returns the speaking rate as reported by `ISpVoice.GetRate`.
	int rate() @property
	{
		long r;
		coEnforce(synth.GetRate(&r));
		return cast(int)r;
	}
}
132 
/**
 * A SAPI voice, identified by its object token.
 */
struct Voice
{
	//BUG, TODO: causes weird access violation, workaround leaks
	//private ISpObjectToken cpVoiceToken;
	private CoReference!ISpObjectToken cpVoiceToken;

	/**
	 * Returns the token's default string value (requested with a null
	 * value name) converted to a D string.
	 */
	string name() @property
	{
		LPWSTR rawName;
		coEnforce(cpVoiceToken.GetStringValue(null, &rawName));
		// The string is allocated by SAPI; the caller has to free it.
		scope(exit) CoTaskMemFree(rawName);

		return to!string(fromStringz(rawName));
	}

	/**
	 * Returns the ISO 639 language code of the voice's primary language.
	 *
	 * The token's "Attributes"/"Language" value is read, the part before
	 * the first ';' is parsed as a hexadecimal LCID, and that LCID is
	 * translated with `GetLocaleInfoW`.
	 *
	 * Throws: whatever `coEnforce` throws for a failed COM call, a
	 *         `ConvException` if the LCID cannot be parsed, and a
	 *         `WindowsException` if `GetLocaleInfoW` fails.
	 */
	string language() @property
	{
		import std.algorithm.searching : findSplit;
		import std.windows.syserror : wenforce;

		ISpDataKey attributes;
		coEnforce(cpVoiceToken.OpenKey("Attributes", &attributes));
		// OpenKey hands back an owned reference; release it when done.
		scope(exit) attributes.Release();

		LPWSTR localeSpec;
		coEnforce(attributes.GetStringValue("Language", &localeSpec));
		scope(exit) CoTaskMemFree(localeSpec);

		auto split = fromStringz(localeSpec).findSplit(";");
		LCID locale = to!LCID(split[0], 16);

		// Note: use LOCALE_SISO639LANGNAME2 for ISO-639-2 three-letter codes
		wchar[16] langCodeBuffer;

		// A single call into the fixed buffer: the result is the number of
		// characters written including the terminator, or 0 on failure
		// (which includes a too-small buffer). Checking it explicitly
		// keeps the slice below from underflowing in release builds.
		immutable int nchars = GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME,
			langCodeBuffer.ptr, cast(int)langCodeBuffer.length);
		wenforce(nchars > 0, "GetLocaleInfoW");

		return to!string(langCodeBuffer[0 .. nchars - 1]); // Don't include null terminator
	}
}
167 
/**
 * Enumerates the installed SAPI voices (category `SPCAT_VOICES`).
 *
 * Returns: an input range of `Voice` with `length`. The range state lives
 *          in this function's closure, so copies of the returned value
 *          share one position.
 */
auto voiceList()
{
	IEnumSpObjectTokens cpEnum;
	coEnforce(SpEnumTokens(SPCAT_VOICES, null, null, &cpEnum));
	// NOTE(review): cpEnum is never released here; confirm whether that
	// is intentional before adding a Release.

	// Prefetch the first token so that `front` is valid immediately.
	ISpObjectToken cpVoiceToken;
	coEnforce(cpEnum.Next(1, &cpVoiceToken, null));

	ULONG total;
	coEnforce(cpEnum.GetCount(&total));

	// Number of voices still in the enumerator *after* the current front;
	// -1 means the range is exhausted.
	long remaining = cast(long)total - 1;

	struct Result
	{
		/// True once every voice has been consumed.
		bool empty() @property
		{
			return remaining == -1;
		}

		/// The current voice.
		Voice front() @property
		{
			return Voice(CoReference!ISpObjectToken(cpVoiceToken));
		}

		/// Advances to the next voice, fetching it from the enumerator if one is left.
		void popFront()
		{
			if(remaining > 0)
			{
				coEnforce(cpEnum.Next(1, &cpVoiceToken, null));
			}

			--remaining;
		}

		/// Number of voices left in the range, including the current front.
		size_t length() @property
		{
			return cast(size_t)(remaining + 1);
		}

		/**
		 * Collects every voice left in the range (current front included)
		 * into an array and leaves the range empty.
		 *
		 * Returns: the remaining voices, or `null` if the range is empty.
		 */
		Voice[] array()
		{
			if(remaining < 0)
				return null;

			// One slot for the current front plus one per voice still in
			// the enumerator.
			auto tokens = new ISpObjectToken[cast(size_t)remaining + 1];
			tokens[0] = cpVoiceToken;

			if(remaining > 0)
			{
				ULONG fetched;
				coEnforce(cpEnum.Next(cast(ULONG)remaining, &tokens[1], &fetched));
				// Keep only what the enumerator actually delivered.
				tokens = tokens[0 .. cast(size_t)fetched + 1];
			}

			// The enumerator has been drained; keep range state consistent.
			remaining = -1;

			// Reinterprets the token references as Voice values; this relies
			// on Voice having the layout of a single ISpObjectToken
			// reference (presumably guaranteed by CoReference — confirm).
			return cast(Voice[])tokens;
		}
	}

	return Result();
}