module speech.buffer;

import std.range.primitives;
import std.traits;
import std.typecons : Flag;

/// Selects whether `bufferSpeech` writes a `'\0'` code unit right after every chunk.
alias NullTerminate = Flag!"nullTerminate";

// TODO: try not to split SSML tags
// TODO: optimize for UTF-8/UTF-16 strings and ranges of char/wchar
// TODO: handle space / punctuation separator as grapheme cluster?
// TODO: prioritize punctuation over whitespace?
/**
 * Lazily re-encodes `text` into `buffer`, one chunk at a time.
 *
 * The result is an input range whose `front` is a slice of `buffer` holding
 * the current chunk encoded as UTF-8 (`C == char`) or UTF-16 (`C == wchar`).
 * The same storage is reused for every chunk, so a chunk must be consumed (or
 * copied) before `popFront` is called.
 *
 * A chunk is cut right after the last white-space or punctuation code point
 * that was written while filling the buffer; the text following that
 * separator is carried over to the start of the next chunk. When no separator
 * was written, or when the input is exhausted, everything written so far is
 * returned as the chunk. Code points are never split across chunks.
 *
 * Params:
 *   nullTerminate = with `NullTerminate.yes` the code unit directly after
 *                   every chunk (i.e. `front.ptr[front.length]`) is `'\0'`;
 *                   the terminator is not part of `front` itself.
 *   text          = input range of characters of any width.
 *   buffer        = scratch storage; must be longer than the widest encoded
 *                   code point (4 `char`s or 2 `wchar`s), plus one when
 *                   null-terminating.
 *
 * Returns: an input range of `C[]` slices into `buffer`; empty when `text`
 *          is empty.
 */
auto bufferSpeech(NullTerminate nullTerminate = NullTerminate.no, Range, C)(Range text, C[] buffer)
    if(isInputRange!Range && isSomeChar!(ElementType!Range) &&
        (is(C == char) || is(C == wchar)))
{
    import std.uni : unicode;
    import std.utf : encode;

    // Largest number of code units one code point can occupy in C's encoding.
    enum maxCodeUnits = 4 / C.sizeof;

    // Code units that must stay free behind the text for the terminator.
    static if(nullTerminate == NullTerminate.yes)
        enum terminatorSize = 1;
    else
        enum terminatorSize = 0;

    enum reservedEndSpaceSize = maxCodeUnits + terminatorSize;

    assert(buffer.length > reservedEndSpaceSize, "bufferSpeech: buffer is too small");

    // Always iterate by code point, whatever the width of the input characters.
    static if(is(Unqual!(ElementType!Range) == dchar))
        alias r = text;
    else
    {
        import std.utf : byDchar;
        auto r = text.byDchar();
    }

    static struct Result
    {
    private:
        typeof(r) range;
        C[] buffer;           // null once the range is exhausted
        size_t chunkLength;   // length of the chunk currently exposed by front
        size_t leftoverStart; // index of the first code unit carried to the next chunk
        size_t usedLength;    // one past the last code unit stored in buffer

    public:
        /// The current chunk; a slice of the caller's buffer.
        C[] front() @property
        {
            assert(!empty);
            return buffer[0 .. chunkLength];
        }

        /// True once every chunk has been consumed.
        bool empty() @property
        {
            return buffer is null;
        }

        /// Overwrites the buffer with the next chunk.
        void popFront()
        {
            import core.stdc.string : memmove;

            assert(!empty);

            immutable carried = usedLength - leftoverStart;
            if(carried == 0 && range.empty)
            {
                buffer = null;
                return;
            }

            // Text written after the previous chunk's separator opens this chunk.
            if(carried != 0)
                memmove(buffer.ptr, buffer.ptr + leftoverStart, carried * C.sizeof);
            usedLength = carried;

            static immutable sep = unicode.White_Space | unicode.Punctuation;

            // Space usable for text; the terminator slot (if any) is excluded.
            immutable capacity = buffer.length - terminatorSize;

            // 0 means "no separator written while filling this chunk"; a real
            // separator always ends at index >= 1.
            size_t postSeparatorIndex = 0;

            while(!range.empty)
            {
                immutable dchar codePoint = range.front;
                C[maxCodeUnits] encodeBuffer;
                immutable count = encode(encodeBuffer, codePoint);

                // Only reachable on the first iteration, when carried-over text
                // left too little room; emit what we have instead of overrunning.
                if(count > capacity - usedLength)
                    break;

                buffer[usedLength .. usedLength + count] = encodeBuffer[0 .. count];
                usedLength += count;

                if(sep[codePoint])
                    postSeparatorIndex = usedLength;

                range.popFront();

                // Stop as soon as a worst-case code point might no longer fit.
                if(capacity - usedLength < maxCodeUnits)
                    break;
            }

            // Cut after the last separator unless the input ended or none was seen.
            immutable splitIndex =
                (range.empty || postSeparatorIndex == 0) ? usedLength : postSeparatorIndex;

            static if(nullTerminate == NullTerminate.yes)
            {
                // Shift the leftovers one code unit to the right (pointer
                // arithmetic is already scaled by C.sizeof) and put the
                // terminator in the gap. usedLength <= capacity guarantees room.
                immutable leftoverLength = usedLength - splitIndex;
                if(leftoverLength != 0)
                    memmove(buffer.ptr + splitIndex + 1, buffer.ptr + splitIndex, leftoverLength * C.sizeof);
                buffer[splitIndex] = '\0';
                leftoverStart = splitIndex + 1;
                ++usedLength;
            }
            else
            {
                leftoverStart = splitIndex;
            }

            chunkLength = splitIndex;
        }
    }

    auto result = Result(r, buffer);
    result.popFront(); // prime the first chunk (or become empty for empty input)
    return result;
}

unittest
{
    import std.algorithm.comparison : equal;
    import std.meta : AliasSeq;

    static bool test(Char, size_t bufSize)(in Char[] text, in Char[][] expectedChunks)
    {
        Char[bufSize] buffer;
        return text.bufferSpeech(buffer[]).equal(expectedChunks);
    }

    // Like test, but null-terminating: every chunk must be followed by '\0'.
    static bool testTerminated(Char, size_t bufSize)(in Char[] text, in Char[][] expectedChunks)
    {
        Char[bufSize] buffer;
        auto chunks = text.bufferSpeech!(NullTerminate.yes)(buffer[]);
        foreach(expected; expectedChunks)
        {
            if(chunks.empty || chunks.front != expected || chunks.front.ptr[expected.length] != '\0')
                return false;
            chunks.popFront();
        }
        return chunks.empty;
    }

    foreach(i, Char; AliasSeq!(char, wchar))
    {
        enum smallSize = AliasSeq!(12, 10)[i];
        enum medSize = AliasSeq!(16, 14)[i];
        alias testSmall = test!(Char, smallSize);
        alias testMedium = test!(Char, medSize);

        assert(testSmall("te.te", ["te.te"])); // 5
        assert(testSmall("te.ttestte", ["te.", "ttestte"])); // 3 + 7
        assert(testSmall("te.ttestt.st", ["te.", "ttestt.st"])); // 3 + 9

        assert(testMedium("Hi friend, my friend.", ["Hi friend, ", "my friend."])); // 11 + 10
        assert(testMedium("Hi friend\nmy friend.", ["Hi friend\nmy ", "friend."])); // 13 + 7

        assert(testSmall("testtesttesttest", ["testtestt", "esttest"])); // 9 + 7
        assert(testSmall("testtesttes.test", ["testtestt", "es.test"])); // 9 + 7
        assert(testSmall("testtest.testtest", ["testtest.", "testtest"])); // 9 + 8

        // Empty input yields an empty range.
        assert(testSmall("", []));

        // Leftovers that almost fill the buffer must not be overrun by a
        // multi-unit code point.
        assert(testSmall(".aaaaaaa\U0001F600\U0001F600", [".", "aaaaaaa\U0001F600", "\U0001F600"]));

        // Null-terminated chunks: single chunk, separator split, no separator.
        alias testSmallZ = testTerminated!(Char, smallSize);
        assert(testSmallZ("te.te", ["te.te"]));
        assert(testSmallZ("te.ttestte", ["te.", "ttestte"]));
        assert(testSmallZ("testtesttesttest", ["testtest", "testtest"]));
    }

    assert(test!(char, 12)("t。ttestt。t", ["t。", "ttestt。", "t"])); // 3 + 8 + 1
    assert(test!(wchar, 10)("t。ttestt。t", ["t。ttestt。", "t"])); // 9 + 1

    assert(test!(char, 12)("うん、OK", ["うん、", "OK"])); // 6 + 4
    assert(test!(wchar, 10)("うん、OK", ["うん、OK"])); // 5

    assert(test!(char, 12)("testtestte。test", ["testtestt", "e。test"])); // 9 + 7
    assert(test!(wchar, 10)("testtestte。test", ["testtestt", "e。test"])); // 9 + 6

    assert(test!(char, 12)("testtes。testtest", ["testtes。", "testtest"])); // 9 + 8
    assert(test!(wchar, 10)("testtes。testtest", ["testtes。", "testtest"])); // 8 + 8
}