| Hash | Commit message | Author | Date | Files | + | - |
|------|----------------|--------|------|-------|---|---|
| d6783e28875386e360c76dc2f676c242951a6db6 | Auto-Commit Update 01.08.2023 - 22:26:15 | Connor Etherington <[email protected]> | Tue Aug 1 22:26:15 2023 +0200 | 10 | 292 | 0 |

 recipe/meta.yaml | 29 +++++
 saywoof.egg-info/PKG-INFO | 6 ++
 saywoof.egg-info/SOURCES.txt | 9 ++
 saywoof.egg-info/dependency_links.txt | 1 +
 saywoof.egg-info/entry_points.txt | 2 +
 saywoof.egg-info/requires.txt | 7 ++
 saywoof.egg-info/top_level.txt | 1 +
 saywoof/__init__.py | 18 ++++
 saywoof/main.py | 195 ++++++++++++++++++++++++++++++++++
 setup.py | 24 +++++
 10 files changed, 292 insertions(+)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
new file mode 100644
index 0000000..7e56c72
--- /dev/null
+++ b/recipe/meta.yaml
@@ -0,0 +1,29 @@
+package:
+  name: saywoof
+  version: 0.1.0
+
+source:
+  path: ..
+
+build:
+  number: 0
+  script: '{{ PYTHON }} -m pip install . --no-deps -vv'
+
+requirements:
+  host:
+    - python
+    - pip
+  run:
+    - python
+    - "numpy<=1.24"
+    - pandas
+
+about:
+  home: https://gitlab.com/a4to/saywoof
+  license: MIT
+  license_file: LICENSE
+  summary: 'An easy to use wrapper for the BARK text-to-speech engine'
+
+extra:
+  recipe-maintainers:
+    - concise
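
Note on the recipe: it is typically built with `conda build recipe/`, and because the build script installs with `pip install . --no-deps`, every runtime dependency has to come from the `run:` section, which lists only python, numpy<=1.24 and pandas, while the setup.py later in this commit requires pyaudio, soundfile, transformers and colr. A minimal sketch that makes the gap explicit, using only the two lists quoted in this commit:

```python
# Minimal sketch: compare the recipe's run requirements with setup.py's
# install_requires (both copied verbatim from this commit).
recipe_run = {"python", "numpy", "pandas"}
setup_requires = {"pyaudio", "numpy", "soundfile", "transformers",
                  "saywoof", "colr", "argparse"}

# pip runs with --no-deps, so only the run: list provides packages at runtime.
print("in setup.py but not in run:", sorted(setup_requires - recipe_run))
print("in run: but not in setup.py:", sorted(recipe_run - setup_requires))
```
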
diff --git a/saywoof.egg-info/PKG-INFO b/saywoof.egg-info/PKG-INFO
new file mode 100644
index 0000000..2067cf6
--- /dev/null
+++ b/saywoof.egg-info/PKG-INFO
@@ -0,0 +1,6 @@
+Metadata-Version: 2.1
+Name: saywoof
+Version: 0.1.0
+Summary: An easy to use wrapper for the BARK text-to-speech engine
+Author: Connor Etherington
+Author-email: [email protected]
diff --git a/saywoof.egg-info/SOURCES.txt b/saywoof.egg-info/SOURCES.txt
new file mode 100644
index 0000000..1eac9c0
--- /dev/null
+++ b/saywoof.egg-info/SOURCES.txt
@@ -0,0 +1,9 @@
+setup.py
+saywoof/__init__.py
+saywoof/main.py
+saywoof.egg-info/PKG-INFO
+saywoof.egg-info/SOURCES.txt
+saywoof.egg-info/dependency_links.txt
+saywoof.egg-info/entry_points.txt
+saywoof.egg-info/requires.txt
+saywoof.egg-info/top_level.txt
diff --git a/saywoof.egg-info/dependency_links.txt b/saywoof.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/saywoof.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/saywoof.egg-info/entry_points.txt b/saywoof.egg-info/entry_points.txt
new file mode 100644
index 0000000..7b5f261
--- /dev/null
+++ b/saywoof.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+saywoof = saywoof.main:main
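
The `console_scripts` entry above is what turns `saywoof.main:main` into the `saywoof` command at install time. A minimal sketch of what the generated launcher effectively does (the launcher file itself is produced by setuptools, not by this commit):

```python
# Rough equivalent of the launcher setuptools generates for
# `saywoof = saywoof.main:main`: import the target callable and run it.
import sys
from saywoof.main import main

if __name__ == "__main__":
    sys.exit(main())
```
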
diff --git a/saywoof.egg-info/requires.txt b/saywoof.egg-info/requires.txt
new file mode 100644
index 0000000..29024b4
--- /dev/null
+++ b/saywoof.egg-info/requires.txt
@@ -0,0 +1,7 @@
+pyaudio
+numpy
+soundfile
+transformers
+saywoof
+colr
+argparse
diff --git a/saywoof.egg-info/top_level.txt b/saywoof.egg-info/top_level.txt
new file mode 100644
index 0000000..8bcccc1
--- /dev/null
+++ b/saywoof.egg-info/top_level.txt
@@ -0,0 +1 @@
+saywoof
diff --git a/saywoof/__init__.py b/saywoof/__init__.py
new file mode 100644
index 0000000..26d7b22
--- /dev/null
+++ b/saywoof/__init__.py
@@ -0,0 +1,18 @@
+import argparse
+import sys
+
+parser = argparse.ArgumentParser(description='A wrapper for the BARK text-to-speech engine')
+parser.add_argument("-l", "--local", help="Load local model", action="store_true")
+parser.add_argument("-L", "--large", help="Load large model (Uses more vram)", action="store_true")
+parser.add_argument("-s", "--save", help="Save model", action="store_true")
+parser.add_argument("-t", "--text", help="Text to convert to speech")
+parser.add_argument("-f", "--file", help="File to convert to speech")
+parser.add_argument("-o", "--output", help="Output file name (default: output.wav)")
+parser.add_argument("-r", "--rate", help="Sample rate of output file (default: 22050)", type=int)
+parser.add_argument("-p", "--persistent", help="Persistent mode (Keep running after conversions to save time)", action="store_true")
+
+args = parser.parse_args()
+
+if len(sys.argv) == 1:
+    parser.print_help()
+    sys.exit(1)
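
One behaviour to be aware of in this `__init__.py`: the parser is built and run at import time, so any `import saywoof` consumes `sys.argv`, and importing with no arguments prints the help text and exits. A sketch of a more import-friendly arrangement with the same flags; the `build_parser` and `parse_args` names are illustrative, not part of this commit:

```python
# Illustrative refactor: build the parser lazily so importing the package has
# no side effects. Flags mirror those defined in saywoof/__init__.py above.
import argparse
import sys

def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description='A wrapper for the BARK text-to-speech engine')
    parser.add_argument("-l", "--local", help="Load local model", action="store_true")
    parser.add_argument("-t", "--text", help="Text to convert to speech")
    # ... remaining flags exactly as in __init__.py above ...
    return parser

def parse_args(argv=None):
    parser = build_parser()
    if not (argv if argv is not None else sys.argv[1:]):
        parser.print_help()
        sys.exit(1)
    return parser.parse_args(argv)
```
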
diff --git a/saywoof/main.py b/saywoof/main.py
new file mode 100644
index 0000000..4b7cae7
--- /dev/null
+++ b/saywoof/main.py
@@ -0,0 +1,195 @@
+import time
+import pyaudio
+import numpy as np
+import soundfile as sf
+from transformers import AutoProcessor, AutoModel, AutoTokenizer
+from saywoof import args
+from colr import color
+import argparse
+import os
+import sys
+
+save_path = None, None
+
+blue_1 = '#67D0A8'
+red_1 = '#FF6B6B'
+
+def pr(text: str, hex_color: str = None):
+
+    if not hex_color:
+        hex_color = blue_1
+
+    print(color(text, fore=hex_color))
+
+
+def load_model():
+    save_path = "./Models/bark-small"
+    os.environ["SUNO_OFFLOAD_CPU"] = "True"
+    os.environ["SUNO_USE_SMALL_MODELS"] = "True"
+
+    pr("\nLoading Processor...\n")
+    processor = AutoProcessor.from_pretrained("suno/bark-small")
+
+    pr("\nLoading Model...\n")
+    model = AutoModel.from_pretrained("suno/bark-small")
+
+    pr("\nLoading Tokenizer...\n")
+    tokenizer = AutoTokenizer.from_pretrained("suno/bark-small")
+
+    return processor, model, tokenizer, save_path
+
+
+def load_large():
+    save_path = "./Models/bark"
+
+    pr("\nLoading Processor...\n")
+    processor = AutoProcessor.from_pretrained("suno/bark")
+
+    pr("\nLoading Model...\n")
+    model = AutoModel.from_pretrained("suno/bark")
+
+    pr("\nLoading Tokenizer...\n")
+    tokenizer = AutoTokenizer.from_pretrained("suno/bark")
+
+    return processor, model, tokenizer, save_path
+
+
+def load_local():
+    os.environ["SUNO_OFFLOAD_CPU"] = "True"
+    os.environ["SUNO_USE_SMALL_MODELS"] = "True"
+
+    if args.large:
+        if not os.path.exists("./Models/bark"):
+            pr("\nBark model not found, please download it first (run without '-l' flag)\n", red_1)
+            sys.exit(1)
+        else:
+            pr("\nLoading Processor...\n")
+            processor = AutoProcessor.from_pretrained("./Models/bark")
+
+            pr("\nLoading Model...\n")
+            model = AutoModel.from_pretrained("./Models/bark")
+
+            pr("\nLoading Tokenizer...\n")
+            tokenizer = AutoTokenizer.from_pretrained("./Models/bark")
+
+    else:
+        if not os.path.exists("./Models/bark-small"):
+            pr("\nBark-small model not found, please download it first (run without '-l' flag)\n", red_1)
+            sys.exit(1)
+        else:
+            pr("\nLoading Processor...\n")
+            processor = AutoProcessor.from_pretrained("./Models/bark-small")
+
+            pr("\nLoading Model...\n")
+            model = AutoModel.from_pretrained("./Models/bark-small")
+
+            pr("\nLoading Tokenizer...\n")
+            tokenizer = AutoTokenizer.from_pretrained("./Models/bark-small")
+
+    return processor, model, tokenizer
+
+
+def save_model(processor, model, tokenizer, save_path):
+
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+
+    pr("\nSaving Processor...\n")
+    processor.save_pretrained(save_path)
+
+    pr("\nSaving Model...\n")
+    model.save_pretrained(save_path)
+
+    pr("\nSaving Tokenizer...\n")
+    tokenizer.save_pretrained(save_path)
+
+
+def text_to_speech(processor, model, tokenizer, text):
+
+    voice_preset = "v2/en_speaker_9"
+
+    inputs = processor(
+        text=[text],
+        return_tensors="pt",
+        voice_preset=voice_preset
+    )
+
+    # speech_values = model.generate(**inputs, do_sample=True)
+    audio_array = model.generate(**inputs)
+    audio_array = audio_array.cpu().numpy().squeeze()
+
+    return audio_array
+
+
+def write(filename, sample_rate, audio_array):
+    sf.write(filename, audio_array, sample_rate)
+
+
+def main():
+
+    if args.rate:
+        rate = int(args.rate)
+    else:
+        rate = 22050
+
+    if args.local:
+        processor, model, tokenizer = load_local()
+    elif args.large:
+        processor, model, tokenizer, save_path = load_large()
+    else:
+        processor, model, tokenizer, save_path = load_model()
+
+    if args.save:
+        if not args.local:
+            save_model(processor, model, tokenizer, save_path)
+        else:
+            print("Local model is already saved, Its local...")
+            sys.exit(0)
+
+    if args.persistent:
+        persistent(rate, processor, model, tokenizer)
+        sys.exit(0)
+
+    if not args.text and not args.file:
+        print("No input text or file specified")
+        sys.exit(1)
+    elif args.text and args.file:
+        print("Both text and file specified")
+        sys.exit(1)
+
+    elif args.text:
+        speech = text_to_speech(processor, model, tokenizer, args.text)
+        if args.output:
+            output_file = args.output
+        else:
+            output_file = "output.wav"
+
+        write(output_file, rate, speech)
+
+    elif args.file:
+        speech = text_to_speech(processor, model, tokenizer, args.file)
+        if args.output:
+            output_file = args.output
+        else:
+            output_file = "output.wav"
+
+        write(output_file, 18000, speech)
+
+
+def persistent(rate, processor, model, tokenizer):
+
+    p = pyaudio.PyAudio()
+    stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
+
+    while True:
+        text = input("Enter text: ")
+        speech = text_to_speech(processor, model, tokenizer, text)
+        stream.write(speech.astype(np.float32).tobytes())
+
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+
+
+if __name__ == "__main__":
+    main()
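
For reference, the pipeline in `text_to_speech()` plus `write()` reduces to the self-contained sketch below. The model names and voice preset are the ones used above; the one deliberate difference is the sample rate, since Bark generates 24 kHz audio while `main()` writes 22050 Hz by default and 18000 Hz in the `-f` branch, which shifts playback speed and pitch:

```python
# Self-contained sketch of the same pipeline main.py implements: load the
# small Bark checkpoint, synthesise one sentence, write it out as a WAV.
import soundfile as sf
from transformers import AutoModel, AutoProcessor

processor = AutoProcessor.from_pretrained("suno/bark-small")
model = AutoModel.from_pretrained("suno/bark-small")

inputs = processor(text=["Hello from saywoof"], return_tensors="pt",
                   voice_preset="v2/en_speaker_9")
audio = model.generate(**inputs).cpu().numpy().squeeze()

# Bark's native rate is available on the model's generation config (24 kHz);
# writing with that rate avoids the speed/pitch shift described above.
sf.write("output.wav", audio, model.generation_config.sample_rate)
```
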
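Two smaller observations on `main.py`: the module-level `save_path = None, None` binds a two-element tuple where a single placeholder looks intended, and the `-f/--file` branch passes the path string itself to `text_to_speech()`, so the filename rather than the file's contents is synthesised. A hedged sketch of what that branch presumably intends, reading the file first and reusing the caller's rate; `synthesise` is a placeholder callable, not a function from this commit:

```python
import soundfile as sf

def read_text(path: str) -> str:
    # Load the text named by -f/--file before synthesis.
    with open(path, "r", encoding="utf-8") as fh:
        return fh.read().strip()

def file_to_wav(path: str, rate: int, synthesise, output_file: str = "output.wav"):
    # `synthesise` stands in for a closure over text_to_speech(processor, model,
    # tokenizer, ...); the caller-supplied rate replaces the hard-coded 18000.
    audio = synthesise(read_text(path))
    sf.write(output_file, audio, rate)
```
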
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ad3e2a0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,24 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='saywoof',
+    version='0.1.0',
+    description='An easy to use wrapper for the BARK text-to-speech engine',
+    author='Connor Etherington',
+    author_email='[email protected]',
+    packages=find_packages(),
+    install_requires=[
+        "pyaudio",
+        "numpy",
+        "soundfile",
+        "transformers",
+        "saywoof",
+        "colr",
+        "argparse"
+    ],
+    entry_points={
+        'console_scripts': [
+            'saywoof=saywoof.main:main'
+        ]
+    }
+)
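
Two entries in `install_requires` stand out: `saywoof` makes the package depend on itself, and `argparse` has been part of the standard library since Python 3.2, so listing it pulls an old backport from PyPI. A sketch of the trimmed list, keeping every other entry exactly as above:

```python
# install_requires with the self-dependency and the stdlib argparse entry
# removed; the remaining packages are unchanged from setup.py.
install_requires = [
    "pyaudio",
    "numpy",
    "soundfile",
    "transformers",
    "colr",
]
```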