saywoof


Logs | Files | README | LICENSE | GitLab


1
commit d6783e28875386e360c76dc2f676c242951a6db6
2
Author: Connor Etherington <[email protected]>
3
Date:   Tue Aug 1 22:26:15 2023 +0200
4
5
    Auto-Commit Update 01.08.2023 - 22:26:15
6
---
7
 recipe/meta.yaml                      |  29 +++++
8
 saywoof.egg-info/PKG-INFO             |   6 ++
9
 saywoof.egg-info/SOURCES.txt          |   9 ++
10
 saywoof.egg-info/dependency_links.txt |   1 +
11
 saywoof.egg-info/entry_points.txt     |   2 +
12
 saywoof.egg-info/requires.txt         |   7 ++
13
 saywoof.egg-info/top_level.txt        |   1 +
14
 saywoof/__init__.py                   |  18 ++++
15
 saywoof/main.py                       | 195 ++++++++++++++++++++++++++++++++++
16
 setup.py                              |  24 +++++
17
 10 files changed, 292 insertions(+)
18
19
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
20
new file mode 100644
21
index 0000000..7e56c72
22
--- /dev/null
23
+++ b/recipe/meta.yaml
24
@@ -0,0 +1,29 @@
25
+package:
26
+  name: saywoof
27
+  version: 0.1.0
28
+
29
+source:
30
+  path: ..
31
+
32
+build:
33
+  number: 0
34
+  script: '{{ PYTHON }} -m pip install . --no-deps -vv'
35
+
36
+requirements:
37
+  host:
38
+    - python
39
+    - pip
40
+  run:
41
+    - python
42
+    - "numpy<=1.24"
43
+    - pandas
44
+
45
+about:
46
+  home: https://gitlab.com/a4to/saywoof
47
+  license: MIT
48
+  license_file: LICENSE
49
+  summary: 'An easy to use wrapper for the BARK text-to-speech engine'
50
+
51
+extra:
52
+  recipe-maintainers:
53
+    - concise
54
diff --git a/saywoof.egg-info/PKG-INFO b/saywoof.egg-info/PKG-INFO
55
new file mode 100644
56
index 0000000..2067cf6
57
--- /dev/null
58
+++ b/saywoof.egg-info/PKG-INFO
59
@@ -0,0 +1,6 @@
60
+Metadata-Version: 2.1
61
+Name: saywoof
62
+Version: 0.1.0
63
+Summary: An easy to use wrapper for the BARK text-to-speech engine
64
+Author: Connor Etherington
65
+Author-email: [email protected]
66
diff --git a/saywoof.egg-info/SOURCES.txt b/saywoof.egg-info/SOURCES.txt
67
new file mode 100644
68
index 0000000..1eac9c0
69
--- /dev/null
70
+++ b/saywoof.egg-info/SOURCES.txt
71
@@ -0,0 +1,9 @@
72
+setup.py
73
+saywoof/__init__.py
74
+saywoof/main.py
75
+saywoof.egg-info/PKG-INFO
76
+saywoof.egg-info/SOURCES.txt
77
+saywoof.egg-info/dependency_links.txt
78
+saywoof.egg-info/entry_points.txt
79
+saywoof.egg-info/requires.txt
80
+saywoof.egg-info/top_level.txt
81
diff --git a/saywoof.egg-info/dependency_links.txt b/saywoof.egg-info/dependency_links.txt
82
new file mode 100644
83
index 0000000..8b13789
84
--- /dev/null
85
+++ b/saywoof.egg-info/dependency_links.txt
86
@@ -0,0 +1 @@
87
+
88
diff --git a/saywoof.egg-info/entry_points.txt b/saywoof.egg-info/entry_points.txt
89
new file mode 100644
90
index 0000000..7b5f261
91
--- /dev/null
92
+++ b/saywoof.egg-info/entry_points.txt
93
@@ -0,0 +1,2 @@
94
+[console_scripts]
95
+saywoof = saywoof.main:main
96
diff --git a/saywoof.egg-info/requires.txt b/saywoof.egg-info/requires.txt
97
new file mode 100644
98
index 0000000..29024b4
99
--- /dev/null
100
+++ b/saywoof.egg-info/requires.txt
101
@@ -0,0 +1,7 @@
102
+pyaudio
103
+numpy
104
+soundfile
105
+transformers
106
+saywoof
107
+colr
108
+argparse
109
diff --git a/saywoof.egg-info/top_level.txt b/saywoof.egg-info/top_level.txt
110
new file mode 100644
111
index 0000000..8bcccc1
112
--- /dev/null
113
+++ b/saywoof.egg-info/top_level.txt
114
@@ -0,0 +1 @@
115
+saywoof
116
diff --git a/saywoof/__init__.py b/saywoof/__init__.py
117
new file mode 100644
118
index 0000000..26d7b22
119
--- /dev/null
120
+++ b/saywoof/__init__.py
121
@@ -0,0 +1,18 @@
122
+import argparse
123
+import sys
124
+
125
+parser = argparse.ArgumentParser(description='A wrapper for the BARK text-to-speech engine')
126
+parser.add_argument("-l", "--local", help="Load local model", action="store_true")
127
+parser.add_argument("-L", "--large", help="Load large model (Uses more vram)", action="store_true")
128
+parser.add_argument("-s", "--save", help="Save model", action="store_true")
129
+parser.add_argument("-t", "--text", help="Text to convert to speech")
130
+parser.add_argument("-f", "--file", help="File to convert to speech")
131
+parser.add_argument("-o", "--output", help="Output file name (default: output.wav)")
132
+parser.add_argument("-r", "--rate", help="Sample rate of output file (default: 22050)", type=int)
133
+parser.add_argument("-p", "--persistent", help="Persistent mode (Keep running after conversions to save time)", action="store_true")
134
+
135
+args = parser.parse_args()
136
+
137
+if len(sys.argv) == 1:
138
+    parser.print_help()
139
+    sys.exit(1)
140
diff --git a/saywoof/main.py b/saywoof/main.py
141
new file mode 100644
142
index 0000000..4b7cae7
143
--- /dev/null
144
+++ b/saywoof/main.py
145
@@ -0,0 +1,195 @@
146
+import time
147
+import pyaudio
148
+import numpy as np
149
+import soundfile as sf
150
+from transformers import AutoProcessor, AutoModel, AutoTokenizer
151
+from saywoof import args
152
+from colr import color
153
+import argparse
154
+import os
155
+import sys
156
+
157
+save_path = None, None
158
+
159
+blue_1 = '#67D0A8'
160
+red_1 = '#FF6B6B'
161
+
162
+def pr(text: str, hex_color: str = None):
163
+
164
+    if not hex_color:
165
+        hex_color = blue_1
166
+
167
+    print(color(text, fore=hex_color))
168
+
169
+
170
+def load_model():
171
+    save_path = "./Models/bark-small"
172
+    os.environ["SUNO_OFFLOAD_CPU"] = "True"
173
+    os.environ["SUNO_USE_SMALL_MODELS"] = "True"
174
+
175
+    pr("\nLoading Processor...\n")
176
+    processor = AutoProcessor.from_pretrained("suno/bark-small")
177
+
178
+    pr("\nLoading Model...\n")
179
+    model = AutoModel.from_pretrained("suno/bark-small")
180
+
181
+    pr("\nLoading Tokenizer...\n")
182
+    tokenizer = AutoTokenizer.from_pretrained("suno/bark-small")
183
+
184
+    return processor, model, tokenizer, save_path
185
+
186
+
187
+def load_large():
188
+    save_path = "./Models/bark"
189
+
190
+    pr("\nLoading Processor...\n")
191
+    processor = AutoProcessor.from_pretrained("suno/bark")
192
+
193
+    pr("\nLoading Model...\n")
194
+    model = AutoModel.from_pretrained("suno/bark")
195
+
196
+    pr("\nLoading Tokenizer...\n")
197
+    tokenizer = AutoTokenizer.from_pretrained("suno/bark")
198
+
199
+    return processor, model, tokenizer, save_path
200
+
201
+
202
+def load_local():
203
+    os.environ["SUNO_OFFLOAD_CPU"] = "True"
204
+    os.environ["SUNO_USE_SMALL_MODELS"] = "True"
205
+
206
+    if args.large:
207
+        if not os.path.exists("./Models/bark"):
208
+            pr("\nBark model not found, please download it first (run without '-l' flag)\n", red_1)
209
+            sys.exit(1)
210
+        else:
211
+            pr("\nLoading Processor...\n")
212
+            processor = AutoProcessor.from_pretrained("./Models/bark")
213
+
214
+            pr("\nLoading Model...\n")
215
+            model = AutoModel.from_pretrained("./Models/bark")
216
+
217
+            pr("\nLoading Tokenizer...\n")
218
+            tokenizer = AutoTokenizer.from_pretrained("./Models/bark")
219
+
220
+    else:
221
+        if not os.path.exists("./Models/bark-small"):
222
+            pr("\nBark-small model not found, please download it first (run without '-l' flag)\n", red_1)
223
+            sys.exit(1)
224
+        else:
225
+            pr("\nLoading Processor...\n")
226
+            processor = AutoProcessor.from_pretrained("./Models/bark-small")
227
+
228
+            pr("\nLoading Model...\n")
229
+            model = AutoModel.from_pretrained("./Models/bark-small")
230
+
231
+            pr("\nLoading Tokenizer...\n")
232
+            tokenizer = AutoTokenizer.from_pretrained("./Models/bark-small")
233
+
234
+    return processor, model, tokenizer
235
+
236
+
237
+def save_model(processor, model, tokenizer, save_path):
238
+
239
+    if not os.path.exists(save_path):
240
+        os.makedirs(save_path)
241
+
242
+    pr("\nSaving Processor...\n")
243
+    processor.save_pretrained(save_path)
244
+
245
+    pr("\nSaving Model...\n")
246
+    model.save_pretrained(save_path)
247
+
248
+    pr("\nSaving Tokenizer...\n")
249
+    tokenizer.save_pretrained(save_path)
250
+
251
+
252
+def text_to_speech(processor, model, tokenizer, text):
253
+
254
+    voice_preset = "v2/en_speaker_9"
255
+
256
+    inputs = processor(
257
+        text=[text],
258
+        return_tensors="pt",
259
+        voice_preset=voice_preset
260
+    )
261
+
262
+    # speech_values = model.generate(**inputs, do_sample=True)
263
+    audio_array = model.generate(**inputs)
264
+    audio_array = audio_array.cpu().numpy().squeeze()
265
+
266
+    return audio_array
267
+
268
+
269
+def write(filename, sample_rate, audio_array):
270
+    sf.write(filename, audio_array, sample_rate)
271
+
272
+
273
+def main():
274
+
275
+    if args.rate:
276
+        rate = int(args.rate)
277
+    else:
278
+        rate = 22050
279
+
280
+    if args.local:
281
+        processor, model, tokenizer = load_local()
282
+    elif args.large:
283
+        processor, model, tokenizer, save_path = load_large()
284
+    else:
285
+        processor, model, tokenizer, save_path = load_model()
286
+
287
+    if args.save:
288
+        if not args.local:
289
+            save_model(processor, model, tokenizer, save_path)
290
+        else:
291
+            print("Local model is already saved, Its local...")
292
+        sys.exit(0)
293
+
294
+    if args.persistent:
295
+        persistent(rate, processor, model, tokenizer)
296
+        sys.exit(0)
297
+
298
+    if not args.text and not args.file:
299
+        print("No input text or file specified")
300
+        sys.exit(1)
301
+    elif args.text and args.file:
302
+        print("Both text and file specified")
303
+        sys.exit(1)
304
+
305
+    elif args.text:
306
+        speech = text_to_speech(processor, model, tokenizer, args.text)
307
+        if args.output:
308
+            output_file = args.output
309
+        else:
310
+            output_file = "output.wav"
311
+
312
+        write(output_file, rate, speech)
313
+
314
+    elif args.file:
315
+        speech = text_to_speech(processor, model, tokenizer, args.file)
316
+        if args.output:
317
+            output_file = args.output
318
+        else:
319
+            output_file = "output.wav"
320
+
321
+        write(output_file, 18000, speech)
322
+
323
+
324
+def persistent(rate, processor, model, tokenizer):
325
+
326
+    p = pyaudio.PyAudio()
327
+    stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
328
+
329
+    while True:
330
+        text = input("Enter text: ")
331
+        speech = text_to_speech(processor, model, tokenizer, text)
332
+        stream.write(speech.astype(np.float32).tobytes())
333
+
334
+    stream.stop_stream()
335
+    stream.close()
336
+    p.terminate()
337
+
338
+
339
+if __name__ == "__main__":
340
+    main()
341
diff --git a/setup.py b/setup.py
342
new file mode 100644
343
index 0000000..ad3e2a0
344
--- /dev/null
345
+++ b/setup.py
346
@@ -0,0 +1,24 @@
347
+from setuptools import setup, find_packages
348
+
349
+setup(
350
+    name='saywoof',
351
+    version='0.1.0',
352
+    description='An easy to use wrapper for the BARK text-to-speech engine',
353
+    author='Connor Etherington',
354
+    author_email='[email protected]',
355
+    packages=find_packages(),
356
+    install_requires=[
357
+        "pyaudio",
358
+        "numpy",
359
+        "soundfile",
360
+        "transformers",
361
+        "saywoof",
362
+        "colr",
363
+        "argparse"
364
+    ],
365
+    entry_points={
366
+        'console_scripts': [
367
+            'saywoof=saywoof.main:main'
368
+        ]
369
+    }
370
+)