(22) Text-to-speech, TTS, long text, Edge-TTS

(22) Text-to-speech, TTS, long text, Edge-TTS

The code in this article uses Edge-TTS to convert text to speech and saves the result as an MP3 or WAV file. There is no limit on the text length.
It calls the cloud-based Edge-TTS service; I have only added a thin wrapper around it and built a simple UI.
A ready-to-run executable can be downloaded from Baidu Netdisk:
https://pan.baidu.com/s/1ntMnDWFvnS7tLUd9jku8Ew?pwd=hims


The code is as follows:

#Text to Speech Tool V1.0
import asyncio
import os
import subprocess
import sys
import time
import traceback

# import librosa
import cv2
import edge_tts
import yaml
from playsound import playsound
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QWidget, QMessageBox, QFileDialog, QApplication, QSlider
from PyQt5.QtCore import Qt, QTimer, QThread, pyqtSignal, pyqtSlot

import hbt_funcs as hbt
from txt2audio_UI import Ui_txt2voice

# WindowsSelectorEventLoopPolicy only exists on Windows builds of Python, so
# guard the lookup to keep the module importable on other platforms.
if hasattr(asyncio, 'WindowsSelectorEventLoopPolicy'):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

hbt.is_contains_chinese(os.getcwd()) #Check whether there is a Chinese path

# Shared mutable state, read and written by the GUI slots and worker threads.
rates = '+0%'       # edge-tts rate string: explicit sign, digits, '%' (no spaces)
run_flag = 0
stop_flag = False
bar = 0             # current value of the indeterminate progress animation
# Voice name suffixes; the order must match the entries of the voice combo box.
voices_list = ['XiaoxiaoNeural', 'XiaoyiNeural', 'YunxiaNeural', 'liaoning-XiaobeiNeural', 'shaanxi-XiaoniNeural',
              'YunjianNeural', 'YunxiNeural', 'YunyangNeural']

my_title = "iCANX text-to-speech tool"

settings_file = "settings.yaml" # Persisted user choices (output dir, voice, format)
settings = {}
if os.path.exists(settings_file):
    try:
        with open(settings_file, 'r') as f:
            settings = yaml.safe_load(f)
    except (OSError, yaml.YAMLError):
        # An unreadable or corrupt settings file must not prevent start-up.
        traceback.print_exc()
        settings = {}
# safe_load returns None for an empty file and may return a non-mapping for
# hand-edited content; fall back to defaults in both cases.
if not isinstance(settings, dict):
    settings = {}
out_dir = settings.get('out_dir', os.getcwd())
voices_select = settings.get('voices_select', 0)
# A stale or hand-edited index would raise IndexError below.
if not isinstance(voices_select, int) or not 0 <= voices_select < len(voices_list):
    voices_select = 0
mp3_wav = settings.get('mp3_wav', 0)   # 0 = MP3 only, 1 = also produce WAV
voices = 'zh-CN-' + voices_list[voices_select]

# from subprocess import run, PIPE, STDOUT
# def get_media_length(file_path):
# cmdline = f'ffprobe -i "{file_path}" -show_entries format=duration -v quiet -of csv="p=0"'
# # print(cmdline)
# result = run(cmdline, stdout=PIPE, stderr=STDOUT)
# try: lenth = int(float(result.stdout.decode('utf-8').strip()))
# except: lenth = 0; print('ffprobe error in detecting length...')
# return length

from mutagen.mp3 import MP3
def get_media_length(file_path):
    """Return the stream length mutagen reports for the MP3 at *file_path*.

    The caller displays this value as the audio duration in seconds.
    """
    return MP3(file_path).info.length

class EdgeTTSTrans(QThread):
    """Worker thread that converts text to an MP3 file with edge-tts.

    Emits ``sinout`` with ``'OK'`` when the file was written and ``'ERROR'``
    when the conversion raised.
    """
    sinout = pyqtSignal(str)

    def __init__(self, winshot, texts, filename):
        """Remember the job parameters.

        winshot:  the owning main window (stored, not otherwise used here).
        texts:    the text to synthesise.
        filename: output path without extension; '.mp3' is appended.
        """
        super(EdgeTTSTrans, self).__init__()
        self.main_win = winshot
        # Snapshot the module-level rate at creation time.
        self.rates = rates
        self.texts = texts
        self.filename = filename + '.mp3'

    def run(self):
        """Run the conversion and report the outcome through ``sinout``."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
        except Exception:
            # Keep the diagnostic instead of discarding it, and do not
            # swallow SystemExit/KeyboardInterrupt as a bare except would.
            traceback.print_exc()
            self.sinout.emit('ERROR')
        else:
            self.sinout.emit('OK')

    async def edge_tts_trans(self, text):
        """Synthesise *text* with the currently selected voice and save it."""
        communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=voices)
        await communicate.save(self.filename)

class PlayAudioWav(QThread):
    """Worker thread that synthesises a short sample to temp.mp3 and plays it."""

    def __init__(self, winshot, texts):
        """Store the job and arrange for the audition button to be re-enabled.

        winshot: the main window owning the ``try_lisson`` button.
        texts:   the sample text to speak.
        """
        super(PlayAudioWav, self).__init__()
        self.winshot = winshot
        self.rates = rates
        self.texts = texts
        if os.path.exists("temp.mp3"): os.remove("temp.mp3")
        # Widgets must only be touched from the GUI thread. This QThread object
        # lives in the thread that created it, so the slot below is invoked
        # there rather than inside run().
        self.finished.connect(self._enable_audition_button)

    def _enable_audition_button(self):
        """Re-enable the audition button once the worker has finished."""
        self.winshot.try_lisson.setEnabled(True)

    def run(self):
        """Synthesise the sample, play it, then remove the temporary file."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
            playsound("temp.mp3")
        except Exception:
            traceback.print_exc()
        finally:
            try:
                if os.path.exists("temp.mp3"): os.remove("temp.mp3")
            except OSError:
                # Cleanup is best effort; a leftover file is removed by the
                # next audition's constructor.
                traceback.print_exc()

    async def edge_tts_trans(self, text):
        """Save *text* spoken with the current voice to temp.mp3."""
        self.communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=voices)
        try:
            await self.communicate.save('temp.mp3')
        except Exception:
            print('Error in Async...;')
            traceback.print_exc()

class Winshot(QWidget, Ui_txt2voice):
    def __init__(self):
        """Build the form, wire up its widgets, apply window chrome and show it."""
        super().__init__()
        global hwnd, run_flag

        # Counters displayed in the status line.
        self.start_time = 0
        self.voice_len = 0
        self.text_len = 0

        # Widgets must exist before createLayout() connects their signals.
        self.setupUi(self)
        self.createLayout()

        self.setWindowTitle(my_title)
        self.setWindowIcon(hbt.GetIco('ican'))
        self.setFixedSize(self.size())
        self.setWindowFlags(Qt.WindowMinimizeButtonHint)

        # Drives the progress animation while a conversion is running.
        self.my_timer = QTimer(self)

        self.show()
        run_flag = 1

    def show_error(self,str):
        """Show *str*, padded with blank lines, in a modal box with an OK button."""
        QMessageBox.question(self, my_title, '\n\n' + str + '\n\n', QMessageBox.Ok)
    def set_False_Btn(self):
        """Disable the output, start and quit buttons and the output-path label."""
        for widget in (self.outButton, self.startButton, self.quitButton, self.out_path):
            widget.setEnabled(False)
    def set_True_Btn(self):
        """Re-enable the output, start and quit buttons and the output-path label."""
        for widget in (self.outButton, self.startButton, self.quitButton, self.out_path):
            widget.setEnabled(True)

    def start_run(self):
        """Validate the input text and start a background conversion.

        Saves the settings, locks the controls, launches an EdgeTTSTrans
        worker writing to a timestamped file in ``out_dir`` and starts the
        progress timer. Shows an error and returns early if there is no text.
        """
        global stop_flag, bar
        self.save_yaml()
        stop_flag = False
        self.set_False_Btn()
        self.start_time = time.time()

        text = self.textEdit.toPlainText()
        self.text_len = len(text)
        # Whitespace-only input has nothing to speak either.
        if not text.strip():
            self.show_error('The text in the text box cannot be empty...')
            self.set_True_Btn()
            stop_flag = True
            return

        # Start each run from a clean status line and progress position.
        self.voice_len = 0
        bar = 0

        self.filename = out_dir + '/' + time.strftime("%Y_%m_%d_%H.%M.%S")
        self.my_thread = EdgeTTSTrans(self, text, self.filename)
        self.my_thread.sinout.connect(self.signal_coming)
        self.my_thread.start()

        # Connect the tick handler only once; reconnecting on every run would
        # call running() several times per tick.
        if not getattr(self, '_timer_connected', False):
            self.my_timer.timeout.connect(self.running)
            self._timer_connected = True
        self.my_timer.start(500)

    # def EdgeTTSTrans(self, text):
    # async def edge_tts_trans():
    # communicate = edge_tts.Communicate(text=text, rate=rates, voice=voices)
    # await communicate.save('audio.mp3')
    # asyncio.run(edge_tts_trans())

    def signal_coming(self, str):
        """Handle the result signal of the conversion worker.

        str: ``'OK'`` when the MP3 was written, anything else on failure.

        On success the statistics line is updated, the user is offered
        playback and, when WAV output is selected, the MP3 is converted with
        the bundled ffmpeg. The controls are always unlocked afterwards.
        """
        # The worker is done either way, so stop the progress animation.
        self.my_timer.stop()
        try:
            if str == 'OK':
                mp3_path = self.filename + '.mp3'
                self.voice_len = get_media_length(mp3_path)
                total_time = time.time() - self.start_time
                run_stat_text = f"Statistics information: Text length ({self.text_len} words) | Audio length ({self.voice_len:.1f} seconds) | Time consumed ({total_time:.1f} seconds)"
                self.run_state.setText(run_stat_text)
                self.progressBar.setValue(100)
                # Let Qt repaint the full progress bar before the modal dialog.
                QApplication.processEvents()
                r_button = QMessageBox.question(self, my_title,
                                                "\n\n\nComplete this text-to-speech conversion process...\n\nDo you need to play it?\n\n\n",
                                                QMessageBox.Yes | QMessageBox.No)
                if r_button == QMessageBox.Yes:
                    try:
                        os.startfile(mp3_path)
                    except (AttributeError, OSError):
                        # os.startfile is Windows-only; the file may also be gone.
                        print("Unable to play file...")
                if mp3_wav == 1:
                    # Argument list instead of a shell string: paths containing
                    # spaces stay intact and the "\f" in the old literal is no
                    # longer read as a form-feed escape. -y avoids blocking on
                    # an overwrite prompt.
                    ffmpeg = os.path.join('sysenv', 'ffmpeg')
                    try:
                        result = subprocess.run([ffmpeg, '-y', '-i', mp3_path, self.filename + '.wav'])
                        failed = result.returncode != 0
                    except OSError:
                        traceback.print_exc()
                        failed = True
                    if failed:
                        self.show_error('Failed to convert the MP3 file to WAV...')
            else:
                self.show_error('An error occurred during the conversion process...\nPossible reasons:\nThe file or directory cannot contain Chinese...\nThe network is unavailable...\nThe network cannot use a proxy... ')
        finally:
            # Never leave the controls locked, even if something above raised.
            self.set_True_Btn()
            self.progressBar.setValue(0)

    def running(self):
        """Timer tick: advance the progress animation and refresh the status line."""
        global bar
        bar += 2
        total_time = time.time() - self.start_time
        self.progressBar.setValue(bar)
        # The real progress is unknown, so the bar simply wraps around.
        if bar >= 100: bar = 0
        run_stat_text = f"Statistics information: Text length ({self.text_len} words) | Audio length ({self.voice_len:.1f} seconds) | Time consumed ({total_time:.1f} seconds)"
        self.run_state.setText(run_stat_text)

    def helpWin(self):
        """Show the copyright notice in a modal message box."""
        # Named 'message' so the builtin str is not shadowed.
        message = "\n\n\n The copyright of this software belongs to: XXX Website: www.xxx.com \n\n\n"
        QMessageBox.question(self, my_title, message, QMessageBox.Ok)
    def quitWin(self):
        """Ask for confirmation, save the settings and exit if confirmed."""
        # Pass the my_title variable; the quoted "my_title" showed that literal
        # text as the dialog caption.
        r_button = QMessageBox.question(self, my_title,
                                        "\n\n\nExiting will terminate this program...\n\nAre you sure to exit? \n\n\n",
                                        QMessageBox.Yes | QMessageBox.No)
        # Settings are saved whichever button was chosen.
        self.save_yaml()
        if r_button == QMessageBox.Yes:
            sys.exit()

    def outButton_fuc(self):
        """Let the user pick the output folder; keep the old one on cancel."""
        global out_dir
        previous = out_dir
        chosen = QFileDialog.getExistingDirectory(self,'Select the converted output folder', previous)
        # An empty string means the dialog was cancelled.
        out_dir = previous if chosen == '' else chosen
        self.out_path.setText(out_dir)

    def open_fold_fuc(self):
        """Open the output folder with the system file manager (best effort)."""
        try:
            os.startfile(out_dir)
        except (AttributeError, OSError):
            # os.startfile exists only on Windows and fails for a missing
            # folder; stay best-effort but leave a trace instead of silence.
            traceback.print_exc()
    def rates_slider_fuc(self):
        """Mirror the slider value into the label and the global edge-tts rate."""
        global rates
        value = self.rates_slider.value()
        self.audio_rates.setText(f'{value}%')
        # Explicit sign and no spaces, e.g. '+0%', '+25%', '-10%'.
        rates = f'{value:+d}%'

    def click_audio_select(self, str1):
        """Record the combo-box selection as the active voice.

        str1: the activated item text supplied by the signal (unused).
        """
        global voices, voices_select
        index = self.audio_select.currentIndex()
        voices_select = index
        voices = 'zh-CN-' + voices_list[index]
        print('Selected voice:', voices)
    def click_try_lisson(self, str1):
        """Play a short sample of the selected voice in a background thread.

        str1: argument supplied by the clicked signal (unused).
        """
        # Prevent overlapping auditions; the worker re-enables the button.
        self.try_lisson.setEnabled(False)
        self.play_thread = PlayAudioWav(self, "Thank you for choosing my voice")
        self.play_thread.start()
    def click_checkBox_mp3(self):
        """Update the output-format flag when the MP3 box changes state."""
        global mp3_wav
        # stateChanged fires on both check and uncheck, so derive the flag from
        # the box's actual state instead of relying on signal emission order.
        mp3_wav = 0 if self.checkBox_mp3.isChecked() else 1
    def click_checkBox_wav(self):
        """Update the output-format flag when the WAV box changes state."""
        global mp3_wav
        # stateChanged fires on both check and uncheck, so derive the flag from
        # the box's actual state instead of relying on signal emission order.
        mp3_wav = 1 if self.checkBox_wav.isChecked() else 0

    def click_textEdit(self):
        """Echo the edited text to stdout and show its length in the status line."""
        current_text = self.textEdit.toPlainText()
        txt_len = len(current_text)
        print(current_text)
        self.run_state.setText(f"Statistics: Text length ({txt_len} words)")

    def save_yaml(self):
        """Persist the output folder, voice index and format flag to the settings file."""
        settings = {'out_dir': out_dir, 'voices_select': voices_select, 'mp3_wav': mp3_wav}
        # Plain 'w': the file is only written here; 'w + ' is not a valid mode.
        with open(settings_file, 'w') as f:
            yaml.dump(settings, f)

    def createLayout(self):
        """Initialise widget state from the loaded settings and connect all signals."""
        self.out_path.setText(out_dir)

        # Restore the saved output format before the handlers are connected.
        if mp3_wav == 0: self.checkBox_mp3.setChecked(True)
        else: self.checkBox_wav.setChecked(True)
        self.checkBox_mp3.stateChanged.connect(self.click_checkBox_mp3)
        self.checkBox_wav.stateChanged.connect(self.click_checkBox_wav)

        self.outButton.clicked.connect(self.outButton_fuc)
        self.chk_outputfile.clicked.connect(self.open_fold_fuc)
        self.try_lisson.clicked.connect(self.click_try_lisson)

        self.textEdit.textChanged.connect(self.click_textEdit)
        self.textEdit.setPlainText("This software uses Microsoft Edge-TTS to quickly convert text into speech.")

        self.startButton.clicked.connect(self.start_run)
        self.helpButton.clicked.connect(self.helpWin)
        self.quitButton.clicked.connect(self.quitWin)

        self.rates_slider.setTickPosition(QSlider.TicksAbove)
        self.rates_slider.valueChanged.connect(self.rates_slider_fuc)

        # Entry order must match voices_list, which is indexed by combo position.
        self.audio_select.addItems(['Xiaoxiao:Female', 'Xiaoyi:Female', 'Yunxia:Female', 'Northeast:Female', 'Shaanxi:Female',
                                    'Yunjian:Male', 'Yunxi:Male','Yunyang:Male'])
        self.audio_select.setCurrentIndex(voices_select)
        self.audio_select.activated[str].connect(self.click_audio_select)

# Only launch the GUI when run as a script, so importing the module (for
# example to reuse EdgeTTSTrans) does not start the application.
if __name__ == '__main__':
    # The high-DPI attribute is set before the QApplication is created.
    QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
    app = QtWidgets.QApplication(sys.argv)
    winshot = Winshot()
    sys.exit(app.exec_())

The UI code is as follows:

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'txt2audio_UI.ui'
#
# Created by: PyQt5 UI code generator 5.15.2
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_txt2voice(object):
    def setupUi(self, txt2voice):
        """Create and position every child widget of the form on *txt2voice*.

        Widgets are created in the same order as the generated layout; the
        captions are applied by retranslateUi() at the end.
        """

        def place(widget, name, x, y, w, h, styled=False):
            # Set geometry, optionally the 9pt "宋体" font, and the object name.
            widget.setGeometry(QtCore.QRect(x, y, w, h))
            if styled:
                face = QtGui.QFont()
                face.setFamily("宋体")
                face.setPointSize(9)
                widget.setFont(face)
            widget.setObjectName(name)
            return widget

        def rule(name, x, y, w, h):
            # Sunken horizontal separator line.
            frame = QtWidgets.QFrame(txt2voice)
            frame.setGeometry(QtCore.QRect(x, y, w, h))
            frame.setFrameShape(QtWidgets.QFrame.HLine)
            frame.setFrameShadow(QtWidgets.QFrame.Sunken)
            frame.setObjectName(name)
            return frame

        txt2voice.setObjectName("txt2voice")
        txt2voice.resize(435, 431)

        # Bottom button row and the text input area.
        self.startButton = place(QtWidgets.QPushButton(txt2voice), "startButton", 160, 371, 91, 23, styled=True)
        self.helpButton = place(QtWidgets.QPushButton(txt2voice), "helpButton", 270, 371, 61, 23, styled=True)
        self.quitButton = place(QtWidgets.QPushButton(txt2voice), "quitButton", 350, 371, 61, 23, styled=True)
        self.textEdit = place(QtWidgets.QPlainTextEdit(txt2voice), "textEdit", 20, 30, 391, 175)
        self.chk_outputfile = place(QtWidgets.QPushButton(txt2voice), "chk_outputfile", 20, 371, 61, 23, styled=True)

        # Output directory chooser.
        self.outButton = place(QtWidgets.QPushButton(txt2voice), "outButton", 20, 280, 61, 21, styled=True)
        self.out_path = place(QtWidgets.QLabel(txt2voice), "out_path", 90, 280, 311, 20)

        # Voice selection and speaking-rate slider.
        self.lbl_3 = place(QtWidgets.QLabel(txt2voice), "lbl_3", 26, 222, 51, 16, styled=True)
        self.audio_select = place(QtWidgets.QComboBox(txt2voice), "audio_select", 86, 221, 71, 18, styled=True)
        self.rates_slider = place(QtWidgets.QSlider(txt2voice), "rates_slider", 271, 219, 111, 20)
        self.rates_slider.setMinimum(-99)
        self.rates_slider.setTracking(True)
        self.rates_slider.setOrientation(QtCore.Qt.Horizontal)
        self.rates_slider.setInvertedAppearance(False)
        self.rates_slider.setInvertedControls(False)
        self.aud = place(QtWidgets.QLabel(txt2voice), "aud", 211, 220, 61, 20, styled=True)

        # Progress row.
        self.lbl_4 = place(QtWidgets.QLabel(txt2voice), "lbl_4", 27, 336, 61, 16, styled=True)
        self.progressBar = place(QtWidgets.QProgressBar(txt2voice), "progressBar", 87, 340, 321, 8)
        self.progressBar.setProperty("value", 0)
        self.progressBar.setTextVisible(False)
        self.progressBar.setInvertedAppearance(False)

        self.try_lisson = place(QtWidgets.QPushButton(txt2voice), "try_lisson", 160, 220, 31, 21, styled=True)
        self.line = rule("line", 0, 401, 441, 16)
        self.run_state = place(QtWidgets.QLabel(txt2voice), "run_state", 10, 410, 381, 20)
        self.audio_file_path_txt_2 = place(QtWidgets.QLabel(txt2voice), "audio_file_path_txt_2", 23, 10, 241, 16, styled=True)

        # Output format check boxes, both added to one button group.
        self.lbl_5 = place(QtWidgets.QLabel(txt2voice), "lbl_5", 26, 252, 61, 16, styled=True)
        self.checkBox_mp3 = place(QtWidgets.QCheckBox(txt2voice), "checkBox_mp3", 90, 252, 41, 16)
        self.buttonGroup = QtWidgets.QButtonGroup(txt2voice)
        self.buttonGroup.setObjectName("buttonGroup")
        self.buttonGroup.addButton(self.checkBox_mp3)
        self.checkBox_wav = place(QtWidgets.QCheckBox(txt2voice), "checkBox_wav", 140, 252, 68, 16)
        self.buttonGroup.addButton(self.checkBox_wav)

        self.line_2 = rule("line_2", 0, 310, 441, 16)
        self.audio_rates = place(QtWidgets.QLabel(txt2voice), "audio_rates", 388, 220, 31, 20, styled=True)

        self.retranslateUi(txt2voice)
        QtCore.QMetaObject.connectSlotsByName(txt2voice)

    def retranslateUi(self, txt2voice):
        """Apply the translated window title and widget captions."""
        tr = QtCore.QCoreApplication.translate
        txt2voice.setWindowTitle(tr("txt2voice", "AI"))

        # (widget, source text) pairs, all in the "txt2voice" context.
        captions = (
            (self.startButton, "Start Translate"),
            (self.helpButton, "help"),
            (self.quitButton, "Quit"),
            (self.chk_outputfile, "View results"),
            (self.outButton, "Output directory"),
            (self.out_path, "Generated video output directory"),
            (self.lbl_3, "Voice selection:"),
            (self.aud, "Speech speed selection:"),
            (self.lbl_4, "Conversion progress:"),
            (self.try_lisson, "Audition"),
            (self.run_state, "Statistics:"),
            (self.audio_file_path_txt_2, "Please enter text:"),
            (self.lbl_5, "Output format:"),
            (self.checkBox_mp3, "MP3"),
            (self.checkBox_wav, "WAV"),
            (self.audio_rates, "0%"),
        )
        for widget, caption in captions:
            widget.setText(tr("txt2voice", caption))