Using the browser's built-in TTS in Vue to convert between speech and text

Foreword

It has been a long time since I last updated this blog. This year I have been following the company's policy (read: being squeezed dry, "bei ya zha"), delivering one requirement after another, so I have had very little time of my own, let alone time to push myself further.

o·(? ? )?o·?(fog)

Then recently, my friend suddenly asked me if I had ever done TTS. My first reaction was?

? ……
Looking innocent

So here comes our topic today
What is TTS?

I did some research. Simply put, TTS (text-to-speech) is a technology that converts text into speech; the reverse direction, speech-to-text, is speech recognition.

  • This involves the concept of speech synthesis. Speech synthesis is a technology that produces artificial speech through mechanical and electronic methods. TTS technology (also known as text-to-speech technology) belongs to speech synthesis
  • The web platform, that is, our browser, already wraps this capability for us behind a built-in API that is easy to call. We can therefore implement both text-to-speech and speech-to-text with native front-end code and no extra library

Therefore, any front-end framework can use this set of logic to implement TTS.

WEB comes with TTS

It has its own official documentation, and we can easily find the implementation logic we need through the API documentation.

Official Chinese API documentation for the browser's built-in TTS

Basic events

Text-to-speech basic events

Here are a few commonly used basic events for you. For more information, please visit the API documentation above.

// Minimal text-to-speech example: build an utterance, log its lifecycle
// events, and speak it when the first <button> on the page is clicked.

// Create the utterance that carries the text to be spoken
const speechUtterance = new SpeechSynthesisUtterance('Hello, how are you?');

// Get the browser's speech synthesis controller
const synthesis = window.speechSynthesis;

// Register the event handlers for speech synthesis

// Speaking has started
speechUtterance.onstart = () => {
  console.log('Speech synthesis started.');
};

// Speaking has finished
speechUtterance.onend = () => {
  console.log('Speech synthesis ended.');
};

// Speaking was paused
speechUtterance.onpause = () => {
  console.log('Speech synthesis paused.');
};

// Speaking was resumed after a pause
speechUtterance.onresume = () => {
  console.log('Speech synthesis resumed.');
};

// A word or sentence boundary was reached while speaking
speechUtterance.onboundary = (event) => {
  console.log(`Speech boundary reached at character index ${event.charIndex}.`);
};

// Speaking failed; event.error carries the error code
speechUtterance.onerror = (event) => {
  console.error(`Speech synthesis failed: ${event.error}`);
};

// Start speech synthesis when the button is clicked.
// querySelector returns null when the page has no <button>, so guard it.
const btn = document.querySelector('button');
if (btn) {
  btn.addEventListener('click', () => {
    synthesis.speak(speechUtterance);
  });
}

Speech to text basic events

// Minimal speech-to-text example: create a recognizer, log its lifecycle
// events and the recognized text, and start it on a button click.

// Resolve the constructor: some browsers only expose the webkit-prefixed
// name, so calling `new window.SpeechRecognition()` directly can throw there.
const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
if (!SpeechRecognitionImpl) {
  throw new Error('Speech recognition is not supported in this browser.');
}

// Create the SpeechRecognition object
const recognition = new SpeechRecognitionImpl();

// Register the event handlers for speech recognition

// Recognition has started
recognition.onstart = () => {
  console.log('Speech recognition started.');
};

// Recognition has ended
recognition.onend = () => {
  console.log('Speech recognition ended.');
};

// A result is available: read the top alternative of the first result
recognition.onresult = (event) => {
  const transcript = event.results[0][0].transcript;
  console.log(`Recognized speech: ${transcript}`);
};

// Recognition failed; event.error carries the error code
recognition.onerror = (event) => {
  console.error(`Speech recognition failed: ${event.error}`);
};

// Start speech recognition when the button is clicked.
// querySelector returns null when the page has no <button>, so guard it.
const btn = document.querySelector('button');
if (btn) {
  btn.addEventListener('click', () => {
    recognition.start();
  });
}

VUE Project

I have put the results of this research on my git, the following are screenshots from my project


1

The project also contains a demo in which the text changes color as it is read aloud; this is the feature my friend actually needed to research. Its interface is almost the same as the ones shown here. At the end of the article I give the Git link to my project for your reference.

Speech to text

In my project, the code for Vue to convert speech to text is as follows:

  • interface
<template>
  <div>
    <!-- Page header; the back arrow calls goBack -->
    <el-page-header @back="goBack" content="Speech to text"/>
    <div class="bank"></div>
    <el-card header="speech to text">
      <el-card>
        <!-- Read-only field bound to `word`, which receives the recognized text -->
        <el-input :readonly="true" id="word" v-model="word"></el-input>
      </el-card>
      <el-card>
        <!-- Starts recognition; the label switches on `isListening` -->
        <el-button type="primary" @click="audioCHangeWord"><span v-if="isListening">Voice recognition in progress...</span><span v-else>Voice recognition</span></el-button>
      </el-card>
    </el-card>
  </div>
</template>
  • logic
<script>
/**
 * Speech-to-text page component.
 *
 * Clicking the button starts one browser speech-recognition session
 * (language zh-CN) and writes the recognized transcript into `word`.
 */
const AudioToWord = {
  name: "AudioToWord",
  data() {
    return {
      word: "", // Recognized text bound to the input
      isListening: false, // Whether a recognition session is in progress
    };
  },
  methods: {
    /**
     * Start a speech-recognition session and store its transcript in `word`.
     *
     * Does nothing while a session is already running. Shows a message and
     * returns when the browser exposes no recognition constructor.
     */
    audioCHangeWord() {
      // Ignore clicks while listening so sessions never overlap
      if (this.isListening) {
        return;
      }

      // Feature-detect through `window`: referencing an undefined bare global
      // would throw before any fallback could run.
      const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
      if (!Recognition) {
        this.$message("Speech recognition is not supported in this browser");
        return;
      }

      this.word = "";
      const recognition = new Recognition();
      // Language setting
      recognition.lang = "zh-CN";

      // Handlers are attached before start() so no event can be missed.

      // Recognition result: take the top alternative of the first result
      recognition.onresult = (event) => {
        this.word = event.results[0][0].transcript;
      };

      // Error event
      recognition.onerror = (event) => {
        this.isListening = false;
        this.$message("Failed to monitor voice:" + event.error);
        console.error(event.error);
      };

      // End event (fires after success, after an error, and after a user stop)
      recognition.onend = () => {
        this.isListening = false;
        console.log("Speech recognition stopped");
      };

      // Start speech recognition; only mark as listening once start() succeeded
      recognition.start();
      this.isListening = true;
    },
    /** Navigate back to the entry page. */
    goBack() {
      this.$router.push({ path: "/entry" });
    },
  },
};

export default AudioToWord;
</script>

Text to speech

  • interface
<template>
  <div>
    <!-- Page header; the back arrow calls goBack -->
    <el-page-header @back="goBack" content="Text to Speech"/>
    <div class="bank"></div>
    <el-card header="text-to-speech">
      <!-- Text to be spoken, bound to `word` and limited to 300 characters -->
      <el-input
        id="word"
        type="textarea"
        placeholder="Please enter text"
        v-model="word"
        maxlength="300"
        rows="4"
        show-word-limit
      >
      </el-input>
      <div class="bank"></div>
      <el-card>
        <!-- Speaks the current text -->
        <el-button @click="changeToAudio" type="primary">Change to voice</el-button>
      </el-card>
      <div class="bank"></div>
      <el-card>
          <!-- Playback controls for the current speech -->
          <el-button @click="pause" type="warning">Pause</el-button>
          <el-button @click="resume" type="success">Continue</el-button>
          <el-button @click="cancel" type="info">Cancel</el-button>
      </el-card>
      <div class="bank"></div>
      <el-card>
        <!-- Logs the available voices to the browser console -->
        <el-button @click="getvoice" type="primary">Get speech synthesis data (F12)</el-button>
      </el-card>
    </el-card>
  </div>
</template>
  • logic
<script>
// Preferred voice. It is matched against the voices the browser reports and
// silently skipped when the browser does not offer it.
const PREFERRED_VOICE = "Microsoft Huihui - Chinese (Simplified, PRC)";

/**
 * Text-to-speech page component.
 *
 * Speaks the text in `word` through `window.speechSynthesis` and offers
 * pause / continue / cancel controls. `isPaused` mirrors whether the user has
 * paused the current speech.
 */
const WordToAudio = {
  name: "WordToAudio",
  data() {
    return {
      word: "", // Text to be spoken
      isPaused: false, // Whether the current speech is paused by the user
    };
  },
  methods: {
    /**
     * Speak the current text.
     *
     * Shows a message and returns when the text is empty, when speech is
     * paused, or when something is already being spoken.
     */
    changeToAudio() {
      if (!this.word) {
        this.$message("Please enter text");
        return;
      }

      const synth = window.speechSynthesis;
      if (this.isPaused) {
        this.$message("The current voice has been paused, please click to continue!");
        return;
      }
      if (synth.speaking) {
        this.$message("There is currently a voice playing!");
        return;
      }

      // Make sure the synthesizer itself is not left in a paused state
      // before queueing a new utterance.
      synth.resume();

      const speech = new SpeechSynthesisUtterance();
      speech.text = this.word; // Content
      speech.lang = "zh-cn"; // Language

      // Utterances have no `voiceURI` property; a voice is selected by
      // assigning one of the objects returned by getVoices() to `voice`.
      const preferred = synth
        .getVoices()
        .find((voice) => voice.name === PREFERRED_VOICE || voice.voiceURI === PREFERRED_VOICE);
      if (preferred) {
        speech.voice = preferred;
      }

      speech.volume = 0.7; // Volume, range 0 to 1, default 1
      speech.rate = 1; // Speech rate multiplier, range 0.1 to 10, default 1 (2 is twice as fast)
      speech.pitch = 1; // Pitch, range 0 (lowest) to 2 (highest), default 1

      // Clear the paused flag when this utterance finishes or fails, so the
      // next "Change to voice" click is not blocked by stale state.
      const clearPaused = () => {
        this.isPaused = false;
      };
      speech.onend = clearPaused;
      speech.onerror = clearPaused;

      // No highlighting is attempted here: a <textarea> does not render child
      // elements, so wrapping its contents in a <span> has no visible effect.
      synth.speak(speech);
    },
    /** Pause the current speech; does nothing when nothing is being spoken. */
    pause() {
      const synth = window.speechSynthesis;
      if (!synth.speaking) {
        return;
      }
      this.isPaused = true;
      synth.pause();
    },
    /** Continue a paused speech. */
    resume() {
      this.isPaused = false;
      window.speechSynthesis.resume();
    },
    /** Cancel all queued speech and clear the paused flag. */
    cancel() {
      this.isPaused = false;
      window.speechSynthesis.cancel();
    },
    /** Log the voices offered by the browser to the console. */
    getvoice() {
      console.log(window.speechSynthesis.getVoices());
    },
    /** Navigate back to the entry page. */
    goBack() {
      this.$router.push({ path: "/entry" });
    },
  },
};

export default WordToAudio;
</script>

<style>
/* Padding for the empty spacer <div class="bank"> elements used in the templates */
.bank {
  padding: 10px;
}
</style>

git link

WEB comes with TTS to realize voice and text conversion to git

Conclusion

The above is how to use the browser's built-in TTS from Vue to convert between speech and text. If I have more to add, I will update this article.