34 lines
1.2 KiB
Python
34 lines
1.2 KiB
Python
|
from google.cloud import texttospeech
|
||
|
|
||
|
# Module-level Text-to-Speech client: created once when this module is
# imported and reused by gen_speech() for every synthesis request.
client = texttospeech.TextToSpeechClient()
|
||
|
|
||
|
|
||
|
def gen_speech(text, language_code, output_file):
    """Synthesize ``text`` to speech and save the audio as an MP3 file.

    The input is passed through the ``text`` field of ``SynthesisInput``
    (not ``ssml``), a gender-neutral voice is requested for
    ``language_code``, and the request is sent with the module-level
    ``client``.

    NOTE(review): SSML input (https://www.w3.org/TR/speech-synthesis/)
    would presumably need the ``ssml`` field instead -- confirm against the
    Text-to-Speech API documentation before passing markup here.

    Args:
        text: The string to be spoken.
        language_code: Language code used for voice selection, e.g. "en-US".
        output_file: Path of the file to write. It is opened in ``'wb'``
            mode, so an existing file is overwritten.

    Returns:
        None. The synthesized audio is written to ``output_file``.
    """
    # MP3 is the only output encoding this helper requests.
    mp3_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    # What to say ...
    spoken_input = texttospeech.types.SynthesisInput(text=text)

    # ... and which voice to say it with.
    neutral_voice = texttospeech.types.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)

    # Arguments stay positional (input, voice, audio config), exactly as the
    # call was originally written.
    reply = client.synthesize_speech(spoken_input, neutral_voice, mp3_config)

    # audio_content is raw bytes, hence the binary write mode.
    with open(output_file, 'wb') as mp3_file:
        mp3_file.write(reply.audio_content)
|