from google.cloud import texttospeech

# Shared Text-to-Speech client, created once when the module is imported
# and reused by every call to gen_speech().
client = texttospeech.TextToSpeechClient()


def gen_speech(text, language_code, output_file):
    """Synthesize ``text`` to speech and save the audio to ``output_file``.

    The request asks for a neutral-gender voice in the given language and
    for MP3-encoded audio. The bytes returned by the service are written
    to ``output_file`` unchanged.

    Args:
        text: The input string, sent as the ``text`` field of
            ``SynthesisInput``.
        language_code: Language code for the voice selection (for example
            "en-US").
        output_file: Path of the file to write; it is opened in ``'wb'``
            mode.

    Returns:
        None. The only result is the file written to ``output_file``.

    NOTE(review): only the ``text`` field of ``SynthesisInput`` is
    populated here. If callers are expected to pass SSML markup
    (https://www.w3.org/TR/speech-synthesis/), confirm whether ``ssml=``
    was intended instead.
    """
    tts_types = texttospeech.types
    tts_enums = texttospeech.enums

    # Output format: MP3.
    mp3_config = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.MP3,
    )

    # Voice: caller-chosen language, neutral SSML gender.
    voice_params = tts_types.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=tts_enums.SsmlVoiceGender.NEUTRAL,
    )

    # What to say.
    request_input = tts_types.SynthesisInput(text=text)

    # Issue the synthesis request before touching the filesystem, so a
    # failed request does not create or truncate the output file.
    result = client.synthesize_speech(request_input, voice_params, mp3_config)

    # audio_content is binary, hence the binary write mode.
    with open(output_file, 'wb') as audio_file:
        audio_file.write(result.audio_content)