Source code for wxc_sdk.telephony.text_to_speech

import builtins
from datetime import datetime
from typing import Any, Optional

from pydantic import TypeAdapter

from wxc_sdk.api_child import ApiChild
from wxc_sdk.base import ApiModel
from wxc_sdk.base import SafeEnum as Enum

__all__ = ['TtsUsageResponse', 'TtsStatusResponse', 'TtsVoice', 'TextToSpeechApi']


class TtsUsageResponse(ApiModel):
    """
    Text-to-speech usage counters for the current time window.

    All fields are optional and default to ``None``.
    """
    #: Count of text-to-speech API calls already made in the current time window.
    no_of_api_calls: Optional[int] = None
    #: Upper limit of text-to-speech API calls permitted in the current time window.
    max_allowed_api_calls: Optional[int] = None
    #: Point in time at which the usage counter resets. Returned once the maximum number of allowed API calls in the
    #: time window has been reached.
    usage_reset_timestamp: Optional[datetime] = None
class TtsStatusResponseStatus(str, Enum):
    """
    Possible states of a text-to-speech generation request.
    """
    #: The generation request is reported as ``IN_PROGRESS``.
    in_progress = 'IN_PROGRESS'
    #: The generation request is reported as ``SUCCESS``.
    success = 'SUCCESS'
    #: The generation request is reported as ``FAILURE``.
    failure = 'FAILURE'
class TtsStatusResponse(ApiModel):
    """
    Status details of a single text-to-speech generation request.

    All fields are optional and default to ``None``; several are only populated for a particular
    :attr:`status` value, as noted on the individual attributes.
    """
    #: Unique identifier of the text-to-speech generation request.
    id: Optional[str] = None
    #: ID of the voice the audio prompt was generated with.
    voice: Optional[str] = None
    #: Input text the audio prompt was generated from.
    text: Optional[str] = None
    #: Language code the audio prompt was generated with.
    language_code: Optional[str] = None
    #: Current status of the text-to-speech generation request.
    status: Optional[TtsStatusResponseStatus] = None
    #: URL for downloading the encrypted audio prompt. Only available when status is `SUCCESS`.
    prompt_url: Optional[str] = None
    #: KMS key URI needed to decrypt the prompt downloaded from `promptUrl`. Only available when status is
    #: `SUCCESS`.
    kms_key_uri: Optional[str] = None
    #: File URI that can be used when configuring an announcement. Only available when status is `SUCCESS`.
    file_uri: Optional[str] = None
    #: Detailed message describing why generation failed. Only present when status is `FAILURE`.
    error_message: Optional[str] = None
class TtsVoice(ApiModel):
    """
    A voice available for text-to-speech prompt generation.
    """
    #: ID of the voice; this is the value used to generate an audio prompt.
    id: Optional[str] = None
    #: Label of the voice, including the voice name and gender.
    label: Optional[str] = None
class TextToSpeechApi(ApiChild, base='telephony/config'):
    """
    Text-to-speech API

    Generate text-to-speech prompts, read usage counters, list the available voices, and poll the status of a
    generation request. All endpoint URLs are obtained from :meth:`ep` with paths below ``textToSpeech``.
    """

    def generate(self, voice: str, text: str, language_code: str, org_id: Optional[str] = None) -> str:
        """
        Generate a Text-to-Speech Prompt

        Generate a text-to-speech prompt from the provided text, voice, and language.

        Text-to-speech (TTS) efficiently generates prompts, greetings, and announcements by converting written text
        into synthesized audio using the specified voice. The generated audio functions like a recorded WAV file,
        eliminating the need for manual recording.

        This API requires a full administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_write`.

        :param voice: The voice ID used to generate the audio prompt. Use the List Text-to-Speech Voices API to
            retrieve available voices.
        :type voice: str
        :param text: The text to convert to speech.
        :type text: str
        :param language_code: The language code used to generate the audio prompt. Use the Read the List of
            Announcement Languages API to retrieve supported language codes.
        :type language_code: str
        :param org_id: Generate text-to-speech for this organization. Omitted from the request when `None`.
        :type org_id: str
        :return: value of the `id` field of the response; presumably the request ID accepted by :meth:`status`
        :rtype: str
        """
        params: dict[str, Any] = dict()
        if org_id is not None:
            params['orgId'] = org_id
        body: dict[str, Any] = {
            'voice': voice,
            'text': text,
            'languageCode': language_code,
        }
        url = self.ep('textToSpeech/actions/generate/invoke')
        data = super().post(url, params=params, json=body)
        return data['id']

    def usage(self, org_id: Optional[str] = None) -> TtsUsageResponse:
        """
        Get Text-to-Speech Usage

        Retrieve text-to-speech usage information, including the number of API calls made, the maximum allowed
        within the time window, and the timestamp indicating when the usage will reset.

        Text-to-speech (TTS) efficiently generates prompts, greetings, and announcements by converting written text
        into synthesized audio using the specified voice. The generated audio functions like a recorded WAV file,
        eliminating the need for manual recording.

        This API requires a full or read-only administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_read`.

        :param org_id: Get text-to-speech usage for this organization. Omitted from the request when `None`.
        :type org_id: str
        :return: the response validated as a :class:`TtsUsageResponse`
        :rtype: :class:`TtsUsageResponse`
        """
        params: dict[str, Any] = dict()
        if org_id is not None:
            params['orgId'] = org_id
        url = self.ep('textToSpeech/usage')
        data = super().get(url, params=params)
        return TtsUsageResponse.model_validate(data)

    def voices(self, org_id: Optional[str] = None) -> builtins.list[TtsVoice]:
        """
        List Text-to-Speech Voices

        Fetch a list of available text-to-speech voices. Use the returned voice ID in the generation request.

        Text-to-speech (TTS) efficiently generates prompts, greetings, and announcements by converting written text
        into synthesized audio using the specified voice. The generated audio functions like a recorded WAV file,
        eliminating the need for manual recording.

        This API requires a full or read-only administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_read`.

        :param org_id: List text-to-speech voices supported for this organization. Omitted from the request when
            `None`.
        :type org_id: str
        :return: the entries of the `voices` field of the response, validated as :class:`TtsVoice` instances
        :rtype: list[TtsVoice]
        """
        params: dict[str, Any] = dict()
        if org_id is not None:
            params['orgId'] = org_id
        url = self.ep('textToSpeech/voices')
        data = super().get(url, params=params)
        return TypeAdapter(list[TtsVoice]).validate_python(data['voices'])

    def status(self, tts_id: str, org_id: Optional[str] = None) -> TtsStatusResponse:
        """
        Get Text-to-Speech Generation Status

        Get the status of a text-to-speech generation request by its ID.

        If the status is SUCCESS, the response includes `promptUrl`, `kmsKeyUri`, and `fileUri` to preview or use
        the audio prompt.

        To preview the audio prompt:

        1. Download the KMS key - use the Webex Node.js SDK and provide `kmsKeyUri` to download the key from KMS.

        2. Download the encrypted audio - The encrypted audio file content is stored in cloud and can be retrieved
           using `promptURL`.

        3. Decrypt the audio content - Use the jose library to decrypt the content downloaded from `promptUrl`
           using the downloaded key.

        Text-to-speech (TTS) efficiently generates prompts, greetings, and announcements by converting written text
        into synthesized audio using the specified voice. The generated audio functions like a recorded WAV file,
        eliminating the need for manual recording.

        This API requires a full or read-only administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_read`.

        :param tts_id: Unique identifier of the text-to-speech generation request. Inserted as-is into the
            endpoint path.
        :type tts_id: str
        :param org_id: Get text-to-speech status for this organization. Omitted from the request when `None`.
        :type org_id: str
        :return: the response validated as a :class:`TtsStatusResponse`
        :rtype: :class:`TtsStatusResponse`
        """
        params: dict[str, Any] = dict()
        if org_id is not None:
            params['orgId'] = org_id
        url = self.ep(f'textToSpeech/{tts_id}')
        data = super().get(url, params=params)
        return TtsStatusResponse.model_validate(data)