import builtins
from datetime import datetime
from typing import Any, Optional
from pydantic import TypeAdapter
from wxc_sdk.api_child import ApiChild
from wxc_sdk.base import ApiModel
from wxc_sdk.base import SafeEnum as Enum
# Names exported by this module.
__all__ = [
    'TtsUsageResponse',
    'TtsStatusResponse',
    'TtsStatusResponseStatus',
    'TtsVoice',
    'TextToSpeechApi',
]
[docs]
class TtsUsageResponse(ApiModel):
    # Text-to-speech usage counters; `TextToSpeechApi.usage` validates its response into this model.
    # Every field is optional and defaults to None.

    #: Count of text-to-speech API calls already made within the current time window.
    no_of_api_calls: Optional[int] = None
    #: Upper limit of text-to-speech API calls permitted within the current time window.
    max_allowed_api_calls: Optional[int] = None
    #: Point in time at which the usage counter resets. Returned once the maximum number of allowed API calls in
    #: the time window has been reached.
    usage_reset_timestamp: Optional[datetime] = None
[docs]
class TtsStatusResponseStatus(str, Enum):
    # Values of the `status` field of `TtsStatusResponse`.

    #: Reported while the generation request is in progress.
    in_progress = 'IN_PROGRESS'
    #: With this status `prompt_url`, `kms_key_uri` and `file_uri` of the status response are available.
    success = 'SUCCESS'
    #: With this status `error_message` of the status response is present.
    failure = 'FAILURE'
[docs]
class TtsStatusResponse(ApiModel):
    # State of a single text-to-speech generation request; `TextToSpeechApi.status` validates its response into
    # this model. Every field is optional and defaults to None.

    #: Unique identifier of the text-to-speech generation request.
    id: Optional[str] = None
    #: ID of the voice the audio prompt was generated with.
    voice: Optional[str] = None
    #: Input text the audio prompt was generated from.
    text: Optional[str] = None
    #: Language code the audio prompt was generated with.
    language_code: Optional[str] = None
    #: Current status of the text-to-speech generation request.
    status: Optional[TtsStatusResponseStatus] = None
    #: URL for downloading the encrypted audio prompt. Only available when status is `SUCCESS`.
    prompt_url: Optional[str] = None
    #: KMS key URI needed to decrypt the prompt downloaded from `promptUrl`. Only available when status is
    #: `SUCCESS`.
    kms_key_uri: Optional[str] = None
    #: File URI that can be used when configuring an announcement. Only available when status is `SUCCESS`.
    file_uri: Optional[str] = None
    #: Detailed message describing why generation failed. Only present when status is `FAILURE`.
    error_message: Optional[str] = None
[docs]
class TtsVoice(ApiModel):
    # One text-to-speech voice; `TextToSpeechApi.voices` returns a list of these.

    #: Voice ID; this is the value used to generate an audio prompt.
    id: Optional[str] = None
    #: Label of the voice, including the voice name and gender.
    label: Optional[str] = None
[docs]
class TextToSpeechApi(ApiChild, base='telephony/config'):
    """
    Text-to-speech (TTS) API

    Text-to-speech efficiently generates prompts, greetings, and announcements by converting written text into
    synthesized audio using the specified voice. The generated audio functions like a recorded WAV file,
    eliminating the need for manual recording.

    The methods start a generation request, read the status of a request, list the available voices and read the
    current API usage.
    """

    def generate(self, voice: str, text: str, language_code: str, org_id: str = None) -> str:
        """
        Generate a Text-to-Speech Prompt

        Generate a text-to-speech prompt from the provided text, voice, and language.

        This API requires a full administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_write`.

        :param voice: The voice ID used to generate the audio prompt. Use the List Text-to-Speech Voices API to
            retrieve available voices.
        :type voice: str
        :param text: The text to convert to speech.
        :type text: str
        :param language_code: The language code used to generate the audio prompt. Use the Read the List of
            Announcement Languages API to retrieve supported language codes.
        :type language_code: str
        :param org_id: Generate text-to-speech for this organization.
        :type org_id: str
        :return: value of the `id` key of the response
        :rtype: str
        """
        # orgId is only sent when the caller provided one
        query: dict[str, Any] = {} if org_id is None else {'orgId': org_id}
        payload: dict[str, Any] = {
            'voice': voice,
            'text': text,
            'languageCode': language_code,
        }
        endpoint = self.ep('textToSpeech/actions/generate/invoke')
        response = super().post(endpoint, params=query, json=payload)
        return response['id']

    def usage(self, org_id: str = None) -> TtsUsageResponse:
        """
        Get Text-to-Speech Usage

        Retrieve text-to-speech usage information, including the number of API calls made, the maximum allowed
        within the time window, and the timestamp indicating when the usage will reset.

        This API requires a full or read-only administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_read`.

        :param org_id: Get text-to-speech usage for this organization.
        :type org_id: str
        :rtype: :class:`TtsUsageResponse`
        """
        # orgId is only sent when the caller provided one
        query: dict[str, Any] = {} if org_id is None else {'orgId': org_id}
        endpoint = self.ep('textToSpeech/usage')
        response = super().get(endpoint, params=query)
        return TtsUsageResponse.model_validate(response)

    def voices(self, org_id: str = None) -> builtins.list[TtsVoice]:
        """
        List Text-to-Speech Voices

        Fetch a list of available text-to-speech voices. Use the returned voice ID in the generation request.

        This API requires a full or read-only administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_read`.

        :param org_id: List text-to-speech voices supported for this organization.
        :type org_id: str
        :rtype: list[TtsVoice]
        """
        # orgId is only sent when the caller provided one
        query: dict[str, Any] = {} if org_id is None else {'orgId': org_id}
        endpoint = self.ep('textToSpeech/voices')
        response = super().get(endpoint, params=query)
        # the voice entries are taken from the `voices` key of the response
        voice_list_adapter = TypeAdapter(list[TtsVoice])
        return voice_list_adapter.validate_python(response['voices'])

    def status(self, tts_id: str, org_id: str = None) -> TtsStatusResponse:
        """
        Get Text-to-Speech Generation Status

        Get the status of a text-to-speech generation request by its ID. If the status is SUCCESS, the response
        includes `promptUrl`, `kmsKeyUri`, and `fileUri` to preview or use the audio prompt.

        To preview the audio prompt:

        1. Download the KMS key - use the Webex Node.js SDK and provide `kmsKeyUri` to download the key from KMS.

        2. Download the encrypted audio - the encrypted audio file content is stored in cloud and can be retrieved
           using `promptUrl`.

        3. Decrypt the audio content - use the jose library to decrypt the content downloaded from `promptUrl`
           using the downloaded key.

        This API requires a full or read-only administrator or location administrator auth token with a scope of
        `spark-admin:telephony_config_read`.

        :param tts_id: Unique identifier of the text-to-speech generation request.
        :type tts_id: str
        :param org_id: Get text-to-speech status for this organization.
        :type org_id: str
        :rtype: :class:`TtsStatusResponse`
        """
        # orgId is only sent when the caller provided one
        query: dict[str, Any] = {} if org_id is None else {'orgId': org_id}
        endpoint = self.ep(f'textToSpeech/{tts_id}')
        response = super().get(endpoint, params=query)
        return TtsStatusResponse.model_validate(response)