Use the tool directly (without agent)
from agentlego.apis import load_tool
# load tool
tool = load_tool('TextToSpeech', device='cuda')
# apply tool
audio = tool('Hello, this is a text to audio demo.')
print(audio)
With Lagent
from lagent import ReAct, GPTAPI, ActionExecutor
from agentlego.apis import load_tool
# load tools and build agent
# please set `OPENAI_API_KEY` in your environment variable.
tool = load_tool('TextToSpeech', device='cuda').to_lagent()
agent = ReAct(GPTAPI(temperature=0.), action_executor=ActionExecutor([tool]))
# agent running with the tool.
ret = agent.chat(f'Please introduce the highest mountain and speak out.')
for step in ret.inner_steps[1:]:
    print('------')
    print(step['content'])
Before using the tool, please make sure the required dependencies are installed by running the command below.
pip install TTS langid
This tool uses an XTTS-v2 model by default. See the model's repository for details.
Use the tool directly (without agent)
from agentlego.apis import load_tool
# load tool
tool = load_tool('SpeechToText', device='cuda')
# apply tool
text = tool('examples/demo.m4a')
print(text)
With Lagent
from lagent import ReAct, GPTAPI, ActionExecutor
from agentlego.apis import load_tool
# load tools and build agent
# please set `OPENAI_API_KEY` in your environment variable.
tool = load_tool('SpeechToText', device='cuda').to_lagent()
agent = ReAct(GPTAPI(temperature=0.), action_executor=ActionExecutor([tool]))
# agent running with the tool.
audio_path = 'examples/demo.m4a'
ret = agent.chat(f'Please tell me the content of the audio `{audio_path}`')
for step in ret.inner_steps[1:]:
    print('------')
    print(step['content'])
Before using the tool, please make sure the required dependencies are installed by running the command below.
pip install -U transformers
This tool uses a Whisper model by default. See the following paper for details.
@misc{radford2022whisper,
doi = {10.48550/ARXIV.2212.04356},
url = {https://arxiv.org/abs/2212.04356},
author = {Radford, Alec and Kim, Jong Wook and Xu, Tao and Brockman, Greg and McLeavey, Christine and Sutskever, Ilya},
title = {Robust Speech Recognition via Large-Scale Weak Supervision},
publisher = {arXiv},
year = {2022},
copyright = {arXiv.org perpetual, non-exclusive license}
}