Get started
The Verbum API provides fast and easy programmatic access to AI-powered speech-to-text transcription.
In just two simple steps you can transcribe any audio or microphone input, without having to specify the language being spoken.
To use the Verbum API services you need an active account; if you don't have one, you can get one HERE.
login
To use the Verbum API you need to log in. The login returns a verbumToken, which expires every 24 hours.
To log in, make a POST request to the following URL:
https://verbumapi.onemeta.ai:3001/login
BODY PARAMETERS (Form Data)
Field | Type | Description |
---|---|---|
name | String | Your API username. |
password | String | Your API password. |
Response (String)
Field | Type | Description |
---|---|---|
token | String | Your verbumToken for the next 24 hours. |
Examples
# Log in to the Verbum API, sending the credentials as multipart form fields.
curl -L -X POST \
-F 'name="my-user-name"' \
-F 'password="my-user-password"' \
'https://verbumapi.onemeta.ai:3001/login'
// Log in to the Verbum API, sending the credentials as multipart form data.
const form = new FormData();
// FormData sends values verbatim, so they must not be wrapped in extra quote
// characters (the quotes in the curl example are curl syntax, not part of the value).
form.append('name', 'my-user-name');
form.append('password', 'my-user-password');
fetch('https://verbumapi.onemeta.ai:3001/login', {
  method: 'POST',
  body: form
})
  .then(response => {
    // fetch only rejects on network failure, so HTTP errors are checked explicitly.
    if (!response.ok) {
      throw new Error(`Login failed with status ${response.status}`);
    }
    return response.text();
  })
  .then(body => {
    // NOTE(review): `body` presumably carries the verbumToken — confirm the exact response format.
    return body;
  })
  .catch(error => console.error(error));
/// Logs in to the Verbum API by sending the credentials as a multipart/form-data
/// POST to `/login`, then loads the response body into a JSON object.
///
/// Failures (connect error, request error, HTTP status >= 400, unparsable
/// response) are reported with `print` and end the function early.
func chilkatTest() {
    let rest = CkoRest()

    // URL: https://verbumapi.onemeta.ai:3001/login
    let bTls: Bool = true
    let port: Int = 3001
    let bAutoReconnect: Bool = true
    let success: Bool = rest.Connect("verbumapi.onemeta.ai", port: port, tls: bTls, autoReconnect: bAutoReconnect)
    guard success else {
        print("ConnectFailReason: \(rest.ConnectFailReason.intValue)")
        print("\(rest.LastErrorText)")
        return
    }

    // Note: The above code does not need to be repeatedly called for each REST request.
    // The rest object can be setup once, and then many requests can be sent. Chilkat will automatically
    // reconnect within a FullRequest* method as needed. It is only the very first connection that is explicitly
    // made via the Connect method.

    // Part 1: the "name" form field carries the API username.
    rest.PartSelector = "1"
    rest.AddHeader("Content-Disposition", value: "form-data; name=\"name\"")
    rest.SetMultipartBodyString("my-user-name")

    // Part 2: the "password" form field carries the API password.
    rest.PartSelector = "2"
    rest.AddHeader("Content-Disposition", value: "form-data; name=\"password\"")
    rest.SetMultipartBodyString("my-user-password")

    // Selector "0" is set before adding the top-level Content-Type header.
    rest.PartSelector = "0"
    rest.AddHeader("Content-Type", value: "multipart/form-data")

    let strResponseBody: String? = rest.FullRequestMultipart("POST", uriPath: "/login")
    if rest.LastMethodSuccess != true {
        print("\(rest.LastErrorText)")
        return
    }

    let respStatusCode: Int = rest.ResponseStatusCode.intValue
    print("response status code = \(respStatusCode)")
    if respStatusCode >= 400 {
        print("Response Status Code = \(respStatusCode)")
        print("Response Header:")
        print("\(rest.ResponseHeader)")
        print("Response Body:")
        // Avoid force-unwrapping: a missing body is printed as an empty string.
        print("\(strResponseBody ?? "")")
        return
    }

    let jsonResponse = CkoJsonObject()
    // Report a response that cannot be parsed instead of silently ignoring it.
    if jsonResponse.Load(strResponseBody) != true {
        print("\(jsonResponse.LastErrorText)")
        return
    }
}
Speech to text
To use the speech-to-text service you have to open a WebSocket connection and send a series of parameters:
Translation Code | url | parameters |
---|---|---|
Websocket | wss://verbumapi.onemeta.ai:3001/ws/{your-verbum-token} |
Object to send: audio buffer from the microphone (see example). The audio sent from the microphone must use a specific configuration (the example below uses 16 kHz, 16-bit, mono, little-endian PCM):
|
list of supported languages | this is the list/json of supported languages to transcribe | {"translation":{"af":{"name":"Afrikaans","nativeName":"Afrikaans","dir":"ltr"},"am":{"name":"Amharic","nativeName":"አማርኛ","dir":"ltr"},"ar":{"name":"Arabic","nativeName":"العربية","dir":"rtl"},"as":{"name":"Assamese","nativeName":"অসমীয়া","dir":"ltr"},"az":{"name":"Azerbaijani","nativeName":"Azərbaycan","dir":"ltr"},"ba":{"name":"Bashkir","nativeName":"Bashkir","dir":"ltr"},"bg":{"name":"Bulgarian","nativeName":"Български","dir":"ltr"},"bn":{"name":"Bangla","nativeName":"বাংলা","dir":"ltr"},"bo":{"name":"Tibetan","nativeName":"བོད་སྐད་","dir":"ltr"},"bs":{"name":"Bosnian","nativeName":"Bosnian","dir":"ltr"},"ca":{"name":"Catalan","nativeName":"Català","dir":"ltr"},"cs":{"name":"Czech","nativeName":"Čeština","dir":"ltr"},"cy":{"name":"Welsh","nativeName":"Cymraeg","dir":"ltr"},"da":{"name":"Danish","nativeName":"Dansk","dir":"ltr"},"de":{"name":"German","nativeName":"Deutsch","dir":"ltr"},"dv":{"name":"Divehi","nativeName":"ދިވެހިބަސް","dir":"rtl"},"el":{"name":"Greek","nativeName":"Ελληνικά","dir":"ltr"},"en":{"name":"English","nativeName":"English","dir":"ltr"},"es":{"name":"Spanish","nativeName":"Español","dir":"ltr"},"et":{"name":"Estonian","nativeName":"Eesti","dir":"ltr"},"eu":{"name":"Basque","nativeName":"Euskara","dir":"ltr"},"fa":{"name":"Persian","nativeName":"فارسی","dir":"rtl"},"fi":{"name":"Finnish","nativeName":"Suomi","dir":"ltr"},"fil":{"name":"Filipino","nativeName":"Filipino","dir":"ltr"},"fj":{"name":"Fijian","nativeName":"Na Vosa Vakaviti","dir":"ltr"},"fo":{"name":"Faroese","nativeName":"Føroyskt","dir":"ltr"},"fr":{"name":"French","nativeName":"Français","dir":"ltr"},"fr-CA":{"name":"French (Canada)","nativeName":"Français 
(Canada)","dir":"ltr"},"ga":{"name":"Irish","nativeName":"Gaeilge","dir":"ltr"},"gl":{"name":"Galician","nativeName":"Galego","dir":"ltr"},"gu":{"name":"Gujarati","nativeName":"ગુજરાતી","dir":"ltr"},"he":{"name":"Hebrew","nativeName":"עברית","dir":"rtl"},"hi":{"name":"Hindi","nativeName":"हिन्दी","dir":"ltr"},"hr":{"name":"Croatian","nativeName":"Hrvatski","dir":"ltr"},"hsb":{"name":"Upper Sorbian","nativeName":"Hornjoserbšćina","dir":"ltr"},"ht":{"name":"Haitian Creole","nativeName":"Haitian Creole","dir":"ltr"},"hu":{"name":"Hungarian","nativeName":"Magyar","dir":"ltr"},"hy":{"name":"Armenian","nativeName":"Հայերեն","dir":"ltr"},"id":{"name":"Indonesian","nativeName":"Indonesia","dir":"ltr"},"ikt":{"name":"Inuinnaqtun","nativeName":"Inuinnaqtun","dir":"ltr"},"is":{"name":"Icelandic","nativeName":"Íslenska","dir":"ltr"},"it":{"name":"Italian","nativeName":"Italiano","dir":"ltr"},"iu":{"name":"Inuktitut","nativeName":"ᐃᓄᒃᑎᑐᑦ","dir":"ltr"},"iu-Latn":{"name":"Inuktitut (Latin)","nativeName":"Inuktitut (Latin)","dir":"ltr"},"ja":{"name":"Japanese","nativeName":"日本語","dir":"ltr"},"ka":{"name":"Georgian","nativeName":"ქართული","dir":"ltr"},"kk":{"name":"Kazakh","nativeName":"Қазақ Тілі","dir":"ltr"},"km":{"name":"Khmer","nativeName":"ខ្មែរ","dir":"ltr"},"kmr":{"name":"Kurdish (Northern)","nativeName":"Kurdî (Bakur)","dir":"ltr"},"kn":{"name":"Kannada","nativeName":"ಕನ್ನಡ","dir":"ltr"},"ko":{"name":"Korean","nativeName":"한국어","dir":"ltr"},"ku":{"name":"Kurdish (Central)","nativeName":"Kurdî (Navîn)","dir":"rtl"},"ky":{"name":"Kyrgyz","nativeName":"Кыргызча","dir":"ltr"},"lo":{"name":"Lao","nativeName":"ລາວ","dir":"ltr"},"lt":{"name":"Lithuanian","nativeName":"Lietuvių","dir":"ltr"},"lv":{"name":"Latvian","nativeName":"Latviešu","dir":"ltr"},"lzh":{"name":"Chinese (Literary)","nativeName":"中文 (文言文)","dir":"ltr"},"mg":{"name":"Malagasy","nativeName":"Malagasy","dir":"ltr"},"mi":{"name":"Māori","nativeName":"Te Reo 
Māori","dir":"ltr"},"mk":{"name":"Macedonian","nativeName":"Македонски","dir":"ltr"},"ml":{"name":"Malayalam","nativeName":"മലയാളം","dir":"ltr"},"mn-Cyrl":{"name":"Mongolian (Cyrillic)","nativeName":"Mongolian (Cyrillic)","dir":"ltr"},"mn-Mong":{"name":"Mongolian (Traditional)","nativeName":"ᠮᠣᠩᠭᠣᠯ ᠬᠡᠯᠡ","dir":"ltr"},"mr":{"name":"Marathi","nativeName":"मराठी","dir":"ltr"},"ms":{"name":"Malay","nativeName":"Melayu","dir":"ltr"},"mt":{"name":"Maltese","nativeName":"Malti","dir":"ltr"},"mww":{"name":"Hmong Daw","nativeName":"Hmong Daw","dir":"ltr"},"my":{"name":"Myanmar (Burmese)","nativeName":"မြန်မာ","dir":"ltr"},"nb":{"name":"Norwegian","nativeName":"Norsk Bokmål","dir":"ltr"},"ne":{"name":"Nepali","nativeName":"नेपाली","dir":"ltr"},"nl":{"name":"Dutch","nativeName":"Nederlands","dir":"ltr"},"or":{"name":"Odia","nativeName":"ଓଡ଼ିଆ","dir":"ltr"},"otq":{"name":"Querétaro Otomi","nativeName":"Hñähñu","dir":"ltr"},"pa":{"name":"Punjabi","nativeName":"ਪੰਜਾਬੀ","dir":"ltr"},"pl":{"name":"Polish","nativeName":"Polski","dir":"ltr"},"prs":{"name":"Dari","nativeName":"دری","dir":"rtl"},"ps":{"name":"Pashto","nativeName":"پښتو","dir":"rtl"},"pt":{"name":"Portuguese (Brazil)","nativeName":"Português (Brasil)","dir":"ltr"},"pt-PT":{"name":"Portuguese (Portugal)","nativeName":"Português (Portugal)","dir":"ltr"},"ro":{"name":"Romanian","nativeName":"Română","dir":"ltr"},"ru":{"name":"Russian","nativeName":"Русский","dir":"ltr"},"sk":{"name":"Slovak","nativeName":"Slovenčina","dir":"ltr"},"sl":{"name":"Slovenian","nativeName":"Slovenščina","dir":"ltr"},"sm":{"name":"Samoan","nativeName":"Gagana Sāmoa","dir":"ltr"},"so":{"name":"Somali","nativeName":"Soomaali","dir":"ltr"},"sq":{"name":"Albanian","nativeName":"Shqip","dir":"ltr"},"sr-Cyrl":{"name":"Serbian (Cyrillic)","nativeName":"Српски (ћирилица)","dir":"ltr"},"sr-Latn":{"name":"Serbian (Latin)","nativeName":"Srpski 
(latinica)","dir":"ltr"},"sv":{"name":"Swedish","nativeName":"Svenska","dir":"ltr"},"sw":{"name":"Swahili","nativeName":"Kiswahili","dir":"ltr"},"ta":{"name":"Tamil","nativeName":"தமிழ்","dir":"ltr"},"te":{"name":"Telugu","nativeName":"తెలుగు","dir":"ltr"},"th":{"name":"Thai","nativeName":"ไทย","dir":"ltr"},"ti":{"name":"Tigrinya","nativeName":"ትግር","dir":"ltr"},"tk":{"name":"Turkmen","nativeName":"Türkmen Dili","dir":"ltr"},"tlh-Latn":{"name":"Klingon (Latin)","nativeName":"Klingon (Latin)","dir":"ltr"},"tlh-Piqd":{"name":"Klingon (pIqaD)","nativeName":"Klingon (pIqaD)","dir":"ltr"},"to":{"name":"Tongan","nativeName":"Lea Fakatonga","dir":"ltr"},"tr":{"name":"Turkish","nativeName":"Türkçe","dir":"ltr"},"tt":{"name":"Tatar","nativeName":"Татар","dir":"ltr"},"ty":{"name":"Tahitian","nativeName":"Reo Tahiti","dir":"ltr"},"ug":{"name":"Uyghur","nativeName":"ئۇيغۇرچە","dir":"rtl"},"uk":{"name":"Ukrainian","nativeName":"Українська","dir":"ltr"},"ur":{"name":"Urdu","nativeName":"اردو","dir":"rtl"},"uz":{"name":"Uzbek (Latin)","nativeName":"Uzbek (Latin)","dir":"ltr"},"vi":{"name":"Vietnamese","nativeName":"Tiếng Việt","dir":"ltr"},"yua":{"name":"Yucatec Maya","nativeName":"Yucatec Maya","dir":"ltr"},"yue":{"name":"Cantonese (Traditional)","nativeName":"粵語 (繁體)","dir":"ltr"},"zh-Hans":{"name":"Chinese Simplified","nativeName":"中文 (简体)","dir":"ltr"},"zh-Hant":{"name":"Chinese Traditional","nativeName":"繁體中文 (繁體)","dir":"ltr"},"zu":{"name":"Zulu","nativeName":"Isi-Zulu","dir":"ltr"}}}; |
Examples
// Streams 10 seconds of microphone audio to the Verbum WebSocket endpoint as
// 16 kHz, 16-bit, mono, little-endian PCM and logs every message received.
let sampleRate = 16000;
let bitsPerSample = 16;
let channelCount = 1;

// Request access to the microphone
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
  let context = new AudioContext({ sampleRate });
  let source = context.createMediaStreamSource(stream);
  let processor = context.createScriptProcessor(4096, 1, 1);
  let samples = [];

  // Convert each captured sample to a 16-bit signed integer and queue it.
  processor.onaudioprocess = event => {
    let input = event.inputBuffer.getChannelData(0);
    for (let i = 0; i < input.length; i++) {
      // Clamp first so an out-of-range value cannot wrap around when written as Int16.
      let clamped = Math.max(-1, Math.min(1, input[i]));
      samples.push(Math.round(clamped * 0x7FFF));
    }
  };
  source.connect(processor);
  processor.connect(context.destination);

  // Connect to a WebSocket
  let socket = new WebSocket("wss://verbumapi.onemeta.ai:3001/ws/{your-verbum-token}");
  socket.binaryType = 'arraybuffer';

  socket.onerror = event => {
    console.error("WebSocket error: ", event);
  };

  socket.onopen = () => {
    // set language to spanish Argentina
    const data_to_send = `{
"speechLanguage": "es-AR"
}`;
    socket.send(data_to_send);

    // Start sending audio data to the server
    // NOTE(review): this value is a byte count per second (32000) but is used below
    // as a number of samples, so each chunk holds about 2 seconds of audio —
    // confirm the intended chunk size.
    let chunkSize = sampleRate * channelCount * bitsPerSample / 8;
    let intervalId = setInterval(() => {
      if (samples.length >= chunkSize) {
        let chunk = samples.splice(0, chunkSize);
        // Encode chunk as a little-endian, 16-bit, signed integer array
        let buffer = new ArrayBuffer(chunk.length * 2);
        let view = new DataView(buffer);
        for (let i = 0; i < chunk.length; i++) {
          view.setInt16(i * 2, chunk[i], true /* little endian */);
        }
        // Send chunk to the WebSocket server
        socket.send(buffer);
      }
    }, 20);

    socket.onmessage = event => {
      // Get data after receiving from WebSocket
      console.log("Data after receiving: ", event.data);
    };

    // Stop sending audio data and close the WebSocket after 10 seconds
    setTimeout(() => {
      clearInterval(intervalId);
      socket.close();
      // Detach the audio callback so no more samples are queued after shutdown.
      processor.onaudioprocess = null;
      processor.disconnect();
      source.disconnect();
      // Release the microphone; otherwise the capture stays active after the demo ends.
      stream.getTracks().forEach(track => track.stop());
      context.close();
    }, 10000);
  };
}).catch(error => {
  // Reached when microphone access is denied or no input device is available.
  console.error("Could not access the microphone: ", error);
});