Get started

The Verbum API provides programmatic access to AI-powered speech-to-text transcription in an easy and fast way.
With just two simple steps you will be able to transcribe any audio or microphone input, without needing to specify the language being spoken.
To use the Verbum API services you need an active account; if you don't have one, you can get one HERE.

login

To use the Verbum API you need to log in. The login call returns the verbumToken; this token expires every 24 hours.
To log in, make a POST call to the following URL: https://verbumapi.onemeta.ai:3001/login


BODY PARAMETERS (Form Data)

Field Type Description
name String Your API username.
password String Your API password.

Response (String)

Field Type Description
token String Your verbumToken for the next 24 hours.

Examples

                            
# Log in by POSTing the credentials as multipart/form-data (follows redirects).
curl -L -X POST 'https://verbumapi.onemeta.ai:3001/login' \
  -F 'name="my-user-name"' \
  -F 'password="my-user-password"'
                            
                        

                            
// Build the multipart/form-data login body.
// FormData sends values verbatim, so they must NOT be wrapped in extra quotes
// (unlike the curl --form syntax, where the surrounding quotes are stripped).
const form = new FormData();
form.append('name', 'my-user-name');
form.append('password', 'my-user-password');

// POST the credentials to the login endpoint.
fetch('https://verbumapi.onemeta.ai:3001/login', {
    method: 'POST',
    body: form
});
                            
                        

                            
/// Logs in to the Verbum API by sending a multipart/form-data POST to `/login`
/// using the Chilkat REST client.
///
/// The request carries two form fields, `name` and `password`. On a connection
/// failure, a request failure, or an HTTP status of 400 or above, diagnostic
/// information is printed and the function returns early. Otherwise the
/// response body is loaded into a `CkoJsonObject`.
func chilkatTest() {
    let rest = CkoRest()

    // URL: https://verbumapi.onemeta.ai:3001/login
    let bTls = true
    let port = 3001
    let bAutoReconnect = true
    let connected = rest.Connect("verbumapi.onemeta.ai", port: port, tls: bTls, autoReconnect: bAutoReconnect)
    if connected != true {
        print("ConnectFailReason: \(rest.ConnectFailReason.intValue)")
        print("\(rest.LastErrorText)")
        return
    }

    // Note: The above code does not need to be repeatedly called for each REST request.
    // The rest object can be setup once, and then many requests can be sent.  Chilkat will automatically
    // reconnect within a FullRequest* method as needed.  It is only the very first connection that is explicitly
    // made via the Connect method.

    // Part 1: the "name" form field, whose body is the API username.
    rest.PartSelector = "1"
    rest.AddHeader("Content-Disposition", value: "form-data; name=\"name\"")
    rest.SetMultipartBodyString("my-user-name")

    // Part 2: the "password" form field, whose body is the API password.
    rest.PartSelector = "2"
    rest.AddHeader("Content-Disposition", value: "form-data; name=\"password\"")
    rest.SetMultipartBodyString("my-user-password")

    // Select the top-level request again before setting its Content-Type.
    rest.PartSelector = "0"
    rest.AddHeader("Content-Type", value: "multipart/form-data")

    let responseBody: String? = rest.FullRequestMultipart("POST", uriPath: "/login")
    if rest.LastMethodSuccess != true {
        print("\(rest.LastErrorText)")
        return
    }

    let respStatusCode: Int = rest.ResponseStatusCode.intValue
    print("response status code = \(respStatusCode)")
    if respStatusCode >= 400 {
        print("Response Status Code = \(respStatusCode)")
        print("Response Header:")
        print("\(rest.ResponseHeader)")
        print("Response Body:")
        // Avoid force-unwrapping: print an empty body if none was returned.
        print(responseBody ?? "")
        return
    }

    // NOTE(review): assumes the login response body is JSON containing the token — confirm.
    let jsonResponse = CkoJsonObject()
    jsonResponse.Load(responseBody)

}
                            
                        

Speech to text

To use the speech-to-text service you have to open a WebSocket connection and send a series of parameters:

Translation Code url parameters
WebSocket wss://verbumapi.onemeta.ai:3001/ws/{your-verbum-token} object to send:
audio buffer from the microphone (see example).
The audio buffer sent from the microphone must use the following configuration:
  • sampleRate = 16000
  • bitsPerSample = 16
  • channelCount = 1
  • Encode chunk as a little-endian, 16-bit, signed integer array
list of supported languages this is the list/json of supported languages to transcribe {"translation":{"af":{"name":"Afrikaans","nativeName":"Afrikaans","dir":"ltr"},"am":{"name":"Amharic","nativeName":"አማርኛ","dir":"ltr"},"ar":{"name":"Arabic","nativeName":"العربية","dir":"rtl"},"as":{"name":"Assamese","nativeName":"অসমীয়া","dir":"ltr"},"az":{"name":"Azerbaijani","nativeName":"Azərbaycan","dir":"ltr"},"ba":{"name":"Bashkir","nativeName":"Bashkir","dir":"ltr"},"bg":{"name":"Bulgarian","nativeName":"Български","dir":"ltr"},"bn":{"name":"Bangla","nativeName":"বাংলা","dir":"ltr"},"bo":{"name":"Tibetan","nativeName":"བོད་སྐད་","dir":"ltr"},"bs":{"name":"Bosnian","nativeName":"Bosnian","dir":"ltr"},"ca":{"name":"Catalan","nativeName":"Català","dir":"ltr"},"cs":{"name":"Czech","nativeName":"Čeština","dir":"ltr"},"cy":{"name":"Welsh","nativeName":"Cymraeg","dir":"ltr"},"da":{"name":"Danish","nativeName":"Dansk","dir":"ltr"},"de":{"name":"German","nativeName":"Deutsch","dir":"ltr"},"dv":{"name":"Divehi","nativeName":"ދިވެހިބަސް","dir":"rtl"},"el":{"name":"Greek","nativeName":"Ελληνικά","dir":"ltr"},"en":{"name":"English","nativeName":"English","dir":"ltr"},"es":{"name":"Spanish","nativeName":"Español","dir":"ltr"},"et":{"name":"Estonian","nativeName":"Eesti","dir":"ltr"},"eu":{"name":"Basque","nativeName":"Euskara","dir":"ltr"},"fa":{"name":"Persian","nativeName":"فارسی","dir":"rtl"},"fi":{"name":"Finnish","nativeName":"Suomi","dir":"ltr"},"fil":{"name":"Filipino","nativeName":"Filipino","dir":"ltr"},"fj":{"name":"Fijian","nativeName":"Na Vosa Vakaviti","dir":"ltr"},"fo":{"name":"Faroese","nativeName":"Føroyskt","dir":"ltr"},"fr":{"name":"French","nativeName":"Français","dir":"ltr"},"fr-CA":{"name":"French (Canada)","nativeName":"Français 
(Canada)","dir":"ltr"},"ga":{"name":"Irish","nativeName":"Gaeilge","dir":"ltr"},"gl":{"name":"Galician","nativeName":"Galego","dir":"ltr"},"gu":{"name":"Gujarati","nativeName":"ગુજરાતી","dir":"ltr"},"he":{"name":"Hebrew","nativeName":"עברית","dir":"rtl"},"hi":{"name":"Hindi","nativeName":"हिन्दी","dir":"ltr"},"hr":{"name":"Croatian","nativeName":"Hrvatski","dir":"ltr"},"hsb":{"name":"Upper Sorbian","nativeName":"Hornjoserbšćina","dir":"ltr"},"ht":{"name":"Haitian Creole","nativeName":"Haitian Creole","dir":"ltr"},"hu":{"name":"Hungarian","nativeName":"Magyar","dir":"ltr"},"hy":{"name":"Armenian","nativeName":"Հայերեն","dir":"ltr"},"id":{"name":"Indonesian","nativeName":"Indonesia","dir":"ltr"},"ikt":{"name":"Inuinnaqtun","nativeName":"Inuinnaqtun","dir":"ltr"},"is":{"name":"Icelandic","nativeName":"Íslenska","dir":"ltr"},"it":{"name":"Italian","nativeName":"Italiano","dir":"ltr"},"iu":{"name":"Inuktitut","nativeName":"ᐃᓄᒃᑎᑐᑦ","dir":"ltr"},"iu-Latn":{"name":"Inuktitut (Latin)","nativeName":"Inuktitut (Latin)","dir":"ltr"},"ja":{"name":"Japanese","nativeName":"日本語","dir":"ltr"},"ka":{"name":"Georgian","nativeName":"ქართული","dir":"ltr"},"kk":{"name":"Kazakh","nativeName":"Қазақ Тілі","dir":"ltr"},"km":{"name":"Khmer","nativeName":"ខ្មែរ","dir":"ltr"},"kmr":{"name":"Kurdish (Northern)","nativeName":"Kurdî (Bakur)","dir":"ltr"},"kn":{"name":"Kannada","nativeName":"ಕನ್ನಡ","dir":"ltr"},"ko":{"name":"Korean","nativeName":"한국어","dir":"ltr"},"ku":{"name":"Kurdish (Central)","nativeName":"Kurdî (Navîn)","dir":"rtl"},"ky":{"name":"Kyrgyz","nativeName":"Кыргызча","dir":"ltr"},"lo":{"name":"Lao","nativeName":"ລາວ","dir":"ltr"},"lt":{"name":"Lithuanian","nativeName":"Lietuvių","dir":"ltr"},"lv":{"name":"Latvian","nativeName":"Latviešu","dir":"ltr"},"lzh":{"name":"Chinese (Literary)","nativeName":"中文 (文言文)","dir":"ltr"},"mg":{"name":"Malagasy","nativeName":"Malagasy","dir":"ltr"},"mi":{"name":"Māori","nativeName":"Te Reo 
Māori","dir":"ltr"},"mk":{"name":"Macedonian","nativeName":"Македонски","dir":"ltr"},"ml":{"name":"Malayalam","nativeName":"മലയാളം","dir":"ltr"},"mn-Cyrl":{"name":"Mongolian (Cyrillic)","nativeName":"Mongolian (Cyrillic)","dir":"ltr"},"mn-Mong":{"name":"Mongolian (Traditional)","nativeName":"ᠮᠣᠩᠭᠣᠯ ᠬᠡᠯᠡ","dir":"ltr"},"mr":{"name":"Marathi","nativeName":"मराठी","dir":"ltr"},"ms":{"name":"Malay","nativeName":"Melayu","dir":"ltr"},"mt":{"name":"Maltese","nativeName":"Malti","dir":"ltr"},"mww":{"name":"Hmong Daw","nativeName":"Hmong Daw","dir":"ltr"},"my":{"name":"Myanmar (Burmese)","nativeName":"မြန်မာ","dir":"ltr"},"nb":{"name":"Norwegian","nativeName":"Norsk Bokmål","dir":"ltr"},"ne":{"name":"Nepali","nativeName":"नेपाली","dir":"ltr"},"nl":{"name":"Dutch","nativeName":"Nederlands","dir":"ltr"},"or":{"name":"Odia","nativeName":"ଓଡ଼ିଆ","dir":"ltr"},"otq":{"name":"Querétaro Otomi","nativeName":"Hñähñu","dir":"ltr"},"pa":{"name":"Punjabi","nativeName":"ਪੰਜਾਬੀ","dir":"ltr"},"pl":{"name":"Polish","nativeName":"Polski","dir":"ltr"},"prs":{"name":"Dari","nativeName":"دری","dir":"rtl"},"ps":{"name":"Pashto","nativeName":"پښتو","dir":"rtl"},"pt":{"name":"Portuguese (Brazil)","nativeName":"Português (Brasil)","dir":"ltr"},"pt-PT":{"name":"Portuguese (Portugal)","nativeName":"Português (Portugal)","dir":"ltr"},"ro":{"name":"Romanian","nativeName":"Română","dir":"ltr"},"ru":{"name":"Russian","nativeName":"Русский","dir":"ltr"},"sk":{"name":"Slovak","nativeName":"Slovenčina","dir":"ltr"},"sl":{"name":"Slovenian","nativeName":"Slovenščina","dir":"ltr"},"sm":{"name":"Samoan","nativeName":"Gagana Sāmoa","dir":"ltr"},"so":{"name":"Somali","nativeName":"Soomaali","dir":"ltr"},"sq":{"name":"Albanian","nativeName":"Shqip","dir":"ltr"},"sr-Cyrl":{"name":"Serbian (Cyrillic)","nativeName":"Српски (ћирилица)","dir":"ltr"},"sr-Latn":{"name":"Serbian (Latin)","nativeName":"Srpski 
(latinica)","dir":"ltr"},"sv":{"name":"Swedish","nativeName":"Svenska","dir":"ltr"},"sw":{"name":"Swahili","nativeName":"Kiswahili","dir":"ltr"},"ta":{"name":"Tamil","nativeName":"தமிழ்","dir":"ltr"},"te":{"name":"Telugu","nativeName":"తెలుగు","dir":"ltr"},"th":{"name":"Thai","nativeName":"ไทย","dir":"ltr"},"ti":{"name":"Tigrinya","nativeName":"ትግር","dir":"ltr"},"tk":{"name":"Turkmen","nativeName":"Türkmen Dili","dir":"ltr"},"tlh-Latn":{"name":"Klingon (Latin)","nativeName":"Klingon (Latin)","dir":"ltr"},"tlh-Piqd":{"name":"Klingon (pIqaD)","nativeName":"Klingon (pIqaD)","dir":"ltr"},"to":{"name":"Tongan","nativeName":"Lea Fakatonga","dir":"ltr"},"tr":{"name":"Turkish","nativeName":"Türkçe","dir":"ltr"},"tt":{"name":"Tatar","nativeName":"Татар","dir":"ltr"},"ty":{"name":"Tahitian","nativeName":"Reo Tahiti","dir":"ltr"},"ug":{"name":"Uyghur","nativeName":"ئۇيغۇرچە","dir":"rtl"},"uk":{"name":"Ukrainian","nativeName":"Українська","dir":"ltr"},"ur":{"name":"Urdu","nativeName":"اردو","dir":"rtl"},"uz":{"name":"Uzbek (Latin)","nativeName":"Uzbek (Latin)","dir":"ltr"},"vi":{"name":"Vietnamese","nativeName":"Tiếng Việt","dir":"ltr"},"yua":{"name":"Yucatec Maya","nativeName":"Yucatec Maya","dir":"ltr"},"yue":{"name":"Cantonese (Traditional)","nativeName":"粵語 (繁體)","dir":"ltr"},"zh-Hans":{"name":"Chinese Simplified","nativeName":"中文 (简体)","dir":"ltr"},"zh-Hant":{"name":"Chinese Traditional","nativeName":"繁體中文 (繁體)","dir":"ltr"},"zu":{"name":"Zulu","nativeName":"Isi-Zulu","dir":"ltr"}}};

Examples

                            
// Audio format sent to the server: 16 kHz, 16-bit, mono.
const sampleRate = 16000;
const bitsPerSample = 16;
const channelCount = 1;

// Request access to the microphone
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
  const context = new AudioContext({ sampleRate });
  const source = context.createMediaStreamSource(stream);
  const processor = context.createScriptProcessor(4096, 1, 1);

  // Queue of captured samples, already converted to 16-bit signed integers.
  const samples = [];
  processor.onaudioprocess = event => {
    const input = event.inputBuffer.getChannelData(0);
    for (let i = 0; i < input.length; i++) {
      // Clamp to [-1, 1] first so out-of-range floats cannot wrap around
      // when written as Int16 below.
      const clamped = Math.max(-1, Math.min(1, input[i]));
      samples.push(Math.round(clamped * 0x7FFF));
    }
  };

  source.connect(processor);
  processor.connect(context.destination);

  // Connect to a WebSocket (the scheme is wss:// only — no nested https://)
  const socket = new WebSocket("wss://verbumapi.onemeta.ai:3001/ws/{your-verbum-token}");
  socket.binaryType = 'arraybuffer';

  let intervalId = null;
  let stopped = false;

  // Release every resource exactly once: sender timer, socket, audio graph, microphone.
  const stop = () => {
    if (stopped) {
      return;
    }
    stopped = true;
    clearInterval(intervalId);
    socket.close();
    processor.disconnect();
    source.disconnect();
    context.close();
    // Stop the tracks so the browser actually releases the microphone.
    stream.getTracks().forEach(track => track.stop());
  };

  socket.onmessage = event => {
    // Get data after receiving from WebSocket
    console.log("Data after receiving: ", event.data);
  };

  // Tear everything down if the connection fails or is closed by the server.
  socket.onerror = event => {
    console.error("WebSocket error: ", event);
    stop();
  };
  socket.onclose = stop;

  socket.onopen = () => {
    // set language to spanish Argentina
    socket.send(JSON.stringify({ speechLanguage: "es-AR" }));

    // Start sending audio data to the server.
    // NOTE(review): this value is the number of BYTES in one second of audio,
    // but it is used below as a SAMPLE count, so each message carries two
    // seconds of audio — confirm the intended chunk duration.
    const chunkSize = sampleRate * channelCount * bitsPerSample / 8;
    intervalId = setInterval(() => {
      if (samples.length >= chunkSize) {
        const chunk = samples.splice(0, chunkSize);

        // Encode chunk as a little-endian, 16-bit, signed integer array
        const buffer = new ArrayBuffer(chunk.length * 2);
        const view = new DataView(buffer);
        for (let i = 0; i < chunk.length; i++) {
          view.setInt16(i * 2, chunk[i], true /* little endian */);
        }

        // Send chunk to the WebSocket server
        socket.send(buffer);
      }
    }, 20);

    // Stop sending audio data and close the WebSocket after 10 seconds
    setTimeout(stop, 10000);
  };
}).catch(error => {
  // Permission denied, no input device, or an insecure context.
  console.error("Could not access the microphone: ", error);
});