Quickstart by language
No SDK required. Every example is raw HTTP and works on any runtime that can make a request.
TypeScript / JavaScript (fetch)
// Queue a transcription of a YouTube URL and log the new job's id.
async function main() {
  const payload = {
    source: "youtube",
    url: "https://youtu.be/dQw4w9WgXcQ",
    pipeline_code: "qwen3-asr-flash-filetrans",
  };
  const response = await fetch("https://transcribe.so/api/v1/transcriptions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.TRANSCRIBE_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  const job = await response.json();
  console.log("queued", job.id);
}
main();
Works on Node, Bun, Deno, Cloudflare Workers, browsers (CORS is open).
Python (requests)
# Queue a YouTube transcription job and keep the parsed JSON response in `job`.
import os, requests

payload = {
    "source": "youtube",
    "url": "https://youtu.be/dQw4w9WgXcQ",
    "pipeline_code": "qwen3-asr-flash-filetrans",
}
response = requests.post(
    "https://transcribe.so/api/v1/transcriptions",
    headers={"Authorization": f"Bearer {os.environ['TRANSCRIBE_API_KEY']}"},
    json=payload,
)
job = response.json()
print("queued", job["id"])
Bash (curl)
# Queue a transcription of a YouTube URL; the API replies with the created job as JSON.
curl -sS -X POST https://transcribe.so/api/v1/transcriptions \
-H "Authorization: Bearer $TRANSCRIBE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"source": "youtube",
"url": "https://youtu.be/dQw4w9WgXcQ",
"pipeline_code": "qwen3-asr-flash-filetrans"
}'
Go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

// main queues a YouTube transcription job and prints the returned job id.
// Errors are deliberately ignored for quickstart brevity.
func main() {
	payload, _ := json.Marshal(map[string]any{
		"source":        "youtube",
		"url":           "https://youtu.be/dQw4w9WgXcQ",
		"pipeline_code": "qwen3-asr-flash-filetrans",
	})
	req, _ := http.NewRequest("POST", "https://transcribe.so/api/v1/transcriptions", bytes.NewReader(payload))
	req.Header.Set("Authorization", "Bearer "+os.Getenv("TRANSCRIBE_API_KEY"))
	req.Header.Set("Content-Type", "application/json")
	res, _ := http.DefaultClient.Do(req)
	defer res.Body.Close()
	// encoding/json matches the "id" field case-insensitively onto Id.
	var job struct{ Id string }
	json.NewDecoder(res.Body).Decode(&job)
	fmt.Println("queued", job.Id)
}
Ruby
# Queue a YouTube transcription over Net::HTTP; the job id is printed below.
require "net/http"
require "json"
uri = URI("https://transcribe.so/api/v1/transcriptions")
req = Net::HTTP::Post.new(uri, {
"Authorization" => "Bearer #{ENV['TRANSCRIBE_API_KEY']}",
"Content-Type" => "application/json",
})
# JSON request body: source type, media URL, and the processing pipeline.
req.body = {
source: "youtube",
url: "https://youtu.be/dQw4w9WgXcQ",
pipeline_code: "qwen3-asr-flash-filetrans",
}.to_json
# Open an HTTPS connection and send the request.
res = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |h| h.request(req) }
puts "queued", JSON.parse(res.body)["id"]
PHP
<?php
// Queue a YouTube transcription via curl; the job id is echoed below.
$body = json_encode([
"source" => "youtube",
"url" => "https://youtu.be/dQw4w9WgXcQ",
"pipeline_code" => "qwen3-asr-flash-filetrans",
]);
$ch = curl_init("https://transcribe.so/api/v1/transcriptions");
curl_setopt_array($ch, [
CURLOPT_POST => true,
CURLOPT_POSTFIELDS => $body,
CURLOPT_HTTPHEADER => [
"Authorization: Bearer " . getenv("TRANSCRIBE_API_KEY"),
"Content-Type: application/json",
],
// Return the response as a string instead of printing it.
CURLOPT_RETURNTRANSFER => true,
]);
// Decode the JSON response into an associative array.
$job = json_decode(curl_exec($ch), true);
echo "queued " . $job["id"];
Don't see your language? Every recipe is just curl + Bearer auth. Adapt the Bash example.
Upload a file
Two modes. Presigned S3 PUT for files under ~50 MB or stable connections. Resumable tus for large files or flaky networks: the upload resumes from the last byte after a network drop. Both end with the same POST /transcriptions.
TypeScript / JavaScript: presigned PUT
import fs from "node:fs";
const KEY = process.env.TRANSCRIBE_API_KEY!;
const FILE = "test-1min.m4a"; // try ours: curl -O https://transcribe.so/test-1min.m4a
// Upload a local file via presigned S3 PUT, submit a transcription job,
// then poll the job until it reaches a terminal state.
async function main() {
  const buf = fs.readFileSync(FILE);
  // 1. Get a presigned URL
  const presignRes = await fetch("https://transcribe.so/api/v1/uploads", {
    method: "POST",
    headers: { Authorization: `Bearer ${KEY}`, "Content-Type": "application/json" },
    body: JSON.stringify({ filename: FILE, content_type: "audio/mp4", file_size: buf.byteLength }),
  });
  if (!presignRes.ok) throw new Error(`presign failed: ${presignRes.status}`);
  const presign = await presignRes.json();
  // 2. PUT the file straight to S3. Check the status: a failed PUT would
  //    otherwise fall through and submit a job for a file that never landed.
  const putRes = await fetch(presign.upload_url, {
    method: "PUT",
    headers: { "Content-Type": "audio/mp4" },
    body: buf,
  });
  if (!putRes.ok) throw new Error(`upload failed: ${putRes.status}`);
  // 3. Submit the transcription
  const jobRes = await fetch("https://transcribe.so/api/v1/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${KEY}`, "Content-Type": "application/json" },
    body: JSON.stringify({
      source: "upload",
      upload_id: presign.upload_id,
      original_filename: FILE,
      duration_seconds: 60,
      pipeline_code: "qwen3-asr-flash-filetrans",
    }),
  });
  if (jobRes.status === 402) {
    // 402: the wallet can't cover the hold; the error payload links the docs.
    const e = await jobRes.json();
    throw new Error(`top up wallet at ${e.error.doc_url}`);
  }
  const job = await jobRes.json();
  console.log("queued", job.id);
  // 4. Poll until terminal
  while (true) {
    await new Promise(r => setTimeout(r, 5000));
    const tx = await (await fetch(`https://transcribe.so/api/v1/transcriptions/${job.id}`, {
      headers: { Authorization: `Bearer ${KEY}` },
    })).json();
    console.log(tx.status, tx.stage, `${tx.progress}%`);
    if (tx.status === "completed" || tx.status === "failed") break;
  }
}
main();
TypeScript / JavaScript: resumable (tus)
// npm i tus-js-client
// Resumable (tus) upload, then the same job-submission call as the presigned recipe.
import fs from "node:fs";
import * as tus from "tus-js-client";
const KEY = process.env.TRANSCRIBE_API_KEY!;
const FILE = "test-1min.m4a";
async function main() {
const buf = fs.readFileSync(FILE);
// 1. Mint a tus upload token (1-hour TTL, bound to file_size)
const mint = await (await fetch("https://transcribe.so/api/v1/uploads/tus", {
method: "POST",
headers: { Authorization: `Bearer ${KEY}`, "Content-Type": "application/json" },
body: JSON.stringify({ filename: FILE, file_size: buf.byteLength }),
})).json();
// 2. Resumable upload. retryDelays makes tus-js-client retry transient
// network errors automatically; the upload picks up from the last
// acked byte rather than restarting.
const uploadId: string = await new Promise((resolve, reject) => {
const upload = new tus.Upload(buf, {
endpoint: mint.upload_endpoint,
retryDelays: [0, 1000, 3000, 5000, 10000],
// the minted token travels in upload metadata under the server-provided key
metadata: { filename: FILE, [mint.upload_metadata_key]: mint.upload_token },
onError: reject,
onSuccess: () => {
// upload.url ends with <id>+<resume-token>; server normalizes
const last = upload.url!.split("/").pop()!;
resolve("tus/" + last);
},
});
upload.start();
});
// 3. Submit the transcription (same call as the presigned recipe)
const jobRes = await fetch("https://transcribe.so/api/v1/transcriptions", {
method: "POST",
headers: { Authorization: `Bearer ${KEY}`, "Content-Type": "application/json" },
body: JSON.stringify({
source: "upload",
upload_id: uploadId,
original_filename: FILE,
duration_seconds: 60,
pipeline_code: "qwen3-asr-flash-filetrans",
}),
});
if (jobRes.status === 402) {
// 402: wallet can't cover the hold for duration_seconds
const e = await jobRes.json();
throw new Error(`top up wallet at ${e.error.doc_url}`);
}
const job = await jobRes.json();
console.log("queued", job.id);
}
main();
For browser uploads (e.g. Uppy), use the same upload_endpoint + upload_token. CORS allows http://localhost:3000 for local dev plus production origins.
Python: presigned PUT
import os, time, requests
KEY = os.environ["TRANSCRIBE_API_KEY"]
FILE = "test-1min.m4a" # try ours: curl -O https://transcribe.so/test-1min.m4a
with open(FILE, "rb") as f:
body = f.read()
# 1. Get a presigned URL
presign = requests.post(
"https://transcribe.so/api/v1/uploads",
headers={"Authorization": f"Bearer {KEY}"},
json={"filename": FILE, "content_type": "audio/mp4", "file_size": len(body)},
).json()
# 2. PUT the file straight to S3
requests.put(
presign["upload_url"],
headers={"Content-Type": "audio/mp4"},
data=body,
).raise_for_status()
# 3. Submit the transcription
job_res = requests.post(
"https://transcribe.so/api/v1/transcriptions",
headers={"Authorization": f"Bearer {KEY}"},
json={
"source": "upload",
"upload_id": presign["upload_id"],
"original_filename": FILE,
"duration_seconds": 60,
"pipeline_code": "qwen3-asr-flash-filetrans",
},
)
if job_res.status_code == 402:
raise SystemExit(f"top up wallet at {job_res.json()['error']['doc_url']}")
job = job_res.json()
print("queued", job["id"])
# 4. Poll until terminal
while True:
time.sleep(5)
tx = requests.get(
f"https://transcribe.so/api/v1/transcriptions/{job['id']}",
headers={"Authorization": f"Bearer {KEY}"},
).json()
print(tx["status"], tx["stage"], f"{tx['progress']}%")
if tx["status"] in ("completed", "failed"):
        break
Python: resumable (tus)
# pip install tuspy requests
# Resumable (tus) upload, then the same transcription submission as the
# presigned recipe above.
import os, requests
from tusclient import client

KEY = os.environ["TRANSCRIBE_API_KEY"]
FILE = "test-1min.m4a"
size = os.path.getsize(FILE)

# 1. Mint a tus upload token (1-hour TTL, bound to file_size)
mint = requests.post(
    "https://transcribe.so/api/v1/uploads/tus",
    headers={"Authorization": f"Bearer {KEY}"},
    json={"filename": FILE, "file_size": size},
).json()

# 2. Resumable upload. tuspy retries transient errors and resumes from
#    the last acked byte. For long-running uploads, persist uploader.url
#    to disk so a fresh process can resume after a crash.
tus = client.TusClient(mint["upload_endpoint"])
uploader = tus.uploader(
    FILE,
    metadata={"filename": FILE, mint["upload_metadata_key"]: mint["upload_token"]},
)
uploader.upload()
upload_id = "tus/" + uploader.url.split("/")[-1]  # server strips +resume-token

# 3. Submit the transcription
job_res = requests.post(
    "https://transcribe.so/api/v1/transcriptions",
    headers={"Authorization": f"Bearer {KEY}"},
    json={
        "source": "upload",
        "upload_id": upload_id,
        "original_filename": FILE,
        "duration_seconds": 60,
        "pipeline_code": "qwen3-asr-flash-filetrans",
    },
)
if job_res.status_code == 402:
    raise SystemExit(f"top up wallet at {job_res.json()['error']['doc_url']}")
print("queued", job_res.json()["id"])
Pass duration_seconds accurately. The wallet hold is sized from it; the real charge follows the worker's probed duration after processing.
AI agents
Modern agent frameworks call HTTP APIs natively. No SDK or MCP server required for most.
MCP server (@transcribe-so/mcp) is on the roadmap. See the launch post for updates.
Claude Code
Claude Code's bash tool can call our API directly. Drop this in your CLAUDE.md so the model knows the convention:
# Transcribe a URL with transcribe.so
# (requires TRANSCRIBE_API_KEY in the environment)
curl -sS -X POST https://transcribe.so/api/v1/transcriptions \
-H "Authorization: Bearer $TRANSCRIBE_API_KEY" \
-H "Content-Type: application/json" \
-d '{"source":"youtube","url":"...","pipeline_code":"qwen3-asr-flash-filetrans"}'
Cursor, Cline, Continue, and Aider work the same way: pass them the curl above and let them write the polling loop.
ChatGPT: public Custom GPT (no setup)
We publish a public Custom GPT. Open it, sign in once with your transcribe.so account, then paste any YouTube link or audio URL. Per-user OAuth means each ChatGPT user is billed against their own wallet, not yours.
Open in ChatGPT →
Want to build your own GPT instead? Create a Custom GPT, add an Action, and paste https://transcribe.so/api/v1/openapi.yaml as the schema. Auth Type: OAuth (per-user billing) or API Key → Bearer (your wallet pays for everyone).
Claude: public Custom Connector (no setup)
We publish a public Claude Custom Connector. Install once, sign in with your transcribe.so account, then ask Claude to transcribe YouTube links or audio URLs in any conversation. Per-user OAuth bills each Claude user against their own wallet. Works on every Claude tier, including Free (capped at one custom connector).
Open in Claude →
Power user? You can also point your own Claude Connector at https://transcribe.so/mcp directly. Same MCP server, same tools, same per-user OAuth.
Generic agent (webhook handler)
Most agents are stateless. Kick off the job, return the id, and let a webhook fire when done. Verify the signature on your end:
// Verify an X-Transcribe-Signature header ("t=<unix>,v1=<hex>") against the raw body.
import { createHmac, timingSafeEqual } from "crypto";
export function verifyTranscribeSignature(rawBody: string, header: string, secret: string) {
// parse the timestamp and hex digest out of the header
const m = header.match(/t=(\d+),v1=([0-9a-f]+)/);
if (!m) return false;
const [, t, v1] = m;
// reject signatures more than 5 minutes old or skewed (replay protection)
if (Math.abs(Math.floor(Date.now() / 1000) - Number(t)) > 300) return false;
// HMAC covers "<timestamp>.<raw body>" so the timestamp can't be swapped
const expected = createHmac("sha256", secret).update(`${t}.${rawBody}`).digest("hex");
// length check first: timingSafeEqual throws on unequal-length buffers
return expected.length === v1.length &&
timingSafeEqual(Buffer.from(expected, "utf8"), Buffer.from(v1, "utf8"));
}
Mobile
Tap Share → Transcribe from any URL on the go. Both platforms have native automation tools that wire to the API in 5 to 10 minutes.
iPhone: Shortcuts
Build a share-sheet shortcut in about 10 minutes:
- Open the Shortcuts app → + → name it "Transcribe".
- Add Get Contents of URL. Method: POST. URL:
https://transcribe.so/api/v1/transcriptions.
- Headers: Authorization: Bearer tsk_live_…, Content-Type: application/json.
- Request body (JSON):
{
"source": "external_url",
"url": "<Shortcut Input>",
"pipeline_code": "qwen3-asr-flash-filetrans"
}
- Add Show Notification with https://transcribe.so/transcriptions/<job id> so you can tap to open the result.
- In Settings → "Use with Share Sheet" → enable for URLs.
Use source: "youtube" if the input URL is a YouTube link.
Android: Tasker
Tasker's HTTP Request action wires to our API in a single step:
- Method: POST
- URL:
https://transcribe.so/api/v1/transcriptions
- Headers: Authorization: Bearer tsk_live_…
- Body:
{
"source": "external_url",
"url": "%input",
"pipeline_code": "qwen3-asr-flash-filetrans"
}
Pair with a Tasker profile that fires when a sharing intent contains a URL. That's your Android share-sheet equivalent.
Desktop
From a low-effort shell alias to a global hotkey, pick the level of integration you want. macOS, Linux, and Windows examples below.
Linux / macOS: shell alias
# Add to ~/.zshrc or ~/.bashrc
export TRANSCRIBE_API_KEY=tsk_live_...
# transcribe <url>: queue a YouTube transcription and pretty-print the JSON response
transcribe() {
curl -sS -X POST https://transcribe.so/api/v1/transcriptions \
-H "Authorization: Bearer $TRANSCRIBE_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"source\":\"youtube\",\"url\":\"$1\",\"pipeline_code\":\"qwen3-asr-flash-filetrans\"}" \
| jq .
}
# Usage: transcribe "https://youtu.be/..."
Requires jq (brew install jq or apt install jq). Drop the | jq . pipe if you don't have it. The same alias works inside Raycast Script Commands; an official Raycast extension is on the roadmap.
macOS: Hammerspoon hotkey
Requires Hammerspoon installed.
-- ~/.hammerspoon/init.lua: Cmd+Shift+T transcribes whatever URL is on the clipboard.
hs.hotkey.bind({"cmd", "shift"}, "T", function()
local url = hs.pasteboard.getContents()
-- run curl asynchronously; the callback fires when the request finishes
hs.task.new("/usr/bin/curl", function(ec, out, err)
hs.notify.new({title = "transcribe.so", informativeText = "queued: " .. (out or err)}):send()
end, {
"-sS", "-X", "POST",
"https://transcribe.so/api/v1/transcriptions",
"-H", "Authorization: Bearer " .. os.getenv("TRANSCRIBE_API_KEY"),
"-H", "Content-Type: application/json",
-- NOTE(review): the clipboard URL is spliced into the JSON unescaped; a
-- value containing a double quote would break the payload.
"-d", string.format('{"source":"youtube","url":"%s","pipeline_code":"qwen3-asr-flash-filetrans"}', url),
}):start()
end)
Windows: PowerShell
# Queue a YouTube transcription with Invoke-RestMethod.
$env:TRANSCRIBE_API_KEY = "tsk_live_..."
# Request body as a hashtable, serialized to JSON below.
$body = @{
source = "youtube"
url = "https://youtu.be/dQw4w9WgXcQ"
pipeline_code = "qwen3-asr-flash-filetrans"
} | ConvertTo-Json
# Backticks continue the command; each must be the last character on its line.
Invoke-RestMethod -Method POST `
-Uri "https://transcribe.so/api/v1/transcriptions" `
-Headers @{ Authorization = "Bearer $env:TRANSCRIBE_API_KEY" } `
-ContentType "application/json" `
-Body $body
PowerShell's backtick line-continuation must be the very last character on the line. No trailing whitespace.
Windows: AutoHotkey hotkey
; AutoHotkey v2: Win+Shift+T transcribes the clipboard URL.
#+t::
{
url := A_Clipboard
; build the JSON payload; {1} is replaced with the clipboard URL
body := Format('{"source":"youtube","url":"{1}","pipeline_code":"qwen3-asr-flash-filetrans"}', url)
; shell out to PowerShell's Invoke-RestMethod in a hidden window
RunWait('powershell -NoProfile -Command "Invoke-RestMethod -Method POST -Uri https://transcribe.so/api/v1/transcriptions -Headers @{Authorization=\"Bearer ' EnvGet('TRANSCRIBE_API_KEY') '\"} -ContentType application/json -Body \"' body '\""', , 'Hide')
TrayTip('transcribe.so', 'queued')
}
Backend / serverless
Verify webhooks and run jobs from any serverless runtime. Looking for the request side? See the language quickstarts.
Cloudflare Worker (with webhook verification)
Workers run on Web Crypto, so verification is async. The Node examples below use the synchronous node:crypto API.
// wrangler.toml: set TRANSCRIBE_API_KEY and TRANSCRIBE_WEBHOOK_SECRET
// Minimal Worker: verifies and logs transcribe.so webhook POSTs at /webhook.
export default {
async fetch(req: Request, env: any) {
if (req.method === "POST" && new URL(req.url).pathname === "/webhook") {
// read the raw body BEFORE parsing: the signature covers the exact bytes
const raw = await req.text();
const sig = req.headers.get("x-transcribe-signature") || "";
if (!(await verify(raw, sig, env.TRANSCRIBE_WEBHOOK_SECRET))) {
return new Response("invalid", { status: 401 });
}
const evt = JSON.parse(raw);
console.log("got", evt.event, evt.data?.transcription?.id);
return new Response("ok");
}
// all other routes: plain 200
return new Response("ok");
},
};
// Verify "t=<unix>,v1=<hex>" with Web Crypto (HMAC-SHA256, async on Workers).
async function verify(body: string, header: string, secret: string) {
const m = header.match(/t=(\d+),v1=([0-9a-f]+)/);
if (!m) return false;
const [, t, v1] = m;
// reject timestamps more than 5 minutes off (replay protection)
if (Math.abs(Math.floor(Date.now() / 1000) - Number(t)) > 300) return false;
const enc = new TextEncoder();
const key = await crypto.subtle.importKey("raw", enc.encode(secret), { name: "HMAC", hash: "SHA-256" }, false, ["sign"]);
const sig = await crypto.subtle.sign("HMAC", key, enc.encode(`${t}.${body}`));
// hex-encode the computed MAC for comparison with the header's v1
const expected = Array.from(new Uint8Array(sig)).map(b => b.toString(16).padStart(2, "0")).join("");
// NOTE(review): plain === on strings is not constant-time, unlike the
// timingSafeEqual used in the Node examples; consider crypto.subtle.verify
// for a timing-safe comparison.
return expected.length === v1.length && expected === v1;
}
Vercel / Next.js webhook receiver
// app/api/transcribe-webhook/route.ts
// Next.js route handler: verify the webhook signature, then process the event.
import { createHmac, timingSafeEqual } from "crypto";
export const runtime = "nodejs"; // node:crypto is unavailable on the Edge runtime
export async function POST(req: Request) {
// raw text, not req.json(): the HMAC covers the exact request bytes
const raw = await req.text();
const sig = req.headers.get("x-transcribe-signature") || "";
const m = sig.match(/t=(\d+),v1=([0-9a-f]+)/);
if (!m) return new Response("invalid", { status: 401 });
const [, t, v1] = m;
// 5-minute timestamp window guards against replayed deliveries
if (Math.abs(Math.floor(Date.now() / 1000) - Number(t)) > 300) {
return new Response("stale", { status: 401 });
}
const expected = createHmac("sha256", process.env.TRANSCRIBE_WEBHOOK_SECRET!)
.update(`${t}.${raw}`).digest("hex");
// length check first: timingSafeEqual throws on unequal-length buffers
if (expected.length !== v1.length ||
!timingSafeEqual(Buffer.from(expected, "utf8"), Buffer.from(v1, "utf8"))) {
return new Response("invalid signature", { status: 401 });
}
const evt = JSON.parse(raw);
// ... do something with evt.data.transcription
return new Response("ok");
}
If you're on Edge runtime, use the Cloudflare Worker pattern above. node:crypto is not available on the Edge.
AWS Lambda (Node.js)
// AWS Lambda handler: verify the transcribe.so webhook signature, then process.
import { createHmac, timingSafeEqual } from "node:crypto";
export const handler = async (event: any) => {
// event.body carries the raw string payload (Function URL / API Gateway proxy)
const raw = event.body;
const sig = event.headers["x-transcribe-signature"] || "";
const m = sig.match(/t=(\d+),v1=([0-9a-f]+)/);
if (!m) return { statusCode: 401, body: "invalid" };
const [, t, v1] = m;
// reject deliveries whose timestamp is more than 5 minutes off
if (Math.abs(Math.floor(Date.now() / 1000) - Number(t)) > 300) {
return { statusCode: 401, body: "stale" };
}
const expected = createHmac("sha256", process.env.TRANSCRIBE_WEBHOOK_SECRET!)
.update(`${t}.${raw}`).digest("hex");
// length check first: timingSafeEqual throws on unequal-length buffers
if (expected.length !== v1.length ||
!timingSafeEqual(Buffer.from(expected, "utf8"), Buffer.from(v1, "utf8"))) {
return { statusCode: 401, body: "invalid" };
}
const evt = JSON.parse(raw);
// ...
return { statusCode: 200, body: "ok" };
};
Wire up via Lambda Function URL (no auth) or API Gateway. The verifier is intentionally identical to the Vercel example so the two stay in sync.
n8n: HTTP Request node
- Method: POST
- URL:
https://transcribe.so/api/v1/transcriptions
- Authentication: Header Auth → Name Authorization → Value Bearer tsk_live_…
- Body Content Type: JSON
- JSON Body:
{ "source": "youtube", "url": "{{$json.url}}", "pipeline_code": "qwen3-asr-flash-filetrans" }
For the webhook side, use n8n's Webhook trigger and verify the X-Transcribe-Signature in a Function node with the same HMAC pattern as above. Zapier and Make.com follow the same pattern: Webhooks → POST for sending, "Catch Hook" / "Webhooks - Custom" + a Code step for receiving.
Browser
Call the API directly from a SPA or a bookmark. CORS is open on every /api/v1/* endpoint, but don't ship your Bearer token to the client in production. Proxy through a server route.
Server-route proxy (recommended)
The right pattern: keep the Bearer token in your server's env and expose a thin endpoint to your client.
// app/api/transcribe/route.ts (Next.js App Router)
// Thin proxy: the Bearer token stays in server env; clients POST { url } here.
export async function POST(req: Request) {
const { url } = await req.json();
const upstream = await fetch("https://transcribe.so/api/v1/transcriptions", {
method: "POST",
headers: {
Authorization: `Bearer ${process.env.TRANSCRIBE_API_KEY!}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
source: "youtube",
url,
pipeline_code: "qwen3-asr-flash-filetrans",
}),
});
// stream the upstream body and status straight back to the caller
return new Response(upstream.body, { status: upstream.status });
}
Vanilla fetch from a SPA (proxied)
Now your client just calls your own endpoint. No Bearer token in the browser:
// Call your own proxy route; no transcribe.so credentials in the browser.
const res = await fetch("/api/transcribe", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ url: window.location.href }),
});
console.log(await res.json());
Bookmarklet: transcribe the current tab
Save this as a bookmark. Set your key once in localStorage (localStorage.transcribe_key = "tsk_live_…"), then click the bookmark on any YouTube page to enqueue a transcription. Best-effort UX. For a real product, write a browser extension that calls a server proxy.
javascript:(async()=>{const k=localStorage.transcribe_key;if(!k){alert("Set localStorage.transcribe_key first");return}const r=await fetch("https://transcribe.so/api/v1/transcriptions",{method:"POST",headers:{Authorization:"Bearer "+k,"Content-Type":"application/json"},body:JSON.stringify({source:"youtube",url:location.href,pipeline_code:"qwen3-asr-flash-filetrans"})});const j=await r.json();alert(j.error?j.error.message:"queued: "+j.id)})();
Missing a recipe?
Tell us what you'd like to see. We ship recipes faster than proper SDKs and they're a lot easier to iterate on.