Dive into our latest project using AI to transcribe audio and identify speakers!
This project leverages AI to transcribe audio files and perform speaker diarization, identifying who speaks when. Using Whisper for transcription and Pyannote for diarization, it processes audio into a JSON output with timestamps, speaker IDs, and text.
import os

import torch
from pyannote.audio import Pipeline
from transformers import pipeline
# Whisper ASR pipeline: prefer the first CUDA device, otherwise fall back
# to CPU (device index -1 is the transformers convention for CPU).
_asr_device = 0 if torch.cuda.is_available() else -1
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    device=_asr_device,
)
# Pyannote speaker-diarization pipeline.
# NOTE: "pyannote/speaker-diarization-3.1" is a gated Hugging Face repo —
# the model license must be accepted on the Hub and a valid access token
# supplied. The token is read from the HF_TOKEN environment variable via
# `os.getenv` (imported at the top of the file); if HF_TOKEN is unset this
# passes None and `from_pretrained` will fail authentication for the
# gated repo rather than fail here.
diarizer = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.getenv("HF_TOKEN"),
)