microsoft · Lubrsy706 · May 15, 2026
diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py
@@ -39,6 +39,7 @@
     EpubConverter,
     DocumentIntelligenceConverter,
     CsvConverter,
+    WebVttConverter,
 )
 
 from ._base_converter import DocumentConverter, DocumentConverterResult
@@ -202,6 +203,7 @@ def enable_builtins(self, **kwargs) -> None:
             self.register_converter(OutlookMsgConverter())
             self.register_converter(EpubConverter())
             self.register_converter(CsvConverter())
+            self.register_converter(WebVttConverter())
 
             # Register Document Intelligence converter at the top of the stack if endpoint is provided
             docintel_endpoint = kwargs.get("docintel_endpoint")

diff --git a/packages/markitdown/src/markitdown/converters/__init__.py b/packages/markitdown/src/markitdown/converters/__init__.py
@@ -23,6 +23,7 @@
 )
 from ._epub_converter import EpubConverter
 from ._csv_converter import CsvConverter
+from ._webvtt_converter import WebVttConverter
 
 __all__ = [
     "PlainTextConverter",
@@ -45,4 +46,5 @@
     "DocumentIntelligenceFileType",
     "EpubConverter",
     "CsvConverter",
+    "WebVttConverter",
 ]
diff --git a/packages/markitdown/src/markitdown/converters/_webvtt_converter.py b/packages/markitdown/src/markitdown/converters/_webvtt_converter.py
@@ -0,0 +1,125 @@
+import html
+import re
+from typing import Any, BinaryIO
+
+from charset_normalizer import from_bytes
+
+from .._base_converter import DocumentConverter, DocumentConverterResult
+from .._stream_info import StreamInfo
+
+ACCEPTED_MIME_TYPES = ["text/vtt"]
+ACCEPTED_FILE_EXTENSIONS = [".vtt"]
+
+# Cue timing line: "[hh:]mm:ss.ttt --> [hh:]mm:ss.ttt". The hours field is
+# optional and, per the WebVTT spec, may have more than two digits.
+_TIMESTAMP_RE = re.compile(
+    r"^\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s+-->\s+(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}"
+)
+# Opening voice tag, e.g. "<v Alice>" or "<v.loud Alice>"; group 1 is the speaker.
+_VOICE_OPEN_RE = re.compile(r"<v(?:\.[^>\s]+)?\s+([^>]+)>")
+# Any remaining cue markup (<i>, </v>, <c.class>, inline timestamps, ...).
+_TAG_RE = re.compile(r"<[^>]+>")
+# File signature line: "WEBVTT", optionally followed by whitespace and free text.
+_SIGNATURE_RE = re.compile(r"WEBVTT(?:\s|$)")
+# First line of a comment, stylesheet or region block (keyword as a whole word).
+_NON_CUE_RE = re.compile(r"(?:NOTE|STYLE|REGION)(?:\s|$)")
+
+
+class WebVttConverter(DocumentConverter):
+    """Convert WebVTT subtitle files to readable Markdown text.
+
+    Only cue payload text is kept. The file header, NOTE/STYLE/REGION blocks,
+    cue identifiers, timing lines and inline markup are dropped; voice tags
+    become a "Speaker: " prefix.
+    """
+
+    def accepts(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> bool:
+        """Return True when the MIME type or file extension identifies WebVTT."""
+        mimetype = (stream_info.mimetype or "").lower()
+        extension = (stream_info.extension or "").lower()
+
+        return mimetype in ACCEPTED_MIME_TYPES or extension in ACCEPTED_FILE_EXTENSIONS
+
+    def convert(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> DocumentConverterResult:
+        """Read the whole stream and return the cue text as Markdown.
+
+        Each cue becomes one paragraph; cues are separated by a blank line.
+        """
+        raw = file_stream.read()
+        if stream_info.charset:
+            text = raw.decode(stream_info.charset)
+        else:
+            # best() returns None when no encoding can be guessed; str(None)
+            # would yield the literal text "None", so fall back explicitly.
+            guess = from_bytes(raw).best()
+            if guess is not None:
+                text = str(guess)
+            else:
+                text = raw.decode("utf-8", errors="replace")
+
+        # WebVTT allows CRLF, LF or a lone CR as line terminator.
+        text = text.replace("\r\n", "\n").replace("\r", "\n")
+
+        cues = []
+        for block in re.split(r"\n\s*\n", text):
+            cue = self._convert_block(block)
+            if cue:
+                cues.append(cue)
+
+        return DocumentConverterResult(markdown="\n\n".join(cues))
+
+    def _convert_block(self, block: str) -> str:
+        """Return the cleaned payload of one blank-line-delimited block.
+
+        Returns "" for anything that is not a cue: the file header, NOTE, STYLE
+        and REGION blocks, and blocks without a timing line.
+        """
+        lines = [line.strip() for line in block.splitlines() if line.strip()]
+        if not lines:
+            return ""
+
+        # A byte order mark (U+FEFF) may precede the signature on the first line.
+        lines[0] = lines[0].lstrip("\ufeff")
+        if _SIGNATURE_RE.match(lines[0]):
+            # Skip only the signature line, so a cue that follows it without a
+            # separating blank line is still converted.
+            lines = lines[1:]
+        elif _NON_CUE_RE.match(lines[0]):
+            # Whole-word match, so cue identifiers such as "NOTES-1" or
+            # "STYLEGUIDE" are not mistaken for comment/style blocks.
+            return ""
+
+        timestamp_index = next(
+            (i for i, line in enumerate(lines) if _TIMESTAMP_RE.match(line)),
+            None,
+        )
+        if timestamp_index is None:
+            return ""
+
+        # Lines before the timing line are the optional cue identifier.
+        cleaned = (
+            self._clean_text_line(line) for line in lines[timestamp_index + 1 :]
+        )
+        return "\n".join(line for line in cleaned if line)
+
+    def _clean_text_line(self, line: str) -> str:
+        """Strip WebVTT markup from one payload line.
+
+        Voice tags become "Speaker: ", other tags are removed, HTML entities
+        are decoded and runs of whitespace collapse to a single space.
+        """
+        line = _VOICE_OPEN_RE.sub(r"\1: ", line)
+        line = _TAG_RE.sub("", line)
+        # Unescape after removing tags so "&lt;b&gt;" survives as literal text.
+        line = html.unescape(line)
+        return re.sub(r"\s+", " ", line).strip()
diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py
@@ -432,6 +432,37 @@ def test_exceptions() -> None:
     assert type(exc_info.value.attempts[0].converter).__name__ == "PptxConverter"
 
 
+def test_webvtt_converter_outputs_clean_transcript(tmp_path) -> None:
+    """A .vtt file converts to cue text only, with voice tags as speaker prefixes."""
+    expected = (
+        "Alice: Hello & welcome\n"
+        "to the meeting.\n\n"
+        "Next action item"
+    )
+    captions = """WEBVTT
+Kind: captions
+
+NOTE this should be ignored
+
+1
+00:00:01.000 --> 00:00:03.000 position:10%
+<v Alice>Hello &amp; welcome</v>
+to the meeting.
+
+STYLE
+::cue { color: lime; }
+
+00:00:04.000 --> 00:00:06.000
+<i>Next action item</i>
+"""
+    caption_path = tmp_path / "meeting.vtt"
+    caption_path.write_text(captions, encoding="utf-8")
+
+    converted = MarkItDown().convert(str(caption_path))
+
+    assert converted.text_content == expected
+
+
 @pytest.mark.skipif(
     skip_exiftool,
     reason="do not run if exiftool is not installed",