microsoft · Lubrsy706 · May 14, 2026
diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py
@@ -34,6 +34,7 @@
     PptxConverter,
     ImageConverter,
     AudioConverter,
+    EmlConverter,
     OutlookMsgConverter,
     ZipConverter,
     EpubConverter,
@@ -199,6 +200,7 @@ def enable_builtins(self, **kwargs) -> None:
             self.register_converter(ImageConverter())
             self.register_converter(IpynbConverter())
             self.register_converter(PdfConverter())
+            self.register_converter(EmlConverter())
             self.register_converter(OutlookMsgConverter())
             self.register_converter(EpubConverter())
             self.register_converter(CsvConverter())

diff --git a/packages/markitdown/src/markitdown/converters/__init__.py b/packages/markitdown/src/markitdown/converters/__init__.py
@@ -15,6 +15,7 @@
 from ._pptx_converter import PptxConverter
 from ._image_converter import ImageConverter
 from ._audio_converter import AudioConverter
+from ._eml_converter import EmlConverter
 from ._outlook_msg_converter import OutlookMsgConverter
 from ._zip_converter import ZipConverter
 from ._doc_intel_converter import (
@@ -39,6 +40,7 @@
     "PptxConverter",
     "ImageConverter",
     "AudioConverter",
+    "EmlConverter",
     "OutlookMsgConverter",
     "ZipConverter",
     "DocumentIntelligenceConverter",

diff --git a/packages/markitdown/src/markitdown/converters/_eml_converter.py b/packages/markitdown/src/markitdown/converters/_eml_converter.py
@@ -0,0 +1,199 @@
+from email import policy
+from email.message import EmailMessage, Message
+from email.parser import BytesParser
+from typing import Any, BinaryIO, Iterable
+
+from .._base_converter import DocumentConverter, DocumentConverterResult
+from .._stream_info import StreamInfo
+from ._html_converter import HtmlConverter
+
+ACCEPTED_MIME_TYPES = [
+    "message/rfc822",
+    "application/eml",
+]
+
+ACCEPTED_FILE_EXTENSIONS = [".eml"]
+
+# Leading bytes inspected when sniffing an unlabelled stream for email headers.
+SNIFF_BYTE_LIMIT = 65536
+
+
+class EmlConverter(DocumentConverter):
+    """Converts RFC 822 / EML email files to markdown.
+
+    The output is a "# Email Message" heading, the From/To/Cc/Date/Subject
+    headers that are present, a "## Content" section holding the body (plain
+    text preferred over HTML), and an "## Attachments" list that names each
+    attachment without inlining its content.
+    """
+
+    def __init__(self) -> None:
+        # Renders text/html bodies when a message has no usable plain text.
+        self._html_converter = HtmlConverter()
+
+    def accepts(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> bool:
+        """Report whether the stream looks like an email message.
+
+        A ".eml" extension or a MIME type in ACCEPTED_MIME_TYPES is accepted
+        outright. Otherwise the first SNIFF_BYTE_LIMIT bytes are parsed as
+        headers, and the stream is accepted when a From header is accompanied
+        by a To or Subject header. The stream position is left unchanged.
+        """
+        mimetype = (stream_info.mimetype or "").lower()
+        extension = (stream_info.extension or "").lower()
+
+        if extension in ACCEPTED_FILE_EXTENSIONS:
+            return True
+
+        if mimetype in ACCEPTED_MIME_TYPES:
+            return True
+
+        cur_pos = file_stream.tell()
+        try:
+            head = file_stream.read(SNIFF_BYTE_LIMIT)
+        finally:
+            file_stream.seek(cur_pos)
+
+        try:
+            # Only headers are examined, so the truncated body is not parsed.
+            message = BytesParser(policy=policy.default).parsebytes(
+                head, headersonly=True
+            )
+            return message.get("from") is not None and (
+                message.get("to") is not None or message.get("subject") is not None
+            )
+        except Exception:
+            # Sniffing sees arbitrary, possibly non-email bytes; headers that
+            # cannot be parsed mean "not an email" rather than a hard failure.
+            return False
+
+    def convert(
+        self,
+        file_stream: BinaryIO,
+        stream_info: StreamInfo,
+        **kwargs: Any,
+    ) -> DocumentConverterResult:
+        """Parse the email in file_stream and render it as markdown.
+
+        The Subject header, when present, becomes the result title.
+        """
+        message = BytesParser(policy=policy.default).parse(file_stream)
+        body = self._body_to_markdown(message)
+        attachments = self._attachments(message)
+
+        # Collect fragments and join once rather than growing a string.
+        pieces = ["# Email Message\n\n"]
+        pieces.extend(
+            f"**{key}:** {value}\n"
+            for key, value in self._headers(message)
+            if value
+        )
+        pieces.append("\n## Content\n\n")
+        if body:
+            pieces.append(body.strip())
+
+        if attachments:
+            pieces.append("\n\n## Attachments\n\n")
+            pieces.extend(f"- {attachment}\n" for attachment in attachments)
+
+        subject = message.get("subject")
+        return DocumentConverterResult(
+            markdown="".join(pieces).strip(),
+            # Parsed header values are str subclasses; return a plain str.
+            title=None if subject is None else str(subject),
+        )
+
+    def _headers(self, message: Message) -> list[tuple[str, str | None]]:
+        """Return (label, value) pairs for the displayed headers, in order.
+
+        A value is None when the message does not carry that header.
+        """
+        return [
+            (label, message.get(label.lower()))
+            for label in ("From", "To", "Cc", "Date", "Subject")
+        ]
+
+    def _body_to_markdown(self, message: Message) -> str:
+        """Return the message body as markdown, preferring plain text.
+
+        HTML parts are converted only when no plain-text part has any
+        non-whitespace content, so an empty text/plain alternative does not
+        hide a populated HTML body.
+        """
+        plain_parts: list[str] = []
+        html_sources: list[str] = []
+
+        for part in self._iter_body_parts(message):
+            content = self._part_text(part)
+            if content is None:
+                continue
+
+            content_type = part.get_content_type()
+            if content_type == "text/plain":
+                plain_parts.append(content)
+            elif content_type == "text/html":
+                html_sources.append(content)
+
+        plain_chunks = [text.strip() for text in plain_parts]
+        plain_text = "\n\n".join(chunk for chunk in plain_chunks if chunk)
+        if plain_text:
+            return plain_text
+
+        # Deferred until here so HTML is only rendered when it will be used.
+        html_chunks = [
+            self._html_converter.convert_string(source).markdown.strip()
+            for source in html_sources
+        ]
+        return "\n\n".join(chunk for chunk in html_chunks if chunk)
+
+    def _part_text(self, part: EmailMessage) -> str | None:
+        """Return the decoded text of a leaf part, or None if it has none.
+
+        get_content() raises LookupError when the part declares a charset
+        Python has no codec for; the raw payload is then decoded as UTF-8
+        with replacement characters instead of failing the conversion.
+        """
+        try:
+            content = part.get_content()
+        except LookupError:
+            payload = part.get_payload(decode=True)
+            if not isinstance(payload, bytes):
+                return None
+            return payload.decode("utf-8", errors="replace")
+        return content if isinstance(content, str) else None
+
+    def _iter_body_parts(self, message: Message) -> Iterable[EmailMessage]:
+        """Yield the text/plain and text/html body parts in document order.
+
+        Subparts marked "attachment" are skipped together with everything
+        nested inside them, so the body of an attached message/rfc822 email
+        is not mistaken for the body of this message.
+        """
+        if message.is_multipart():
+            for part in message.get_payload():
+                if part.get_content_disposition() == "attachment":
+                    continue
+                yield from self._iter_body_parts(part)
+        elif message.get_content_type() in ("text/plain", "text/html"):
+            yield message  # type: ignore[misc]
+
+    def _attachments(self, message: Message) -> list[str]:
+        """Return a "filename (content type)" label for each attachment part.
+
+        Only parts whose Content-Disposition is "attachment" are listed; a
+        part without a filename is labelled "unnamed attachment".
+        """
+        if not message.is_multipart():
+            return []
+
+        labels: list[str] = []
+        for part in message.walk():
+            if part.get_content_disposition() == "attachment":
+                filename = part.get_filename() or "unnamed attachment"
+                labels.append(f"{filename} ({part.get_content_type()})")
+        return labels
diff --git a/packages/markitdown/tests/_test_vectors.py b/packages/markitdown/tests/_test_vectors.py
@@ -87,6 +87,29 @@ class FileTestVector(object):
         ],
         must_not_include=[],
     ),
+    FileTestVector(
+        filename="test_email.eml",
+        mimetype="message/rfc822",
+        charset="utf-8",
+        url=None,
+        must_include=[
+            "# Email Message",
+            "**From:** Test Sender <test.sender@example.com>",
+            "**To:** Test Recipient <test.recipient@example.com>",
+            "**Cc:** Copy Recipient <copy.recipient@example.com>",
+            "**Date:** Thu, 14 May 2026 09:50:00 +0800",
+            "**Subject:** Test EML Message",
+            "## Content",
+            "This is the plain text body of the EML test message.",
+            "It should be preferred over the HTML alternative.",
+            "## Attachments",
+            "- notes.txt (text/plain)",
+        ],
+        must_not_include=[
+            "This HTML body should not be used.",
+            "attachment content should not be inlined",
+        ],
+    ),
     FileTestVector(
         filename="test.pdf",
         mimetype="application/pdf",

diff --git a/packages/markitdown/tests/test_files/test_email.eml b/packages/markitdown/tests/test_files/test_email.eml
@@ -0,0 +1,27 @@
+From: Test Sender <test.sender@example.com>
+To: Test Recipient <test.recipient@example.com>
+Cc: Copy Recipient <copy.recipient@example.com>
+Date: Thu, 14 May 2026 09:50:00 +0800
+Subject: Test EML Message
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="markitdown-test-boundary"
+
+--markitdown-test-boundary
+Content-Type: text/plain; charset="utf-8"
+
+This is the plain text body of the EML test message.
+
+It should be preferred over the HTML alternative.
+
+--markitdown-test-boundary
+Content-Type: text/html; charset="utf-8"
+
+<html><body><p>This HTML body should not be used.</p></body></html>
+
+--markitdown-test-boundary
+Content-Type: text/plain; name="notes.txt"
+Content-Disposition: attachment; filename="notes.txt"
+
+attachment content should not be inlined
+
+--markitdown-test-boundary--