Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2e793ae
Enable native parsing to use source directly
bzoracler Apr 17, 2026
50b0860
Remove `file_exists` parameter from `mypy.parse.parse()` calls
bzoracler Apr 17, 2026
ac275e4
Test for invalid bytes
bzoracler Apr 28, 2026
149e459
Fix omitted argument
bzoracler Apr 28, 2026
47e45f7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 28, 2026
96e8e07
Merge branch 'master' into nativeparse-source
bzoracler May 15, 2026
beba478
Merge branch 'master' into nativeparse-source
ilevkivskyi May 17, 2026
99c7610
Remove fscache existence checks and parallel workarounds
bzoracler May 18, 2026
5c45fe2
Remove handling of sequential states
bzoracler May 18, 2026
8e53191
Don't pass dummy source
bzoracler May 18, 2026
b029c44
Refactor to allow a safer `source=None`
bzoracler May 18, 2026
d1691cc
Raise OSError in parallel mode for missing files
bzoracler May 18, 2026
2a523b5
Temporarily fix test
bzoracler May 18, 2026
6d72fff
Remove redundant condition
bzoracler May 18, 2026
1a28227
Don't create duplicate parallel states
bzoracler May 18, 2026
cb1bcae
Fix missing parse call
bzoracler May 18, 2026
df34d0b
Revert "Temporarily fix test"
bzoracler May 18, 2026
49c65b8
Revert "Refactor to allow a safer `source=None`"
bzoracler May 18, 2026
f47a898
Handle `source=None` in `parse()` function
bzoracler May 19, 2026
8cfffa0
Check for file existence before parallel parsing
bzoracler May 19, 2026
b9cc0b5
Handle `source=None` when `--package-root` is set
bzoracler May 19, 2026
20b035e
Simplify handling of `--package-root`
bzoracler May 19, 2026
52422db
Inline parallel parsing
bzoracler May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion misc/dump-ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def dump(fname: str, python_version: tuple[int, int], quiet: bool = False) -> No
options.python_version = python_version
with open(fname, "rb") as f:
s = f.read()
tree = parse(s, fname, None, errors=Errors(options), options=options, file_exists=True)
tree = parse(s, fname, None, errors=Errors(options), options=options)
if not quiet:
print(tree)

Expand Down
3 changes: 1 addition & 2 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,13 +1287,12 @@ def parse_file(

Raise CompileError if there is a parse error.
"""
file_exists = self.fscache.exists(path, real_only=True)
Comment thread
bzoracler marked this conversation as resolved.
t0 = time.time()
if raw_data:
# If possible, deserialize from known binary data instead of parsing from scratch.
tree = load_from_raw(path, id, raw_data, self.errors, options)
else:
tree = parse(source, path, id, self.errors, options=options, file_exists=file_exists)
tree = parse(source, path, id, self.errors, options=options)
tree._fullname = id
if self.stats_enabled:
with self.stats_lock:
Expand Down
1 change: 0 additions & 1 deletion mypy/checkstrformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,6 @@ def apply_field_accessors(
module=None,
options=self.chk.options,
errors=temp_errors,
file_exists=False,
eager=True,
)
if temp_errors.is_errors():
Expand Down
13 changes: 10 additions & 3 deletions mypy/nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,10 @@ def add_error(


def native_parse(
filename: str, options: Options, skip_function_bodies: bool = False
filename: str,
options: Options,
source: str | bytes | None = None,
skip_function_bodies: bool = False,
) -> tuple[MypyFile, list[ParseError], TypeIgnores]:
"""Parse a Python file using the native Rust-based parser.

Expand Down Expand Up @@ -211,7 +214,7 @@ def native_parse(
uses_template_strings,
source_hash,
mypy_comments,
) = parse_to_binary_ast(filename, options, skip_function_bodies)
) = parse_to_binary_ast(filename, options, source, skip_function_bodies)
node = MypyFile([], [])
node.path = filename
node.raw_data = FileRawData(
Expand Down Expand Up @@ -248,7 +251,10 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]:


def parse_to_binary_ast(
filename: str, options: Options, skip_function_bodies: bool = False
filename: str,
options: Options,
source: str | bytes | None = None,
skip_function_bodies: bool = False,
) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool, str, list[tuple[int, str]]]:
# This is a horrible hack to work around a mypyc bug where imported
# module may be not ready in a thread sometimes.
Expand All @@ -259,6 +265,7 @@ def parse_to_binary_ast(
raise ImportError("Cannot import ast_serialize")
ast_bytes, errors, ignores, import_bytes, ast_data = ast_serialize.parse(
filename,
source,
skip_function_bodies=skip_function_bodies,
python_version=options.python_version,
platform=options.platform,
Expand Down
35 changes: 15 additions & 20 deletions mypy/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ def parse(
module: str | None,
errors: Errors,
options: Options,
file_exists: bool,
eager: bool = False,
) -> MypyFile:
"""Parse a source file, without doing any semantic analysis.
Expand All @@ -29,25 +28,21 @@ def parse(
the parse errors, use eager=True.
"""
if options.native_parser:
# Native parser only works with actual files on disk
# Fall back to fastparse for in-memory source or non-existent files
if file_exists:
import mypy.nativeparse

ignore_errors = options.ignore_errors or fnam in errors.ignored_files
# If errors are ignored, we can drop many function bodies to speed up type checking.
strip_function_bodies = ignore_errors and not options.preserve_asts
tree, _, _ = mypy.nativeparse.native_parse(
fnam, options, skip_function_bodies=strip_function_bodies
)
# Set is_stub based on file extension
tree.is_stub = fnam.endswith(".pyi")
# Note: tree.imports is populated directly by load_from_raw() with deserialized
# import metadata, so we don't need to collect imports via AST traversal
if eager and tree.raw_data is not None:
tree = load_from_raw(fnam, module, tree.raw_data, errors, options)
return tree
# Fall through to fastparse for non-existent files
import mypy.nativeparse

ignore_errors = options.ignore_errors or fnam in errors.ignored_files
# If errors are ignored, we can drop many function bodies to speed up type checking.
strip_function_bodies = ignore_errors and not options.preserve_asts
tree, _, _ = mypy.nativeparse.native_parse(
fnam, options, source, skip_function_bodies=strip_function_bodies
)
# Set is_stub based on file extension
tree.is_stub = fnam.endswith(".pyi")
# Note: tree.imports is populated directly by load_from_raw() with deserialized
# import metadata, so we don't need to collect imports via AST traversal
if eager and tree.raw_data is not None:
tree = load_from_raw(fnam, module, tree.raw_data, errors, options)
return tree

if options.transform_source is not None:
source = options.transform_source(source)
Expand Down
8 changes: 1 addition & 7 deletions mypy/stubgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1745,13 +1745,7 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None:
source = mypy.util.decode_python_encoding(data)
errors = Errors(mypy_options)
mod.ast = mypy.parse.parse(
source,
fnam=mod.path,
module=mod.module,
errors=errors,
options=mypy_options,
file_exists=True,
eager=True,
source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options, eager=True
)
mod.ast._fullname = mod.module
if errors.is_blockers():
Expand Down
30 changes: 24 additions & 6 deletions mypy/test/test_nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_parser(testcase: DataDrivenTestCase) -> None:

try:
with temp_source(source) as fnam:
node, errors, type_ignores = native_parse(fnam, options, skip_function_bodies)
node, errors, type_ignores = native_parse(fnam, options, None, skip_function_bodies)
errors += load_tree(node, options)
node.path = "main"
a = node.str_with_options(options).split("\n")
Expand Down Expand Up @@ -234,7 +234,7 @@ def format_reachable_imports(node: MypyFile) -> list[str]:

@unittest.skipUnless(has_nativeparse, "nativeparse not available")
class TestNativeParserBinaryFormat(unittest.TestCase):
def test_trivial_binary_data(self) -> None:
def _assert_trivial_binary_data(self, b: bytes, /) -> None:
# A quick sanity check to ensure the serialized data looks as expected. Only covers
# a few AST nodes.

Expand All @@ -250,9 +250,9 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
int_enc(end_column - start_column),
]

with temp_source("print('hello')") as fnam:
b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
assert list(b) == (
self.assertEqual(
list(b),
(
[LITERAL_INT, 22, nodes.EXPR_STMT, nodes.CALL_EXPR]
+ [nodes.NAME_EXPR, LITERAL_STR]
+ [int_enc(5)]
Expand All @@ -269,7 +269,25 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
+ [LIST_GEN, 22, LITERAL_NONE]
+ locs(1, 0, 1, 14)
+ [END_TAG, END_TAG]
)
),
)

def test_trivial_binary_data_from_file(self) -> None:
with temp_source("print('hello')") as fnam:
b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
self._assert_trivial_binary_data(b)

def test_trivial_binary_data_from_string_source(self) -> None:
b, _, _, _, _, _, _, _ = parse_to_binary_ast("", Options(), "print('hello')")
self._assert_trivial_binary_data(b)

def test_trivial_binary_data_from_bytes_source(self) -> None:
b, _, _, _, _, _, _, _ = parse_to_binary_ast("", Options(), b"print('hello')")
self._assert_trivial_binary_data(b)

def test_invalid_bytes_raises(self) -> None:
with self.assertRaises(UnicodeDecodeError):
parse_to_binary_ast("", Options(), b"\xff")


@contextlib.contextmanager
Expand Down
2 changes: 0 additions & 2 deletions mypy/test/testparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def test_parser(testcase: DataDrivenTestCase) -> None:
module="__main__",
errors=errors,
options=options,
file_exists=False,
eager=True,
)
if errors.is_errors():
Expand Down Expand Up @@ -108,7 +107,6 @@ def test_parse_error(testcase: DataDrivenTestCase) -> None:
"__main__",
errors=errors,
options=options,
file_exists=False,
eager=True,
)
if errors.is_errors():
Expand Down
Loading