-
-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Allow nativeparse to parse source code directly #21260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
2e793ae
50b0860
ac275e4
149e459
47e45f7
96e8e07
beba478
99c7610
5c45fe2
8e53191
b029c44
d1691cc
2a523b5
6d72fff
1a28227
cb1bcae
df34d0b
49c65b8
f47a898
8cfffa0
b9cc0b5
20b035e
52422db
d155dc6
630afc8
efa9a63
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -163,7 +163,7 @@ | |
| from mypy.modules_state import modules_state | ||
| from mypy.nodes import Expression | ||
| from mypy.options import Options | ||
| from mypy.parse import load_from_raw, parse | ||
| from mypy.parse import load_from_raw, parse, parse_native | ||
| from mypy.plugin import ChainedPlugin, Plugin, ReportConfigContext | ||
| from mypy.plugins.default import DefaultPlugin | ||
| from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor | ||
|
|
@@ -1024,35 +1024,30 @@ def parse_all(self, states: list[State], post_parse: bool = True) -> None: | |
| self.post_parse_all(states) | ||
| return | ||
|
|
||
| sequential_states = [] | ||
| parallel_states = [] | ||
| for state in states: | ||
| if state.tree is not None: | ||
| # The file was already parsed. | ||
| continue | ||
| if not self.fscache.exists(state.xpath, real_only=True): | ||
| # New parser only supports parsing on-disk files. | ||
| sequential_states.append(state) | ||
| state.needs_parse = False | ||
| continue | ||
| parallel_states.append(state) | ||
| if len(parallel_states) > 1: | ||
| self.parse_parallel(sequential_states, parallel_states) | ||
| else: | ||
| self.parse_parallel(parallel_states) | ||
| elif len(parallel_states) == 1: | ||
| # Avoid using executor when there is no parallelism. | ||
| for state in states: | ||
| state.parse_file() | ||
| parallel_states[0].parse_file() | ||
| if post_parse: | ||
| self.post_parse_all(states) | ||
|
|
||
| def parse_parallel(self, sequential_states: list[State], parallel_states: list[State]) -> None: | ||
| def parse_parallel(self, parallel_states: list[State]) -> None: | ||
| """Perform parallel parsing of states. | ||
|
|
||
| Note: this duplicates a bit of logic from State.parse_file(). This is done | ||
| as an optimization to parallelize only those parts of the code that can be | ||
| parallelized efficiently. | ||
| """ | ||
| parallel_parsed_states, parallel_parsed_states_set = self.parse_files_threaded_raw( | ||
| sequential_states, parallel_states | ||
| parallel_states | ||
| ) | ||
|
|
||
| for state in parallel_parsed_states: | ||
|
|
@@ -1097,12 +1092,9 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S | |
| state.check_blockers() | ||
| state.setup_errors() | ||
|
|
||
| def parse_files_threaded_raw( | ||
| self, sequential_states: list[State], parallel_states: list[State] | ||
| ) -> tuple[list[State], set[State]]: | ||
| """Parse files using a thread pool. | ||
| def parse_files_threaded_raw(self, states: list[State]) -> tuple[list[State], set[State]]: | ||
| """Parse files in parallel using a thread pool. | ||
|
|
||
| Also parse sequential states while waiting for the parallel results. | ||
| Trees from the new parser are left in raw (serialized) form. | ||
|
|
||
| Return (list, set) of states that were actually parsed (not cached). | ||
|
|
@@ -1118,25 +1110,23 @@ def parse_files_threaded_raw( | |
| # parse_file_inner() results in no visible improvement with more than 8 threads. | ||
| # TODO: reuse thread pool and/or batch small files in single submit() call. | ||
| with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor: | ||
| for state in parallel_states: | ||
| for state in states: | ||
| state.needs_parse = False | ||
| if state.id not in self.ast_cache: | ||
| self.log(f"Parsing {state.xpath} ({state.id})") | ||
| ignore_errors = state.ignore_all or state.options.ignore_errors | ||
| if ignore_errors: | ||
| self.errors.ignored_files.add(state.xpath) | ||
| futures.append(executor.submit(state.parse_file_inner, "")) | ||
| futures.append( | ||
| executor.submit(state.parse_file_inner, state.source, parallel=True) | ||
| ) | ||
| parallel_parsed_states.append(state) | ||
| parallel_parsed_states_set.add(state) | ||
| else: | ||
| self.log(f"Using cached AST for {state.xpath} ({state.id})") | ||
| state.tree, state.early_errors, source_hash = self.ast_cache[state.id] | ||
| state.source_hash = source_hash | ||
|
|
||
| # Parse sequential before waiting on parallel. | ||
| for state in sequential_states: | ||
| state.parse_file() | ||
|
|
||
| for fut in wait(futures).done: | ||
| fut.result() | ||
|
|
||
|
|
@@ -1279,21 +1269,32 @@ def parse_file( | |
| self, | ||
| id: str, | ||
| path: str, | ||
| source: str, | ||
| source: str | None, | ||
| options: Options, | ||
| raw_data: FileRawData | None = None, | ||
| parallel: bool = False, | ||
|
bzoracler marked this conversation as resolved.
Outdated
|
||
| ) -> MypyFile: | ||
| """Parse the source of a file with the given name. | ||
|
|
||
| Raise CompileError if there is a parse error. | ||
| """ | ||
| file_exists = self.fscache.exists(path, real_only=True) | ||
|
bzoracler marked this conversation as resolved.
|
||
| t0 = time.time() | ||
| if raw_data: | ||
| # If possible, deserialize from known binary data instead of parsing from scratch. | ||
| tree = load_from_raw(path, id, raw_data, self.errors, options) | ||
| else: | ||
| tree = parse(source, path, id, self.errors, options=options, file_exists=file_exists) | ||
| if source is not None: | ||
| tree = parse(source, path, id, self.errors, options=options) | ||
| else: | ||
| assert parallel | ||
| if not os.path.exists(path): | ||
| build_error( | ||
| "Cannot read file '{}': {}".format( | ||
| path.replace(os.getcwd() + os.sep, ""), | ||
| os.strerror(2), # `errno.ENOENT` | ||
| ) | ||
| ) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is temporary; I plan on making […]
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this is a good place for this check. This is executed in a thread; instead, it should be done before parsing, to match the existing logic. |
||
| tree = parse_native(source, path, id, self.errors, options=options) | ||
| tree._fullname = id | ||
| if self.stats_enabled: | ||
| with self.stats_lock: | ||
|
|
@@ -3192,10 +3193,12 @@ def get_source(self) -> str: | |
| self.time_spent_us += time_spent_us(t0) | ||
| return source | ||
|
|
||
| def parse_file_inner(self, source: str, raw_data: FileRawData | None = None) -> None: | ||
| def parse_file_inner( | ||
| self, source: str | None, raw_data: FileRawData | None = None, parallel: bool = False | ||
| ) -> None: | ||
| t0 = time_ref() | ||
| self.tree = self.manager.parse_file( | ||
| self.id, self.xpath, source, options=self.options, raw_data=raw_data | ||
| self.id, self.xpath, source, self.options, raw_data, parallel | ||
| ) | ||
| self.time_spent_us += time_spent_us(t0) | ||
|
|
||
|
|
@@ -3319,9 +3322,7 @@ def semantic_analysis_pass1(self) -> None: | |
| # | ||
| # TODO: This should not be considered as a semantic analysis | ||
| # pass -- it's an independent pass. | ||
| if not options.native_parser or not self.manager.fscache.exists( | ||
| self.xpath, real_only=True | ||
| ): | ||
| if not options.native_parser: | ||
| analyzer = SemanticAnalyzerPreAnalysis() | ||
| with self.wrap_context(): | ||
| analyzer.visit_file(self.tree, self.xpath, self.id, options) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.