Skip to content

Commit aa9b516

Browse files
authored
Write serialized options to a file to broadcast to workers (#21306)
Fixes #21305. In theory we could also send the options over the socket, but they are needed very early in worker startup, so with how the code is organized now it is easier for workers to read them from a file.
1 parent a1d7781 commit aa9b516

3 files changed

Lines changed: 39 additions & 22 deletions

File tree

mypy/build.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import gc
1919
import json
2020
import os
21-
import pickle
2221
import platform
2322
import re
2423
import stat
@@ -44,7 +43,6 @@
4443
final,
4544
)
4645

47-
from librt.base64 import b64encode
4846
from librt.internal import (
4947
cache_version,
5048
read_bool,
@@ -281,7 +279,7 @@ def __init__(self, status_file: str, options_data: str, env: Mapping[str, str])
281279
"-m",
282280
"mypy.build_worker",
283281
f"--status-file={status_file}",
284-
f'--options-data="{options_data}"',
282+
f"--options-data={options_data}",
285283
]
286284
# Return early without waiting, caller must call connect() before using the client.
287285
self.proc = subprocess.Popen(command, env=env)
@@ -389,10 +387,12 @@ def default_flush_errors(
389387
connect_threads = []
390388
# A quasi-unique ID for this specific mypy invocation.
391389
build_id = os.urandom(4).hex()
390+
options_data = None
392391
if options.num_workers > 0:
393-
# TODO: switch to something more efficient than pickle (also in the daemon).
394-
pickled_options = pickle.dumps(options.snapshot())
395-
options_data = b64encode(pickled_options).decode()
392+
os.makedirs(options.cache_dir, exist_ok=True)
393+
options_data = os_path_join(options.cache_dir, f".worker_options.{build_id}.data")
394+
with open(options_data, "wb") as f:
395+
f.write(options.to_bytes())
396396
workers = [
397397
WorkerClient(
398398
f".mypy_worker.{build_id}.{idx}.json", options_data, worker_env or os.environ
@@ -448,6 +448,8 @@ def connect(wc: WorkerClient, data: bytes) -> None:
448448
# shut them down cleanly. Otherwise, they will linger until connection timeout.
449449
for thread in connect_threads:
450450
thread.join()
451+
if options_data is not None:
452+
os.unlink(options_data)
451453
for worker in workers:
452454
if not worker.connected:
453455
continue
@@ -560,10 +562,11 @@ def build_inner(
560562
def warn_unused_configs(
561563
options: Options, flush_errors: Callable[[str | None, list[str], bool], None]
562564
) -> None:
563-
if options.warn_unused_configs and options.unused_configs and not options.non_interactive:
565+
unused_configs = options.get_unused_configs()
566+
if options.warn_unused_configs and unused_configs and not options.non_interactive:
564567
unused = get_config_module_names(
565568
options.config_file,
566-
[glob for glob in options.per_module_options.keys() if glob in options.unused_configs],
569+
[glob for glob in options.per_module_options.keys() if glob in unused_configs],
567570
)
568571
flush_errors(
569572
None, ["{}: note: unused section(s): {}".format(options.config_file, unused)], False

mypy/build_worker/worker.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* In a loop:
1212
- Receive an SCC id from coordinator, and start processing it.
1313
- SCC is processed in two phases: interface and implementation, send a response after each.
14-
- When prompted by coordinator (with a scc_id=None message), cleanup and shutdown.
14+
- When prompted by coordinator (with a scc_ids=[] message), cleanup and shutdown.
1515
"""
1616

1717
from __future__ import annotations
@@ -20,13 +20,11 @@
2020
import gc
2121
import json
2222
import os
23-
import pickle
2423
import platform
2524
import sys
2625
import time
2726
from typing import NamedTuple
2827

29-
from librt.base64 import b64decode
3028
from librt.internal import ReadBuffer, read_tag
3129

3230
from mypy import util
@@ -48,7 +46,7 @@
4846
process_stale_scc_implementation,
4947
process_stale_scc_interface,
5048
)
51-
from mypy.cache import Tag, read_int_list
49+
from mypy.cache import Tag, read_int_list, read_json
5250
from mypy.defaults import RECURSION_LIMIT, WORKER_CONNECTION_TIMEOUT, WORKER_IDLE_TIMEOUT
5351
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
5452
from mypy.fscache import FileSystemCache
@@ -61,7 +59,7 @@
6159

6260
parser = argparse.ArgumentParser(prog="mypy_worker", description="Mypy build worker")
6361
parser.add_argument("--status-file", help="status file to communicate worker details")
64-
parser.add_argument("--options-data", help="serialized mypy options")
62+
parser.add_argument("--options-data", help="file with serialized mypy options")
6563

6664
CONNECTION_NAME = "build_worker"
6765

@@ -85,11 +83,12 @@ def main(argv: list[str]) -> None:
8583
# This mimics how daemon receives the options. Note we need to postpone
8684
# processing error codes after plugins are loaded, because plugins can add
8785
# custom error codes.
88-
options_dict = pickle.loads(b64decode(args.options_data))
89-
options_obj = Options()
86+
with open(args.options_data, "rb") as f:
87+
buf = ReadBuffer(f.read())
88+
options_dict = read_json(buf)
9089
disable_error_code = options_dict.pop("disable_error_code", [])
9190
enable_error_code = options_dict.pop("enable_error_code", [])
92-
options = options_obj.apply_changes(options_dict)
91+
options = Options().apply_changes(options_dict)
9392

9493
status_file = args.status_file
9594
server = IPCServer(CONNECTION_NAME, WORKER_CONNECTION_TIMEOUT)

mypy/options.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from librt.internal import WriteBuffer, write_bool, write_str
1212

1313
from mypy import defaults
14+
from mypy.cache import write_json
1415
from mypy.errorcodes import ErrorCode, error_codes
1516
from mypy.util import get_class_descriptors, replace_object_state
1617

@@ -343,7 +344,7 @@ def __init__(self) -> None:
343344
# Per-module options (raw)
344345
self.per_module_options: dict[str, dict[str, object]] = {}
345346
self._glob_options: list[tuple[str, Pattern[str]]] = []
346-
self.unused_configs: set[str] = set()
347+
self._unused_configs: set[str] = set()
347348

348349
# -- development options --
349350
self.verbosity = 0 # More verbose messages (for troubleshooting)
@@ -451,6 +452,17 @@ def snapshot(self) -> dict[str, object]:
451452
d = {k: v for k, v in d.items() if not k.startswith("_")}
452453
return d
453454

455+
def to_bytes(self) -> bytes:
456+
"""Serialize this options object to binary data."""
457+
assert self.transform_source is None, "Source transform cannot be serialized"
458+
snapshot = self.snapshot()
459+
# Caller will need to use process_error_codes() to re-compute these.
460+
del snapshot["disabled_error_codes"]
461+
del snapshot["enabled_error_codes"]
462+
buf = WriteBuffer()
463+
write_json(buf, snapshot)
464+
return buf.getvalue()
465+
454466
def __repr__(self) -> str:
455467
return f"Options({pprint.pformat(self.snapshot())})"
456468

@@ -560,7 +572,7 @@ def build_per_module_cache(self) -> None:
560572
# sections as used if any real modules use them or if any
561573
# concrete config sections use them. This means we need to
562574
# track which get used while constructing.
563-
self.unused_configs = set(unstructured_glob_keys)
575+
self._unused_configs = set(unstructured_glob_keys)
564576

565577
for key in wildcards + concrete:
566578
# Find what the options for this key would be, just based
@@ -571,7 +583,7 @@ def build_per_module_cache(self) -> None:
571583

572584
# Add the more structured sections into unused configs, since
573585
# they only count as used if actually used by a real module.
574-
self.unused_configs.update(structured_keys)
586+
self._unused_configs.update(structured_keys)
575587

576588
def clone_for_module(self, module: str) -> Options:
577589
"""Create an Options object that incorporates per-module options.
@@ -585,7 +597,7 @@ def clone_for_module(self, module: str) -> Options:
585597

586598
# If the module just directly has a config entry, use it.
587599
if module in self._per_module_cache:
588-
self.unused_configs.discard(module)
600+
self._unused_configs.discard(module)
589601
return self._per_module_cache[module]
590602

591603
# If not, search for glob paths at all the parents. So if we are looking for
@@ -598,7 +610,7 @@ def clone_for_module(self, module: str) -> Options:
598610
for i in range(len(path), 0, -1):
599611
key = ".".join(path[:i] + ["*"])
600612
if key in self._per_module_cache:
601-
self.unused_configs.discard(key)
613+
self._unused_configs.discard(key)
602614
options = self._per_module_cache[key]
603615
break
604616

@@ -607,7 +619,7 @@ def clone_for_module(self, module: str) -> Options:
607619
if not module.endswith(".*"):
608620
for key, pattern in self._glob_options:
609621
if pattern.match(module):
610-
self.unused_configs.discard(key)
622+
self._unused_configs.discard(key)
611623
options = options.apply_changes(self.per_module_options[key])
612624

613625
# We could update the cache to directly point to modules once
@@ -616,6 +628,9 @@ def clone_for_module(self, module: str) -> Options:
616628

617629
return options
618630

631+
def get_unused_configs(self) -> set[str]:
632+
return self._unused_configs.copy()
633+
619634
def compile_glob(self, s: str) -> Pattern[str]:
620635
# Compile one of the glob patterns to a regex so that '.*' can
621636
# match *zero or more* module sections. This means we compile

0 commit comments

Comments (0)