Upgrade to Pro — share decks privately, control downloads, hide ads and more …

gokart Feature Proposal: ConditionalSignificantParameter

gokart Feature Proposal: ConditionalSignificantParameter

A82e268e52c06ad69b83f1a251c682d4?s=128

Keisuke OGAKI

June 11, 2022
Tweet

More Decks by Keisuke OGAKI

Other Decks in Programming

Transcript

  1. gokart Feature Proposal ConditionalSignificantParameter

  2. Proposal _special_value = object() class SpetialOptionalParameter(luigi.OptionalParameter): def __init__(self, *args, **kwargs):

    assert not 'default' in kwargs.keys() super().__init__(*args, default=_special_value, **kwargs) def serialize(self, x): if x == _special_value: return super().serialize(None) else: return super().serialize(x) def parse(self, x): x = super().parse(x) if x is None: return _special_value else: return x class TaskOnKart2(gokart.TaskOnKart): def to_str_params(self, only_significant=False, only_public=False): str_params = super().to_str_params() for param_name, param_value in self.param_kwargs.items(): if param_value == _special_value: del str_params[param_name] return str_params
  3. やりたいこと class Task(TaskOnKart2): a = luigi.OptionalParameter() print(Task(a="a").make_unique_id()) class Task(TaskOnKart2): a

    = luigi.OptionalParameter() cache_key_date = SpetialOptionalParameter() print(Task(a="a").make_unique_id()) print(Task(a="a", cache_key_date="20220531").make_unique_id()) 9be38116057aef53fbe1ba30dd6311f7 9be38116057aef53fbe1ba30dd6311f7 757516f66d7f410d3e771956aeecefce 例えば TaskOnKart: rerun_key = SpetialOptionalParameter() として、後方互換を保ちながら (hash変わらない)、全てのタスクにrerun機能を持たせる
  4. Parameterだけで解決できるか? class OptionalParameter(Parameter): """ A Parameter that treats empty string

    as None """ def serialize(self, x): if x is None: return '' else: return str(x) def parse(self, x): return x or None parse()でクラス自体の変数を変えるのもなぁ。。。
  5. gokart.TaskOnKart.make_unique_id def make_unique_id(self): unique_id = self.task_unique_id or self._make_hash_id() if self.cache_unique_id:

    self.task_unique_id = unique_id return unique_id def _make_hash_id(self): def _to_str_params(task): if isinstance(task, TaskOnKart): return str(task.make_unique_id()) if task.significant else None if not isinstance(task, luigi.Task): raise ValueError(f"Task.requires method returns {type(task)}. You should return luigi.Task.") return task.to_str_params(only_significant=True) dependencies = [_to_str_params(task) for task in luigi.task.flatten(self.requires())] ここに注入するしか
  6. luigi.Task.to_str_params() def to_str_params(self, only_significant=False, only_public=False): """ Convert all parameters to

    a str->str hash. """ params_str = {} params = dict(self.get_params()) for param_name, param_value in self.param_kwargs.items(): if param_value is None: continue if (((not only_significant) or params[param_name].significant) and ((not only_public) or params[param_name].visibility == ParameterVisibility.PUBLIC) and params[param_name].visibility != ParameterVisibility.PRIVATE): params_str[param_name] = params[param_name].serialize(param_value) return params_str とりあえず行ける。 • luigi.Taskに特定のParameterの機能入れるのか • 意図的にNoneを使いたいParameterでも ◦ isinstance(params[param_name])とる? それこそ密結合
  7. Proposal _special_value = object() class SpetialOptionalParameter(luigi.OptionalParameter): def __init__(self, *args, **kwargs):

    assert not 'default' in kwargs.keys() super().__init__(*args, default=_special_value, **kwargs) def serialize(self, x): if x == _special_value: return super().serialize(None) else: return super().serialize(x) def parse(self, x): x = super().parse(x) if x is None: return _special_value else: return x class TaskOnKart2(gokart.TaskOnKart): def to_str_params(self, only_significant=False, only_public=False): str_params = super().to_str_params() for param_name, param_value in self.param_kwargs.items(): if param_value == _special_value: del str_params[param_name] return str_params • luigi.Taskに特定のParameterの機能入れるのか -> to_str_paramsがkey->str表現のdictを返してくれるので、後付けでdelで対応すればgokartレイヤにとってこれる • 意図的にNoneを使いたいParameterでも -> NoneではなくオブジェクトID比較。シリアライズ時は空文字になるので、マルチプロセスでも大丈夫なはず