Skip to content

ops

define_noteable_dagster_op(name, notebook_id, ins=None, outs=None, config_schema=None, required_resource_keys=None, output_notebook_name=None, asset_key_prefix=None, description=None, tags=None) #

Wrap a Jupyter notebook in an op. Copied from define_dagstermill_op.

Parameters:

Name Type Description Default
name str

The name of the op.

required
notebook_id str

ID of the backing notebook.

required
ins Optional[Mapping[str, In]]

The op's inputs.

None
outs Optional[Mapping[str, Out]]

The op's outputs. Your notebook should call :py:func:~dagstermill.yield_result to yield each of these outputs.

None
config_schema Optional[Union[Any, Dict[str, Any]]]

The op's config schema.

None
required_resource_keys Optional[Set[str]]

The string names of any required resources.

None
output_notebook_name Optional[str]

If set, will be used as the name of an injected output of type :py:class:~dagster.BufferedIOBase that is the file object of the executed notebook (in addition to the :py:class:~dagster.AssetMaterialization that is always created). It allows downstream ops to access the executed notebook via a file object.

None
asset_key_prefix Optional[Union[List[str], str]]

If set, will be used to prefix the asset keys for materialized notebooks.

None
description Optional[str]

If set, description used for op.

None
tags Optional[Dict[str, str]]

If set, additional tags used to annotate op. Dagster uses the tag keys notebook_path and kind, which cannot be overwritten by the user.

None

Returns:

Type Description

:py:class:~dagster.OpDefinition

Source code in noteable_dagstermill/ops.py
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
def define_noteable_dagster_op(
    name: str,
    notebook_id: str,
    ins: Optional[Mapping[str, In]] = None,
    outs: Optional[Mapping[str, Out]] = None,
    config_schema: Optional[Union[Any, Dict[str, Any]]] = None,
    required_resource_keys: Optional[Set[str]] = None,
    output_notebook_name: Optional[str] = None,
    asset_key_prefix: Optional[Union[List[str], str]] = None,
    description: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
):
    """Wrap a Noteable-hosted Jupyter notebook in an op.

    Adapted from `define_dagstermill_op`; the notebook is identified by its
    Noteable ID rather than by a filesystem path.

    Arguments:
        name (str): The name of the op.
        notebook_id (str): ID of the backing notebook. It is embedded in both the
            `noteable://` path handed to the compute function and the browser URL
            recorded on the op.
        ins (Optional[Mapping[str, In]]): The op's inputs.
        outs (Optional[Mapping[str, Out]]): The op's outputs. Your notebook should
            call :py:func:`~dagstermill.yield_result` to yield each of these outputs.
        config_schema (Optional[Union[Any, Dict[str, Any]]]): The op's config schema.
        required_resource_keys (Optional[Set[str]]): The string names of any required resources.
        output_notebook_name (Optional[str]): If set, will be used as the name of an injected
            output of type :py:class:`~dagster.BufferedIOBase` that is the file object of the
            executed notebook (in addition to the :py:class:`~dagster.AssetMaterialization`
            that is always created). It allows downstream ops to access the executed
            notebook via a file object. Setting it also adds the
            `output_notebook_io_manager` resource key to the op's requirements.
        asset_key_prefix (Optional[Union[List[str], str]]): If set, will be used to prefix the
            asset keys for materialized notebooks. A bare string is treated as a
            one-element list.
        description (Optional[str]): If set, description used for op. Defaults to a
            sentence pointing at the notebook's URL.
        tags (Optional[Dict[str, str]]): If set, additional tags used to annotate op.
            Dagster uses the tag keys `notebook_path` and `kind`, which cannot be
            overwritten by the user.
    Returns:
        :py:class:`~dagster.OpDefinition`
    """
    check.str_param(name, "name")

    # TODO: decide whether these should be parameters or come from env vars.
    # The host portion of the URL is read from NOTEABLE_DOMAIN, with a fallback.
    noteable_domain = os.getenv("NOTEABLE_DOMAIN", "app.noteable.io")
    notebook_url = f"https://{noteable_domain}/f/{notebook_id}"
    notebook_path = f"noteable://{notebook_id}"

    # Copy into a fresh set so adding to it below never touches the caller's set.
    resource_keys = set(
        check.opt_set_param(required_resource_keys, "required_resource_keys", of_type=str)
    )
    op_outs = check.opt_mapping_param(outs, "outs", key_type=str, value_type=Out)
    op_ins = check.opt_mapping_param(ins, "ins", key_type=str, value_type=In)

    if output_notebook_name is not None:
        # Expose the executed notebook as one more output, handled by a dedicated IO manager.
        resource_keys.add("output_notebook_io_manager")
        op_outs = dict(op_outs)
        op_outs[output_notebook_name] = Out(io_manager_key="output_notebook_io_manager")

    # A single string prefix is normalised to a one-element list before validation.
    prefix_parts = check.opt_list_param(
        [asset_key_prefix] if isinstance(asset_key_prefix, str) else asset_key_prefix,
        "asset_key_prefix",
        of_type=str,
    )

    op_description = check.opt_str_param(
        description,
        "description",
        default=f"This op is backed by the notebook at {notebook_url}",
    )

    user_tags = validate_tags(tags)
    # Tags this function always sets; the user may not supply either key.
    reserved_tags = {"notebook_path": notebook_url, "kind": "noteable"}
    if tags is not None:
        for reserved_key in reserved_tags:
            check.invariant(
                reserved_key not in tags,
                f"user-defined solid tags contains the `{reserved_key}` key, but the `{reserved_key}` key is reserved for use by Dagster",  # noqa: E501
            )

    compute_fn = _dm_compute(
        "define_noteable_dagster_op",
        name,
        notebook_path,
        output_notebook_name,
        asset_key_prefix=prefix_parts,
    )
    op_tags = {**user_tags, **reserved_tags}

    return OpDefinition(
        name=name,
        compute_fn=compute_fn,
        ins=op_ins,
        outs=op_outs,
        config_schema=config_schema,
        required_resource_keys=resource_keys,
        description=op_description,
        tags=op_tags,
    )