Introduce Python code generator for OpenAPI spec to dataclasses

Add a [Python code generator][1] that takes an OpenAPI definition and
outputs the corresponding data model as [dataclasses][2]

This is intended to be used in the Remote Asset Library project, to
create the data model used to download, parse, and validate information
about a remote asset library.

[1]: https://koxudaxi.github.io/datamodel-code-generator/
[2]: https://docs.python.org/3/library/dataclasses.html

## Running the Generator

The generator is a Python script, which creates its own Python
virtualenv, installs the dependencies it needs, and then runs the
generator within that virtualenv.

The script is intended to run via the `generate_datamodels` CMake
target. For example, `ninja generate_datamodels` in the build
directory.

## Details

The virtualenv is created in Blender's build directory, and is not
cleaned up after running. This means that subsequent runs will just
use it directly, instead of reinstalling dependencies on every run.

## Generated Code & Interaction with Build System

It is my intention that the code generation _only_ happens when the
OpenAPI specification changes. This means that the generated code will
be committed to Git like any hand-written code. Building Blender will
therefore _not_ require the code generator to run. Only people working
on the area that uses the generated code will have to deal with this.

Pull Request: https://projects.blender.org/blender/blender/pulls/139495
This commit is contained in:
Sybren A. Stüvel
2025-08-01 16:33:56 +02:00
parent 24a7c42766
commit 3ca28acbb3
8 changed files with 462 additions and 0 deletions

2
.gitattributes vendored
View File

@@ -99,3 +99,5 @@
*.pdf filter=lfs diff=lfs merge=lfs -text
*.dat filter=lfs diff=lfs merge=lfs -text
*.csv filter=lfs diff=lfs merge=lfs -text
scripts/modules/_bpy_internal/assets/remote_library_listing/*_openapi.py linguist-generated=true

View File

@@ -2803,6 +2803,12 @@ setup_heavy_lib_pool()
include(build_files/cmake/packaging.cmake)
# -----------------------------------------------------------------------------
# OpenAPI-based Python code generator for data models
include(build_files/cmake/generate_datamodels.cmake)
# -----------------------------------------------------------------------------
# Print Final Configuration

View File

@@ -0,0 +1,13 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# Path of the self-bootstrapping generator script.
set(GENERATE_DATAMODELS_SCRIPT "${CMAKE_SOURCE_DIR}/tools/utils/make_generate_datamodels.py")

# Manual target: run via `<buildtool> generate_datamodels` in the build
# directory. This is intentionally NOT part of the regular build; it only
# needs to run when an OpenAPI YAML file changes.
add_custom_target(generate_datamodels
  COMMAND ${PYTHON_EXECUTABLE} ${GENERATE_DATAMODELS_SCRIPT} ${CMAKE_SOURCE_DIR}
  DEPENDS ${GENERATE_DATAMODELS_SCRIPT}
  # Run in the build directory: the script creates its virtualenv in CWD.
  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  COMMENT "Generating datamodels"
  VERBATIM
)

View File

@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -0,0 +1,98 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Generated by datamodel-codegen:
# source filename: blender_asset_library_openapi.yaml
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
@dataclass
class Contact:
    # Generated from the OpenAPI `Contact` schema: owner / publisher of an
    # asset library. Only `name` is required by the schema.
    name: str
    url: Optional[str] = None
    email: Optional[str] = None
@dataclass
class AssetLibraryMeta:
    # Generated from the OpenAPI `AssetLibraryMeta` schema: meta-data of an
    # asset library. `api_versions` maps version names ("v1", "v2", ...) to
    # their index file paths, per the schema description below.
    api_versions: dict[str, str]
    name: str
    contact: Contact
# This OpenAPI specification was used to generate the above code.
# It is here so that Blender does not have to parse the YAML file.
# NOTE: this dict is appended automatically by the generator script
# (`tools/utils/make_generate_datamodels.py`); do not edit it by hand.
OPENAPI_SPEC = {
    'openapi': '3.0.0',
    'info': {
        'version': '1.0.0',
        'title': 'Blender Asset Library API',
        'description': "Blender's API for describing and fetching assets from online libraries.",
        'contact': {
            'name': 'Blender',
            'url': 'https://www.blender.org/'},
        'license': {
            'name': 'GPLv3',
            'url': 'https://www.gnu.org/licenses/gpl-3.0.en.html'}},
    'servers': [
        {
            'url': '/'}],
    'paths': {
        '/_asset-library-meta.json': {
            'summary': 'Meta-information about this asset library.',
            'get': {
                'summary': 'Retrieve the asset library meta info.',
                'operationId': 'getLibraryMeta',
                'responses': {
                    '200': {
                        'description': 'normal response',
                        'content': {
                            'application/json': {
                                'schema': {
                                    '$ref': '#/components/schemas/AssetLibraryMeta'}}}}}}}},
    'components': {
        'schemas': {
            'AssetLibraryMeta': {
                'type': 'object',
                'description': 'Meta-data of this asset library.',
                'properties': {
                    'api_versions': {
                        'type': 'object',
                        'description': 'API versions of this asset library. This is reflected in the URLs of all OpenAPI operations except the one to get this metadata.\nA single asset library can expose multiple versions, in order to be backward-compatible with older versions of Blender.\nProperties should be "v1", "v2", etc. and their values should point to their respective index files.\n',
                        'additionalProperties': {
                            'type': 'string'},
                        'patternProperties': {
                            '^v[0-9]+$': {
                                'type': 'string'}}},
                    'name': {
                        'type': 'string',
                        'description': 'Name of this asset library.'},
                    'contact': {
                        '$ref': '#/components/schemas/Contact'}},
                'required': [
                    'api_versions',
                    'name',
                    'contact'],
                'example': {
                    'api_versions': {
                        'v1': '_v1/asset-index.json'},
                    'name': 'Blender Essentials',
                    'contact': {
                        'name': 'Blender',
                        'url': 'https://www.blender.org/'}}},
            'Contact': {
                'type': 'object',
                'description': 'Owner / publisher of this asset library.',
                'properties': {
                    'name': {
                        'type': 'string'},
                    'url': {
                        'type': 'string'},
                    'email': {
                        'type': 'string'}},
                'required': ['name']}}}}

View File

@@ -0,0 +1,83 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# This is the OpenAPI specification for Blender's Remote Assets system.
#
# It has been intentionally trimmed down to the bare minimum for a review of the
# code generator in general, and how it integrates into Blender's build system.
#
# The `paths` section is not used by the Blender code, and is here just for
# referencing by humans. The Python code generator just uses the data structures
# specified by the `components` section.
openapi: 3.0.0
info:
version: 1.0.0
title: Blender Asset Library API
description: Blender's API for describing and fetching assets from online libraries.
contact:
name: Blender
url: https://www.blender.org/
license:
name: GPLv3
url: https://www.gnu.org/licenses/gpl-3.0.en.html
servers:
- url: /
paths:
/_asset-library-meta.json:
summary: Meta-information about this asset library.
get:
summary: Retrieve the asset library meta info.
operationId: getLibraryMeta
responses:
"200":
description: normal response
content:
application/json:
schema:
$ref: "#/components/schemas/AssetLibraryMeta"
components:
schemas:
AssetLibraryMeta:
type: object
description: "Meta-data of this asset library."
properties:
"api_versions":
type: object
description: >
API versions of this asset library. This is reflected in the URLs of
all OpenAPI operations except the one to get this metadata.
A single asset library can expose multiple versions, in order to be
backward-compatible with older versions of Blender.
Properties should be "v1", "v2", etc. and their values should point
to their respective index files.
additionalProperties:
type: string
patternProperties:
"^v[0-9]+$":
type: string
"name":
type: string
description: Name of this asset library.
"contact": { $ref: "#/components/schemas/Contact" }
required: [api_versions, name, contact]
example:
api_versions:
v1: _v1/asset-index.json
name: Blender Essentials
contact:
name: Blender
url: https://www.blender.org/
Contact:
type: object
description: Owner / publisher of this asset library.
properties:
"name": { type: string }
"url": { type: string }
"email": { type: string }
required: [name]

View File

@@ -0,0 +1,254 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
"""Self-bootstrapping script to run the OpenAPI-to-dataclasses code generator.
Run this via `<your buildtool> generate_datamodels` in your build directory.
This script creates its own virtualenv, installs its dependencies, and then runs
the code generator. It processes OpenAPI spec files in YAML format (see
`YAML_PATHS` below) to generate Python source files. Each `xxx.yaml` file will
produce an `xxx.py` file in the same directory. These Python files also include
the OpenAPI spec, as a Python dict.
The generated Python files are tracked by Git. This generator is NOT part of the
regular Blender build process, and only needs to be run when any of the YAML
files change.
"""
__all__ = (
"main",
)
import argparse
from pathlib import Path
import sys
import time
# Paths of the OpenAPI YAML files to convert to Python code. These are relative
# to Blender's top level source directory.
#
# The generated Python files will be written to the same path, just with the
# `.py` suffix.
#
# When adding a file here, make sure it is named `..._openapi.yaml`. That way
# the corresponding `.py` file is automatically marked as 'generated' in
# `.gitattributes`.
YAML_PATHS = [
    "scripts/modules/_bpy_internal/assets/remote_library_listing/blender_asset_library_openapi.yaml",
]

# Packages to install in the virtualenv. These are only necessary to run this
# generator. The generated code does not depend on these.
REQUIREMENTS = [
    "datamodel-code-generator ~= 0.28.2",
    "PyYAML ~= 6.0.2",
]

# These arguments are quite likely to be used for all code generated with this
# generator, also later when we use this approach in other areas.
COMMON_ARGS = [
    # Because of the Blender code standard:
    "--use-double-quotes",
    # Make it strict unless there's a good reason not to:
    "--strict-nullable",
    # Ignore unknown fields in the parsed JSON. This way, the code generated now
    # has a chance of being compatible with future versions of the schema (at
    # least, when that future version just adds new stuff).
    "--allow-extra-fields",
    # Automatically target the currently-running version of Python:
    f"--target-python-version={sys.version_info.major}.{sys.version_info.minor}",
    # Use `list[T]` instead of `typing.List[T]`:
    "--use-standard-collections",
    # Because we use dataclasses:
    "--output-model-type", "dataclasses.dataclass",
    # Work around https://github.com/koxudaxi/datamodel-code-generator/issues/1870#issuecomment-2775689249
    "--use-annotated",
    # Remove the "generated on" timestamp from the output, so that running the
    # generator is idempotent.
    "--disable-timestamp",
]

# Header template prepended to every generated file; `{year}` and
# `{source_path.name}` are filled in by `_generate_datamodel()`.
CUSTOM_FILE_HEADER = """
# SPDX-FileCopyrightText: {year!s} Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Generated by datamodel-codegen:
# source filename: {source_path.name!s}
"""
def main() -> None:
    """Generate Python data models for every spec in `YAML_PATHS`.

    For each `xxx.yaml` file this writes an `xxx.py` file next to it,
    containing the generated dataclasses plus the OpenAPI spec itself as a
    Python dict, and finally reformats the generated files.

    Raises SystemExit when the given source root is not a directory.
    """
    argparser = argparse.ArgumentParser(description="Run the datamodel code generator.")
    # Double quotes, matching the Blender code standard this script itself
    # enforces via `--use-double-quotes`.
    argparser.add_argument("source_root", type=Path, help="The root of Blender's source directory")
    args = argparser.parse_args(sys.argv[1:])

    root_path: Path = args.source_root.resolve()
    if not root_path.is_dir():
        raise SystemExit("Path {!s} should be a directory".format(root_path))

    print("Generating data model files:")
    py_paths: list[Path] = []
    for yaml_relpath in YAML_PATHS:
        yaml_path = root_path / yaml_relpath
        py_path = yaml_path.with_suffix(".py")
        py_paths.append(py_path)

        print(f" {yaml_path.relative_to(root_path)} -> {py_path.name}")
        _generate_datamodel(
            in_path=yaml_path,
            in_type="openapi",
            out_path=py_path,
        )
        _append_openapi_spec(yaml_path, py_path)

    # Make sure that output from subprocesses is flushed, before outputting more
    # below. This prevents stderr and stdout going out of sync, ensuring things
    # are shown in chronological order (i.e. generating files before
    # reformatting them).
    sys.stderr.flush()
    sys.stdout.flush()

    _format_python_files(root_path, py_paths)
    print("Done generating data model files!")


def _append_openapi_spec(yaml_path: Path, py_path: Path) -> None:
    """Append the OpenAPI spec as a Python dict to the generated module.

    Having the spec available as a Python dictionary makes runtime validation
    possible without having to parse the YAML or JSON file later.
    """
    # Late import, as PyYAML is only available once inside the virtualenv.
    import yaml

    with yaml_path.open() as yamlfile:
        openapi_spec = yaml.safe_load(yamlfile)
    with py_path.open("a") as outfile:
        print(file=outfile)
        print("# This OpenAPI specification was used to generate the above code.", file=outfile)
        print("# It is here so that Blender does not have to parse the YAML file.", file=outfile)
        print("OPENAPI_SPEC = {!r}".format(openapi_spec), file=outfile)


def _format_python_files(root_path: Path, py_paths: list[Path]) -> None:
    """Reformat the generated Python files via Blender's `make format` target.

    Autopep8 (used by Blender) does not seem to re-wrap long lines, which is
    why this relies on the `format` target also running ruff.
    NOTE(review): confirm the `format` target actually runs ruff first.
    """
    print("Formatting Python files")
    py_paths_as_str = [str(path) for path in py_paths]
    subprocess.run(
        ["make", "format", "PATHS={}".format(" ".join(py_paths_as_str))],
        cwd=root_path,
        check=True,
    )
def _generate_datamodel(in_path: Path, in_type: str, out_path: Path) -> None:
    """Invoke datamodel-codegen to turn one spec file into Python dataclasses.

    Raises SystemExit on generation failure, KeyboardInterrupt when the
    generator reports it was interrupted.
    """
    # `type: ignore` because these imports only resolve inside the
    # self-managed virtualenv; static checkers cannot see them.
    from datamodel_code_generator.__main__ import Exit  # type: ignore
    from datamodel_code_generator.__main__ import main as codegen_main  # type: ignore

    file_header = CUSTOM_FILE_HEADER.strip().format(
        year=time.localtime().tm_year,
        source_path=in_path,
    )

    codegen_args = COMMON_ARGS + [
        "--input", str(in_path),
        "--input-file-type", in_type,
        "--output", str(out_path),
        "--custom-file-header", file_header,
    ]

    status = codegen_main(codegen_args)
    if status == Exit.OK:
        return
    if status == Exit.ERROR:
        raise SystemExit("code generation failed")
    if status == Exit.KeyboardInterrupt:
        raise KeyboardInterrupt()
    raise SystemExit(f"unknown result from code generation: {status}")
# --------- Below this point is the self-bootstrapping logic ---------
import importlib.util
import subprocess
import venv
# Name of a module to import, to test whether dependencies have been installed or not.
TEST_INSTALL_MODULE = "datamodel_code_generator"

# Directory for the virtualenv. This script is expected to run with Blender's
# build directory as its working directory, so the virtualenv ends up in the
# build directory and survives between runs.
VENV_DIR = Path("generate_datamodels_venv").resolve()

# Python executable inside the virtual environment. Windows virtualenvs put
# the interpreter under `Scripts/`, other platforms under `bin/`.
VENV_PYTHON = VENV_DIR / "Scripts/python.exe" if sys.platform == "win32" else VENV_DIR / "bin/python"
def _create_virtualenv() -> None:
    """Ensure the virtual environment exists, creating it when necessary."""
    if not VENV_DIR.exists():
        print(f"Creating virtual environment at {VENV_DIR}")
        venv.create(VENV_DIR, with_pip=True)
def _install_dependencies() -> None:
    """Install the generator's dependencies into the virtual environment."""
    print("Installing dependencies")
    # Pip doesn't like to be used as Python library, invoking it via the CLI is the best option.
    venv_python = str(VENV_PYTHON)
    _run_command(venv_python, "-m", "pip", "install", "--upgrade", "pip")
    _run_command(venv_python, "-m", "pip", "install", "--upgrade", *REQUIREMENTS)
def _is_dependency_installed(package: str) -> bool:
"""Try importing a package to check if it is installed."""
return importlib.util.find_spec(package) is not None
def _is_running_in_virtualenv() -> bool:
"""Check if the script is running inside the virtual environment."""
return sys.prefix != sys.base_prefix # Virtualenv modifies `sys.prefix`
def _run_command(*cmd: str) -> None:
"""Run a shell command and handle errors."""
try:
subprocess.run(cmd, check=True, text=True)
except subprocess.CalledProcessError as e:
print(f"Error running command: {' '.join(cmd)}", file=sys.stderr)
print(f"Exit code: {e.returncode}", file=sys.stderr)
sys.exit(e.returncode)
if __name__ == "__main__":
_create_virtualenv()
if not _is_running_in_virtualenv():
print(f"Re-executing inside virtual environment at {VENV_DIR}")
_run_command(str(VENV_PYTHON), *sys.argv)
sys.exit()
if not _is_dependency_installed(TEST_INSTALL_MODULE):
_install_dependencies()
# The virtual environment is active, so run the main script logic.
main()