Introduce Python code generator for OpenAPI spec to dataclasses

Add a [Python code generator][1] that takes an OpenAPI definition and
outputs the corresponding data model as [dataclasses][2]

This is intended to be used in the Remote Asset Library project, to
create the data model used to download, parse, and validate information
about a remote asset library.

[1]: https://koxudaxi.github.io/datamodel-code-generator/
[2]: https://docs.python.org/3/library/dataclasses.html

## Running the Generator

The generator is a Python script, which creates its own Python
virtualenv, installs the dependencies it needs, and then runs the
generator within that virtualenv.

The script is intended to run via the `generate_datamodels` CMake
target. For example, `ninja generate_datamodels` in the build
directory.

## Details

The virtualenv is created in Blender's build directory, and is not
cleaned up after running. This means that subsequent runs will just
use it directly, instead of reinstalling dependencies on every run.

## Generated Code & Interaction with Build System

It is my intention that the code generation _only_ happens when the
OpenAPI specification changes. This means that the generated code will
be committed to Git like any hand-written code. Building Blender will
therefore _not_ require the code generator to run. Only people working
on the area that uses the generated code will have to deal with this.

Pull Request: https://projects.blender.org/blender/blender/pulls/139495
This commit is contained in:
Sybren A. Stüvel
2025-08-01 16:33:56 +02:00
parent 24a7c42766
commit 3ca28acbb3
8 changed files with 462 additions and 0 deletions

2
.gitattributes vendored
View File

@@ -99,3 +99,5 @@
*.pdf filter=lfs diff=lfs merge=lfs -text
*.dat filter=lfs diff=lfs merge=lfs -text
*.csv filter=lfs diff=lfs merge=lfs -text
scripts/modules/_bpy_internal/assets/remote_library_listing/*_openapi.py linguist-generated=true

View File

@@ -2803,6 +2803,12 @@ setup_heavy_lib_pool()
include(build_files/cmake/packaging.cmake)
# -----------------------------------------------------------------------------
# OpenAPI-based Python code generator for data models
include(build_files/cmake/generate_datamodels.cmake)
# -----------------------------------------------------------------------------
# Print Final Configuration

View File

@@ -0,0 +1,13 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# Path of the self-bootstrapping generator script.
set(GENERATE_DATAMODELS_SCRIPT "${CMAKE_SOURCE_DIR}/tools/utils/make_generate_datamodels.py")

# Manual target: run via `<buildtool> generate_datamodels` in the build
# directory. This is intentionally NOT part of the regular build; it only
# needs to run when an OpenAPI YAML file changes.
add_custom_target(generate_datamodels
  COMMAND ${PYTHON_EXECUTABLE} ${GENERATE_DATAMODELS_SCRIPT} ${CMAKE_SOURCE_DIR}
  DEPENDS ${GENERATE_DATAMODELS_SCRIPT}
  # Run in the build directory: the script creates its virtualenv in CWD.
  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  COMMENT "Generating datamodels"
  VERBATIM
)

View File

@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -0,0 +1,98 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Generated by datamodel-codegen:
# source filename: blender_asset_library_openapi.yaml
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
@dataclass
class Contact:
    # Generated from the OpenAPI `Contact` schema: owner / publisher of an
    # asset library. Only `name` is required by the schema.
    name: str
    url: Optional[str] = None
    email: Optional[str] = None
@dataclass
class AssetLibraryMeta:
    # Generated from the OpenAPI `AssetLibraryMeta` schema: meta-data of an
    # asset library. `api_versions` maps version names ("v1", "v2", ...) to
    # their index file paths, per the schema description below.
    api_versions: dict[str, str]
    name: str
    contact: Contact
# This OpenAPI specification was used to generate the above code.
# It is here so that Blender does not have to parse the YAML file.
# NOTE: this dict is appended automatically by the generator script
# (`tools/utils/make_generate_datamodels.py`); do not edit it by hand.
OPENAPI_SPEC = {
    'openapi': '3.0.0',
    'info': {
        'version': '1.0.0',
        'title': 'Blender Asset Library API',
        'description': "Blender's API for describing and fetching assets from online libraries.",
        'contact': {
            'name': 'Blender',
            'url': 'https://www.blender.org/'},
        'license': {
            'name': 'GPLv3',
            'url': 'https://www.gnu.org/licenses/gpl-3.0.en.html'}},
    'servers': [
        {
            'url': '/'}],
    'paths': {
        '/_asset-library-meta.json': {
            'summary': 'Meta-information about this asset library.',
            'get': {
                'summary': 'Retrieve the asset library meta info.',
                'operationId': 'getLibraryMeta',
                'responses': {
                    '200': {
                        'description': 'normal response',
                        'content': {
                            'application/json': {
                                'schema': {
                                    '$ref': '#/components/schemas/AssetLibraryMeta'}}}}}}}},
    'components': {
        'schemas': {
            'AssetLibraryMeta': {
                'type': 'object',
                'description': 'Meta-data of this asset library.',
                'properties': {
                    'api_versions': {
                        'type': 'object',
                        'description': 'API versions of this asset library. This is reflected in the URLs of all OpenAPI operations except the one to get this metadata.\nA single asset library can expose multiple versions, in order to be backward-compatible with older versions of Blender.\nProperties should be "v1", "v2", etc. and their values should point to their respective index files.\n',
                        'additionalProperties': {
                            'type': 'string'},
                        'patternProperties': {
                            '^v[0-9]+$': {
                                'type': 'string'}}},
                    'name': {
                        'type': 'string',
                        'description': 'Name of this asset library.'},
                    'contact': {
                        '$ref': '#/components/schemas/Contact'}},
                'required': [
                    'api_versions',
                    'name',
                    'contact'],
                'example': {
                    'api_versions': {
                        'v1': '_v1/asset-index.json'},
                    'name': 'Blender Essentials',
                    'contact': {
                        'name': 'Blender',
                        'url': 'https://www.blender.org/'}}},
            'Contact': {
                'type': 'object',
                'description': 'Owner / publisher of this asset library.',
                'properties': {
                    'name': {
                        'type': 'string'},
                    'url': {
                        'type': 'string'},
                    'email': {
                        'type': 'string'}},
                'required': ['name']}}}}

View File

@@ -0,0 +1,83 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# This is the OpenAPI specification for Blender's Remote Assets system.
#
# It has been intentionally trimmed down to the bare minimum for a review of the
# code generator in general, and how it integrates into Blender's build system.
#
# The `paths` section is not used by the Blender code, and is here just for
# referencing by humans. The Python code generator just uses the data structures
# specified by the `components` section.
openapi: 3.0.0
info:
version: 1.0.0
title: Blender Asset Library API
description: Blender's API for describing and fetching assets from online libraries.
contact:
name: Blender
url: https://www.blender.org/
license:
name: GPLv3
url: https://www.gnu.org/licenses/gpl-3.0.en.html
servers:
- url: /
paths:
/_asset-library-meta.json:
summary: Meta-information about this asset library.
get:
summary: Retrieve the asset library meta info.
operationId: getLibraryMeta
responses:
"200":
description: normal response
content:
application/json:
schema:
$ref: "#/components/schemas/AssetLibraryMeta"
components:
schemas:
AssetLibraryMeta:
type: object
description: "Meta-data of this asset library."
properties:
"api_versions":
type: object
description: >
API versions of this asset library. This is reflected in the URLs of
all OpenAPI operations except the one to get this metadata.
A single asset library can expose multiple versions, in order to be
backward-compatible with older versions of Blender.
Properties should be "v1", "v2", etc. and their values should point
to their respective index files.
additionalProperties:
type: string
patternProperties:
"^v[0-9]+$":
type: string
"name":
type: string
description: Name of this asset library.
"contact": { $ref: "#/components/schemas/Contact" }
required: [api_versions, name, contact]
example:
api_versions:
v1: _v1/asset-index.json
name: Blender Essentials
contact:
name: Blender
url: https://www.blender.org/
Contact:
type: object
description: Owner / publisher of this asset library.
properties:
"name": { type: string }
"url": { type: string }
"email": { type: string }
required: [name]

View File

@@ -0,0 +1,254 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
"""Self-bootstrapping script to run the OpenAPI-to-dataclasses code generator.
Run this via `<your buildtool> generate_datamodels` in your build directory.
This script creates its own virtualenv, installs its dependencies, and then runs
the code generator. It processes OpenAPI spec files in YAML format (see
`YAML_PATHS` below) to generate Python source files. Each `xxx.yaml` file will
produce an `xxx.py` file in the same directory. These Python files also include
the OpenAPI spec, as a Python dict.
The generated Python files are tracked by Git. This generator is NOT part of the
regular Blender build process, and only needs to be run when any of the YAML
files change.
"""
__all__ = (
"main",
)
import argparse
from pathlib import Path
import sys
import time
# Paths of the OpenAPI YAML files to convert to Python code. These are relative
# to Blender's top level source directory.
#
# The generated Python files will be written to the same path, just with the
# `.py` suffix.
#
# When adding a file here, make sure it is named `..._openapi.yaml`. That way
# the corresponding `.py` file is automatically marked as 'generated' in
# `.gitattributes`.
YAML_PATHS = [
    "scripts/modules/_bpy_internal/assets/remote_library_listing/blender_asset_library_openapi.yaml",
]

# Packages to install in the virtualenv. These are only necessary to run this
# generator. The generated code does not depend on these.
REQUIREMENTS = [
    "datamodel-code-generator ~= 0.28.2",
    "PyYAML ~= 6.0.2",
]

# These arguments are quite likely to be used for all code generated with this
# generator, also later when we use this approach in other areas.
COMMON_ARGS = [
    # Because of the Blender code standard:
    "--use-double-quotes",
    # Make it strict unless there's a good reason not to:
    "--strict-nullable",
    # Ignore unknown fields in the parsed JSON. This way, the code generated now
    # has a chance of being compatible with future versions of the schema (at
    # least, when that future version just adds new stuff).
    "--allow-extra-fields",
    # Automatically target the currently-running version of Python:
    f"--target-python-version={sys.version_info.major}.{sys.version_info.minor}",
    # Use `list[T]` instead of `typing.List[T]`:
    "--use-standard-collections",
    # Because we use dataclasses:
    "--output-model-type", "dataclasses.dataclass",
    # Work around https://github.com/koxudaxi/datamodel-code-generator/issues/1870#issuecomment-2775689249
    "--use-annotated",
    # Remove the "generated on" timestamp from the output, so that running the
    # generator is idempotent.
    "--disable-timestamp",
]

# Header template prepended to every generated file; `{year}` and
# `{source_path.name}` are filled in by `_generate_datamodel()`.
CUSTOM_FILE_HEADER = """
# SPDX-FileCopyrightText: {year!s} Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Generated by datamodel-codegen:
# source filename: {source_path.name!s}
"""
def main() -> None:
    """Generate Python data models for every spec in `YAML_PATHS`.

    For each `xxx.yaml` file this writes an `xxx.py` file next to it,
    containing the generated dataclasses plus the OpenAPI spec itself as a
    Python dict, and finally reformats the generated files.

    Raises SystemExit when the given source root is not a directory.
    """
    argparser = argparse.ArgumentParser(description="Run the datamodel code generator.")
    # Double quotes, matching the Blender code standard this script itself
    # enforces via `--use-double-quotes`.
    argparser.add_argument("source_root", type=Path, help="The root of Blender's source directory")
    args = argparser.parse_args(sys.argv[1:])

    root_path: Path = args.source_root.resolve()
    if not root_path.is_dir():
        raise SystemExit("Path {!s} should be a directory".format(root_path))

    print("Generating data model files:")
    py_paths: list[Path] = []
    for yaml_relpath in YAML_PATHS:
        yaml_path = root_path / yaml_relpath
        py_path = yaml_path.with_suffix(".py")
        py_paths.append(py_path)

        print(f" {yaml_path.relative_to(root_path)} -> {py_path.name}")
        _generate_datamodel(
            in_path=yaml_path,
            in_type="openapi",
            out_path=py_path,
        )
        _append_openapi_spec(yaml_path, py_path)

    # Make sure that output from subprocesses is flushed, before outputting more
    # below. This prevents stderr and stdout going out of sync, ensuring things
    # are shown in chronological order (i.e. generating files before
    # reformatting them).
    sys.stderr.flush()
    sys.stdout.flush()

    _format_python_files(root_path, py_paths)
    print("Done generating data model files!")


def _append_openapi_spec(yaml_path: Path, py_path: Path) -> None:
    """Append the OpenAPI spec as a Python dict to the generated module.

    Having the spec available as a Python dictionary makes runtime validation
    possible without having to parse the YAML or JSON file later.
    """
    # Late import, as PyYAML is only available once inside the virtualenv.
    import yaml

    with yaml_path.open() as yamlfile:
        openapi_spec = yaml.safe_load(yamlfile)
    with py_path.open("a") as outfile:
        print(file=outfile)
        print("# This OpenAPI specification was used to generate the above code.", file=outfile)
        print("# It is here so that Blender does not have to parse the YAML file.", file=outfile)
        print("OPENAPI_SPEC = {!r}".format(openapi_spec), file=outfile)


def _format_python_files(root_path: Path, py_paths: list[Path]) -> None:
    """Reformat the generated Python files via Blender's `make format` target.

    Autopep8 (used by Blender) does not seem to re-wrap long lines, which is
    why this relies on the `format` target also running ruff.
    NOTE(review): confirm the `format` target actually runs ruff first.
    """
    print("Formatting Python files")
    py_paths_as_str = [str(path) for path in py_paths]
    subprocess.run(
        ["make", "format", "PATHS={}".format(" ".join(py_paths_as_str))],
        cwd=root_path,
        check=True,
    )
def _generate_datamodel(in_path: Path, in_type: str, out_path: Path) -> None:
    """Invoke datamodel-codegen to turn one spec file into Python dataclasses.

    Raises SystemExit on generation failure, KeyboardInterrupt when the
    generator reports it was interrupted.
    """
    # `type: ignore` because these imports only resolve inside the
    # self-managed virtualenv; static checkers cannot see them.
    from datamodel_code_generator.__main__ import Exit  # type: ignore
    from datamodel_code_generator.__main__ import main as codegen_main  # type: ignore

    file_header = CUSTOM_FILE_HEADER.strip().format(
        year=time.localtime().tm_year,
        source_path=in_path,
    )

    codegen_args = COMMON_ARGS + [
        "--input", str(in_path),
        "--input-file-type", in_type,
        "--output", str(out_path),
        "--custom-file-header", file_header,
    ]

    status = codegen_main(codegen_args)
    if status == Exit.OK:
        return
    if status == Exit.ERROR:
        raise SystemExit("code generation failed")
    if status == Exit.KeyboardInterrupt:
        raise KeyboardInterrupt()
    raise SystemExit(f"unknown result from code generation: {status}")
# --------- Below this point is the self-bootstrapping logic ---------
import importlib.util
import subprocess
import venv
# Name of a module to import, to test whether dependencies have been installed or not.
TEST_INSTALL_MODULE = "datamodel_code_generator"

# Directory for the virtualenv. This script is expected to run with Blender's
# build directory as its working directory, so the virtualenv ends up in the
# build directory and survives between runs.
VENV_DIR = Path("generate_datamodels_venv").resolve()

# Python executable inside the virtual environment. Windows virtualenvs put
# the interpreter under `Scripts/`, other platforms under `bin/`.
VENV_PYTHON = VENV_DIR / "Scripts/python.exe" if sys.platform == "win32" else VENV_DIR / "bin/python"
def _create_virtualenv() -> None:
    """Ensure the virtual environment exists, creating it when necessary."""
    if not VENV_DIR.exists():
        print(f"Creating virtual environment at {VENV_DIR}")
        venv.create(VENV_DIR, with_pip=True)
def _install_dependencies() -> None:
    """Install the generator's dependencies into the virtual environment."""
    print("Installing dependencies")
    # Pip doesn't like to be used as Python library, invoking it via the CLI is the best option.
    venv_python = str(VENV_PYTHON)
    _run_command(venv_python, "-m", "pip", "install", "--upgrade", "pip")
    _run_command(venv_python, "-m", "pip", "install", "--upgrade", *REQUIREMENTS)
def _is_dependency_installed(package: str) -> bool:
"""Try importing a package to check if it is installed."""
return importlib.util.find_spec(package) is not None
def _is_running_in_virtualenv() -> bool:
"""Check if the script is running inside the virtual environment."""
return sys.prefix != sys.base_prefix # Virtualenv modifies `sys.prefix`
def _run_command(*cmd: str) -> None:
"""Run a shell command and handle errors."""
try:
subprocess.run(cmd, check=True, text=True)
except subprocess.CalledProcessError as e:
print(f"Error running command: {' '.join(cmd)}", file=sys.stderr)
print(f"Exit code: {e.returncode}", file=sys.stderr)
sys.exit(e.returncode)
if __name__ == "__main__":
_create_virtualenv()
if not _is_running_in_virtualenv():
print(f"Re-executing inside virtual environment at {VENV_DIR}")
_run_command(str(VENV_PYTHON), *sys.argv)
sys.exit()
if not _is_dependency_installed(TEST_INSTALL_MODULE):
_install_dependencies()
# The virtual environment is active, so run the main script logic.
main()