Last active
December 2, 2021 10:58
-
-
Save jjerphan/6b90a9691e67444adc7a8a658747ebe1 to your computer and use it in GitHub Desktop.
std::vector to numpy array coercion via Cython
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cython compile instructions
#
# Build script for the ``stdvect_to_ndarray`` extension module, whose single
# source file is ``main.pyx``.
import numpy
from setuptools import setup, Extension
from Cython.Build import build_ext

# To compile the extension in place, use:
#
#   python setup.py build_ext --inplace
#
# (``--inplace`` is an option of the ``build_ext`` command, not of ``build``.)

extensions = [
    Extension(
        "stdvect_to_ndarray",
        sources=["main.pyx"],
        # NumPy C headers are needed because main.pyx does ``cimport numpy``.
        include_dirs=[
            numpy.get_include(),
        ],
        extra_compile_args=["-O3"],
        # main.pyx uses libcpp.vector, so it must be compiled as C++.
        language="c++",
        define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
    )
]

setup(
    name="stdvect_to_ndarray",
    version='0.1.0',
    cmdclass={'build_ext': build_ext},
    ext_modules=extensions,
    install_requires=[
        'setuptools>=18.0',
        'cython>=0.27.3',
        'numpy'
    ],
    python_requires=">=3.6",
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
cimport numpy as np | |
from cpython.ref cimport Py_INCREF | |
from cython.operator cimport dereference as deref | |
from libcpp.vector cimport vector | |
import numpy as np | |
cimport numpy as np | |
from libc.math cimport sqrt | |
np.import_array() | |
# Context: | |
# | |
# In some Cython implementations, one may need resizable buffers | |
# which they would like to return as np.ndarray to Python callers. | |
# | |
# In terms of technical choice, C++ std::vector is a handy | |
# data structure in Cython for interacting efficiently with resizable | |
# buffers. | |
# | |
# Yet to my knowledge, there isn't a way to return those vectors to | |
# python callers within Cython, especially as np.ndarray. | |
# | |
# To return numpy arrays wrapping those resizable buffers, | |
# we can use PyArray_SimpleNewFromData by providing a | |
# pointer to the start of the buffer with some metadata. | |
# | |
# Though it is possible to access the address of the buffer's first element | |
# with std::vector::data, the buffers themselves can't be stolen: | |
# the buffers' lifetimes are tied to their std::vectors, and they are | |
# deallocated when their std::vectors are. | |
# | |
# Solution proposal: | |
# | |
# To solve this, we propose introducing a StdVectorSentinel, a | |
# sentinel to be used as np.ndarrays' base objects, allowing | |
# performing a proper coercion between std::vector and np.ndarrays. | |
# | |
# This way, std::vectors can dynamically be allocated in Cython | |
# and returned as numpy arrays without inconsistency nor memory leaks. | |
# | |
# Question and further work: | |
# | |
# 1. Is there already a way to coerce std::vector into numpy arrays efficiently | |
# within Cython? | |
# 2. If not, is the proposed fixture a good technical solution? | |
# 3. If so, could it further be improved to support type covariance, allowing | |
# it to be used easily in a dtype-invariant way? | |
#### | |
# Portable element types used for the vectors, in three flavours:
# C-level typedefs, NumPy C-API type numbers, and Python-level dtypes.

# C-level element types.
ctypedef np.float64_t DTYPE_t
ctypedef np.intp_t ITYPE_t

# Either of the two element types above.
ctypedef fused DITYPE_t:
    ITYPE_t
    DTYPE_t

# NumPy C-API type numbers matching DTYPE_t and ITYPE_t respectively.
cdef enum:
    DTYPECODE = np.NPY_FLOAT64
    ITYPECODE = np.NPY_INTP

# Python-level dtypes matching DTYPE_t and ITYPE_t respectively.
DTYPE = np.float64
ITYPE = np.intp
## std::vector to np.ndarray coercion
#
# Cython does not support type covariance for C++ containers, so a fused
# element type cannot be used as a template argument directly: the fused
# types have to be spelled out again for each container shape.

# A flat vector of either element type.
ctypedef fused vector_DITYPE_t:
    vector[ITYPE_t]
    vector[DTYPE_t]

# A vector of vectors of either element type.
ctypedef fused vector_vector_DITYPE_t:
    vector[vector[ITYPE_t]]
    vector[vector[DTYPE_t]]
cdef class StdVectorSentinel:
    """Common base type for objects that keep a C++ vector alive.

    A concrete sentinel holds its own vector and, on creation, swaps that
    vector's contents with the one it is handed (``vec_ptr``). From then on
    the sentinel manages the lifetime of the provided contents: they are
    deallocated together with the sentinel object.

    This base class holds no state itself; it only gives the
    dtype-specific sentinels a shared static type.
    """
# We necessarily need to define two extension types extending StdVectorSentinel | |
# because we need to provide the dtype of the vector but can't use numeric fused types. | |
cdef class StdVectorSentinelDTYPE(StdVectorSentinel):
    """Sentinel holding a ``vector[DTYPE_t]``."""

    # The vector whose lifetime is bound to this object.
    cdef vector[DTYPE_t] vec

    @staticmethod
    cdef StdVectorSentinel create_for(vector[DTYPE_t] * vec_ptr):
        """Build a sentinel that takes over the contents of ``vec_ptr[0]``.

        The contents of the pointed-to vector are swapped with those of the
        new sentinel's own vector.
        """
        # Calling __new__ directly creates the instance without running
        # __init__.
        cdef StdVectorSentinelDTYPE owner = StdVectorSentinelDTYPE.__new__(StdVectorSentinelDTYPE)
        owner.vec.swap(vec_ptr[0])
        return owner
cdef class StdVectorSentinelITYPE(StdVectorSentinel):
    """Sentinel holding a ``vector[ITYPE_t]``."""

    # The vector whose lifetime is bound to this object.
    cdef vector[ITYPE_t] vec

    @staticmethod
    cdef StdVectorSentinel create_for(vector[ITYPE_t] * vec_ptr):
        """Build a sentinel that takes over the contents of ``vec_ptr[0]``.

        The contents of the pointed-to vector are swapped with those of the
        new sentinel's own vector.
        """
        # Calling __new__ directly creates the instance without running
        # __init__.
        cdef StdVectorSentinelITYPE owner = StdVectorSentinelITYPE.__new__(StdVectorSentinelITYPE)
        owner.vec.swap(vec_ptr[0])
        return owner
cdef np.ndarray vector_to_nd_array(vector_DITYPE_t * vect_ptr):
    """Create a 1-D numpy ndarray given a C++ vector, without copying.

    The numpy array buffer is the one of the C++ vector.

    A StdVectorSentinel is registered as the base object for the numpy array,
    freeing the C++ vector it encapsulates when the numpy array is freed.

    Parameters
    ----------
    vect_ptr : pointer to vector[ITYPE_t] or vector[DTYPE_t]
        The vector to expose. Its contents are swapped into the sentinel
        created here, so the pointed-to vector no longer holds them when
        this function returns.

    Returns
    -------
    arr : np.ndarray
        Array of dtype float64 (for vector[DTYPE_t]) or intp (for
        vector[ITYPE_t]) with one element per vector element.
    """
    cdef:
        # Declared as a C int so that the NumPy type number is not boxed into
        # a Python object and converted back for the C-API call below.
        int typenum = DTYPECODE if vector_DITYPE_t is vector[DTYPE_t] else ITYPECODE
        np.npy_intp size = deref(vect_ptr).size()
        # The array is created on the vector's buffer *before* the swap; the
        # swap exchanges which vector holds the buffer, it does not move it.
        np.ndarray arr = np.PyArray_SimpleNewFromData(1, &size, typenum,
                                                      deref(vect_ptr).data())
        StdVectorSentinel sentinel

    # Resolved at compile time for each fused specialization.
    if vector_DITYPE_t is vector[DTYPE_t]:
        sentinel = StdVectorSentinelDTYPE.create_for(vect_ptr)
    else:
        sentinel = StdVectorSentinelITYPE.create_for(vect_ptr)

    # Makes the numpy array responsible for the life-cycle of its buffer.
    # A reference to the StdVectorSentinel will be stolen by the call below,
    # so we increase its reference counter.
    # See: https://docs.python.org/3/c-api/intro.html#reference-count-details
    Py_INCREF(sentinel)
    np.PyArray_SetBaseObject(arr, sentinel)
    return arr
# Module callable front-end for interactive use and proof of concept.
cpdef np.ndarray get_np_array_std_vector_backed(ITYPE_t n_elements):
    """Return an intp numpy array of ``n_elements`` values, all equal to 1337.

    The array's buffer is the one of a C++ std::vector, handed over through
    vector_to_nd_array.

    Parameters
    ----------
    n_elements : ITYPE_t
        Number of elements of the returned array. Must be non-negative.

    Returns
    -------
    arr : np.ndarray
        1-D array of ``n_elements`` elements.

    Raises
    ------
    ValueError
        If ``n_elements`` is negative.
    """
    # Stack-allocated on purpose: vector_to_nd_array only swaps the vector's
    # contents into a sentinel, so a vector created with ``new`` would itself
    # never be deleted and would leak on every call.
    cdef vector[ITYPE_t] vector_of_ints

    # A negative count would otherwise be reinterpreted as a huge unsigned
    # size by the vector constructor.
    if n_elements < 0:
        raise ValueError("n_elements must be non-negative, got %d" % n_elements)

    vector_of_ints = vector[ITYPE_t](n_elements, 1337)
    return vector_to_nd_array(&vector_of_ints)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment