实现缓冲协议
原文: http://docs.cython.org/en/latest/src/userguide/buffer.html
Cython 对象可以通过实现“缓冲协议”将内存缓冲区暴露给 Python 代码。本章介绍如何实现该协议,并在 NumPy 中使用由扩展类型管理的内存。
矩阵类
以下 Cython/C++ 代码实现了一个浮点数矩阵,其中列数在构造时固定,但行可以动态添加。
# 经典风格
# distutils: language = c++
# matrix.pyx
from libcpp.vector cimport vector
cdef class Matrix:
    """Matrix of C floats with a fixed column count and a growable row count.

    All elements live in one flat C++ vector; every call to ``add_row``
    extends that vector by ``ncols`` elements.
    """
    cdef:
        unsigned ncols      # row width, recorded once at construction
        vector[float] v     # flat element storage

    def __cinit__(self, unsigned ncols):
        # Only the row width is stored here; the vector starts out empty.
        self.ncols = ncols

    def add_row(self):
        """Append one row to the matrix; its elements start out as zero."""
        self.v.resize(self.ncols + self.v.size())
# 纯Python风格
# distutils: language = c++
# matrix.py
from cython.cimports.libcpp.vector import vector
@cython.cclass
class Matrix:
    """Matrix of C floats with a fixed column count and a growable row count.

    All elements live in one flat C++ vector; every call to ``add_row``
    extends that vector by ``ncols`` elements.
    """
    # Row width, recorded once at construction.
    ncols: cython.uint
    # Flat element storage.
    v: vector[cython.float]

    def __cinit__(self, ncols: cython.uint):
        # Only the row width is stored here; the vector starts out empty.
        self.ncols = ncols

    def add_row(self):
        """Append one row to the matrix; its elements start out as zero."""
        self.v.resize(self.ncols + self.v.size())
这个类还没有任何方法可以对矩阵的内容做有用的操作。我们可以为此实现自定义 __getitem__
,__setitem__
等,但我们将使用缓冲协议将矩阵的数据暴露给 Python,这样我们就可以使用 NumPy 来完成有用的工作。
实现缓冲协议需要添加两个方法,__getbuffer__
和 __releasebuffer__
(Cython 专门处理的方法)。
# 经典风格
# distutils: language = c++
from cpython cimport Py_buffer
from libcpp.vector cimport vector
cdef class Matrix:
    """Growable matrix of C floats that exposes its storage via the buffer protocol.

    Elements are kept in one flat C++ vector, ``ncols`` values per row.
    ``__getbuffer__`` describes that vector as a writable two-dimensional
    array of floats, so consumers such as NumPy can view it without copying.
    """
    cdef Py_ssize_t ncols
    # The Py_buffer handed out by __getbuffer__ points at these two arrays,
    # so they are stored on the object rather than in a local variable.
    cdef Py_ssize_t shape[2]
    cdef Py_ssize_t strides[2]
    cdef vector[float] v

    def __cinit__(self, Py_ssize_t ncols):
        self.ncols = ncols

    def add_row(self):
        """Adds a row, initially zero-filled."""
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        """Fill ``buffer`` with a 2-D view of the matrix data.

        ``flags`` is accepted but not inspected by this implementation.
        """
        # sizeof() does not evaluate its operand, so this is safe even
        # when the vector is empty.
        cdef Py_ssize_t itemsize = sizeof(self.v[0])

        # Floor division: the row count must be an integer regardless of
        # the division semantics the module is compiled with.
        self.shape[0] = self.v.size() // self.ncols
        self.shape[1] = self.ncols

        # Stride 1 is the distance, in bytes, between two items in a row;
        # vector elements are contiguous, so that is exactly one item size.
        # (Measuring it as &v[1] - &v[0] would index out of range for a
        # vector holding fewer than two elements.)
        # Stride 0 is the distance between the first elements of adjacent rows.
        self.strides[1] = itemsize
        self.strides[0] = self.ncols * self.strides[1]

        # data() may be called on an empty vector, unlike v[0].
        buffer.buf = <char *>self.v.data()
        buffer.format = 'f'                     # float
        buffer.internal = NULL                  # see References
        buffer.itemsize = itemsize
        buffer.len = self.v.size() * itemsize   # product(shape) * itemsize
        buffer.ndim = 2
        buffer.obj = self
        buffer.readonly = 0
        buffer.shape = self.shape
        buffer.strides = self.strides
        buffer.suboffsets = NULL                # for pointer arrays only

    def __releasebuffer__(self, Py_buffer *buffer):
        # Nothing to release in this version.
        pass
# 纯Python风格
# distutils: language = c++
from cython.cimports.cpython import Py_buffer
from cython.cimports.libcpp.vector import vector
@cython.cclass
class Matrix:
    """Growable matrix of C floats that exposes its storage via the buffer protocol.

    Elements are kept in one flat C++ vector, ``ncols`` values per row.
    ``__getbuffer__`` describes that vector as a writable two-dimensional
    array of floats, so consumers such as NumPy can view it without copying.
    """
    ncols: cython.Py_ssize_t
    # The Py_buffer handed out by __getbuffer__ points at these two arrays,
    # so they are stored on the object rather than in a local variable.
    shape: cython.Py_ssize_t[2]
    strides: cython.Py_ssize_t[2]
    v: vector[cython.float]

    def __cinit__(self, ncols: cython.Py_ssize_t):
        self.ncols = ncols

    def add_row(self):
        """Adds a row, initially zero-filled."""
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, buffer: cython.pointer(Py_buffer), flags: cython.int):
        """Fill ``buffer`` with a 2-D view of the matrix data.

        ``flags`` is accepted but not inspected by this implementation.
        """
        # sizeof() does not evaluate its operand, so this is safe even
        # when the vector is empty.
        itemsize: cython.Py_ssize_t = cython.sizeof(self.v[0])

        self.shape[0] = self.v.size() // self.ncols
        self.shape[1] = self.ncols

        # Stride 1 is the distance, in bytes, between two items in a row;
        # vector elements are contiguous, so that is exactly one item size.
        # (Measuring it as &v[1] - &v[0] would index out of range for a
        # vector holding fewer than two elements.)
        # Stride 0 is the distance between the first elements of adjacent rows.
        self.strides[1] = itemsize
        self.strides[0] = self.ncols * self.strides[1]

        # data() may be called on an empty vector, unlike v[0].
        buffer.buf = cython.cast(cython.p_char, self.v.data())
        buffer.format = 'f'                     # float
        buffer.internal = cython.NULL           # see References
        buffer.itemsize = itemsize
        buffer.len = self.v.size() * itemsize   # product(shape) * itemsize
        buffer.ndim = 2
        buffer.obj = self
        buffer.readonly = 0
        buffer.shape = self.shape
        buffer.strides = self.strides
        buffer.suboffsets = cython.NULL         # for pointer arrays only

    def __releasebuffer__(self, buffer: cython.pointer(Py_buffer)):
        # Nothing to release in this version.
        pass
Matrix.__getbuffer__
方法会填充由 Python C-API 定义的称为 Py_buffer
的描述符结构。它包含指向内存中实际缓冲区的指针,以及有关数组形状和步幅的元数据(从一个元素或行到下一个元素或行的步长)。它的shape
和strides
成员是指针,必须指向类型和大小为 Py_ssize_t[ndim]
的数组。只要还有任何缓冲区视图在查看数据,这些数组就必须保持有效,因此我们将它们作为成员存储在 Matrix
对象上。
代码尚未完成,但我们已经可以编译它并测试基本功能。
>>> from matrix import Matrix
>>> import numpy as np
>>> m = Matrix(10)
>>> np.asarray(m)
array([], shape=(0, 10), dtype=float32)
>>> m.add_row()
>>> a = np.asarray(m)
>>> a[:] = 1
>>> m.add_row()
>>> a = np.asarray(m)
>>> a
array([[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
现在我们可以将Matrix
视为 NumPy ndarray
,并使用标准的 NumPy 操作修改其内容。
内存安全和引用计数
到目前为止实现的Matrix
类是不安全的。 add_row
操作可以移动底层缓冲区,这会使数据上的任何 NumPy(或其他)视图无效。如果您尝试在add_row
调用后访问值,您将获得过时的值或段错误。
这就是__releasebuffer__
的用武之地。我们可以为每个矩阵添加一个引用计数,并在视图存在时将其锁定,禁止修改(译者注:原文为 mutation,指对对象内容的修改)。
# 经典风格
# distutils: language = c++
from cpython cimport Py_buffer
from libcpp.vector cimport vector
cdef class Matrix:
    """Matrix variant that refuses to grow while buffer views are outstanding."""
    cdef:
        # Incremented by __getbuffer__, decremented by __releasebuffer__.
        int view_count
        Py_ssize_t ncols
        vector[float] v
        # ...

    def __cinit__(self, Py_ssize_t ncols):
        self.view_count = 0
        self.ncols = ncols

    def add_row(self):
        """Append one row of ``ncols`` elements.

        Raises ``ValueError`` while ``view_count`` is positive.
        """
        if self.view_count > 0:
            raise ValueError("can't add row while being viewed")
        self.v.resize(self.ncols + self.v.size())

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        # ... as before
        # One more view is now outstanding.
        self.view_count += 1

    def __releasebuffer__(self, Py_buffer *buffer):
        # A view was dropped.
        self.view_count -= 1
# 纯Python风格
# distutils: language = c++
from cython.cimports.cpython import Py_buffer
from cython.cimports.libcpp.vector import vector
@cython.cclass
class Matrix:
    """Matrix variant that refuses to grow while buffer views are outstanding."""
    # Incremented by __getbuffer__, decremented by __releasebuffer__.
    view_count: cython.int
    ncols: cython.Py_ssize_t
    v: vector[cython.float]
    # ...

    def __cinit__(self, ncols: cython.Py_ssize_t):
        self.view_count = 0
        self.ncols = ncols

    def add_row(self):
        """Append one row of ``ncols`` elements.

        Raises ``ValueError`` while ``view_count`` is positive.
        """
        if self.view_count > 0:
            raise ValueError("can't add row while being viewed")
        self.v.resize(self.ncols + self.v.size())

    def __getbuffer__(self, buffer: cython.pointer(Py_buffer), flags: cython.int):
        # ... as before
        # One more view is now outstanding.
        self.view_count += 1

    def __releasebuffer__(self, buffer: cython.pointer(Py_buffer)):
        # A view was dropped.
        self.view_count -= 1
标志
我们在代码中跳过了一些输入验证。 __getbuffer__
的flags
参数来自np.asarray
(和其他客户端),是一个描述所请求数组类型的布尔标志的或(OR)运算结果。严格地说,如果标志包含PyBUF_ND
,PyBUF_SIMPLE
或PyBUF_F_CONTIGUOUS
,__getbuffer__
则必须引发BufferError
。这些宏可以从 cpython.buffer
中 cimport
。
(这种把矩阵存放在 vector 中的结构实际上符合PyBUF_ND
,但这会阻止__getbuffer__
填充步幅。单行矩阵是 F-连续的,但是更大的矩阵不是。)
参考文献
这里使用的缓冲接口在 PEP 3118《修订缓冲协议》(Revising the buffer protocol)中定义。
有关在 C 语言中使用此 API 的教程,请参阅 Jake Vanderplas 的博客文章《Python 缓冲协议简介》(An Introduction to the Python Buffer Protocol)。
Python 3 和 Python 2 均有相应的参考文档。 Py2 文档还描述了一个不再使用的旧缓冲区协议;自 Python 2.6 起, PEP 3118 协议已经被实现,旧协议仅与遗留代码相关。