pybind11—opencv图像处理(numpy数据交换)
前言
C++ opencv中图像和矩阵的表示采用
Mat
类,比如
imread()
读取的结果就是返回一个
Mat
对象。对于python而言,
numpy
通常用于矩阵运算, 矩阵,图像表示为
numpy.ndarray
类。
因此,想要将python
numpy.ndarray
的数据传递到C++ opencv
Mat
, 或者C++ Mat将数据返回到python
numpy.ndarray
, 核心问题——
如何绑定Mat
main.cpp
#include<iostream>
#include<vector>
#include<opencv2/opencv.hpp>
#include<pybind11/pybind11.h>
#include<pybind11/numpy.h>
#include<pybind11/stl.h>
#include"mat_warper.h"
namespace py = pybind11;
/// Convert a 3-channel uint8 image (interpreted as RGB) to a single-channel
/// grayscale image and hand it back as a NumPy array.
py::array_t<unsigned char> test_rgb_to_gray(py::array_t<unsigned char>& input) {
    cv::Mat color = numpy_uint8_3c_to_cv_mat(input);
    cv::Mat gray;
    cv::cvtColor(color, gray, cv::COLOR_RGB2GRAY);
    return cv_mat_uint8_1c_to_numpy(gray);
}
/// Run the Canny edge detector (thresholds 30 and 60) on a single-channel
/// uint8 image and return the edge map as a NumPy array.
py::array_t<unsigned char> test_gray_canny(py::array_t<unsigned char>& input) {
    cv::Mat gray = numpy_uint8_1c_to_cv_mat(input);
    cv::Mat edges;
    cv::Canny(gray, edges, 30, 60);
    return cv_mat_uint8_1c_to_numpy(edges);
}
@return Python list
py::list test_pyramid_image(py::array_t<unsigned char>& input) {
cv::Mat src = numpy_uint8_1c_to_cv_mat(input);
std::vector<cv::Mat> dst;
cv::buildPyramid(src, dst, 4);
py::list out;
for (int i = 0; i < dst.size(); i++)
out.append<py::array_t<unsigned char>>(cv_mat_uint8_1c_to_numpy(dst.at(i)));
return out;
// Python extension module `cv_demo1`: registers the three demo functions.
PYBIND11_MODULE(cv_demo1, m) {
    m.doc() = "Simple opencv demo";
    m.def("test_rgb_to_gray", &test_rgb_to_gray)
        .def("test_gray_canny", &test_gray_canny)
        .def("test_pyramid_image", &test_pyramid_image);
}
mat_warper.h
#ifndef MAT_WARPER_H_
#define MAT_WARPER_H_  // without this define the guard never takes effect

#include<opencv2/opencv.hpp>
#include<pybind11/pybind11.h>
#include<pybind11/numpy.h>

namespace py = pybind11;

// NumPy -> cv::Mat: view a 2-D uint8 array as a CV_8UC1 Mat.
cv::Mat numpy_uint8_1c_to_cv_mat(py::array_t<unsigned char>& input);

// NumPy -> cv::Mat: view a 3-D uint8 array as a CV_8UC3 Mat.
cv::Mat numpy_uint8_3c_to_cv_mat(py::array_t<unsigned char>& input);

// cv::Mat -> NumPy: export a single-channel uint8 Mat as a (rows, cols) array.
py::array_t<unsigned char> cv_mat_uint8_1c_to_numpy(cv::Mat & input);

// cv::Mat -> NumPy: export a 3-channel uint8 Mat as a (rows, cols, 3) array.
py::array_t<unsigned char> cv_mat_uint8_3c_to_numpy(cv::Mat & input);

#endif // !MAT_WARPER_H_
mat_warper.cpp
#include"mat_warper.h"
#include <pybind11/numpy.h>
Python->C++ Mat
/// View a 2-D uint8 NumPy array as a single-channel (CV_8UC1) cv::Mat.
///
/// The Mat is constructed directly on the array's buffer pointer, so `input`
/// must stay alive for as long as the returned Mat is used.
///
/// @throws std::runtime_error if the array is not 2-D or not C-contiguous.
cv::Mat numpy_uint8_1c_to_cv_mat(py::array_t<unsigned char>& input) {
    if (input.ndim() != 2)
        throw std::runtime_error("1-channel image must be 2 dims ");
    // The Mat below assumes densely packed rows; a strided view (slice,
    // transpose, ...) would be misread silently, so reject it explicitly.
    if (!(input.flags() & py::array::c_style))
        throw std::runtime_error("1-channel image must be C-contiguous");

    py::buffer_info buf = input.request();
    return cv::Mat(static_cast<int>(buf.shape[0]), static_cast<int>(buf.shape[1]),
                   CV_8UC1, static_cast<unsigned char*>(buf.ptr));
}
/// View a 3-D uint8 NumPy array of shape (rows, cols, 3) as a CV_8UC3 cv::Mat.
///
/// The Mat is constructed directly on the array's buffer pointer, so `input`
/// must stay alive for as long as the returned Mat is used.
///
/// @throws std::runtime_error if the array is not 3-D, its last dimension is
///         not 3, or it is not C-contiguous.
cv::Mat numpy_uint8_3c_to_cv_mat(py::array_t<unsigned char>& input) {
    if (input.ndim() != 3)
        throw std::runtime_error("3-channel image must be 3 dims ");
    // CV_8UC3 reads exactly three interleaved bytes per pixel; any other
    // channel count would make OpenCV walk past or misalign the buffer.
    if (input.shape(2) != 3)
        throw std::runtime_error("3-channel image must have exactly 3 channels");
    if (!(input.flags() & py::array::c_style))
        throw std::runtime_error("3-channel image must be C-contiguous");

    py::buffer_info buf = input.request();
    return cv::Mat(static_cast<int>(buf.shape[0]), static_cast<int>(buf.shape[1]),
                   CV_8UC3, static_cast<unsigned char*>(buf.ptr));
}
C++ Mat ->numpy
/// Export a single-channel uint8 Mat as a NumPy array of shape (rows, cols).
///
/// @throws std::runtime_error if a non-empty Mat is not CV_8UC1.
py::array_t<unsigned char> cv_mat_uint8_1c_to_numpy(cv::Mat& input) {
    if (!input.empty() && input.type() != CV_8UC1)
        throw std::runtime_error("expected a CV_8UC1 image");
    // The array is filled from one flat pointer, so a Mat with padded rows
    // (e.g. an ROI) must be packed first.
    const cv::Mat packed = input.isContinuous() ? input : input.clone();
    return py::array_t<unsigned char>({ packed.rows, packed.cols }, packed.data);
}
/// Export a 3-channel uint8 Mat as a NumPy array of shape (rows, cols, 3).
///
/// @throws std::runtime_error if a non-empty Mat is not CV_8UC3.
py::array_t<unsigned char> cv_mat_uint8_3c_to_numpy(cv::Mat& input) {
    if (!input.empty() && input.type() != CV_8UC3)
        throw std::runtime_error("expected a CV_8UC3 image");
    // The array is filled from one flat pointer, so a Mat with padded rows
    // (e.g. an ROI) must be packed first.
    const cv::Mat packed = input.isContinuous() ? input : input.clone();
    return py::array_t<unsigned char>({ packed.rows, packed.cols, 3 }, packed.data);
}
//PYBIND11_MODULE(cv_mat_warper, m) {
// m.doc() = "OpenCV Mat -> Numpy.ndarray warper";
// m.def("numpy_uint8_1c_to_cv_mat", &numpy_uint8_1c_to_cv_mat);
// m.def("numpy_uint8_1c_to_cv_mat", &numpy_uint8_1c_to_cv_mat);
python中测试
python代码
import cv2
import matplotlib.pyplot as plt
import demo11.cv_demo1 as cv_demo1
import numpy as np

# cv2.imread returns colour images in BGR channel order, while
# test_rgb_to_gray converts with COLOR_RGB2GRAY, so reorder to RGB first.
image_bgr = cv2.imread('F:\\lena\\lena_rgb.jpg', cv2.IMREAD_UNCHANGED)
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
image_gray = cv2.imread('F:\\lena\\lena_gray.jpg', cv2.IMREAD_UNCHANGED)

# RGB -> gray
var1 = cv_demo1.test_rgb_to_gray(image_rgb)
print(var1.shape)
plt.figure('rgb-gray')
# Name the colormap explicitly; plt.gray() returns None and only works by
# changing the global default colormap as a side effect.
plt.imshow(var1, cmap='gray')

# Canny edges
var2 = cv_demo1.test_gray_canny(image_gray)
plt.figure('canny')
plt.imshow(var2, cmap='gray')

# Image pyramid: drop level 0 and show the remaining levels in a 2x2 grid.
var3 = cv_demo1.test_pyramid_image(image_gray)
var3 = var3[1:]
plt.figure('pyramid_demo')
for i, image in enumerate(var3, 1):
    plt.subplot(2, 2, i)
    plt.axis('off')
    plt.imshow(image, cmap='gray')

plt.show()
测试图像:
RGB图像
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <string>
#include <iostream>
#include "ndarray_converter.h"
namespace py = pybind11;
/// Display `image` in a window titled "image_from_Cpp" and block until a key
/// is pressed.
void show_image(cv::Mat image)
{
    cv::imshow("image_from_Cpp", image);
    cv::waitKey(0);
}
/// Load the image file `image_name` as a colour image.
///
/// Uses cv::IMREAD_COLOR instead of the legacy C-API macro
/// CV_LOAD_IMAGE_COLOR, which is not declared by the OpenCV 4 C++ headers.
cv::Mat read_image(std::string image_name)
{
    return cv::imread(image_name, cv::IMREAD_COLOR);
}
/// Return the argument unchanged (round-trips a Mat through the converter).
cv::Mat passthru(cv::Mat image)
{
    return image;
}
/// Return the result of `image.clone()`.
cv::Mat cloneimg(cv::Mat image)
{
    return image.clone();
}
/// Blur `image` with a 7x7 Gaussian kernel (sigma 1.5 in both directions).
cv::Mat gaussian_blur_demo(cv::Mat& image) {
    cv::Mat blurred;
    cv::GaussianBlur(image, blurred, cv::Size(7, 7), 1.5, 1.5);
    return blurred;
}
/// Filter `image` with `kernel` via cv::filter2D, passing -1 as the ddepth
/// argument.
cv::Mat image_filter(cv::Mat& image, cv::Mat& kernel) {
    cv::Mat filtered;
    cv::filter2D(image, filtered, -1, kernel);
    return filtered;
}
// Python extension module `example`.
PYBIND11_MODULE(example, m)
{
    // Must run before any cv::Mat <-> ndarray conversion takes place.
    NDArrayConverter::init_numpy();

    m.def("read_image", &read_image, "A function that read an image", py::arg("image"))
        .def("show_image", &show_image, "A function that show an image", py::arg("image"))
        .def("passthru", &passthru, "Passthru function", py::arg("image"))
        .def("clone", &cloneimg, "Clone function", py::arg("image"))
        .def("gaussian_blur_demo", &gaussian_blur_demo)
        .def("image_filter", &image_filter);
}
ndarray_converter.h
# ifndef __NDARRAY_CONVERTER_H__
# define __NDARRAY_CONVERTER_H__
#include <Python.h>
#include <opencv2/core/core.hpp>
// Static helpers converting between cv::Mat and NumPy ndarray objects.
class NDArrayConverter {
public:
    // must call this first, or the other routines don't work!
    static bool init_numpy();

    // Python object -> cv::Mat; returns false when the conversion fails.
    static bool toMat(PyObject* o, cv::Mat &m);

    // cv::Mat -> Python object.
    static PyObject* toNDArray(const cv::Mat& mat);
};
// Define the type converter
#include <pybind11/pybind11.h>
namespace pybind11 { namespace detail {

// pybind11 caster that lets bound functions take and return cv::Mat directly;
// both directions delegate to NDArrayConverter.
template <> struct type_caster<cv::Mat> {
public:
    PYBIND11_TYPE_CASTER(cv::Mat, _("numpy.ndarray"));

    // Python -> C++: fill `value` from the Python object.
    bool load(handle src, bool /*convert*/) {
        return NDArrayConverter::toMat(src.ptr(), value);
    }

    // C++ -> Python: wrap the pointer returned by toNDArray in a handle.
    static handle cast(const cv::Mat &m, return_value_policy /*policy*/, handle /*defval*/) {
        return handle(NDArrayConverter::toNDArray(m));
    }
};

}} // namespace pybind11::detail
# endif
// borrowed in spirit from https://github.com/yati-sagade/opencv-ndarray-conversion
// MIT License
#include "ndarray_converter.h"
#define NPY_NO_DEPRECATED_API NPY_1_15_API_VERSION
#include <numpy/ndarrayobject.h>
#if PY_VERSION_HEX >= 0x03000000
#define PyInt_Check PyLong_Check
#define PyInt_AsLong PyLong_AsLong
#endif
// Minimal stand-in carrying only the argument name used in failmsg() texts.
struct Tmp {
    const char * name;

    Tmp(const char * name ) : name(name) {}
};  // the terminator was missing, which turned `info` into a member declaration

// Name reported in the conversion error messages.
Tmp info("return value");
// Import the NumPy C-API for this translation unit.
// import_array1(false) makes the function return false if the import fails.
bool NDArrayConverter::init_numpy() {
    // this has to be in this file, since PyArray_API is defined as static
    import_array1(false);
    return true;
}
* The following conversion functions are taken/adapted from OpenCV's cv2.cpp file
* inside modules/python/src2 folder (OpenCV 3.1.0)
static PyObject* opencv_error = 0;
static int failmsg(const char *fmt, ...)
char str[1000];
va_list ap;
va_start(ap, fmt);
vsnprintf(str, sizeof(str), fmt, ap);
va_end(ap);
PyErr_SetString(PyExc_TypeError, str);
return 0;
class PyAllowThreads
public:
PyAllowThreads() : _state(PyEval_SaveThread()) {}
~PyAllowThreads()
PyEval_RestoreThread(_state);
private:
PyThreadState* _state;
class PyEnsureGIL
public:
PyEnsureGIL() : _state(PyGILState_Ensure()) {}
~PyEnsureGIL()
PyGILState_Release(_state);
private:
PyGILState_STATE _state;
// Evaluate `expr` inside a PyAllowThreads scope. A cv::Exception is turned
// into a Python exception and the enclosing function returns 0.
// opencv_error is initialised to 0 in this file, so fall back to
// PyExc_RuntimeError instead of passing a null exception type.
#define ERRWRAP2(expr) \
try \
{ \
    PyAllowThreads allowThreads; \
    expr; \
} \
catch (const cv::Exception &e) \
{ \
    PyErr_SetString(opencv_error ? opencv_error : PyExc_RuntimeError, e.what()); \
    return 0; \
}
using namespace cv;
class NumpyAllocator : public MatAllocator
public:
NumpyAllocator() { stdAllocator = Mat::getStdAllocator(); }
~NumpyAllocator() {}
UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const
UMatData* u = new UMatData(this);
u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o);
npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
for( int i = 0; i < dims - 1; i++ )
step[i] = (size_t)_strides[i];
step[dims-1] = CV_ELEM_SIZE(type);
u->size = sizes[0]*step[0];
u->userdata = o;
return u;
UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
if( data != 0 )
CV_Error(Error::StsAssert, "The data should normally be NULL!");
// probably this is safe to do in such extreme case
return stdAllocator->allocate(dims0, sizes, type, data, step, flags, usageFlags);
PyEnsureGIL gil;
int depth = CV_MAT_DEPTH(type);
int cn = CV_MAT_CN(type);
const int f = (int)(sizeof(size_t)/8);
int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE :
depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
int i, dims = dims0;
cv::AutoBuffer<npy_intp> _sizes(dims + 1);
for( i = 0; i < dims; i++ )
_sizes[i] = sizes[i];
if( cn > 1 )
_sizes[dims++] = cn;
PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum);
if(!o)
CV_Error_(Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims));
return allocate(o, dims0, sizes, type, step);
bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
return stdAllocator->allocate(u, accessFlags, usageFlags);
void deallocate(UMatData* u) const
if(!u)
return;
PyEnsureGIL gil;
CV_Assert(u->urefcount >= 0);
CV_Assert(u->refcount >= 0);
if(u->refcount == 0)
PyObject* o = (PyObject*)u->userdata;
Py_XDECREF(o);
delete u;
const MatAllocator* stdAllocator;
NumpyAllocator g_numpyAllocator;
// Convert a Python object to a cv::Mat.
//   - NULL / None : `m` is kept; its allocator is set when it has no data.
//   - int / float : 4x1 CV_64F Mat {value, 0, 0, 0}.
//   - tuple       : Nx1 CV_64F Mat of its int/float items.
//   - ndarray     : Mat over the array's buffer, or over a cast / contiguous
//                   copy when the dtype or layout is incompatible.
// Returns false on failure; the failmsg() paths set a Python TypeError.
bool NDArrayConverter::toMat(PyObject *o, Mat &m)
{
    bool allowND = true;
    if(!o || o == Py_None)
    {
        if( !m.data )
            m.allocator = &g_numpyAllocator;
        return true;
    }

    if( PyInt_Check(o) )
    {
        double v[] = {static_cast<double>(PyInt_AsLong((PyObject*)o)), 0., 0., 0.};
        m = Mat(4, 1, CV_64F, v).clone();
        return true;
    }
    if( PyFloat_Check(o) )
    {
        double v[] = {PyFloat_AsDouble((PyObject*)o), 0., 0., 0.};
        m = Mat(4, 1, CV_64F, v).clone();
        return true;
    }
    if( PyTuple_Check(o) )
    {
        int i, sz = (int)PyTuple_Size((PyObject*)o);
        m = Mat(sz, 1, CV_64F);
        for( i = 0; i < sz; i++ )
        {
            PyObject* oi = PyTuple_GET_ITEM(o, i);
            if( PyInt_Check(oi) )
                m.at<double>(i) = (double)PyInt_AsLong(oi);
            else if( PyFloat_Check(oi) )
                m.at<double>(i) = (double)PyFloat_AsDouble(oi);
            else
            {
                failmsg("%s is not a numerical tuple", info.name);
                m.release();
                return false;
            }
        }
        return true;
    }

    if( !PyArray_Check(o) )
    {
        failmsg("%s is not a numpy array, neither a scalar", info.name);
        return false;
    }

    PyArrayObject* oarr = (PyArrayObject*) o;

    bool needcopy = false, needcast = false;
    int typenum = PyArray_TYPE(oarr), new_typenum = typenum;
    int type = typenum == NPY_UBYTE ? CV_8U :
    typenum == NPY_BYTE ? CV_8S :
    typenum == NPY_USHORT ? CV_16U :
    typenum == NPY_SHORT ? CV_16S :
    typenum == NPY_INT ? CV_32S :
    typenum == NPY_INT32 ? CV_32S :
    typenum == NPY_FLOAT ? CV_32F :
    typenum == NPY_DOUBLE ? CV_64F : -1;

    if( type < 0 )
    {
        // 64-bit / long integers are accepted by casting to NPY_INT (CV_32S);
        // every other unmapped dtype is rejected.
        if( typenum == NPY_INT64 || typenum == NPY_UINT64 || typenum == NPY_LONG )
        {
            needcopy = needcast = true;
            new_typenum = NPY_INT;
            type = CV_32S;
        }
        else
        {
            failmsg("%s data type = %d is not supported", info.name, typenum);
            return false;
        }
    }

#ifndef CV_MAX_DIM
    const int CV_MAX_DIM = 32;
#endif

    int ndims = PyArray_NDIM(oarr);
    if(ndims >= CV_MAX_DIM)
    {
        failmsg("%s dimensionality (=%d) is too high", info.name, ndims);
        return false;
    }

    int size[CV_MAX_DIM+1];
    size_t step[CV_MAX_DIM+1];
    size_t elemsize = CV_ELEM_SIZE1(type);
    const npy_intp* _sizes = PyArray_DIMS(oarr);
    const npy_intp* _strides = PyArray_STRIDES(oarr);
    bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;

    for( int i = ndims-1; i >= 0 && !needcopy; i-- )
    {
        // these checks handle cases of
        //  a) multi-dimensional (ndims > 2) arrays, as well as simpler 1- and 2-dimensional cases
        //  b) transposed arrays, where _strides[] elements go in non-descending order
        //  c) flipped arrays, where some of _strides[] elements are negative
        // the _sizes[i] > 1 is needed to avoid spurious copies when NPY_RELAXED_STRIDES is set
        if( (i == ndims-1 && _sizes[i] > 1 && (size_t)_strides[i] != elemsize) ||
            (i < ndims-1 && _sizes[i] > 1 && _strides[i] < _strides[i+1]) )
            needcopy = true;
    }

    if( ismultichannel && _strides[1] != (npy_intp)elemsize*_sizes[2] )
        needcopy = true;

    if (needcopy)
    {
        //if (info.outputarg)
        //    failmsg("Layout of the output array %s is incompatible with cv::Mat (step[ndims-1] != elemsize or step[1] != elemsize*nchannels)", info.name);
        //    return false;

        if( needcast ) {
            o = PyArray_Cast(oarr, new_typenum);
            oarr = (PyArrayObject*) o;
        }
        else {
            oarr = PyArray_GETCONTIGUOUS(oarr);
            o = (PyObject*) oarr;
        }

        // Guard against a failed copy before the new array is dereferenced.
        if( !o )
            return false;

        _strides = PyArray_STRIDES(oarr);
    }

    // Normalize strides in case NPY_RELAXED_STRIDES is set
    size_t default_step = elemsize;
    for ( int i = ndims - 1; i >= 0; --i )
    {
        size[i] = (int)_sizes[i];
        if ( size[i] > 1 )
        {
            step[i] = (size_t)_strides[i];
            default_step = step[i] * size[i];
        }
        else
        {
            step[i] = default_step;
            default_step *= size[i];
        }
    }

    // handle degenerate case
    if( ndims == 0) {
        size[ndims] = 1;
        step[ndims] = elemsize;
        ndims++;
    }

    if( ismultichannel )
    {
        // Fold the trailing dimension into the Mat's channel count.
        ndims--;
        type |= CV_MAKETYPE(0, size[2]);
    }

    if( ndims > 2 && !allowND )
    {
        failmsg("%s has more than 2 dimensions", info.name);
        return false;
    }

    m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
    m.u = g_numpyAllocator.allocate(o, ndims, size, type, step);
    m.addref();

    // A copy made above is already a new reference; the caller's original
    // array needs an extra one because the Mat now refers to it.
    if( !needcopy )
    {
        Py_INCREF(o);
    }
    m.allocator = &g_numpyAllocator;

    return true;
}
// Convert a cv::Mat to a Python object.
// Returns None for a Mat without data. If the Mat is not already backed by
// g_numpyAllocator it is first copied into one that is. The result is the
// PyObject stored in UMatData::userdata, with its refcount incremented.
PyObject* NDArrayConverter::toNDArray(const cv::Mat& m)
{
    if( !m.data )
        Py_RETURN_NONE;

    Mat temp, *p = (Mat*)&m;
    if(!p->u || p->allocator != &g_numpyAllocator)
    {
        // Copy only when needed; `temp` allocates through NumpyAllocator.
        temp.allocator = &g_numpyAllocator;
        ERRWRAP2(m.copyTo(temp));
        p = &temp;
    }

    PyObject* o = (PyObject*)p->u->userdata;
    Py_INCREF(o);
    return o;
}
Gaussian模糊