We're Team Marines. We participated in the Intel oneAPI hackathon under the medical image processing category.
Our sole aim is to make computational medical image processing faster and more efficient using the Intel SYCL/DPC++ libraries.
Therefore, we chose brain tumour segmentation/classification and pneumonia detection for this, and serialized the models in .pt and .h5 format, since we used PyTorch (.pt) and TensorFlow (.h5).
We've researched all the possible ways to solve this problem, and they are listed below:
Manually converting the python code to SYCL/DPC++ code.
Writing the CUDA code and using SYCLomatic / the Intel Base Toolkit's compatibility tool to convert it into SYCL/DPC++ code.
Using Python - C++ bindings like
to reduce the amount of SYCL/DPC++ code to be written.
Using the ONNX environment to export the serialized models into .onnx format and import them into SYCL/DPC++ code for inferencing.
In the prototype phase, we wrote a fully executable SYCL/DPC++ program that simulates pneumonia detection using a 1-dimensional array. We structured the algorithm and built this program in order to get a good understanding of SYCL/DPC++.
You can see the code here :
Below is a part of the code which does the core logic :
deviceQueue.submit([&](handler& cgh) {
// Accessors for input and output buffers
auto inputAccessor = inputBuffer.get_access<access::mode::read>(cgh);
auto outputAccessor = outputBuffer.get_access<access::mode::write>(cgh);
// Kernel function
cgh.parallel_for<class PneumoniaPredictionKernel>(range<1>(dataSize), [=](id<1> idx) {
float inputValue = inputAccessor[idx];
outputAccessor[idx] = (inputValue > 0.5f) ? 1 : 0; // Set the threshold here
Final hackathon
ONNX environment
In this approach we've :
Successfully built our models.
Successfully optimized a model using Intel-extension-for-pytorch.
Successfully exported the model into .onnx format.
Successfully integrated the .onnx model into the SYCL/DPC++ code we've written.
Executing the SYCL/DPC++ script requires an environment called an ONNX Runtime (ORT) session, and that has to be configured on the local machine.
We were an inch away from accomplishing medical image processing using SYCL/DPC++, but we have not yet successfully set up this ORT environment.
You can see the model conversion at the end of this ipynb file :
And below is the code which does the conversion :
import torch
import torch.onnx

# Export the model to ONNX (opset 11), with its parameters stored in the file.
# The random 1x3x224x224 tensor is the example input handed to the exporter;
# it is moved to the same device object that is used elsewhere in the notebook.
dummy_input = torch.randn(1, 3, 224, 224).to(device)
torch.onnx.export(model, dummy_input, "onnxfile.onnx", export_params=True, opset_version=11)
Once we had the .onnx file, we integrated it into the SYCL/DPC++ code that we've written. You can view those SYCL/DPC++ scripts here :
DPC++ extension
We've also thought of making a SYCL/DPC++ extension module for Python, which one can import into Python code with a simple statement like the one below :
from dpcpp_extension_module import image_processing
With this approach, the input workload is prepared in Python, but the output workload is executed by the SYCL/DPC++ script.
As of now we've only written it in DPC++, for basic understanding at the beginning, and we'll convert it to SYCL/DPC++ very soon.
Here's that script below :
#include <Python.h>
#include <CL/sycl.hpp>
#include "dpc_common.hpp"
#include <iostream>
#include <stdexcept>
#include "numpy/arrayobject.h"
#include <opencv2/opencv.hpp>
using namespace cl::sycl;
extern "C" {
/**
 * Python entry point: 2-D convolution of an 8-bit image on a SYCL device.
 *
 * Expects two NumPy arrays as positional arguments:
 *   - input_image: 2-D, C-contiguous, dtype uint8
 *   - conv_kernel: 2-D, C-contiguous, dtype float32, odd height and width
 *
 * Taps that fall outside the image read the nearest edge pixel, and each
 * accumulated value is clamped to [0, 255] before being stored as uint8.
 *
 * Returns a new array shaped like input_image, or nullptr with a Python
 * exception set when the arguments are rejected or the device run throws.
 */
static PyObject* perform_image_convolution(PyObject* self, PyObject* args) {
    PyArrayObject *input_image = nullptr;
    PyArrayObject *conv_kernel = nullptr;
    if (!PyArg_ParseTuple(args, "O!O!", &PyArray_Type, &input_image, &PyArray_Type, &conv_kernel)) {
        return nullptr;
    }

    // The code below indexes shape[0] and shape[1] and reinterprets the raw
    // data pointers, so rank, dtype and layout must be checked first.
    if (PyArray_NDIM(input_image) != 2 || PyArray_NDIM(conv_kernel) != 2) {
        PyErr_SetString(PyExc_ValueError, "input_image and conv_kernel must both be 2-D arrays");
        return nullptr;
    }
    if (PyArray_TYPE(input_image) != NPY_UINT8 || PyArray_TYPE(conv_kernel) != NPY_FLOAT32) {
        PyErr_SetString(PyExc_TypeError, "input_image must be uint8 and conv_kernel must be float32");
        return nullptr;
    }
    if (!PyArray_IS_C_CONTIGUOUS(input_image) || !PyArray_IS_C_CONTIGUOUS(conv_kernel)) {
        PyErr_SetString(PyExc_ValueError, "input_image and conv_kernel must be C-contiguous");
        return nullptr;
    }

    const npy_intp *input_shape = PyArray_SHAPE(input_image);
    const npy_intp *kernel_shape = PyArray_SHAPE(conv_kernel);
    std::cout << input_shape[0] << "," << input_shape[1] << std::endl;
    std::cout << kernel_shape[0] << "," << kernel_shape[1] << std::endl;

    const size_t num_rows = static_cast<size_t>(input_shape[0]);
    const size_t num_cols = static_cast<size_t>(input_shape[1]);
    const size_t kernel_height = static_cast<size_t>(kernel_shape[0]);
    const size_t kernel_width = static_cast<size_t>(kernel_shape[1]);

    // The tap loops run over 2 * (size / 2) + 1 positions per axis, which
    // only equals the kernel size when that size is odd; an even (or zero)
    // size would index past the end of the kernel buffer.
    if (kernel_height % 2 == 0 || kernel_width % 2 == 0) {
        PyErr_SetString(PyExc_ValueError, "conv_kernel height and width must both be odd");
        return nullptr;
    }

    PyObject *output_image = PyArray_NewLikeArray(input_image, NPY_CORDER, nullptr, 0);
    if (output_image == nullptr) {
        return nullptr;
    }
    if (num_rows == 0 || num_cols == 0) {
        // Nothing to compute; avoid creating zero-length device buffers.
        return output_image;
    }

    uint8_t *input_buf = static_cast<uint8_t*>(PyArray_DATA(input_image));
    uint8_t *output_buf = static_cast<uint8_t*>(PyArray_DATA(reinterpret_cast<PyArrayObject*>(output_image)));
    float *kernel_buf = static_cast<float*>(PyArray_DATA(conv_kernel));

    // Plain ints for the device lambda, which does signed offset arithmetic.
    const int half_filter_width = static_cast<int>(kernel_width / 2);
    const int half_filter_height = static_cast<int>(kernel_height / 2);
    const int last_row = static_cast<int>(num_rows) - 1;
    const int last_col = static_cast<int>(num_cols) - 1;

    try {
        queue q(default_selector{});
        buffer<uint8_t, 1> input_buf_sycl(input_buf, range<1>(num_rows * num_cols));
        buffer<uint8_t, 1> output_buf_sycl(output_buf, range<1>(num_rows * num_cols));
        buffer<float, 1> kernel_buf_sycl(kernel_buf, range<1>(kernel_height * kernel_width));
        range<2> num_items{ num_rows, num_cols };

        q.submit([&](handler &h) {
            auto input_ptr = input_buf_sycl.get_access<access::mode::read>(h);
            auto output_ptr = output_buf_sycl.get_access<access::mode::write>(h);
            auto kernel_ptr = kernel_buf_sycl.get_access<access::mode::read>(h);
            h.parallel_for(num_items, [=](id<2> item) {
                const int row = static_cast<int>(item[0]);
                const int col = static_cast<int>(item[1]);
                float sum = 0.0f;
                for (int fy = -half_filter_height; fy <= half_filter_height; fy++) {
                    for (int fx = -half_filter_width; fx <= half_filter_width; fx++) {
                        // Clamp the sampled coordinate to the image bounds.
                        int yy = row + fy;
                        int xx = col + fx;
                        yy = (yy < 0) ? 0 : yy;
                        xx = (xx < 0) ? 0 : xx;
                        yy = (yy > last_row) ? last_row : yy;
                        xx = (xx > last_col) ? last_col : xx;
                        const size_t pixel_index = static_cast<size_t>(yy) * num_cols + static_cast<size_t>(xx);
                        const size_t tap_index = static_cast<size_t>(fy + half_filter_height) * kernel_width
                                                 + static_cast<size_t>(fx + half_filter_width);
                        sum += input_ptr[pixel_index] * kernel_ptr[tap_index];
                    }
                }
                // Float literals keep the clamp in single precision, so the
                // kernel does not need double-precision device support.
                sum = (sum > 255.0f) ? 255.0f : (sum < 0.0f) ? 0.0f : sum;
                output_ptr[static_cast<size_t>(row) * num_cols + static_cast<size_t>(col)] = static_cast<uint8_t>(sum);
            });
        });
        // Leaving this scope destroys the buffers, which waits for the kernel
        // and copies the result back into output_buf before we return.
    } catch (const std::exception &e) {
        // A C++ exception must not propagate into the Python interpreter.
        Py_DECREF(output_image);
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return nullptr;
    } catch (...) {
        Py_DECREF(output_image);
        PyErr_SetString(PyExc_RuntimeError, "unknown error while running the SYCL convolution kernel");
        return nullptr;
    }

    return output_image;
}
// Methods exported by the extension module. CPython walks this array until it
// reaches an entry whose name is null, so the sentinel row is required.
PyMethodDef method_table[] = {
    {"perform_image_convolution", static_cast<PyCFunction>(perform_image_convolution), METH_VARARGS, "Perform image convolution"},
    {nullptr, nullptr, 0, nullptr}  // sentinel
};
// Module definition passed to PyModule_Create.
// NOTE(review): only the description string survived in the original text; the
// module name below is taken from the PyInit_python_dpcpp_module symbol and the
// methods pointer from method_table — confirm against the real source.
PyModuleDef test_module = {
    PyModuleDef_HEAD_INIT,
    "python_dpcpp_module",             // m_name
    "DPC++ image convolution module",  // m_doc
    -1,                                // m_size: module keeps no per-interpreter state
    method_table                       // m_methods
};
// Module initialisation hook looked up by the interpreter on import.
// Returns the new module object, or nullptr with a Python error set.
PyMODINIT_FUNC PyInit_python_dpcpp_module(void) {
    // The NumPy C-API (PyArray_Type, PyArray_DATA, ...) is reached through a
    // table that import_array() fills in; it must run before any such call.
    // On failure the macro sets a Python error and returns from this function.
    import_array();
    if (PyErr_Occurred()) {
        return nullptr;
    }
    return PyModule_Create(&test_module);
}
} // extern "C"
Although we have not yet achieved the task, we are confident that we have explored all the possible ways to do medical image processing in SYCL/DPC++. We, Team Marines, are committed to completing this hack.
