@comment{Workshop paper. The venue name is stored in booktitle, the field that
standard styles read for inproceedings entries; month uses the predefined jun
macro so the style decides how it is rendered. The address value is kept as
given in the original record.}
@inproceedings{250,
  author    = {Li, Bo and Sallai, Janos and Volgyesi, Peter and Ledeczi, Akos},
  title     = {Rapid Prototyping of Image Processing Workflows on Massively Parallel Architectures},
  booktitle = {Workshop on Intelligent Solutions in Embedded Systems},
  year      = {2012},
  month     = jun,
  address   = {Klagenfurt, Austria},
  abstract  = {Many-core Graphics Processing Units (GPU) provide a high-performance parallel hardware platform on the desktop at an incredibly low cost. However, the widespread use of this computational capacity is hindered by the fact that programming GPUs is difficult. The state-of-the-art is to develop code utilizing the NVIDIA Compute Unified Device Architecture (CUDA). However, effective use of CUDA requires developers highly skilled in both low-level systems programming and parallel processing. Recognizing this roadblock to widespread adaption of General-Purpose Computing on GPUs (GPGPU), the NVIDIA Performance Primitives (NPP) library was released recently. While greatly easing the burden, utilizing NPP still requires one to learn CUDA. In this paper, we introduce a graphical environment for the design of image processing workflows that automatically generates all the CUDA code including NPP calls necessary to run the application on a GPU. Experimental results show that the generated code is almost as efficient as the equivalent hand written program and 10 times faster than running on the CPU alone in the typical case.},
}