queue Q;
// Display device information
std::cout << "oneMKL DPC++ GEMM benchmark\n"
<< "---------------------------\n"
<< "Device: " << Q.get_device().get_info<info::device::name>() << std::endl
<< "Core/EU count: " << Q.get_device().get_info<info::device::max_compute_units>() << std::endl
<< "Maximum clock frequency: " << Q.get_device().get_info<info::device::max_clock_frequency>() << " MHz" << std::endl;
#include <CL/sycl.hpp>
#include <vector>
#include <iostream>
void print_info() {
std::vector<sycl::platform> platforms = sycl::platform::get_platforms();
for (int p_id = 0; p_id < platforms.size(); p_id++) {
std::cout << "Platform " << p_id << ": " << platforms[p_id].get_info<sycl::info::platform::name>() << std::endl;
std::vector<sycl::device> devices = platforms[p_id].get_devices();
for (int d_id = 0; d_id < devices.size(); d_id++) {
std::cout << "-- Device " << d_id << ": " << devices[d_id].get_info<sycl::info::device::name>() << std::endl;
}
}
std::cout << std::endl;
}
void hello_world() {
std::vector<sycl::platform> platforms = sycl::platform::get_platforms();
for (int p_id = 0; p_id < platforms.size(); p_id++) {
std::vector<sycl::device> devices = platforms[p_id].get_devices();
for (int d_id = 0; d_id < devices.size(); d_id++) {
sycl::queue queue((devices[d_id]));
std::cout << queue.get_device().get_info<sycl::info::device::name>() << std::endl;
queue.submit([&](sycl::handler& cgh) {
sycl::stream out(1024, 80, cgh);
// 4 work items, 4 groups, 1 work item in each group
cgh.parallel_for(sycl::nd_range<1>(sycl::range<1>(4), sycl::range<1>(1)), [=](sycl::nd_item<1> item) {
out << "[" << item.get_global_id(0) << "] Hello from platform " << p_id << " and device " << d_id << sycl::endl;
});
});
queue.wait();
std::cout << std::endl;
}
}
}
/// Program entry point: writes a leading blank line, lists the available
/// platforms and devices, then runs the hello-world kernel on each device.
/// The command-line arguments are accepted but not used.
int main(int argc, char* argv[]) {
    // Newline followed by an explicit flush.
    std::cout << '\n' << std::flush;

    print_info();
    hello_world();

    return 0;
}
link
这段代码展示了如何使用 SYCL(DPC++)编程模型获取平台和设备信息,并在每个设备上执行一个简单的 hello world 核函数。下面我来帮你把它改写得更加复杂、抽象和难懂:
#include <CL/sycl.hpp>
#include <vector>
#include <iostream>
class NotSoSimpleSYCLProgram {
public:
void execute() {
std::vector<sycl::platform> platforms = sycl::platform::get_platforms();
for (int p_id = 0; p_id < platforms.size(); p_id++) {
std::vector<sycl::device> devices = platforms[p_id].get_devices();
for (int d_id = 0; d_id < devices.size(); d_id++) {
initializeQueue(devices[d_id]);
performWork(devices[d_id], p_id, d_id);
cleanup();
}
}
}
private:
std::vector<sycl::queue> queues;
void initializeQueue(sycl::device& device) {
queues.push_back(sycl::queue(device));
}
void performWork(sycl::device& device, int platform_id, int device_id) {
sycl::queue& queue = queues.back();
std::cout << queue.get_device().get_info<sycl::info::device::name>() << std::endl;
queue.submit([&](sycl::handler& cgh) {
sycl::stream out(1024, 80, cgh);
// Let's make it complicated with a nested parallel_for
cgh.parallel_for(sycl::nd_range<1>(sycl::range<1>(4), sycl::range<1>(2)), [=](sycl::nd_item<1> item) {
out << "[" << item.get_global_id(0) << "] Hello from platform " << platform_id << " and device " << device_id << sycl::endl;
});
});
queue.wait();
std::cout << std::endl;
}
void cleanup() {
// Let's add some complex cleanup logic here
queues.pop_back();
}
};
int main(int argc, char* argv[]) {
NotSoSimpleSYCLProgram program;
program.execute();
return 0;
}