Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions perf_tests/common/include/ze_app.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ class ZeApp {
void commandListCreate(uint32_t device_index,
uint32_t command_queue_group_ordinal,
ze_command_list_handle_t *phCommandList);
// Creates an immediate command list on the device at `device_index`.
//   command_queue_group_ordinal - stored in the queue descriptor's `ordinal`.
//   command_queue_index         - stored in the queue descriptor's `index`.
//   phCommandList               - out: receives the created command list handle.
void immediateCommandListCreate(uint32_t device_index,
uint32_t command_queue_group_ordinal,
uint32_t command_queue_index,
ze_command_list_handle_t *phCommandList);
void commandListDestroy(ze_command_list_handle_t phCommandList);
void commandListClose(ze_command_list_handle_t phCommandList);
void commandListReset(ze_command_list_handle_t phCommandList);
Expand Down
13 changes: 13 additions & 0 deletions perf_tests/common/src/ze_app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ void ZeApp::commandListCreate(uint32_t device_index,
phCommandList));
}

// Creates an immediate command list via zeCommandListCreateImmediate() for
// device `_devices[device_index]` in `context`, and writes the handle to
// `phCommandList`.
//
// The queue descriptor selects the queue group (`command_queue_group_ordinal`)
// and the queue within that group (`command_queue_index`).
// NOTE(review): `device_index` is used to index `_devices` without a range
// check here; callers are presumably expected to pass a valid index.
// NOTE(review): SUCCESS_OR_TERMINATE presumably aborts on a non-success
// result -- confirm against the macro definition.
void ZeApp::immediateCommandListCreate(
uint32_t device_index, uint32_t command_queue_group_ordinal,
uint32_t command_queue_index, ze_command_list_handle_t *phCommandList) {
// `{}` value-initializes the descriptor, so every field that is not assigned
// below stays zero.
ze_command_queue_desc_t command_queue_description{};
command_queue_description.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
command_queue_description.pNext = nullptr;
command_queue_description.ordinal = command_queue_group_ordinal;
command_queue_description.index = command_queue_index;
SUCCESS_OR_TERMINATE(
zeCommandListCreateImmediate(context, _devices[device_index],
&command_queue_description, phCommandList));
}

// Destroys `command_list` by calling zeCommandListDestroy().
// NOTE(review): SUCCESS_OR_TERMINATE presumably aborts on a non-success
// result -- confirm against the macro definition.
void ZeApp::commandListDestroy(ze_command_list_handle_t command_list) {
SUCCESS_OR_TERMINATE(zeCommandListDestroy(command_list));
}
Expand Down
52 changes: 52 additions & 0 deletions perf_tests/ze_peer/include/ze_peer.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ static const char *usage_str =
"divide "
"buffers across available"
"\n engines specified with option -u."
"\n"
"\n -x for unidirectional parallel tests, select "
"where to place the queue"
"\n src use queue in source"
Expand All @@ -136,6 +137,9 @@ static const char *usage_str =
"\n with each device being managed by a "
"separate process."
"\n"
"\n --regular_cmdlist use regular command list instead of "
"immediate"
"\n"
"\n --version display version"
"\n -h, --help display help message"
"\n";
Expand All @@ -155,10 +159,18 @@ class ZePeer {
uint32_t local_device_id,
size_t buffer_size);

void perform_parallel_copy_to_single_target_immediate(
peer_test_t test_type, peer_transfer_t transfer_type,
uint32_t remote_device_id, uint32_t local_device_id, size_t buffer_size);

void perform_bidirectional_parallel_copy_to_single_target(
peer_test_t test_type, peer_transfer_t transfer_type,
uint32_t remote_device_id, uint32_t local_device_id, size_t buffer_size);

void perform_bidirectional_parallel_copy_to_single_target_immediate(
peer_test_t test_type, peer_transfer_t transfer_type,
uint32_t remote_device_id, uint32_t local_device_id, size_t buffer_size);

void bandwidth_latency_parallel_to_single_target(
peer_test_t test_type, peer_transfer_t transfer_type,
size_t number_buffer_elements, uint32_t remote_device_id,
Expand All @@ -170,22 +182,44 @@ class ZePeer {
std::vector<uint32_t> &local_device_ids, size_t buffer_size,
bool divide_buffers);

void perform_parallel_copy_to_multiple_targets_immediate(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<uint32_t> &remote_device_ids,
std::vector<uint32_t> &local_device_ids, size_t buffer_size,
bool divide_buffers);

void perform_bidirectional_parallel_copy_to_multiple_targets(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<uint32_t> &remote_device_ids,
std::vector<uint32_t> &local_device_ids, size_t buffer_size,
bool divide_buffers);

void perform_bidirectional_parallel_copy_to_multiple_targets_immediate(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<uint32_t> &remote_device_ids,
std::vector<uint32_t> &local_device_ids, size_t buffer_size,
bool divide_buffers);

void perform_parallel_copy_to_pair_targets(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<std::pair<uint32_t, uint32_t>> &pair_device_ids,
size_t buffer_size, bool divide_buffers);

void perform_parallel_copy_to_pair_targets_immediate(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<std::pair<uint32_t, uint32_t>> &pair_device_ids,
size_t buffer_size, bool divide_buffers);

void perform_bidirectional_parallel_copy_to_pair_targets(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<std::pair<uint32_t, uint32_t>> &pair_device_ids,
size_t buffer_size, bool divide_buffers);

void perform_bidirectional_parallel_copy_to_pair_targets_immediate(
peer_test_t test_type, peer_transfer_t transfer_type,
std::vector<std::pair<uint32_t, uint32_t>> &pair_device_ids,
size_t buffer_size, bool divide_buffers);

void bandwidth_latency_parallel_to_pair_targets(
peer_test_t test_type, peer_transfer_t transfer_type,
size_t number_buffer_elements,
Expand Down Expand Up @@ -214,17 +248,30 @@ class ZePeer {
ze_command_queue_handle_t command_queue, void *dst_buffer,
void *src_buffer, size_t buffer_size);

void perform_copy_immediate(peer_test_t test_type,
ze_command_list_handle_t command_list,
void *dst_buffer, void *src_buffer,
size_t buffer_size);

void bidirectional_perform_copy(uint32_t dst_device_id,
uint32_t src_device_id, uint32_t queue_index,
peer_test_t test_type,
peer_transfer_t transfer_type,
size_t buffer_size);

// Runs the bidirectional copy measurement between `local_device_id` and
// `remote_device_id` by appending copies directly to the command lists stored
// at engines[queue_index] of each device (no command queue is used).
//   test_type     - forwarded to print_results().
//   transfer_type - PEER_WRITE or otherwise (read): selects which command
//                   list performs which direction of the copy.
//   buffer_size   - number of bytes copied in each direction per iteration.
void bidirectional_perform_copy_immediate(
uint32_t remote_device_id, uint32_t local_device_id, uint32_t queue_index,
peer_test_t test_type, peer_transfer_t transfer_type, size_t buffer_size);

void initialize_src_buffer(ze_command_list_handle_t command_list,
ze_command_queue_handle_t command_queue,
void *local_buffer, char *host_buffer,
size_t buffer_size);

void initialize_src_buffer_immediate(ze_command_list_handle_t command_list,
void *src_buffer, char *host_buffer,
size_t buffer_size);

void initialize_buffers(ze_command_list_handle_t command_list,
ze_command_queue_handle_t command_queue,
void *src_buffer, char *host_buffer,
Expand All @@ -239,6 +286,10 @@ class ZePeer {
char *validate_buffer, void *dst_buffer,
char *host_buffer, size_t buffer_size);

void validate_buffer_immediate(ze_command_list_handle_t command_list,
char *validate_buffer, void *dst_buffer,
char *host_buffer, size_t buffer_size);

void set_up(size_t number_buffer_elements,
std::vector<uint32_t> &remote_device_ids,
std::vector<uint32_t> &local_device_ids, size_t &buffer_size);
Expand Down Expand Up @@ -297,6 +348,7 @@ class ZePeer {
static bool parallel_copy_to_multiple_targets;
static bool parallel_copy_to_pair_targets;
static bool parallel_divide_buffers;
static bool use_immediate_cmdlist;

static uint32_t number_iterations;
uint32_t warm_up_iterations = number_iterations / 5;
Expand Down
32 changes: 22 additions & 10 deletions perf_tests/ze_peer/src/ze_peer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ bool ZePeer::parallel_copy_to_single_target = false;
bool ZePeer::parallel_copy_to_multiple_targets = false;
bool ZePeer::parallel_copy_to_pair_targets = false;
bool ZePeer::parallel_divide_buffers = false;
bool ZePeer::use_immediate_cmdlist = true;
uint32_t ZePeer::number_iterations = 50;
const size_t max_elems = 268435456; /* 256 MB */

Expand Down Expand Up @@ -457,6 +458,8 @@ int main(int argc, char **argv) {
i++;
} else if (strcmp(argv[i], "-v") == 0) {
ZePeer::validate_results = true;
} else if (strcmp(argv[i], "--regular_cmdlist") == 0) {
ZePeer::use_immediate_cmdlist = false;
} else {
std::cout << usage_str;
exit(-1);
Expand Down Expand Up @@ -711,18 +714,25 @@ ZePeer::ZePeer(std::vector<uint32_t> &remote_device_ids,
uint32_t engineIndex = 0;
for (uint32_t g = 0; g < numQueueGroups; g++) {
for (uint32_t q = 0; q < queueProperties[g].numQueues; q++) {
ze_command_queue_handle_t command_queue;
benchmark->commandQueueCreate(d, g, q, &command_queue);
std::pair<ze_command_queue_handle_t, ze_command_list_handle_t>
enginePair;
if (ZePeer::use_immediate_cmdlist) {
ze_command_list_handle_t command_list;
benchmark->immediateCommandListCreate(d, g, q, &command_list);

ze_command_list_handle_t command_list;
benchmark->commandListCreate(d, g, &command_list);
enginePair = std::make_pair(nullptr, command_list);
} else {
ze_command_queue_handle_t command_queue;
benchmark->commandQueueCreate(d, g, q, &command_queue);

auto enginePair = std::make_pair(command_queue, command_list);
ze_peer_devices[d].engines.push_back(enginePair);
ze_command_list_handle_t command_list;
benchmark->commandListCreate(d, g, &command_list);

enginePair = std::make_pair(command_queue, command_list);
}

// use compute engines by default. Select the indexes from device 0
if (option_u_empty && (queueProperties[g].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
ze_peer_devices[d].engines.push_back(enginePair);
if (option_u_empty) {
this->queues.push_back(engineIndex);
}

Expand All @@ -745,7 +755,9 @@ ZePeer::ZePeer(std::vector<uint32_t> &remote_device_ids,
ZePeer::~ZePeer() {
for (auto &device : ze_peer_devices) {
for (auto enginePair : device.engines) {
benchmark->commandQueueDestroy(enginePair.first);
if (enginePair.first) {
benchmark->commandQueueDestroy(enginePair.first);
}
benchmark->commandListDestroy(enginePair.second);
}
}
Expand Down
111 changes: 99 additions & 12 deletions perf_tests/ze_peer/src/ze_peer_bidirectional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,75 @@ void ZePeer::bidirectional_perform_copy(
SUCCESS_OR_TERMINATE(zeCommandListReset(remote_command_list));
}

// Measures a bidirectional peer copy between two devices using the command
// lists stored at engines[queue_index] of each device.
//
// Every iteration appends one copy to the local list and one to the remote
// list, both waiting on `event`. The host then signals `event`, waits for both
// lists with zeCommandListHostSynchronize(), and resets `event` for the next
// round. Only the signal-to-synchronize window is timed; appending the copies
// happens before the timer starts.
//
//   remote_device_id / local_device_id - indices into ze_peer_devices,
//                                        ze_src_buffers and ze_dst_buffers.
//   queue_index   - engine slot whose command list is used on both devices.
//   test_type     - forwarded to print_results().
//   transfer_type - PEER_WRITE: each list copies from its own device's source
//                   buffer into the other device's destination buffer.
//                   Otherwise: each list copies from the other device's source
//                   buffer into its own device's destination buffer.
//   buffer_size   - bytes copied per direction.
void ZePeer::bidirectional_perform_copy_immediate(
    uint32_t remote_device_id, uint32_t local_device_id, uint32_t queue_index,
    peer_test_t test_type, peer_transfer_t transfer_type, size_t buffer_size) {
  ze_command_list_handle_t local_command_list =
      ze_peer_devices[local_device_id].engines[queue_index].second;
  ze_command_list_handle_t remote_command_list =
      ze_peer_devices[remote_device_id].engines[queue_index].second;

  Timer<std::chrono::microseconds::period> timer;

  // Appends the pair of opposing copies for one iteration. Both copies list
  // `event` as their single wait event and have no signal event.
  auto append_gated_copies = [&]() {
    if (transfer_type == PEER_WRITE) {
      SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
          local_command_list, ze_dst_buffers[remote_device_id],
          ze_src_buffers[local_device_id], buffer_size, nullptr, 1, &event));
      SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
          remote_command_list, ze_dst_buffers[local_device_id],
          ze_src_buffers[remote_device_id], buffer_size, nullptr, 1, &event));
    } else {
      SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
          local_command_list, ze_dst_buffers[local_device_id],
          ze_src_buffers[remote_device_id], buffer_size, nullptr, 1, &event));
      SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
          remote_command_list, ze_dst_buffers[remote_device_id],
          ze_src_buffers[local_device_id], buffer_size, nullptr, 1, &event));
    }
  };

  /* Warm up: same sequence as the measured loop, without timing. */
  for (uint32_t warm_up = 0U; warm_up < warm_up_iterations; ++warm_up) {
    append_gated_copies();

    SUCCESS_OR_TERMINATE(zeEventHostSignal(event));
    SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(
        local_command_list, std::numeric_limits<uint64_t>::max()));
    SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(
        remote_command_list, std::numeric_limits<uint64_t>::max()));
    SUCCESS_OR_TERMINATE(zeEventHostReset(event));
  }

  // Measured runs; repeats for as long as run_continuously is set.
  do {
    long double time_usec = 0.0L;

    for (uint32_t iteration = 0U; iteration < number_iterations;
         ++iteration) {
      append_gated_copies();

      // Timed window: host signal through completion of both lists.
      timer.start();
      SUCCESS_OR_TERMINATE(zeEventHostSignal(event));
      SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(
          local_command_list, std::numeric_limits<uint64_t>::max()));
      SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(
          remote_command_list, std::numeric_limits<uint64_t>::max()));
      timer.end();
      time_usec += timer.period_minus_overhead();

      // Re-arm the event for the next iteration.
      SUCCESS_OR_TERMINATE(zeEventHostReset(event));
    }

    print_results(true, test_type, buffer_size, time_usec);
  } while (run_continuously);
}

void ZePeer::bidirectional_bandwidth_latency(peer_test_t test_type,
peer_transfer_t transfer_type,
size_t number_buffer_elements,
Expand All @@ -98,23 +167,41 @@ void ZePeer::bidirectional_bandwidth_latency(peer_test_t test_type,
initialize_buffers(remote_device_ids, local_device_ids, ze_host_buffer,
buffer_size);

bidirectional_perform_copy(remote_device_id, local_device_id, queue_index,
test_type, transfer_type, buffer_size);
if (ZePeer::use_immediate_cmdlist) {
bidirectional_perform_copy_immediate(remote_device_id, local_device_id,
queue_index, test_type, transfer_type,
buffer_size);
} else {
bidirectional_perform_copy(remote_device_id, local_device_id, queue_index,
test_type, transfer_type, buffer_size);
}

if (validate_results) {
validate_buffer(ze_peer_devices[remote_device_id].engines[0].second,
ze_peer_devices[remote_device_id].engines[0].first,
ze_host_validate_buffer, ze_dst_buffers[remote_device_id],
ze_host_buffer, buffer_size);

if (ZePeer::use_immediate_cmdlist) {
validate_buffer_immediate(
ze_peer_devices[remote_device_id].engines[0].second,
ze_host_validate_buffer, ze_dst_buffers[remote_device_id],
ze_host_buffer, buffer_size);
} else {
validate_buffer(ze_peer_devices[remote_device_id].engines[0].second,
ze_peer_devices[remote_device_id].engines[0].first,
ze_host_validate_buffer, ze_dst_buffers[remote_device_id],
ze_host_buffer, buffer_size);
}
for (size_t k = 0; k < buffer_size; k++) {
ze_host_validate_buffer[k] = 5;
}

validate_buffer(ze_peer_devices[local_device_id].engines[0].second,
ze_peer_devices[local_device_id].engines[0].first,
ze_host_validate_buffer, ze_dst_buffers[local_device_id],
ze_host_buffer, buffer_size);
if (ZePeer::use_immediate_cmdlist) {
validate_buffer_immediate(
ze_peer_devices[local_device_id].engines[0].second,
ze_host_validate_buffer, ze_dst_buffers[local_device_id],
ze_host_buffer, buffer_size);
} else {
validate_buffer(ze_peer_devices[local_device_id].engines[0].second,
ze_peer_devices[local_device_id].engines[0].first,
ze_host_validate_buffer, ze_dst_buffers[local_device_id],
ze_host_buffer, buffer_size);
}
}

tear_down(remote_device_ids, local_device_ids);
Expand Down
Loading
Loading