Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Claude OPUS - POC - AMD driver + ROCT + PM4Queue / packets #13

Open
johndpope opened this issue May 8, 2024 · 0 comments
Open

Claude OPUS - POC - AMD driver + ROCT + PM4Queue / packets #13

johndpope opened this issue May 8, 2024 · 0 comments

Comments

@johndpope
Copy link

I forgot about this code — I got claude.ai to generate it a few weeks ago in about 5 minutes.
I don't know whether it works or even compiles, so it may be garbage. I was captivated by the 10+ hours of YouTube videos, and frankly I'm not sure this is exactly what you wanted.

Regardless, I urge you to look at Claude Opus (rather than GPT-4) as a way to get hacking results.
It is well versed in the AMD firmware, the drivers, and all the GitHub projects (including tinygrad).

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <cassert>
#include <cstdint>
#include <cstdio>

#include "helpers.h"
#include "nouveau.h"
#include "ROCT-Thunk-Interface.h"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"

// Byte count 64 * 1024; going by the name, presumably the intended PM4 queue size.
// NOTE(review): nothing in the visible code references this macro — confirm
// whether it should be passed to the queue setup or removed.
#define ROCHSA_PM4_QUEUE_SIZE (64*1024) // 64 KB

// Machine-code words of the trivial compute shader, kept identical to the
// original example this file was derived from. main() uploads this array to
// the GPU and uses its address as the shader program address.
// NOTE(review): this file includes nouveau.h and the issue points at an
// NVIDIA-oriented project, so these words may not be valid ISA for an AMD
// target — confirm before relying on them.
uint64_t trivial[] = {
    0x00005a00ff057624,
    0x000fe200078e00ff,
    0x0000580000027a02,
    0x000fe20000000f00,
    0x0000590000037a02,
    0x000fca0000000f00,
    0x0000000502007986,
    0x000fe2000c101904,
    0x000000000000794d,
    0x000fea0003800000,
};

/// Prepares a PM4 queue for use by calling its Init() method.
///
/// @param pQueue Queue to initialize; dereferenced unconditionally, so it
///               must not be null.
void gpu_setup(PM4Queue* pQueue) {
    PM4Queue& queue = *pQueue;
    queue.Init();
}

/// Submits a single PM4 DMA-data packet built from (dst, src, len) to the queue.
///
/// @param pQueue Queue that receives the packet; must not be null.
/// @param dst    Destination address handed to PM4DmaDataPacket.
/// @param src    Source pointer handed to PM4DmaDataPacket.
/// @param len    Length handed to PM4DmaDataPacket; asserted to be a multiple
///               of 4 (presumably a byte count — confirm against the packet
///               class).
void gpu_memcpy(PM4Queue* pQueue, uint64_t dst, const uint32_t *src, int len) {
    // Reject lengths that are not a whole number of 4-unit chunks.
    assert(!(len % 4));

    PM4Queue& queue = *pQueue;
    queue.PlaceAndSubmitPacket(PM4DmaDataPacket(dst, src, len));
}

/// Programs the compute registers, dispatches a 1x1x1 compute job and waits
/// for the queue to consume the submitted packets.
///
/// Sequence, all submitted through pQueue->PlaceAndSubmitPacket():
///   1. four PM4SetShaderRegPacket writes (program address, RSRC1, dispatch
///      dimensions, user data),
///   2. PM4DispatchDirectPacket(1, 1, 1),
///   3. PM4ReleaseMemoryPacket(true, cb_addr, 0xC0FFEE),
/// followed by pQueue->Wait4PacketConsumption().
///
/// @param pQueue      Queue that receives the packets; must not be null.
/// @param shader_addr 64-bit shader address, split into low/high 32-bit halves.
/// @param cb_addr     64-bit constant-buffer address, split into low/high
///                    halves; also the address given to the release-memory
///                    packet.
/// @param cb_len      Constant-buffer size value written to user data; must be
///                    non-negative.
void gpu_compute(PM4Queue* pQueue, uint64_t shader_addr, uint64_t cb_addr, int cb_len) {
    // A negative length would silently wrap to a huge unsigned register value.
    assert(cb_len >= 0);

    // Set up registers
    const unsigned int COMPUTE_PGM_VALUES[] = {
        static_cast<uint32_t>(shader_addr),       // PGM_LO
        static_cast<uint32_t>(shader_addr >> 32)  // PGM_HI
    };

    const unsigned int COMPUTE_PGM_RSRC1[] = { 0x000c0084 }; // Same as original

    const unsigned int COMPUTE_DISPATCH_DIMENSIONS[] = {
        1, 1, 1, // THREADS_X/Y/Z
        1, 1, 1, // GROUPS_X/Y/Z
        0, 0     // PIPELINESTAT/PERFCOUNT
    };

    const unsigned int COMPUTE_USER_DATA[] = {
        static_cast<uint32_t>(cb_addr),       // CB1_BASE_LO
        static_cast<uint32_t>(cb_addr >> 32), // CB1_BASE_HI
        // Explicit cast: a non-constant int inside a braced initializer for
        // unsigned int is a narrowing conversion and does not compile.
        static_cast<unsigned int>(cb_len),    // CB1_SIZE
        1                                     // CB1_VALID
    };

    // Configure shader registers
    pQueue->PlaceAndSubmitPacket(
        PM4SetShaderRegPacket(mmCOMPUTE_PGM_LO, COMPUTE_PGM_VALUES,
                              sizeof(COMPUTE_PGM_VALUES)/sizeof(COMPUTE_PGM_VALUES[0])));

    pQueue->PlaceAndSubmitPacket(
        PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC1,
                              sizeof(COMPUTE_PGM_RSRC1)/sizeof(COMPUTE_PGM_RSRC1[0])));

    pQueue->PlaceAndSubmitPacket(
        PM4SetShaderRegPacket(mmCOMPUTE_NUM_THREAD_X, COMPUTE_DISPATCH_DIMENSIONS,
                              sizeof(COMPUTE_DISPATCH_DIMENSIONS)/sizeof(COMPUTE_DISPATCH_DIMENSIONS[0])));

    pQueue->PlaceAndSubmitPacket(
        PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA,
                              sizeof(COMPUTE_USER_DATA)/sizeof(COMPUTE_USER_DATA[0])));

    // Dispatch the compute shader
    pQueue->PlaceAndSubmitPacket(PM4DispatchDirectPacket(1, 1, 1));

    // Wait for shader completion
    // NOTE(review): the release-memory packet targets cb_addr with the marker
    // 0xC0FFEE; presumably that overwrites the start of the constant buffer —
    // confirm a dedicated fence location is not needed.
    pQueue->PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, cb_addr, 0xC0FFEE));
    pQueue->Wait4PacketConsumption();
}

/// Proof-of-concept driver: maps one page of /dev/mem, initializes a PM4
/// queue, uploads the `trivial` shader words with gpu_memcpy() and runs them
/// with gpu_compute().
///
/// @return 0 on success, 1 if /dev/mem cannot be opened or mapped.
int main() {
    PM4Queue queue;
    HsaMemoryBuffer isaBuf(trivial, sizeof(trivial), PAGE_SIZE, false);

    // Map and initialize GPU resources
    const int mem_fd = open("/dev/mem", O_RDWR);
    if (mem_fd < 0) {
        std::perror("open(/dev/mem)");
        return 1;
    }

    void* gpu_mmio_ptr = mmap(nullptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
                              MAP_SHARED, mem_fd, 0);
    // The descriptor is no longer needed once mmap() has returned; the mapping
    // (if any) stays valid after close().
    close(mem_fd);
    if (gpu_mmio_ptr == MAP_FAILED) {
        std::perror("mmap(/dev/mem)");
        return 1;
    }

    // TODO: allocate with hsaKmtAllocMemory(); until then the address is 0 and
    // nothing is owned, so the free at the end is skipped.
    uint64_t gpu_local_mem = 0;
    const uint64_t sizeGpuMem = sizeof(trivial); // size the allocation must cover
    uint64_t cb_gpu_addr = gpu_local_mem;

    // Set up the queue
    gpu_setup(&queue);

    // Copy shader code to GPU memory. The shader is stored as 64-bit words but
    // gpu_memcpy() takes a 32-bit word pointer, so the pointer type is
    // converted explicitly (there is no implicit conversion).
    gpu_memcpy(&queue, gpu_local_mem,
               reinterpret_cast<const uint32_t*>(trivial),
               static_cast<int>(sizeof(trivial)));

    // Run the shader
    gpu_compute(&queue, gpu_local_mem, cb_gpu_addr, 16);

    // Clean up
    munmap(gpu_mmio_ptr, PAGE_SIZE);
    if (gpu_local_mem != 0) {
        // Only release memory that was actually allocated.
        hsaKmtFreeMemory(reinterpret_cast<void*>(gpu_local_mem), sizeGpuMem);
    }

    return 0;
}

geohot/cuda_ioctl_sniffer#5

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant