Open MPI logo

Hardware Locality Users' Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Hardware Locality Users mailing list

Subject: Re: [hwloc-users] windows PCI locality (was; DELL 8 core machine + Quadro K5000 GPU Card...)
From: Brice Goglin (Brice.Goglin_at_[hidden])
Date: 2013-11-19 07:40:12


Ah this could help already. Even if we don't have all PCI devices, we
may be able to just create a PCI device for each CUdevice, attach it to
the right NUMA node, and insert a CUDA OS device inside.
Now I need to borrow a CUDA-enabled laptop running windows for basic
testing.
thanks
Brice

Le 19/11/2013 05:36, Ashley Reid a écrit :
>
> Unfortunately, I can't commit time, but I would like to help where
> possible.
>
>
>
> This code should map a CUdevice to a NUMA node (by enumerating all PCI
> devices). I have not compiled the code in this form or tested it
> as is, but the calls should work fine for mapping any CUDA device to
> the OS enumeration with respect to PCI device location:
>
>
>
> /*
>  * Parse one decimal field ("<label> <number>") out of a Windows PCI
>  * location string, searching forward from `text`.
>  *
>  * On success stores the number in *value, stores the position just
>  * past the number in *next, and returns TRUE.  Returns FALSE when the
>  * label is missing or is not followed by any digits.
>  */
> static BOOL ParseLocationField(const char *text, const char *label,
>                                const char **next, unsigned long *value)
> {
>     const char *start = strstr(text, label);
>     char *end;
>
>     if (start == NULL) {
>         return FALSE;
>     }
>     start += strlen(label);
>     *value = strtoul(start, &end, 10);
>     if (end == start) {
>         return FALSE;   /* label present, but no number after it */
>     }
>     *next = end;
>     return TRUE;
> }
>
> /*
>  * Parse a SPDRP_LOCATION_INFORMATION string of the form
>  * "PCI bus <n>, device <n>, function <n>".
>  *
>  * Returns TRUE and fills bus/device/function only when the string
>  * starts with "PCI" and all three fields are found in that order.
>  */
> static BOOL ParsePciLocation(const char *locinfo, unsigned long *bus,
>                              unsigned long *device,
>                              unsigned long *function)
> {
>     const char *cursor = locinfo;
>
>     if (strncmp(locinfo, "PCI", 3) != 0) {
>         return FALSE;
>     }
>     return ParseLocationField(cursor, "bus", &cursor, bus) &&
>            ParseLocationField(cursor, "device", &cursor, device) &&
>            ParseLocationField(cursor, "function", &cursor, function);
> }
>
> /*
>  * Map a CUDA device to the NUMA node Windows reports for the PCI
>  * device at the same bus/device/function.
>  *
>  * The CUDA PCI bus id string is parsed as hexadecimal
>  * "domain:bus:device.function"; the domain is skipped because the
>  * Windows location string it is compared against carries no domain.
>  * All present devices are then enumerated through SetupAPI, and for
>  * the first one whose location matches, DEVPKEY_Numa_Proximity_Domain
>  * and DEVPKEY_Device_Numa_Node are queried and printed.
>  *
>  * Returns the DEVPKEY_Device_Numa_Node value on success, or -1 when
>  * the CUDA bus id cannot be obtained or parsed, the device list cannot
>  * be created, no matching PCI device is found, or the NUMA node
>  * property cannot be read.  (0 is a valid node number, so it is never
>  * used to signal an error.)
>  */
> long GetNumaNode(CUdevice dev)
> {
>     char cuDevString[CUDA_DEV_STRING_LEN];
>     unsigned long cudaBus = 0;
>     unsigned long cudaSubdevice = 0;
>     unsigned long cudaFunction = 0;
>     HDEVINFO hDevInfo;
>     DWORD deviceIndex;
>     long numaNode = -1;
>
>     // get the cuda device string
>     CUresult status = cuDeviceGetPCIBusId(cuDevString,
>                                           CUDA_DEV_STRING_LEN, dev);
>     assert(CUDA_SUCCESS == status);
>     if (CUDA_SUCCESS != status) {
>         return -1;
>     }
>
>     // skip the domain, then read bus, device and function (all hex);
>     // %lx matches the unsigned long destinations
>     if (sscanf_s(cuDevString, "%*x:%lx:%lx.%lx",
>                  &cudaBus, &cudaSubdevice, &cudaFunction) != 3) {
>         return -1;
>     }
>
>     // Use NULL as the first parameter as we need to look at non
>     // display devices too
>     hDevInfo = SetupDiGetClassDevs(NULL, NULL, NULL,
>                                    DIGCF_PRESENT | DIGCF_ALLCLASSES);
>     if (hDevInfo == INVALID_HANDLE_VALUE) {
>         assert(!"INVALID_HANDLE_VALUE");
>         return -1;
>     }
>
>     // Walk every device until the one at the CUDA device's location
>     for (deviceIndex = 0; ; deviceIndex++) {
>         SP_DEVINFO_DATA deviceInfoData;
>         char locinfo[256] = {0};
>         char hardwareId[256] = {0};
>         unsigned long bus = 0;
>         unsigned long subdevice = 0;
>         unsigned long function = 0;
>         DEVPROPTYPE type = DEVPROP_TYPE_EMPTY;
>         DEVPROPKEY key;
>         UINT32 propValue = 0;
>         DWORD lastError;
>         BOOL ret;
>
>         deviceInfoData.cbSize = sizeof(SP_DEVINFO_DATA);
>         ret = SetupDiEnumDeviceInfo(hDevInfo, deviceIndex,
>                                     &deviceInfoData);
>         if (!ret) {
>             // MSDN says: call SetupDiEnumDeviceInfo until there are
>             // no more values (the function fails and a call to
>             // GetLastError returns ERROR_NO_MORE_ITEMS).
>             lastError = GetLastError();
>             assert(lastError == ERROR_NO_MORE_ITEMS);
>             (void)lastError;
>             break;
>         }
>
>         // Devices without location information cannot match; skipping
>         // them also avoids parsing an unfilled buffer.  One byte is
>         // held back so the zero-initialised buffer stays terminated.
>         ret = SetupDiGetDeviceRegistryPropertyA(hDevInfo,
>                   &deviceInfoData, SPDRP_LOCATION_INFORMATION, NULL,
>                   (PBYTE)locinfo, sizeof(locinfo) - 1, NULL);
>         if (!ret) {
>             continue;
>         }
>
>         if (!ParsePciLocation(locinfo, &bus, &subdevice, &function) ||
>             bus != cudaBus ||
>             subdevice != cudaSubdevice ||
>             function != cudaFunction) {
>             continue;
>         }
>
>         // Diagnostic output: first hardware id of the matched device
>         ret = SetupDiGetDeviceRegistryPropertyA(hDevInfo,
>                   &deviceInfoData, SPDRP_HARDWAREID, NULL,
>                   (PBYTE)hardwareId, sizeof(hardwareId) - 1, NULL);
>         if (ret) {
>             printf("locinfo %s\n", hardwareId);
>         }
>
>         key = DEVPKEY_Numa_Proximity_Domain;
>         lastError = 0;
>         ret = SetupDiGetDeviceProperty(hDevInfo, &deviceInfoData,
>                   &key, &type, (PBYTE)&propValue, sizeof(propValue),
>                   NULL, 0);
>         if (!ret) {
>             lastError = GetLastError();
>         }
>         printf("DEVPKEY_Numa_Proximity_Domain %d err %lu\n",
>                (int)propValue, (unsigned long)lastError);
>
>         key = DEVPKEY_Device_Numa_Node;
>         type = DEVPROP_TYPE_EMPTY;
>         propValue = 0;
>         lastError = 0;
>         ret = SetupDiGetDeviceProperty(hDevInfo, &deviceInfoData,
>                   &key, &type, (PBYTE)&propValue, sizeof(propValue),
>                   NULL, 0);
>         if (!ret) {
>             lastError = GetLastError();
>         }
>         printf("DEVPKEY_Device_Numa_Node %d err %lu\n",
>                (int)propValue, (unsigned long)lastError);
>
>         // Only report a node the system actually supplied; a failed
>         // query leaves numaNode at -1.
>         if (ret && type == DEVPROP_TYPE_UINT32) {
>             numaNode = (long)propValue;
>         }
>         break;
>     }
>
>     // Release the device information set on every path past its
>     // creation
>     SetupDiDestroyDeviceInfoList(hDevInfo);
>     return numaNode;
> }
>
>
>
> *From:*hwloc-users [mailto:hwloc-users-bounces_at_[hidden]] *On
> Behalf Of *Brice Goglin
> *Sent:* Monday, November 18, 2013 11:09 AM
> *To:* Hardware locality user list
> *Subject:* Re: [hwloc-users] windows PCI locality (was; DELL 8 core
> machine + Quadro K5000 GPU Card...)
>
>
>
> This seems unrelated since he seems to be running Linux anyway.
>
> We got that information a while ago but I couldn't do anything with it
> because (I think) I didn't have access to a Windows release that
> supported this. And, bigger problem, I don't have access to a Windows
> machine with more than one socket. I can't actually test the code
> anywhere.
>
> Are you volunteering to write some code? I am not saying that you
> should write the entire hwloc support, but some example would help a lot.
>
> Once we have the device locality, we'll need the devices too. The
> windows code misses the entire device listing code. Do you have any
> idea how to list PCI devices, match them with CUDA GPUs, etc ?
>
> Brice
>
>
>
>
> Le 18/11/2013 02:52, Ashley Reid a écrit :
>
> Maybe not completely related to your issue, but the windows code
> misses the correct enumeration to see where the GPU is in a NUMA
> system. The code needs to look at:
>
>
>
> Use "DEVPKEY_Numa_Proximity_Domain" and "DEVPKEY_Device_Numa_Node"
> when calling SetupDiGetDeviceProperty.
>
> Links:
>
>
>
> http://msdn.microsoft.com/en-us/library/windows/hardware/ff543536(v=vs.85).aspx
> <http://msdn.microsoft.com/en-us/library/windows/hardware/ff543536%28v=vs.85%29.aspx>
>
> "Windows Server 2003, Windows XP, and Windows 2000 do not
> support this property." -- So should be fine on win7 and win8?
>
> http://blogs.technet.com/b/winserverperformance/archive/2008/09/13/getting-system-topology-information-on-windows.aspx
>
>
>
> But this only works if the BIOS has the right ACPI entries; we
> filed a bug and got an update for the z820 from HP. This relies on
> the _PXM value in the ACPI tables.
>
>
>
> You can use windbg and !nstree to view the tables. Inside there
> should be some _PXM values.
>
>
>
> Ash
>
>
>
>
>
> *From:*hwloc-users [mailto:hwloc-users-bounces_at_[hidden]] *On
> Behalf Of *Solibakke Per Bjarte
> *Sent:* Monday, November 18, 2013 10:15 AM
> *To:* hwloc-users_at_[hidden] <mailto:hwloc-users_at_[hidden]>
> *Subject:* [hwloc-users] DELL 8 core machine + Quadro K5000 GPU
> Card...
>
>
>
> Hello
>
>
>
> I recently got access to a very interesting and powerful machine:
> Dell 8 core + GPU Quadro K5000 (96 cores).
>
> A total of 1536 cores in the original machine configuration.
>
>
>
> I installed first HWLOC 1.7 version and I also installed the newly
> released beta 1.8. The final installation lines report PCI (linux)
> CUDA.
>
> However, the commands:
>
>
>
> lstopo --whole-system and lstopo --whole-io
>
>
>
> report only the 8 CPU-cores. No reference to PCI-Bridges, eth0,
> seas +++ and the GPUs.
>
>
>
> Is the installation of the machine the problem or is my
>
> ./configure --prefix=/usr/local/hwloc
>
>
>
> missing some vital elements?
>
>
>
> Regards
>
> PBSolibakke
>
>
>
> Dr.econ Per Bjarte Solibakke
>
> Professor
>
> per.b.solibakke_at_[hidden] <mailto:per.b.solibakke_at_[hidden]>
>
> Cell phone: 004790035606
>
> Phone: 004771214238
>
> ------------------------------------------------------------------------
>
> This email message is for the sole use of the intended
> recipient(s) and may contain confidential information. Any
> unauthorized review, use, disclosure or distribution is
> prohibited. If you are not the intended recipient, please contact
> the sender by reply email and destroy all copies of the original
> message.
>
> ------------------------------------------------------------------------
>
>
>
>
> _______________________________________________
>
> hwloc-users mailing list
>
> hwloc-users_at_[hidden] <mailto:hwloc-users_at_[hidden]>
>
> http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-users
>
>
>
>
>
> _______________________________________________
> hwloc-users mailing list
> hwloc-users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-users