Get the threshold used by the specified function to determine if the NVIDIA CUDA Toolkit algorithm will be used.
#include <imsls.h>
int imsls_cuda_get (Imsls_cuda cuda_name, ..., 0)
Imsls_cuda cuda_name
(Input)
An enumerator which specifies the IMSL
function for which the threshold will be retrieved. cuda_name must be one of the
values defined in Table
15.1.
Returns the threshold value used to determine when the NVIDIA CUDA Toolkit version of the function specified by cuda_name will be used. A return value of zero indicates that the IMSL version of the specified function will always be used. A return value greater than zero is the threshold value being used by the function specified by cuda_name. If the problem size is greater than or equal to threshold, the NVIDIA Toolkit algorithm will be used.
See “Programming Notes for Using NVIDIA CUDA Toolkit” for more information on NVIDIA’s CUDA Toolkit integration into IMSL C Numerical Library.
#include <imsls.h>
int
imsls_cuda_get (Imsls_cuda
cuda_name,
IMSLS_GET_DEVICE,
int *idevice,
0)
IMSLS_GET_DEVICE,
int *idevice
(Output)
Returns a value specifying whether the NVIDIA CUDA Toolkit
algorithm was used for the last call to the IMSL function specified by cuda_name.
Returns 1 if the NVIDIA Toolkit algorithm was used and 0 if the IMSL version of
the algorithm was used. A value of -1 indicates that
the IMSL function specified by cuda_name has not been
used.
This function returns the threshold value for a specified function. It can optionally be used to return information on the last invocation of the specified function.
Function imsls_f_hypothesis_partial uses Basic Linear Algebra Subprogram SGEMM to improve performance. In this example the threshold value of the function SGEMM is manipulated to force the use of both the IMSL and NVIDIA CUDA Toolkit algorithms.
#include <imsls.h>
#include <stdio.h>
/*
 * Example driver for imsls_cuda_get / imsls_cuda_set.
 *
 * Fits a regression model, then calls imsls_f_hypothesis_partial twice:
 * once with the SGEMM threshold set to 0 (IMSL version expected) and once
 * with the threshold set to 2 (CUDA version expected). After each call
 * imsls_cuda_get is queried with IMSLS_GET_DEVICE to report which SGEMM
 * implementation was used. The original threshold is restored at the end.
 *
 * Returns 0 on completion, 1 if the regression fit could not be obtained.
 */
int main(void) {
    int nreg, nobs = 16, n_dependent = 1, nhp = 6;
    /* -1 is the documented "function has not been used" device value;
       start there so idevice is never read uninitialised. */
    int idevice = -1, iswitch;
    float *coefficients = NULL, *h = NULL, z[16*11];
    Imsls_f_regression *info = NULL;
    float hp[72] = {0, 0, 0, 0, 0, 0,
                    1, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 1, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 1, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 1, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 1};
    float x[16][2] = {{1, 1},
                      {1, 1},
                      {1, 1},
                      {1, 2},
                      {1, 2},
                      {1, 3},
                      {1, 3},
                      {1, 3},
                      {2, 1},
                      {2, 1},
                      {2, 2},
                      {2, 2},
                      {2, 2},
                      {2, 3},
                      {2, 3},
                      {2, 3}};
    float y[16] = {19, 20, 21, 24, 26, 22, 25, 25,
                   25, 27, 21, 24, 24, 31, 32, 33};
    int nvef[3] = {1, 1, 2};
    int indef[4] = {0, 1, 0, 1};

    /* Turn off printing of warning errors */
    imsls_error_options(
        IMSLS_SET_PRINT,
        IMSLS_WARNING, 0,
        0);

    nreg = imsls_f_regressors_for_glm(nobs, &x[0][0], 2, 0,
        IMSLS_INDICES_EFFECTS, 3, nvef, indef,
        IMSLS_REGRESSORS_USER, z,
        0);

    coefficients = imsls_f_regression(nobs, nreg, z, y,
        IMSLS_N_DEPENDENT, n_dependent,
        IMSLS_REGRESSION_INFO, &info,
        0);

    /* Turn printing of warning errors back on */
    imsls_error_options(
        IMSLS_SET_PRINT,
        IMSLS_WARNING, 1,
        0);

    /* Without a fitted model there is nothing to pass on to
       imsls_f_hypothesis_partial; stop before touching the threshold. */
    if (coefficients == NULL || info == NULL) {
        fprintf(stderr, "Error: regression fit failed.\n");
        if (coefficients != NULL)
            imsls_free(coefficients);
        if (info != NULL)
            imsls_free(info);
        return 1;
    }

    /* Get the current threshold value for sgemm */
    iswitch = imsls_cuda_get(
        IMSLS_CUDA_SGEMM,
        0);

    /* Set the threshold value of sgemm to 0
       so that use of the IMSL version is ensured */
    imsls_cuda_set(
        IMSLS_CUDA_SGEMM, 0,
        0);

    /* Call routine which uses SGEMM */
    imsls_f_hypothesis_partial(info, nhp, hp,
        IMSLS_H_MATRIX, &h,
        0);

    /* Check to see what version of sgemm was used */
    imsls_cuda_get(
        IMSLS_CUDA_SGEMM,
        IMSLS_GET_DEVICE, &idevice,
        0);
    if (idevice == 0)
        printf("The IMSL version of SGEMM was used.\n");
    else if (idevice == 1)
        printf("Error: The CUDA version of SGEMM was used.\n");
    else
        printf("Error: SGEMM has not been used.\n");

    /* h is passed by address and is overwritten by the next call
       (presumably with a freshly library-allocated array), so release
       the first result before reusing the pointer. */
    if (h != NULL) {
        imsls_free(h);
        h = NULL;
    }

    /* Set the threshold value to be very small to ensure the
       CUDA version of SGEMM will be used. */
    imsls_cuda_set(
        IMSLS_CUDA_SGEMM, 2,
        0);

    /* Call routine which uses SGEMM */
    imsls_f_hypothesis_partial(info, nhp, hp,
        IMSLS_H_MATRIX, &h,
        0);

    /* Check to see what version of sgemm was used */
    idevice = -1;
    imsls_cuda_get(
        IMSLS_CUDA_SGEMM,
        IMSLS_GET_DEVICE, &idevice,
        0);
    if (idevice == 1)
        printf("The CUDA version of SGEMM was used.\n");
    else if (idevice == 0)
        printf("Error: The IMSL version of SGEMM was used.\n");
    else
        printf("Error: SGEMM has not been used.\n");

    /* Set the threshold value to the original setting */
    imsls_cuda_set(
        IMSLS_CUDA_SGEMM, iswitch,
        0);

    /* Release the results returned by the library */
    if (h != NULL)
        imsls_free(h);
    imsls_free(info);
    imsls_free(coefficients);

    /* Release GPU memory */
    imsls_cuda_free();

    return 0;
}
The IMSL version of SGEMM was used.
The CUDA version of SGEMM was used.
IMSLS_CUDA_ENUM_NAME |
The argument specified for “cuda_name” = # is not valid. |
IMSLS_CUDA_NOT_IMPLEMENTED |
The specified function name does not have a CUDA implementation. |
IMSLS_FCN_NOT_USED |
The specified function name has not yet been used. |
IMSLS_CUDA_NOT_AVAIL |
The CUDA Toolkit algorithms are not implemented using this version of the library. Use the CUDA link environment variables to leverage the CUDA Toolkit algorithms. |