forked from mit-han-lab/parallel-computing-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
matmul.h
58 lines (54 loc) · 1.44 KB
/
matmul.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Data structures
// Plain descriptor for a matrix of floats: two integer dimensions plus a
// pointer to the element storage. The struct itself performs no allocation
// and no bounds checking.
struct matrix
{
    // Dimensions of the matrix (presumably the number of rows and the number
    // of columns, judging by the names -- confirm against the kernels).
    int row, column;
    // Pointer to the float elements. Element layout and buffer ownership are
    // not specified here -- TODO confirm with the code that fills this in.
    float *data_ptr;
};
// Argument bundle carrying three matrix pointers, an index range and a block
// size (presumably handed to one worker thread of the multithreaded kernel --
// confirm against the code that spawns the threads).
struct thread_args
{
    // The three matrices involved. Only the matrix descriptors are const;
    // matrix::data_ptr is a plain `float *`, so the elements stay writable
    // through these pointers.
    const struct matrix *A;
    const struct matrix *B;
    const struct matrix *C;
    // Index range assigned to this bundle (presumably [start_i, end_i) over
    // rows -- TODO confirm whether end_i is inclusive or exclusive).
    int start_i;
    int end_i;
    // Block size (presumably the tiling block edge -- confirm with the user
    // of this struct).
    int blk_size;
};
// Tuning knobs passed alongside the matrices in matmul_params.
struct optimization_params
{
    // Block size. No default is provided, so the value is indeterminate
    // after default-initialization; assign it before reading it.
    int blk_size;
    // Thread count; defaults to 8 when not overridden.
    int num_thread{8};
};
// Everything a matmul implementation receives in a single argument: three
// matrix descriptors (held by value) and the tuning parameters.
struct matmul_params
{
    // Matrix descriptors (presumably A and B are the operands and C receives
    // the product -- confirm against the kernel implementations).
    struct matrix A;
    struct matrix B;
    struct matrix C;
    // Block size and thread count for the implementations that use them.
    struct optimization_params opt_params;
};
namespace matmul
{
// Collection of matrix-multiplication entry points, one per implementation
// variant, plus a selector enum and an `evaluate` method that takes that
// selector. Only declarations live here; the definitions are elsewhere.
class MatmulOperator
{
public:
    // Identifies an implementation variant for evaluate(). Values are the
    // implicit consecutive integers starting at 0.
    // NOTE(review): there is no enumerator matching mat_mul_transpose (only
    // TRANSPOSE_SIMD) -- confirm whether that is intentional.
    enum IMP_TYPE
    {
        NAIVE = 0,
        UNROLL,
        REORDER,
        TILING,
        MULTITHREAD,
        TRANSPOSE_SIMD,
        FAST,
        CUDA,
    };

    // Implementation variants. Each receives the same parameter bundle
    // (matrices A, B, C and the optimization parameters) through a pointer to
    // a const matmul_params. Presumably each computes the product of A and B
    // into C using the technique its name suggests -- confirm against the
    // definitions.
    void naive_mat_mul(const struct matmul_params *params);
    void mat_mul_unrolling(const struct matmul_params *params);
    void mat_mul_reordering(const struct matmul_params *params);
    void mat_mul_tiling(const struct matmul_params *params);
    void mat_mul_multithreading(const struct matmul_params *params);
    void mat_mul_transpose(const struct matmul_params *params);
    void mat_mul_transpose_simd(const struct matmul_params *params);
    void mat_mul_fast(const struct matmul_params *params);
    void mat_mul_cuda(const struct matmul_params *params);

    // Takes an implementation selector and a parameter bundle (presumably
    // runs and/or times the selected variant -- confirm in the definition).
    void evaluate(IMP_TYPE type, const struct matmul_params *params);

private:
    // Internal helper taking the three matrices (presumably validates that
    // their shapes are compatible -- confirm in the definition).
    void CHECK_MATRICES(const struct matrix *A, const struct matrix *B, const struct matrix *C);
};
}