我想使用 OpenMP 求出矩阵所有元素的总和、最小值和最大值(以及最小值、最大值所在的位置),更具体地说,是想借助 reduction 子句来实现.

我遇到的问题是:我无法对一个 struct(在我的例子中是 struct Extreme)直接应用 min 和 max 归约操作;而当我试图使用 declare reduction(...) 子句来创建自定义的归约操作时,也没能让它正常工作(我猜是因为归约不能在 struct 上执行).

为了暂时解决这个问题,我在代码中加了一个临界区,以便安全地读取和更新 minimum 与 maximum 这两个 struct 的值,但这会使大型矩阵的并发执行变得非常缓慢.

那么,我的问题是:我可以做些什么来改进这个解决方案?有没有一种方法可以像我希望的那样对 struct 使用归约?还是说我应该换一种思路,改用其他的 OpenMP 子句?

以下是我的代码(其中大多数变量是用来报告执行统计数据的):

/* Matrix summation using OpenMP
usage with gcc (version 4.2 or higher required):
gcc -O -fopenmp -o matrixSum-openmp matrixSum-openmp.c
./matrixSum-openmp size numWorkers
*/

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

/* Compile-time limits of the benchmark. */
enum {
    MAXSIZE = 10000,           /* largest matrix dimension accepted */
    MATRIX_LIMIT = 100,        /* elements are drawn from [0, MATRIX_LIMIT - 1] */
    MAXWORKERS = 4,            /* upper bound on the number of workers */
    PROGRAM_EXECUTIONS = 5     /* timed repetitions per worker count */
};

/* Run parameters, assigned by readCommandLine(). */
int numWorkers;
int size;

/* Matrix storage; the loops in this file touch only the size x size corner. */
int matrix[MAXSIZE][MAXSIZE];

/* Wall-clock timestamps taken around one timed run in main(). */
double start_time;
double end_time;

/* A matrix element singled out as an extreme, together with its location. */
struct Extreme {
    int value;   /* the element's value */
    int pos_i;   /* first (row) index of the element */
    int pos_j;   /* second (column) index of the element */
};

/*
 * Select the extreme with the smaller value.
 * When both values are equal, the second argument (e2) is returned.
 */
struct Extreme min (struct Extreme e1, struct Extreme e2) {
    if (e1.value < e2.value) {
        return e1;
    }
    return e2;
}

/*
 * Select the extreme with the larger value.
 * When both values are equal, the second argument (e2) is returned.
 */
struct Extreme max(struct Extreme e1, struct Extreme e2) {
    if (e1.value > e2.value) {
        return e1;
    }
    return e2;
}

/*
 * Parse one decimal command-line argument and force it into [1, upper].
 *
 * Text that is not a complete decimal integer yields `upper` (the default);
 * values below 1 become 1 and values above `upper` become `upper`.
 * Out-of-range input is saturated by strtol and is then clamped as well.
 */
static int parseBoundedArg(const char *text, int upper) {
    char *end;
    long parsed = strtol(text, &end, 10);

    if (end == text || *end != '\0') {
        return upper;
    }
    if (parsed < 1) {
        return 1;
    }
    if (parsed > upper) {
        return upper;
    }
    return (int)parsed;
}

/*
 * Set the globals `size` and `numWorkers` from the command line.
 *
 * argv[1] is the matrix size (default and maximum MAXSIZE); argv[2] is the
 * worker count (default and maximum MAXWORKERS). Both end up in [1, max], so
 * the variable-length timing table in main() never gets a non-positive
 * dimension.
 */
void readCommandLine(int argc, char *argv[]) {
    size = (argc > 1) ? parseBoundedArg(argv[1], MAXSIZE) : MAXSIZE;
    numWorkers = (argc > 2) ? parseBoundedArg(argv[2], MAXWORKERS) : MAXWORKERS;
}

/*
 * Fill the size x size corner of `matrix` with rand() % MATRIX_LIMIT,
 * visiting the cells row by row.
 */
void initializeMatrix() {
    int row = 0;
    while (row < size) {
        int *cell = matrix[row];
        int *const row_end = cell + size;
        while (cell < row_end) {
            *cell++ = rand() % MATRIX_LIMIT;
        }
        row++;
    }
}

/*
 * Arithmetic mean of the first PROGRAM_EXECUTIONS entries of v.
 * The entries are accumulated in index order.
 */
double calculateAvg(const double v[PROGRAM_EXECUTIONS]) {
    double total = 0;
    const double *cursor = v;
    const double *const stop = v + PROGRAM_EXECUTIONS;

    while (cursor != stop) {
        total += *cursor++;
    }
    return total / PROGRAM_EXECUTIONS;
}

// Read command line, initialize, and create threads
int main(int argc, char *argv[]) {

    // Read command line args if any.
    readCommandLine(argc, argv);

    // Initialize the matrix
    initializeMatrix();

    // Store for each number of processors used, the times it took to execute each program.
    // execution_times[i][j] = time (s) it took to execute with i processors program j.
    // Note execution_times[0][i] is the sequential time of program execution i.
    double execution_times[numWorkers][PROGRAM_EXECUTIONS];

    for(int num_proc = 0; num_proc < numWorkers; num_proc++) {
        // Set the number of threads for openMP.
        omp_set_num_threads(num_proc + 1);

        printf("\n\n============================| NUM PROCESSORS: %d |============================\n", num_proc + 1);
        for (int num_prog = 0; num_prog < PROGRAM_EXECUTIONS; num_prog++) {

            // Reset variables.
            int total_sum = 0, i, j;
            struct Extreme minimum, maximum;
            minimum.value = MATRIX_LIMIT;
            maximum.value = -MATRIX_LIMIT;

            printf("\nProgram execution number %d: \n", num_prog + 1);

            // Start timer
            start_time = omp_get_wtime();

            //#pragma omp declare reduction(myMin : struct Extreme : combinerMin) initializer(omp_orig = initMin)
            //#pragma omp declare reduction(myMax : struct Extreme : combinerMax) initializer(omp_priv = initMax)
            #pragma omp parallel for reduction (+:total_sum) private(j) // (myMin: minimum) (myMax: maximum)
            for (i = 0; i < size; i++) {
                for (j = 0; j < size; j++) {
                    total_sum += matrix[i][j];
                    #pragma omp critical
                    {
                        struct Extreme candidate = {.value = matrix[i][j], .pos_i = i, .pos_j = j};
                        minimum = min(minimum, candidate);
                        maximum = max(maximum, candidate);
                    }
                }
            }

            // Implicit barrier
            end_time = omp_get_wtime();

            execution_times[num_proc][num_prog] = end_time - start_time;

            printf("The total sum is %d\n", total_sum);
            printf("The minimum is %d at (%d, %d)\n", minimum.value, minimum.pos_i, minimum.pos_j);
            printf("The maximum is %d at (%d, %d)\n", maximum.value, maximum.pos_i, maximum.pos_j);
            printf("Sequential execution time: %f s\n", execution_times[0][num_prog]);
            printf("Concurrent execution time: %f s\n", execution_times[num_proc][num_prog]);
        }

        double seq_avg = calculateAvg(execution_times[0]);
        double exec_avg = calculateAvg(execution_times[num_proc]);
        double speedup = (seq_avg / exec_avg) * 100;

        printf("\nAverage sequential time (%d executions): %f", PROGRAM_EXECUTIONS, seq_avg);
        printf("\nAverage execution time (%d executions) with %d processor(s): %f s\n", PROGRAM_EXECUTIONS, num_proc + 1, exec_avg);
        printf("Speedup: %.2f%%", speedup);
    }

    printf("\n\n");
}

推荐答案

我自己解决的:

谨慎地使用带有指针的两个组合器函数就足够了:

/* Matrix summation using OpenMP
usage with gcc (version 4.9 or higher required: "declare reduction" needs OpenMP 4.0):
gcc -O -fopenmp -o matrixSum-openmp matrixSum-openmp.c
./matrixSum-openmp size numWorkers
*/

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Compile-time limits of the benchmark.
#define MAXSIZE 10500                // Maximum matrix size
// NOTE(review): this literal does not fit in a 32-bit int, while the matrix
// elements are int values produced by rand() % MATRIX_LIMIT, so the range
// stated below may not be what actually occurs -- confirm the intended limit.
#define MATRIX_LIMIT 10000000000           // Matrix elements range [0, MATRIX_LIMIT - 1]
#define MAXWORKERS 4               // Maximum number of workers
#define PROGRAM_EXECUTIONS 10       // Number of program executions

// Run parameters, assigned by readCommandLine().
int numWorkers, size;
// Matrix storage, filled by initializeMatrix().
int matrix[MAXSIZE][MAXSIZE];
// Wall-clock timestamps taken around one timed run in main().
double start_time, end_time;

/* A matrix element singled out as an extreme, together with its location. */
struct Extreme {
    long int value;   /* the element's value */
    int pos_i;        /* first (row) index of the element */
    int pos_j;        /* second (column) index of the element */
};

struct Extreme min(struct Extreme e1, struct Extreme e2) {
    return e1.value < e2.value ? e1 : e2;
}

struct Extreme max(struct Extreme e1, struct Extreme e2) {
    return e1.value > e2.value ? e1 : e2;
}

/*
 * Parse one decimal command-line argument and force it into [1, upper].
 *
 * Text that is not a complete decimal integer yields `upper` (the default);
 * values below 1 become 1 and values above `upper` become `upper`.
 * Out-of-range input is saturated by strtol and is then clamped as well.
 */
static int parseBoundedArg(const char *text, int upper) {
    char *end;
    long parsed = strtol(text, &end, 10);

    if (end == text || *end != '\0') {
        return upper;
    }
    if (parsed < 1) {
        return 1;
    }
    if (parsed > upper) {
        return upper;
    }
    return (int)parsed;
}

/*
 * Set the globals `size` and `numWorkers` from the command line.
 *
 * argv[1] is the matrix size (default and maximum MAXSIZE); argv[2] is the
 * worker count (default and maximum MAXWORKERS). Both end up in [1, max], so
 * the variable-length timing table in main() never gets a non-positive
 * dimension.
 */
void readCommandLine(int argc, char *argv[]) {
    size = (argc > 1) ? parseBoundedArg(argv[1], MAXSIZE) : MAXSIZE;
    numWorkers = (argc > 2) ? parseBoundedArg(argv[2], MAXWORKERS) : MAXWORKERS;
}

/*
 * Fill the size x size corner of `matrix` with rand() % MATRIX_LIMIT,
 * visiting the cells row by row.
 */
void initializeMatrix() {
    for (int r = 0; r < size; r++) {
        int *row = matrix[r];
        for (int c = 0; c < size; c++) {
            row[c] = rand() % MATRIX_LIMIT;
        }
    }
}

/*
 * Arithmetic mean of the first PROGRAM_EXECUTIONS entries of v.
 * The entries are accumulated in index order.
 */
double calculateAvg(const double v[PROGRAM_EXECUTIONS]) {
    double total = 0;
    int k = 0;

    while (k < PROGRAM_EXECUTIONS) {
        total += v[k];
        k++;
    }
    return total / PROGRAM_EXECUTIONS;
}

/*
 * Combiner for the myMin reduction: leave the smaller of *out and *in in
 * *out. When the values are equal, *in is copied into *out.
 */
void combinerMin(struct Extreme *out, struct Extreme *in) {
    if (out->value < in->value) {
        return;
    }
    *out = *in;
}

/*
 * Combiner for the myMax reduction: leave the larger of *out and *in in
 * *out. When the values are equal, *in is copied into *out.
 */
void combinerMax(struct Extreme *out, struct Extreme *in) {
    if (out->value > in->value) {
        return;
    }
    *out = *in;
}

// Read command line, initialize, and create threads
int main(int argc, char *argv[]) {

    srand(time(0));

    // Read command line args if any.
    readCommandLine(argc, argv);

    // Initialize the matrix
    initializeMatrix();

    // Store for each number of processors used, the times it took to execute each program.
    // execution_times[i][j] = time (s) it took to execute with i processors program j.
    // Note execution_times[0][i] is the sequential time of program execution i.
    double execution_times[numWorkers][PROGRAM_EXECUTIONS];

    for(int num_proc = 0; num_proc < numWorkers; num_proc++) {
        // Set the number of threads for openMP.
        omp_set_num_threads(num_proc + 1);

        printf("\n\n============================| NUM PROCESSORS: %d |============================\n", num_proc + 1);
        for (int num_prog = 0; num_prog < PROGRAM_EXECUTIONS; num_prog++) {

            // Reset variables.
            int total_sum = 0, i, j;
            struct Extreme minimum, maximum;
            minimum.value = MATRIX_LIMIT;
            maximum.value = -MATRIX_LIMIT;

            printf("\nProgram execution number %d: \n", num_prog + 1);

            // Start timer
            start_time = omp_get_wtime();

            #pragma omp declare reduction(myMin : struct Extreme : combinerMin(&omp_out, &omp_in)) initializer(omp_priv = omp_orig)
            #pragma omp declare reduction(myMax : struct Extreme : combinerMax(&omp_out, &omp_in)) initializer(omp_priv = omp_orig)
            #pragma omp parallel for reduction (+:total_sum) reduction (myMin: minimum) reduction (myMax: maximum) private(j)
            for (i = 0; i < size; i++) {
                for (j = 0; j < size; j++) {
                    total_sum += matrix[i][j];
                    struct Extreme candidate = {.value = matrix[i][j], .pos_i = i, .pos_j = j};
                    minimum = min(minimum, candidate);
                    maximum = max(maximum, candidate);
                }
            } // Implicit barrier
            end_time = omp_get_wtime();

            execution_times[num_proc][num_prog] = end_time - start_time;

            printf("The total sum is %d\n", total_sum);
            printf("The minimum is %ld at (%d, %d)\n", minimum.value, minimum.pos_i, minimum.pos_j);
            printf("The maximum is %ld at (%d, %d)\n", maximum.value, maximum.pos_i, maximum.pos_j);
            printf("Sequential execution time: %f s\n", execution_times[0][num_prog]);
            printf("Concurrent execution time: %f s\n", execution_times[num_proc][num_prog]);
        }

        double seq_avg = calculateAvg(execution_times[0]);
        double exec_avg = calculateAvg(execution_times[num_proc]);
        double speedup = (seq_avg / exec_avg) * 100;

        printf("\nAverage sequential time (%d executions): %f s", PROGRAM_EXECUTIONS, seq_avg);
        printf("\nAverage execution time (%d executions): %f s\n", PROGRAM_EXECUTIONS, exec_avg);
        printf("Speedup: %.2f%%", speedup);
    }

    printf("\n\n");
}

C++相关问答推荐

函数指针始终为零,但在解除引用和调用时有效

ISO_C_BINDING,从Fortran调用C

为什么已经设置的值在C中被重置为for循环条件中的新值?

MISRA C:2012 11.3违规强制转换(FLOAT*)到(uint32_t*)

两个连续的语句是否按顺序排列?

如何按顺序将所有CSV文件数据读入 struct 数组?

==284==错误:AddressSanitizer:堆栈缓冲区下溢

在进程之间重定向输出和输入流的问题

我在C中运行和调试时得到了不同的输出

条件跳转或移动取决于未初始化值(S)/未初始化值由堆分配创建(Realloc)

为什么WcrTomb只支持ASCII?

如何在C宏定义中包含双引号?

浮动目标文件,数据段

C中的数组下标和指针算法给出了不同的结果

访问未对齐联合的成员是否为未定义行为,即使被访问的成员已充分对齐?

指针构成的原始C数组,还是C++的 vector<vector<double>>?

中位数和众数不正确

GDB 跳过动态加载器代码

为什么实现文件中的自由函数默认没有内部链接?

为什么需要struct in_addr