搜索 mpi.h,找到了 ompi/include/mpi.h.in 这个文件。接下来先从最简单的两个函数 MPI_Comm_size 和 MPI_Init 查起。
应用场景:我想在 Open MPI 源码目录下找出所有包含字符串 "MPI_Init" 的文本文件,有什么工具可用吗?(例如 Linux 下的 grep -rl,或 Windows 下的 findstr /S。)
有comm_rank.c 还有 init.c:
进入init.c发现了入门第一个函数: MPI_Init(int *argc, char ***argv)
/*
 * C binding for MPI_Init.
 *
 * Each preprocessor directive must sit on its own line: a directive
 * ends at the first newline, so a collapsed one-line listing is not
 * valid C.
 */
#include "ompi_config.h"

#include <stdlib.h>

#include "opal/util/show_help.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/communicator/communicator.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/constants.h"

/*
 * When the profiling layer is being built, this function is compiled
 * under the name PMPI_Init; if weak symbols are available, MPI_Init is
 * additionally declared as a weak alias for it.
 */
#if OMPI_BUILD_MPI_PROFILING
#if OPAL_HAVE_WEAK_SYMBOLS
#pragma weak MPI_Init = PMPI_Init
#endif
#define MPI_Init PMPI_Init
#endif

/* Name handed to the error handler when initialization fails. */
static const char FUNC_NAME[] = "MPI_Init";

/*
 * Initialize MPI.
 *
 * argc, argv: pointers to the program's argument count and vector.
 *             They are dereferenced only when both are non-NULL;
 *             otherwise the back end is called with (0, NULL).
 *
 * Returns MPI_SUCCESS when ompi_mpi_init() succeeds.  On failure,
 * returns whatever ompi_errhandler_invoke() returns for the error
 * code (negative codes are first mapped through
 * ompi_errcode_get_mpi_code()).
 */
int MPI_Init(int *argc, char ***argv)
{
    int err;
    int provided;
    char *env;
    int required = MPI_THREAD_SINGLE;

    /* The OMPI_MPI_THREAD_LEVEL environment variable, when set,
       overrides the requested thread level.  A value outside
       MPI_THREAD_SINGLE..MPI_THREAD_MULTIPLE is replaced by
       MPI_THREAD_MULTIPLE.  Note that atoi() performs no error
       checking, so non-numeric text is read as 0 rather than being
       rejected. */
    if (NULL != (env = getenv("OMPI_MPI_THREAD_LEVEL"))) {
        required = atoi(env);
        if (required < MPI_THREAD_SINGLE || required > MPI_THREAD_MULTIPLE) {
            required = MPI_THREAD_MULTIPLE;
        }
    }

    /* Call the back-end initialization function (as little as
       possible is done in this wrapper so that nothing is lost when
       it is profiled). */
    if (NULL != argc && NULL != argv) {
        err = ompi_mpi_init(*argc, *argv, required, &provided);
    } else {
        err = ompi_mpi_init(0, NULL, required, &provided);
    }

    /* There is no communicator to invoke an error handler on at this
       point, so the ERRHANDLER macros (designed for real communicator
       objects) are not used; the back-end function is called directly
       with a NULL handler and a NULL object. */
    if (MPI_SUCCESS != err) {
        return ompi_errhandler_invoke(NULL, NULL,
                                      OMPI_ERRHANDLER_TYPE_COMM,
                                      err < 0 ? ompi_errcode_get_mpi_code(err) : err,
                                      FUNC_NAME);
    }

    /* Macro defined elsewhere in the project; runs only after a
       successful initialization. */
    OPAL_CR_INIT_LIBRARY();

    return MPI_SUCCESS;
}
/* Thread level requested when no override is present. */
int required = MPI_THREAD_SINGLE;

/* Check for an environment override of the required thread level.
   A value outside MPI_THREAD_SINGLE..MPI_THREAD_MULTIPLE is replaced
   by MPI_THREAD_MULTIPLE.  Note that atoi() performs no error
   checking, so non-numeric text is read as 0 rather than being
   rejected. */
if (NULL != (env = getenv("OMPI_MPI_THREAD_LEVEL"))) {
    required = atoi(env);
    if (required < MPI_THREAD_SINGLE || required > MPI_THREAD_MULTIPLE) {
        required = MPI_THREAD_MULTIPLE;
    }
}
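查阅资料(例如微软的 MPI 文档)可知:上面这段代码处理的是线程支持级别——如果设置了环境变量 OMPI_MPI_THREAD_LEVEL,就用它的值覆盖默认的 MPI_THREAD_SINGLE;若该值不在 MPI_THREAD_SINGLE 到 MPI_THREAD_MULTIPLE 的范围内,则改用 MPI_THREAD_MULTIPLE。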
/* Call the back-end initialization function (as little as possible
   is done in this wrapper so that nothing is lost when it is
   profiled).  argc and argv are dereferenced only when both pointers
   are non-NULL; otherwise an empty argument list (0, NULL) is
   passed. */
if (NULL != argc && NULL != argv) {
    err = ompi_mpi_init(*argc, *argv, required, &provided);
} else {
    err = ompi_mpi_init(0, NULL, required, &provided);
}
就是调用back-end初始化函数 ompi_mpi_init ,这个函数在 ompi_mpi_init.c定义了,但是很不幸,这个函数有600多行。
/* There is no communicator to invoke an error handler on at this
   point, so the ERRHANDLER macros (designed for real communicator
   objects) are not used; the back-end function is called directly
   with a NULL handler and a NULL object.  A negative err is first
   mapped through ompi_errcode_get_mpi_code(); other values are
   passed unchanged. */
if (MPI_SUCCESS != err) {
    return ompi_errhandler_invoke(NULL, NULL,
                                  OMPI_ERRHANDLER_TYPE_COMM,
                                  err < 0 ? ompi_errcode_get_mpi_code(err) : err,
                                  FUNC_NAME);
}
如果初始化函数返回的不是 MPI_SUCCESS,就调用 ompi_errhandler_invoke,并把它的返回值作为错误码返回。那么 ompi_errhandler_invoke 这个函数在哪里呢?
在 errhandler.h 中可以找到函数声明,它返回的错误码与传入的 err_code 参数一致——找了很久,最后用微软的黑科技 findstr /S 命令,终于找到了对应的实现文件:
/*
 * Invoke an error handler on an MPI object.
 *
 * errhandler:  handler to run; may be NULL (see below).
 * mpi_object:  the object the error occurred on; it is cast to a
 *              communicator, window or file pointer according to
 *              object_type.
 * object_type: one of OMPI_ERRHANDLER_TYPE_COMM / _WIN / _FILE.
 * err_code:    error code handed to the handler.
 * message:     text handed to C and C++ handlers.
 *
 * Returns err_code.  C and C++ handlers receive its address, and the
 * Fortran path copies the handler's result back into it, so the
 * value returned is whatever the handler left there.  If object_type
 * or eh_lang matches none of the cases, no handler runs and err_code
 * is returned unchanged.
 */
int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object,
                           int object_type, int err_code, const char *message)
{
    MPI_Fint fortran_handle, fortran_err_code = OMPI_INT_2_FINT(err_code);
    ompi_communicator_t *comm;
    ompi_win_t *win;
    ompi_file_t *file;

    /* If we got no errorhandler, call the errors-are-fatal
       communicator handler.  Article note: MPI_Init passes NULL
       here, so a failed initialization takes this path. */
    if (NULL == errhandler) {
        ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, message);
        return err_code;
    }

    /* Figure out what kind of errhandler it is, figure out if it's
       fortran or C, and then invoke it */
    switch (object_type) {
    /* Article note: the object-type constants come from
       ompi/errhandler/errhandler.h, where they are described as the
       "Enum used to describe what kind MPI object an error handler
       is used for". */
    case OMPI_ERRHANDLER_TYPE_COMM:
        comm = (ompi_communicator_t *) mpi_object;
        switch (errhandler->eh_lang) {
        case OMPI_ERRHANDLER_LANG_C: /* handler written in C */
            errhandler->eh_comm_fn(&comm, &err_code, message, NULL);
            break;

        case OMPI_ERRHANDLER_LANG_CXX:
            errhandler->eh_cxx_dispatch_fn(&comm, &err_code, message,
                                           (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_comm_fn);
            break;

        case OMPI_ERRHANDLER_LANG_FORTRAN:
            /* Fortran handlers get the Fortran handle and a
               Fortran-typed copy of the error code; the result is
               converted back afterwards. */
            fortran_handle = OMPI_INT_2_FINT(comm->c_f_to_c_index);
            errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code);
            err_code = OMPI_FINT_2_INT(fortran_err_code);
            break;
        }
        break;

    case OMPI_ERRHANDLER_TYPE_WIN:
        win = (ompi_win_t *) mpi_object;
        switch (errhandler->eh_lang) {
        case OMPI_ERRHANDLER_LANG_C:
            errhandler->eh_win_fn(&win, &err_code, message, NULL);
            break;

        case OMPI_ERRHANDLER_LANG_CXX:
            errhandler->eh_cxx_dispatch_fn(&win, &err_code, message,
                                           (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_win_fn);
            break;

        case OMPI_ERRHANDLER_LANG_FORTRAN:
            fortran_handle = OMPI_INT_2_FINT(win->w_f_to_c_index);
            errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code);
            err_code = OMPI_FINT_2_INT(fortran_err_code);
            break;
        }
        break;

    case OMPI_ERRHANDLER_TYPE_FILE:
        file = (ompi_file_t *) mpi_object;
        switch (errhandler->eh_lang) {
        case OMPI_ERRHANDLER_LANG_C:
            errhandler->eh_file_fn(&file, &err_code, message, NULL);
            break;

        case OMPI_ERRHANDLER_LANG_CXX:
            errhandler->eh_cxx_dispatch_fn(&file, &err_code, message,
                                           (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_file_fn);
            break;

        case OMPI_ERRHANDLER_LANG_FORTRAN:
            fortran_handle = OMPI_INT_2_FINT(file->f_f_to_c_index);
            errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code);
            err_code = OMPI_FINT_2_INT(fortran_err_code);
            break;
        }
        break;
    }

    /* All done */
    return err_code;
}
可以看到,这 60 多行代码其实只是按对象类型(通信子、窗口、文件)和语言(C、C++、Fortran)做分派;对 C 语言的通信子而言,最终调用的就是 ompi_errhandler_t 结构体的 eh_comm_fn 函数指针:
struct ompi_errhandler_t { opal_object_t super; char eh_name[MPI_MAX_OBJECT_NAME]; /* Type of MPI object that this handler is for */ ompi_errhandler_type_t eh_mpi_object_type; /* What language was the error handler created in */ ompi_errhandler_lang_t eh_lang; /* Function pointers. Note that we *have* to have all 4 types (vs., for example, a union) because the predefined errhandlers can be invoked on any MPI object type, so we need callbacks for all of three. */ MPI_Comm_errhandler_function *eh_comm_fn; ompi_file_errhandler_fn *eh_file_fn; MPI_Win_errhandler_function *eh_win_fn; ompi_errhandler_fortran_handler_fn_t *eh_fort_fn; /* Have separate callback for C++ errhandlers. This pointer is initialized to NULL and will be set explicitly by the C++ bindings for Create_errhandler. This function is invoked when eh_lang==OMPI_ERRHANDLER_LANG_CXX so that the user's callback function can be invoked with the right language semantics. */ ompi_errhandler_cxx_dispatch_fn_t *eh_cxx_dispatch_fn; /* index in Fortran <-> C translation array */ int eh_f_to_c_index; };
而这个 ompi_errhandler_t 对象的创建接口,来自于:
/*
 * Create an error handler object.
 *
 * object_type: kind of MPI object the handler is for.
 * func:        the user's handler function.  Article note: this is
 *              said to end up as the eh_comm_fn member of
 *              ompi_errhandler_t; the implementation is not shown
 *              here, so treat that as unconfirmed.
 * language:    language the handler was created in.
 *
 * Returns a pointer to the new ompi_errhandler_t.
 */
OMPI_DECLSPEC ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type,
                                                        ompi_errhandler_generic_handler_fn_t *func,
                                                        ompi_errhandler_lang_t language);
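因为在 MPI_Init 函数中调用 ompi_errhandler_invoke 时传入的 errhandler 是 NULL(MPI 的这些东西初始化失败,自然也不能传入一个 ompi_errhandler_t 结构了),所以不会走到 eh_comm_fn 的分派逻辑,而是直接调用 ompi_mpi_errors_are_fatal_comm_handler。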
1. ompi_mpi_init因为代码块过长,放在这里不合适
2. init失败,按照正常流程就会调用的: ompi_mpi_errors_are_fatal_comm_handler函数
3. ompi_errhandler_t 这个结构体包含的信息,这跟异常处理有关,对我们弄清楚以后并行计算实际运行可能发生的错误会有帮助