Skip to content

Commit

Permalink
Important bug fix related to MPI data type.
Browse files Browse the repository at this point in the history
Sending size_t with MPI is dangerous because its underlying integer type varies across platforms, so no single MPI datatype is guaranteed to match it:
https://stackoverflow.com/questions/40807833/sending-size-t-type-data-with-mpi

So we use int for all dimension variables for safety,
although this limits the maximum array size to what an int can hold.

NetCDF's nc_inq_dimlen writes the dimension length through a size_t pointer, so we read it into a temporary size_t and cast the value to int.
  • Loading branch information
JiaweiZhuang committed Apr 17, 2017
1 parent 69515e6 commit b6223f2
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 17 deletions.
13 changes: 5 additions & 8 deletions Parallel_Algorithm/MPI/Kmean_mpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ int main() {
MPI_Comm_size(MPI_COMM_WORLD, &size);
//printf("hello world from process %d of %d\n", rank, size);

size_t N_samples_all,N_samples,N_features,N_clusters,N_repeat;
int N_samples_all,N_samples,N_features,N_clusters,N_repeat;
//i for samples; j for features; k for clusters (typically)
int i,j,k;
int k_best,initial_idx;
Expand Down Expand Up @@ -68,8 +68,7 @@ int main() {
TBD: use MPI_Scatterv to handle arbitrary size
*/

//convert to int to prevent error from unsigned/signed
N_samples = (int)N_samples_all / size;
N_samples = N_samples_all / size;
// printf("%d, Local samples: %d \n",rank,N_samples);

if (rank==0){
Expand All @@ -82,8 +81,7 @@ int main() {

// check scattered results
if (rank==size-1){
//convert to int to prevent error from unsigned - signed
printf("Last element after sacattering %d: %f \n",rank,X[(int)N_samples-1][(int)N_features-1]);
printf("Last element after sacattering %d: %f \n",rank,X[N_samples-1][N_features-1]);
}

double iElaps1 = MPI_Wtime() - iStart1;
Expand All @@ -108,7 +106,6 @@ int main() {
// how many data points in the cluster
// needed by calculating the average position of data points in each cluster
int* cluster_sizes = (int *)malloc(N_clusters*sizeof(int));
// cluster_sizes[0] = 0; //why this will fail for rank1?

/*
======================================================
Expand Down Expand Up @@ -138,9 +135,9 @@ int main() {
if (rank==1){
// initialize other nodes
for (k=0; k<N_clusters; k++){
//cluster_sizes[k] = 0;
cluster_sizes[k] = 0;
for (j=0; j<N_features; j++){
//new_cluster_centers[k][j] = 0.0;
new_cluster_centers[k][j] = 0.0;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Parallel_Algorithm/OpenMP/Kmean_omp.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ int main() {
---------------- Initialization ---------------------
======================================================
*/
size_t N_samples,N_features,N_clusters,N_repeat;
int N_samples,N_features,N_clusters,N_repeat;
//i for samples; j for features; k for clusters (typically)
int i,j,k;
int k_best,initial_idx;
Expand Down
19 changes: 13 additions & 6 deletions Parallel_Algorithm/shared/ncdf_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
* (need to pass the address)
*/
int readX(char* FILE_NAME, float*** p_X,int*** p_GUESS,
size_t* p_N_samples,size_t* p_N_features,
size_t* p_N_clusters,size_t* p_N_repeat ) {
int* p_N_samples,int* p_N_features,
int* p_N_clusters,int* p_N_repeat ) {
int ncid, varid,dimid;
int retval;
size_t N_temp;

printf("reading data \n");

/* Open the file. NC_NOWRITE tells netCDF we want read-only access
* to the file.*/
Expand All @@ -32,19 +35,23 @@ int readX(char* FILE_NAME, float*** p_X,int*** p_GUESS,

/* Get the size of the data for dynamical allocation*/
nc_inq_dimid(ncid,"N_samples",&dimid);
nc_inq_dimlen(ncid,dimid,p_N_samples);
nc_inq_dimlen(ncid,dimid,&N_temp);
*p_N_samples = (int)N_temp;
printf("Number of samples: %d \n",*p_N_samples);

nc_inq_dimid(ncid,"N_features",&dimid);
nc_inq_dimlen(ncid,dimid,p_N_features);
nc_inq_dimlen(ncid,dimid,&N_temp);
*p_N_features = (int)N_temp;
printf("Number of features: %d \n",*p_N_features);

nc_inq_dimid(ncid,"N_clusters",&dimid);
nc_inq_dimlen(ncid,dimid,p_N_clusters);
nc_inq_dimlen(ncid,dimid,&N_temp);
*p_N_clusters = (int)N_temp;
printf("Number of clusters: %d \n",*p_N_clusters);

nc_inq_dimid(ncid,"N_repeat",&dimid);
nc_inq_dimlen(ncid,dimid,p_N_repeat);
nc_inq_dimlen(ncid,dimid,&N_temp);
*p_N_repeat = (int)N_temp;
printf("Number of repeated runs: %d \n",*p_N_repeat);

/* Get the varid of the data variable, based on its name. */
Expand Down
4 changes: 2 additions & 2 deletions Parallel_Algorithm/shared/ncdf_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#define NCDF_UTIL_H

int readX(char* FILE_NAME, float*** p_X,int*** p_GUESS,
size_t* p_N_samples,size_t* p_N_features,
size_t* p_N_clusters,size_t* p_N_repeat );
int* p_N_samples,int* p_N_features,
int* p_N_clusters,int* p_N_repeat );

int writeY(char* FILE_NAME, int* labels, float inert);

Expand Down

0 comments on commit b6223f2

Please sign in to comment.