-
Notifications
You must be signed in to change notification settings - Fork 0
/
multistart_hooke_tasks.c
executable file
·422 lines (370 loc) · 17.5 KB
/
multistart_hooke_tasks.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
/* Nonlinear Optimization using the algorithm of Hooke and Jeeves */
/* 12 February 1994 author: Mark G. Johnson */
/* Find a point X where the nonlinear function f(X) has a local */
/* minimum. X is an n-vector and f(X) is a scalar. In mathe- */
/* matical notation f: R^n -> R^1. The objective function f() */
/* is not required to be continuous. Nor does f() need to be */
/* differentiable. The program does not use or require */
/* derivatives of f(). */
/* The software user supplies three things: a subroutine that */
/* computes f(X), an initial "starting guess" of the minimum point */
/* X, and values for the algorithm convergence parameters. Then */
/* the program searches for a local minimum, beginning from the */
/* starting guess, using the Direct Search algorithm of Hooke and */
/* Jeeves. */
/* This C program is adapted from the Algol pseudocode found in */
/* "Algorithm 178: Direct Search" by Arthur F. Kaupe Jr., Commun- */
/* ications of the ACM, Vol 6. p.313 (June 1963). It includes the */
/* improvements suggested by Bell and Pike (CACM v.9, p. 684, Sept */
/* 1966) and those of Tomlin and Smith, "Remark on Algorithm 178" */
/* (CACM v.12). The original paper, which I don't recommend as */
/* highly as the one by A. Kaupe, is: R. Hooke and T. A. Jeeves, */
/* "Direct Search Solution of Numerical and Statistical Problems", */
/* Journal of the ACM, Vol. 8, April 1961, pp. 212-229. */
/* Calling sequence: */
/* int hooke(nvars, startpt, endpt, rho, epsilon, itermax) */
/* */
/* nvars {an integer} This is the number of dimensions */
/* in the domain of f(). It is the number of */
/* coordinates of the starting point (and the */
/* minimum point.) */
/* startpt {an array of doubles} This is the user- */
/* supplied guess at the minimum. */
/* endpt {an array of doubles} This is the location of */
/* the local minimum, calculated by the program */
/* rho {a double} This is a user-supplied convergence */
/* parameter (more detail below), which should be */
/* set to a value between 0.0 and 1.0. Larger */
/* values of rho give greater probability of */
/* convergence on highly nonlinear functions, at a */
/* cost of more function evaluations. Smaller */
/* values of rho reduce the number of evaluations */
/* (and the program running time), but increase */
/* the risk of nonconvergence. See below. */
/* epsilon {a double} This is the criterion for halting */
/* the search for a minimum. When the algorithm */
/* begins to make less and less progress on each */
/* iteration, it checks the halting criterion: if */
/* the stepsize is below epsilon, terminate the */
/* iteration and return the current best estimate */
/* of the minimum. Larger values of epsilon (such */
/* as 1.0e-4) give quicker running time, but a */
/* less accurate estimate of the minimum. Smaller */
/* values of epsilon (such as 1.0e-7) give longer */
/* running time, but a more accurate estimate of */
/* the minimum. */
/* itermax {an integer} A second, rarely used, halting */
/* criterion. If the algorithm uses >= itermax */
/* iterations, halt. */
/* The user-supplied objective function f(x,n) should return a C */
/* "double". Its arguments are x -- an array of doubles, and */
/* n -- an integer. x is the point at which f(x) should be */
/* evaluated, and n is the number of coordinates of x. That is, */
/* n is the number of coefficients being fitted. */
/* rho, the algorithm convergence control */
/* The algorithm works by taking "steps" from one estimate of */
/* a minimum, to another (hopefully better) estimate. Taking */
/* big steps gets to the minimum more quickly, at the risk of */
/* "stepping right over" an excellent point. The stepsize is */
/* controlled by a user supplied parameter called rho. At each */
/* iteration, the stepsize is multiplied by rho (0 < rho < 1), */
/* so the stepsize is successively reduced. */
/* Small values of rho correspond to big stepsize changes, */
/* which make the algorithm run more quickly. However, there */
/* is a chance (especially with highly nonlinear functions) */
/* that these big changes will accidentally overlook a */
/* promising search vector, leading to nonconvergence. */
/* Large values of rho correspond to small stepsize changes, */
/* which force the algorithm to carefully examine nearby points */
/* instead of optimistically forging ahead. This improves the */
/* probability of convergence. */
/* The stepsize is reduced until it is equal to (or smaller */
/* than) epsilon. So the number of iterations performed by */
/* Hooke-Jeeves is determined by rho and epsilon: */
/* rho**(number_of_iterations) = epsilon */
/* In general it is a good idea to set rho to an aggressively */
/* small value like 0.5 (hoping for fast convergence). Then, */
/* if the user suspects that the reported minimum is incorrect */
/* (or perhaps not accurate enough), the program can be run */
/* again with a larger value of rho such as 0.85, using the */
/* result of the first minimization as the starting guess to */
/* begin the second minimization. */
/* Normal use: (1) Code your function f() in the C language */
/* (2) Install your starting guess {or read it in} */
/* (3) Run the program */
/* (4) {for the skeptical}: Use the computed minimum */
/* as the starting point for another run */
/* Data Fitting: */
/* Code your function f() to be the sum of the squares of the */
/* errors (differences) between the computed values and the */
/* measured values. Then minimize f() using Hooke-Jeeves. */
/* EXAMPLE: you have 20 datapoints (ti, yi) and you want to */
/* find A,B,C such that (A*t*t) + (B*exp(t)) + (C*tan(t)) */
/* fits the data as closely as possible. Then f() is just */
/* f(x) = SUM (measured_y[i] - ((A*t[i]*t[i]) + (B*exp(t[i])) */
/* + (C*tan(t[i]))))^2 */
/* where x[] is a 3-vector consisting of {A, B, C}. */
/* */
/* The author of this software is M.G. Johnson. */
/* Permission to use, copy, modify, and distribute this software */
/* for any purpose without fee is hereby granted, provided that */
/* this entire notice is included in all copies of any software */
/* which is or includes a copy or modification of this software */
/* and in all copies of the supporting documentation for such */
/* software. THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT */
/* ANY EXPRESS OR IMPLIED WARRANTY. IN PARTICULAR, NEITHER THE */
/* AUTHOR NOR AT&T MAKE ANY REPRESENTATION OR WARRANTY OF ANY */
/* KIND CONCERNING THE MERCHANTABILITY OF THIS SOFTWARE OR ITS */
/* FITNESS FOR ANY PARTICULAR PURPOSE. */
/* */
#define __STDC_WANT_LIB_EXT2__ 1 //Define you want TR 24731-2:2010 extensions -- for gcc -- as printf
#include <stdio.h> // I/O operations
#include <stdlib.h> //memory allocation and randomizers
#include <math.h> //needs -lm at the end
#include <time.h> // Time struct for open mp
#include <omp.h> //include openmp
#include <sys/time.h> // Time struct
#include <string.h> //memset
unsigned long funevals = 0; // total number of calls to f(); main() fills it in by summing the per-thread counters
// While the searches run, calls are counted through counters passed to f() by pointer.
// The original author kept this variable global for debugging purposes.
// OpenMP environment variables can be set in the shell before running:
//   export OMP_DYNAMIC=FALSE   (or call omp_set_dynamic(0); omp_set_dynamic(1) turns it back on)
//   export OMP_PROC_BIND=TRUE
#define MAXVARS (32) /* max # of variables; dimension of every coordinate array */
#define RHO_BEGIN (0.9) /* stepsize geometric shrink */
#define EPSMIN (1E-6) /* ending value of stepsize */
#define IMAX (5000) /* max # of iterations allowed in one hooke() call */
/*
 * Rosenbrock's classic parabolic valley ("banana") function in n dimensions:
 *
 *     sum over i in [0, n-2] of  100*(x[i+1] - x[i]^2)^2 + (x[i] - 1)^2
 *
 * x           point at which to evaluate; the first n doubles are read
 * n           number of coordinates of x (nothing is summed when n < 2)
 * l_funevals  counter supplied by the caller; incremented once per call
 *             (this is not the global funevals counter)
 *
 * Returns the function value at x; 0.0 when n < 2.
 */
double f(double *x, int n, unsigned long *l_funevals)
{
    double fv = 0.0;
    int i;

    (*l_funevals)++;

    for (i = 0; i < n - 1; i++) {
        /* squares are written as plain products: this loop is the hot path
           of the whole search and does not need a pow() call per term */
        const double valley = x[i + 1] - x[i] * x[i];
        const double offset = x[i] - 1.0;

        fv = fv + 100.0 * (valley * valley) + offset * offset;
    }
    return fv;
}
/*
 * Exploratory move: starting from `point`, probe each coordinate in turn,
 * first by +delta[k] and, if that does not lower f(), by -delta[k].
 *
 * delta       per-coordinate step; the sign of an entry is flipped in place
 *             whenever the first probe along that coordinate fails
 * point       in: base point; out: the point reached after all probes
 * prevbest    function value to beat
 * nvars       number of coordinates in use
 * l_funevals  evaluation counter forwarded to f()
 *
 * Returns the lowest function value seen (prevbest if nothing improved).
 */
double best_nearby(double delta[MAXVARS], double point[MAXVARS], double prevbest, int nvars, unsigned long* l_funevals)
{
    double probe[MAXVARS];
    double lowest = prevbest;
    double candidate;
    int k;

    for (k = 0; k < nvars; k++) {
        probe[k] = point[k];
    }

    for (k = 0; k < nvars; k++) {
        /* first attempt: step in the current direction */
        probe[k] = point[k] + delta[k];
        candidate = f(probe, nvars, l_funevals);
        if (candidate < lowest) {
            lowest = candidate;
            continue;
        }

        /* second attempt: reverse the step and try the other side */
        delta[k] = 0.0 - delta[k];
        probe[k] = point[k] + delta[k];
        candidate = f(probe, nvars, l_funevals);
        if (candidate < lowest) {
            lowest = candidate;
        } else {
            /* neither side helped: leave this coordinate where it was */
            probe[k] = point[k];
        }
    }

    for (k = 0; k < nvars; k++) {
        point[k] = probe[k];
    }
    return lowest;
}
/*
 * Hooke-Jeeves direct search for a local minimum of f(), starting at startpt.
 *
 * nvars       number of coordinates in use; the work arrays below hold
 *             MAXVARS doubles, so nvars must not exceed MAXVARS
 * startpt     starting guess (only read)
 * endpt       receives the best point found
 * rho         shrink factor: the initial step along coordinate i is
 *             |startpt[i] * rho| (or rho when that is zero), and every
 *             non-improving pass multiplies all steps by rho
 * epsilon     the search stops once the step length is no longer > epsilon
 * itermax     upper bound on the number of outer iterations
 * l_funevals  caller-supplied counter, forwarded to every f() evaluation
 *
 * Returns the number of outer iterations performed.
 */
int hooke(int nvars, double startpt[MAXVARS], double endpt[MAXVARS], double rho, double epsilon, int itermax,unsigned long *l_funevals)
{
    double delta[MAXVARS];
    double newf, fbefore, steplength, tmp;
    double xbefore[MAXVARS], newx[MAXVARS];
    int i, keep;
    int iters;

    for (i = 0; i < nvars; i++) {
        newx[i] = xbefore[i] = startpt[i];
        delta[i] = fabs(startpt[i] * rho);
        if (delta[i] == 0.0)
            delta[i] = rho;
    }
    steplength = rho;
    iters = 0;
    fbefore = f(newx, nvars, l_funevals);
    newf = fbefore;

    while ((iters < itermax) && (steplength > epsilon)) {
        iters++;
#if DEBUG
        {
            int j;
            /* report the counter this search is actually updating; the
               global funevals is not touched while the search runs */
            printf("\nAfter %5lu funevals, f(x) = %.4le at\n", *l_funevals, fbefore);
            for (j = 0; j < nvars; j++)
                printf(" x[%2d] = %.4le\n", j, xbefore[j]);
        }
#endif
        /* find best new point, one coord at a time */
        for (i = 0; i < nvars; i++) {
            newx[i] = xbefore[i];
        }
        newf = best_nearby(delta, newx, fbefore, nvars, l_funevals);

        /* if we made some improvements, pursue that direction */
        keep = 1;
        while ((newf < fbefore) && (keep == 1)) {
            for (i = 0; i < nvars; i++) {
                /* firstly, arrange the sign of delta[] */
                if (newx[i] <= xbefore[i])
                    delta[i] = 0.0 - fabs(delta[i]);
                else
                    delta[i] = fabs(delta[i]);
                /* now, move further in this direction */
                tmp = xbefore[i];
                xbefore[i] = newx[i];
                newx[i] = newx[i] + newx[i] - tmp;
            }
            fbefore = newf;
            newf = best_nearby(delta, newx, fbefore, nvars, l_funevals);

            /* if the further (optimistic) move was bad.... */
            if (newf >= fbefore)
                break;

            /* make sure that the differences between the new */
            /* and the old points are due to actual           */
            /* displacements; beware of roundoff errors that  */
            /* might cause newf < fbefore                     */
            /* keep going only if the first coordinate really moved */
            /* (by more than half its step); otherwise stop here    */
            keep = 0;
            for (i = 0; i < nvars; i++) {
                if (fabs(newx[i] - xbefore[i]) > (0.5 * fabs(delta[i])))
                    keep = 1;
                break;
            }
        }

        /* no improvement on this pass: shrink every step by rho */
        if ((steplength >= epsilon) && (newf >= fbefore)) {
            steplength = steplength * rho;
            for (i = 0; i < nvars; i++) {
                delta[i] *= rho;
            }
        }
    }

    for (i = 0; i < nvars; i++)
        endpt[i] = xbefore[i];
    return (iters);
}
/*
 * Current time of day as reported by gettimeofday(), expressed in seconds
 * as a double (whole seconds plus the microsecond part scaled by 1e-6).
 * The return value of gettimeofday() itself is not checked.
 */
double get_wtime(void)
{
    struct timeval now;
    double whole_seconds;
    double microseconds;

    gettimeofday(&now, NULL);
    whole_seconds = (double)now.tv_sec;
    microseconds = (double)now.tv_usec;

    return whole_seconds + microseconds * 1.0e-6;
}
/*
 * Best result and evaluation count accumulated by one thread.
 * Members are ordered from widest to narrowest so that the compiler has no
 * reason to insert padding between them.
 */
typedef struct local_bests_struct {
    double        l_best_pt[MAXVARS];  /* point at which l_best_fx was found */
    double        l_best_fx;           /* smallest f(x) recorded so far */
    unsigned long l_funevals;          /* running total of f() evaluations */
    int           l_best_trial;        /* trial index that produced l_best_fx */
    int           l_best_jj;           /* value hooke() returned for that trial */
} local_bests_struct;  /* MAXVARS*8 + 24 bytes, assuming 8-byte double/unsigned long and 4-byte int */
/*
 * Multistart driver: runs ntrials independent Hooke-Jeeves searches on the
 * Rosenbrock function, each from a pseudo-random starting point in [-5, 5),
 * as OpenMP tasks, and writes a summary of the best result to the file
 * "OmpTasks.<threads>.txt".
 *
 * Usage: prog [ntrials threads]
 *   The two arguments are only honoured when both are given, and both must
 *   be positive integers. Otherwise the defaults (64*1024 trials, 1 thread)
 *   are used.
 *
 * Returns 0 on success. Exits with status 1 on invalid arguments or when the
 * output file cannot be created or closed.
 */
int main(int argc, char *argv[])
{
    const int itermax = IMAX;        // iteration cap handed to every hooke() call
    const double rho = RHO_BEGIN;    // step shrink factor
    const double epsilon = EPSMIN;   // final step size
    const int nvars = 32;            // coordinates per point (must not exceed MAXVARS)
    int ntrials = 64*1024;           // number of independent searches
    int threads = 1;                 // default team size
    double t0, t1;                   // timers
    int k;                           // general counter
    long ltimeSeed = time(0);        // base seed; each thread adds its own id

    if (argc > 2) // ntrials and thread count are the first and second argument respectively
    {
        ntrials = atoi(argv[1]);
        threads = atoi(argv[2]);
        // a non-positive thread count would give the per-thread array below
        // a non-positive length, which is undefined behaviour
        if (ntrials < 1 || threads < 1) {
            fprintf(stderr, "usage: %s [ntrials threads]  (both must be positive integers)\n", argv[0]);
            exit(1);
        }
    }

    omp_set_num_threads(threads);
    omp_set_dynamic(0); // ask for exactly the requested number of threads

    // output file, named after the thread count
    char output_name[64];
    int name_len = snprintf(output_name, sizeof output_name, "OmpTasks.%d.txt", threads);
    if (name_len < 0 || (size_t)name_len >= sizeof output_name) {
        perror("String formatting failed");
        exit(1);
    }
    FILE *output = fopen(output_name, "w");
    if (output == NULL) {
        perror("Error at Accessing output file ");
        exit(1);
    }

    // one result slot per requested thread
    __attribute__((aligned(64))) local_bests_struct local_best_of_threads_array[threads];

    t0 = get_wtime(); // starting the clock

    // Initialise every slot up front, so the reduction below never reads an
    // indeterminate value, even for a thread that ran no task at all.
    memset(local_best_of_threads_array, 0, sizeof local_best_of_threads_array);
    for (k = 0; k < threads; k++) {
        local_best_of_threads_array[k].l_best_fx = 1e10;
        local_best_of_threads_array[k].l_best_trial = -1;
        local_best_of_threads_array[k].l_best_jj = -1;
    }

    /*------ start of the parallel section ------*/
    #pragma omp parallel
    {
        int trial; // index of the current trial
        // per-thread erand48 state, seeded from the time plus the thread id
        unsigned short randBuffer[3] = {0, 0, (unsigned short)(ltimeSeed + omp_get_thread_num())};

        #pragma omp for nowait schedule(static)
        for (trial = 0; trial < ntrials; trial++)
        {
            // starting guess for the rosenbrock test function, in [-5, 5)
            __attribute__((aligned(64))) double startpt[MAXVARS];
            for (int i = 0; i < nvars; i++) {
                startpt[i] = 10.0*erand48(randBuffer)-5.0;
            }

            #pragma omp task firstprivate(trial,startpt) shared(local_best_of_threads_array)
            {
                // The slot belongs to the thread that EXECUTES the task, so
                // the thread id is looked up here and not at task creation.
                // The task body contains no OpenMP construct, so it is not
                // expected to migrate to another thread part-way through.
                local_bests_struct *mine = &local_best_of_threads_array[omp_get_thread_num()];
                __attribute__((aligned(64))) double endpt[MAXVARS];
                unsigned long l_funevals = 0; // evaluations made by this task only
                int i;
                int jj;    // value returned by hooke() for this trial
                double fx; // function value at the end point

                jj = hooke(nvars, startpt, endpt, rho, epsilon, itermax, &l_funevals);
#if DEBUG
                printf("\n\n\nHOOKE %d USED %d ITERATIONS, AND RETURNED\n", trial, jj);
                for (i = 0; i < nvars; i++)
                    printf("x[%3d] = %15.7le \n", i, endpt[i]);
#endif
                fx = f(endpt, nvars, &l_funevals);
#if DEBUG
                printf("f(x) = %15.7le\n", fx);
#endif
                mine->l_funevals += l_funevals;

                // keep the best trial seen by this thread
                if (fx < mine->l_best_fx) {
                    mine->l_best_trial = trial;
                    mine->l_best_jj = jj;
                    mine->l_best_fx = fx;
                    for (i = 0; i < nvars; i++)
                        mine->l_best_pt[i] = endpt[i];
                }
            } // end of task
        } // end of for
    } // end of omp parallel
    /*------ end of the parallel section ------*/

    // Final reduction, done serially after the parallel region has ended:
    // sum the evaluation counters and pick the slot with the smallest f(x).
    int best_thread = 0;
    funevals = 0;
    for (k = 0; k < threads; k++) {
        funevals += local_best_of_threads_array[k].l_funevals;
        if (local_best_of_threads_array[k].l_best_fx < local_best_of_threads_array[best_thread].l_best_fx)
            best_thread = k;
    }
    t1 = get_wtime();

    // printing the results
    fprintf(output,"FINAL RESULTS: %d:%d\n",nvars,ntrials);
    fprintf(output,"Elapsed time = %.3lf s\n", t1-t0);
    fprintf(output,"Total number of trials = %d\n", ntrials);
    fprintf(output,"Total number of function evaluations = %lu\n", funevals);
    fprintf(output,"Total number of threads used = %d\n", threads);
    fprintf(output,"Best result at trial %d used %d iterations, and returned\n", local_best_of_threads_array[best_thread].l_best_trial, local_best_of_threads_array[best_thread].l_best_jj);
    for (k = 0; k < nvars; k++)
        fprintf(output,"x[%3d] = %15.7le \n", k, local_best_of_threads_array[best_thread].l_best_pt[k]);
    fprintf(output,"f(x) = %15.7le\n", local_best_of_threads_array[best_thread].l_best_fx);

    // a FILE* is released with fclose(), which also flushes buffered output
    if (fclose(output) != 0) {
        perror("Error at closing output file");
        exit(1);
    }
    return 0;
}