From 5287e91aa4def49351d52b9bab81dcbb0b521ba0 Mon Sep 17 00:00:00 2001 From: Alexandros Koliousis Date: Thu, 25 Apr 2019 14:56:08 +0000 Subject: [PATCH 01/17] Add support for linear scaling rule in learning rate schedule; and data pre-processing --- clib-multigpu/GPU.c | 4 +- clib-multigpu/Makefile | 40 ++- clib-multigpu/cudnn/cudnnhelper.c | 4 + clib-multigpu/debug.h | 2 +- clib-multigpu/executioncontext.c | 11 +- clib-multigpu/executioncontext.h | 2 +- clib-multigpu/genmakefile.sh | 8 +- clib-multigpu/image/boundingbox.c | 14 + clib-multigpu/image/boundingbox.h | 2 + clib-multigpu/image/image.c | 164 ++++++----- clib-multigpu/image/recordfile.c | 2 +- clib-multigpu/image/recordreader.c | 124 +++++++-- clib-multigpu/image/recordreader.h | 7 +- clib-multigpu/image/rectangle.c | 34 ++- clib-multigpu/image/rectangle.h | 2 +- clib-multigpu/image/testbatchreader.c | 99 ++----- clib-multigpu/image/testrecordreader.c | 116 +++++--- clib-multigpu/image/yarng.cpp | 3 + clib-multigpu/list.c | 37 +++ clib-multigpu/list.h | 2 + clib-multigpu/recorddataset.c | 32 ++- clib-multigpu/recorddataset.h | 11 +- clib-multigpu/solverconfiguration.c | 24 +- clib-multigpu/solverconfiguration.h | 2 + clib-multigpu/testrecorddataset.c | 96 +++++-- ..._ac_imperial_lsds_crossbow_device_TheGPU.h | 4 +- clib-multigpu/utils.h | 2 +- .../ac/imperial/lsds/crossbow/ModelConf.java | 4 + .../imperial/lsds/crossbow/device/TheGPU.java | 2 +- .../lsds/crossbow/kernel/conf/SolverConf.java | 28 +- .../kernel/conf/SolverConf.java.stashed | 263 ------------------ .../types/LearningRateDecayPolicy.java | 8 +- 32 files changed, 597 insertions(+), 556 deletions(-) delete mode 100644 src/main/java/uk/ac/imperial/lsds/crossbow/kernel/conf/SolverConf.java.stashed diff --git a/clib-multigpu/GPU.c b/clib-multigpu/GPU.c index 777b940..1313ddc 100644 --- a/clib-multigpu/GPU.c +++ b/clib-multigpu/GPU.c @@ -774,7 +774,7 @@ JNIEXPORT jint JNICALL Java_uk_ac_imperial_lsds_crossbow_device_TheGPU_setLearni } JNIEXPORT jint JNICALL Java_uk_ac_imperial_lsds_crossbow_device_TheGPU_setLearningRateDecayPolicyMultiStep - (JNIEnv *env, jobject obj, jfloat learningRate, jdouble gamma, jintArray steps) { + (JNIEnv *env, jobject obj, jfloat learningRate, jdouble gamma, jint warmuptasks, jintArray steps) { (void) env; (void) obj; @@ -782,7 +782,7 @@ JNIEXPORT jint JNICALL Java_uk_ac_imperial_lsds_crossbow_device_TheGPU_setLearni jsize argc = (*env)->GetArrayLength(env, steps); jint *argv = (*env)->GetIntArrayElements(env, steps, 0); - crossbowExecutionContextSetLearningRateDecayPolicyMultiStep (theGPU, learningRate, gamma, argc, argv); + crossbowExecutionContextSetLearningRateDecayPolicyMultiStep (theGPU, learningRate, gamma, warmuptasks, argc, argv); (*env)->ReleaseIntArrayElements (env, steps, argv, JNI_ABORT); diff --git a/clib-multigpu/Makefile b/clib-multigpu/Makefile index 3bd8787..1eac06e 100644 --- a/clib-multigpu/Makefile +++ b/clib-multigpu/Makefile @@ -1,13 +1,16 @@ -CLASS_PATH := ../target/classes -vpath %.class $(CLASS_PATH) - +# Makefile for Crossbow C/C++ library +# Customised for platypus2 running Linux on Tue 9 Apr 16:57:23 UTC 2019 +# +OS = Linux CUDA_PATH := /usr/local/cuda +JAVA_PATH := /usr/lib/jvm/java-1.8.0-openjdk-amd64 BLAS_PATH := /opt/OpenBLAS -NCCL_PATH := /home/akolious/nccl/nccl-2.1.15 +NCCL_PATH := JPEG_PATH := /home/akolious/libjpeg-turbo -JAVA_PATH := /usr/lib/jvm/java-8-oracle -# Note: for Java 7, use: /usr/lib/jvm/java-7-openjdk-amd64/ -CBOW_PATH := /home/akolious/pre-release +CBOW_PATH := /home/akolious/crossbow + +CLASS_PATH := ../target/classes +vpath %.class $(CLASS_PATH) ARCH := $(shell uname -m) ifneq (,$(filter $(ARCH),x86_64)) @@ -65,13 +68,19 @@ INCLUDES := -I/usr/include -D_GNU_SOURCE INCLUDES += -I$(CUDA_PATH)/include # OpenBLAS -INCLUDES += -I$(BLAS_PATH)/include +ifneq ($(BLAS_PATH),) + INCLUDES += -I$(BLAS_PATH)/include +endif # NCCL -INCLUDES += -I$(NCCL_PATH)/include +ifneq ($(NCCL_PATH),) + INCLUDES += -I$(NCCL_PATH)/include +endif # Turbo-JPEG -INCLUDES += -I$(JPEG_PATH) +ifneq ($(JPEG_PATH),) + INCLUDES += -I$(JPEG_PATH) +endif # JNI ifeq ($(OS),Darwin) @@ -98,7 +107,11 @@ LIBS += -L$(CUDA_PATH)/lib$(POSTFIX) -lcudart -lcublas -lcudnn -lcurand -lnvTool LIBS += -L$(BLAS_PATH)/lib -lopenblas # NCCL -LIBS += -L$(NCCL_PATH)/lib -lnccl +ifneq ($(NCCL_PATH),) + LIBS += -L$(NCCL_PATH)/lib -lnccl +else + LIBS += -lnccl +endif # PTX code generation # @@ -170,7 +183,7 @@ random/generator.o: random/generator.cpp random/generator.hpp $(CPP) $(INCLUDES) -W -Wall -DWARNING -fPIC -Wno-unused-function -c $< -o $@ dataset.o: dataset.c uk_ac_imperial_lsds_crossbow_device_dataset_DatasetMemoryManager.h - $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ + $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ datasetfilemanager.o: datasetfilemanager.c datasetfilemanager.h $(CROSSBOWBASEINCLUDES) $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ @@ -191,7 +204,7 @@ memoryregistry.o: memoryregistry.c memoryregistry.h memoryregistrynode.h $(CROSS $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ lightweightdataset.o: lightweightdataset.c uk_ac_imperial_lsds_crossbow_device_dataset_LightWeightDatasetMemoryManager.h - $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ + $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ lightweightdatasetmanager.o: lightweightdatasetmanager.c lightweightdatasetmanager.h $(CROSSBOWBASEINCLUDES) $(NV) $(INCLUDES) $(LFL) $(GENCODE) -c $< -o $@ @@ -522,3 +535,4 @@ clean: rm -f image/testrecordreader rm -f image/testbatchreader rm -f testrecorddataset + diff --git a/clib-multigpu/cudnn/cudnnhelper.c b/clib-multigpu/cudnn/cudnnhelper.c index dd63d9c..42479b5 100644 --- a/clib-multigpu/cudnn/cudnnhelper.c +++ b/clib-multigpu/cudnn/cudnnhelper.c @@ -11,6 +11,10 @@ const char *cudnnActivationModeString (cudnnActivationMode_t mode) { #if CUDNN_MAJOR >= 6 case CUDNN_ACTIVATION_ELU: return "CUDNN_ACTIVATION_ELU"; #endif +#if CUDNN_MAJOR >= 7 + case CUDNN_ACTIVATION_IDENTITY: return "CUDNN_ACTIVATION_IDENTITY"; +#endif + default: return "CUDNN_ACTIVATION_UNKNOWN"; } return NULL; diff --git a/clib-multigpu/debug.h b/clib-multigpu/debug.h index cef7854..5b21b61 100644 --- a/clib-multigpu/debug.h +++ b/clib-multigpu/debug.h @@ -9,7 +9,7 @@ #include #undef GPU_VERBOSE -// #define GPU_VERBOSE +/* #define GPU_VERBOSE */ #undef KERNEL_CHECKSUM /* #define COMPUTE_CHECKSUM */ diff --git a/clib-multigpu/executioncontext.c b/clib-multigpu/executioncontext.c index a2f69b8..9213973 100644 --- a/clib-multigpu/executioncontext.c +++ b/clib-multigpu/executioncontext.c @@ -1457,10 +1457,10 @@ void crossbowExecutionContextSetDataflowStream (crossbowExecutionContextP ctx, i void crossbowExecutionContextSetDataflowDependency (crossbowExecutionContextP ctx, int id, int ord, int jtype, int guard, unsigned internal) { - crossbowOperatorDependency_t type; + crossbowOperatorDependency_t type = START_BEFORE_START; /* Operator `guard` must start or end before operator `ord` starts. */ switch (jtype) { - case 0: type = START_BEFORE_START; + case 0: type = START_BEFORE_START; break; case 1: type = END_BEFORE_START; } crossbowDataflowP dataflow = crossbowArrayListGet (ctx->dataflows, id); @@ -1663,11 +1663,12 @@ void crossbowExecutionContextSetLearningRateDecayPolicyStep (crossbowExecutionCo return; } -void crossbowExecutionContextSetLearningRateDecayPolicyMultiStep (crossbowExecutionContextP ctx, float learningRate, double gamma, int argc, int* argv) { +void crossbowExecutionContextSetLearningRateDecayPolicyMultiStep (crossbowExecutionContextP ctx, float learningRate, double gamma, int warmuptasks, int argc, int* argv) { int i; - ctx->theModel->conf->learningRateDecayPolicy = MULTISTEP; + ctx->theModel->conf->learningRateDecayPolicy = ((warmuptasks > 0) ? LSR : MULTISTEP); ctx->theModel->conf->learningRate = learningRate; ctx->theModel->conf->gamma = gamma; + ctx->theModel->conf->warmuptasks = warmuptasks; ctx->theModel->conf->numberofsteps = argc; ctx->theModel->conf->steps = crossbowMalloc (argc * sizeof(int)); for (i = 0; i < argc; ++i) @@ -3900,7 +3901,7 @@ int crossbowExecutionContextDelModel (crossbowExecutionContextP ctx) { void crossbowExecutionContextRecordDatasetInit (crossbowExecutionContextP ctx, int phase, int workers, int *capacity, int NB, int b, int *padding) { invalidConditionException ((! ctx->dataset[phase])); - ctx->dataset[phase] = crossbowRecordDatasetCreate (workers, capacity, NB, b, padding); + ctx->dataset[phase] = crossbowRecordDatasetCreate (workers, capacity, NB, b, padding, phase); return; } diff --git a/clib-multigpu/executioncontext.h b/clib-multigpu/executioncontext.h index b62a515..dedad1d 100644 --- a/clib-multigpu/executioncontext.h +++ b/clib-multigpu/executioncontext.h @@ -234,7 +234,7 @@ void crossbowExecutionContextSetLearningRateDecayPolicyInv (crossbowExecutionCon void crossbowExecutionContextSetLearningRateDecayPolicyStep (crossbowExecutionContextP, float, double, int); -void crossbowExecutionContextSetLearningRateDecayPolicyMultiStep (crossbowExecutionContextP, float, double, int, int *); +void crossbowExecutionContextSetLearningRateDecayPolicyMultiStep (crossbowExecutionContextP, float, double, int, int, int *); void crossbowExecutionContextSetLearningRateDecayPolicyExp (crossbowExecutionContextP, float, double); diff --git a/clib-multigpu/genmakefile.sh b/clib-multigpu/genmakefile.sh index 02a3ade..0cdd0e7 100755 --- a/clib-multigpu/genmakefile.sh +++ b/clib-multigpu/genmakefile.sh @@ -149,7 +149,9 @@ INCLUDES := -I/usr/include -D_GNU_SOURCE INCLUDES += -I\$(CUDA_PATH)/include # OpenBLAS -INCLUDES += -I\$(BLAS_PATH)/include +ifneq (\$(BLAS_PATH),) + INCLUDES += -I\$(BLAS_PATH)/include +endif # NCCL ifneq (\$(NCCL_PATH),) @@ -157,7 +159,9 @@ ifneq (\$(NCCL_PATH),) endif # Turbo-JPEG -INCLUDES += -I\$(JPEG_PATH) +ifneq (\$(JPEG_PATH),) + INCLUDES += -I\$(JPEG_PATH) +endif # JNI ifeq (\$(OS),Darwin) diff --git a/clib-multigpu/image/boundingbox.c b/clib-multigpu/image/boundingbox.c index e974bbf..4197626 100644 --- a/clib-multigpu/image/boundingbox.c +++ b/clib-multigpu/image/boundingbox.c @@ -15,6 +15,20 @@ crossbowBoundingBoxP crossbowBoundingBoxCreate () { return p; } +unsigned crossbowBoundingBoxIsValid (crossbowBoundingBoxP p) { + nullPointerException(p); + /* All values must be in [0,1] */ + if ( + ((p->xmin < 0.0) || (p->xmin > 1.0)) || + ((p->ymin < 0.0) || (p->ymin > 1.0)) || + ((p->xmax < 0.0) || (p->xmax > 1.0)) || + ((p->ymax < 0.0) || (p->ymax > 1.0)) + ) { + return 0; + } + return 1; +} + void crossbowBoundingBoxFree (crossbowBoundingBoxP p) { if (! p) return; diff --git a/clib-multigpu/image/boundingbox.h b/clib-multigpu/image/boundingbox.h index dfecd3f..c4ee6fa 100644 --- a/clib-multigpu/image/boundingbox.h +++ b/clib-multigpu/image/boundingbox.h @@ -11,6 +11,8 @@ typedef struct crossbow_bbox { crossbowBoundingBoxP crossbowBoundingBoxCreate (); +unsigned crossbowBoundingBoxIsValid (crossbowBoundingBoxP); + void crossbowBoundingBoxFree (crossbowBoundingBoxP); #endif /* __CROSSBOW_BBOX_H_ */ diff --git a/clib-multigpu/image/image.c b/clib-multigpu/image/image.c index 6bc2937..3ee4d4a 100644 --- a/clib-multigpu/image/image.c +++ b/clib-multigpu/image/image.c @@ -519,6 +519,7 @@ void crossbowImageDumpAsFloat (crossbowImageP p, int pixels) { int crossbowImageCopy (crossbowImageP p, void * buffer, int offset, int limit) { nullPointerException (p); + invalidConditionException (p->decoded); invalidConditionException (p->isfloat); @@ -529,6 +530,7 @@ int crossbowImageCopy (crossbowImageP p, void * buffer, int offset, int limit) { /* Copy p->data to buffer (starting at offset) */ memcpy ((void *)(buffer + offset), (void *)(p->data), length); + return length; } @@ -709,68 +711,69 @@ void crossbowImageResize (crossbowImageP p, int height, int width) { return; } -static unsigned generateRandomCrop (crossbowRectangleP crop, int originalHeight, int originalWidth, float *area, float aspectRatio) { - - /* If any of height, width, area, aspect ratio is less that 0, return 0; */ - - float minArea = area[0] * originalWidth * originalHeight; - float maxArea = area[1] * originalWidth * originalHeight; - - int minHeight = (int) lrintf (sqrt (minArea / aspectRatio)); - int maxHeight = (int) lrintf (sqrt (maxArea / aspectRatio)); - - /* Find smaller max height s.t. round (maxHeight x acpectRatio) <= originalWidth */ - if (lrintf (maxHeight * aspectRatio) > originalWidth) { - +static unsigned crossbowImageGenerateRandomCrop (crossbowRectangleP p, int width, int height, float *area, float ratio) { + + nullPointerException (p); + + /* If any of the width, height, min/max area, or aspect ratio is less or equal to 0, return false */ + invalidConditionException(width > 0); + invalidConditionException(height > 0); + invalidConditionException(ratio > 0); + invalidConditionException(area[0] > 0); + invalidConditionException(area[1] > 0); + invalidConditionException(area[0] <= area[1]); + + /* Compute min and max relative crop area */ + float minArea = area[0] * width * height; + float maxArea = area[1] * width * height; + + int minHeight = (int) lrintf (sqrt (minArea / ratio)); + int maxHeight = (int) lrintf (sqrt (maxArea / ratio)); + + /* Find smaller max height s.t. round (maxHeight x ratio) <= width */ + if (lrintf (maxHeight * ratio) > width) { float epsilon = 0.0000001; - maxHeight = (int) ((originalWidth + 0.5 - epsilon) / aspectRatio); + maxHeight = (int) ((width + 0.5 - epsilon) / ratio); } - - if (maxHeight > originalHeight) - maxHeight = originalHeight; - + + if (maxHeight > height) + maxHeight = height; + if (minHeight > maxHeight) minHeight = maxHeight; - + if (minHeight < maxHeight) - /* Generate a random number of the closed range [0, (maxHeight - minHeight)]*/ + /* Generate a random number of the closed range [0, (maxHeight - minHeight)] */ minHeight += crossbowYarngNext (0, maxHeight - minHeight + 1); - - int minWidth = (int) lrintf (minHeight * aspectRatio); - + + int minWidth = (int) lrintf (minHeight * ratio); + /* Check that width is less or equal to the original width */ + invalidConditionException(minWidth <= width); + float newArea = (float) (minHeight * minWidth); /* Deal with rounding errors */ - if (newArea < minArea) { + /* Try a bigger rectangle */ minHeight += 1; - minWidth = (int) lrintf (minHeight * aspectRatio); - newArea = (float) (minHeight * minWidth); - } - - if (newArea > maxArea) { - minHeight -= 1; - minWidth = (int) lrintf (minHeight * aspectRatio); + minWidth = (int) lrintf (minHeight * ratio); newArea = (float) (minHeight * minWidth); } - - if (newArea < minArea || newArea > maxArea || minWidth > originalWidth || minHeight > originalHeight || minWidth <= 0 || minHeight <= 0) + + if ((newArea < minArea) || (newArea > maxArea) || (minWidth <= 0) || (minWidth > width) || (minHeight <= 0) || (minHeight > height)) { return 0; - - int x = 0; - if (minWidth < originalWidth) { - x = crossbowYarngNext (0, originalWidth - minWidth); } - + int y = 0; - if (minHeight < originalHeight) { - y = crossbowYarngNext (0, originalHeight - minHeight); - } - - crop->xmin = x; - crop->ymin = y; - crop->xmax = x + minWidth; - crop->ymax = y + minHeight; + if (minHeight < height) + y = crossbowYarngNext (0, height - minHeight); + + int x = 0; + if (minWidth < width) + x = crossbowYarngNext (0, width - minWidth); + + /* Configure rectangle */ + crossbowRectangleSet (p, x, y, x + minWidth, y + minHeight); return 1; } @@ -779,67 +782,80 @@ void crossbowImageSampleDistortedBoundingBox (crossbowImageP p, crossbowArrayLis int idx; nullPointerException(p); - int currentHeight = (int) crossbowImageCurrentHeight (p); - int currentWidth = (int) crossbowImageCurrentWidth (p); - + int h = (int) crossbowImageCurrentHeight (p); + int w = (int) crossbowImageCurrentWidth (p); + /* Convert bounding boxes to rectangles. If there are none, use entire image by default */ + dbg("Convert bounding boxes to rectangles\n"); crossbowArrayListP rectangles = NULL; if (boxes) { + /* Allocate as many slots as the number of boxes */ rectangles = crossbowArrayListCreate (crossbowArrayListSize (boxes)); for (idx = 0; idx < crossbowArrayListSize (boxes); ++idx) { crossbowBoundingBoxP box = (crossbowBoundingBoxP) crossbowArrayListGet (boxes, idx); - int xmin = box->xmin * currentWidth; - int ymin = box->ymin * currentHeight; - int xmax = box->xmax * currentWidth; - int ymax = box->ymax * currentHeight; - + if (! crossbowBoundingBoxIsValid(box)) + err("Invalid bounding box"); + + int xmin = box->xmin * w; + int ymin = box->ymin * h; + int xmax = box->xmax * w; + int ymax = box->ymax * h; + crossbowRectangleP rectangle = crossbowRectangleCreate (xmin, ymin, xmax, ymax); crossbowArrayListSet (rectangles, idx, rectangle); } } else { rectangles = crossbowArrayListCreate (1); - crossbowArrayListSet (rectangles, 0, crossbowRectangleCreate (0, 0, currentWidth, currentHeight)); + crossbowArrayListSet (rectangles, 0, crossbowRectangleCreate (0, 0, w, h)); } - + crossbowRectangleP crop = crossbowRectangleCreate (0, 0, 0, 0); unsigned generated = 0; int i; for (i = 0; i < attempts; ++i) { - float random = 0.1; - /* Sample aspect ratio (within bounds) */ - float sample = random * (ratio[1] - ratio[0]) + ratio[0]; - if (generateRandomCrop (crop, currentHeight, currentWidth, area, sample)) { - + + /* Sample aspect ratio (within ratio bounds) */ + float sample = crossbowYarngNext (0, 1) * (ratio[1] - ratio[0]) + ratio[0]; + + dbg("Generate random crop\n"); + if (crossbowImageGenerateRandomCrop (crop, w, h, area, sample)) { + + dbg("Check coverage\n"); if (crossbowRectangleCovers(crop, coverage, rectangles)) { generated = 1; break; } } } - if (! generated) - crossbowRectangleSet (crop, 0, 0, currentWidth, currentHeight); + if (! generated) { + /* Set the entire image as the bounding box */ + crossbowRectangleSet (crop, 0, 0, w, h); + } /* Determine cropping parameters for the bounding box */ + dbg("Set cropping parameters\n"); *width = crop->xmax - crop->xmin; *height = crop->ymax - crop->ymin; - - *top = crop->xmin; - *left = crop->ymin; + + /* be careful of the order */ + *top = crop->ymin; + *left = crop->xmin; /* Ensure sampled bounding box fits current image dimensions */ - invalidConditionException (currentWidth >= (*left + *width )); - invalidConditionException (currentHeight >= (*top + *height)); - + invalidConditionException (w >= (*left + *width)); + invalidConditionException (h >= (*top + *height)); + /* Free local state */ - - crossbowRectangleFree (crop); - for (i = 0; i < crossbowArrayListSize (rectangles); ++i) { - crossbowRectangleP rect = (crossbowRectangleP) crossbowArrayListGet (rectangles, i); + + for (idx = 0; idx < crossbowArrayListSize (rectangles); ++idx) { + crossbowRectangleP rect = (crossbowRectangleP) crossbowArrayListGet (rectangles, idx); crossbowRectangleFree (rect); } crossbowArrayListFree (rectangles); - + + crossbowRectangleFree (crop); + return; } diff --git a/clib-multigpu/image/recordfile.c b/clib-multigpu/image/recordfile.c index 41041a4..2a8b2e1 100644 --- a/clib-multigpu/image/recordfile.c +++ b/clib-multigpu/image/recordfile.c @@ -35,7 +35,7 @@ void crossbowRecordFileOpen (crossbowRecordFileP p) { exit (1); } /* Allocate a file pointer per worker */ - if (p->workers > 1) { + if (p->workers > 0) { p->f = (FILE **) crossbowMalloc (p->workers * sizeof(FILE *)); for (i = 0; i < p->workers; ++i) { p->f[i] = fopen(p->filename, "rb"); diff --git a/clib-multigpu/image/recordreader.c b/clib-multigpu/image/recordreader.c index 618cbdd..e1e319b 100644 --- a/clib-multigpu/image/recordreader.c +++ b/clib-multigpu/image/recordreader.c @@ -13,18 +13,27 @@ /* * Perform the kind of pre-processing for test images + * that TensorFlow does in benchmarks/: + * + * https://github.com/alexandroskoliousis/benchmarks/blob/27b2ec139c86b39ab596321afe08878b36a5adfd/scripts/tf_cnn_benchmarks/preprocessing.py#L198 */ static void preprocessTestRecord (crossbowRecordP record, unsigned verbose) { - + + /* Cast image to 32-bit float */ crossbowImageCast (record->image); - - /* Resize image */ - + + /* Get image height and width (and convert to floats) */ float h = (float) crossbowImageInputHeight (record->image); float w = (float) crossbowImageInputWidth (record->image); - - float factor = 1.15; - + + /* In ResNet, images are cropped to 256 x 256 and the final image size is 224 x 224. + * It is: + * + * floor(224 x 1.45) ~= 256 + */ + float factor = 1.145; + + /* Maintain aspect ratio */ float ratio = max(224. / h, 224. / w); int resizeheight = (int) (h * ratio * factor); @@ -32,24 +41,77 @@ static void preprocessTestRecord (crossbowRecordP record, unsigned verbose) { if (verbose > 0) printf("Resized image to (%d x %d)\n", resizeheight, resizewidth); - + + /* Resize the image to shape using the bilinear method (do not align corners) */ crossbowImageResize (record->image, resizeheight, resizewidth); if (verbose > 0) printf("Checksum of resized image is %.4f\n", crossbowImageChecksum (record->image)); - - /* Crop image */ - - int top = (resizeheight - 224) / 2; - int left = (resizewidth - 224) / 2; - + + /* Crop image to size (224, 224) */ + int top = floor((float) (resizeheight - 224) / 2.); /* x // y */ + int left = floor((float) (resizewidth - 224) / 2.); + crossbowImageCrop (record->image, 224, 224, top, left); if (verbose > 0) printf("Checksum of cropped image is %.4f\n", crossbowImageChecksum (record->image)); + return; } +/* + * Perform the kind of pre-processing for training images + * that TensorFlow does in benchmarks/: + * + * https://github.com/alexandroskoliousis/benchmarks/blob/27b2ec139c86b39ab596321afe08878b36a5adfd/scripts/tf_cnn_benchmarks/preprocessing.py#L286 + */ +static void preprocessTrainingRecord (crossbowRecordP record, int verbose) { + + (void) verbose; + + /* Cast image to 32-bit float */ + crossbowImageCast (record->image); + + /* + * Sample bounding box. If not box is supplied, + * assume the bounding box is the entire image. + * + * Minimum coverage is 0.1 + * Aspect ratio range is [0.75, 1.33] + * Area range is [0.05, 1.0] + * Max. attempts is 100 + */ + int height = 0; + int width = 0; + int top = 0; + int left = 0; + + float ratio [2] = {0.75, 1.33}; + float area [2] = {0.05, 1.00}; + + dbg("Sample bounding box\n"); + crossbowImageSampleDistortedBoundingBox ( + record->image, + record->boxes, + 0.1, + &ratio[0], + &area [0], + 100, + &height, &width, &top, &left); + + /* Crop image to the specified bounding box */ + crossbowImageCrop (record->image, height, width, top, left); + + /* Flip image */ + dbg("Flip image\n"); + crossbowImageRandomFlipLeftRight (record->image); + + /* Resize image to shape (224, 224) with the bilinear method (don't align corners) */ + dbg("Crop image to (224 x 224)\n"); + crossbowImageResize (record->image, 224, 224); +} + /* * A worker thread */ @@ -71,6 +133,7 @@ static void *handle (void *args) { sched_setaffinity (0, sizeof(set), &set); dbg("Decoder #%02d pinned on core %02d\n", task->id, core); + int bytes = 0; /* Iterate over list of tasks */ int idx; for (idx = 0; idx < crossbowArrayListSize (list); ++idx) { @@ -79,8 +142,13 @@ static void *handle (void *args) { crossbowRecordP record = crossbowRecordCreate (); /* Read record (thread-safe version) */ crossbowRecordFileReadSafely (task->file, task->id, task->position, record); + bytes += record->length; /* Pre-process record */ - preprocessTestRecord (record, 0); + if (task->training) { + preprocessTrainingRecord (record, 0); + } else { + preprocessTestRecord (record, 0); + } /* Copy decoded (augmented) image to buffer */ crossbowImageCopy (record->image, task->buffer[0], task->offset[0], 0); /* Ignore limit */ /* Copy label */ @@ -89,12 +157,14 @@ static void *handle (void *args) { /* Free record */ crossbowRecordFree (record); } + info("Decoder processed %d bytes\n", bytes); return args; } crossbowRecordReaderP crossbowRecordReaderCreate (int workers) { crossbowRecordReaderP p = NULL; p = (crossbowRecordReaderP) crossbowMalloc (sizeof(crossbow_record_reader_t)); + p->shuffle = 1; p->dataset = crossbowListCreate (); p->counter = 0; p->records = 0; @@ -188,9 +258,13 @@ void crossbowRecordReaderNext (crossbowRecordReaderP p, crossbowRecordP record) /* Reset current file */ crossbowRecordFileReset (p->current, (crossbowListSize(p->dataset) != 1)); if (! crossbowListIteratorHasNext (p->dataset)) { + /* Shuffle the files */ + if (p->shuffle) + crossbowListShuffle (p->dataset); /* Reset file iterator */ crossbowListIteratorReset (p->dataset); p->wraps ++; + info("Wrap #%03d\n", p->wraps); } p->current = crossbowListIteratorNext (p->dataset); } @@ -207,9 +281,13 @@ crossbowRecordFileP crossbowRecordReaderNextPointer (crossbowRecordReaderP p, in /* Reset current file */ crossbowRecordFileReset (p->current, (crossbowListSize(p->dataset) != 1)); if (! crossbowListIteratorHasNext (p->dataset)) { + /* Shuffle the files */ + if (p->shuffle) + crossbowListShuffle (p->dataset); /* Reset file iterator */ crossbowListIteratorReset (p->dataset); p->wraps ++; + info("Wrap #%03d\n", p->wraps); } p->current = crossbowListIteratorNext (p->dataset); } @@ -221,7 +299,8 @@ crossbowRecordFileP crossbowRecordReaderNextPointer (crossbowRecordReaderP p, in /* * Read `count` examples of size `size` into `buffer` */ -void crossbowRecordReaderRead (crossbowRecordReaderP p, +void crossbowRecordReaderRead (crossbowRecordReaderP p, + unsigned training, int count, int size, void *buffer, @@ -269,6 +348,7 @@ void crossbowRecordReaderRead (crossbowRecordReaderP p, /* Create new task */ task = crossbowMalloc (sizeof(crossbow_record_reader_task_t)); /* Fill-in task */ + task->training = training; task->id = id; task->jc = p->jc; @@ -331,6 +411,7 @@ void crossbowRecordReaderRead (crossbowRecordReaderP p, } void crossbowRecordReaderReadProperly (crossbowRecordReaderP p, + unsigned training, int count, int *size, int b, @@ -348,7 +429,7 @@ void crossbowRecordReaderReadProperly (crossbowRecordReaderP p, crossbowRecordFileP file; int position; - + crossbowArrayListP list; crossbowRecordReaderTaskP task; @@ -358,7 +439,7 @@ void crossbowRecordReaderReadProperly (crossbowRecordReaderP p, nullPointerException (p); invalidConditionException (p->finalised); - invalidConditionException (p->workers > 1); + invalidConditionException (p->workers > 0); /* Create worker pool */ pthread_t *pool = (pthread_t *) crossbowMalloc (p->workers * sizeof(pthread_t)); @@ -387,6 +468,7 @@ void crossbowRecordReaderReadProperly (crossbowRecordReaderP p, /* Create new task */ task = crossbowMalloc (sizeof(crossbow_record_reader_task_t)); /* Fill-in task */ + task->training = training; task->id = id; task->jc = p->jc; @@ -433,11 +515,11 @@ void crossbowRecordReaderReadProperly (crossbowRecordReaderP p, crossbowFree (pool, (p->workers * sizeof(pthread_t))); tstamp_t dt = crossbowTimerElapsedTime (timer); - info("%d images processed in %llu usecs\n", count, dt); + double throughput = (((double) count) * 1000000.0) / ((double) dt); + info("%6d images processed in %7llu usecs: %7.1f images/sec\n", count, dt, throughput); crossbowTimerFree (timer); return; - } void crossbowRecordReaderFree (crossbowRecordReaderP p) { @@ -446,8 +528,10 @@ void crossbowRecordReaderFree (crossbowRecordReaderP p) { if (p->dataset) { while (! crossbowListEmpty(p->dataset)) { crossbowRecordFileP file = crossbowListRemoveFirst (p->dataset); + /* info("Free record file %s\n", file->filename); */ crossbowRecordFileFree (file); } + /* info("Free list of files\n"); */ crossbowListFree (p->dataset); } crossbowFree(p, sizeof(crossbow_record_reader_t)); diff --git a/clib-multigpu/image/recordreader.h b/clib-multigpu/image/recordreader.h index 0a8458e..942b5c1 100644 --- a/clib-multigpu/image/recordreader.h +++ b/clib-multigpu/image/recordreader.h @@ -8,6 +8,7 @@ typedef struct crossbow_record_reader *crossbowRecordReaderP; typedef struct crossbow_record_reader { + unsigned shuffle; crossbowListP dataset; int records; /* Total number of records in dataset */ int counter; /* Current record counter */ @@ -21,6 +22,8 @@ typedef struct crossbow_record_reader { typedef struct crossbow_record_reader_task *crossbowRecordReaderTaskP; typedef struct crossbow_record_reader_task { + /* Distinguish between pre-processing training and validation images */ + unsigned training; /* Worker id */ int id; int jc; @@ -49,9 +52,9 @@ void crossbowRecordReaderNext (crossbowRecordReaderP, crossbowRecordP); crossbowRecordFileP crossbowRecordReaderNextPointer (crossbowRecordReaderP, int *); -void crossbowRecordReaderRead (crossbowRecordReaderP, int, int, void *, int); +void crossbowRecordReaderRead (crossbowRecordReaderP, unsigned, int, int, void *, int); -void crossbowRecordReaderReadProperly (crossbowRecordReaderP, int, int *, int, int *, void *, void *, int *); +void crossbowRecordReaderReadProperly (crossbowRecordReaderP, unsigned, int, int *, int, int *, void *, void *, int *); void crossbowRecordReaderFree (crossbowRecordReaderP); diff --git a/clib-multigpu/image/rectangle.c b/clib-multigpu/image/rectangle.c index e44b16d..b5af745 100644 --- a/clib-multigpu/image/rectangle.c +++ b/clib-multigpu/image/rectangle.c @@ -25,6 +25,20 @@ void crossbowRectangleSet (crossbowRectangleP p, int xmin, int ymin, int xmax, i } float crossbowRectangleArea (crossbowRectangleP p) { + nullPointerException (p); + /* info("Find area of rectangle from (%d, %d) to (%d, %d)\n", p->xmin, p->ymin, p->xmax, p->ymax); */ + + /* + if (p->xmax <= p->xmin) { + info("Invalid x-axis in rectangle ((%d, %d) (%d, %d))\n", p->xmin, p->ymin, p->xmax, p->ymax); + } + invalidConditionException (p->xmax > p->xmin); + if (p->ymax <= p->ymin) { + info("Invalid y-axis in rectangle ((%d, %d) (%d, %d))\n", p->xmin, p->ymin, p->xmax, p->ymax); + } + invalidConditionException (p->ymax > p->ymin); + */ + float x = (float) (p->xmax - p->xmin); float y = (float) (p->ymax - p->ymin); return (x * y); @@ -32,9 +46,10 @@ float crossbowRectangleArea (crossbowRectangleP p) { unsigned crossbowRectangleEmpty (crossbowRectangleP p) { nullPointerException (p); - return ((p->xmin > p->xmax) || (p->ymin > p->ymax)); + return ((p->xmin >= p->xmax) || (p->ymin >= p->ymax)); } +/* Return false is a rectangle covers an area less than `limit` */ unsigned crossbowRectangleValid (crossbowRectangleP p, float limit) { nullPointerException (p); float area = crossbowRectangleArea (p); @@ -42,13 +57,18 @@ unsigned crossbowRectangleValid (crossbowRectangleP p, float limit) { } crossbowRectangleP crossbowRectangleIntersect (crossbowRectangleP p, crossbowRectangleP q) { + nullPointerException (p); nullPointerException (q); + int xmin = max(p->xmin, q->xmin); int ymin = max(p->ymin, q->ymin); + int xmax = max(p->xmax, q->xmax); int ymax = max(p->ymax, q->ymax); + if ((xmin > xmax) || (ymin > ymax)) + /* Return empty rectangle */ return crossbowRectangleCreate (0, 0, 0, 0); else return crossbowRectangleCreate (xmin, ymin, xmax, ymax); @@ -56,24 +76,25 @@ crossbowRectangleP crossbowRectangleIntersect (crossbowRectangleP p, crossbowRec /* * Determine if rectangle `p` covers a sufficient - * fraction of the bounding boxes. + * fraction of an array of rectangles. */ unsigned crossbowRectangleCovers (crossbowRectangleP p, float limit, crossbowArrayListP boxes) { nullPointerException (p); - + + /* Reject any rectangle which contains no pixels */ if (! crossbowRectangleValid (p, 1)) return 0; - + unsigned covered = 0; crossbowRectangleP intersection; int i; float coverage; - + int length = crossbowArrayListSize (boxes); crossbowRectangleP box; - /* Iterate over `length` bounding boxes */ + /* Iterate over all rectangles in array list */ for (i = 0; i < length; ++i) { box = (crossbowRectangleP) crossbowArrayListGet (boxes, i); if (! crossbowRectangleValid (box, 1)) @@ -86,7 +107,6 @@ unsigned crossbowRectangleCovers (crossbowRectangleP p, float limit, crossbowArr break; } } - return covered; } diff --git a/clib-multigpu/image/rectangle.h b/clib-multigpu/image/rectangle.h index c90186c..9aa1cc1 100644 --- a/clib-multigpu/image/rectangle.h +++ b/clib-multigpu/image/rectangle.h @@ -6,8 +6,8 @@ typedef struct crossbow_rectangle *crossbowRectangleP; typedef struct crossbow_rectangle { int xmin; - int xmax; int ymin; + int xmax; int ymax; } crossbow_rectangle_t; diff --git a/clib-multigpu/image/testbatchreader.c b/clib-multigpu/image/testbatchreader.c index 82ab115..0212ce3 100644 --- a/clib-multigpu/image/testbatchreader.c +++ b/clib-multigpu/image/testbatchreader.c @@ -10,6 +10,8 @@ #include "record.h" #include "recordreader.h" +#include "yarng.h" + #include "../timer.h" #define USAGE "./testbatchreader [-d directory] [-s subset] [-n files] [-v level]" @@ -18,38 +20,10 @@ * Perform the kind of pre-processing for test images */ static void preprocessTestRecord (crossbowRecordP record, unsigned verbose) { - - crossbowImageCast (record->image); - - /* Resize image */ - - float h = (float) crossbowImageInputHeight (record->image); - float w = (float) crossbowImageInputWidth (record->image); - - float factor = 1.15; - - float ratio = max(224. / h, 224. / w); - - int resizeheight = (int) (h * ratio * factor); - int resizewidth = (int) (w * ratio * factor); - if (verbose > 0) - printf("Resized image to (%d x %d)\n", resizeheight, resizewidth); - - crossbowImageResize (record->image, resizeheight, resizewidth); - - if (verbose > 0) - printf("Checksum of resized image is %.4f\n", crossbowImageChecksum (record->image)); - - /* Crop image */ - - int top = (resizeheight - 224) / 2; - int left = (resizewidth - 224) / 2; - - crossbowImageCrop (record->image, 224, 224, top, left); + (void) record; + (void) verbose; - if (verbose > 0) - printf("Checksum of cropped image is %.4f\n", crossbowImageChecksum (record->image)); return; } @@ -57,51 +31,10 @@ static void preprocessTestRecord (crossbowRecordP record, unsigned verbose) { * Perform the kind of pre-processing for training images */ static void preprocessTrainingRecord (crossbowRecordP record) { - - crossbowImageCast (record->image); - - /* - * Sample bounding box: - * - * Minimum coverage is 0.1 - * Aspect ratio range is [0.75, 1.33] - * Area range is [0.05, 1.0] - * Max. attempts is 100 - */ - int height = 0; - int width = 0; - int top = 0; - int left = 0; - float ratio [2] = {0.75, 1.33}; - float area [2] = {0.05, 1.00}; - crossbowImageSampleDistortedBoundingBox ( - record->image, - record->boxes, - 0.1, - &ratio[0], - &area [0], - 100, - &height, &width, &top, &left); - - /* Crop image */ - crossbowImageCrop (record->image, height, width, top, left); - - /* Flip image */ - crossbowImageRandomFlipLeftRight (record->image); - - /* Resize image */ - crossbowImageResize (record->image, 224, 224); - - /* Distort image colours */ - crossbowImageMultiply (record->image, (1. / 255.)); - - crossbowImageRandomBrightness (record->image, (32. / 255.)); - crossbowImageRandomContrast (record->image, 0.5, 1.5); - /* Lower saturation is 0.5, upper saturation is 1.5, max. delta hue is (0.2 x pi) */ - crossbowImageRandomHSVInYIQ (record->image, 0.5, 1.5, 0.2 * 3.14); - - /* Clip by value */ - crossbowImageClipByValue (record->image, 0.0, 1.0); + + (void) record; + + return; } int main (int argc, char *argv[]) { @@ -114,7 +47,7 @@ int main (int argc, char *argv[]) { char *subset = "train"; int files = 1; int workers = 1; - int batchsize = 32; + int b = 32; for (i = 1; i < argc;) { if ((j = i + 1) == argc) { fprintf(stderr, "usage: %s\n", USAGE); @@ -137,7 +70,7 @@ int main (int argc, char *argv[]) { workers = atoi(argv[j]); } else if (strcmp(argv[i], "-b") == 0) { - batchsize = atoi(argv[j]); + b = atoi(argv[j]); } else if (strcmp(argv[i], "-v") == 0) { verbose = (unsigned) atoi(argv[j]); @@ -147,9 +80,14 @@ int main (int argc, char *argv[]) { i = j + 1; } + (void) verbose; + /* Initialise memory manager */ crossbowMemoryManagerInit (); + /* Initialise random number generator */ + crossbowYarngInit (123456789); + crossbowRecordReaderP reader = crossbowRecordReaderCreate (workers); /* Register dataset */ @@ -169,7 +107,7 @@ int main (int argc, char *argv[]) { /* Create temporary buffer to hold batch of images. Every decoded * image is (3 x 224 x 224) x sizeof(float) or 602,112 bytes long. */ - int buffersize = batchsize * 602112; + int buffersize = b * 602112; void *buffer = (void *) crossbowMalloc (buffersize); crossbowTimerP timer = crossbowTimerCreate (); @@ -177,11 +115,11 @@ int main (int argc, char *argv[]) { int count = 0; /* Process 100 batches */ while (count < 100) { - crossbowRecordReaderRead (reader, batchsize, 602112, buffer, buffersize); + crossbowRecordReaderRead (reader, (strcmp (subset, "train") == 0) ? 1 : 0, b, 602112, buffer, buffersize); count ++; } tstamp_t dt = crossbowTimerElapsedTime (timer); - printf("%d batches (or %d images) processed\n", count, count * batchsize); + printf("%d batches (or %d images) processed\n", count, count * b); printf("%llu usecs\n", dt); crossbowTimerFree (timer); crossbowRecordReaderFree (reader); @@ -190,3 +128,4 @@ int main (int argc, char *argv[]) { printf("Bye.\n"); return 0; } + diff --git a/clib-multigpu/image/testrecordreader.c b/clib-multigpu/image/testrecordreader.c index 3c2107f..6fa735a 100644 --- a/clib-multigpu/image/testrecordreader.c +++ b/clib-multigpu/image/testrecordreader.c @@ -12,22 +12,33 @@ #include "../timer.h" +#include "yarng.h" + #define USAGE "./testrecordreader [-d directory] [-s subset] [-n files] [-v level]" /* * Perform the kind of pre-processing for test images + * that TensorFlow does in benchmarks/: + * + * https://github.com/alexandroskoliousis/benchmarks/blob/27b2ec139c86b39ab596321afe08878b36a5adfd/scripts/tf_cnn_benchmarks/preprocessing.py#L198 */ static void preprocessTestRecord (crossbowRecordP record, unsigned verbose) { - + + /* Cast image to 32-bit float */ crossbowImageCast (record->image); - - /* Resize image */ - + + /* Get image height and width (and convert to floats) */ float h = (float) crossbowImageInputHeight (record->image); float w = (float) crossbowImageInputWidth (record->image); - - float factor = 1.15; - + + /* In ResNet, images are cropped to 256 x 256 and the final image size is 224 x 224. + * It is: + * + * floor(224 x 1.45) ~= 256 + */ + float factor = 1.145; + + /* Maintain aspect ratio */ float ratio = max(224. / h, 224. / w); int resizeheight = (int) (h * ratio * factor); @@ -35,33 +46,39 @@ static void preprocessTestRecord (crossbowRecordP record, unsigned verbose) { if (verbose > 0) printf("Resized image to (%d x %d)\n", resizeheight, resizewidth); - + + /* Resize the image to shape using the bilinear method (do not align corners) */ crossbowImageResize (record->image, resizeheight, resizewidth); if (verbose > 0) printf("Checksum of resized image is %.4f\n", crossbowImageChecksum (record->image)); - - /* Crop image */ - - int top = (resizeheight - 224) / 2; - int left = (resizewidth - 224) / 2; - + + /* Crop image to size (224, 224) */ + int top = floor((float) (resizeheight - 224) / 2.); /* x // y */ + int left = floor((float) (resizewidth - 224) / 2.); + crossbowImageCrop (record->image, 224, 224, top, left); if (verbose > 0) printf("Checksum of cropped image is %.4f\n", crossbowImageChecksum (record->image)); + return; } /* * Perform the kind of pre-processing for training images + * that TensorFlow does in benchmarks/: + * + * https://github.com/alexandroskoliousis/benchmarks/blob/27b2ec139c86b39ab596321afe08878b36a5adfd/scripts/tf_cnn_benchmarks/preprocessing.py#L286 */ -static void preprocessTrainingRecord (crossbowRecordP record) { +static void preprocessTrainingRecord (crossbowRecordP record, int verbose) { + /* Cast image to 32-bit float */ crossbowImageCast (record->image); /* - * Sample bounding box: + * Sample bounding box. If not box is supplied, + * assume the bounding box is the entire image. * * Minimum coverage is 0.1 * Aspect ratio range is [0.75, 1.33] @@ -72,8 +89,11 @@ static void preprocessTrainingRecord (crossbowRecordP record) { int width = 0; int top = 0; int left = 0; + float ratio [2] = {0.75, 1.33}; float area [2] = {0.05, 1.00}; + + dbg("Sample bounding box\n"); crossbowImageSampleDistortedBoundingBox ( record->image, record->boxes, @@ -83,25 +103,20 @@ static void preprocessTrainingRecord (crossbowRecordP record) { 100, &height, &width, &top, &left); - /* Crop image */ + /* Crop image to the specified bounding box */ crossbowImageCrop (record->image, height, width, top, left); /* Flip image */ + dbg("Flip image\n"); crossbowImageRandomFlipLeftRight (record->image); - /* Resize image */ + /* Resize image to shape (224, 224) with the bilinear method (don't align corners) */ + dbg("Crop image to (224 x 224)\n"); crossbowImageResize (record->image, 224, 224); - - /* Distort image colours */ - crossbowImageMultiply (record->image, (1. / 255.)); - - crossbowImageRandomBrightness (record->image, (32. / 255.)); - crossbowImageRandomContrast (record->image, 0.5, 1.5); - /* Lower saturation is 0.5, upper saturation is 1.5, max. delta hue is (0.2 x pi) */ - crossbowImageRandomHSVInYIQ (record->image, 0.5, 1.5, 0.2 * 3.14); - - /* Clip by value */ - crossbowImageClipByValue (record->image, 0.0, 1.0); + + if (verbose > 0) + printf("Checksum of traning image is %.4f\n", crossbowImageChecksum (record->image)); + } int main (int argc, char *argv[]) { @@ -110,8 +125,9 @@ int main (int argc, char *argv[]) { int i, j; /* Default input arguments */ unsigned verbose = 0; - char *directory = "/mnt/nfs/users/piwatcha/my-tensorflow/data/imagenet/crossbow"; + char *directory = "examples"; char *subset = "train"; + char *type = "test"; int files = 1; for (i = 1; i < argc;) { if ((j = i + 1) == argc) { @@ -131,6 +147,13 @@ int main (int argc, char *argv[]) { if (strcmp(argv[i], "-n") == 0) { files = atoi(argv[j]); } else + if (strcmp(argv[i], "-t") == 0) { + if ((strcmp (argv[j], "train") != 0) && (strcmp (argv[j], "test") != 0)) { + fprintf(stderr, "error: invalid type '%s'. Try 'train' or 'test'\n", argv[j]); + exit(1); + } + type = argv[j]; + } else if (strcmp(argv[i], "-v") == 0) { verbose = (unsigned) atoi(argv[j]); } else { @@ -142,6 +165,8 @@ int main (int argc, char *argv[]) { /* Initialise memory manager */ crossbowMemoryManagerInit (); + crossbowYarngInit(123456789); + crossbowRecordReaderP reader = crossbowRecordReaderCreate (1); /* 1 worker */ /* Register dataset */ @@ -158,29 +183,34 @@ int main (int argc, char *argv[]) { crossbowRecordReaderRepeat (reader, 1); crossbowRecordReaderFinalise (reader); - crossbowTimerP timer = crossbowTimerCreate (); - crossbowTimerStart (timer); + crossbowTimerP timer = crossbowTimerCreate (); + crossbowTimerStart (timer); int count = 0; /* Iterate over dataset */ - while (crossbowRecordReaderHasNext(reader)) { + while (crossbowRecordReaderHasNext(reader) && (count < 10)) { crossbowRecordP record = crossbowRecordCreate (); crossbowRecordReaderNext (reader, record); if (verbose > 1) { - char *s = crossbowRecordString(record); - printf("%s\n", s); - crossbowStringFree (s); - /* crossbowImageDump (record->image, 5); */ + char *s = crossbowRecordString(record); + printf("%s\n", s); + crossbowStringFree (s); + /* crossbowImageDump (record->image, 5); */ } - preprocessTestRecord (record, verbose); - crossbowRecordFree (record); + if (strcmp (type, "train") == 0) + preprocessTrainingRecord (record, verbose); + else { + preprocessTestRecord (record, verbose); + } + crossbowRecordFree (record); count ++; } - tstamp_t dt = crossbowTimerElapsedTime (timer); - printf("%d images processed\n", count); - printf("%llu usecs\n", dt); - crossbowTimerFree (timer); + tstamp_t dt = crossbowTimerElapsedTime (timer); + printf("%d images processed\n", count); + printf("%llu usecs\n", dt); + crossbowTimerFree (timer); crossbowRecordReaderFree (reader); crossbowMemoryManagerDump (); printf("Bye.\n"); return 0; } + diff --git a/clib-multigpu/image/yarng.cpp b/clib-multigpu/image/yarng.cpp index fb31bf6..b1f3a65 100644 --- a/clib-multigpu/image/yarng.cpp +++ b/clib-multigpu/image/yarng.cpp @@ -5,8 +5,11 @@ using namespace crossbow; static CrossbowRandomGenerator *generator = NULL; +static int initialised = 0; void crossbowYarngInit (unsigned int seed) { + if (initialised) + return; generator = new CrossbowRandomGenerator (seed); } diff --git a/clib-multigpu/list.c b/clib-multigpu/list.c index 2243b99..4a3c8e6 100644 --- a/clib-multigpu/list.c +++ b/clib-multigpu/list.c @@ -5,6 +5,8 @@ #include "utils.h" #include "debug.h" +#include + /* Return node to free list */ static void putNode (crossbowListP list, crossbowListNodeP p) { p->item = NULL; @@ -139,6 +141,41 @@ void *crossbowListRemoveFirst (crossbowListP p) { return item; } +void crossbowListShuffle (crossbowListP p) { + int i, j; + void **array; + void *t; + int n; + nullPointerException(p); + if (crossbowListEmpty(p)) + return; + n = crossbowListSize(p); + if (n == 1) + return; + /* Convert list to array */ + array = crossbowMalloc (n * sizeof(void *)); + i = 0; + while (! crossbowListEmpty(p)) { + void *item = crossbowListRemoveFirst (p); + array[i++] = item; + } + /* Shuffle array */ + srand(time(NULL)); + for (i = n - 1; i > 0; --i) { + j = rand() % (i + 1); + /* Swap item at position i with item at position j */ + t = array[i]; + array[i] = array[j]; + array[j] = t; + } + /* Re-populate the list */ + for (i = 0; i < n; ++i) + crossbowListAppend(p, array[i]); + /* Free temporal array */ + crossbowFree (array, (n * sizeof(void *))); + return; +} + void crossbowListIteratorReset (crossbowListP p) { p->it = p->head; return; diff --git a/clib-multigpu/list.h b/clib-multigpu/list.h index 54ef075..c2653c6 100644 --- a/clib-multigpu/list.h +++ b/clib-multigpu/list.h @@ -32,6 +32,8 @@ void *crossbowListPeekTail (crossbowListP); void *crossbowListRemoveFirst (crossbowListP); +void crossbowListShuffle (crossbowListP); + void crossbowListIteratorReset (crossbowListP); unsigned crossbowListIteratorHasNext (crossbowListP); void *crossbowListIteratorNext (crossbowListP); diff --git a/clib-multigpu/recorddataset.c b/clib-multigpu/recorddataset.c index f00c4dd..e0922dc 100644 --- a/clib-multigpu/recorddataset.c +++ b/clib-multigpu/recorddataset.c @@ -31,15 +31,16 @@ static void *handle (void *args) { if (self->exit) { break; } - + /* Get task */ event = (crossbowRecordDatasetEventP) crossbowListRemoveFirst (self->events); - info("New task: fill %d\n", event->idx); + dbg("New task: fill %d\n", event->idx); /* Process event */ crossbowRecordReaderReadProperly (self->reader, + self->phi == TRAIN ? 1 : 0, self->count, self->buffer->size, self->buffer->b, @@ -54,24 +55,29 @@ static void *handle (void *args) { /* Return task to free list */ crossbowFree (event, sizeof(crossbow_record_dataset_event_t)); } + info("Record data set worker exits\n"); + self->exited = 1; return self; } -crossbowRecordDatasetP crossbowRecordDatasetCreate (int workers, int *capacity, int NB, int b, int *padding) { +crossbowRecordDatasetP crossbowRecordDatasetCreate (int workers, int *capacity, int NB, int b, int *padding, crossbowPhase_t phi) { crossbowRecordDatasetP p = (crossbowRecordDatasetP) crossbowMalloc (sizeof(crossbow_record_dataset_t)); + + /* Distinguish between training and validation datasets */ + p->phi = phi; p->reader = crossbowRecordReaderCreate (workers); p->buffer = crossbowDoubleBufferCreate (capacity, NB, b, padding); crossbowDoubleBufferRegister (p->buffer); crossbowDoubleBufferAdviceWillNeed (p->buffer); - + /* Current data pointers */ p->images = NULL; p->labels = NULL; - + /* Number of images and labels to decode/read per read call */ p->count = (NB * b); @@ -110,6 +116,7 @@ void crossbowRecordDatasetInitSafely (crossbowRecordDatasetP p) { p->buffer->capacity[1]); crossbowRecordReaderReadProperly (p->reader, + p->phi == TRAIN ? 1 : 0, p->count, p->buffer->size, p->buffer->b, @@ -135,8 +142,7 @@ void crossbowRecordDatasetSwap (crossbowRecordDatasetP p) { int next = (++p->buffer->idx) % 2; p->buffer->idx = next; - - info("Swap from %d to %d\n", prev, next); + dbg("Swap from %d to %d\n", prev, next); /* Unlock previous buffer */ crossbowDoubleBufferUnlock (p->buffer, prev); @@ -167,10 +173,18 @@ void crossbowRecordDatasetSwap (crossbowRecordDatasetP p) { void crossbowRecordDatasetFree (crossbowRecordDatasetP p) { if (! p) return; - + + /* Wait for thread to exit (it may still swapping) */ + p->exit = 1; + pthread_join(p->thread, NULL); + + /* Free buffer */ + info("Free double buffer\n"); if (p->buffer) crossbowDoubleBufferFree (p->buffer); - + + /* Free record dataset */ + info("Free record reader\n"); if (p->reader) crossbowRecordReaderFree (p->reader); diff --git a/clib-multigpu/recorddataset.h b/clib-multigpu/recorddataset.h index 41dfccf..d4fd7fd 100644 --- a/clib-multigpu/recorddataset.h +++ b/clib-multigpu/recorddataset.h @@ -1,6 +1,8 @@ #ifndef __CROSSBOW_RECORD_DATASET_H_ #define __CROSSBOW_RECORD_DATASET_H_ +#include "utils.h" + #include "doublebuffer.h" #include "image/recordreader.h" @@ -8,6 +10,8 @@ typedef struct crossbow_record_dataset *crossbowRecordDatasetP; typedef struct crossbow_record_dataset { + + crossbowPhase_t phi; /* Pointer to images and labels buffer */ void *images; @@ -19,13 +23,12 @@ typedef struct crossbow_record_dataset { crossbowDoubleBufferP buffer; /* Create worker thread */ - volatile int exit; int exited; - + pthread_mutex_t lock; pthread_cond_t cond; - + crossbowListP events; pthread_t thread; @@ -37,7 +40,7 @@ typedef struct crossbow_record_dataset_event { int idx; } crossbow_record_dataset_event_t; -crossbowRecordDatasetP crossbowRecordDatasetCreate (int, int *, int, int, int *); +crossbowRecordDatasetP crossbowRecordDatasetCreate (int, int *, int, int, int *, crossbowPhase_t); void crossbowRecordDatasetInit (crossbowRecordDatasetP); diff --git a/clib-multigpu/solverconfiguration.c b/clib-multigpu/solverconfiguration.c index 16e384e..b7d3bf1 100644 --- a/clib-multigpu/solverconfiguration.c +++ b/clib-multigpu/solverconfiguration.c @@ -27,6 +27,8 @@ crossbowSolverConfP crossbowSolverConfCreate () { p->steps = NULL; p->step = 0; + p->warmuptasks = 0; + p->irregular = 0; p->momentum = 0; @@ -73,6 +75,8 @@ crossbowSolverConfP crossbowSolverConfReplicate (crossbowSolverConfP conf) { } p->step = conf->step; + p->warmuptasks = conf->warmuptasks; + p->irregular = conf->irregular; p->momentum = conf->momentum; @@ -124,15 +128,27 @@ float crossbowSolverConfGetLearningRate (crossbowSolverConfP p, int task) { case MULTISTEP: if ((p->step < p->numberofsteps) && ((task + 1) >= p->steps[p->step])) { p->step ++; - //if (task >= 62560) { - // p->gamma = 0.1; info("Changing learning rate to %.10f\n", p->learningRate * (float) (pow (p->gamma, p->step))); - //} - /* Signal copy of base model(s) to replicas */ + /* Signal copy of base model(s) to replicas */ p->_copy = 1; } rate = p->learningRate * (float) (pow (p->gamma, p->step)); break; + case LSR: + invalidConditionException(p->warmuptasks > 0); + if (task < p->warmuptasks) { + rate = (p->learningRate * ((float) task)) / ((float) p->warmuptasks); + } else { + /* Fallback to multi-step strategy (piecewise-constant) */ + if ((p->step < p->numberofsteps) && ((task + 1) >= p->steps[p->step])) { + p->step ++; + info("Changing learning rate to %.10f\n", p->learningRate * (float) (pow (p->gamma, p->step))); + /* Signal copy of base model(s) to replicas */ + p->_copy = 1; + } + rate = p->learningRate * (float) (pow (p->gamma, p->step)); + } + break; case EXP: rate = p->learningRate * (float) (pow (p->gamma, (task + 1))); break; diff --git a/clib-multigpu/solverconfiguration.h b/clib-multigpu/solverconfiguration.h index ab3fbcf..0d9fdb3 100644 --- a/clib-multigpu/solverconfiguration.h +++ b/clib-multigpu/solverconfiguration.h @@ -20,6 +20,8 @@ typedef struct crossbow_solver_configuration { int *steps; int step; + int warmuptasks; + unsigned irregular; float momentum; diff --git a/clib-multigpu/testrecorddataset.c b/clib-multigpu/testrecorddataset.c index fc542c0..31bfbec 100644 --- a/clib-multigpu/testrecorddataset.c +++ b/clib-multigpu/testrecorddataset.c @@ -10,29 +10,95 @@ #include "recorddataset.h" -#define USAGE "./testrecorddataset" +#include "image/yarng.h" -int main (int argc, char *argv[]) { +#define USAGE "./testrecorddataset [-sdnwbv...]" - (void) argc; - (void) argv; +int main (int argc, char *argv[]) { crossbowRecordDatasetP dataset = NULL; - int workers = 4; - int capacity [2] = { 616562688, 131072 }; + + /* The number of batches pre-processed at a time */ int NB = 32; + /* The batch size */ int b = 32; - int padding [2] = { 0, 3968 }; + /* The number of pre-processing threads */ + int workers = 1; + /* The number of files in the dataset */ int files = 626; - char *directory = "/data/crossbow/imagenet/ilsvrc2012/records"; + /* The location of the dataset */ + char *directory = "/data/crossbow/imagenet/records"; + /* The kind of dataset (train or validation) */ char *subset = "train"; - + /* Number of iterations */ + int iterations = 100; + + int i, j; + for (i = 1; i < argc;) { + if ((j = i + 1) == argc) { + fprintf(stderr, "usage: %s\n", USAGE); + exit(1); + } + if (strcmp(argv[i], "-s") == 0) { + if ((strcmp (argv[j], "train") != 0) && (strcmp (argv[j], "validation") != 0)) { + fprintf(stderr, "error: invalid subset '%s'. Try 'train' or 'validation'\n", argv[j]); + exit(1); + } + subset = argv[j]; + } else + if (strcmp(argv[i], "-d") == 0) { + directory = argv[j]; + } else + if (strcmp(argv[i], "-f") == 0) { + files = atoi(argv[j]); + } else + if (strcmp(argv[i], "-w") == 0) { + workers = atoi(argv[j]); + } else + if (strcmp(argv[i], "-b") == 0) { + b = atoi(argv[j]); + } else + if (strcmp(argv[i], "-n") == 0) { + NB = atoi(argv[j]); + } else + if (strcmp(argv[i], "-i") == 0) { + iterations = atoi(argv[j]); + } else { + fprintf(stderr, "error: unknown flag: %s %s\n", argv[i], argv[j]); + } + i = j + 1; + } + + int padding [2] = { 0, 0 }; + /* + * Calculate padding per batch for images and labels + * so that each batch is page-aligned. + * + * Each image is 602112 bytes long. + * Each label is 4 bytes long. + */ + padding[0] = (((b * 602112) % 4096) == 0) ? 0 : (4096 - ((b * 602112) % 4096)); + padding[1] = (((b * 4) % 4096) == 0) ? 0 : (4096 - ((b * 4) % 4096)); + + invalidConditionException ((((b * 602112) + padding[0]) % 4096) == 0); + invalidConditionException ((((b * 4) + padding[1]) % 4096) == 0); + + /* Calculate the capacity of temporal buffers */ + int capacity [2] = { 0, 0 }; + + capacity[0] = NB * (padding[0] + (b * 602112)); + capacity[1] = NB * (padding[1] + (b * 4)); + /* Initialise memory manager */ crossbowMemoryManagerInit (); + + /* Initialise (yet another) random number generator */ + crossbowYarngInit (123456789); + + /* Create dataset */ + dataset = crossbowRecordDatasetCreate (workers, capacity, NB, b, padding, (strcmp(subset, "train") == 0) ? TRAIN : CHECK); - dataset = crossbowRecordDatasetCreate (workers, capacity, NB, b, padding); - - /* Register dataset */ + /* Register dataset files with the record reader */ int idx; char filename[1024]; for (idx = 0; idx < files; ++idx) { @@ -42,15 +108,13 @@ int main (int argc, char *argv[]) { } crossbowRecordReaderRegister (dataset->reader, filename); } - - info("Finalise dataset's reader\n"); + /* Finalise record reader */ crossbowRecordReaderFinalise (dataset->reader); info("Fill dataset's buffer for the first time\n"); - crossbowRecordDatasetInitSafely (dataset); - int count = 0, iterations = 1000000; + int count = 0; while (count < iterations) { /* Swap buffers (assuming instant processing) */ crossbowRecordDatasetSwap (dataset); diff --git a/clib-multigpu/uk_ac_imperial_lsds_crossbow_device_TheGPU.h b/clib-multigpu/uk_ac_imperial_lsds_crossbow_device_TheGPU.h index 2ea010a..6a56b2e 100644 --- a/clib-multigpu/uk_ac_imperial_lsds_crossbow_device_TheGPU.h +++ b/clib-multigpu/uk_ac_imperial_lsds_crossbow_device_TheGPU.h @@ -474,10 +474,10 @@ JNIEXPORT jint JNICALL Java_uk_ac_imperial_lsds_crossbow_device_TheGPU_setLearni /* * Class: uk_ac_imperial_lsds_crossbow_device_TheGPU * Method: setLearningRateDecayPolicyMultiStep - * Signature: (FD[I)I + * Signature: (FDI[I)I */ JNIEXPORT jint JNICALL Java_uk_ac_imperial_lsds_crossbow_device_TheGPU_setLearningRateDecayPolicyMultiStep - (JNIEnv *, jobject, jfloat, jdouble, jintArray); + (JNIEnv *, jobject, jfloat, jdouble, jint, jintArray); /* * Class: uk_ac_imperial_lsds_crossbow_device_TheGPU diff --git a/clib-multigpu/utils.h b/clib-multigpu/utils.h index b04d5a1..d2ef50c 100644 --- a/clib-multigpu/utils.h +++ b/clib-multigpu/utils.h @@ -92,7 +92,7 @@ typedef enum crossbow_model_synchronisation_type { BSP = 0, SSP, ASP } crossbowM typedef enum crossbow_model_update_type { DEFAULT = 0, WORKER, EAMSGD, SYNCHRONOUSEAMSGD, DOWNPOUR } crossbowModelUpdate_t; -typedef enum crossbow_learning_rate_decay_policy_type { FIXED = 0, INV, STEP, MULTISTEP, EXP, CLR } crossbowLearningRateDecayPolicy_t; +typedef enum crossbow_learning_rate_decay_policy_type { FIXED = 0, INV, STEP, MULTISTEP, LSR, EXP, CLR } crossbowLearningRateDecayPolicy_t; typedef enum crossbow_operator_dependency_type { START_BEFORE_START = 0, END_BEFORE_START } crossbowOperatorDependency_t; diff --git a/src/main/java/uk/ac/imperial/lsds/crossbow/ModelConf.java b/src/main/java/uk/ac/imperial/lsds/crossbow/ModelConf.java index 18575c7..c55d5ad 100644 --- a/src/main/java/uk/ac/imperial/lsds/crossbow/ModelConf.java +++ b/src/main/java/uk/ac/imperial/lsds/crossbow/ModelConf.java @@ -389,6 +389,10 @@ else if (arg.equals("--learning-rate-step-unit")) { System.exit(1); } } + else if (arg.equals("--warmup-steps")) { + + solverConf.setWarmupSteps (opt.getIntValue ()); + } else if (arg.equals("--alpha")) { solverConf.setAlpha (opt.getFloatValue ()); diff --git a/src/main/java/uk/ac/imperial/lsds/crossbow/device/TheGPU.java b/src/main/java/uk/ac/imperial/lsds/crossbow/device/TheGPU.java index d2ffbde..b033d7c 100644 --- a/src/main/java/uk/ac/imperial/lsds/crossbow/device/TheGPU.java +++ b/src/main/java/uk/ac/imperial/lsds/crossbow/device/TheGPU.java @@ -276,7 +276,7 @@ public void execute (int dataflowId, Batch batch, Integer replicaId, Task task) public native int setLearningRateDecayPolicyFixed (float rate); public native int setLearningRateDecayPolicyInv (float rate, double gamma, double power); public native int setLearningRateDecayPolicyStep (float rate, double gamma, int step); - public native int setLearningRateDecayPolicyMultiStep (float rate, double gamma, int [] step); + public native int setLearningRateDecayPolicyMultiStep (float rate, double gamma, int warmup, int [] step); public native int setLearningRateDecayPolicyExp (float rate, double gamma); public native int setLearningRateDecayPolicyCircular (float [] rate, int superconvergence, float [] momentum, int step); diff --git a/src/main/java/uk/ac/imperial/lsds/crossbow/kernel/conf/SolverConf.java b/src/main/java/uk/ac/imperial/lsds/crossbow/kernel/conf/SolverConf.java index 46777f3..03adddd 100644 --- a/src/main/java/uk/ac/imperial/lsds/crossbow/kernel/conf/SolverConf.java +++ b/src/main/java/uk/ac/imperial/lsds/crossbow/kernel/conf/SolverConf.java @@ -31,6 +31,8 @@ public class SolverConf implements IConf { private int currentstep; private int [] stepvalues; + + private int warmupsteps; private TrainingUnit stepUnit; private boolean converted; @@ -84,6 +86,8 @@ public SolverConf (ModelConf parent) { stepUnit = TrainingUnit.TASKS; converted = false; + warmupsteps = 0; + alpha = 0.5F; tau = 1; @@ -218,6 +222,18 @@ public SolverConf setCurrentStep (int currentstep) { converted = true; return stepvalues; } + + public SolverConf setWarmupSteps (int warmupsteps) { + this.warmupsteps = warmupsteps; + return this; + } + + public int getWarmupSteps () { + if (stepUnit != TrainingUnit.TASKS) + return (warmupsteps * parent.numberOfTasksPerEpoch()); + else + return warmupsteps; + } public String getStepValuesToString () { if (stepvalues == null) @@ -358,7 +374,15 @@ public void GPURegister () { case MULTISTEP: if (stepvalues == null) throw new NullPointerException(); - TheGPU.getInstance().setLearningRateDecayPolicyMultiStep (baseLearningRate, gamma, getStepValues()); + TheGPU.getInstance().setLearningRateDecayPolicyMultiStep (baseLearningRate, gamma, 0, getStepValues()); + break; + case LSR: + if (stepvalues == null) + throw new NullPointerException(); + if (warmupsteps == 0) + throw new IllegalStateException ("error: invalid number of warm-up steps"); + TheGPU.getInstance().setLearningRateDecayPolicyMultiStep (baseLearningRate, gamma, + getWarmupSteps(), getStepValues()); break; case EXP: TheGPU.getInstance().setLearningRateDecayPolicyExp (baseLearningRate, gamma); @@ -399,6 +423,7 @@ public static LinkedList