// ir.go

package lblconv

// The intermediate annotation metadata representation.

import (
	"fmt"
	"image"
	"log"
	"math"
	"math/rand"
	"path/filepath"
	"reflect"
	"runtime"
	"strings"
	"sync"
	"time"

	"github.com/disintegration/imaging"
)

// Keys for known annotation attributes. They are used as keys of Annotation.Attributes; the type
// noted for each key is the expected dynamic type of the stored value.
const (
	AncestorLabels = "Ancestors"  // Ancestors in the label taxonomy. Type []string.
	Confidence     = "Confidence" // Confidence value of the annotation. Type float64 in [0.0, 1.0].
	CropCoords     = "CropCoords" // Absolute coords "(x1,y1)(x2,y2)" of a crop in the source image. Type string.
	DetectedText   = "Text"       // Text that is associated with the bounding box. Type string.
)

// Annotation is the intermediate representation of an object label: a labelled bounding box with
// optional additional attributes.
type Annotation struct {
	Attributes map[string]interface{} // Additional attributes of this annotation, keyed by attribute name.
	Coords     [4]float64             // Absolute x1, y1, x2, y2 offsets from the top-left corner.
	Label      string                 // The label of the annotated object.
}

// Width returns the horizontal extent of the bounding box, i.e. x2 - x1 of a.Coords.
func (a Annotation) Width() float64 {
	x1, x2 := a.Coords[0], a.Coords[2]
	return x2 - x1
}

// Height returns the vertical extent of the bounding box, i.e. y2 - y1 of a.Coords.
func (a Annotation) Height() float64 {
	y1, y2 := a.Coords[1], a.Coords[3]
	return y2 - y1
}

// AnnotatedFile is the intermediate representation of file metadata: the path of a file together
// with the annotations that belong to it.
type AnnotatedFile struct {
	Annotations []Annotation // The annotations of the file.
	FilePath    string       // The path of the annotated file.
}

// scaleCoords multiplies the x coordinates (x1, x2) of every annotation by width and the
// y coordinates (y1, y2) by height. The annotations are modified in place.
func (f *AnnotatedFile) scaleCoords(width, height float64) {
	for i := range f.Annotations {
		c := &f.Annotations[i].Coords
		c[0] *= width
		c[1] *= height
		c[2] *= width
		c[3] *= height
	}
}

// subImager is implemented by image types that can return a sub-region of themselves through a
// SubImage method. cropObjectsFromImage requires its input image to satisfy this interface.
type subImager interface {
	SubImage(r image.Rectangle) image.Image
}

// cropObjectsFromImage cuts one crop out of img for every annotation whose bounding box overlaps
// the image bounds. Bounding boxes are rounded to integer pixels and clipped to the image;
// annotations whose clipped box is empty are skipped. The crops are obtained through the image's
// SubImage method and may therefore share pixel data with img.
//
// Alongside the crops it returns one AnnotatedFile per crop, in the same order. Each holds a
// single annotation that covers the whole crop, carries the original label and a shallow copy of
// the original attributes, and has the CropCoords attribute set to the clipped box in source image
// coordinates. The file path is f.FilePath with a "_xx" suffix inserted before the file
// extension, where xx is the index of the annotation in f.Annotations.
//
// An error is returned if img does not provide a SubImage method.
func (f *AnnotatedFile) cropObjectsFromImage(img image.Image) (
	[]image.Image, []AnnotatedFile, error) {

	cropper, canCrop := img.(subImager)
	if !canCrop {
		return nil, nil,
			fmt.Errorf("the image type of %q does not provide a SubImage method", f.FilePath)
	}

	// The path stem and extension are the same for every crop of this file.
	ext := filepath.Ext(f.FilePath)
	stem := f.FilePath[:len(f.FilePath)-len(ext)]

	imgBounds := img.Bounds()
	n := len(f.Annotations)
	crops := make([]image.Image, 0, n)
	files := make([]AnnotatedFile, 0, n)

	for idx := range f.Annotations {
		ann := &f.Annotations[idx]

		// Round to whole pixels and clip the bounding box to the image bounds.
		box := image.Rect(
			int(math.Round(ann.Coords[0])), int(math.Round(ann.Coords[1])),
			int(math.Round(ann.Coords[2])), int(math.Round(ann.Coords[3])),
		).Intersect(imgBounds)
		if box.Empty() {
			continue
		}

		// Shallow copy of the attributes, extended by the crop location in the source image.
		attrs := make(map[string]interface{}, len(ann.Attributes)+1)
		for key, val := range ann.Attributes {
			attrs[key] = val
		}
		attrs[CropCoords] = fmt.Sprintf("(%d,%d)(%d,%d)",
			box.Min.X, box.Min.Y, box.Max.X, box.Max.Y)

		crops = append(crops, cropper.SubImage(box))

		// The crop's only annotation spans the entire crop.
		files = append(files, AnnotatedFile{
			Annotations: []Annotation{{
				Attributes: attrs,
				Coords:     [4]float64{0, 0, float64(box.Dx()), float64(box.Dy())},
				Label:      ann.Label,
			}},
			FilePath: fmt.Sprintf("%s_%02d%s", stem, idx, ext),
		})
	}

	return crops, files, nil
}

// AnnotatedFiles is the annotation metadata for a list of files. Its methods use a pointer
// receiver because some of them (e.g. Filter, ProcessImages) shrink or replace the list itself.
type AnnotatedFiles []AnnotatedFile

// MapLabels replaces label (sub-)strings with substitution values, as specified in mappings.
//
// Each mapping has the format old=new and must contain exactly one "=". The old part must not be
// empty, because replacing the empty string would insert new at every position of every label.
// The new part may be empty, which deletes all occurrences of old.
//
// The mappings are applied to every label in the order given, so a later mapping sees the result
// of the earlier ones. All mappings are validated before any label is modified; an invalid
// mapping results in an error and leaves the data untouched.
//
// The number of labels that changed is written to the log.
func (data *AnnotatedFiles) MapLabels(mappings []string) error {
	if len(mappings) == 0 {
		return nil
	}

	// Extract the individual old and new strings to map between.
	type replacement struct{ from, to string }
	replacements := make([]replacement, 0, len(mappings))
	for _, m := range mappings {
		parts := strings.Split(m, "=")
		if len(parts) != 2 {
			return fmt.Errorf("invalid mapping: %v", m)
		}
		if parts[0] == "" {
			// strings.Replace with an empty pattern matches between all runes.
			return fmt.Errorf("invalid mapping: %v (the old value must not be empty)", m)
		}
		replacements = append(replacements, replacement{from: parts[0], to: parts[1]})
	}

	// Apply the replacements, in order, to all labels.
	count := 0
	for fi := range *data {
		annotations := (*data)[fi].Annotations
		for i := range annotations {
			a := &annotations[i]

			oldLabel := a.Label
			for _, r := range replacements {
				a.Label = strings.Replace(a.Label, r.from, r.to, -1)
			}

			if a.Label != oldLabel {
				count++
			}
		}
	}

	log.Printf("The label mappings changed %d labels", count)
	return nil
}

// TransformBboxes transforms the bounding boxes of all annotations in place.
//
// Each box is first scaled about its centre by the horizontal and vertical factors scaleX and
// scaleY. This step is skipped when both factors are 1.
//
// Afterwards the box is grown (never shrunk) symmetrically in one dimension until its
// width/height ratio equals aspectRatio. An aspectRatio of zero (or less) disables this step.
func (data *AnnotatedFiles) TransformBboxes(scaleX, scaleY, aspectRatio float64) {
	doScale := scaleX != 1 || scaleY != 1
	doGrow := aspectRatio > 0

	for fi := range *data {
		boxes := (*data)[fi].Annotations
		for bi := range boxes {
			box := &boxes[bi]

			if doScale {
				width, height := box.Width(), box.Height()
				growX := (width*scaleX - width) * 0.5
				growY := (height*scaleY - height) * 0.5

				box.Coords[0] -= growX
				box.Coords[1] -= growY
				box.Coords[2] += growX
				box.Coords[3] += growY
			}

			if !doGrow {
				continue
			}

			// A box without height is treated as infinitely wide, so that the expansion also
			// works if one of width or height is zero.
			width, height := box.Width(), box.Height()
			ratio := math.MaxFloat64
			if height != 0 {
				ratio = width / height
			}

			switch {
			case ratio < aspectRatio:
				// Too narrow: expand horizontally.
				pad := (height*aspectRatio - width) * 0.5
				box.Coords[0] -= pad
				box.Coords[2] += pad
			case ratio > aspectRatio:
				// Too wide: expand vertically.
				pad := (width/aspectRatio - height) * 0.5
				box.Coords[1] -= pad
				box.Coords[3] += pad
			}
		}
	}
}

// Filter filters out annotations which do not match any of the given labelNames, have a confidence
// value less than minConfidence, a bounding box with less than minBboxWidth or minBboxHeight, or
// do not match the required aspect ratio.
//
// An empty labelNames list disables the label filter. Annotations without a float64 Confidence
// attribute pass the confidence filter.
//
// The aspect ratio of width/height must be in [minAspectRatio, maxAspectRatio], except that a
// min/max value of zero disables the respective filter. If at least one of the two is enabled,
// annotations with a height of zero are filtered out.
//
// If attributes is non empty, only the listed attributes are kept. This only filters the list
// of attributes of the remaining annotations, not the annotations themselves.
//
// Similarly, requiredAttrs specifies attributes that must be present with a value that is not the
// Go zero value of their type. If this test fails for an annotation, that annotation is deleted.
// Attribute values of any type are supported, including slices and maps (where nil is the zero
// value).
//
// If requireLabel is true, files that have no annotations left after filtering (or had none to
// begin with) are removed from the list.
//
// Deleting moves the last element into the freed position, so the order of annotations and files
// is not preserved. The number of removed labels and files is written to the log.
func (data *AnnotatedFiles) Filter(labelNames, attributes, requiredAttrs []string,
	minConfidence float64, requireLabel bool, minBboxWidth, minBboxHeight, minAspectRatio,
	maxAspectRatio float64) {

	// Look for string in list.
	inList := func(v string, l []string) bool {
		for _, val := range l {
			if val == v {
				return true
			}
		}
		return false
	}

	// isZero reports whether v is nil or the zero value of its dynamic type. DeepEqual is used
	// instead of ==, because comparing interface values that hold an uncomparable type, such as
	// the []string of AncestorLabels, panics at run time.
	isZero := func(v interface{}) bool {
		if v == nil {
			return true
		}
		return reflect.DeepEqual(v, reflect.Zero(reflect.TypeOf(v)).Interface())
	}

	// keep reports whether the annotation passes all annotation filters.
	keep := func(a *Annotation) bool {
		// Filter by confidence. If the annotation has no confidence value then it passes the filter.
		if c, ok := a.Attributes[Confidence].(float64); ok && c < minConfidence {
			return false
		}

		// Filter by bbox size.
		width := a.Width()
		height := a.Height()
		if minBboxWidth > width || minBboxHeight > height {
			return false
		}

		// Filter by bbox aspect ratio.
		if minAspectRatio != 0 || maxAspectRatio != 0 {
			if height == 0 {
				return false
			}
			ratio := width / height
			inRange := (minAspectRatio == 0 || ratio >= minAspectRatio) &&
				(maxAspectRatio == 0 || ratio <= maxAspectRatio)
			if !inRange {
				return false
			}
		}

		// Filter by labels.
		if len(labelNames) > 0 && !inList(a.Label, labelNames) {
			return false
		}

		// Filter by required attributes with non zero value. A missing attribute yields nil.
		for _, k := range requiredAttrs {
			if isZero(a.Attributes[k]) {
				return false
			}
		}

		return true
	}

	numFiles := len(*data)
	numLabelsBeforeFilter := 0
	numLabelsAfterFilter := 0

	// Apply filters.
	for dataIdx := 0; dataIdx < len(*data); dataIdx++ {
		d := &(*data)[dataIdx]
		numLabelsBeforeFilter += len(d.Annotations)

		// Annotation filters.
		for i := 0; i < len(d.Annotations); i++ {
			a := &d.Annotations[i]

			if !keep(a) {
				// Delete by moving the last annotation into this slot, then revisit the slot.
				last := len(d.Annotations) - 1
				d.Annotations[i] = d.Annotations[last]
				d.Annotations = d.Annotations[:last]
				i--
				continue
			}

			// Filter attributes.
			if len(attributes) > 0 {
				for k := range a.Attributes {
					if !inList(k, attributes) {
						delete(a.Attributes, k)
					}
				}
			}
		}

		numLabelsAfterFilter += len(d.Annotations)

		// Delete the file annotation if files with no labels are filtered out. The last file is
		// moved into this slot, which is then visited again.
		if requireLabel && len(d.Annotations) == 0 {
			last := len(*data) - 1
			(*data)[dataIdx] = (*data)[last]
			*data = (*data)[:last]
			dataIdx--
		}
	}

	log.Printf("Filtered out %d labels and %d files",
		numLabelsBeforeFilter-numLabelsAfterFilter, numFiles-len(*data))
}

// ProcessImages resizes all referenced images and writes them to imageOutDir using the specified
// encoding.
//
// Resizing is enabled if longerSide or shorterSide is greater than zero; both values are passed
// on to the resize step. If neither resizing nor cropping is requested, the method returns nil
// without doing anything.
//
// downsamplingFilter and upsamplingFilter must each be one of "nearest", "box", "linear",
// "gaussian" or "lanczos". Both names are validated whenever there is work to do, even if only
// cropping was requested. encoding must be "jpg", "jpeg" or "png" (case-insensitive) and selects
// the output file extension. jpegQuality is passed on to the image writer.
//
// If doCropObjects is true, individual objects as per the labels are cropped from the images. The
// crops are resized instead of the original images in this case. The data changes accordingly, with
// 0 or more cropped images replacing the original AnnotatedFile. Because the crops are collected
// from concurrently running workers, the order of the resulting list is not guaranteed to follow
// the input order.
//
// At most one of the errors reported while processing is returned, as the error channel buffers
// a single value. Processing of the remaining images continues after an error, and with
// doCropObjects the data is replaced by the collected crop metadata even if an error is returned.
func (data *AnnotatedFiles) ProcessImages(imageOutDir string, longerSide, shorterSide int,
		downsamplingFilter, upsamplingFilter, encoding string, jpegQuality int,
		doCropObjects bool) error {

	doResizeImages := longerSide > 0 || shorterSide > 0
	if !doResizeImages && !doCropObjects {
		return nil
	}
	log.Print("Processing images")

	// Select the resampling algorithms. The initial values are always overwritten below, since an
	// unknown filter name (including the empty string) is an error.
	downsample := imaging.Box
	upsample := imaging.Linear
	filters := []struct {
		name   string
		filter *imaging.ResampleFilter
	}{
		{downsamplingFilter, &downsample},
		{upsamplingFilter, &upsample},
	}
	for _, v := range filters {
		switch v.name {
		case "nearest":
			*v.filter = imaging.NearestNeighbor
		case "box":
			*v.filter = imaging.Box
		case "linear":
			*v.filter = imaging.Linear
		case "gaussian":
			*v.filter = imaging.Gaussian
		case "lanczos":
			*v.filter = imaging.Lanczos
		default:
			return fmt.Errorf("unknown resampling filter %q", v.name)
		}
	}

	// Select the output file extension based on the requested encoding.
	var fileExt string
	switch strings.ToLower(encoding) {
	case "jpg", "jpeg":
		fileExt = ".jpg"
	case "png":
		fileExt = ".png"
	default:
		return fmt.Errorf("unsupported output encoding %q", encoding)
	}

	// Prepare for concurrent processing. Limit the number of goroutines in flight, as they load
	// potentially large images into memory. There are never more workers than files.
	numTasks := 2 * runtime.NumCPU()
	if len(*data) < numTasks {
		numTasks = len(*data)
	}
	workQueue := make(chan *AnnotatedFile, 2*numTasks)

	// The crop metadata channel is only created when cropping; it stays nil otherwise.
	var croppedData []AnnotatedFile
	var croppedDataCh chan *AnnotatedFile
	if doCropObjects {
		croppedData = make([]AnnotatedFile, 0, len(*data))
		croppedDataCh = make(chan *AnnotatedFile, 2*numTasks)
	}

	// Buffer for a single error, which is returned after all workers have finished.
	errors := make(chan error, 1)
	var wg sync.WaitGroup

	// Process images concurrently from a work queue.
	wg.Add(numTasks)
	for i := 0; i < numTasks; i++ {
		go func() {
			defer wg.Done()
			for d := range workQueue {
				processImage(d, imageOutDir, fileExt, longerSide, shorterSide, downsample,
					upsample, jpegQuality, doCropObjects, doResizeImages, croppedDataCh, errors)
			}
		}()
	}

	// Append image metadata for cropped images. A single goroutine performs all appends, and
	// croppedData is only read again after wgAppend.Wait().
	var wgAppend sync.WaitGroup
	if doCropObjects {
		wgAppend.Add(1)
		go func() {
			defer wgAppend.Done()
			for d := range croppedDataCh {
				croppedData = append(croppedData, *d)
			}
		}()
	}

	// Feed the work queue. Closing it lets the workers leave their range loop once it is drained.
	for i := range *data {
		workQueue <- &(*data)[i]
	}
	close(workQueue)

	// Wait for image processing to finish.
	wg.Wait()
	if doCropObjects {
		// Wait for all new metadata to be appended and then replace the old data. The channel is
		// closed only after all workers have finished, as they are its senders.
		close(croppedDataCh)
		wgAppend.Wait()
		*data = croppedData
	}

	// A buffered error can still be received after the channel has been closed.
	close(errors)
	if len(errors) > 0 {
		return <-errors
	}

	return nil
}

// processImage loads the image referenced by data, optionally crops the annotated objects out of
// it, optionally resizes the result(s) and writes them to imageOutDir with the extension fileExt.
//
// Without cropping, data itself is updated: its FilePath is set to the written file and, when
// resizing, its coordinates are scaled by the factors reported by the resize step.
//
// If and only if doCropObjects is true, data is left untouched and the updated metadata of each
// written crop is sent to croppedData instead.
//
// Errors are reported through the errors channel without blocking, so an error is dropped if the
// channel cannot take it right away. Processing of this image stops at the first error; crops
// that were written before the error have already been sent to croppedData.
func processImage(data *AnnotatedFile, imageOutDir, fileExt string, longerSide, shorterSide int,
	downsample, upsample imaging.ResampleFilter, jpegQuality int, doCropObjects, doResizeImage bool,
	croppedData chan<- *AnnotatedFile, errors chan<- error) {

	// fail hands err to the error channel if that is possible without blocking.
	fail := func(err error) {
		select {
		case errors <- err:
		default:
		}
	}

	// Read the image.
	source, _, err := loadImage(data.FilePath)
	if err != nil {
		fail(err)
		return
	}

	// Determine what to process: either the original image or the crops of its objects.
	var (
		targets []image.Image
		metas   []*AnnotatedFile
	)
	if !doCropObjects {
		targets = []image.Image{source}
		metas = []*AnnotatedFile{data}
	} else {
		// The original image is not further processed in this case.
		var cropMetas []AnnotatedFile
		targets, cropMetas, err = data.cropObjectsFromImage(source)
		if err != nil {
			fail(err)
			return
		}

		metas = make([]*AnnotatedFile, 0, len(cropMetas))
		for i := range cropMetas {
			metas = append(metas, &cropMetas[i])
		}
	}

	for idx, meta := range metas {
		target := targets[idx]

		// Resize.
		var scaleX, scaleY float64
		if doResizeImage {
			target, scaleX, scaleY, err =
				resizeImage(target, longerSide, shorterSide, downsample, upsample)
			if err != nil {
				fail(err)
				return
			}
		}

		// Save the image under its base name with the extension swapped for fileExt.
		base := filepath.Base(meta.FilePath)
		stem := base[:len(base)-len(filepath.Ext(base))]
		outPath := filepath.Join(imageOutDir, stem+fileExt)
		if saveErr := saveImage(outPath, target, jpegQuality); saveErr != nil {
			fail(saveErr)
			return
		}

		// Update the image file path and rescale the coordinates.
		meta.FilePath = outPath
		if doResizeImage {
			meta.scaleCoords(scaleX, scaleY)
		}

		// Return the metadata for the cropped image.
		if doCropObjects {
			croppedData <- meta
		}
	}
}

// Split randomly splits the data into multiple datasets.
//
// The cumulativeSplits specify the cumulative distribution, in percent, according to which the
// data is split into the returned datasets: entry i is the upper bound of the percentage range
// assigned to dataset i. For example, {70, 90, 100} yields three datasets with an expected
// 70%, 20% and 10% of the files. The values must be non-negative and non-decreasing, and the last
// value must be 100.
//
// One dataset is returned per entry of cumulativeSplits. Each file is assigned independently at
// random, so the actual dataset sizes only approximate the requested distribution. The relative
// order of the files is preserved within each dataset. The random number generator is seeded with
// the current time.
//
// An error is returned if cumulativeSplits is empty or otherwise violates the rules above.
func (data *AnnotatedFiles) Split(cumulativeSplits []int) ([]AnnotatedFiles, error) {
	// Validate before allocating: a decreasing sequence would otherwise result in a negative
	// capacity below, which makes the allocation panic.
	prev := 0
	for _, s := range cumulativeSplits {
		if s < prev {
			return nil, fmt.Errorf(
				"invalid cumulative split %d: values must be non-negative and non-decreasing", s)
		}
		prev = s
	}
	if prev != 100 {
		return nil, fmt.Errorf("the split percentages do not add up to 100")
	}

	// Allocate slightly more than the expected size for each dataset.
	datasets := make([]AnnotatedFiles, len(cumulativeSplits))
	prev = 0
	for i, s := range cumulativeSplits {
		percent := s - prev
		datasets[i] = make(AnnotatedFiles, 0, int(1.05*float64(percent)/100*float64(len(*data))))
		prev = s
	}

	// Split the data.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	for _, d := range *data {
		// r is in [0, 100), so it is always below the final bound of 100.
		r := rng.Intn(100)
		for i, s := range cumulativeSplits {
			if r < s {
				datasets[i] = append(datasets[i], d)
				break
			}
		}
	}

	return datasets, nil
}