From ba650060c7b2053c35a41c81dbe95b17e881c206 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sat, 27 Feb 2016 03:43:23 -0500 Subject: [PATCH 1/3] OpenCV 2.4 rehaul, cmake build pipeline, readme --- CMakeLists.txt | 36 ++++ FeaturesMain.cpp | 120 +++++------ README | 20 +- TextDetection.cpp | 536 +++++++++++++++++++++++----------------------- TextDetection.h | 61 +++--- 5 files changed, 403 insertions(+), 370 deletions(-) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..24c7dc7 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required (VERSION 3.2) +project (DetectText) + +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) +CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +elseif(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + +set(CMAKE_BUILD_TYPE Debug) + +find_package(OpenCV REQUIRED core imgproc video highgui features2d) + +################ Boost ################# +set(Boost_USE_STATIC_LIBS ON) +FIND_PACKAGE(Boost COMPONENTS system REQUIRED) +include_directories(${Boost_INCLUDE_DIR}) +######################################## + +file(GLOB DETECT_TEXT_SOURCES *.cpp *.c) +file(GLOB DETECT_TEXT_HEADERS *.hpp *.h) + +add_executable(DetectText + ${DETECT_TEXT_SOURCES} + ${DETECT_TEXT_HEADERS} + ) + +target_link_libraries(DetectText + ${Boost} + ${OpenCV_LIBS} +) \ No newline at end of file diff --git a/FeaturesMain.cpp b/FeaturesMain.cpp index c01eda8..738b32b 100755 --- a/FeaturesMain.cpp +++ b/FeaturesMain.cpp @@ -19,84 +19,76 @@ */ #include #include -#include "TextDetection.h" -#include #include +#include -void convertToFloatImage ( IplImage * byteImage, IplImage * floatImage ) -{ - cvConvertScale ( byteImage, floatImage, 1 / 255., 0 ); -} +#include +#include +#include -class FeatureError : public std::exception -{ -std::string message; -public: -FeatureError ( const std::string & msg, const std::string & file ) -{ - std::stringstream ss; +#include "TextDetection.h" - ss << msg << " " << file; - message = msg.c_str (); -} -~FeatureError () throw ( ) +using namespace std; +using namespace cv; +using namespace DetectText; + +void convertToFloatImage ( Mat& byteImage, Mat& floatImage ) { + byteImage.convertTo(floatImage, CV_32FC1, 1 / 255.); } + +class FeatureError: public std::exception { + std::string message; +public: + FeatureError(const std::string & msg, const std::string & file) { + std::stringstream ss; + + ss << msg << " " << file; + message = msg.c_str(); + } + ~FeatureError() throw () { + } }; -IplImage * loadByteImage ( const char * name ) -{ - IplImage * image = cvLoadImage ( name ); +Mat loadByteImage(const char * name) { + Mat image = imread(name); - if ( !image ) - { - return 0; - } - cvCvtColor ( image, image, CV_BGR2RGB ); - return image; + if (image.empty()) { + return Mat(); + } + cvtColor(image, image, CV_BGR2RGB); + return image; } -IplImage * loadFloatImage ( const char * name ) -{ - IplImage * image = cvLoadImage ( name ); +Mat loadFloatImage(const char * name) { + Mat image = imread(name); - if ( !image ) - { - return 0; - } - cvCvtColor ( image, image, CV_BGR2RGB ); - IplImage * floatingImage = cvCreateImage ( cvGetSize ( image ), - IPL_DEPTH_32F, 3 ); - cvConvertScale ( image, 
floatingImage, 1 / 255., 0 ); - cvReleaseImage ( &image ); - return floatingImage; + if (image.empty()) { + return Mat(); + } + cvtColor(image, image, CV_BGR2RGB); + Mat floatingImage(image.size(), CV_32FC3); + image.convertTo(floatingImage, CV_32F, 1 / 255.); + return floatingImage; } -int mainTextDetection ( int argc, char * * argv ) -{ - IplImage * byteQueryImage = loadByteImage ( argv[1] ); - if ( !byteQueryImage ) - { - printf ( "couldn't load query image\n" ); - return -1; - } - - // Detect text in the image - IplImage * output = textDetection ( byteQueryImage, atoi(argv[3]) ); - cvReleaseImage ( &byteQueryImage ); - cvSaveImage ( argv[2], output ); - cvReleaseImage ( &output ); - return 0; +int mainTextDetection(int argc, char** argv) { + Mat byteQueryImage = loadByteImage(argv[1]); + if (byteQueryImage.empty()) { + cerr << "couldn't load query image" << endl; + return -1; + } + + // Detect text in the image + Mat output = textDetection(byteQueryImage, atoi(argv[3])); + imwrite(argv[2], output); + return 0; } -int main ( int argc, char * * argv ) -{ - if ( ( argc != 4 ) ) - { - printf ( "usage: %s imagefile resultImage darkText\n", - argv[0] ); - - return -1; - } - return mainTextDetection ( argc, argv ); +int main(int argc, char** argv) { + if ((argc != 4)) { + cerr << "usage: " << argv[0] << " imagefile resultImage darkText" << endl; + return -1; + } + return mainTextDetection(argc, argv); } diff --git a/README b/README index 8058a71..87264b7 100644 --- a/README +++ b/README @@ -3,18 +3,24 @@ DetectText Detect text with stroke width transform. -Dependencies: OpenCV, boost. +## Dependencies +OpenCV 2.4+, boost. -To compile: +## Compile g++ -o DetectText TextDetection.cpp FeaturesMain.cpp -lopencv_core -lopencv_highgui -lopencv_imgproc -I/path/to/current/directory where /path/to/current/directory is replaced with the absolute path to the current directory. -To run: +### Using CMake + + mkdir build + cd build + cmake .. + make + +## To run ./TextDetection input_file output_file dark_on_light where dark_on_light is 1 or 0, indicating whether the text is darker or lighter than the background. 
-GitHub repository: -https://github.com/aperrau/DetectText - -More details on the algorithm can be found in: +## More +Details on the algorithm can be found in: http://www.cs.cornell.edu/courses/cs4670/2010fa/projects/final/results/group_of_arp86_sk2357/Writeup.pdf \ No newline at end of file diff --git a/TextDetection.cpp b/TextDetection.cpp index c4184b5..ebb006b 100755 --- a/TextDetection.cpp +++ b/TextDetection.cpp @@ -33,76 +33,86 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include #include #include -#include + +using namespace cv; + +#include "TextDetection.h" #define PI 3.14159265 -std::vector > findBoundingBoxes( std::vector > & components, +namespace DetectText { + +const Scalar BLUE (255, 0, 0); +const Scalar GREEN(0, 255, 0); +const Scalar RED (0, 0, 255); + +std::vector findBoundingBoxes( std::vector > & components, std::vector & chains, - std::vector > & compBB, - IplImage * output) { - std::vector > bb; + std::vector & compBB, + Mat& output) { + std::vector bb; bb.reserve(chains.size()); - for (std::vector::iterator chainit = chains.begin(); chainit != chains.end(); chainit++) { - int minx = output->width; - int miny = output->height; + for (auto& chainit : chains) { + int minx = output.cols; + int miny = output.rows; int maxx = 0; int maxy = 0; - for (std::vector::const_iterator cit = chainit->components.begin(); cit != chainit->components.end(); cit++) { + for (std::vector::const_iterator cit = chainit.components.begin(); cit != chainit.components.end(); cit++) { miny = std::min(miny,compBB[*cit].first.y); minx = std::min(minx,compBB[*cit].first.x); maxy = std::max(maxy,compBB[*cit].second.y); maxx = std::max(maxx,compBB[*cit].second.x); } - CvPoint p0 = cvPoint(minx,miny); - CvPoint p1 = cvPoint(maxx,maxy); - std::pair pair(p0,p1); + Point2i p0(minx,miny); + Point2i p1(maxx,maxy); + SWTPointPair2i pair(p0,p1); bb.push_back(pair); } return bb; } -std::vector > findBoundingBoxes( std::vector > & components, - IplImage * output) { - std::vector > bb; +std::vector findBoundingBoxes( std::vector > & components, + Mat& output) { + std::vector bb; bb.reserve(components.size()); - for (std::vector >::iterator compit = components.begin(); compit != components.end(); compit++) { - int minx = output->width; - int miny = output->height; + for (auto& compit : components) { + int minx = output.cols; + int miny = output.rows; int maxx = 0; int maxy = 0; - for (std::vector::iterator it = compit->begin(); it != compit->end(); it++) { - miny = std::min(miny,it->y); - minx = std::min(minx,it->x); - maxy = std::max(maxy,it->y); - maxx = std::max(maxx,it->x); + for (auto& it : compit) { + miny = std::min(miny,it.y); + minx = std::min(minx,it.x); + maxy = std::max(maxy,it.y); + maxx = std::max(maxx,it.x); } - CvPoint p0 = cvPoint(minx,miny); - CvPoint p1 = cvPoint(maxx,maxy); - std::pair pair(p0,p1); + Point2i p0(minx,miny); + Point2i p1(maxx,maxy); + SWTPointPair2i pair(p0,p1); bb.push_back(pair); } return bb; } -void normalizeImage (IplImage * input, IplImage * output) { - assert ( input->depth == IPL_DEPTH_32F ); - assert ( input->nChannels == 1 ); - assert ( output->depth == IPL_DEPTH_32F ); - assert ( output->nChannels == 1 ); +void normalizeImage (const Mat& input, Mat& output) { + assert ( input.depth() == CV_32F ); + assert ( input.channels() == 1 ); + assert ( output.depth() == CV_32F ); + assert ( output.channels() == 1 ); + float maxVal = 0; float minVal = 1e100; - for( int row = 0; row < input->height; row++ ){ - const 
float* ptr = (const float*)(input->imageData + row * input->widthStep); - for ( int col = 0; col < input->width; col++ ){ + for ( int row = 0; row < input.rows; row++ ){ + const float* ptr = (const float*)input.ptr(row); + for ( int col = 0; col < input.cols; col++ ){ if (*ptr < 0) { } else { maxVal = std::max(*ptr, maxVal); @@ -113,10 +123,10 @@ void normalizeImage (IplImage * input, IplImage * output) { } float difference = maxVal - minVal; - for( int row = 0; row < input->height; row++ ){ - const float* ptrin = (const float*)(input->imageData + row * input->widthStep);\ - float* ptrout = (float*)(output->imageData + row * output->widthStep);\ - for ( int col = 0; col < input->width; col++ ){ + for ( int row = 0; row < input.rows; row++ ) { + const float* ptrin = (const float*)input.ptr(row); + float* ptrout = (float*)output.ptr(row); + for ( int col = 0; col < input.cols; col++ ) { if (*ptrin < 0) { *ptrout = 1; } else { @@ -128,16 +138,17 @@ void normalizeImage (IplImage * input, IplImage * output) { } } -void renderComponents (IplImage * SWTImage, std::vector > & components, IplImage * output) { - cvZero(output); - for (std::vector >::iterator it = components.begin(); it != components.end();it++) { - for (std::vector::iterator pit = it->begin(); pit != it->end(); pit++) { - CV_IMAGE_ELEM(output, float, pit->y, pit->x) = CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x); +void renderComponents (const Mat& SWTImage, std::vector > & components, Mat& output) { + output.setTo(0); + + for (auto& component : components) { + for (auto& pit : component) { + output.at(pit.y, pit.x) = SWTImage.at(pit.y, pit.x); } } - for( int row = 0; row < output->height; row++ ){ - float* ptr = (float*)(output->imageData + row * output->widthStep); - for ( int col = 0; col < output->width; col++ ){ + for( int row = 0; row < output.rows; row++ ){ + float* ptr = (float*)output.ptr(row); + for ( int col = 0; col < output.cols; col++ ){ if (*ptr == 0) { *ptr = -1; } @@ -146,9 +157,9 @@ void renderComponents (IplImage * SWTImage, std::vector > & } float maxVal = 0; float minVal = 1e100; - for( int row = 0; row < output->height; row++ ){ - const float* ptr = (const float*)(output->imageData + row * output->widthStep); - for ( int col = 0; col < output->width; col++ ){ + for( int row = 0; row < output.rows; row++ ){ + const float* ptr = (const float*)output.ptr(row); + for ( int col = 0; col < output.cols; col++ ){ if (*ptr == 0) { } else { maxVal = std::max(*ptr, maxVal); @@ -158,9 +169,9 @@ void renderComponents (IplImage * SWTImage, std::vector > & } } float difference = maxVal - minVal; - for( int row = 0; row < output->height; row++ ){ - float* ptr = (float*)(output->imageData + row * output->widthStep);\ - for ( int col = 0; col < output->width; col++ ){ + for( int row = 0; row < output.rows; row++ ){ + float* ptr = (float*)output.ptr(row); + for ( int col = 0; col < output.cols; col++ ){ if (*ptr < 1) { *ptr = 1; } else { @@ -172,91 +183,96 @@ void renderComponents (IplImage * SWTImage, std::vector > & } -void renderComponentsWithBoxes (IplImage * SWTImage, std::vector > & components, - std::vector > & compBB, IplImage * output) { - IplImage * outTemp = - cvCreateImage ( cvGetSize ( output ), IPL_DEPTH_32F, 1 ); +void renderComponentsWithBoxes (Mat& SWTImage, std::vector > & components, + std::vector & compBB, Mat& output) { + Mat outTemp( output.size(), CV_32FC1 ); + + renderComponents(SWTImage, components, outTemp); - renderComponents(SWTImage,components,outTemp); - std::vector > bb; + std::vector bb; 
bb.reserve(compBB.size()); - for (std::vector >::iterator it=compBB.begin(); it != compBB.end(); it++ ) { - CvPoint p0 = cvPoint(it->first.x,it->first.y); - CvPoint p1 = cvPoint(it->second.x,it->second.y); - std::pair pair(p0,p1); + for (auto& it : compBB) { + Point2i p0 = cvPoint(it.first.x, it.first.y); + Point2i p1 = cvPoint(it.second.x, it.second.y); + SWTPointPair2i pair(p0, p1); bb.push_back(pair); } - IplImage * out = - cvCreateImage ( cvGetSize ( output ), IPL_DEPTH_8U, 1 ); - cvConvertScale(outTemp, out, 255, 0); - cvCvtColor (out, output, CV_GRAY2RGB); - //cvReleaseImage ( &outTemp ); - //cvReleaseImage ( &out ); + Mat out( output.size(), CV_8UC1 ); + outTemp.convertTo(out, CV_8UC1, 255.); + cvtColor (out, output, CV_GRAY2RGB); int count = 0; - for (std::vector >::iterator it= bb.begin(); it != bb.end(); it++) { - CvScalar c; - if (count % 3 == 0) c=cvScalar(255,0,0); - else if (count % 3 == 1) c=cvScalar(0,255,0); - else c=cvScalar(0,0,255); + for (auto it : bb) { + Scalar c; + if (count % 3 == 0) { + c = BLUE; + } + else if (count % 3 == 1) { + c = GREEN; + } + else { + c = RED; + } count++; - cvRectangle(output,it->first,it->second,c,2); + rectangle(output, it.first, it.second, c, 2); } } -void renderChainsWithBoxes (IplImage * SWTImage, - std::vector > & components, +void renderChainsWithBoxes (Mat& SWTImage, + std::vector > & components, std::vector & chains, - std::vector > & compBB, - IplImage * output) { + std::vector & compBB, + Mat& output) { // keep track of included components std::vector included; included.reserve(components.size()); for (unsigned int i = 0; i != components.size(); i++) { included.push_back(false); } - for (std::vector::iterator it = chains.begin(); it != chains.end();it++) { - for (std::vector::iterator cit = it->components.begin(); cit != it->components.end(); cit++) { + for (Chain& it : chains) { + for (std::vector::iterator cit = it.components.begin(); cit != it.components.end(); cit++) { included[*cit] = true; } } - std::vector > componentsRed; + std::vector > componentsRed; for (unsigned int i = 0; i != components.size(); i++ ) { if (included[i]) { componentsRed.push_back(components[i]); } } - IplImage * outTemp = - cvCreateImage ( cvGetSize ( output ), IPL_DEPTH_32F, 1 ); + Mat outTemp( output.size(), CV_32FC1 ); std::cout << componentsRed.size() << " components after chaining" << std::endl; - renderComponents(SWTImage,componentsRed,outTemp); - std::vector > bb; + renderComponents(SWTImage, componentsRed, outTemp); + std::vector bb; bb = findBoundingBoxes(components, chains, compBB, outTemp); - IplImage * out = - cvCreateImage ( cvGetSize ( output ), IPL_DEPTH_8U, 1 ); - cvConvertScale(outTemp, out, 255, 0); - cvCvtColor (out, output, CV_GRAY2RGB); - cvReleaseImage ( &out ); - cvReleaseImage ( &outTemp); + Mat out( output.size(), CV_8UC1 ); + outTemp.convertTo(out, CV_8UC1, 255); + cvtColor (out, output, CV_GRAY2RGB); int count = 0; - for (std::vector >::iterator it= bb.begin(); it != bb.end(); it++) { + for (auto& it : bb) { CvScalar c; - if (count % 3 == 0) c=cvScalar(255,0,0); - else if (count % 3 == 1) c=cvScalar(0,255,0); - else c=cvScalar(0,0,255); + if (count % 3 == 0) { + c = BLUE; + } + else if (count % 3 == 1) { + c = GREEN; + } + else { + c = RED; + } count++; - cvRectangle(output,it->first,it->second,c,2); + rectangle(output, it.first, it.second, c, 2); } } -void renderChains (IplImage * SWTImage, - std::vector > & components, +void renderChains (Mat& SWTImage, + std::vector > & components, std::vector & chains, - IplImage * 
output) { + Mat& output) { // keep track of included components std::vector included; included.reserve(components.size()); @@ -268,146 +284,129 @@ void renderChains (IplImage * SWTImage, included[*cit] = true; } } - std::vector > componentsRed; + std::vector > componentsRed; for (unsigned int i = 0; i != components.size(); i++ ) { if (included[i]) { componentsRed.push_back(components[i]); } } std::cout << componentsRed.size() << " components after chaining" << std::endl; - IplImage * outTemp = - cvCreateImage ( cvGetSize ( output ), IPL_DEPTH_32F, 1 ); + Mat outTemp( output.size(), CV_32FC1 ); renderComponents(SWTImage,componentsRed,outTemp); - cvConvertScale(outTemp, output, 255, 0); - cvReleaseImage(&outTemp); + outTemp.convertTo(output, CV_8UC1, 255); + } -IplImage * textDetection (IplImage * input, bool dark_on_light) -{ - assert ( input->depth == IPL_DEPTH_8U ); - assert ( input->nChannels == 3 ); +Mat textDetection (const Mat& input, bool dark_on_light) { + assert ( input.depth() == CV_8U ); + assert ( input.channels() == 3 ); + std::cout << "Running textDetection with dark_on_light " << dark_on_light << std::endl; + // Convert to grayscale - IplImage * grayImage = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_8U, 1 ); - cvCvtColor ( input, grayImage, CV_RGB2GRAY ); + Mat grayImage( input.size(), CV_8UC1 ); + cvtColor ( input, grayImage, CV_RGB2GRAY ); // Create Canny Image double threshold_low = 175; double threshold_high = 320; - IplImage * edgeImage = - cvCreateImage( cvGetSize (input),IPL_DEPTH_8U, 1 ); - cvCanny(grayImage, edgeImage, threshold_low, threshold_high, 3) ; - cvSaveImage ( "canny.png", edgeImage); + Mat edgeImage( input.size(),CV_8UC1 ); + Canny(grayImage, edgeImage, threshold_low, threshold_high, 3) ; + imwrite ( "canny.png", edgeImage); // Create gradient X, gradient Y - IplImage * gaussianImage = - cvCreateImage ( cvGetSize(input), IPL_DEPTH_32F, 1); - cvConvertScale (grayImage, gaussianImage, 1./255., 0); - cvSmooth( gaussianImage, gaussianImage, CV_GAUSSIAN, 5, 5); - IplImage * gradientX = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_32F, 1 ); - IplImage * gradientY = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_32F, 1 ); - cvSobel(gaussianImage, gradientX , 1, 0, CV_SCHARR); - cvSobel(gaussianImage, gradientY , 0, 1, CV_SCHARR); - cvSmooth(gradientX, gradientX, 3, 3); - cvSmooth(gradientY, gradientY, 3, 3); - cvReleaseImage ( &gaussianImage ); - cvReleaseImage ( &grayImage ); + Mat gaussianImage( input.size(), CV_32FC1); + grayImage.convertTo(gaussianImage, CV_32FC1, 1./255.); + GaussianBlur( gaussianImage, gaussianImage, Size(5, 5), 0); + Mat gradientX( input.size(), CV_32FC1 ); + Mat gradientY( input.size(), CV_32FC1 ); + Scharr(gaussianImage, gradientX, -1, 1, 0); + Scharr(gaussianImage, gradientY, -1, 0, 1); + GaussianBlur(gradientX, gradientX, Size(3, 3), 0); + GaussianBlur(gradientY, gradientY, Size(3, 3), 0); // Calculate SWT and return ray vectors std::vector rays; - IplImage * SWTImage = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_32F, 1 ); - for( int row = 0; row < input->height; row++ ){ - float* ptr = (float*)(SWTImage->imageData + row * SWTImage->widthStep); - for ( int col = 0; col < input->width; col++ ){ + Mat SWTImage( input.size(), CV_32FC1 ); + for( int row = 0; row < input.rows; row++ ){ + float* ptr = (float*)SWTImage.ptr(row); + for ( int col = 0; col < input.cols; col++ ){ *ptr++ = -1; } } strokeWidthTransform ( edgeImage, gradientX, gradientY, dark_on_light, SWTImage, rays ); SWTMedianFilter ( SWTImage, rays ); - IplImage 
* output2 = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_32F, 1 ); + Mat output2( input.size(), CV_32FC1 ); normalizeImage (SWTImage, output2); - IplImage * saveSWT = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_8U, 1 ); - cvConvertScale(output2, saveSWT, 255, 0); - cvSaveImage ( "SWT.png", saveSWT); - cvReleaseImage ( &output2 ); - cvReleaseImage( &saveSWT ); + Mat saveSWT( input.size(), CV_8UC1 ); + output2.convertTo(saveSWT, CV_8UC1, 255); + imwrite ( "SWT.png", saveSWT); + + // Calculate legally connect components from SWT and gradient image. // return type is a vector of vectors, where each outer vector is a component and // the inner vector contains the (y,x) of each pixel in that component. - std::vector > components = findLegallyConnectedComponents(SWTImage, rays); + std::vector > components = findLegallyConnectedComponents(SWTImage, rays); // Filter the components - std::vector > validComponents; - std::vector > compBB; + std::vector > validComponents; + std::vector compBB; std::vector compCenters; std::vector compMedians; - std::vector compDimensions; + std::vector compDimensions; filterComponents(SWTImage, components, validComponents, compCenters, compMedians, compDimensions, compBB ); - IplImage * output3 = - cvCreateImage ( cvGetSize ( input ), 8U, 3 ); + Mat output3( input.size(), CV_8UC3 ); renderComponentsWithBoxes (SWTImage, validComponents, compBB, output3); - cvSaveImage ( "components.png",output3); - //cvReleaseImage ( &output3 ); + imwrite ( "components.png",output3); + // // Make chains of components std::vector chains; chains = makeChains(input, validComponents, compCenters, compMedians, compDimensions, compBB); - IplImage * output4 = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_8U, 1 ); + Mat output4( input.size(), CV_8UC1 ); renderChains ( SWTImage, validComponents, chains, output4 ); - //cvSaveImage ( "text.png", output4); + //imwrite ( "text.png", output4); + + Mat output5( input.size(), CV_8UC3 ); + cvtColor (output4, output5, CV_GRAY2RGB); - IplImage * output5 = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_8U, 3 ); - cvCvtColor (output4, output5, CV_GRAY2RGB); - cvReleaseImage ( &output4 ); /*IplImage * output = - cvCreateImage ( cvGetSize ( input ), IPL_DEPTH_8U, 3 ); + cvCreateImage ( input.size(), CV_8UC3 ); renderChainsWithBoxes ( SWTImage, validComponents, chains, compBB, output); */ - cvReleaseImage ( &gradientX ); - cvReleaseImage ( &gradientY ); - cvReleaseImage ( &SWTImage ); - cvReleaseImage ( &edgeImage ); return output5; } -void strokeWidthTransform (IplImage * edgeImage, - IplImage * gradientX, - IplImage * gradientY, +void strokeWidthTransform (const Mat& edgeImage, + Mat& gradientX, + Mat& gradientY, bool dark_on_light, - IplImage * SWTImage, + Mat& SWTImage, std::vector & rays) { // First pass float prec = .05; - for( int row = 0; row < edgeImage->height; row++ ){ - const uchar* ptr = (const uchar*)(edgeImage->imageData + row * edgeImage->widthStep); - for ( int col = 0; col < edgeImage->width; col++ ){ + for( int row = 0; row < edgeImage.rows; row++ ){ + const uchar* ptr = (const uchar*)edgeImage.ptr(row); + for ( int col = 0; col < edgeImage.cols; col++ ){ if (*ptr > 0) { Ray r; - Point2d p; + SWTPoint2d p; p.x = col; p.y = row; r.p = p; - std::vector points; + std::vector points; points.push_back(p); float curX = (float)col + 0.5; float curY = (float)row + 0.5; int curPixX = col; int curPixY = row; - float G_x = CV_IMAGE_ELEM ( gradientX, float, row, col); - float G_y = CV_IMAGE_ELEM ( gradientY, float, row, col); + float G_x 
= gradientX.at(row, col); + float G_y = gradientY.at(row, col); // normalize gradient float mag = sqrt( (G_x * G_x) + (G_y * G_y) ); if (dark_on_light){ @@ -425,36 +424,36 @@ void strokeWidthTransform (IplImage * edgeImage, curPixX = (int)(floor(curX)); curPixY = (int)(floor(curY)); // check if pixel is outside boundary of image - if (curPixX < 0 || (curPixX >= SWTImage->width) || curPixY < 0 || (curPixY >= SWTImage->height)) { + if (curPixX < 0 || (curPixX >= SWTImage.cols) || curPixY < 0 || (curPixY >= SWTImage.rows)) { break; } - Point2d pnew; + SWTPoint2d pnew; pnew.x = curPixX; pnew.y = curPixY; points.push_back(pnew); - if (CV_IMAGE_ELEM ( edgeImage, uchar, curPixY, curPixX) > 0) { + if (edgeImage.at(curPixY, curPixX) > 0) { r.q = pnew; // dot product - float G_xt = CV_IMAGE_ELEM(gradientX,float,curPixY,curPixX); - float G_yt = CV_IMAGE_ELEM(gradientY,float,curPixY,curPixX); + float G_xt = gradientX.at(curPixY,curPixX); + float G_yt = gradientY.at(curPixY,curPixX); mag = sqrt( (G_xt * G_xt) + (G_yt * G_yt) ); - if (dark_on_light){ - G_xt = -G_xt/mag; - G_yt = -G_yt/mag; + if (dark_on_light) { + G_xt = -G_xt / mag; + G_yt = -G_yt / mag; } else { - G_xt = G_xt/mag; - G_yt = G_yt/mag; + G_xt = G_xt / mag; + G_yt = G_yt / mag; } if (acos(G_x * -G_xt + G_y * -G_yt) < PI/2.0 ) { float length = sqrt( ((float)r.q.x - (float)r.p.x)*((float)r.q.x - (float)r.p.x) + ((float)r.q.y - (float)r.p.y)*((float)r.q.y - (float)r.p.y)); - for (std::vector::iterator pit = points.begin(); pit != points.end(); pit++) { - if (CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x) < 0) { - CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x) = length; + for (std::vector::iterator pit = points.begin(); pit != points.end(); pit++) { + if (SWTImage.at(pit->y, pit->x) < 0) { + SWTImage.at(pit->y, pit->x) = length; } else { - CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x) = std::min(length, CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x)); + SWTImage.at(pit->y, pit->x) = std::min(length, SWTImage.at(pit->y, pit->x)); } } r.points = points; @@ -471,41 +470,36 @@ void strokeWidthTransform (IplImage * edgeImage, } -void SWTMedianFilter (IplImage * SWTImage, - std::vector & rays) { - for (std::vector::iterator rit = rays.begin(); rit != rays.end(); rit++) { - for (std::vector::iterator pit = rit->points.begin(); pit != rit->points.end(); pit++) { - pit->SWT = CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x); +void SWTMedianFilter (Mat& SWTImage, std::vector & rays) { + for (auto& rit : rays) { + for (auto& pit : rit.points) { + pit.SWT = SWTImage.at(pit.y, pit.x); } - std::sort(rit->points.begin(), rit->points.end(), &Point2dSort); - float median = (rit->points[rit->points.size()/2]).SWT; - for (std::vector::iterator pit = rit->points.begin(); pit != rit->points.end(); pit++) { - CV_IMAGE_ELEM(SWTImage, float, pit->y, pit->x) = std::min(pit->SWT, median); + std::sort(rit.points.begin(), rit.points.end(), &Point2dSort); + float median = (rit.points[rit.points.size()/2]).SWT; + for (auto& pit : rit.points) { + SWTImage.at(pit.y, pit.x) = std::min(pit.SWT, median); } } - } -bool Point2dSort (const Point2d &lhs, const Point2d &rhs) { +bool Point2dSort (const SWTPoint2d &lhs, const SWTPoint2d &rhs) { return lhs.SWT < rhs.SWT; } -std::vector< std::vector > -findLegallyConnectedComponents (IplImage * SWTImage, - std::vector & rays) -{ +std::vector< std::vector > findLegallyConnectedComponents (Mat& SWTImage, std::vector & rays) { boost::unordered_map map; - boost::unordered_map revmap; + boost::unordered_map revmap; typedef 
boost::adjacency_list Graph; int num_vertices = 0; // Number vertices for graph. Associate each point with number - for( int row = 0; row < SWTImage->height; row++ ){ - float * ptr = (float*)(SWTImage->imageData + row * SWTImage->widthStep); - for (int col = 0; col < SWTImage->width; col++ ){ + for( int row = 0; row < SWTImage.rows; row++ ){ + float * ptr = (float*)SWTImage.ptr(row); + for (int col = 0; col < SWTImage.cols; col++ ){ if (*ptr > 0) { - map[row * SWTImage->width + col] = num_vertices; - Point2d p; + map[row * SWTImage.cols + col] = num_vertices; + SWTPoint2d p; p.x = col; p.y = row; revmap[num_vertices] = p; @@ -517,30 +511,30 @@ findLegallyConnectedComponents (IplImage * SWTImage, Graph g(num_vertices); - for( int row = 0; row < SWTImage->height; row++ ){ - float * ptr = (float*)(SWTImage->imageData + row * SWTImage->widthStep); - for (int col = 0; col < SWTImage->width; col++ ){ + for( int row = 0; row < SWTImage.rows; row++ ){ + float * ptr = (float*)SWTImage.ptr(row); + for (int col = 0; col < SWTImage.cols; col++ ){ if (*ptr > 0) { // check pixel to the right, right-down, down, left-down - int this_pixel = map[row * SWTImage->width + col]; - if (col+1 < SWTImage->width) { - float right = CV_IMAGE_ELEM(SWTImage, float, row, col+1); + int this_pixel = map[row * SWTImage.cols + col]; + if (col+1 < SWTImage.cols) { + float right = SWTImage.at(row, col+1); if (right > 0 && ((*ptr)/right <= 3.0 || right/(*ptr) <= 3.0)) - boost::add_edge(this_pixel, map.at(row * SWTImage->width + col + 1), g); + boost::add_edge(this_pixel, map.at(row * SWTImage.cols + col + 1), g); } - if (row+1 < SWTImage->height) { - if (col+1 < SWTImage->width) { - float right_down = CV_IMAGE_ELEM(SWTImage, float, row+1, col+1); + if (row+1 < SWTImage.rows) { + if (col+1 < SWTImage.cols) { + float right_down = SWTImage.at(row+1, col+1); if (right_down > 0 && ((*ptr)/right_down <= 3.0 || right_down/(*ptr) <= 3.0)) - boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col + 1), g); + boost::add_edge(this_pixel, map.at((row+1) * SWTImage.cols + col + 1), g); } - float down = CV_IMAGE_ELEM(SWTImage, float, row+1, col); + float down = SWTImage.at(row+1, col); if (down > 0 && ((*ptr)/down <= 3.0 || down/(*ptr) <= 3.0)) - boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col), g); + boost::add_edge(this_pixel, map.at((row+1) * SWTImage.cols + col), g); if (col-1 >= 0) { - float left_down = CV_IMAGE_ELEM(SWTImage, float, row+1, col-1); + float left_down = SWTImage.at(row+1, col-1); if (left_down > 0 && ((*ptr)/left_down <= 3.0 || left_down/(*ptr) <= 3.0)) - boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col - 1), g); + boost::add_edge(this_pixel, map.at((row+1) * SWTImage.cols + col - 1), g); } } } @@ -552,37 +546,37 @@ findLegallyConnectedComponents (IplImage * SWTImage, int num_comp = connected_components(g, &c[0]); - std::vector > components; + std::vector > components; components.reserve(num_comp); std::cout << "Before filtering, " << num_comp << " components and " << num_vertices << " vertices" << std::endl; for (int j = 0; j < num_comp; j++) { - std::vector tmp; + std::vector tmp; components.push_back( tmp ); } for (int j = 0; j < num_vertices; j++) { - Point2d p = revmap[j]; + SWTPoint2d p = revmap[j]; (components[c[j]]).push_back(p); } return components; } -std::vector< std::vector > -findLegallyConnectedComponentsRAY (IplImage * SWTImage, +std::vector< std::vector > +findLegallyConnectedComponentsRAY (Mat& SWTImage, std::vector & rays) { boost::unordered_map map; - 
boost::unordered_map revmap; + boost::unordered_map revmap; typedef boost::adjacency_list Graph; int num_vertices = 0; // Number vertices for graph. Associate each point with number - for( int row = 0; row < SWTImage->height; row++ ){ - float * ptr = (float*)(SWTImage->imageData + row * SWTImage->widthStep); - for (int col = 0; col < SWTImage->width; col++ ){ + for( int row = 0; row < SWTImage.rows; row++ ){ + float * ptr = (float*)SWTImage.ptr(row); + for (int col = 0; col < SWTImage.cols; col++ ){ if (*ptr > 0) { - map[row * SWTImage->width + col] = num_vertices; - Point2d p; + map[row * SWTImage.cols + col] = num_vertices; + SWTPoint2d p; p.x = col; p.y = row; revmap[num_vertices] = p; @@ -599,11 +593,11 @@ findLegallyConnectedComponentsRAY (IplImage * SWTImage, float lastSW = 0; int lastRow = 0; int lastCol = 0; - for (std::vector::const_iterator it2 = it->points.begin(); it2 != it->points.end(); it2++) { - float currentSW = CV_IMAGE_ELEM(SWTImage, float, it2->y, it2->x); + for (std::vector::const_iterator it2 = it->points.begin(); it2 != it->points.end(); it2++) { + float currentSW = SWTImage.at(it2->y, it2->x); if (lastSW == 0) {} else if (lastSW/currentSW<=3.0 || currentSW/lastSW<=3.0){ - boost::add_edge(map.at(it2->y * SWTImage->width + it2->x), map.at(lastRow * SWTImage->width + lastCol), g); + boost::add_edge(map.at(it2->y * SWTImage.cols + it2->x), map.at(lastRow * SWTImage.cols + lastCol), g); } lastSW = currentSW; lastRow = it2->y; @@ -618,23 +612,23 @@ findLegallyConnectedComponentsRAY (IplImage * SWTImage, int num_comp = connected_components(g, &c[0]); - std::vector > components; + std::vector > components; components.reserve(num_comp); std::cout << "Before filtering, " << num_comp << " components and " << num_vertices << " vertices" << std::endl; for (int j = 0; j < num_comp; j++) { - std::vector tmp; + std::vector tmp; components.push_back( tmp ); } for (int j = 0; j < num_vertices; j++) { - Point2d p = revmap[j]; + SWTPoint2d p = revmap[j]; (components[c[j]]).push_back(p); } return components; } -void componentStats(IplImage * SWTImage, - const std::vector & component, +void componentStats(Mat& SWTImage, + const std::vector & component, float & mean, float & variance, float & median, int & minx, int & miny, int & maxx, int & maxy) { @@ -646,8 +640,8 @@ void componentStats(IplImage * SWTImage, miny = 1000000; maxx = 0; maxy = 0; - for (std::vector::const_iterator it = component.begin(); it != component.end(); it++) { - float t = CV_IMAGE_ELEM(SWTImage, float, it->y, it->x); + for (std::vector::const_iterator it = component.begin(); it != component.end(); it++) { + float t = SWTImage.at(it->y, it->x); mean += t; temp.push_back(t); miny = std::min(miny,it->y); @@ -665,13 +659,13 @@ void componentStats(IplImage * SWTImage, } -void filterComponents(IplImage * SWTImage, - std::vector > & components, - std::vector > & validComponents, +void filterComponents(Mat& SWTImage, + std::vector > & components, + std::vector > & validComponents, std::vector & compCenters, std::vector & compMedians, - std::vector & compDimensions, - std::vector > & compBB ) + std::vector & compDimensions, + std::vector & compBB ) { validComponents.reserve(components.size()); compCenters.reserve(components.size()); @@ -679,7 +673,7 @@ void filterComponents(IplImage * SWTImage, compDimensions.reserve(components.size()); // bounding boxes compBB.reserve(components.size()); - for (std::vector >::iterator it = components.begin(); it != components.end();it++) { + for (std::vector >::iterator it = 
components.begin(); it != components.end();it++) { // compute the stroke width mean, variance, median float mean, variance, median; int minx, miny, maxx, maxy; @@ -744,7 +738,7 @@ void filterComponents(IplImage * SWTImage, denseRepr[i].push_back(0); } } - for (std::vector::iterator pit = it->begin(); pit != it->end(); pit++) { + for (std::vector::iterator pit = it->begin(); pit != it->end(); pit++) { (denseRepr[pit->x - minx])[pit->y - miny] = 1; } // create graph representing components @@ -762,18 +756,18 @@ void filterComponents(IplImage * SWTImage, center.x = ((float)(maxx+minx))/2.0; center.y = ((float)(maxy+miny))/2.0; - Point2d dimensions; + SWTPoint2d dimensions; dimensions.x = maxx - minx + 1; dimensions.y = maxy - miny + 1; - Point2d bb1; + SWTPoint2d bb1; bb1.x = minx; bb1.y = miny; - Point2d bb2; + SWTPoint2d bb2; bb2.x = maxx; bb2.y = maxy; - std::pair pair(bb1,bb2); + SWTPointPair2d pair(bb1,bb2); compBB.push_back(pair); compDimensions.push_back(dimensions); @@ -781,11 +775,11 @@ void filterComponents(IplImage * SWTImage, compCenters.push_back(center); validComponents.push_back(*it); } - std::vector > tempComp; - std::vector tempDim; + std::vector > tempComp; + std::vector tempDim; std::vector tempMed; std::vector tempCenters; - std::vector > tempBB; + std::vector tempBB; tempComp.reserve(validComponents.size()); tempCenters.reserve(validComponents.size()); tempDim.reserve(validComponents.size()); @@ -841,26 +835,26 @@ bool chainSortLength (const Chain &lhs, const Chain &rhs) { return lhs.components.size() > rhs.components.size(); } -std::vector makeChains( IplImage * colorImage, - std::vector > & components, +std::vector makeChains( const Mat& colorImage, + std::vector > & components, std::vector & compCenters, std::vector & compMedians, - std::vector & compDimensions, - std::vector > & compBB) { + std::vector & compDimensions, + std::vector & compBB) { assert (compCenters.size() == components.size()); // make vector of color averages std::vector colorAverages; colorAverages.reserve(components.size()); - for (std::vector >::iterator it = components.begin(); it != components.end();it++) { + for (std::vector >::iterator it = components.begin(); it != components.end();it++) { Point3dFloat mean; mean.x = 0; mean.y = 0; mean.z = 0; int num_points = 0; - for (std::vector::iterator pit = it->begin(); pit != it->end(); pit++) { - mean.x += (float) CV_IMAGE_ELEM (colorImage, unsigned char, pit->y, (pit->x)*3 ); - mean.y += (float) CV_IMAGE_ELEM (colorImage, unsigned char, pit->y, (pit->x)*3+1 ); - mean.z += (float) CV_IMAGE_ELEM (colorImage, unsigned char, pit->y, (pit->x)*3+2 ); + for (std::vector::iterator pit = it->begin(); pit != it->end(); pit++) { + mean.x += (float) colorImage.at(pit->y, (pit->x)*3 ); + mean.y += (float) colorImage.at(pit->y, (pit->x)*3+1 ); + mean.z += (float) colorImage.at(pit->y, (pit->x)*3+2 ); num_points++; } mean.x = mean.x / ((float)num_points); @@ -1123,3 +1117,5 @@ std::vector makeChains( IplImage * colorImage, std::cout << chains.size() << " chains after merging" << std::endl; return chains; } + +} diff --git a/TextDetection.h b/TextDetection.h index 3e619fb..fb222b2 100755 --- a/TextDetection.h +++ b/TextDetection.h @@ -19,23 +19,28 @@ #ifndef TEXTDETECTION_H #define TEXTDETECTION_H -#include +#include -struct Point2d { +namespace DetectText { + +struct SWTPoint2d { int x; int y; float SWT; }; +typedef std::pair SWTPointPair2d; +typedef std::pair SWTPointPair2i; + struct Point2dFloat { float x; float y; }; struct Ray { - Point2d p; - Point2d q; - 
std::vector points; + SWTPoint2d p; + SWTPoint2d q; + std::vector points; }; struct Point3dFloat { @@ -54,49 +59,47 @@ struct Chain { std::vector components; }; -bool Point2dSort (Point2d const & lhs, - Point2d const & rhs); +bool Point2dSort (SWTPoint2d const & lhs, + SWTPoint2d const & rhs); -IplImage * textDetection (IplImage * float_input, - bool dark_on_light); +cv::Mat textDetection (const cv::Mat& input, bool dark_on_light); -void strokeWidthTransform (IplImage * edgeImage, - IplImage * gradientX, - IplImage * gradientY, +void strokeWidthTransform (const cv::Mat& edgeImage, + cv::Mat& gradientX, + cv::Mat& gradientY, bool dark_on_light, - IplImage * SWTImage, + cv::Mat& SWTImage, std::vector & rays); -void SWTMedianFilter (IplImage * SWTImage, - std::vector & rays); +void SWTMedianFilter (cv::Mat& SWTImage, std::vector & rays); -std::vector< std::vector > -findLegallyConnectedComponents (IplImage * SWTImage, - std::vector & rays); +std::vector< std::vector > findLegallyConnectedComponents (cv::Mat& SWTImage, std::vector & rays); -std::vector< std::vector > +std::vector< std::vector > findLegallyConnectedComponentsRAY (IplImage * SWTImage, std::vector & rays); void componentStats(IplImage * SWTImage, - const std::vector & component, + const std::vector & component, float & mean, float & variance, float & median, int & minx, int & miny, int & maxx, int & maxy); -void filterComponents(IplImage * SWTImage, - std::vector > & components, - std::vector > & validComponents, +void filterComponents(cv::Mat& SWTImage, + std::vector > & components, + std::vector > & validComponents, std::vector & compCenters, std::vector & compMedians, - std::vector & compDimensions, - std::vector > & compBB ); + std::vector & compDimensions, + std::vector & compBB ); -std::vector makeChains( IplImage * colorImage, - std::vector > & components, +std::vector makeChains( const cv::Mat& colorImage, + std::vector > & components, std::vector & compCenters, std::vector & compMedians, - std::vector & compDimensions, - std::vector > & compBB); + std::vector & compDimensions, + std::vector & compBB); + +} #endif // TEXTDETECTION_H From 3782d32bf81e8d8dd3dcafbe01cb320f5aa1545c Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sat, 27 Feb 2016 03:44:14 -0500 Subject: [PATCH 2/3] readme markdown --- README => README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename README => README.md (100%) diff --git a/README b/README.md similarity index 100% rename from README rename to README.md From 3dbd81ac1ea5b6fa3b40d436a3c1cc3e37e27d7f Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sat, 27 Feb 2016 03:45:18 -0500 Subject: [PATCH 3/3] readme.. --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 87264b7..a0d4ad9 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,9 @@ Detect text with stroke width transform. OpenCV 2.4+, boost. ## Compile -g++ -o DetectText TextDetection.cpp FeaturesMain.cpp -lopencv_core -lopencv_highgui -lopencv_imgproc -I/path/to/current/directory + + g++ -o DetectText TextDetection.cpp FeaturesMain.cpp -lopencv_core -lopencv_highgui -lopencv_imgproc -I/path/to/current/directory + where /path/to/current/directory is replaced with the absolute path to the current directory. ### Using CMake
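
A minimal sketch of the end-to-end flow this series enables, for reviewers trying the patches locally. It assumes the three patches are applied in order with git am, and uses the DetectText binary name defined in the new CMakeLists.txt; the input/output file names are placeholders, and the final argument (1) means dark text on a light background, per the README:

    git am *.patch
    mkdir build && cd build
    cmake ..
    make
    ./DetectText input.png result.png 1

Intermediate images (canny.png, SWT.png, components.png) are written to the working directory by textDetection() in addition to the result image.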