Proof-of-concept for a compiler that translates SQL queries to type-checked C++ code.
- Performance: build ML pipelines that run faster than those written in Python with Pandas (DataFrame performance).
- Security: no need for a SQL runtime, just run a single-purpose C++ program that can be audited.
- Integration: run SQL on (embedded) systems that don't support a heavy DBMS but can handle self-contained C++ programs.
- SQL parser based on sqlparser-rs.
- Generated C++ code uses the DataFrame library (hmdf namespace).
git clone https://github.com/mo42/sqlc.git
cd sqlc
cargo build --release
Compiling the example SQL file:
cargo run -- example.sql > example.cpp
SELECT
date,
column2
FROM
'example.csv'
JOIN 'join.csv' USING(column2)
WHERE
joined_string = "Join string 3"
#include <DataFrame/DataFrame.h>
#include <iostream>
// Bring the hmdf namespace into scope so StdDataFrame and io_format can be
// used unqualified.
using namespace hmdf;
// Index type shared by every data frame in this program.
typedef ulong idx_t;
// Data frame type used throughout: a StdDataFrame indexed by idx_t.
using SqlcDataFrame = StdDataFrame<idx_t>;
// Entry point of the program generated for the example query. It reads the
// two CSV inputs, joins them on "column2", keeps the rows whose
// joined_string equals "Join string 3", and prints the "date" and "column2"
// columns to standard output. Command-line arguments are ignored.
int main(int, char**) {
// FROM 'example.csv': load the main table.
SqlcDataFrame df_main;
df_main.read("example.csv", io_format::csv2);
// JOIN 'join.csv': load the table that is joined against the main table.
SqlcDataFrame df_join0;
df_join0.read("join.csv", io_format::csv2);
// USING(column2): inner join of both frames on "column2".
// NOTE(review): the explicit template arguments presumably name the type of
// the join column followed by the column types present in the frames —
// confirm against the DataFrame API.
SqlcDataFrame df = df_main.join_by_column<decltype(df_join0), int, ulong,
double, int, std::string, long>(
df_join0, "column2", hmdf::join_policy::inner_join);
// WHERE clause: the predicate receives the row index (unused) and the
// row's joined_string value.
auto where_functor = [](const idx_t&,
const std::string& joined_string) -> bool {
return (joined_string == "Join string 3");
};
// Apply the predicate to the "joined_string" column of the joined frame.
// NOTE(review): the trailing template arguments presumably list the column
// types to carry into the result — confirm against the DataFrame API.
auto where_df = df.get_data_by_sel<std::string, decltype(where_functor),
ulong, double, int, std::string, long>(
"joined_string", where_functor);
// SELECT date, column2: pull the index and the two selected columns out of
// the filtered frame into local vectors.
std::vector<idx_t> idx = where_df.get_index();
std::vector<std::string> date = where_df.get_column<std::string>("date");
std::vector<int> column2 = where_df.get_column<int>("column2");
// Assemble the result frame from the extracted index and columns.
SqlcDataFrame select;
select.load_index(std::move(idx));
select.load_column("date", std::move(date));
select.load_column("column2", std::move(column2));
// Print the result frame to standard output.
// NOTE(review): to_string is instantiated with double only, while the
// columns loaded above are std::string and int — confirm this is intended.
std::cout << select.to_string<double>() << std::endl;
// Write the result frame to standard output in csv format, naming the
// std::string and int column types. The meaning of the trailing arguments
// (5, false, 100) is not visible here — see the DataFrame write() docs.
select.write<std::ostream, std::string, int>(std::cout, hmdf::io_format::csv,
5, false, 100);
return 0;
}