Skip to content

Commit

Permalink
fix(ext/ffi): trampoline for fast calls (denoland#15139)
Browse files Browse the repository at this point in the history
  • Loading branch information
littledivy committed Jul 12, 2022
1 parent 5db16d1 commit 77d065e
Show file tree
Hide file tree
Showing 11 changed files with 435 additions and 11 deletions.
1 change: 1 addition & 0 deletions .dprint.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"cli/tsc/*typescript.js",
"gh-pages",
"target",
"test_ffi/tests/test.js",
"test_util/std",
"test_util/wpt",
"third_party",
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@
[submodule "test_util/wpt"]
path = test_util/wpt
url = https://github.com/web-platform-tests/wpt.git
[submodule "ext/ffi/tinycc"]
path = ext/ffi/tinycc
url = https://github.com/TinyCC/tinycc
22 changes: 22 additions & 0 deletions ext/ffi/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# deno_ffi

This crate implements dynamic library ffi.

## Performance

Deno FFI calls have extremely low overhead (~1ns on an M1 with 16GB RAM) and
perform on par with native code. Deno leverages V8 Fast API calls and
JIT-compiled bindings to achieve these speeds.

`Deno.dlopen` generates an optimized path and a fallback path. Optimized paths
are triggered when V8 decides to optimize the function, at which point calls go
through the Fast API. Fallback paths handle types like function callbacks and
implement proper error handling for unexpected types, neither of which is
supported in Fast API calls.

Optimized calls enter a JIT-compiled "trampoline" function that translates Fast
API values directly into arguments for the symbol call. JIT compilation itself
is very fast, thanks to `tinycc`. Currently, the optimized path is only
supported on Linux and macOS.

To run benchmarks:

```bash
target/release/deno bench --allow-ffi --allow-read --unstable ./test_ffi/tests/bench.js
```
63 changes: 63 additions & 0 deletions ext/ffi/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.

use std::env;

/// Configures and builds the vendored `tinycc` sources (the `tinycc`
/// directory next to this build script) into a static `libtcc.a` inside
/// Cargo's `OUT_DIR`, then emits the Cargo directives needed to find it.
///
/// The build is only performed on non-Windows targets. If `configure` or
/// `make` reports a failure, the error is printed to stderr and the build
/// script exits with status 1.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set or if `configure` / `make` cannot be
/// spawned at all.
fn build_tcc() {
  // TODO(@littledivy): Windows support for fast call.
  // let tcc_path = root
  //   .parent()
  //   .unwrap()
  //   .to_path_buf()
  //   .parent()
  //   .unwrap()
  //   .to_path_buf()
  //   .join("third_party")
  //   .join("prebuilt")
  //   .join("win");
  // println!("cargo:rustc-link-search=native={}", tcc_path.display());

  #[cfg(not(target_os = "windows"))]
  {
    use std::path::PathBuf;
    use std::process::exit;
    use std::process::Command;

    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let tcc_src = root.join("tinycc");
    let out_dir = PathBuf::from(
      env::var("OUT_DIR").expect("OUT_DIR must be set by cargo"),
    );

    // Run tinycc's `configure` from inside OUT_DIR so that all generated
    // files land in the build directory rather than in the source tree.
    let status = Command::new(tcc_src.join("configure"))
      .current_dir(&out_dir)
      .args(["--enable-static", "--extra-cflags=-fPIC -O3 -g -static"])
      .status()
      .expect("failed to spawn tinycc `configure`");
    if !status.success() {
      eprintln!("Fail to configure: {:?}", status);
      exit(1);
    }

    // Honour cargo's job count when it is provided; otherwise build serially.
    let jobs = env::var("NUM_JOBS").unwrap_or_else(|_| String::from("1"));
    let status = Command::new("make")
      .current_dir(&out_dir)
      .arg(format!("-j{}", jobs))
      .arg("libtcc.a")
      .status()
      .expect("failed to spawn `make`");
    if !status.success() {
      eprintln!("Fail to make: {:?}", status);
      exit(1);
    }

    println!("cargo:rustc-link-search=native={}", out_dir.display());
    println!("cargo:rerun-if-changed={}", tcc_src.display());
  }
}

// Windows build script entry point: nothing is built or linked here (see the
// Windows TODO in `build_tcc`).
#[cfg(target_os = "windows")]
fn main() {}

// Non-Windows build script entry point: build `libtcc.a` first, then ask
// Cargo to link the `tcc` library statically.
#[cfg(not(target_os = "windows"))]
fn main() {
build_tcc();
println!("cargo:rustc-link-lib=static=tcc");
}
153 changes: 153 additions & 0 deletions ext/ffi/jit_trampoline.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.

use crate::NativeType;
use crate::{tcc::Compiler, Symbol};
use std::ffi::c_void;
use std::ffi::CString;
use std::fmt::Write as _;

/// Result of JIT-compiling a trampoline with `gen_trampoline`.
///
/// Bundles the address of the compiled `func_trampoline` symbol with the
/// compiler context and the symbol it was generated for.
pub(crate) struct Allocation {
/// Address returned by `relocate_and_get_symbol("func_trampoline")`.
pub addr: *mut c_void,
// Never read; presumably held so the compiled code behind `addr` stays
// alive for as long as this allocation does — TODO confirm against `tcc`.
_ctx: Compiler,
// Never read; the symbol whose pointer was registered as `func`.
_sym: Box<Symbol>,
}

// Converts `$st` into a `CString` and borrows it.
//
// Panics (through `unwrap`) when `$st` contains an interior NUL byte. The
// borrow refers to a temporary, so the expansion is meant to be used directly
// as a call argument rather than stored in a binding.
macro_rules! cstr {
($st:expr) => {
&CString::new($st).unwrap()
};
}

/// Returns the C type used for a *trampoline parameter* of the given native
/// type.
///
/// Every integer type of 32 bits or fewer is widened to the 32-bit type of
/// the same signedness; `Void`, `F32` and `F64` map to `void`, `float` and
/// `double`.
///
/// # Panics
///
/// Panics with `unimplemented!()` for any other native type.
fn native_arg_to_c(ty: &NativeType) -> &'static str {
  match ty {
    NativeType::Void => "void",
    NativeType::F64 => "double",
    NativeType::F32 => "float",
    // Signed integers are all received as `int32_t`.
    NativeType::I32 | NativeType::I16 | NativeType::I8 => "int32_t",
    // Unsigned integers are all received as `uint32_t`.
    NativeType::U32 | NativeType::U16 | NativeType::U8 => "uint32_t",
    _ => unimplemented!(),
  }
}

/// Returns the exact C type (from `<stdint.h>`) that corresponds to the given
/// native type. Used for the `extern` declaration of the target symbol and
/// for the trampoline's return type.
///
/// # Panics
///
/// Panics with `unimplemented!()` for native types other than the 8/16/32-bit
/// integers, `Void`, `F32` and `F64`.
fn native_to_c(ty: &NativeType) -> &'static str {
  match ty {
    NativeType::U8 => "uint8_t",
    NativeType::U16 => "uint16_t",
    NativeType::U32 => "uint32_t",
    NativeType::I8 => "int8_t",
    // Signed 16-bit: must be `int16_t`, not `uint16_t`, so the declared
    // signature of the foreign function keeps its signedness.
    NativeType::I16 => "int16_t",
    NativeType::I32 => "int32_t",
    NativeType::Void => "void",
    NativeType::F32 => "float",
    NativeType::F64 => "double",
    _ => unimplemented!(),
  }
}

/// Generates the C source of the trampoline for `sym`.
///
/// The emitted translation unit contains:
/// 1. an `extern` declaration of `func` using the symbol's exact C types, and
/// 2. a `func_trampoline` definition that takes a leading `void* recv`
///    followed by the widened argument types, and forwards the remaining
///    arguments to `func`.
///
/// # Panics
///
/// Panics if the result type or any parameter type is not handled by
/// `native_to_c` / `native_arg_to_c`.
pub(crate) fn codegen(sym: &crate::Symbol) -> String {
  let return_ty = native_to_c(&sym.result_type);
  let params = &sym.parameter_types;
  let mut src = String::from("#include <stdint.h>\n");

  // extern <return_type> func(<param_type> p0, <param_type> p1, ...);
  let _ = write!(src, "\nextern {return_ty} func(");
  for (idx, param) in params.iter().enumerate() {
    let sep = if idx == 0 { "" } else { ", " };
    let _ = write!(src, "{sep}{} p{idx}", native_to_c(param));
  }
  src.push_str(");\n\n");

  // <return_type> func_trampoline(void* recv, <arg_type> p0, ...) {
  let _ = write!(src, "{return_ty} func_trampoline(void* recv");
  for (idx, param) in params.iter().enumerate() {
    let _ = write!(src, ", {} p{idx}", native_arg_to_c(param));
  }
  src.push_str(") {\n");

  // return func(p0, p1, ...);
  src.push_str(" return func(");
  for idx in 0..params.len() {
    let sep = if idx == 0 { "" } else { ", " };
    let _ = write!(src, "{sep}p{idx}");
  }
  src.push_str(");\n}\n\n");

  src
}

/// JIT-compiles a trampoline for `sym` and returns it as an `Allocation`.
///
/// The symbol's pointer is registered with the compiler under the name
/// `func`, the C source produced by `codegen` is compiled, and the address of
/// the resulting `func_trampoline` symbol is looked up. The compiler context
/// and `sym` are moved into the returned allocation.
///
/// # Errors
///
/// Returns `Err(())` when creating the compiler, compiling the source, or
/// relocating / resolving `func_trampoline` fails.
pub(crate) fn gen_trampoline(
  sym: Box<crate::Symbol>,
) -> Result<Box<Allocation>, ()> {
  let mut compiler = Compiler::new()?;
  compiler.set_options(cstr!("-nostdlib"));

  let target = sym.ptr.0 as *const c_void;
  // SAFETY: symbol satisfies ABI requirement.
  unsafe { compiler.add_symbol(cstr!("func"), target) };

  let source = codegen(&sym);
  compiler.compile_string(cstr!(source))?;

  let addr = compiler.relocate_and_get_symbol(cstr!("func_trampoline"))?;
  Ok(Box::new(Allocation {
    addr,
    _ctx: compiler,
    _sym: sym,
  }))
}

#[cfg(test)]
mod tests {
  use super::*;
  use libffi::middle::Type;
  use std::ptr::null_mut;

  /// Runs `super::codegen` on a throwaway symbol (null code pointer, empty
  /// cif) that carries the given parameter and result types.
  fn generate(parameters: Vec<NativeType>, ret: NativeType) -> String {
    let cif = libffi::middle::Cif::new(vec![], Type::void());
    let ptr = libffi::middle::CodePtr(null_mut());
    let symbol = Box::new(crate::Symbol {
      cif,
      ptr,
      parameter_types: parameters,
      result_type: ret,
      can_callback: false,
    });
    super::codegen(&symbol)
  }

  #[test]
  fn test_gen_trampoline() {
    // (parameter types, result type, expected C source)
    let cases = vec![
      (
        vec![],
        NativeType::Void,
        "#include <stdint.h>\n\nextern void func();\n\nvoid func_trampoline(void* recv) {\n return func();\n}\n\n",
      ),
      (
        vec![NativeType::U32, NativeType::U32],
        NativeType::U32,
        "#include <stdint.h>\n\nextern uint32_t func(uint32_t p0, uint32_t p1);\n\nuint32_t func_trampoline(void* recv, uint32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n",
      ),
      (
        vec![NativeType::I32, NativeType::I32],
        NativeType::I32,
        "#include <stdint.h>\n\nextern int32_t func(int32_t p0, int32_t p1);\n\nint32_t func_trampoline(void* recv, int32_t p0, int32_t p1) {\n return func(p0, p1);\n}\n\n",
      ),
      (
        vec![NativeType::F32, NativeType::F32],
        NativeType::F32,
        "#include <stdint.h>\n\nextern float func(float p0, float p1);\n\nfloat func_trampoline(void* recv, float p0, float p1) {\n return func(p0, p1);\n}\n\n",
      ),
      (
        vec![NativeType::F64, NativeType::F64],
        NativeType::F64,
        "#include <stdint.h>\n\nextern double func(double p0, double p1);\n\ndouble func_trampoline(void* recv, double p0, double p1) {\n return func(p0, p1);\n}\n\n",
      ),
    ];

    for (index, (parameters, ret, expected)) in cases.into_iter().enumerate() {
      assert_eq!(generate(parameters, ret), expected, "case #{}", index);
    }
  }

  #[test]
  fn test_gen_trampoline_implicit_cast() {
    // Narrow integers keep their exact type in the extern declaration but
    // are widened to 32 bits in the trampoline's parameter list.
    let generated =
      generate(vec![NativeType::I8, NativeType::U8], NativeType::I8);
    let expected = "#include <stdint.h>\n\nextern int8_t func(int8_t p0, uint8_t p1);\n\nint8_t func_trampoline(void* recv, int32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n";
    assert_eq!(generated, expected)
  }
}
39 changes: 32 additions & 7 deletions ext/ffi/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ use std::path::PathBuf;
use std::ptr;
use std::rc::Rc;

#[cfg(not(target_os = "windows"))]
mod jit_trampoline;
#[cfg(not(target_os = "windows"))]
mod tcc;

thread_local! {
// Per-thread raw pointer to a `v8::Isolate`, wrapped in a `RefCell` so it can
// be replaced at runtime. Starts out null; where it gets set is not visible
// in this hunk.
static LOCAL_ISOLATE_POINTER: RefCell<*const v8::Isolate> = RefCell::new(ptr::null());
}
Expand Down Expand Up @@ -72,6 +77,8 @@ struct Symbol {
ptr: libffi::middle::CodePtr,
parameter_types: Vec<NativeType>,
result_type: NativeType,
// This is dead code only on Windows
#[allow(dead_code)]
can_callback: bool,
}

Expand Down Expand Up @@ -678,6 +685,7 @@ impl From<&NativeType> for fast_api::Type {
}
}

#[cfg(not(target_os = "windows"))]
fn is_fast_api(rv: NativeType) -> bool {
!matches!(
rv,
Expand All @@ -696,25 +704,36 @@ fn make_sync_fn<'s>(
scope: &mut v8::HandleScope<'s>,
sym: Box<Symbol>,
) -> v8::Local<'s, v8::Function> {
let mut fast_ffi_templ = None;
#[cfg(not(target_os = "windows"))]
let mut fast_ffi_templ: Option<FfiFastCallTemplate> = None;

#[cfg(target_os = "windows")]
let fast_ffi_templ: Option<FfiFastCallTemplate> = None;

#[cfg(not(target_os = "windows"))]
let mut fast_allocations: Option<*mut ()> = None;
#[cfg(not(target_os = "windows"))]
if !sym.can_callback
&& !sym.parameter_types.iter().any(|t| !is_fast_api(*t))
&& is_fast_api(sym.result_type)
{
let ret = fast_api::Type::from(&sym.result_type);

let mut args = sym
.parameter_types
.iter()
.map(|t| t.into())
.collect::<Vec<_>>();
if args.is_empty() {
args.push(fast_api::Type::V8Value);
}
// recv
args.insert(0, fast_api::Type::V8Value);
let symbol_trampoline =
jit_trampoline::gen_trampoline(sym.clone()).expect("gen_trampoline");
fast_ffi_templ = Some(FfiFastCallTemplate {
args: args.into_boxed_slice(),
ret: (&fast_api::Type::from(&sym.result_type)).into(),
symbol_ptr: sym.ptr.as_ptr() as *const c_void,
ret: (&ret).into(),
symbol_ptr: symbol_trampoline.addr,
});
fast_allocations = Some(Box::into_raw(symbol_trampoline) as *mut ());
}

let sym = Box::leak(sym);
Expand Down Expand Up @@ -754,7 +773,13 @@ fn make_sync_fn<'s>(
Box::new(move |_| {
// SAFETY: This is never called twice. pointer obtained
// from Box::into_raw, hence, satisfies memory layout requirements.
unsafe { Box::from_raw(sym) };
unsafe {
Box::from_raw(sym);
#[cfg(not(target_os = "windows"))]
if let Some(fast_allocations) = fast_allocations {
Box::from_raw(fast_allocations as *mut jit_trampoline::Allocation);
}
}
}),
);

Expand Down
Loading

0 comments on commit 77d065e

Please sign in to comment.