-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce AbstractWorkerPool, CachingPool #16808
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,26 @@ | ||
# This file is a part of Julia. License is MIT: https://julialang.org/license | ||
|
||
type WorkerPool | ||
abstract AbstractWorkerPool | ||
|
||
# An AbstractWorkerPool should implement | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and at least according to the way the methods below are using them, they should all have fields |
||
# | ||
# `push!` - add a new worker to the overall pool (available + busy) | ||
# `put!` - put back a worker to the available pool | ||
# `take!` - take a worker from the available pool (to be used for remote function execution) | ||
# `length` - number of workers available in the overall pool | ||
# `isready` - return false if a `take!` on the pool would block, else true | ||
# | ||
# The default implementations of the above (on an AbstractWorkerPool) require fields | ||
# channel::RemoteChannel{Channel{Int}} | ||
# workers::Set{Int} | ||
# | ||
|
||
type WorkerPool <: AbstractWorkerPool | ||
channel::RemoteChannel{Channel{Int}} | ||
count::Int | ||
workers::Set{Int} | ||
|
||
# Create a shared queue of available workers | ||
WorkerPool() = new(RemoteChannel(()->Channel{Int}(typemax(Int))), 0) | ||
WorkerPool() = new(RemoteChannel(()->Channel{Int}(typemax(Int))), Set{Int}()) | ||
end | ||
|
||
|
||
|
@@ -19,25 +34,26 @@ function WorkerPool(workers::Vector{Int}) | |
|
||
# Add workers to the pool | ||
for w in workers | ||
put!(pool, w) | ||
push!(pool, w) | ||
end | ||
|
||
return pool | ||
end | ||
|
||
push!(pool::AbstractWorkerPool, w::Int) = (push!(pool.workers, w); put!(pool.channel, w); pool) | ||
push!(pool::AbstractWorkerPool, w::Worker) = push!(pool, w.id) | ||
length(pool::AbstractWorkerPool) = length(pool.workers) | ||
isready(pool::AbstractWorkerPool) = isready(pool.channel) | ||
|
||
put!(pool::WorkerPool, w::Int) = (pool.count += 1; put!(pool.channel, w)) | ||
put!(pool::WorkerPool, w::Worker) = put!(pool, w.id) | ||
put!(pool::AbstractWorkerPool, w::Int) = (put!(pool.channel, w); pool) | ||
|
||
length(pool::WorkerPool) = pool.count | ||
workers(pool::AbstractWorkerPool) = collect(pool.workers) | ||
|
||
isready(pool::WorkerPool) = isready(pool.channel) | ||
|
||
function remotecall_pool(rc_f, f, pool::WorkerPool, args...; kwargs...) | ||
function take!(pool::AbstractWorkerPool) | ||
# Find an active worker | ||
worker = 0 | ||
while true | ||
if pool.count == 0 | ||
if length(pool) == 0 | ||
if pool === default_worker_pool() | ||
# No workers, the master process is used as a worker | ||
worker = 1 | ||
|
@@ -51,42 +67,47 @@ function remotecall_pool(rc_f, f, pool::WorkerPool, args...; kwargs...) | |
if worker in procs() | ||
break | ||
else | ||
pool.count = pool.count - 1 | ||
delete!(pool.workers, worker) # Remove invalid worker from pool | ||
end | ||
end | ||
return worker | ||
end | ||
|
||
function remotecall_pool(rc_f, f, pool::AbstractWorkerPool, args...; kwargs...) | ||
worker = take!(pool) | ||
try | ||
rc_f(f, worker, args...; kwargs...) | ||
finally | ||
if worker != 1 | ||
put!(pool.channel, worker) | ||
end | ||
# In case of default_worker_pool, the master is implicitly considered a worker | ||
# until new workers are added, and it is not added back to the available pool. | ||
# However, it is perfectly valid for other pools to `push!` any worker (including 1) | ||
# to the pool. Confirm the same before making a worker available. | ||
worker in pool.workers && put!(pool, worker) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little confused on this - if it's already there, put it there? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In case of the However, it is perfectly valid for other pools to explicitly add any worker (including 1) to the pool. Hence the check. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. A few comments to that effect would be good. |
||
end | ||
end | ||
|
||
|
||
""" | ||
remotecall(f, pool::WorkerPool, args...; kwargs...) | ||
remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. be sure to keep rst signatures consistent with the docstrings |
||
Call `f(args...; kwargs...)` on one of the workers in `pool`. Returns a `Future`. | ||
""" | ||
remotecall(f, pool::WorkerPool, args...; kwargs...) = remotecall_pool(remotecall, f, pool, args...; kwargs...) | ||
remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall, f, pool, args...; kwargs...) | ||
|
||
|
||
""" | ||
remotecall_wait(f, pool::WorkerPool, args...; kwargs...) | ||
remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this worker-pool signature of |
||
Call `f(args...; kwargs...)` on one of the workers in `pool`. Waits for completion, returns a `Future`. | ||
""" | ||
remotecall_wait(f, pool::WorkerPool, args...; kwargs...) = remotecall_pool(remotecall_wait, f, pool, args...; kwargs...) | ||
remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_wait, f, pool, args...; kwargs...) | ||
|
||
|
||
""" | ||
remotecall_fetch(f, pool::WorkerPool, args...; kwargs...) | ||
remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) | ||
Call `f(args...; kwargs...)` on one of the workers in `pool`. Waits for completion and returns the result. | ||
""" | ||
remotecall_fetch(f, pool::WorkerPool, args...; kwargs...) = remotecall_pool(remotecall_fetch, f, pool, args...; kwargs...) | ||
remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_fetch, f, pool, args...; kwargs...) | ||
|
||
""" | ||
default_worker_pool() | ||
|
@@ -107,10 +128,91 @@ end | |
|
||
|
||
""" | ||
remote([::WorkerPool], f) -> Function | ||
remote([::AbstractWorkerPool], f) -> Function | ||
Returns a lambda that executes function `f` on an available worker | ||
using `remotecall_fetch`. | ||
""" | ||
remote(f) = (args...; kwargs...)->remotecall_fetch(f, default_worker_pool(), args...; kwargs...) | ||
remote(p::WorkerPool, f) = (args...; kwargs...)->remotecall_fetch(f, p, args...; kwargs...) | ||
remote(p::AbstractWorkerPool, f) = (args...; kwargs...)->remotecall_fetch(f, p, args...; kwargs...) | ||
|
||
# An AbstractWorkerPool that also keeps track of which functions have already
# been cached on which worker, so they do not have to be shipped again.
type CachingPool <: AbstractWorkerPool
    # Fields required by the default AbstractWorkerPool method implementations
    channel::RemoteChannel{Channel{Int}}
    workers::Set{Int}

    # Mapping between a tuple (worker_id, f) and a remote_ref
    map_obj2ref::Dict{Tuple{Int, Function}, RemoteChannel}

    function CachingPool()
        # Construct the tracker with exactly the declared key/value types
        # instead of relying on conversion from an unrelated Dict type.
        wp = new(RemoteChannel(()->Channel{Int}(typemax(Int))),
                 Set{Int}(),
                 Dict{Tuple{Int, Function}, RemoteChannel}())
        # Release the tracked remote references when the pool is collected.
        finalizer(wp, clear!)
        wp
    end
end
|
||
""" | ||
CachingPool(workers::Vector{Int}) | ||
An implementation of an `AbstractWorkerPool`. `remote`, `remotecall_fetch`, `pmap` and other | ||
remote calls which execute functions remotely, benefit from caching the serialized/deserialized | ||
functions on the worker nodes, especially for closures which capture large amounts of data. | ||
The remote cache is maintained for the lifetime of the returned `CachingPool` object. To clear the | ||
cache earlier, use `clear!(pool)`. | ||
For global variables, only the bindings are captured in a closure, not the data. | ||
`let` blocks can be used to capture global data. | ||
For example: | ||
``` | ||
const foo=rand(10^8); | ||
wp=CachingPool(workers()) | ||
let foo=foo | ||
pmap(wp, i->sum(foo)+i, 1:100); | ||
end | ||
``` | ||
The above would transfer `foo` only once to each worker. | ||
""" | ||
function CachingPool(workers::Vector{Int})
    cp = CachingPool()
    # Register every supplied worker id with the freshly created pool.
    foreach(id -> push!(cp, id), workers)
    return cp
end
|
||
# Build a CachingPool from the worker ids currently registered in `wp`.
function CachingPool(wp::WorkerPool)
    ids = workers(wp)
    return CachingPool(ids)
end
|
||
""" | ||
clear!(pool::CachingPool) -> pool | ||
Removes all cached functions from all participating workers. | ||
""" | ||
function clear!(pool::CachingPool)
    tracker = pool.map_obj2ref
    # Finalize every tracked remote reference, then forget all of them.
    foreach(finalize, values(tracker))
    empty!(tracker)
    return pool
end
|
||
# Given only a RemoteChannel: fetch the function stored in it and call it.
function exec_from_cache(rr::RemoteChannel, args...; kwargs...)
    cached_f = fetch(rr)
    return cached_f(args...; kwargs...)
end

# Given a (function, RemoteChannel) pair: store the function in the channel
# first, then call it.
function exec_from_cache(f_ref::Tuple{Function, RemoteChannel}, args...; kwargs...)
    f, rr = f_ref
    put!(rr, f) # Cache locally
    return f(args...; kwargs...)
end
|
||
# Execute `f` through `rc_f` on a worker taken from `pool`, reusing a
# previously cached copy of `f` on that worker when one is tracked.
#
# `pool.map_obj2ref` maps (worker, f) to the RemoteChannel associated with `f`.
# When the pair is already tracked, only that channel is passed to
# `exec_from_cache`; otherwise a new channel is registered and passed along
# together with `f`, so that `exec_from_cache` can store `f` in it.
function remotecall_pool(rc_f, f, pool::CachingPool, args...; kwargs...)
    worker = take!(pool)
    key = (worker, f)

    # Allocate a RemoteChannel only when this (worker, f) pair is not tracked
    # yet; a `get` with a default value would construct one on every call.
    if haskey(pool.map_obj2ref, key)
        f_ref = pool.map_obj2ref[key]
    else
        rr = RemoteChannel(worker)
        pool.map_obj2ref[key] = rr    # Add to tracker
        f_ref = (f, rr)
    end

    try
        rc_f(exec_from_cache, worker, f_ref, args...; kwargs...)
    finally
        # ensure that we do not add pid 1 back if it is not registered.
        worker in pool.workers && put!(pool, worker)
    end
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should the signature in the docstring and rst also change?