Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mykelk committed Aug 21, 2014
0 parents commit bb02028
Show file tree
Hide file tree
Showing 11 changed files with 1,338 additions and 0 deletions.
16 changes: 16 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
language: cpp
compiler:
- clang
notifications:
email: false
env:
matrix:
- JULIAVERSION="juliareleases"
- JULIAVERSION="julianightlies"
before_install:
- sudo add-apt-repository ppa:staticfloat/julia-deps -y
- sudo add-apt-repository ppa:staticfloat/${JULIAVERSION} -y
- sudo apt-get update -qq -y
- sudo apt-get install libpcre3-dev julia -y
script:
- julia -e 'Pkg.init(); Pkg.clone(pwd()); Pkg.test("BayesNets")'
22 changes: 22 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
The BayesNets.jl package is licensed under the MIT "Expat" License:

> Copyright (c) 2014: Mykel Kochenderfer.
>
> Permission is hereby granted, free of charge, to any person obtaining
> a copy of this software and associated documentation files (the
> "Software"), to deal in the Software without restriction, including
> without limitation the rights to use, copy, modify, merge, publish,
> distribute, sublicense, and/or sell copies of the Software, and to
> permit persons to whom the Software is furnished to do so, subject to
> the following conditions:
>
> The above copyright notice and this permission notice shall be
> included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# BayesNets

This library supports representation, inference, and learning in Bayesian networks.

Read the [documentation](https://nbviewer.ipython.org/github/sisl/BayesNets.jl/blob/master/doc/BayesNets.ipynb).
3 changes: 3 additions & 0 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
julia 0.3-
Graphs
DataFrames
735 changes: 735 additions & 0 deletions doc/BayesNets.ipynb

Large diffs are not rendered by default.

226 changes: 226 additions & 0 deletions src/BayesNets.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
module BayesNets

export BayesNet, addEdge!, addEdges!, CPD, CPDs, prob, setCPD!, pdf, rand, randBernoulliDict, table, domain, Assignment, *, sumout, normalize, select, randTable, NodeName, consistent, estimate, randTableWeighted, estimateConvergence, prior, logBayesScore
export Domain, BinaryDomain, DiscreteDomain, RealDomain, domain, cpd, parents

import Graphs: GenericGraph, simple_graph, Edge, add_edge!, topological_sort_by_dfs, in_edges, source, in_neighbors
import TikzGraphs: plot
import Base: rand, select
import DataFrames: DataFrame, groupby, array, isna

typealias DAG GenericGraph{Int64,Edge{Int64},Range1{Int64},Array{Edge{Int64},1},Array{Array{Edge{Int64},1},1}}

typealias NodeName Symbol

typealias Assignment Dict

function consistent(a::Assignment, b::Assignment)
commonKeys = intersect(keys(a), keys(b))
all([a[k] == b[k] for k in commonKeys])
end

include("cpds.jl")

typealias CPD CPDs.CPD

DAG(n) = simple_graph(n)

abstract Domain

type DiscreteDomain <: Domain
elements::Vector
end

type ContinuousDomain <: Domain
lower::Real
upper::Real
end

BinaryDomain() = DiscreteDomain([false, true])

RealDomain() = ContinuousDomain(-Inf, Inf)

type BayesNet
dag::DAG
cpds::Vector{CPD}
index::Dict{NodeName,Int}
names::Vector{NodeName}
domains::Vector{Domain}

function BayesNet(names::Vector{NodeName})
n = length(names)
index = [names[i]=>i for i = 1:n]
cpds = CPD[CPDs.Bernoulli() for i = 1:n]
domains = Domain[BinaryDomain() for i = 1:n] # default to binary domain
new(simple_graph(length(names)), cpds, index, names, domains)
end
end

domain(b::BayesNet, name::NodeName) = b.domains[b.index[name]]

cpd(b::BayesNet, name::NodeName) = b.cpds[b.index[name]]

function parents(b::BayesNet, name::NodeName)
i = b.index[name]
NodeName[b.names[j] for j in in_neighbors(i, b.dag)]
end


function addEdge!(bn::BayesNet, sourceNode::NodeName, destNode::NodeName)
i = bn.index[sourceNode]
j = bn.index[destNode]
add_edge!(bn.dag, i, j)
bn
end

function addEdges!(bn::BayesNet, pairs)
for p in pairs
addEdge!(bn, p[1], p[2])
end
bn
end

function setCPD!(bn::BayesNet, name::NodeName, cpd::CPD)
i = bn.index[name]
bn.cpds[i] = cpd
bn.domains[i] = domain(cpd)
nothing
end

function prob(bn::BayesNet, assignment::Assignment)
prod([pdf(bn.cpds[i], assignment)(assignment[bn.names[i]]) for i = 1:length(bn.names)])
end

include("sampling.jl")


Base.mimewritable(::MIME"image/svg+xml", b::BayesNet) = true

Base.mimewritable(::MIME"text/html", dfs::Vector{DataFrame}) = true

function Base.writemime(f::IO, a::MIME"image/svg+xml", b::BayesNet)
Base.writemime(f, a, plot(b.dag, ASCIIString[string(s) for s in b.names]))
end

function Base.writemime(io::IO, a::MIME"text/html", dfs::Vector{DataFrame})
for df in dfs
writemime(io, a, df)
end
end

include("ndgrid.jl")

function table(bn::BayesNet, name::NodeName)
edges = in_edges(bn.index[name], bn.dag)
names = [bn.names[source(e, bn.dag)] for e in edges]
names = [names, name]
c = cpd(bn, name)
d = DataFrame()
if length(edges) > 0
A = ndgrid([domain(bn, name).elements for name in names]...)
i = 1
for name in names
d[name] = A[i][:]
i = i + 1
end
else
d[name] = domain(bn, name).elements
end
p = ones(size(d,1))
for i = 1:size(d,1)
ownValue = d[i,length(names)]
a = [names[j]=>d[i,j] for j = 1:(length(names)-1)]
p[i] = pdf(c, a)(ownValue)
end
d[:p] = p
d
end

table(bn::BayesNet, name::NodeName, a::Assignment) = select(table(bn, name), a)

function *(df1::DataFrame, df2::DataFrame)
onnames = setdiff(intersect(names(df1), names(df2)), [:p])
finalnames = vcat(setdiff(union(names(df1), names(df2)), [:p]), :p)
if isempty(onnames)
j = join(df1, df2, kind=:cross)
j[:,:p] .*= j[:,:p_1]
return j[:,finalnames]
else
j = join(df1, df2, on=onnames, kind=:outer)
j[:,:p] .*= j[:,:p_1]
return j[:,finalnames]
end
end

# TODO: this currently only supports binary valued variables
function sumout(a::DataFrame, v::Symbol)
@assert unique(a[:,v]) == [0,1]
remainingvars = setdiff(names(a), [v, :p])
g = groupby(a, v)
j = join(g..., on=remainingvars)
j[:,:p] += j[:,:p_1]
j[:,vcat(remainingvars, :p)]
end

function sumout(a::DataFrame, v::Vector{Symbol})
if isempty(v)
return a
else
sumout(sumout(a, v[1]), v[2:end])
end
end

function normalize(a::DataFrame)
a[:,:p] /= sum(a[:,:p])
a
end

function select(t::DataFrame, a::Assignment)
commonNames = intersect(names(t), keys(a))
mask = bool(ones(size(t,1)))
for s in commonNames
mask &= t[s] .== a[s]
end
t[mask, :]
end

function estimate(df::DataFrame)
n = size(df, 1)
w = ones(n)
t = df
if haskey(df, :p)
t = df[:, names(t) .!= :p]
w = df[:p]
end
# unique samples
tu = unique(t)
# add column with probabilities of unique samples
tu[:p] = Float64[sum(w[Bool[tu[j,:] == t[i,:] for i = 1:size(t,1)]]) for j = 1:size(tu,1)]
tu[:p] /= sum(tu[:p])
tu
end

function estimateConvergence(df::DataFrame, a::Assignment)
n = size(df, 1)
p = zeros(n)
w = ones(n)
if haskey(df, :p)
w = df[:p]
end
dfindex = find([haskey(a, n) for n in names(df)])
dfvalues = [a[n] for n in names(df)[dfindex]]'
cumWeight = 0.
cumTotalWeight = 0.
for i = 1:n
if array(df[i, dfindex]) == dfvalues
cumWeight += w[i]
end
cumTotalWeight += w[i]
p[i] = cumWeight / cumTotalWeight
end
p
end

include("learning.jl")

end # module
91 changes: 91 additions & 0 deletions src/cpds.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
typealias Assignment Dict

module CPDs
abstract CPD

export CPD, Discrete, Bernoulli, Normal, domain

typealias NodeName Symbol

cpdDict(names::Vector{NodeName}, dict::Dict) = a -> dict[[a[n] for n in names]]

type Discrete <: CPD
domain::AbstractArray{Any,1}
parameterFunction::Function
domainIndex::Dict{Any,Integer}
function Discrete{T}(domain::AbstractArray{T,1}, parameterFunction::Function)
new(domain, parameterFunction, [domain[i] => i for i in 1:length(domain)])
end
function Discrete{T,U}(domain::AbstractArray{T,1}, parameters::AbstractArray{U,1})
new(domain, a->parameters, [domain[i] => i for i in 1:length(domain)])
end
function Discrete{T}(domain::AbstractArray{T,1}, names::AbstractArray{NodeName,1}, dict::Dict)
new(domain, a->dict[[a[n] for n in names]], [domain[i] => i for i in 1:length(domain)])
end
end

type Bernoulli <: CPD
parameterFunction::Function
function Bernoulli(parameterFunction::Function)
new(parameterFunction)
end
function Bernoulli(parameter::Real = 0.5)
new(a->parameter)
end
function Bernoulli(names::AbstractArray{NodeName,1}, dict::Dict)
new(a->dict[[a[n] for n in names]])
end
end

type Normal <: CPD
parameterFunction::Function
function Normal(parameterFunction::Function)
new(parameterFunction)
end
function Normal(mu::Real, sigma::Real)
new(a->[mu, sigma])
end
end

end # module CPDs

domain(d::CPDs.Discrete) = DiscreteDomain(d.domain)
domain(d::CPDs.Bernoulli) = BinaryDomain()
domain(d::CPDs.Normal) = RealDomain()

function pdf(d::CPDs.Discrete, a::Assignment)
(x) -> d.parameterFunction(a)[d.domainIndex[x]]
end

function pdf(d::CPDs.Bernoulli, a::Assignment)
(x) -> x != 0 ? d.parameterFunction(a) : (1 - d.parameterFunction(a))
end

function rand(d::CPDs.Discrete, a::Assignment)
p = d.parameterFunction(a)
n = length(p)
i = 1
c = p[1]
u = rand()
while c < u && i < n
c += p[i += 1]
end
return d.domain[i]
end

function rand(d::CPDs.Bernoulli, a::Assignment)
rand() < d.parameterFunction(a)
end

function pdf(d::CPDs.Normal, a::Assignment)
(mu::Float64, sigma::Float64) = d.parameterFunction(a)
function f(x)
z = (x - mu)/sigma
exp(-0.5*z*z)/(2π*sigma)
end
end

function rand(d::CPDs.Normal, a::Assignment)
(mu::Float64, sigma::Float64) = d.parameterFunction(a)
mu + randn() * sigma
end
Loading

0 comments on commit bb02028

Please sign in to comment.