% sample.tex -- JMLR sample article (forked from JmlrOrg/jmlr-style-file)
\documentclass[twoside,11pt]{article}
% Any additional packages needed should be included after jmlr2e.
% Note that jmlr2e.sty includes epsfig, amssymb, natbib and graphicx,
% and defines many common macros, such as 'proof' and 'example'.
%
% It also sets the bibliographystyle to plainnat; for more information on
% natbib citation styles, see the natbib documentation, a copy of which
% is archived at https://www.jmlr.org/format/natbib.pdf
% Available options for package jmlr2e are:
%
% - abbrvbib : use abbrvnat for the bibliography style
% - nohyperref : do not load the hyperref package
% - preprint : remove JMLR specific information from the template,
% useful for example for posting to preprint servers.
%
% Example of using the package with custom options:
%
% \usepackage[abbrvbib, preprint]{jmlr2e}
\usepackage{jmlr2e}
% Definitions of handy macros can go here
% \dataset: calligraphic D denoting the data set; math mode only.
% Uses \mathcal instead of the obsolete LaTeX 2.09 switch \cal.
\newcommand*{\dataset}{\mathcal{D}}
% \fracpartial{f}{x}: partial derivative of #1 with respect to #2; math mode only.
\newcommand*{\fracpartial}[2]{\frac{\partial #1}{\partial #2}}
% Heading arguments are {volume}{year}{pages}{date submitted}{date published}{paper id}{author-full-names}
\jmlrheading{1}{2000}{1-48}{4/00}{10/00}{meila00a}{Marina Meil\u{a} and Michael I. Jordan}
% Short headings should be running head and authors last names
\ShortHeadings{Learning with Mixtures of Trees}{Meil\u{a} and Jordan}
\firstpageno{1}
\begin{document}
\title{Learning with Mixtures of Trees}
\author{\name Marina Meil\u{a} \email mmp@stat.washington.edu \\
\addr Department of Statistics\\
University of Washington\\
Seattle, WA 98195-4322, USA
\AND
\name Michael I.\ Jordan \email jordan@cs.berkeley.edu \\
\addr Division of Computer Science and Department of Statistics\\
University of California\\
Berkeley, CA 94720-1776, USA}
\editor{Kevin Murphy and Bernhard Sch{\"o}lkopf}
\maketitle
\begin{abstract}% <- trailing '%' for backward compatibility of .sty file
This paper describes the mixtures-of-trees model, a probabilistic
model for discrete multidimensional domains. Mixtures-of-trees
generalize the probabilistic trees of \citet{chow:68}
in a different and complementary direction to that of Bayesian networks.
We present efficient algorithms for learning mixtures-of-trees
models in maximum likelihood and Bayesian frameworks.
We also discuss additional efficiencies that can be
obtained when data are ``sparse,'' and we present data
structures and algorithms that exploit such sparseness.
Experimental results demonstrate the performance of the
model for both density estimation and classification.
We also discuss the sense in which tree-based classifiers
perform an implicit form of feature selection, and demonstrate
a resulting insensitivity to irrelevant attributes.
\end{abstract}
\begin{keywords}
Bayesian networks, mixture models, Chow-Liu trees
\end{keywords}
\section{Introduction}
Probabilistic inference has become a core technology in AI,
largely due to developments in graph-theoretic methods for the
representation and manipulation of complex probability
distributions~\citep{pearl:88}. Whether in their guise as
directed graphs (Bayesian networks) or as undirected graphs (Markov
random fields), \emph{probabilistic graphical models} have a number
of virtues as representations of uncertainty and as inference engines.
Graphical models allow a separation between qualitative, structural
aspects of uncertain knowledge and the quantitative, parametric aspects
of uncertainty\dots

\noindent\emph{Remainder omitted in this sample. See \url{https://www.jmlr.org/papers/} for full paper.}
% Acknowledgements should go at the end, before appendices and references
\acks{We would like to acknowledge support for this project
from the National Science Foundation (NSF grant IIS-9988642)
and the Multidisciplinary Research Program of the Department
of Defense (MURI N00014-00-1-0637). }
% Manual newpage inserted to improve layout of sample file - not
% needed in general before appendices/bibliography.
\newpage
\appendix
\section*{Appendix A.}
\label{app:theorem}
% Note: in this sample, the section number is hard-coded in. Following
% proper LaTeX conventions, it should properly be coded as a reference:
%In this appendix we prove the following theorem from
%Section~\ref{sec:textree-generalization}:
In this appendix we prove the following theorem from
Section~6.2:

\noindent
\textbf{Theorem} {\itshape Let $u,v,w$ be discrete variables such that $v, w$ do
not co-occur with $u$ (i.e., $u\neq0\;\Rightarrow \;v=w=0$ in a given
dataset $\dataset$). Let $N_{v0},N_{w0}$ be the number of data points for
which $v=0, w=0$ respectively, and let $I_{uv},I_{uw}$ be the
respective empirical mutual information values based on the sample
$\dataset$. Then
\[
N_{v0} \;>\; N_{w0}\;\;\Rightarrow\;\;I_{uv} \;\leq\;I_{uw}
\]
with equality only if $u$ is identically 0.} \hfill\BlackBox

\noindent
\textbf{Proof}. We use the notation:
\[
P_v(i) \;=\;\frac{N_v^i}{N},\;\;\;i \neq 0;\;\;\;
P_{v0}\;\equiv\;P_v(0)\; = \;1 - \sum_{i\neq 0}P_v(i).
\]
These values represent the (empirical) probabilities of $v$
taking value $i\neq 0$ and 0 respectively. Entropies will be denoted
by $H$. We aim to show that $\fracpartial{I_{uv}}{P_{v0}} < 0$\dots

\noindent\emph{Remainder omitted in this sample. See \url{https://www.jmlr.org/papers/} for full paper.}
\vskip 0.2in
\bibliography{sample}
\end{document}