Euler在研究阶乘插值时引入了$B$函数(第一型Euler积分)和$\Gamma$函数(第二型Euler积分),且它们有关系: \begin{align*} B(m,n) = \int_0^1 x^{m-1} (1-x)^{n-1} \diff x = \frac{\Gamma(m) \Gamma(n)}{\Gamma(m+n)} \end{align*} 于是定义函数如下: \begin{align*} f_{m,n}(x) = \frac{x^{m-1} (1-x)^{n-1}}{B(m,n)} = \frac{\Gamma(m+n)}{\Gamma(m) \Gamma(n)} x^{m-1} (1-x)^{n-1} \end{align*} 易知其在$[0,1]$上的积分为$1$,故可以将其看成一个概率密度函数,由于这个函数的分母是$B$函数,我们称其对应的概率分布为参数为$m$、$n$的Beta分布。
下面给出Beta分布的数字特征,易知其$k$阶矩为 \begin{align*} \Ebb[x^k] & = \int_0^1 x^k f_{m,n}(x) \diff x = \int_0^1 x^k \frac{x^{m-1} (1-x)^{n-1}}{B(m,n)} \diff x = \frac{B(m+k,n)}{B(m,n)} \int_0^1 \frac{x^{m+k-1} (1-x)^{n-1}}{B(m+k, n)} \diff x \\ & = \frac{B(m+k, n)}{B(m,n)} = \frac{\Gamma(m+k) \Gamma(n)}{\Gamma(m+k+n)} / \frac{\Gamma(m) \Gamma(n)}{\Gamma(m+n)} = \frac{\Gamma(m+k) \Gamma(m+n)}{\Gamma(m+k+n) \Gamma(m)} \end{align*} 于是 \begin{align*} \Ebb[x] = \frac{\Gamma(m+1) \Gamma(m+n)}{\Gamma(m+1+n) \Gamma(m)} = \frac{m}{m+n}, \quad \Ebb[x^2] = \frac{\Gamma(m+2) \Gamma(m+n)}{\Gamma(m+2+n) \Gamma(m)} = \frac{(m+1)m}{(m+n+1)(m+n)} \end{align*} 故其均值和方差分别为 \begin{align*} \Ebb[x] = \frac{m}{m+n}, \quad \Vbb[x] = \frac{(m+1)m}{(m+n+1)(m+n)} - \left(\frac{m}{m+n}\right)^2 = \frac{mn}{(m+n+1)(m+n)^2} \end{align*}
$B$函数是二元的,可将其推广到$k+1$元: \begin{align} \label{eq: multivariate beta function} B(m_1, \ldots, m_{k+1}) = \int_0^1 x_1^{m_1-1} \int_0^{1-x_1} x_2^{m_2-1} \cdots \int_0^{1-x_1 - \cdots - x_{k-1}} x_k^{m_k-1} (1 - x_1 - \cdots - x_k)^{m_{k+1}-1} \diff \xv \end{align} 注意式(\ref{eq: multivariate beta function})是一个$k$重积分,考察最里面关于$x_k$的积分,即 \begin{align*} E_k(m_k, m_{k+1}) = \int_0^{1-x_1 - \cdots - x_{k-1}} x_k^{m_k-1} (1 - x_1 - \cdots - x_k)^{m_{k+1}-1} \diff x_k = \int_0^t x_k^{m_k-1} (t - x_k)^{m_{k+1}-1} \diff x_k \end{align*} 其中$t = 1 - x_1 - \cdots - x_{k-1}$。由分部积分有 \begin{align*} E_k(m_k, m_{k+1}) & = \frac{1}{m_k} \int_0^t (t - x_k)^{m_{k+1}-1} \diff x_k^{m_k} \\ & = \left. \frac{1}{m_k} (t - x_k)^{m_{k+1}-1} x_k^{m_k} \right|_0^t + \frac{1}{m_k} \int_0^t x_k^{m_k} (m_{k+1}-1) (t - x_k)^{m_{k+1}-2} \diff x_k \\ & = \frac{m_{k+1}-1}{m_k} E_k(m_k+1, m_{k+1}-1) \end{align*} 于是递推下去有(这里设$m_{k+1}$为正整数;一般情形作代换$x_k = tu$可直接化为$B$函数,结论相同) \begin{align*} E_k(m_k, m_{k+1}) & = \frac{m_{k+1}-1}{m_k} \cdot \frac{m_{k+1}-2}{m_k+1} E_k(m_k+2, m_{k+1}-2) \\ & = \cdots \\ & = \frac{m_{k+1}-1}{m_k} \cdots \frac{1}{m_k + m_{k+1} - 2} E_k(m_k + m_{k+1} - 1, 1) \end{align*} 又 \begin{align*} E_k(m_k + m_{k+1} - 1, 1) = \int_0^t x_k^{m_k + m_{k+1} - 2} \diff x_k = \left. 
\frac{x_k^{m_k + m_{k+1} - 1}}{m_k + m_{k+1} - 1} \right|_0^t = \frac{t^{m_k + m_{k+1} - 1}}{m_k + m_{k+1} - 1} \end{align*} 于是 \begin{align*} E_k(m_k, m_{k+1}) = \frac{\Gamma(m_{k+1}) \Gamma(m_k)}{\Gamma(m_{k+1} + m_k)} (1-x_1 - \cdots - x_{k-1})^{m_k + m_{k+1} - 1} \end{align*} 将其回代入式(\ref{eq: multivariate beta function}),接着考察最里面关于$x_{k-1}$的积分 \begin{align*} E_{k-1}(m_{k-1}, m_k + m_{k+1}) = \frac{\Gamma(m_{k+1}) \Gamma(m_k)}{\Gamma(m_{k+1} + m_k)} \int_0^t x_{k-1}^{m_{k-1}-1} (t - x_{k-1})^{m_k + m_{k+1} - 1} \diff x_{k-1} \end{align*} 其中$t = 1 - x_1 - \cdots - x_{k-2}$。由分部积分有 \begin{align*} E_{k-1}(m_{k-1}, m_k + m_{k+1}) & = \frac{\Gamma(m_{k+1}) \Gamma(m_k)}{\Gamma(m_{k+1} + m_k)} \cdot \frac{\Gamma(m_{k+1} + m_k) \Gamma(m_{k-1})}{\Gamma(m_{k+1} + m_k + m_{k-1})} t^{m_{k+1} + m_k + m_{k-1} - 1} \\ & = \frac{\Gamma(m_{k+1}) \Gamma(m_k) \Gamma(m_{k-1})}{\Gamma(m_{k+1} + m_k + m_{k-1})} (1-x_1 - \cdots - x_{k-2})^{m_{k+1} + m_k + m_{k-1} - 1} \end{align*} 不断重复这个过程可知 \begin{align} \label{eq: E2} E_2(m_2, m_{k+1} + m_k + \cdots + m_3) = \frac{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_2)}{\Gamma(m_{k+1} + m_k + \cdots + m_2)} (1-x_1)^{m_{k+1} + m_k + \cdots + m_2 - 1} \end{align} 于是最终对$x_1$积分可得 \begin{align*} B(m_1, \ldots, m_{k+1}) & = \int_0^1 x_1^{m_1-1} \frac{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_2)}{\Gamma(m_{k+1} + m_k + \cdots + m_2)} (1-x_1)^{m_{k+1} + m_k + \cdots + m_2 - 1} \diff x_1 \\ & = \frac{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_2)}{\Gamma(m_{k+1} + m_k + \cdots + m_2)} \cdot \frac{\Gamma(m_{k+1} + m_k + \cdots + m_2) \Gamma(m_1)}{\Gamma(m_{k+1} + m_k + \cdots + m_1)} 1^{m_{k+1} + m_k + \cdots + m_1 - 1} \\ & = \frac{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_1)}{\Gamma(m_{k+1} + m_k + \cdots + m_1)} \end{align*} 记$x_{k+1} = 1 - x_1 - \cdots - x_k$,$\mv = (m_1, \ldots, m_{k+1})$,定义函数如下: \begin{align*} f_{\mv} (\xv) = \frac{\Gamma(m_{k+1} + m_k + \cdots + m_1)}{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_1)} \prod_{i=1}^{k+1} x_i^{m_i - 1} \end{align*} 由上面的推导可知$f_{\mv}(\xv)$的$k$重积分为$1$,故可以将其看成一个概率密度函数,我们称其对应的概率分布为参数为$\mv$的Dirichlet分布。
下面给出Dirichlet分布的数字特征,易知 \begin{align*} & x_j^n f_{\mv} (\xv) = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1}) \cdots \Gamma(m_1)} x_j^n \prod_{i=1}^{k+1} x_i^{m_i - 1} \\ & = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1} + \cdots + m_j + n + \cdots + m_1)} \cdot \frac{\Gamma(m_j + n)}{\Gamma(m_j)} \cdot \frac{\Gamma(m_{k+1} + \cdots + m_j + n + \cdots + m_1)}{\Gamma(m_{k+1}) \cdots \Gamma(m_j + n) \cdots \Gamma(m_1)} x_j^n \prod_{i=1}^{k+1} x_i^{m_i - 1} \end{align*} 于是 \begin{align*} \Ebb[x_j] & = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1} + \cdots + m_j + 1 + \cdots + m_1)} \frac{\Gamma(m_j + 1)}{\Gamma(m_j)} = \frac{m_j}{m_{k+1} + \cdots + m_1} \\ \Ebb[x_j^2] & = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1} + \cdots + m_j + 2 + \cdots + m_1)} \frac{\Gamma(m_j + 2)}{\Gamma(m_j)} = \frac{(m_j+1)m_j}{(m_{k+1} + \cdots + m_1 + 1)(m_{k+1} + \cdots + m_1)} \end{align*} 故其均值和方差分别为 \begin{align*} \Ebb[x_j] & = \frac{m_j}{m_{k+1} + \cdots + m_1} \\ \Vbb[x_j] & = \frac{(m_j+1)m_j}{(m_{k+1} + \cdots + m_1 + 1)(m_{k+1} + \cdots + m_1)} - \left(\frac{m_j}{m_{k+1} + \cdots + m_1}\right)^2 \\ & = \frac{m_j (m_{k+1} + \cdots + m_1 - m_j)}{(m_{k+1} + \cdots + m_1+1)(m_{k+1} + \cdots + m_1)^2} \end{align*} 又对$p \neq q$有 \begin{align*} x_p x_q f_{\mv} (\xv) & = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1}) \cdots \Gamma(m_1)} x_p x_q \prod_{i=1}^{k+1} x_i^{m_i - 1} \\ & = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1} + \cdots + m_p + 1 + \cdots + m_q + 1 + \cdots + m_1)} \cdot \frac{\Gamma(m_p + 1)}{\Gamma(m_p)} \cdot \frac{\Gamma(m_q + 1)}{\Gamma(m_q)} \\ & \qquad \qquad \cdot \frac{\Gamma(m_{k+1} + \cdots + m_p + 1 + \cdots + m_q + 1 + \cdots + m_1)}{\Gamma(m_{k+1}) \cdots \Gamma(m_p + 1) \cdots \Gamma(m_q + 1) \cdots \Gamma(m_1)} x_p x_q \prod_{i=1}^{k+1} x_i^{m_i - 1} \end{align*} 于是 \begin{align*} \Ebb[x_p x_q] & = \frac{\Gamma(m_{k+1} + \cdots + m_1)}{\Gamma(m_{k+1} + \cdots + m_p + 1 + \cdots + m_q + 1 + \cdots + m_1)} \cdot 
\frac{\Gamma(m_p + 1)}{\Gamma(m_p)} \cdot \frac{\Gamma(m_q + 1)}{\Gamma(m_q)} \\ & = \frac{m_p m_q}{(m_{k+1} + \cdots + m_1 + 1)(m_{k+1} + \cdots + m_1)} \end{align*} 故协方差为 \begin{align*} \cov(x_p, x_q) & = \Ebb[x_p x_q] - \Ebb[x_p] \Ebb[x_q] \\ & = \frac{m_p m_q}{(m_{k+1} + \cdots + m_1 + 1)(m_{k+1} + \cdots + m_1)} - \frac{m_p}{m_{k+1} + \cdots + m_1} \cdot \frac{m_q}{m_{k+1} + \cdots + m_1} \\ & = \frac{-m_p m_q}{(m_{k+1} + \cdots + m_1+1)(m_{k+1} + \cdots + m_1)^2} \end{align*}
由式(\ref{eq: E2})并乘上$f_{\mv}$的归一化常数知 \begin{align*} \pr(x_1 = t) & = \frac{\Gamma(m_{k+1} + m_k + \cdots + m_1)}{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_1)} t^{m_1 - 1} \cdot \frac{\Gamma(m_{k+1}) \Gamma(m_k) \cdots \Gamma(m_2)}{\Gamma(m_{k+1} + m_k + \cdots + m_2)} (1-t)^{m_{k+1} + m_k + \cdots + m_2 - 1} \\ & = \frac{\Gamma(m_{k+1} + m_k + \cdots + m_1)}{\Gamma(m_1) \Gamma(m_{k+1} + m_k + \cdots + m_1 - m_1)} t^{m_1 - 1} (1-t)^{m_{k+1} + m_k + \cdots + m_1 - m_1 - 1} \end{align*} 由对称性可知 \begin{align*} \pr(x_i = t) = \frac{\Gamma(m_{k+1} + m_k + \cdots + m_1)}{\Gamma(m_i) \Gamma(m_{k+1} + m_k + \cdots + m_1 - m_i)} t^{m_i - 1} (1-t)^{m_{k+1} + m_k + \cdots + m_1 - m_i - 1} \end{align*} 这意味着Dirichlet分布的变量$x_i$的边际分布是参数为$m_i$、$m_{k+1} + m_k + \cdots + m_1 - m_i$的Beta分布。