Annals of Statistics 読み会 第一回
TRANSCRIPT
…
©
“Lai (1987) KL-UCB
(Garivier+ 2011) ”
1 2 K
Image from http://www.mrc-bsu.cam.ac.uk/bandit-problems-and-clinical-trials-design/
Image from http://research.microsoft.com/en-us/projects/bandits/
𝑘 𝑁
𝑛 = 1, … , 𝑁
𝑗 ∈ [𝑘]
𝑥𝑛
𝑗 Π𝑗
$S_N = \sum_{n=1}^{N} x_n$
𝑆𝑛/𝑛
𝜇𝑗
UCB
𝜃
$f(x;\theta) = e^{\theta x - \psi(\theta)}$, $\nu(x)$
$\mu(\theta) = \psi'(\theta)$ $\theta$
Bernoulli(p): p 1, 1-p 0
𝑥 ∈ {0,1}
$\theta = \log\frac{p}{1-p}$, $\psi(\theta) = -\log(1-p)$, $\nu(x) = 1$
𝑥 ∈ 𝑅
𝜎 = 1
𝜃
$\psi(\theta) = \frac{\mu^2}{2}$, $\nu(x) = \frac{1}{\sqrt{2\pi}}\, e^{-\frac{x^2}{2}}$
$i^* = \operatorname{argmax}_{i \in [K]} \mu(\theta_i)$
𝛉 = {𝜃1, … , 𝜃𝑘}
𝑅𝑁(𝛉)
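$R_N(\boldsymbol{\theta}) = N\mu^*(\boldsymbol{\theta}) - E_{\boldsymbol{\theta}}[S_N] = \sum_{j:\,\mu(\theta_j) < \mu^*(\boldsymbol{\theta})} \left(\mu^*(\boldsymbol{\theta}) - \mu(\theta_j)\right) E_{\boldsymbol{\theta}}[T_N(j)]$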
𝑇𝑁(𝑗)
{𝜃1, … , 𝜃𝑘}
𝐻 𝛉
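Bayesian regret: $\int R_N(\boldsymbol{\theta})\, dH(\boldsymbol{\theta})$
$\boldsymbol{\theta}$ $\alpha > 0$ $R_N(\boldsymbol{\theta}) < O(N^{\alpha})$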
$\liminf_{N \to \infty} \frac{E_{\boldsymbol{\theta}}[T_N(j)]}{\log N} \ge \frac{1}{I(\theta_j, \theta^*)}$
𝐼(∙,∙)
$\log N / I(\theta_j, \theta^*)$ $\theta_j$ $\theta^*$
𝜇1 > 𝜇2
2
𝜇2 𝜇1
Ω(𝑇)
𝜇2 > 𝜇1 1/𝑁
$\exp(-T_N(2)\, I(\mu_2, \mu_1))$ $T_N(2) = \log N / I(\mu_2, \mu_1)$ $1/N$
𝜇1
𝜇2
𝜇1
𝜇2
𝜇2 > 𝜇1
𝑁 → ∞
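$\int R_N(\boldsymbol{\theta})\, dH(\boldsymbol{\theta}) \ge \frac{1}{2} \sum_{j \in [k]} \left\{ \int h_j(\theta_j^*; \boldsymbol{\theta}_j)\, dH_j(\boldsymbol{\theta}_j) \right\} (\log N)^2$
$\theta_j^* = \max_{i \ne j} \theta_i$, $\boldsymbol{\theta}_j = (\theta_1, \dots, \theta_{j-1}, \theta_{j+1}, \dots, \theta_k)$, $h_j$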
𝑗
𝑅𝑁 𝛉
$j$ $U_{j,N_t(j)}$
$U_{j,r} = \inf\{\theta : \theta \ge \theta_{j,r} \text{ and } r\, I(\theta_{j,r}, \theta) \ge g(r/N)\}$
$g(1/t)$ $g(1/t) \ge \log t + \xi \log\log t$ $\xi$
𝜃
$\theta_{j,r}$ $r/N$
$U_{j,r}$ $I(\theta_{j,r}, \theta)$
$U_{j,r}(t) = \sup\{\theta : r\, I(\theta_{j,r}, \theta) \le f(t)\}$
$f(t) = \log t + 3\log(\log t)$
$\alpha_N = o(N^{-1/2})$, $\beta_N = o((\log N)^{1/2})$, $\alpha_N < \beta_N$
𝑇𝑁 𝑗
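$E_{\boldsymbol{\theta}}[T_N(j)] \sim \frac{\log\left(N(\theta^* - \theta_j)^2\right)}{I(\theta_j, \theta^*)}$ as $N \to \infty$,
s.t. $\beta_N \ge \theta^* - \theta_j \ge \alpha_N$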
𝛽𝑁 ≥ 𝜃∗ − 𝜃𝑗 ≥ 𝛼𝑁
𝑁 → ∞
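$\int R_N(\boldsymbol{\theta})\, dH(\boldsymbol{\theta}) \sim \frac{1}{2} \sum_{j \in [k]} \left\{ \int h_j(\theta_j^*; \boldsymbol{\theta}_j)\, dH_j(\boldsymbol{\theta}_j) \right\} (\log N)^2$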
$E_{\boldsymbol{\theta}}[T_N(j)] \sim \frac{\log N}{I(\theta_j, \theta^*)}$
𝜃𝑗 𝜃∗
$\mu(\theta^*) - \mu(\theta_j)$
𝑗
(𝜇(𝜃∗) − 𝜇(𝜃𝑗))𝑁
𝜃∗ 𝜃𝑗
$b_N = (\log N)^{1/2}$