These notes were taken while following along with the group session on privacy in the UiO course IN-STK5000 (H20).
import numpy
import matplotlib.pyplot as plt
# Simulated population: 10000 binary answers, each drawn uniformly from {0, 1}.
true_values = numpy.random.choice([0,1], 10000)
true_values[:10]  # peek at the first ten answers
true_values.mean()  # empirical fraction of 1s in the population
def randomized_response(a, theta=0.5):
    """Apply the randomized-response mechanism to a vector of binary answers.

    Every entry is reported truthfully with probability ``theta``; otherwise
    it is replaced by a fresh draw taken uniformly from {0, 1}.

    Parameters
    ----------
    a : array-like
        One-dimensional sequence of true answers. It is never modified.
    theta : float, optional
        Per-entry probability of keeping the true answer (default 0.5).

    Returns
    -------
    numpy.ndarray
        A new array of the same length as ``a`` holding the noisy answers.
    """
    # numpy.array always copies, so the caller's data stays untouched and
    # plain Python sequences are accepted as well as arrays.
    response = numpy.array(a)
    size = len(response)
    # Boolean mask: True means "tell the truth" for that entry.
    keep = numpy.random.choice([True, False], size, p=(theta, 1 - theta))
    noise = numpy.random.choice([0, 1], size)
    # Wherever the mask is False, overwrite the true answer with the noise.
    response[~keep] = noise[~keep]
    return response
# Scratch demo of boolean-mask indexing, the same trick randomized_response uses.
r = numpy.random.choice([True, False], 10)  # random boolean mask of length 10
l = numpy.linspace(0, 9, 10)  # the values 0.0, 1.0, ..., 9.0
numpy.random.choice([0,1])  # a single draw from {0, 1}
r
l
l[~r]  # the entries of l at positions where the mask is False
l  # l itself is unchanged by the indexing above
# Both the true answers and the noise are fair coins, so the mean of the
# noisy answers stays close to 0.5 whatever theta is.
randomized_response(true_values).mean()
randomized_response(true_values, 0.1).mean()
# Biased population: 0 with probability 0.3, 1 with probability 0.7.
true_values_unfair = numpy.random.choice([0,1], 10000, p=(.3, .7))
true_values_unfair.mean()
randomized_response(true_values_unfair).mean()
p = 0.7  # fraction of 1s in the unfair data
theta = 0.5
# Expected mean of the noisy answers: with probability theta the truth
# (mean p) is reported, otherwise a fair coin (mean 0.5).
theta * p + (1-theta)*0.5
randomized_response(true_values_unfair, 0.7).mean()
p = 0.7  # fraction of 1s in the unfair data (same sampling probability as above)
theta = 0.7
# Expected mean for theta = 0.7; compare with the empirical mean just above.
theta * p + (1-theta)*0.5
def approx_p(sample, theta=0.5):
    """Estimate the fraction of 1s in ``sample`` from one noisy release.

    The mean of the randomized responses has expectation
    ``theta * p + (1 - theta) * 0.5``; this solves that relation for ``p``
    using the observed noisy mean.

    Parameters
    ----------
    sample : array of 0/1 answers passed on to ``randomized_response``.
    theta : probability of a truthful answer used by the mechanism.

    Returns
    -------
    The de-biased estimate of the fraction of 1s.
    """
    noisy_mean = randomized_response(sample, theta).mean()
    coin_contribution = (1 - theta) * 0.5
    return (noisy_mean - coin_contribution) / theta
# Estimate the fraction of 1s in the biased data from noisy answers,
# using two different truth probabilities.
approx_p(true_values_unfair, 0.6)
approx_p(true_values_unfair, 0.7)
def gen_sample(p, n=10000):
    """Draw ``n`` independent binary values, each equal to 1 with probability ``p``.

    Parameters
    ----------
    p : float
        Probability of drawing a 1 (0 is drawn with probability ``1 - p``).
    n : int, optional
        Number of values to draw (default 10000).

    Returns
    -------
    numpy.ndarray
        Array of length ``n`` containing 0s and 1s.
    """
    # Use the requested size; it was previously fixed at 10000 regardless of n.
    return numpy.random.choice([0, 1], n, p=(1 - p, p))
def show_hist(p, theta):
    """Overlay histograms of noisy estimates and plain sample means.

    Runs 1000 repetitions of each: ``approx_p`` applied to a fresh sample
    from ``gen_sample(p)`` (labelled 'noisy'), and the mean of a fresh
    sample with no noise added (labelled 'true').

    Parameters
    ----------
    p : probability of a 1 in each generated sample.
    theta : truth probability handed to ``approx_p``.
    """
    repetitions = range(1000)
    noisy_estimates = [approx_p(gen_sample(p), theta) for _ in repetitions]
    sample_means = [gen_sample(p).mean() for _ in repetitions]
    # Same bin count and transparency for both so the spreads are comparable.
    for series, tag in ((noisy_estimates, 'noisy'), (sample_means, 'true')):
        plt.hist(series, bins=15, alpha=0.5, label=tag)
    plt.legend()
# Compare the spread of the noisy estimate against the plain sample mean
# for several (p, theta) settings.
show_hist(0.8, 0.5)
show_hist(0.3, 0.5)
show_hist(0.8, 0.9)  # answers are truthful 90% of the time
show_hist(0.8, 0.2)  # answers are truthful only 20% of the time
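Reminder ($\epsilon$-differential privacy): for every output $a$ and every pair of neighbouring datasets $x$, $x'$,
$$ \Big| \log \frac{\pi(a \mid x)}{\pi(a \mid x')} \Big| \leq \epsilon $$
N = 10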
# x and xp are two datasets of length N that differ only in the last entry.
x = numpy.zeros(N)
xp = x.copy()
xp[-1] = 1
a = x.copy()  # the fixed output whose probability is compared under x and xp
# Monte Carlo estimate (5000 runs) of the probability that the mechanism
# outputs exactly a when the input is x ...
numpy.fromiter(((randomized_response(x) == a).all() for _ in range(5000)), bool).mean()
# ... and when the input is xp.
numpy.fromiter(((randomized_response(xp) == a).all() for _ in range(5000)), bool).mean()
thetas = numpy.linspace(0.1, 0.5, 10)
thetas
# For each theta, the ratio of the two estimated probabilities.
# NOTE(review): if no run on xp reproduces a, the denominator estimate is 0
# and the ratio is not finite.
ratios = [
numpy.fromiter(((randomized_response(x, theta) == a).all() for _ in range(5000)), bool).mean() /
numpy.fromiter(((randomized_response(xp, theta) == a).all() for _ in range(5000)), bool).mean()
for theta in thetas
]
# Empirical log-ratio as a function of theta, to compare against epsilon.
plt.scatter(thetas, numpy.log(ratios))
Example: Most popular pet
# Fix the seed so the pet example draws the same data on every run.
numpy.random.seed(42)
universe = ['cat', 'dog', 'hamster', 'fish']
# Random popularity weights, normalised so they sum to 1.
probs = numpy.random.uniform(size=len(universe))
probs /=probs.sum()
values = numpy.random.choice(universe, size=100, p=probs)  # 100 simulated answers
values
probs
# Distinct pets that were drawn and how often each one appears.
labels, counts = numpy.unique(values, return_counts=True)
plt.bar(labels, counts)
q = counts  # score of each pet: its raw count
plt.bar(labels, counts**2)  # squared counts, plotted for comparison
# Selection probabilities proportional to exp(epsilon * q), i.e. a softmax
# over the scaled scores. Presumably this illustrates the exponential
# mechanism; TODO confirm against the course material.
epsilon = 0.5
pi = numpy.exp(epsilon*q)/numpy.exp(epsilon*q).sum() # softmax
plt.bar(labels, pi)
# Repeat with smaller epsilon: the scaled scores get closer together, so the
# distribution moves toward uniform.
epsilon = 0.1
pi = numpy.exp(epsilon*q)/numpy.exp(epsilon*q).sum()
plt.bar(labels, pi)
epsilon = 0.01
pi = numpy.exp(epsilon*q)/numpy.exp(epsilon*q).sum()
plt.bar(labels, pi)
epsilon = 0.001
pi = numpy.exp(epsilon*q)/numpy.exp(epsilon*q).sum()
plt.bar(labels, pi)