Scatterplot

Story-telling with data 2/N

公開日: 2022-11-02
更新日: 2023-01-10

Table of Contents

成果物

Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## DGP (data-generating process)
# Simulated data for the scatterplots below: X is normally distributed and
# Y is a parabola in X plus Gaussian noise.
N = 100      # number of simulated observations
mu = 2500    # mean of X; also the X value where the parabola bottoms out
std = 250    # standard deviation of X
np.random.seed(42)  # fixed seed so the figure is reproducible

# Draw order matters for reproducibility: X is sampled before the noise term.
X = np.random.normal(mu, std, N)
error = np.random.normal(0, 0.05, N)
# Centre the parabola on `mu` instead of repeating the literal 2500, so the
# curve stays aligned with X if `mu` is changed.
Y = (X - mu)**2 / 500000 + 1.5 + error

## plot
# One row, two columns: the plain chart goes on the left (axes[0]); the
# right-hand axes (axes[1]) is filled in further down.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

## before
ax_before = axes[0]
x_bar, y_bar = np.mean(X), np.mean(Y)

ax_before.set_title('Cost per mile by miles driven', loc='left', fontsize=15)
ax_before.set_xlabel('Miles driven per month', loc='left')
ax_before.set_ylabel('Cost per mile ($)')

# The mean marker is added first, then the data cloud in the default colour.
ax_before.scatter(x_bar, y_bar, c='black', s=50)
ax_before.scatter(X, Y)

# Place the label at 99% of the mean X and 102% of the mean Y so it does not
# sit directly on top of the marker.
ax_before.annotate('AVG', xy=(x_bar * 0.99, y_bar * 1.02), fontweight='bold')

## after
# Restyled panel: same data as the left panel, but points whose Y is above
# the mean are drawn in orange and a dashed line marks the mean of Y.
mean_x = np.mean(X)
mean_y = np.mean(Y)
above_avg_index = Y > mean_y  # boolean mask, True where Y exceeds its mean


axes[1].set_title('Cost per mile by miles driven',
                  loc='left',
                  fontsize=15)
axes[1].set_xlabel('Miles driven per month', loc='left')
axes[1].set_ylabel('Cost per mile ($)')
axes[1].scatter(mean_x, mean_y, c='black', s=50)
# Span the observed X range rather than the fixed 1800-3000 interval, so the
# reference line still fits the data if the DGP parameters or seed change.
axes[1].hlines(y=mean_y,
               xmin=np.min(X), xmax=np.max(X),
               linestyles='dashed',
               colors='black'
               )
axes[1].scatter(X[above_avg_index], Y[above_avg_index], c='orange')
# Points at or below the mean keep the default cycle colour.
axes[1].scatter(X[~above_avg_index], Y[~above_avg_index])
# Trailing semicolon kept from the original: it suppresses the echoed return
# value when this is the last statement of a notebook cell.
axes[1].annotate('AVG', 
                 xy=(mean_x*0.99, mean_y*1.02),
                 fontweight='bold',
                 c='black');




Share Buttons
Share on:

Feature Tags
Leave a Comment
(注意:GitHub Accountが必要となります)