import pandas as pd
# ----------------------------
## Weather data preprocessing
# ----------------------------
def join_multilevel_col(col: tuple[str, str, str]) -> tuple[str, str]:
    """Collapse a three-level column label into a two-level one.

    The first level is kept as-is. The second and third levels are joined
    with an underscore, unless the third level is empty or is a pandas
    placeholder containing ``"Unnamed:"``, in which case only the second
    level is used.

    Args:
        col: A ``(level0, level1, level2)`` column label.

    Returns:
        ``(level0, level1)`` when there is no meaningful third level,
        otherwise ``(level0, f"{level1}_{level2}")``.

    Raises:
        ValueError: If ``col`` does not unpack into exactly three items.
    """
    level0, level1, level2 = col
    # A blank or "Unnamed: ..." third level carries no information; drop it.
    if not level2 or "Unnamed:" in level2:
        return (level0, level1)
    return (level0, f"{level1}_{level2}")
# Load the CSV; its header spans three rows, so the columns become a 3-level MultiIndex.
df = pd.read_csv(
    "../data/monthly-average-temp-2024.csv",
    header=[0, 1, 2],
)

# Column label of the year-month field.
# NOTE(review): assumes the parsed header yields exactly this tuple — confirm against the CSV.
idx_col = ("Unnamed: 0_level_0", "年月", "Unnamed: 0_level_2")

# Mapping from the flattened source column names to English identifiers.
rename_dict = {
    "年月": "YearMonth",
    "level_1": "City",
    "平均気温(℃)": "average_temp",
    "平均気温(℃)_品質情報": "average_temp_QualityInfo",
    "平均気温(℃)_均質番号": "average_temp_KinshitsuNumber",
    "降水量の合計(mm)": "total_precipitation_mm",
    "降水量の合計(mm)_品質情報": "total_precipitation_mm_QualityInfo",
    "降水量の合計(mm)_均質番号": "total_precipitation_mm_KinshitsuNumber",
    "降水量の合計(mm)_現象なし情報": "total_precipitation_mm_NoPhenomenonInfo",
}

# Use the year-month column as the index and give it a readable name (optional).
df = df.set_index(idx_col)
df.index.name = "年月"

# Flatten the three header levels down to two.
df.columns = pd.Index([join_multilevel_col(label) for label in df.columns])

# Move the first column level into the rows (it comes back from reset_index as
# "level_1" and is renamed to "City"), rename the columns to English, and parse
# the year-month strings into datetimes.
df = (
    df.stack(level=0, future_stack=True)
    .reset_index()
    .rename(columns=rename_dict)
    .assign(
        YearMonth=lambda frame: pd.to_datetime(frame["YearMonth"], format="%Y/%m")
    )
)
# ----------------------------
## plot
# ----------------------------
import plotly.express as px
# Colour sequence passed to the plots below (named after the Okabe-Ito palette).
okabe_ito_palette = [
    "#E69F00",  # orange
    "#56B4E9",  # sky blue
    "#009E73",  # bluish green
    "#F0E442",  # yellow
    "#0072B2",  # blue
    "#D55E00",  # vermillion
    "#CC79A7",  # reddish purple
    "#000000",  # black
]

# 1. Rank cities by their maximum average temperature, highest first.
peak_temp_by_city = df.groupby("City")["average_temp"].max()
city_order = peak_temp_by_city.sort_values(ascending=False).index.tolist()

# 2. Make "City" an ordered categorical so that sorting follows city_order,
#    then sort by city and month and renumber the rows from zero.
df["City"] = pd.Categorical(df["City"], categories=city_order, ordered=True)
df = df.sort_values(by=["City", "YearMonth"], ignore_index=True)
# ----------------------------
## Bad Example
# ----------------------------
# Line chart of average temperature over time, one coloured line per city.
fig = px.line(
    data_frame=df,
    x="YearMonth",
    y="average_temp",
    color="City",
    color_discrete_sequence=okabe_ito_palette,
    template="ggplot2",
    labels={"City": "観測都市"},
)

# Title, axis titles, figure size and legend placement.
# The y-axis title here carries no unit.
fig.update_layout(
    title={
        "text": "月別平均気温の推移(都市別) - Bad Example",
        "x": 0.075,
        "xanchor": "left",
        "y": 0.95,
        "yanchor": "top",
        "font": {"size": 20},
    },
    margin={"t": 80},
    xaxis_title="年月",
    yaxis_title="平均気温",
    xaxis={"tickformat": "%Y-%m"},
    width=640,  # (px)
    height=400,
    legend={
        "orientation": "h",  # horizontal layout
        "yanchor": "bottom",
        "y": 1.02,  # slightly above the plot
        "xanchor": "center",
        "x": 0.5,  # center-align
    },
)

fig.show()
# ----------------------------
## Better Example
# ----------------------------
# Line chart of average temperature over time, one coloured line per city.
fig = px.line(
    data_frame=df,
    x="YearMonth",
    y="average_temp",
    color="City",
    color_discrete_sequence=okabe_ito_palette,
    template="ggplot2",
    labels={"City": "観測都市"},
)

# Title, axis titles, figure size and legend placement.
# The y-axis title states the unit, and a bottom margin is set explicitly.
fig.update_layout(
    title={
        "text": "月別平均気温の推移(都市別) - Better Example",
        "x": 0.075,
        "xanchor": "left",
        "y": 0.95,
        "yanchor": "top",
        "font": {"size": 20},
    },
    margin={"t": 80, "b": 40},
    xaxis_title="年月",
    yaxis_title="平均気温 [°C]",
    xaxis={"tickformat": "%Y-%m"},
    width=640,  # (px)
    height=400,
    legend={
        "orientation": "h",  # horizontal layout
        "yanchor": "bottom",
        "y": 1.02,  # slightly above the plot
        "xanchor": "center",
        "x": 0.5,  # center-align
    },
)

# Data-source credit as a hyperlink, positioned in paper coordinates
# at the left edge and below the plotting area (y < 0).
fig.add_annotation(
    text="<a href='https://www.data.jma.go.jp/risk/obsdl/index.php' target='_blank' style='color:gray;'>出所: 気象庁 > 過去の気象データより作成</a>",
    xref="paper",
    yref="paper",
    x=0,
    y=-0.2,
    showarrow=False,
    font={"size": 12},
    align="left",
)

fig.show()