Project idea: Analysis of weather data
This project revolves around the analysis of weather data provided by Bright Sky, an open-source weather API.
How to proceed?
- Use the provided function `get_weather_data` to receive a pandas `DataFrame`.
- Plot relevant measures and inspect the results.
- Try using a clustering algorithm to distinguish factors. For instance, are there distinct months where `sunshine` is low and high?
- Try different dates and assess how the results change.
In [ ]:
Copied!
# Add your packages here
%pip install pandas
# Add your packages here
%pip install pandas
In [ ]:
Copied!
import requests
import pandas as pd
from typing import Optional
from datetime import date, datetime
import requests
import pandas as pd
from typing import Optional
from datetime import date, datetime
In [6]:
Copied!
def get_weather_data(
    since_date: str,
    end_date: Optional[str] = None,
    dwd_station_id: str = "01766",
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Fetch weather data from the Bright Sky API.

    Example:
        >>> df = get_weather_data(since_date="2017-01-01")
        >>> df.head()

    Args:
        since_date (str): The date to start from. Format is YYYY-MM-DD.
        end_date (str): The date to end at. Format is YYYY-MM-DD.
            If not specified, will default to today.
        dwd_station_id (str): The Deutscher Wetterdienst ID of
            the station of measurement.
        timeout (float): Seconds to wait for the HTTP request before
            giving up.

    Returns:
        pd.DataFrame: A Pandas DataFrame built from the ``weather`` list of
        the JSON response, with ``timestamp`` parsed to datetimes. The frame
        is empty if the API returned no records.

    Raises:
        ValueError: If ``since_date`` or ``end_date`` is not YYYY-MM-DD.
        requests.HTTPError: If the API answers with an error status.
    """
    date_format = "%Y-%m-%d"
    url = "https://api.brightsky.dev/weather"

    validate_date(since_date, "since_date")
    # Parse with the same format the validation uses, so anything that passes
    # validation (e.g. "2017-1-1") is normalised instead of crashing later.
    first_day = datetime.strptime(since_date, date_format).date()

    if end_date:
        validate_date(end_date, "end_date")
        last_day = datetime.strptime(end_date, date_format).date()
    else:
        # If no end date is provided, use today's date.
        last_day = date.today()

    params = {
        "date": first_day.isoformat(),
        "last_date": last_day.isoformat(),
        "dwd_station_id": dwd_station_id,
    }
    headers = {"Accept": "application/json"}

    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()

    df = pd.DataFrame(response.json()["weather"])
    # Parse the timestamps; an empty result has no such column to parse.
    if "timestamp" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"])
    return df
def validate_date(date: str, date_type: str) -> None:
    """Validate that a date string complies with the YYYY-MM-DD format.

    Args:
        date (str): The date string to check.
        date_type (str): Name of the argument being checked; it is only
            used to build the error message.

    Raises:
        ValueError: If ``date`` cannot be parsed as YYYY-MM-DD.
    """
    try:
        # We need to make sure that the correct date format is used
        datetime.strptime(date, "%Y-%m-%d")
    except ValueError:
        # Build ONE message string: passing two arguments to ValueError makes
        # the message render as a tuple. `from None` hides the internal
        # strptime traceback, which adds nothing for the caller.
        raise ValueError(
            f"The date format supplied for `{date_type}` is of the wrong format. "
            "Please use YYYY-MM-DD to use this function"
        ) from None
def get_weather_data(
    since_date: str,
    end_date: Optional[str] = None,
    dwd_station_id: str = "01766",
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Fetch weather data from the Bright Sky API.

    Example:
        >>> df = get_weather_data(since_date="2017-01-01")
        >>> df.head()

    Args:
        since_date (str): The date to start from. Format is YYYY-MM-DD.
        end_date (str): The date to end at. Format is YYYY-MM-DD.
            If not specified, will default to today.
        dwd_station_id (str): The Deutscher Wetterdienst ID of
            the station of measurement.
        timeout (float): Seconds to wait for the HTTP request before
            giving up.

    Returns:
        pd.DataFrame: A Pandas DataFrame built from the ``weather`` list of
        the JSON response, with ``timestamp`` parsed to datetimes. The frame
        is empty if the API returned no records.

    Raises:
        ValueError: If ``since_date`` or ``end_date`` is not YYYY-MM-DD.
        requests.HTTPError: If the API answers with an error status.
    """
    date_format = "%Y-%m-%d"
    url = "https://api.brightsky.dev/weather"

    validate_date(since_date, "since_date")
    # Parse with the same format the validation uses, so anything that passes
    # validation (e.g. "2017-1-1") is normalised instead of crashing later.
    first_day = datetime.strptime(since_date, date_format).date()

    if end_date:
        validate_date(end_date, "end_date")
        last_day = datetime.strptime(end_date, date_format).date()
    else:
        # If no end date is provided, use today's date.
        last_day = date.today()

    params = {
        "date": first_day.isoformat(),
        "last_date": last_day.isoformat(),
        "dwd_station_id": dwd_station_id,
    }
    headers = {"Accept": "application/json"}

    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()

    df = pd.DataFrame(response.json()["weather"])
    # Parse the timestamps; an empty result has no such column to parse.
    if "timestamp" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"])
    return df
def validate_date(date: str, date_type: str) -> None:
    """Validate that a date string complies with the YYYY-MM-DD format.

    Args:
        date (str): The date string to check.
        date_type (str): Name of the argument being checked; it is only
            used to build the error message.

    Raises:
        ValueError: If ``date`` cannot be parsed as YYYY-MM-DD.
    """
    try:
        # We need to make sure that the correct date format is used
        datetime.strptime(date, "%Y-%m-%d")
    except ValueError:
        # Build ONE message string: passing two arguments to ValueError makes
        # the message render as a tuple. `from None` hides the internal
        # strptime traceback, which adds nothing for the caller.
        raise ValueError(
            f"The date format supplied for `{date_type}` is of the wrong format. "
            "Please use YYYY-MM-DD to use this function"
        ) from None
In [5]:
Copied!
# Example usage: request weather data starting at 2017-01-01, leaving the
# end date and station at their defaults, then preview the first rows.
df = get_weather_data("2017-01-01")
df.head()
# Example usage: request weather data starting at 2017-01-01, leaving the
# end date and station at their defaults, then preview the first rows.
df = get_weather_data("2017-01-01")
df.head()
Out[5]:
| timestamp | source_id | precipitation | pressure_msl | sunshine | temperature | wind_direction | wind_speed | cloud_cover | dew_point | relative_humidity | visibility | wind_gust_direction | wind_gust_speed | condition | precipitation_probability | precipitation_probability_6h | solar | fallback_source_ids | icon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2017-01-01 00:00:00+00:00 | 7003 | 0.0 | 1024.59 | NaN | 2.2 | 230.0 | 11.5 | 87.0 | -2.4 | 72.0 | 15000.0 | 220.0 | 18.4 | dry | None | None | 0.0 | {} | cloudy |
| 1 | 2017-01-01 01:00:00+00:00 | 7003 | 0.0 | 1023.70 | NaN | 1.7 | 210.0 | 9.4 | 62.0 | -2.4 | 74.0 | 15000.0 | 200.0 | 17.3 | dry | None | None | 0.0 | {} | partly-cloudy-night |
| 2 | 2017-01-01 02:00:00+00:00 | 7003 | 0.0 | 1023.30 | NaN | 0.8 | 230.0 | 9.0 | 50.0 | -2.4 | 79.0 | 15000.0 | 210.0 | 21.6 | dry | None | None | 0.0 | {} | partly-cloudy-night |
| 3 | 2017-01-01 03:00:00+00:00 | 7003 | 0.0 | 1022.30 | 0.0 | 1.0 | 200.0 | 13.0 | 87.0 | -1.6 | 83.0 | 12000.0 | 180.0 | 22.3 | dry | None | None | 0.0 | NaN | cloudy |
| 4 | 2017-01-01 04:00:00+00:00 | 7003 | 0.0 | 1021.80 | 0.0 | 0.2 | 190.0 | 11.9 | 87.0 | -1.7 | 87.0 | 8000.0 | 160.0 | 20.5 | rain | None | None | 0.0 | NaN | cloudy |