From 5b0135ba805d8e313b481cc70ce104db7c62a927 Mon Sep 17 00:00:00 2001 From: khuss Date: Sun, 1 Jun 2025 16:09:17 -0400 Subject: [PATCH] AQI space model updated with new hourly to daily logic for calculations and categorization of aqi brackets --- .../fact_daily_space_aqi_score.sql | 234 +++++++++++++++--- 1 file changed, 197 insertions(+), 37 deletions(-) diff --git a/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql b/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql index 3ffe42a..ccee7f4 100644 --- a/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql +++ b/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql @@ -1,9 +1,11 @@ - WITH device_space AS ( +-- CTE for device + status log + space + +WITH device_space AS ( SELECT device.uuid AS device_id, device.created_at, device.space_device_uuid AS space_id, - "device-status-log".event_time::date, + "device-status-log".event_time::timestamp AS event_time, "device-status-log".code, "device-status-log".value, "device-status-log".log @@ -12,43 +14,201 @@ ON device.uuid = "device-status-log".device_id LEFT JOIN product ON product.uuid = device.product_device_uuid - WHERE product.cat_name = 'hjjcy' + WHERE product.cat_name = 'hjjcy' ), -average_pollutants as( -SELECT -event_time, -space_id, -AVG(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_avg, -AVG(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_avg, -AVG(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_avg, -AVG(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_avg, -AVG(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_avg, -AVG(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_avg, -AVG(CASE WHEN code = 'air_quality_index' then level_to_numeric(value) END) as air_quality_index +-- Getting the hourly pollutants max min avg for each space +average_pollutants AS ( + SELECT + date_trunc('hour', event_time) AS event_hour, + space_id, + + -- AVG READINGS + AVG(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_avg, + AVG(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_avg, + AVG(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_avg, + AVG(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_avg, + AVG(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_avg, + AVG(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_avg, + AVG(CASE WHEN code = 'air_quality_index' THEN level_to_numeric(value) END) AS air_quality_index_avg, + + -- MIN READINGS + MIN(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_min, + MIN(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_min, + MIN(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_min, + MIN(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_min, + MIN(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_min, + MIN(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_min, + MIN(CASE WHEN code = 'air_quality_index' THEN level_to_numeric(value) END) AS air_quality_index_min, + + -- MAX READINGS + MAX(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_max, + MAX(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_max, + MAX(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_max, + MAX(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_max, + MAX(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_max, + MAX(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_max, + MAX(CASE WHEN code = 'air_quality_index' THEN level_to_numeric(value) END) AS air_quality_index_max -FROM device_space ---WHERE code IN ('pm25_value', 'pm10') -GROUP BY space_id, event_time + + FROM device_space + GROUP BY space_id, event_hour +), + +-- Fill NULLs due to missing log values +filled_pollutants AS ( + SELECT + *, + -- Forward-fill nulls using LAG() over partitioned data + COALESCE(pm25_avg, LAG(pm25_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm25_filled_avg, + COALESCE(pm10_avg, LAG(pm10_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm10_filled_avg, + COALESCE(voc_avg, LAG(voc_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS voc_filled_avg, + COALESCE(co2_avg, LAG(co2_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS co2_filled_avg, + COALESCE(ch2o_avg, LAG(ch2o_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS ch2o_filled_avg, + COALESCE(air_quality_index_avg, LAG(air_quality_index_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS aqi_filled_avg, + + COALESCE(pm25_min, LAG(pm25_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm25_min_filled, + COALESCE(pm10_min, LAG(pm10_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm10_min_filled, + COALESCE(voc_min, LAG(voc_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS voc_min_filled, + COALESCE(co2_min, LAG(co2_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS co2_min_filled, + COALESCE(ch2o_min, LAG(ch2o_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS ch2o_min_filled, + COALESCE(air_quality_index_min, LAG(air_quality_index_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS aqi_min_filled, + + COALESCE(pm25_max, LAG(pm25_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm25_max_filled, + COALESCE(pm10_max, LAG(pm10_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm10_max_filled, + COALESCE(voc_max, LAG(voc_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS voc_max_filled, + COALESCE(co2_max, LAG(co2_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS co2_max_filled, + COALESCE(ch2o_max, LAG(ch2o_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS ch2o_max_filled, + COALESCE(air_quality_index_max, LAG(air_quality_index_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS aqi_max_filled + + + FROM average_pollutants +), + +-- Calculate max, min, avg hourly AQI for each space +hourly_results as ( + SELECT + space_id, + event_hour, + --voc_filled, + pm25_filled_avg, + pm10_filled_avg, + -- co2_filled, + --ch2o_filled, + --aqi_filled, + pm25_max_filled, + pm10_max_filled, + pm25_min_filled, + pm10_min_filled, + air_quality_index_avg, + GREATEST( + calculate_aqi('pm25', pm25_filled_avg), + calculate_aqi('pm10', pm10_filled_avg) + --calculate_aqi('voc_value', voc_filled_avg), + --calculate_aqi('co2_value', co2_filled_avg), + --calculate_aqi('ch2o_value', ch2o_filled_avg) + -- Add more AQI sources as needed + ) AS hourly_avg_aqi, + GREATEST( + calculate_aqi('pm25', pm25_max_filled), + calculate_aqi('pm10', pm10_max_filled) + ) AS hourly_max_aqi, + GREATEST( + calculate_aqi('pm25', pm25_min_filled), + calculate_aqi('pm10', pm10_min_filled) + ) AS hourly_min_aqi, + case + when GREATEST( + calculate_aqi('pm25', pm25_filled_avg), + calculate_aqi('pm10', pm10_filled_avg) + ) <= 50 then 'Good' + when GREATEST( + calculate_aqi('pm25', pm25_filled_avg), + calculate_aqi('pm10', pm10_filled_avg) + ) <= 100 then 'Moderate' + when GREATEST( + calculate_aqi('pm25', pm25_filled_avg), + calculate_aqi('pm10', pm10_filled_avg) + ) <= 150 then 'Unhealthy for Sensitive Groups' + when GREATEST( + calculate_aqi('pm25', pm25_filled_avg), + calculate_aqi('pm10', pm10_filled_avg) + ) <= 200 then 'Unhealthy' + when GREATEST( + calculate_aqi('pm25', pm25_filled_avg), + calculate_aqi('pm10', pm10_filled_avg) + ) <= 300 then 'Very Unhealthy' + else 'Hazardous' + end as aqi_category + + FROM filled_pollutants + ORDER BY space_id, event_hour +), + +-- counting how many categories/hours are there in total (in case device(s) was disconnected could be less than 24) +daily_category_counts AS ( + SELECT + space_id, + date_trunc('day', event_hour) AS event_day, + aqi_category, + COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_day, aqi_category +), + +-- Aggregate Total Counts per Day +daily_totals AS ( + SELECT + space_id, + event_day, + SUM(category_count) AS total_count + FROM daily_category_counts + GROUP BY space_id, event_day +), + +-- Calculate the daily of the daily min max avg AQI values +daily_averages AS ( + SELECT + space_id, + date_trunc('day', event_hour) AS event_day, + ROUND(AVG(hourly_avg_aqi)::numeric, 2) AS daily_avg_aqi, + ROUND(AVG(hourly_max_aqi)::numeric, 2) AS daily_max_aqi, + ROUND(AVG(hourly_min_aqi)::numeric, 2) AS daily_min_aqi + FROM hourly_results + GROUP BY space_id, event_day +), + +-- Pivot Categories into Columns +daily_percentages AS ( + SELECT + dt.space_id, + dt.event_day, + ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Good' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Moderate' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Unhealthy for Sensitive Groups' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Unhealthy' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Very Unhealthy' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Hazardous' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_percentage + FROM daily_totals dt + LEFT JOIN daily_category_counts dcc + ON dt.space_id = dcc.space_id AND dt.event_day = dcc.event_day + GROUP BY dt.space_id, dt.event_day, dt.total_count ) -SELECT - event_time::date as date, - space_id, - pm1_avg, - pm25_avg, - pm10_avg, - voc_avg, - ch2o_avg, - co2_avg, - --calculate_aqi('pm25', pm25_avg) AS aqi_pm25, - --calculate_aqi('pm10', pm10_avg) AS aqi_pm10, - GREATEST( - calculate_aqi('pm25', pm25_avg), - calculate_aqi('pm10', pm10_avg), - calculate_aqi('voc_value', voc_avg), - calculate_aqi('co2_value', co2_avg), - calculate_aqi('ch2o_value', ch2o_avg) - ) AS overall_AQI, - air_quality_index as avg_space_device_aqi -FROM average_pollutants; \ No newline at end of file +-- Final Output +SELECT + p.space_id, + p.event_day, + p.good_percentage, + p.moderate_percentage, + p.unhealthy_sensitive_percentage, + p.unhealthy_percentage, + p.very_unhealthy_percentage, + p.hazardous_percentage, + a.daily_avg_aqi, + a.daily_max_aqi, + a.daily_min_aqi +FROM daily_percentages p +LEFT JOIN daily_averages a + ON p.space_id = a.space_id AND p.event_day = a.event_day +ORDER BY p.space_id, p.event_day; \ No newline at end of file