diff --git a/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql b/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql index 3ffe42a..a74e66b 100644 --- a/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql +++ b/libs/common/src/sql/queries/fact_daily_space_aqi_score/fact_daily_space_aqi_score.sql @@ -1,54 +1,275 @@ - WITH device_space AS ( +-- Query Pipeline Starts Here +WITH device_space AS ( SELECT device.uuid AS device_id, - device.created_at, device.space_device_uuid AS space_id, - "device-status-log".event_time::date, + "device-status-log".event_time::timestamp AS event_time, "device-status-log".code, - "device-status-log".value, - "device-status-log".log + "device-status-log".value FROM device LEFT JOIN "device-status-log" ON device.uuid = "device-status-log".device_id LEFT JOIN product ON product.uuid = device.product_device_uuid - WHERE product.cat_name = 'hjjcy' + WHERE product.cat_name = 'hjjcy' ), -average_pollutants as( -SELECT -event_time, -space_id, -AVG(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_avg, -AVG(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_avg, -AVG(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_avg, -AVG(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_avg, -AVG(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_avg, -AVG(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_avg, -AVG(CASE WHEN code = 'air_quality_index' then level_to_numeric(value) END) as air_quality_index +average_pollutants AS ( + SELECT + event_time::date AS event_date, + date_trunc('hour', event_time) AS event_hour, + space_id, -FROM device_space ---WHERE code IN ('pm25_value', 'pm10') -GROUP BY space_id, event_time + -- PM1 + MIN(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_min, + AVG(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_avg, + MAX(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_max, + + -- PM25 + MIN(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_min, + AVG(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_avg, + MAX(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_max, + + -- PM10 + MIN(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_min, + AVG(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_avg, + MAX(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_max, + + -- VOC + MIN(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_min, + AVG(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_avg, + MAX(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_max, + + -- CH2O + MIN(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_min, + AVG(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_avg, + MAX(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_max, + + -- CO2 + MIN(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_min, + AVG(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_avg, + MAX(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_max + + FROM device_space + GROUP BY space_id, event_hour, event_date +), + +filled_pollutants AS ( + SELECT + *, + -- AVG + COALESCE(pm25_avg, LAG(pm25_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm25_avg_f, + COALESCE(pm10_avg, LAG(pm10_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm10_avg_f, + COALESCE(voc_avg, LAG(voc_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS voc_avg_f, + COALESCE(co2_avg, LAG(co2_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS co2_avg_f, + COALESCE(ch2o_avg, LAG(ch2o_avg) OVER (PARTITION BY space_id ORDER BY event_hour)) AS ch2o_avg_f, + + -- MIN + COALESCE(pm25_min, LAG(pm25_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm25_min_f, + COALESCE(pm10_min, LAG(pm10_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm10_min_f, + COALESCE(voc_min, LAG(voc_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS voc_min_f, + COALESCE(co2_min, LAG(co2_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS co2_min_f, + COALESCE(ch2o_min, LAG(ch2o_min) OVER (PARTITION BY space_id ORDER BY event_hour)) AS ch2o_min_f, + + -- MAX + COALESCE(pm25_max, LAG(pm25_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm25_max_f, + COALESCE(pm10_max, LAG(pm10_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS pm10_max_f, + COALESCE(voc_max, LAG(voc_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS voc_max_f, + COALESCE(co2_max, LAG(co2_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS co2_max_f, + COALESCE(ch2o_max, LAG(ch2o_max) OVER (PARTITION BY space_id ORDER BY event_hour)) AS ch2o_max_f + FROM average_pollutants +), + +hourly_results AS ( + SELECT + space_id, + event_date, + event_hour, + pm1_min, pm1_avg, pm1_max, + pm25_min_f, pm25_avg_f, pm25_max_f, + pm10_min_f, pm10_avg_f, pm10_max_f, + voc_min_f, voc_avg_f, voc_max_f, + co2_min_f, co2_avg_f, co2_max_f, + ch2o_min_f, ch2o_avg_f, ch2o_max_f, + + GREATEST( + calculate_aqi('pm25', pm25_min_f), + calculate_aqi('pm10', pm10_min_f) + ) AS hourly_min_aqi, + + GREATEST( + calculate_aqi('pm25', pm25_avg_f), + calculate_aqi('pm10', pm10_avg_f) + ) AS hourly_avg_aqi, + + GREATEST( + calculate_aqi('pm25', pm25_max_f), + calculate_aqi('pm10', pm10_max_f) + ) AS hourly_max_aqi, + + classify_aqi(GREATEST( + calculate_aqi('pm25', pm25_avg_f), + calculate_aqi('pm10', pm10_avg_f) + )) AS aqi_category, + + classify_aqi(calculate_aqi('pm25',pm25_avg_f)) as pm25_category, + classify_aqi(calculate_aqi('pm10',pm10_avg_f)) as pm10_category, + classify_aqi(calculate_aqi('voc',voc_avg_f)) as voc_category, + classify_aqi(calculate_aqi('co2',co2_avg_f)) as co2_category, + classify_aqi(calculate_aqi('ch2o',ch2o_avg_f)) as ch2o_category + + FROM filled_pollutants +), + +daily_category_counts AS ( + SELECT space_id, event_date, aqi_category AS category, 'aqi' AS pollutant, COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_date, aqi_category + + UNION ALL + + SELECT space_id, event_date, pm25_category AS category, 'pm25' AS pollutant, COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_date, pm25_category + + UNION ALL + + SELECT space_id, event_date, pm10_category AS category, 'pm10' AS pollutant, COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_date, pm10_category + + UNION ALL + + SELECT space_id, event_date, voc_category AS category, 'voc' AS pollutant, COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_date, voc_category + + UNION ALL + + SELECT space_id, event_date, co2_category AS category, 'co2' AS pollutant, COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_date, co2_category + + UNION ALL + + SELECT space_id, event_date, ch2o_category AS category, 'ch2o' AS pollutant, COUNT(*) AS category_count + FROM hourly_results + GROUP BY space_id, event_date, ch2o_category +), + +daily_totals AS ( + SELECT + space_id, + event_date, + SUM(category_count) AS total_count + FROM daily_category_counts + where pollutant = 'aqi' + GROUP BY space_id, event_date +), + +-- Pivot Categories into Columns +daily_percentages AS ( + select + dt.space_id, + dt.event_date, + -- AQI CATEGORIES + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Good' and dcc.pollutant = 'aqi' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_aqi_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Moderate' and dcc.pollutant = 'aqi' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_aqi_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy for Sensitive Groups' and dcc.pollutant = 'aqi' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_aqi_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy' and dcc.pollutant = 'aqi' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_aqi_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Very Unhealthy' and dcc.pollutant = 'aqi' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_aqi_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Hazardous' and dcc.pollutant = 'aqi' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_aqi_percentage, + -- PM25 CATEGORIES + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Good' and dcc.pollutant = 'pm25' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_pm25_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Moderate' and dcc.pollutant = 'pm25' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_pm25_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy for Sensitive Groups' and dcc.pollutant = 'pm25' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_pm25_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy' and dcc.pollutant = 'pm25' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_pm25_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Very Unhealthy' and dcc.pollutant = 'pm25' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_pm25_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Hazardous' and dcc.pollutant = 'pm25' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_pm25_percentage, + -- PM10 CATEGORIES + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Good' and dcc.pollutant = 'pm10' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_pm10_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Moderate' and dcc.pollutant = 'pm10' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_pm10_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy for Sensitive Groups' and dcc.pollutant = 'pm10' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_pm10_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy' and dcc.pollutant = 'pm10' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_pm10_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Very Unhealthy' and dcc.pollutant = 'pm10' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_pm10_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Hazardous' and dcc.pollutant = 'pm10' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_pm10_percentage, + -- VOC CATEGORIES + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Good' and dcc.pollutant = 'voc' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_voc_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Moderate' and dcc.pollutant = 'voc' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_voc_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy for Sensitive Groups' and dcc.pollutant = 'voc' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_voc_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy' and dcc.pollutant = 'voc' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_voc_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Very Unhealthy' and dcc.pollutant = 'voc' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_voc_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Hazardous' and dcc.pollutant = 'voc' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_voc_percentage, + -- CO2 CATEGORIES + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Good' and dcc.pollutant = 'co2' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_co2_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Moderate' and dcc.pollutant = 'co2' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_co2_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy for Sensitive Groups' and dcc.pollutant = 'co2' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_co2_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy' and dcc.pollutant = 'co2' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_co2_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Very Unhealthy' and dcc.pollutant = 'co2' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_co2_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Hazardous' and dcc.pollutant = 'co2' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_co2_percentage, + -- CH20 CATEGORIES + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Good' and dcc.pollutant = 'ch2o' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_ch2o_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Moderate' and dcc.pollutant = 'ch2o' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_ch2o_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy for Sensitive Groups' and dcc.pollutant = 'ch2o' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_ch2o_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Unhealthy' and dcc.pollutant = 'ch2o' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_ch2o_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Very Unhealthy' and dcc.pollutant = 'ch2o' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_ch2o_percentage, + ROUND(COALESCE(SUM(CASE WHEN dcc.category = 'Hazardous' and dcc.pollutant = 'ch2o' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_ch2o_percentage + FROM daily_totals dt + LEFT JOIN daily_category_counts dcc + ON dt.space_id = dcc.space_id AND dt.event_date = dcc.event_date + GROUP BY dt.space_id, dt.event_date, dt.total_count +), + +daily_averages AS ( + SELECT + space_id, + event_date, + -- AQI + ROUND(AVG(hourly_min_aqi)::numeric, 2) AS daily_min_aqi, + ROUND(AVG(hourly_avg_aqi)::numeric, 2) AS daily_avg_aqi, + ROUND(AVG(hourly_max_aqi)::numeric, 2) AS daily_max_aqi, + -- PM25 + ROUND(AVG(pm25_min_f)::numeric, 2) AS daily_min_pm25, + ROUND(AVG(pm25_avg_f)::numeric, 2) AS daily_avg_pm25, + ROUND(AVG(pm25_max_f)::numeric, 2) AS daily_max_pm25, + -- PM10 + ROUND(AVG(pm10_min_f)::numeric, 2) AS daily_min_pm10, + ROUND(AVG(pm10_avg_f)::numeric, 2) AS daily_avg_pm10, + ROUND(AVG(pm10_max_f)::numeric, 2) AS daily_max_pm10, + -- VOC + ROUND(AVG(voc_min_f)::numeric, 2) AS daily_min_voc, + ROUND(AVG(voc_avg_f)::numeric, 2) AS daily_avg_voc, + ROUND(AVG(voc_max_f)::numeric, 2) AS daily_max_voc, + -- CO2 + ROUND(AVG(co2_min_f)::numeric, 2) AS daily_min_co2, + ROUND(AVG(co2_avg_f)::numeric, 2) AS daily_avg_co2, + ROUND(AVG(co2_max_f)::numeric, 2) AS daily_max_co2, + -- CH2O + ROUND(AVG(ch2o_min_f)::numeric, 2) AS daily_min_ch2o, + ROUND(AVG(ch2o_avg_f)::numeric, 2) AS daily_avg_ch2o, + ROUND(AVG(ch2o_max_f)::numeric, 2) AS daily_max_ch2o + + FROM hourly_results + GROUP BY space_id, event_date ) +SELECT + p.space_id, + p.event_date, + p.good_aqi_percentage, p.moderate_aqi_percentage, p.unhealthy_sensitive_aqi_percentage, p.unhealthy_aqi_percentage, p.very_unhealthy_aqi_percentage, p.hazardous_aqi_percentage, + a.daily_avg_aqi,a.daily_max_aqi, a.daily_min_aqi, + p.good_pm25_percentage, p.moderate_pm25_percentage, p.unhealthy_sensitive_pm25_percentage, p.unhealthy_pm25_percentage, p.very_unhealthy_pm25_percentage, p.hazardous_pm25_percentage, + a.daily_avg_pm25,a.daily_max_pm25, a.daily_min_pm25, + p.good_pm10_percentage, p.moderate_pm10_percentage, p.unhealthy_sensitive_pm10_percentage, p.unhealthy_pm10_percentage, p.very_unhealthy_pm10_percentage, p.hazardous_pm10_percentage, + a.daily_avg_pm10, a.daily_max_pm10, a.daily_min_pm10, + p.good_voc_percentage, p.moderate_voc_percentage, p.unhealthy_sensitive_voc_percentage, p.unhealthy_voc_percentage, p.very_unhealthy_voc_percentage, p.hazardous_voc_percentage, + a.daily_avg_voc, a.daily_max_voc, a.daily_min_voc, + p.good_co2_percentage, p.moderate_co2_percentage, p.unhealthy_sensitive_co2_percentage, p.unhealthy_co2_percentage, p.very_unhealthy_co2_percentage, p.hazardous_co2_percentage, + a.daily_avg_co2,a.daily_max_co2, a.daily_min_co2, + p.good_ch2o_percentage, p.moderate_ch2o_percentage, p.unhealthy_sensitive_ch2o_percentage, p.unhealthy_ch2o_percentage, p.very_unhealthy_ch2o_percentage, p.hazardous_ch2o_percentage, + a.daily_avg_ch2o,a.daily_max_ch2o, a.daily_min_ch2o +FROM daily_percentages p +LEFT JOIN daily_averages a + ON p.space_id = a.space_id AND p.event_date = a.event_date +ORDER BY p.space_id, p.event_date; + -SELECT - event_time::date as date, - space_id, - pm1_avg, - pm25_avg, - pm10_avg, - voc_avg, - ch2o_avg, - co2_avg, - --calculate_aqi('pm25', pm25_avg) AS aqi_pm25, - --calculate_aqi('pm10', pm10_avg) AS aqi_pm10, - GREATEST( - calculate_aqi('pm25', pm25_avg), - calculate_aqi('pm10', pm10_avg), - calculate_aqi('voc_value', voc_avg), - calculate_aqi('co2_value', co2_avg), - calculate_aqi('ch2o_value', ch2o_avg) - ) AS overall_AQI, - air_quality_index as avg_space_device_aqi -FROM average_pollutants; \ No newline at end of file