device model for aqi updated with hourly to daily logic getting max, min, avergae and percentage of categorical values for each aqi bracket

This commit is contained in:
khuss
2025-06-01 16:03:07 -04:00
parent 59161d4049
commit 2fee8c055e

View File

@ -51,7 +51,7 @@ BEGIN
END;
$$ LANGUAGE plpgsql;
-- Function to convert Tuya AQI level (e.g., level_0, level_1) to numeric value
-- Function to convert Tuya AQI level (e.g., level_1, level_2) to numeric value
CREATE OR REPLACE FUNCTION level_to_numeric(level_text TEXT)
RETURNS NUMERIC AS $$
BEGIN
@ -62,18 +62,14 @@ EXCEPTION WHEN others THEN
END;
$$ LANGUAGE plpgsql;
-- CTE for device + status log + space
WITH device_space AS (
SELECT
device.uuid AS device_id,
device.created_at,
device.space_device_uuid AS space_id,
"device-status-log".event_id,
"device-status-log".event_time::date,
"device-status-log".event_time::timestamp AS event_time,
"device-status-log".code,
"device-status-log".value,
"device-status-log".log
"device-status-log".value
FROM device
LEFT JOIN "device-status-log"
ON device.uuid = "device-status-log".device_id
@ -82,81 +78,185 @@ WITH device_space AS (
WHERE product.cat_name = 'hjjcy'
),
-- Aggregate air sensor data per device per day
air_data AS (
-- Getting the hourly pollutants max min avg for each device
average_pollutants AS (
SELECT
event_time AS date,
date_trunc('hour', event_time) AS event_hour,
device_id,
space_id,
-- VOC
MIN(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_min,
MAX(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_max,
-- AVG READINGS
AVG(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_avg,
-- PM1
MIN(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_min,
MAX(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_max,
AVG(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_avg,
-- PM2.5
MIN(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_min,
MAX(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_max,
AVG(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_avg,
-- PM10
MIN(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_min,
MAX(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_max,
AVG(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_avg,
-- CH2O
MIN(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_min,
MAX(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_max,
AVG(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_avg,
-- Humidity
MIN(CASE WHEN code = 'humidity_value' THEN value::numeric END) AS humidity_low,
MAX(CASE WHEN code = 'humidity_value' THEN value::numeric END) AS humidity_high,
AVG(CASE WHEN code = 'humidity_value' THEN value::numeric END) AS humidity_avg,
-- Temperature
MIN(CASE WHEN code = 'temp_current' THEN value::numeric END) AS temp_low,
MAX(CASE WHEN code = 'temp_current' THEN value::numeric END) AS temp_high,
AVG(CASE WHEN code = 'temp_current' THEN value::numeric END) AS temp_avg,
-- CO2
MIN(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_min,
MAX(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_max,
AVG(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_avg,
-- AQI
AVG(CASE WHEN code = 'air_quality_index' then level_to_numeric(value) END) as air_quality_index
-- MIN READINGS
MIN(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_min,
MIN(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_min,
MIN(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_min,
MIN(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_min,
MIN(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_min,
MIN(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_min,
-- MAX READINGS
MAX(CASE WHEN code = 'voc_value' THEN value::numeric END) AS voc_max,
MAX(CASE WHEN code = 'pm1' THEN value::numeric END) AS pm1_max,
MAX(CASE WHEN code = 'pm25_value' THEN value::numeric END) AS pm25_max,
MAX(CASE WHEN code = 'pm10' THEN value::numeric END) AS pm10_max,
MAX(CASE WHEN code = 'ch2o_value' THEN value::numeric END) AS ch2o_max,
MAX(CASE WHEN code = 'co2_value' THEN value::numeric END) AS co2_max
FROM device_space
GROUP BY date, device_id, space_id
)
GROUP BY device_id, space_id, event_hour
),
-- Final select with AQI calculation
SELECT
date,
-- Fill NULLs due to missing log values
filled_pollutants AS (
SELECT
*,
COALESCE(pm25_avg, LAG(pm25_avg) OVER (PARTITION BY device_id ORDER BY event_hour)) AS pm25_filled_avg,
COALESCE(pm10_avg, LAG(pm10_avg) OVER (PARTITION BY device_id ORDER BY event_hour)) AS pm10_filled_avg,
COALESCE(voc_avg, LAG(voc_avg) OVER (PARTITION BY device_id ORDER BY event_hour)) AS voc_filled_avg,
COALESCE(co2_avg, LAG(co2_avg) OVER (PARTITION BY device_id ORDER BY event_hour)) AS co2_filled_avg,
COALESCE(ch2o_avg, LAG(ch2o_avg) OVER (PARTITION BY device_id ORDER BY event_hour)) AS ch2o_filled_avg,
COALESCE(pm25_min, LAG(pm25_min) OVER (PARTITION BY device_id ORDER BY event_hour)) AS pm25_min_filled,
COALESCE(pm10_min, LAG(pm10_min) OVER (PARTITION BY device_id ORDER BY event_hour)) AS pm10_min_filled,
COALESCE(pm25_max, LAG(pm25_max) OVER (PARTITION BY device_id ORDER BY event_hour)) AS pm25_max_filled,
COALESCE(pm10_max, LAG(pm10_max) OVER (PARTITION BY device_id ORDER BY event_hour)) AS pm10_max_filled
FROM average_pollutants
),
-- Calculate max, min, avg hourly AQI for each device
hourly_results AS (
SELECT
device_id,
space_id,
voc_min, voc_max, voc_avg,
pm1_min, pm1_max, pm1_avg,
pm25_min, pm25_max, pm25_avg,
pm10_min, pm10_max, pm10_avg,
ch2o_min, ch2o_max, ch2o_avg,
humidity_low, humidity_high, humidity_avg,
temp_low, temp_high, temp_avg,
co2_min, co2_max, co2_avg,
GREATEST(
calculate_aqi('pm25', pm25_avg),
calculate_aqi('pm10', pm10_avg),
calculate_aqi('voc_value', voc_avg),
calculate_aqi('co2_value', co2_avg),
calculate_aqi('ch2o_value', ch2o_avg)
) AS overall_AQI,
air_quality_index as avg_device_index
FROM air_data;
event_hour,
pm25_filled_avg,
pm10_filled_avg,
pm25_max_filled,
pm10_max_filled,
pm25_min_filled,
pm10_min_filled,
GREATEST(
calculate_aqi('pm25', pm25_filled_avg),
calculate_aqi('pm10', pm10_filled_avg)
) AS hourly_avg_aqi,
GREATEST(
calculate_aqi('pm25', pm25_max_filled),
calculate_aqi('pm10', pm10_max_filled)
) AS hourly_max_aqi,
GREATEST(
calculate_aqi('pm25', pm25_min_filled),
calculate_aqi('pm10', pm10_min_filled)
) AS hourly_min_aqi,
CASE
WHEN GREATEST(
calculate_aqi('pm25', pm25_filled_avg),
calculate_aqi('pm10', pm10_filled_avg)
) <= 50 THEN 'Good'
WHEN GREATEST(
calculate_aqi('pm25', pm25_filled_avg),
calculate_aqi('pm10', pm10_filled_avg)
) <= 100 THEN 'Moderate'
WHEN GREATEST(
calculate_aqi('pm25', pm25_filled_avg),
calculate_aqi('pm10', pm10_filled_avg)
) <= 150 THEN 'Unhealthy for Sensitive Groups'
WHEN GREATEST(
calculate_aqi('pm25', pm25_filled_avg),
calculate_aqi('pm10', pm10_filled_avg)
) <= 200 THEN 'Unhealthy'
WHEN GREATEST(
calculate_aqi('pm25', pm25_filled_avg),
calculate_aqi('pm10', pm10_filled_avg)
) <= 300 THEN 'Very Unhealthy'
ELSE 'Hazardous'
END AS aqi_category
FROM filled_pollutants
ORDER BY device_id, event_hour
),
-- counting how many categories/hours are there in total (in case device was disconnected could be less than 24)
daily_category_counts AS (
SELECT
space_id,
device_id,
date_trunc('day', event_hour) AS event_day,
aqi_category,
COUNT(*) AS category_count
FROM hourly_results
GROUP BY device_id, space_id, event_day, aqi_category
),
-- Aggregate Total Counts per Day
daily_totals AS (
select
device_id,
space_id,
event_day,
SUM(category_count) AS total_count
FROM daily_category_counts
GROUP BY device_id, space_id, event_day
),
-- Calculate the daily of the daily min max avg AQI values
daily_averages AS (
select
device_id,
space_id,
date_trunc('day', event_hour) AS event_day,
ROUND(AVG(hourly_avg_aqi)::numeric, 2) AS daily_avg_aqi,
ROUND(AVG(hourly_max_aqi)::numeric, 2) AS daily_max_aqi,
ROUND(AVG(hourly_min_aqi)::numeric, 2) AS daily_min_aqi
FROM hourly_results
GROUP BY device_id, space_id, event_day
),
-- Pivot Categories into Columns
daily_percentages AS (
select
dt.device_id,
dt.space_id,
dt.event_day,
ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Good' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS good_percentage,
ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Moderate' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS moderate_percentage,
ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Unhealthy for Sensitive Groups' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_sensitive_percentage,
ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Unhealthy' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS unhealthy_percentage,
ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Very Unhealthy' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS very_unhealthy_percentage,
ROUND(COALESCE(SUM(CASE WHEN dcc.aqi_category = 'Hazardous' THEN dcc.category_count ELSE 0 END) * 100.0 / dt.total_count, 0), 2) AS hazardous_percentage
FROM daily_totals dt
LEFT JOIN daily_category_counts dcc
ON dt.device_id = dcc.device_id AND dt.event_day = dcc.event_day
GROUP BY dt.device_id, dt.space_id, dt.event_day, dt.total_count
)
-- Final Output
SELECT
p.device_id,
p.space_id,
p.event_day,
p.good_percentage,
p.moderate_percentage,
p.unhealthy_sensitive_percentage,
p.unhealthy_percentage,
p.very_unhealthy_percentage,
p.hazardous_percentage,
a.daily_avg_aqi,
a.daily_max_aqi,
a.daily_min_aqi
FROM daily_percentages p
LEFT JOIN daily_averages a
ON p.device_id = a.device_id AND p.event_day = a.event_day
ORDER BY p.space_id, p.event_day;