Merge pull request #387 from SyncrowIOT/DATA-occupancy-duration-fix

DATA-occupancy-duration-fix
This commit is contained in:
Dona Maria Absi
2025-05-30 11:59:31 +03:00
committed by GitHub
4 changed files with 243 additions and 256 deletions

View File

@ -1,100 +1,94 @@
-- Step 1: Get device presence events with previous timestamps WITH presence_logs AS (
WITH start_date AS ( SELECT
SELECT d.space_device_uuid AS space_id,
d.uuid AS device_id, l.device_id,
d.space_device_uuid AS space_id, l.event_time,
l.value, l.value,
l.event_time::timestamp AS event_time, LAG(l.event_time) OVER (PARTITION BY l.device_id ORDER BY l.event_time) AS prev_time,
LAG(l.event_time::timestamp) OVER (PARTITION BY d.uuid ORDER BY l.event_time) AS prev_timestamp LAG(l.value) OVER (PARTITION BY l.device_id ORDER BY l.event_time) AS prev_value
FROM device d FROM device d
LEFT JOIN "device-status-log" l JOIN "device-status-log" l ON d.uuid = l.device_id
ON d.uuid = l.device_id JOIN product p ON p.uuid = d.product_device_uuid
LEFT JOIN product p WHERE l.code = 'presence_state'
ON p.uuid = d.product_device_uuid AND p.cat_name = 'hps'
WHERE p.cat_name = 'hps'
AND l.code = 'presence_state'
), ),
-- Step 2: Identify periods when device reports "none" -- Intervals when device was in 'presence' (between prev_time and event_time when value='none')
device_none_periods AS ( presence_intervals AS (
SELECT SELECT
space_id, space_id,
device_id, prev_time AS start_time,
event_time AS empty_from, event_time AS end_time
LEAD(event_time) OVER (PARTITION BY device_id ORDER BY event_time) AS empty_until FROM presence_logs
FROM start_date WHERE value = 'none'
WHERE value = 'none' AND prev_value = 'presence'
AND prev_time IS NOT NULL
), ),
-- Step 3: Clip the "none" periods to the edges of each day -- Split intervals across days
clipped_device_none_periods AS ( split_intervals AS (
SELECT SELECT
space_id, space_id,
GREATEST(empty_from, DATE_TRUNC('day', empty_from)) AS clipped_from, generate_series(
LEAST(empty_until, DATE_TRUNC('day', empty_until) + INTERVAL '1 day') AS clipped_until date_trunc('day', start_time),
FROM device_none_periods date_trunc('day', end_time),
WHERE empty_until IS NOT NULL interval '1 day'
)::date AS event_date,
GREATEST(start_time, date_trunc('day', start_time)) AS interval_start,
LEAST(end_time, date_trunc('day', end_time) + interval '1 day') AS interval_end
FROM presence_intervals
), ),
-- Step 4: Break multi-day periods into daily intervals -- Mark and group overlapping intervals per space per day
generated_daily_intervals AS ( ordered_intervals AS (
SELECT SELECT
space_id, space_id,
gs::date AS day, event_date,
GREATEST(clipped_from, gs) AS interval_start, interval_start,
LEAST(clipped_until, gs + INTERVAL '1 day') AS interval_end interval_end,
FROM clipped_device_none_periods, LAG(interval_end) OVER (PARTITION BY space_id, event_date ORDER BY interval_start) AS prev_end
LATERAL generate_series(DATE_TRUNC('day', clipped_from), DATE_TRUNC('day', clipped_until), INTERVAL '1 day') AS gs FROM split_intervals
), ),
-- Step 5: Merge overlapping or adjacent intervals per day grouped_intervals AS (
SELECT *,
SUM(CASE
WHEN prev_end IS NULL OR interval_start > prev_end THEN 1
ELSE 0
END) OVER (PARTITION BY space_id, event_date ORDER BY interval_start) AS grp
FROM ordered_intervals
),
-- Merge overlapping intervals per group
merged_intervals AS ( merged_intervals AS (
SELECT SELECT
space_id, space_id,
day, event_date,
interval_start, MIN(interval_start) AS merged_start,
interval_end MAX(interval_end) AS merged_end
FROM ( FROM grouped_intervals
SELECT GROUP BY space_id, event_date, grp
space_id,
day,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, day ORDER BY interval_start) AS prev_end
FROM generated_daily_intervals
) sub
WHERE prev_end IS NULL OR interval_start > prev_end
), ),
-- Step 6: Sum up total missing seconds (device reported "none") per day -- Sum durations of merged intervals
missing_seconds_per_day AS ( summed_intervals AS (
SELECT SELECT
space_id, space_id,
day AS missing_date, event_date,
SUM(EXTRACT(EPOCH FROM (interval_end - interval_start))) AS total_missing_seconds SUM(EXTRACT(EPOCH FROM (merged_end - merged_start))) AS raw_occupied_seconds
FROM merged_intervals FROM merged_intervals
GROUP BY space_id, day GROUP BY space_id, event_date
), ),
-- Step 7: Calculate total occupied time per day (86400 - missing) final_data AS (
occupied_seconds_per_day AS ( SELECT
SELECT space_id,
space_id, event_date,
missing_date as event_date, LEAST(raw_occupied_seconds, 86400) AS occupied_seconds,
86400 - total_missing_seconds AS total_occupied_seconds, ROUND(LEAST(raw_occupied_seconds, 86400) / 86400.0 * 100, 2) AS occupancy_percentage
(86400 - total_missing_seconds)/86400*100 as occupancy_prct FROM summed_intervals
FROM missing_seconds_per_day ORDER BY space_id, event_date)
)
-- Final Output
, final_data as (
SELECT space_id,
event_date,
total_occupied_seconds,
occupancy_prct
FROM occupied_seconds_per_day
ORDER BY 1,2
)
INSERT INTO public."space-daily-occupancy-duration" ( INSERT INTO public."space-daily-occupancy-duration" (
space_uuid, space_uuid,
@ -104,12 +98,13 @@ INSERT INTO public."space-daily-occupancy-duration" (
) )
select space_id, select space_id,
event_date, event_date,
total_occupied_seconds, occupied_seconds,
occupancy_prct occupancy_percentage
FROM final_data FROM final_data
ON CONFLICT (space_uuid, event_date) DO UPDATE ON CONFLICT (space_uuid, event_date) DO UPDATE
SET SET
occupancy_percentage = EXCLUDED.occupancy_percentage; occupancy_percentage = EXCLUDED.occupancy_percentage,
occupied_seconds = EXCLUDED.occupied_seconds;

View File

@ -2,116 +2,108 @@ WITH params AS (
SELECT SELECT
TO_DATE(NULLIF($1, ''), 'YYYY-MM-DD') AS event_date, TO_DATE(NULLIF($1, ''), 'YYYY-MM-DD') AS event_date,
$2::uuid AS space_id $2::uuid AS space_id
)
, start_date AS (
SELECT
d.uuid AS device_id,
d.space_device_uuid AS space_id,
l.value,
l.event_time::timestamp AS event_time,
LAG(l.event_time::timestamp) OVER (PARTITION BY d.uuid ORDER BY l.event_time) AS prev_timestamp
FROM device d
LEFT JOIN "device-status-log" l
ON d.uuid = l.device_id
LEFT JOIN product p
ON p.uuid = d.product_device_uuid
WHERE p.cat_name = 'hps'
AND l.code = 'presence_state'
), ),
-- Step 2: Identify periods when device reports "none" presence_logs AS (
device_none_periods AS ( SELECT
SELECT d.space_device_uuid AS space_id,
space_id, l.device_id,
device_id, l.event_time,
event_time AS empty_from, l.value,
LEAD(event_time) OVER (PARTITION BY device_id ORDER BY event_time) AS empty_until LAG(l.event_time) OVER (PARTITION BY l.device_id ORDER BY l.event_time) AS prev_time
FROM start_date FROM device d
WHERE value = 'none' JOIN "device-status-log" l ON d.uuid = l.device_id
JOIN product p ON p.uuid = d.product_device_uuid
WHERE l.code = 'presence_state'
AND p.cat_name = 'hps'
), ),
-- Step 3: Clip the "none" periods to the edges of each day presence_intervals AS (
clipped_device_none_periods AS ( SELECT
SELECT space_id,
space_id, prev_time AS start_time,
GREATEST(empty_from, DATE_TRUNC('day', empty_from)) AS clipped_from, event_time AS end_time
LEAST(empty_until, DATE_TRUNC('day', empty_until) + INTERVAL '1 day') AS clipped_until FROM presence_logs
FROM device_none_periods WHERE value = 'none' AND prev_time IS NOT NULL
WHERE empty_until IS NOT NULL
), ),
-- Step 4: Break multi-day periods into daily intervals split_intervals AS (
generated_daily_intervals AS ( SELECT
SELECT space_id,
space_id, generate_series(
gs::date AS day, date_trunc('day', start_time),
GREATEST(clipped_from, gs) AS interval_start, date_trunc('day', end_time),
LEAST(clipped_until, gs + INTERVAL '1 day') AS interval_end interval '1 day'
FROM clipped_device_none_periods, )::date AS event_date,
LATERAL generate_series(DATE_TRUNC('day', clipped_from), DATE_TRUNC('day', clipped_until), INTERVAL '1 day') AS gs GREATEST(start_time, date_trunc('day', start_time)) AS interval_start,
LEAST(end_time, date_trunc('day', end_time) + INTERVAL '1 day') AS interval_end
FROM presence_intervals
),
ordered_intervals AS (
SELECT
space_id,
event_date,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, event_date ORDER BY interval_start) AS prev_end
FROM split_intervals
),
grouped_intervals AS (
SELECT *,
SUM(CASE
WHEN prev_end IS NULL OR interval_start > prev_end THEN 1
ELSE 0
END) OVER (PARTITION BY space_id, event_date ORDER BY interval_start) AS grp
FROM ordered_intervals
), ),
-- Step 5: Merge overlapping or adjacent intervals per day
merged_intervals AS ( merged_intervals AS (
SELECT SELECT
space_id, space_id,
day, event_date,
interval_start, MIN(interval_start) AS merged_start,
interval_end MAX(interval_end) AS merged_end
FROM ( FROM grouped_intervals
SELECT GROUP BY space_id, event_date, grp
space_id,
day,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, day ORDER BY interval_start) AS prev_end
FROM generated_daily_intervals
) sub
WHERE prev_end IS NULL OR interval_start > prev_end
), ),
-- Step 6: Sum up total missing seconds (device reported "none") per day summed_intervals AS (
missing_seconds_per_day AS ( SELECT
SELECT space_id,
space_id, event_date,
day AS missing_date, SUM(EXTRACT(EPOCH FROM (merged_end - merged_start))) AS raw_occupied_seconds
SUM(EXTRACT(EPOCH FROM (interval_end - interval_start))) AS total_missing_seconds FROM merged_intervals
FROM merged_intervals GROUP BY space_id, event_date
GROUP BY space_id, day
), ),
-- Step 7: Calculate total occupied time per day (86400 - missing) final_data AS (
occupied_seconds_per_day AS ( SELECT
SELECT s.space_id,
space_id, s.event_date,
missing_date as event_date, LEAST(raw_occupied_seconds, 86400) AS occupied_seconds,
86400 - total_missing_seconds AS total_occupied_seconds, ROUND(LEAST(raw_occupied_seconds, 86400) / 86400.0 * 100, 2) AS occupancy_percentage
(86400 - total_missing_seconds)/86400*100 as occupancy_percentage FROM summed_intervals s
FROM missing_seconds_per_day JOIN params p
) ON p.space_id = s.space_id
AND p.event_date = s.event_date
-- Final Output
, final_data as (
SELECT occupied_seconds_per_day.space_id,
occupied_seconds_per_day.event_date,
occupied_seconds_per_day.occupancy_percentage
FROM occupied_seconds_per_day
join params p on true
and p.space_id = occupied_seconds_per_day.space_id
and p.event_date = occupied_seconds_per_day.event_date
ORDER BY 1,2
) )
INSERT INTO public."space-daily-occupancy-duration" ( INSERT INTO public."space-daily-occupancy-duration" (
space_uuid, space_uuid,
event_date, event_date,
occupied_seconds,
occupancy_percentage occupancy_percentage
) )
select space_id, SELECT
event_date, space_id,
occupancy_percentage event_date,
occupied_seconds,
occupancy_percentage
FROM final_data FROM final_data
ON CONFLICT (space_uuid, event_date) DO UPDATE ON CONFLICT (space_uuid, event_date) DO UPDATE
SET SET
occupancy_percentage = EXCLUDED.occupancy_percentage; occupancy_percentage = EXCLUDED.occupancy_percentage,
occupied_seconds = EXCLUDED.occupied_seconds;

View File

@ -16,4 +16,5 @@ WITH params AS (
WHERE A.device_uuid::text = ANY(P.device_ids) WHERE A.device_uuid::text = ANY(P.device_ids)
AND (P.month IS NULL AND (P.month IS NULL
OR date_trunc('month', A.event_date) = P.month OR date_trunc('month', A.event_date) = P.month
) );

View File

@ -1,91 +1,90 @@
-- Step 1: Get device presence events with previous timestamps WITH presence_logs AS (
WITH start_date AS ( SELECT
SELECT d.space_device_uuid AS space_id,
d.uuid AS device_id, l.device_id,
d.space_device_uuid AS space_id, l.event_time,
l.value, l.value,
l.event_time::timestamp AS event_time, LAG(l.event_time) OVER (PARTITION BY l.device_id ORDER BY l.event_time) AS prev_time,
LAG(l.event_time::timestamp) OVER (PARTITION BY d.uuid ORDER BY l.event_time) AS prev_timestamp LAG(l.value) OVER (PARTITION BY l.device_id ORDER BY l.event_time) AS prev_value
FROM device d FROM device d
LEFT JOIN "device-status-log" l JOIN "device-status-log" l ON d.uuid = l.device_id
ON d.uuid = l.device_id JOIN product p ON p.uuid = d.product_device_uuid
LEFT JOIN product p WHERE l.code = 'presence_state'
ON p.uuid = d.product_device_uuid AND p.cat_name = 'hps'
WHERE p.cat_name = 'hps'
AND l.code = 'presence_state'
), ),
-- Step 2: Identify periods when device reports "none" -- Intervals when device was in 'presence' (between prev_time and event_time when value='none')
device_none_periods AS ( presence_intervals AS (
SELECT SELECT
space_id, space_id,
device_id, prev_time AS start_time,
event_time AS empty_from, event_time AS end_time
LEAD(event_time) OVER (PARTITION BY device_id ORDER BY event_time) AS empty_until FROM presence_logs
FROM start_date WHERE value = 'none'
WHERE value = 'none' AND prev_value = 'presence'
AND prev_time IS NOT NULL
), ),
-- Step 3: Clip the "none" periods to the edges of each day -- Split intervals across days
clipped_device_none_periods AS ( split_intervals AS (
SELECT SELECT
space_id, space_id,
GREATEST(empty_from, DATE_TRUNC('day', empty_from)) AS clipped_from, generate_series(
LEAST(empty_until, DATE_TRUNC('day', empty_until) + INTERVAL '1 day') AS clipped_until date_trunc('day', start_time),
FROM device_none_periods date_trunc('day', end_time),
WHERE empty_until IS NOT NULL interval '1 day'
)::date AS event_date,
GREATEST(start_time, date_trunc('day', start_time)) AS interval_start,
LEAST(end_time, date_trunc('day', end_time) + interval '1 day') AS interval_end
FROM presence_intervals
), ),
-- Step 4: Break multi-day periods into daily intervals -- Mark and group overlapping intervals per space per day
generated_daily_intervals AS ( ordered_intervals AS (
SELECT SELECT
space_id, space_id,
gs::date AS day, event_date,
GREATEST(clipped_from, gs) AS interval_start, interval_start,
LEAST(clipped_until, gs + INTERVAL '1 day') AS interval_end interval_end,
FROM clipped_device_none_periods, LAG(interval_end) OVER (PARTITION BY space_id, event_date ORDER BY interval_start) AS prev_end
LATERAL generate_series(DATE_TRUNC('day', clipped_from), DATE_TRUNC('day', clipped_until), INTERVAL '1 day') AS gs FROM split_intervals
), ),
-- Step 5: Merge overlapping or adjacent intervals per day grouped_intervals AS (
SELECT *,
SUM(CASE
WHEN prev_end IS NULL OR interval_start > prev_end THEN 1
ELSE 0
END) OVER (PARTITION BY space_id, event_date ORDER BY interval_start) AS grp
FROM ordered_intervals
),
-- Merge overlapping intervals per group
merged_intervals AS ( merged_intervals AS (
SELECT SELECT
space_id, space_id,
day, event_date,
interval_start, MIN(interval_start) AS merged_start,
interval_end MAX(interval_end) AS merged_end
FROM ( FROM grouped_intervals
SELECT GROUP BY space_id, event_date, grp
space_id,
day,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, day ORDER BY interval_start) AS prev_end
FROM generated_daily_intervals
) sub
WHERE prev_end IS NULL OR interval_start > prev_end
), ),
-- Step 6: Sum up total missing seconds (device reported "none") per day -- Sum durations of merged intervals
missing_seconds_per_day AS ( summed_intervals AS (
SELECT SELECT
space_id, space_id,
day AS missing_date, event_date,
SUM(EXTRACT(EPOCH FROM (interval_end - interval_start))) AS total_missing_seconds SUM(EXTRACT(EPOCH FROM (merged_end - merged_start))) AS raw_occupied_seconds
FROM merged_intervals FROM merged_intervals
GROUP BY space_id, day GROUP BY space_id, event_date
),
-- Step 7: Calculate total occupied time per day (86400 - missing)
occupied_seconds_per_day AS (
SELECT
space_id,
missing_date as date,
86400 - total_missing_seconds AS total_occupied_seconds
FROM missing_seconds_per_day
) )
-- Final Output -- Final output with capped seconds and percentage
SELECT * SELECT
FROM occupied_seconds_per_day space_id,
ORDER BY 1,2; event_date,
LEAST(raw_occupied_seconds, 86400) AS occupied_seconds,
ROUND(LEAST(raw_occupied_seconds, 86400) / 86400.0 * 100, 2) AS occupancy_percentage
FROM summed_intervals
ORDER BY space_id, event_date;