model adjustments

This commit is contained in:
Dona Maria Absi
2025-05-28 13:55:56 +03:00
parent f44dc793a6
commit 25967d02f9
4 changed files with 244 additions and 256 deletions

View File

@ -1,100 +1,93 @@
-- Step 1: Get device presence events with previous timestamps
WITH start_date AS (
SELECT
d.uuid AS device_id,
d.space_device_uuid AS space_id,
l.value,
l.event_time::timestamp AS event_time,
LAG(l.event_time::timestamp) OVER (PARTITION BY d.uuid ORDER BY l.event_time) AS prev_timestamp
FROM device d
LEFT JOIN "device-status-log" l
ON d.uuid = l.device_id
LEFT JOIN product p
ON p.uuid = d.product_device_uuid
WHERE p.cat_name = 'hps'
AND l.code = 'presence_state'
-- Presence-state log rows of 'hps' devices. Each row also carries the timestamp
-- and the value of the same device's previous row (NULL for a device's first row).
WITH presence_logs AS (
    SELECT
        d.space_device_uuid AS space_id,
        l.device_id,
        l.event_time,
        l.value,
        LAG(l.event_time) OVER device_timeline AS prev_time,
        -- presence_intervals filters on prev_value; without this column the
        -- statement fails with "column prev_value does not exist".
        LAG(l.value) OVER device_timeline AS prev_value
    FROM device d
    JOIN "device-status-log" l ON d.uuid = l.device_id
    JOIN product p ON p.uuid = d.product_device_uuid
    WHERE l.code = 'presence_state'
      AND p.cat_name = 'hps'
    WINDOW device_timeline AS (PARTITION BY l.device_id ORDER BY l.event_time)
),
-- Step 2: Identify periods when device reports "none"
device_none_periods AS (
SELECT
space_id,
device_id,
event_time AS empty_from,
LEAD(event_time) OVER (PARTITION BY device_id ORDER BY event_time) AS empty_until
FROM start_date
WHERE value = 'none'
-- Occupied intervals: every row whose value is 'none' and whose prev_value is
-- 'presence' yields one interval running from prev_time to event_time.
-- NOTE(review): prev_value has to be exposed by presence_logs - confirm it is selected there.
presence_intervals AS (
    SELECT
        pl.space_id,
        pl.prev_time  AS start_time,
        pl.event_time AS end_time
    FROM presence_logs AS pl
    WHERE pl.prev_time IS NOT NULL
      AND pl.prev_value = 'presence'
      AND pl.value = 'none'
),
-- Step 3: Clip the "none" periods to the edges of each day
clipped_device_none_periods AS (
SELECT
space_id,
GREATEST(empty_from, DATE_TRUNC('day', empty_from)) AS clipped_from,
LEAST(empty_until, DATE_TRUNC('day', empty_until) + INTERVAL '1 day') AS clipped_until
FROM device_none_periods
WHERE empty_until IS NOT NULL
-- Split intervals across days: one row per calendar day an interval touches,
-- clipped to that day's window [day_start, day_start + 1 day].
split_intervals AS (
    SELECT
        iv.space_id,
        gs.day_start::date AS event_date,
        -- Clip against the generated day, not against the interval's own first/last
        -- day; otherwise a multi-day interval contributes its full length to every
        -- day it spans.
        GREATEST(iv.start_time, gs.day_start) AS interval_start,
        LEAST(iv.end_time, gs.day_start + interval '1 day') AS interval_end
    FROM presence_intervals AS iv
    CROSS JOIN LATERAL generate_series(
        date_trunc('day', iv.start_time),
        date_trunc('day', iv.end_time),
        interval '1 day'
    ) AS gs(day_start)
),
-- Step 4: Break multi-day periods into daily intervals
generated_daily_intervals AS (
SELECT
space_id,
gs::date AS day,
GREATEST(clipped_from, gs) AS interval_start,
LEAST(clipped_until, gs + INTERVAL '1 day') AS interval_end
FROM clipped_device_none_periods,
LATERAL generate_series(DATE_TRUNC('day', clipped_from), DATE_TRUNC('day', clipped_until), INTERVAL '1 day') AS gs
-- Mark and group overlapping intervals per space per day.
-- prev_end is the latest end among ALL earlier intervals of the same space/day
-- (NULL for the first one). Using only the directly preceding row (LAG) breaks
-- when a short interval is nested inside an earlier, longer one: the next
-- interval would look disjoint and its time would be counted twice.
ordered_intervals AS (
    SELECT
        space_id,
        event_date,
        interval_start,
        interval_end,
        MAX(interval_end) OVER (
            PARTITION BY space_id, event_date
            ORDER BY interval_start
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS prev_end
    FROM split_intervals
),
-- Step 5: Merge overlapping or adjacent intervals per day
-- Assign a running group number per space/day: the counter advances on every
-- row that has no prev_end or that starts after prev_end.
grouped_intervals AS (
    SELECT
        oi.*,
        COUNT(*) FILTER (
            WHERE oi.prev_end IS NULL
               OR oi.interval_start > oi.prev_end
        ) OVER (
            PARTITION BY oi.space_id, oi.event_date
            ORDER BY oi.interval_start
        ) AS grp
    FROM ordered_intervals AS oi
),
-- Merge overlapping intervals per group
merged_intervals AS (
SELECT
space_id,
day,
interval_start,
interval_end
FROM (
SELECT
space_id,
day,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, day ORDER BY interval_start) AS prev_end
FROM generated_daily_intervals
) sub
WHERE prev_end IS NULL OR interval_start > prev_end
-- Collapse each group (space, day, grp) into a single interval spanning its
-- earliest start and latest end.
SELECT
    g.space_id,
    g.event_date,
    MIN(g.interval_start) AS merged_start,
    MAX(g.interval_end)   AS merged_end
FROM grouped_intervals AS g
GROUP BY
    g.space_id,
    g.event_date,
    g.grp
),
-- Step 6: Sum up total missing seconds (device reported "none") per day
missing_seconds_per_day AS (
SELECT
space_id,
day AS missing_date,
SUM(EXTRACT(EPOCH FROM (interval_end - interval_start))) AS total_missing_seconds
FROM merged_intervals
GROUP BY space_id, day
),
-- Total length, in seconds, of the merged intervals of each space and day.
summed_intervals AS (
    SELECT
        m.space_id,
        m.event_date,
        SUM(EXTRACT(EPOCH FROM (m.merged_end - m.merged_start))) AS raw_occupied_seconds
    FROM merged_intervals AS m
    GROUP BY
        m.space_id,
        m.event_date
),
-- Step 7: Calculate total occupied time per day (86400 - missing)
occupied_seconds_per_day AS (
SELECT
space_id,
missing_date as event_date,
86400 - total_missing_seconds AS total_occupied_seconds,
(86400 - total_missing_seconds)/86400*100 as occupancy_prct
FROM missing_seconds_per_day
)
-- Per space/day result: occupied seconds capped at one day (86400 s) and the
-- matching share of the day as a percentage rounded to two decimals.
final_data AS (
    SELECT
        space_id,
        event_date,
        LEAST(raw_occupied_seconds, 86400) AS occupied_seconds,
        -- round(x, 2) is defined for numeric only. EXTRACT(EPOCH ...) returns
        -- double precision before PostgreSQL 14, so cast explicitly to keep the
        -- statement valid on those servers (no-op on 14+).
        ROUND((LEAST(raw_occupied_seconds, 86400) / 86400.0 * 100)::numeric, 2) AS occupancy_percentage
    FROM summed_intervals
    ORDER BY space_id, event_date
)
-- Final Output
, final_data as (
SELECT space_id,
event_date,
total_occupied_seconds,
occupancy_prct
FROM occupied_seconds_per_day
ORDER BY 1,2
)
INSERT INTO public."space-daily-occupancy-duration" (
space_uuid,
@ -104,12 +97,13 @@ INSERT INTO public."space-daily-occupancy-duration" (
)
select space_id,
event_date,
total_occupied_seconds,
occupancy_prct
occupied_seconds,
occupancy_percentage
FROM final_data
ON CONFLICT (space_uuid, event_date) DO UPDATE
SET
occupancy_percentage = EXCLUDED.occupancy_percentage;
occupancy_percentage = EXCLUDED.occupancy_percentage,
occupied_seconds = EXCLUDED.occupied_seconds;

View File

@ -2,116 +2,108 @@ WITH params AS (
SELECT
TO_DATE(NULLIF($1, ''), 'YYYY-MM-DD') AS event_date,
$2::uuid AS space_id
)
, start_date AS (
SELECT
d.uuid AS device_id,
d.space_device_uuid AS space_id,
l.value,
l.event_time::timestamp AS event_time,
LAG(l.event_time::timestamp) OVER (PARTITION BY d.uuid ORDER BY l.event_time) AS prev_timestamp
FROM device d
LEFT JOIN "device-status-log" l
ON d.uuid = l.device_id
LEFT JOIN product p
ON p.uuid = d.product_device_uuid
WHERE p.cat_name = 'hps'
AND l.code = 'presence_state'
),
-- Step 2: Identify periods when device reports "none"
device_none_periods AS (
SELECT
space_id,
device_id,
event_time AS empty_from,
LEAD(event_time) OVER (PARTITION BY device_id ORDER BY event_time) AS empty_until
FROM start_date
WHERE value = 'none'
-- Presence-state log rows of 'hps' devices, each paired with the timestamp of the
-- same device's previous row (NULL for a device's first row).
-- The leading comma separates this CTE from the preceding params CTE; the line it
-- replaced (", start_date AS (") carried that separator.
, presence_logs AS (
    SELECT
        d.space_device_uuid AS space_id,
        l.device_id,
        l.event_time,
        l.value,
        LAG(l.event_time) OVER (PARTITION BY l.device_id ORDER BY l.event_time) AS prev_time
    FROM device d
    JOIN "device-status-log" l ON d.uuid = l.device_id
    JOIN product p ON p.uuid = d.product_device_uuid
    WHERE l.code = 'presence_state'
      AND p.cat_name = 'hps'
),
-- Step 3: Clip the "none" periods to the edges of each day
clipped_device_none_periods AS (
SELECT
space_id,
GREATEST(empty_from, DATE_TRUNC('day', empty_from)) AS clipped_from,
LEAST(empty_until, DATE_TRUNC('day', empty_until) + INTERVAL '1 day') AS clipped_until
FROM device_none_periods
WHERE empty_until IS NOT NULL
-- Intervals when a device was in 'presence': from the previous log row
-- (prev_time) up to the row that reports 'none'.
presence_intervals AS (
    SELECT
        space_id,
        prev_time AS start_time,
        event_time AS end_time
    FROM (
        SELECT
            space_id,
            prev_time,
            event_time,
            value,
            -- value of the same device's previous row; derived here because
            -- presence_logs only exposes prev_time
            LAG(value) OVER (PARTITION BY device_id ORDER BY event_time) AS prev_value
        FROM presence_logs
    ) AS transitions
    WHERE value = 'none'
      -- Only a presence -> none transition closes an occupied interval. Without
      -- this check a repeated 'none' counts the gap between the two rows as
      -- occupied time.
      AND prev_value = 'presence'
      AND prev_time IS NOT NULL
),
-- Step 4: Break multi-day periods into daily intervals
generated_daily_intervals AS (
SELECT
space_id,
gs::date AS day,
GREATEST(clipped_from, gs) AS interval_start,
LEAST(clipped_until, gs + INTERVAL '1 day') AS interval_end
FROM clipped_device_none_periods,
LATERAL generate_series(DATE_TRUNC('day', clipped_from), DATE_TRUNC('day', clipped_until), INTERVAL '1 day') AS gs
-- One row per calendar day an interval touches, clipped to that day's window
-- [day_start, day_start + 1 day].
split_intervals AS (
    SELECT
        iv.space_id,
        gs.day_start::date AS event_date,
        -- Clip against the generated day, not against the interval's own first/last
        -- day; otherwise a multi-day interval contributes its full length to every
        -- day it spans.
        GREATEST(iv.start_time, gs.day_start) AS interval_start,
        LEAST(iv.end_time, gs.day_start + INTERVAL '1 day') AS interval_end
    FROM presence_intervals AS iv
    CROSS JOIN LATERAL generate_series(
        date_trunc('day', iv.start_time),
        date_trunc('day', iv.end_time),
        INTERVAL '1 day'
    ) AS gs(day_start)
),
-- Per space/day, attach to every interval the latest end among ALL earlier
-- intervals (NULL for the first one). Looking only at the directly preceding row
-- (LAG) breaks when a short interval is nested inside an earlier, longer one:
-- the next interval would look disjoint and its time would be counted twice.
ordered_intervals AS (
    SELECT
        space_id,
        event_date,
        interval_start,
        interval_end,
        MAX(interval_end) OVER (
            PARTITION BY space_id, event_date
            ORDER BY interval_start
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS prev_end
    FROM split_intervals
),
-- Assign a running group number per space/day: the counter advances on every
-- row that has no prev_end or that starts after prev_end.
grouped_intervals AS (
    SELECT
        oi.*,
        COUNT(*) FILTER (
            WHERE oi.prev_end IS NULL
               OR oi.interval_start > oi.prev_end
        ) OVER (
            PARTITION BY oi.space_id, oi.event_date
            ORDER BY oi.interval_start
        ) AS grp
    FROM ordered_intervals AS oi
),
-- Step 5: Merge overlapping or adjacent intervals per day
merged_intervals AS (
SELECT
space_id,
day,
interval_start,
interval_end
FROM (
SELECT
space_id,
day,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, day ORDER BY interval_start) AS prev_end
FROM generated_daily_intervals
) sub
WHERE prev_end IS NULL OR interval_start > prev_end
-- Collapse each group (space, day, grp) into a single interval spanning its
-- earliest start and latest end.
SELECT
    g.space_id,
    g.event_date,
    MIN(g.interval_start) AS merged_start,
    MAX(g.interval_end)   AS merged_end
FROM grouped_intervals AS g
GROUP BY
    g.space_id,
    g.event_date,
    g.grp
),
-- Step 6: Sum up total missing seconds (device reported "none") per day
missing_seconds_per_day AS (
SELECT
space_id,
day AS missing_date,
SUM(EXTRACT(EPOCH FROM (interval_end - interval_start))) AS total_missing_seconds
FROM merged_intervals
GROUP BY space_id, day
-- Total length, in seconds, of the merged intervals of each space and day.
summed_intervals AS (
    SELECT
        m.space_id,
        m.event_date,
        SUM(EXTRACT(EPOCH FROM (m.merged_end - m.merged_start))) AS raw_occupied_seconds
    FROM merged_intervals AS m
    GROUP BY
        m.space_id,
        m.event_date
),
-- Step 7: Calculate total occupied time per day (86400 - missing)
occupied_seconds_per_day AS (
SELECT
space_id,
missing_date as event_date,
86400 - total_missing_seconds AS total_occupied_seconds,
(86400 - total_missing_seconds)/86400*100 as occupancy_percentage
FROM missing_seconds_per_day
)
-- Final Output
, final_data as (
SELECT occupied_seconds_per_day.space_id,
occupied_seconds_per_day.event_date,
occupied_seconds_per_day.occupancy_percentage
FROM occupied_seconds_per_day
join params p on true
and p.space_id = occupied_seconds_per_day.space_id
and p.event_date = occupied_seconds_per_day.event_date
ORDER BY 1,2
-- Result restricted to the requested space and date (params): occupied seconds
-- capped at one day (86400 s) and the matching percentage rounded to two decimals.
final_data AS (
    SELECT
        s.space_id,
        s.event_date,
        LEAST(s.raw_occupied_seconds, 86400) AS occupied_seconds,
        -- round(x, 2) is defined for numeric only. EXTRACT(EPOCH ...) returns
        -- double precision before PostgreSQL 14, so cast explicitly to keep the
        -- statement valid on those servers (no-op on 14+).
        ROUND((LEAST(s.raw_occupied_seconds, 86400) / 86400.0 * 100)::numeric, 2) AS occupancy_percentage
    FROM summed_intervals s
    JOIN params p
        ON p.space_id = s.space_id
       AND p.event_date = s.event_date
)
INSERT INTO public."space-daily-occupancy-duration" (
space_uuid,
event_date,
occupied_seconds,
occupancy_percentage
)
select space_id,
event_date,
occupancy_percentage
SELECT
space_id,
event_date,
occupied_seconds,
occupancy_percentage
FROM final_data
ON CONFLICT (space_uuid, event_date) DO UPDATE
SET
occupancy_percentage = EXCLUDED.occupancy_percentage;
occupancy_percentage = EXCLUDED.occupancy_percentage,
occupied_seconds = EXCLUDED.occupied_seconds;

View File

@ -16,4 +16,5 @@ WITH params AS (
WHERE A.device_uuid::text = ANY(P.device_ids)
AND (P.month IS NULL
OR date_trunc('month', A.event_date) = P.month
)
);

View File

@ -1,91 +1,92 @@
-- Step 1: Get device presence events with previous timestamps
WITH start_date AS (
SELECT
d.uuid AS device_id,
d.space_device_uuid AS space_id,
l.value,
l.event_time::timestamp AS event_time,
LAG(l.event_time::timestamp) OVER (PARTITION BY d.uuid ORDER BY l.event_time) AS prev_timestamp
FROM device d
LEFT JOIN "device-status-log" l
ON d.uuid = l.device_id
LEFT JOIN product p
ON p.uuid = d.product_device_uuid
WHERE p.cat_name = 'hps'
AND l.code = 'presence_state'
-- Presence-state log rows of 'hps' devices. Each row also carries the timestamp
-- and the value of the same device's previous row (NULL for a device's first row).
WITH presence_logs AS (
    SELECT
        d.space_device_uuid AS space_id,
        l.device_id,
        l.event_time,
        l.value,
        LAG(l.event_time) OVER device_timeline AS prev_time,
        -- presence_intervals filters on prev_value; without this column the
        -- statement fails with "column prev_value does not exist".
        LAG(l.value) OVER device_timeline AS prev_value
    FROM device d
    JOIN "device-status-log" l ON d.uuid = l.device_id
    JOIN product p ON p.uuid = d.product_device_uuid
    WHERE l.code = 'presence_state'
      AND p.cat_name = 'hps'
    WINDOW device_timeline AS (PARTITION BY l.device_id ORDER BY l.event_time)
),
-- Step 2: Identify periods when device reports "none"
device_none_periods AS (
SELECT
space_id,
device_id,
event_time AS empty_from,
LEAD(event_time) OVER (PARTITION BY device_id ORDER BY event_time) AS empty_until
FROM start_date
WHERE value = 'none'
-- Occupied intervals: every row whose value is 'none' and whose prev_value is
-- 'presence' yields one interval running from prev_time to event_time.
-- NOTE(review): prev_value has to be exposed by presence_logs - confirm it is selected there.
presence_intervals AS (
    SELECT
        pl.space_id,
        pl.prev_time  AS start_time,
        pl.event_time AS end_time
    FROM presence_logs AS pl
    WHERE pl.prev_time IS NOT NULL
      AND pl.prev_value = 'presence'
      AND pl.value = 'none'
),
-- Step 3: Clip the "none" periods to the edges of each day
clipped_device_none_periods AS (
SELECT
space_id,
GREATEST(empty_from, DATE_TRUNC('day', empty_from)) AS clipped_from,
LEAST(empty_until, DATE_TRUNC('day', empty_until) + INTERVAL '1 day') AS clipped_until
FROM device_none_periods
WHERE empty_until IS NOT NULL
-- Split intervals across days: one row per calendar day an interval touches,
-- clipped to that day's window [day_start, day_start + 1 day].
split_intervals AS (
    SELECT
        iv.space_id,
        gs.day_start::date AS event_date,
        -- Clip against the generated day, not against the interval's own first/last
        -- day; otherwise a multi-day interval contributes its full length to every
        -- day it spans.
        GREATEST(iv.start_time, gs.day_start) AS interval_start,
        LEAST(iv.end_time, gs.day_start + interval '1 day') AS interval_end
    FROM presence_intervals AS iv
    CROSS JOIN LATERAL generate_series(
        date_trunc('day', iv.start_time),
        date_trunc('day', iv.end_time),
        interval '1 day'
    ) AS gs(day_start)
),
-- Step 4: Break multi-day periods into daily intervals
generated_daily_intervals AS (
SELECT
space_id,
gs::date AS day,
GREATEST(clipped_from, gs) AS interval_start,
LEAST(clipped_until, gs + INTERVAL '1 day') AS interval_end
FROM clipped_device_none_periods,
LATERAL generate_series(DATE_TRUNC('day', clipped_from), DATE_TRUNC('day', clipped_until), INTERVAL '1 day') AS gs
-- Mark and group overlapping intervals per space per day.
-- prev_end is the latest end among ALL earlier intervals of the same space/day
-- (NULL for the first one). Using only the directly preceding row (LAG) breaks
-- when a short interval is nested inside an earlier, longer one: the next
-- interval would look disjoint and its time would be counted twice.
ordered_intervals AS (
    SELECT
        space_id,
        event_date,
        interval_start,
        interval_end,
        MAX(interval_end) OVER (
            PARTITION BY space_id, event_date
            ORDER BY interval_start
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS prev_end
    FROM split_intervals
),
-- Step 5: Merge overlapping or adjacent intervals per day
-- Assign a running group number per space/day: the counter advances on every
-- row that has no prev_end or that starts after prev_end.
grouped_intervals AS (
    SELECT
        oi.*,
        COUNT(*) FILTER (
            WHERE oi.prev_end IS NULL
               OR oi.interval_start > oi.prev_end
        ) OVER (
            PARTITION BY oi.space_id, oi.event_date
            ORDER BY oi.interval_start
        ) AS grp
    FROM ordered_intervals AS oi
),
-- Merge overlapping intervals per group
merged_intervals AS (
SELECT
space_id,
day,
interval_start,
interval_end
FROM (
SELECT
space_id,
day,
interval_start,
interval_end,
LAG(interval_end) OVER (PARTITION BY space_id, day ORDER BY interval_start) AS prev_end
FROM generated_daily_intervals
) sub
WHERE prev_end IS NULL OR interval_start > prev_end
-- Collapse each group (space, day, grp) into a single interval spanning its
-- earliest start and latest end.
SELECT
    g.space_id,
    g.event_date,
    MIN(g.interval_start) AS merged_start,
    MAX(g.interval_end)   AS merged_end
FROM grouped_intervals AS g
GROUP BY
    g.space_id,
    g.event_date,
    g.grp
),
-- Step 6: Sum up total missing seconds (device reported "none") per day
missing_seconds_per_day AS (
SELECT
space_id,
day AS missing_date,
SUM(EXTRACT(EPOCH FROM (interval_end - interval_start))) AS total_missing_seconds
FROM merged_intervals
GROUP BY space_id, day
),
-- Step 7: Calculate total occupied time per day (86400 - missing)
occupied_seconds_per_day AS (
SELECT
space_id,
missing_date as date,
86400 - total_missing_seconds AS total_occupied_seconds
FROM missing_seconds_per_day
-- Total length, in seconds, of the merged intervals of each space and day.
summed_intervals AS (
    SELECT
        m.space_id,
        m.event_date,
        SUM(EXTRACT(EPOCH FROM (m.merged_end - m.merged_start))) AS raw_occupied_seconds
    FROM merged_intervals AS m
    GROUP BY
        m.space_id,
        m.event_date
)
-- Final Output
SELECT *
FROM occupied_seconds_per_day
ORDER BY 1,2;
-- Final output: occupied seconds capped at one day (86400 s) and the matching
-- percentage of the day rounded to two decimals, ordered by space and date.
SELECT
    space_id,
    event_date,
    LEAST(raw_occupied_seconds, 86400) AS occupied_seconds,
    -- round(x, 2) is defined for numeric only. EXTRACT(EPOCH ...) returns double
    -- precision before PostgreSQL 14, so cast explicitly to keep the statement
    -- valid on those servers (no-op on 14+).
    ROUND((LEAST(raw_occupied_seconds, 86400) / 86400.0 * 100)::numeric, 2) AS occupancy_percentage
FROM summed_intervals
ORDER BY space_id, event_date;