Migration scripts for OAI-PMH
Populate complete_updated_date column in instance table
0. Disable trigger
Trigger should be disabled to not override historical data to be populated in `complete_updated_date` column
ALTER TABLE {tenant}_mod_inventory_storage.instance DISABLE TRIGGER updatecompleteupdateddate_instance;
1. Prepare step
On this step uuid_range table is created in public schema to split UUIDs in different partitions (16 partitions in total) and track their status later (completed column).
create table public.uuid_range
(
id serial primary key,
partition uuid,
subrange_start uuid,
subrange_end uuid,
completed boolean default false
);
DO
$$
DECLARE
partition_start UUID;
subrange_start UUID;
subrange_end UUID;
BEGIN
FOR i IN 0..15
LOOP
partition_start := (rpad(to_hex(i), 8, '0') || '-0000-0000-0000-000000000000')::UUID;
subrange_start := partition_start;
FOR j IN 1..4096
LOOP
IF i < 15 OR (i = 15 AND j < 4096) THEN
if (j < 4096) then
subrange_end := (to_hex(i)::text || rpad(lpad(to_hex(j), 3, '0'), 7, '0') ||
'-0000-0000-0000-000000000000')::UUID;
else
subrange_end := (rpad(to_hex(i + 1), 8, '0') || '-0000-0000-0000-000000000000')::UUID;
end if;
ELSE
subrange_end := 'ffffffff-ffff-ffff-ffff-ffffffffffff'::UUID; -- upper bound for last subrange in last partition
END IF;
INSERT INTO public.uuid_range (partition, subrange_start, subrange_end)
VALUES (partition_start, subrange_start, subrange_end);
subrange_start := subrange_end;
END LOOP;
END LOOP;
END;
$$
LANGUAGE plpgsql;
2. Update
Update step should be executed for each partition created from step #1. Update script can be executed concurrently (tests results can be found in PERF-725 - Getting issue details... STATUS to understand how many partitions can be processed at the same time on specific env without users impact)
Params to be updated
tenant - tenant name
partition - partition value from public.uuid_range table
set search_path = "{tenant}_mod_inventory_storage", "public";
do
$$
declare
partition_to_process uuid := '{partition}'::uuid;
range_rec public.uuid_range%rowtype;
begin
for range_rec in select ur.*
from public.uuid_range ur
where ur.partition = partition_to_process
and ur.completed = false
loop
-- start transaction;
UPDATE {tenant}_mod_inventory_storage.instance i
SET complete_updated_date = (jsonb -> 'metadata' ->> 'updatedDate')::timestamp with time zone
where i.id between range_rec.subrange_start and range_rec.subrange_end;
update public.uuid_range
set completed = true
where id = range_rec.id;
--
raise notice 'subrange id: % completed', range_rec.id;
commit;
end loop;
end
$$;
3. Monitor
Next script can be used to monitor how many sub-partitions in partitions are left to be processed
select partition, count(partition) from public.uuid_range where completed = false group by partition order by partition;
4. Post-migration
As initially trigger was disabled, it should be enabled again, additionally index should be created for better performance
ALTER TABLE {tenant}_mod_inventory_storage.instance ENABLE TRIGGER updatecompleteupdateddate_instance;
CREATE INDEX IF NOT EXISTS idx_instance_complete_updated_date
ON {tenant}_mod_inventory_storage.instance(complete_updated_date);