...
Step N | Execution time |
---|---|
1 | 1s |
2 | 1h 10m |
3 | 20m |
4 | 1h 30m |
5 | 1s |
6 | 2s |
Total | 3h |
Step 2
...
Precondition
Run time:
After upgrade, before reindex
Precondition:
No preconditions
Action:
Run script:
Code Block | ||||||
---|---|---|---|---|---|---|
| ||||||
-- Change search path for the script SET search_path = <tenant>_mod_inventory_storage; -- Crete/replace function to recalculate effective values and shelving order in items CREATE OR REPLACE FUNCTION setdewey_itemcall_effective_values(holding jsonb, item jsonbnumber(call_number text) RETURNS jsonbtext AS $$ BEGINDECLARE RETURN set_effective_shelving_order(matches text[]; class_digits text; itemclass_decimal text; cutter text; || jsonb_build_object('effectiveLocationId', other text; BEGIN matches COALESCE(item->>'temporaryLocationId', item->>'permanentLocationId', holding->>'effectiveLocationId')) || effective_call_number_components(holding, item)= regexp_match(call_number, '^(\d+)(\.\d+)? *\.?(?:([A-Z]\d{1,3}(?:[A-Z]+)?) *(.*)|(.*))$'); END; $$ LANGUAGEIF plpgsqlmatches IMMUTABLEIS PARALLELNULL SAFETHEN STRICT; -- Recalculate effective values and shelving order in items UPDATE item SET jsonb = set_item_effective_values(item_data.holdings, item_data.item) FROM (RETURN numerically_sortable(trim2null(call_number)); END IF; class_digits = matches[1]; select item.id, hr.jsonb as holdings, item.jsonb as item from holdings_record hr join item on item.holdingsRecordId = hr.idclass_decimal = matches[2]; cutter = matches[3]; other = numerically_sortable(trim2null(concat(trim2null(matches[4]), trim2null(matches[5])))); whereRETURN trim2null(hr.jsonb->>'callNumberTypeId') IS NOT NULLconcat_ws(' ', concat(sortable_number(class_digits), class_decimal), and trim2null(item.jsonb->'effectiveCallNumberComponents'->>'typeId') IS NULL ) AS item_data WHERE item.id = item_data.id; |
Action timing:
Tested on database with 9M rows in item table.
Execution time: TBD
...
cutter,
other
);
END;
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT;
CREATE OR REPLACE FUNCTION lc_nlm_call_number(call_number text)
RETURNS text AS $$
DECLARE
matches text[];
classification text;
classLetters text;
classDigits text;
classDecimal text;
everythingElse text;
classSuffix text;
cutter text;
BEGIN
call_number = upper(call_number);
matches = regexp_match(call_number, '^(([A-Z]+) *(?:(\d+)(\.\d+)?)?)(.*)$');
IF matches IS NULL THEN
RETURN trim(cutter_shelf_key(trim2null(call_number)));
END IF;
classification = trim(matches[1]);
classLetters = trim(matches[2]);
classDigits = trim(matches[3]);
classDecimal = trim(matches[4]);
everythingElse = matches[5];
IF classDigits IS NULL THEN
RETURN NULL;
END IF;
IF length(everythingElse) > 0 THEN
-- combining greedy and non-greedy:
-- https://www.postgresql.org/docs/current/functions-matching.html#POSIX-MATCHING-RULES
matches = regexp_match(everythingElse, '(?:(.*?)(\.?[A-Z]\d+|^\.[A-Z]| \.[A-Z])(.*)){1,1}');
IF matches IS NULL THEN
classSuffix = trim2null(everythingElse);
ELSE
classSuffix = trim2null(matches[1]);
cutter = trim(matches[2] || matches[3]);
END IF;
END IF;
classSuffix = numerically_sortable(classSuffix);
IF substr(classSuffix, 1, 1) BETWEEN 'A' AND 'Z' THEN
classSuffix = '_' || classSuffix;
END IF;
cutter = cutter_shelf_key(cutter);
return trim(concat_ws(' ', classLetters,
concat(length(classDigits), classDigits, classDecimal),
trim2null(classSuffix),
trim(cutter)
));
END;
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT;
CREATE OR REPLACE FUNCTION su_doc_call_number(call_number text)
RETURNS text AS $$
DECLARE
matches text[];
BEGIN
matches = regexp_match(upper(call_number),
'^([A-Z]+)\s*(\d+)(\.(?:[A-Z]+\d*|\d+))(/(?:[A-Z]+(?:\d+(?:-\d+)?)?|\d+(?:-\d+)?))?:?(.*)$');
IF matches IS NULL THEN
RETURN su_doc_part(trim2null(upper(call_number)));
END IF;
RETURN concat_ws(' ', matches[1], su_doc_part(matches[2]), su_doc_part(matches[3]), su_doc_part(matches[4]), su_doc_part(matches[5]));
END;
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT;
CREATE OR REPLACE FUNCTION sortable_number(n text)
RETURNS text AS $$
DECLARE int_part text;
BEGIN
n = regexp_replace(n, '^0+', '');
int_part = split_part(n, '.', 1);
RETURN concat(length(int_part), n);
END;
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT;
CREATE OR REPLACE FUNCTION cutter_shelf_key(s text)
RETURNS text AS $$
DECLARE
chunk text;
matches text[];
cutter text;
suffix text;
result text;
BEGIN
FOREACH chunk IN ARRAY regexp_split_to_array(s, '(?=\.?[A-Z][0-9])') LOOP
matches = regexp_match(chunk, '([A-Z][0-9]+)(.*)');
IF matches IS NULL THEN
-- before the first cutter
IF result IS NULL THEN
result = trim2null(numerically_sortable(chunk));
ELSE
result = concat_ws(' ', result, trim2null(numerically_sortable(chunk)));
END IF;
ELSE
cutter = matches[1];
suffix = trim2null(numerically_sortable(matches[2]));
result = concat_ws(' ', result, cutter, suffix);
END IF;
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT;
|
Step 2 (Option 1). Reqires Poppy CSP #3
Run time:
After upgrade, before reindex
Precondition:
No preconditions
Action:
...
Run script:
Code Block | ||||||
---|---|---|---|---|---|---|
|
...
-- Change search path for the script SET search_path = <tenant>_mod_inventory_storage |
...
;
|
...
-- |
...
Recalculate effective values and shelving order in items
UPDATE item
SET jsonb = set_item_effective_values(item_data.holdings, item_data.item)
FROM (
select item.id, hr.jsonb as holdings, item.jsonb as item from holdings_record hr
join item on item.holdingsRecordId = hr.id
where item.jsonb->'effectiveCallNumberComponents'->'callNumber' IS NOT NULL
and item.jsonb->'effectiveCallNumberComponents'->'callNumber' != 'null'
) AS item_data
WHERE item.id = item_data.id; |
Action timing:
Tested on database with 9M rows in item table.
Execution time: TBD
Step 2 (Option 2). Reqires Poppy CSP #3
Run time:
After upgrade, before reindex
Precondition:
No preconditions
Action:
Disable triggers
Code Block language sql collapse true createALTER table public.uuid_range ( id serial primary key, partition uuid, subrange_start uuid, subrange_end uuid, completed boolean default false );-- If table exists and filled then do: UPDATE public.uuid_range SET completed=false; -- If table was just created then fill it: DO $$ DECLARE partition_start UUID; subrange_start UUID; subrange_end UUID; BEGIN FOR i IN 0..15 LOOP partition_start := (rpad(to_hex(i), 8, '0') || '-0000-0000-0000-000000000000')::UUID; subrange_start := partition_start; FOR j IN 1..4096 TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER audit_item; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER check_statistical_code_references_on_insert; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER check_statistical_code_references_on_update; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER set_id_in_jsonb; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER set_item_md_json_trigger; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER set_item_md_trigger; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER set_item_ol_version_trigger; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER update_item_references; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER update_item_status_date; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER updatecompleteupdateddate_item_delete; ALTER TABLE {tenant}_mod_inventory_storage.item DISABLE TRIGGER updatecompleteupdateddate_item_insert_update;
Create uuid_range table
Code Block language sql collapse true create table public.uuid_range ( id serial primary key, partition uuid, subrange_start uuid, subrange_end uuid, completed boolean default false );-- If table exists and filled then do: UPDATE public.uuid_range SET completed=false; -- If table was just created then fill it: DO $$ DECLARE partition_start UUID; subrange_start UUID; LOOP subrange_end UUID; BEGIN IFFOR i <IN 0..15 OR (i = 15 AND j < 4096)LOOP THEN partition_start := (rpad(to_hex(i), 8, '0') || '-0000-0000-0000-000000000000')::UUID; if (j < 4096) then subrange_start := partition_start; FOR j IN 1..4096 subrange_end := (to_hex(i)::text || rpad(lpad(to_hex(j), 3, '0'), 7, '0') || LOOP IF i < 15 OR (i = 15 AND j < 4096) THEN '-0000-0000-0000-000000000000')::UUID; if (j < 4096) elsethen subrange_end := (to_hex(i)::text || rpad(lpad(to_hex(i + 1j), 3, '0'), 87, '0') || '-0000-0000-0000-000000000000')::UUID; end if; '-0000-0000-0000-000000000000')::UUID; ELSE else subrange_end := 'ffffffff-ffff-ffff-ffff-ffffffffffff'::UUID; -- upper bound for last subrange in last partition subrange_end := (rpad(to_hex(i + 1), 8, '0') || '-0000-0000-0000-000000000000')::UUID; END IF; end if; INSERT INTO public.uuid_range (partition, subrange_start, subrange_end) ELSE VALUES (partition_start, subrange_start, subrange_end); subrange_end := 'ffffffff-ffff-ffff-ffff-ffffffffffff'::UUID; -- upper bound for last subrange in last partition subrange_start := subrange_end; END LOOP;IF; END LOOP; END;INSERT $$ LANGUAGE plpgsql;
Create/replace function to recalculate effective values and shelving order in items
Code Block language sql SET search_path = <tenant>_mod_inventory_storage; CREATE OR REPLACE FUNCTION set_item_effective_values(holding jsonb, item jsonb) RETURNS jsonb AS $$ BEGININTO public.uuid_range (partition, subrange_start, subrange_end) RETURNVALUES set_effective_shelving_order((partition_start, subrange_start, subrange_end); item || jsonb_build_object('effectiveLocationId',subrange_start := subrange_end; COALESCE(item->>'temporaryLocationId', item->>'permanentLocationId', holding->>'effectiveLocationId')) END LOOP; || effective_call_number_components(holding, item)) END LOOP; END; $$ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT;;
Update step should be executed for each partition created from step #2. Update script can be executed concurrently.
Code Block language sql collapse true set search_path = "{tenant}_mod_inventory_storage", "public"; do $$ declare partition_to_process uuid := '{partition}'::uuid; range_rec public.uuid_range%rowtype; begin for range_rec in select ur.* from public.uuid_range ur where ur.partition = partition_to_process and ur.completed = false loop -- start transaction; UPDATE item i SET jsonb = set_item_effective_values(item_data.holdings, item_data.item) FROM ( select item.id, hr.jsonb as holdings, item.jsonb as item from holdings_record hr join item on item.holdingsRecordId = hr.id where item.id between range_rec.subrange_start and range_rec.subrange_end and item.jsonb->'effectiveCallNumberComponents'->'callNumber' IS NOT NULL and item.jsonb->'effectiveCallNumberComponents'->'callNumber' != 'null' ) AS item_data WHERE i.id = item_data.id; update uuid_range set completed = true where id = range_rec.id; -- raise notice 'subrange id: % completed', range_rec.id; commit; end loop; end $$;
Monitor how many sub-partitions in partitions are left to be processed
Code Block language sql collapse true select partition, count(partition) from public.uuid_range where completed = false group by partition order by partition;
Enable triggers
Code Block language sql collapse true ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER audit_item; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER check_statistical_code_references_on_insert; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER check_statistical_code_references_on_update; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER set_id_in_jsonb; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER set_item_md_json_trigger; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER set_item_md_trigger; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER set_item_ol_version_trigger; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER update_item_references; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER update_item_status_date; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER updatecompleteupdateddate_item_delete; ALTER TABLE {tenant}_mod_inventory_storage.item ENABLE TRIGGER updatecompleteupdateddate_item_insert_update;
Step 2 (Option 3). Requires Poppy CSP #3. Unstable on big amounts of data
Run time:
After upgrade, before reindex
...
Code Block | ||
---|---|---|
| ||
-- Change search path for the script SET search_path = <tenant>_mod_inventory_storage; -- Recalculate effective values for items with no call number type specified. No shelving order recalculation UPDATE item SET jsonb = set_item_effective_values_no_shelv_recalc(item_data.holdings, item_data.item) FROM ( select item.id, hr.jsonb as holdings, item.jsonb as item from holdings_record hr join item on item.holdingsRecordId = hr.id where trim2null(hr.jsonb->>'callNumberTypeId') IS NOT NULL and trim2null(item.jsonb->'effectiveCallNumberComponents'->>'typeId') IS NULL ) AS item_data WHERE item.id = item_data.id; |
Step 3. Requires Poppy CSP #3. Unstable on big amounts of data
Run async item shelving order migration via api. Anytime, may be during runtime. Values surrounded by "<>" to be replaced
Code Block language text curl --location --request POST '<okapi-url>/inventory-storage/migrations/jobs' \ --header 'X-Okapi-Tenant: <tenant>' \ --header 'X-Okapi-Token: <token>' \ --header 'Content-Type: application/json' \ --data '{ "migrations": ["itemShelvingOrderMigration"], "affectedEntities": ["ITEM"] }'
Monitor execution until response body parameter "jobStatus" is "Completed". <jobId> is returned from previous request as "id" parameter in response body
Code Block language text curl --location --request GET '<okapi-url>/inventory-storage/migrations/jobs/<jobId>' \ --header 'X-Okapi-Tenant: <tenant>' \ --header 'X-Okapi-Token: <token>' \ --header 'Content-Type: application/json'
- Once migration is completed - reindex instances
...