Authorities migration
- Pavlo Smahin
Context
Authorities were moved from mod-inventory-storage to mod-entities-links. This migration is required for moving existing records from one module schema to another.
Note
If during script execution errors related to missing naturalId is appeared then authorities must be refreshed with Script to refresh Authorities against an updated MARC-Authority map
Instructions
Step 1
Run time:
Before upgrade
Action:
Step 1.
Run script:
CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority_note_type ( id uuid NOT NULL, name character varying(250) NOT NULL, source character varying(100) NOT NULL, created_date timestamp without time zone NOT NULL, updated_date timestamp without time zone NOT NULL, created_by_user_id uuid NOT NULL, updated_by_user_id uuid NOT NULL, CONSTRAINT pk_authority_note_type PRIMARY KEY (id), CONSTRAINT authority_note_type_name_unq UNIQUE (name) ); CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority_source_file ( id uuid NOT NULL, name character varying(250) NOT NULL, type character varying(100) NOT NULL, base_url character varying(250), source character varying(100) NOT NULL, created_date timestamp without time zone NOT NULL, updated_date timestamp without time zone NOT NULL, created_by_user_id uuid NOT NULL, updated_by_user_id uuid NOT NULL, CONSTRAINT pk_authority_source_file PRIMARY KEY (id), CONSTRAINT authority_source_file_base_url_unq UNIQUE (base_url), CONSTRAINT authority_source_file_name_unq UNIQUE (name) ); CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority_source_file_code ( id integer NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 2147483647 CACHE 1 ), authority_source_file_id uuid NOT NULL, code character varying(25) NOT NULL, CONSTRAINT pk_authority_source_file_code PRIMARY KEY (id), CONSTRAINT authority_source_file_code_unq UNIQUE (code), CONSTRAINT fk_authority_source_file_code_authority_source_file_id FOREIGN KEY (authority_source_file_id) REFERENCES ${tenant_name}_mod_entities_links.authority_source_file (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION ); CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority ( id uuid NOT NULL, natural_id character varying(250) NOT NULL, source_file_id uuid, source character varying(250), heading character varying(2500), heading_type character varying(250), _version integer, subject_heading_code character(1), sft_headings jsonb, saft_headings jsonb, identifiers jsonb, notes jsonb, deleted boolean DEFAULT false, created_date timestamp without time zone NOT NULL, updated_date timestamp without time zone NOT NULL, created_by_user_id uuid NOT NULL, updated_by_user_id uuid NOT NULL, CONSTRAINT pk_authority_storage PRIMARY KEY (id), CONSTRAINT authority_storage_source_file_id_foreign_key FOREIGN KEY (source_file_id) REFERENCES ${tenant_name}_mod_entities_links.authority_source_file (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION ); INSERT INTO ${tenant_name}_mod_entities_links.authority_source_file (id, name, type, base_url, source, created_date, updated_date, created_by_user_id, updated_by_user_id) VALUES ('191874a0-707a-4634-928e-374ee9103225', 'Faceted Application of Subject Terminology (FAST)', 'Subjects', 'id.worldcat.org/fast/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('2c0e41b5-8ffb-4856-aa64-76648a6f6b18', 'LC Medium of Performance Thesaurus for Music (LCMPT)', 'Subjects', 'id.loc.gov/authorities/performanceMediums/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('4b531a84-d4fe-44e5-b75f-542ec71b2f62', 'LC Demographic Group Terms (LCFGT)', 'Subjects', 'id.loc.gov/authorities/demographicTerms/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('67d1ec4b-a19a-4324-9f19-473b49e370ac', 'LC Genre/Form Terms (LCGFT)', 'Subjects', 'id.loc.gov/authorities/genreForms/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('6ddf21a6-bc2f-4cb0-ad96-473e1f82da23', 'Medical Subject Headings (MeSH)', 'Subjects', 'id.nlm.nih.gov/mesh/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('70ff583b-b8c9-483e-ac21-cb4a9217898b', 'Thesaurus for Graphic Materials (TGM)', 'Subjects', 'id.loc.gov/vocabulary/graphicMaterials/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('837e2c7b-037b-4113-9dfd-b1b8aeeb1fb8', 'LC Subject Headings (LCSH)', 'Subjects', 'id.loc.gov/authorities/subjects/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('af045f2f-e851-4613-984c-4bc13430454a', 'LC Name Authority file (LCNAF)', 'Names', 'id.loc.gov/authorities/names/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('b0f38dbe-5bc0-477d-b1ee-6d9878a607f7', 'Rare Books and Manuscripts Section (RBMS)', 'Subjects', NULL, 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('b224845c-5026-4594-8b55-61d39ecf0541', 'GSAFD Genre Terms (GSAFD)', 'Subjects', 'vocabularyserver.com/gsafd/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('cb58492d-018e-442d-9ce3-35aabfc524aa', 'Art & architecture thesaurus (AAT)', 'Subjects', 'vocab.getty.edu/aat/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'), ('ccebe5d8-5bfe-46f5-bfa2-79f257c249c9', 'LC Children''s Subject Headings', 'Subjects', 'id.loc.gov/authorities/childrensSubjects/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'); INSERT INTO ${tenant_name}_mod_entities_links.authority_source_file_code (authority_source_file_id, code) VALUES ('4b531a84-d4fe-44e5-b75f-542ec71b2f62', 'dg'), ('af045f2f-e851-4613-984c-4bc13430454a', 'n'), ('af045f2f-e851-4613-984c-4bc13430454a', 'nb'), ('af045f2f-e851-4613-984c-4bc13430454a', 'nr'), ('af045f2f-e851-4613-984c-4bc13430454a', 'no'), ('af045f2f-e851-4613-984c-4bc13430454a', 'ns'), ('b0f38dbe-5bc0-477d-b1ee-6d9878a607f7', 'rbmscv'), ('67d1ec4b-a19a-4324-9f19-473b49e370ac', 'gf'), ('6ddf21a6-bc2f-4cb0-ad96-473e1f82da23', 'D'), ('837e2c7b-037b-4113-9dfd-b1b8aeeb1fb8', 'sh'), ('b224845c-5026-4594-8b55-61d39ecf0541', 'gsafd'), ('2c0e41b5-8ffb-4856-aa64-76648a6f6b18', 'mp'), ('cb58492d-018e-442d-9ce3-35aabfc524aa', 'aat'), ('cb58492d-018e-442d-9ce3-35aabfc524aa', 'aatg'), ('ccebe5d8-5bfe-46f5-bfa2-79f257c249c9', 'sj'), ('70ff583b-b8c9-483e-ac21-cb4a9217898b', 'lcgtm'), ('70ff583b-b8c9-483e-ac21-cb4a9217898b', 'tgm'), ('191874a0-707a-4634-928e-374ee9103225', 'fst'); DELETE FROM ${tenant_name}_mod_entities_links.instance_authority_link WHERE authority_id IN ((SELECT id FROM ${tenant_name}_mod_entities_links.authority_data WHERE state = TRUE)); DELETE FROM ${tenant_name}_mod_entities_links.authority_data_stat WHERE authority_id IN ((SELECT id FROM ${tenant_name}_mod_entities_links.authority_data WHERE state = TRUE)); insert into ${tenant_name}_mod_entities_links.authority(id, natural_id, source_file_id, source, _version, subject_heading_code, heading, heading_type, sft_headings, saft_headings, identifiers, notes, created_date, updated_date, created_by_user_id, updated_by_user_id) select id, natural_id, source_file_id, source, _version, subject_heading_code, heading_heading_type[1] heading, heading_heading_type[2] heading_type, sft_headings, saft_headings, identifiers, notes, created_date, updated_date, created_by_user_id, updated_by_user_id from (select id, (jsonb ->> 'naturalId')::text natural_id, (jsonb ->> 'sourceFileId')::uuid source_file_id, 'MARC' source, -- (jsonb ->> 'source')::text source (jsonb ->> '_version')::int _version, (jsonb ->> 'subjectHeadings')::char(1) subject_heading_code, case when jsonb ->> 'personalName' is not null then Array [jsonb ->> 'personalName', 'personalName'] when jsonb ->> 'personalNameTitle' is not null then Array [jsonb ->> 'personalNameTitle', 'personalNameTitle'] when jsonb ->> 'corporateName' is not null then Array [jsonb ->> 'corporateName', 'corporateName'] when jsonb ->> 'corporateNameTitle' is not null then Array [jsonb ->> 'corporateNameTitle', 'corporateNameTitle'] when jsonb ->> 'meetingName' is not null then Array [jsonb ->> 'meetingName', 'meetingName'] when jsonb ->> 'meetingNameTitle' is not null then Array [jsonb ->> 'meetingNameTitle', 'meetingNameTitle'] when jsonb ->> 'uniformTitle' is not null then Array [jsonb ->> 'uniformTitle', 'uniformTitle'] when jsonb ->> 'topicalTerm' is not null then Array [jsonb ->> 'topicalTerm', 'topicalTerm'] when jsonb ->> 'geographicName' is not null then Array [jsonb ->> 'geographicName', 'geographicName'] when jsonb ->> 'genreTerm' is not null then Array [jsonb ->> 'genreTerm', 'genreTerm'] end heading_heading_type, (select jsonb_agg(val) from (select jsonb_build_object('headingType', 'personalName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftPersonalName') x1 union all select jsonb_build_object('headingType', 'personalNameTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftPersonalNameTitle') x2 union all select jsonb_build_object('headingType', 'corporateName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftCorporateName') x3 union all select jsonb_build_object('headingType', 'corporateNameTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftCorporateNameTitle') x4 union all select jsonb_build_object('headingType', 'meetingName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftMeetingName') x5 union all select jsonb_build_object('headingType', 'meetingNameTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftMeetingNameTitle') x6 union all select jsonb_build_object('headingType', 'uniformTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftUniformTitle') x7 union all select jsonb_build_object('headingType', 'topicalTerm', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftTopicalTerm') x8 union all select jsonb_build_object('headingType', 'geographicName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftGeographicName') x9 union all select jsonb_build_object('headingType', 'genreTerm', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'sftGenreTerm') x10) x) sft_headings, (select jsonb_agg(val) from (select jsonb_build_object('headingType', 'personalName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftPersonalName') x1 union all select jsonb_build_object('headingType', 'personalNameTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftPersonalNameTitle') x2 union all select jsonb_build_object('headingType', 'corporateName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftCorporateName') x3 union all select jsonb_build_object('headingType', 'corporateNameTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftCorporateNameTitle') x4 union all select jsonb_build_object('headingType', 'meetingName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftMeetingName') x5 union all select jsonb_build_object('headingType', 'meetingNameTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftMeetingNameTitle') x6 union all select jsonb_build_object('headingType', 'uniformTitle', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftUniformTitle') x7 union all select jsonb_build_object('headingType', 'topicalTerm', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftTopicalTerm') x8 union all select jsonb_build_object('headingType', 'geographicName', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftGeographicName') x9 union all select jsonb_build_object('headingType', 'genreTerm', 'heading', value) val from jsonb_array_elements_text(jsonb -> 'saftGenreTerm') x10) x) saft_headings, (jsonb -> 'identifiers') identifiers, (jsonb -> 'notes') notes, (jsonb -> 'metadata' ->> 'createdDate')::timestamp /*with time zone*/ created_date, (jsonb -> 'metadata' ->> 'updatedDate')::timestamp /*with time zone*/ updated_date, (jsonb -> 'metadata' ->> 'createdByUserId')::uuid created_by_user_id, (jsonb -> 'metadata' ->> 'updatedByUserId')::uuid updated_by_user_id from ${tenant_name}_mod_inventory_storage.authority) a;
Action timing:
Num Records | Time (sec) |
---|---|
1000 | 0.5 |
6000 | 0.6 |
56000 | 4 |
106000 | 7.8 |
According to the test results, the execution time of the script appears to follow a linear pattern, making it possible to predict the time for a larger quantity of records. Based on the provided graph, for 1 million records, the estimated execution time would be approximately 75 seconds.
Step 2
Action:
Verify that <tenant>_mod_inventory_storage.authority and <tenant>_mod_entities_links have the same amount of records. This will mean that migration was successful.
After that tables in <tenant>_mod_inventory_storage could be truncated.
DELETE FROM ${tenant_name}_mod_inventory_storage.authority; DELETE FROM ${tenant_name}_mod_inventory_storage.authority_note_type; DELETE FROM ${tenant_name}_mod_inventory_storage.authority_source_file;