Authorities migration

Context

Authorities were moved from mod-inventory-storage to mod-entities-links. This migration is required for moving existing records from one module schema to another.

Note

If during script execution errors related to missing naturalId is appeared then authorities must be refreshed with Script to refresh Authorities against an updated MARC-Authority map

Instructions

Step 1

Run time:

Before upgrade

Action:

Step 1.

Run script: 

Action script
CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority_note_type
(
    id uuid NOT NULL,
    name character varying(250) NOT NULL,
    source character varying(100) NOT NULL,
    created_date timestamp without time zone NOT NULL,
    updated_date timestamp without time zone NOT NULL,
    created_by_user_id uuid NOT NULL,
    updated_by_user_id uuid NOT NULL,
    CONSTRAINT pk_authority_note_type PRIMARY KEY (id),
    CONSTRAINT authority_note_type_name_unq UNIQUE (name)
);

CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority_source_file
(
    id uuid NOT NULL,
    name character varying(250) NOT NULL,
    type character varying(100) NOT NULL,
    base_url character varying(250),
    source character varying(100) NOT NULL,
    created_date timestamp without time zone NOT NULL,
    updated_date timestamp without time zone NOT NULL,
    created_by_user_id uuid NOT NULL,
    updated_by_user_id uuid NOT NULL,
    CONSTRAINT pk_authority_source_file PRIMARY KEY (id),
    CONSTRAINT authority_source_file_base_url_unq UNIQUE (base_url),
    CONSTRAINT authority_source_file_name_unq UNIQUE (name)
);


CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority_source_file_code
(
    id integer NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 2147483647 CACHE 1 ),
    authority_source_file_id uuid NOT NULL,
    code character varying(25) NOT NULL,
    CONSTRAINT pk_authority_source_file_code PRIMARY KEY (id),
    CONSTRAINT authority_source_file_code_unq UNIQUE (code),
    CONSTRAINT fk_authority_source_file_code_authority_source_file_id FOREIGN KEY (authority_source_file_id)
        REFERENCES ${tenant_name}_mod_entities_links.authority_source_file (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
);

CREATE TABLE IF NOT EXISTS ${tenant_name}_mod_entities_links.authority
(
    id uuid NOT NULL,
    natural_id character varying(250) NOT NULL,
    source_file_id uuid,
    source character varying(250),
    heading character varying(2500),
    heading_type character varying(250),
    _version integer,
    subject_heading_code character(1),
    sft_headings jsonb,
    saft_headings jsonb,
    identifiers jsonb,
    notes jsonb,
    deleted boolean DEFAULT false,
    created_date timestamp without time zone NOT NULL,
    updated_date timestamp without time zone NOT NULL,
    created_by_user_id uuid NOT NULL,
    updated_by_user_id uuid NOT NULL,
    CONSTRAINT pk_authority_storage PRIMARY KEY (id),
    CONSTRAINT authority_storage_source_file_id_foreign_key FOREIGN KEY (source_file_id)
        REFERENCES ${tenant_name}_mod_entities_links.authority_source_file (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
);

INSERT INTO ${tenant_name}_mod_entities_links.authority_source_file
(id, name, type, base_url, source, created_date, updated_date, created_by_user_id, updated_by_user_id)
VALUES
('191874a0-707a-4634-928e-374ee9103225', 'Faceted Application of Subject Terminology (FAST)', 'Subjects', 'id.worldcat.org/fast/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('2c0e41b5-8ffb-4856-aa64-76648a6f6b18', 'LC Medium of Performance Thesaurus for Music (LCMPT)', 'Subjects', 'id.loc.gov/authorities/performanceMediums/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('4b531a84-d4fe-44e5-b75f-542ec71b2f62', 'LC Demographic Group Terms (LCFGT)', 'Subjects', 'id.loc.gov/authorities/demographicTerms/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('67d1ec4b-a19a-4324-9f19-473b49e370ac', 'LC Genre/Form Terms (LCGFT)', 'Subjects', 'id.loc.gov/authorities/genreForms/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('6ddf21a6-bc2f-4cb0-ad96-473e1f82da23', 'Medical Subject Headings (MeSH)', 'Subjects', 'id.nlm.nih.gov/mesh/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('70ff583b-b8c9-483e-ac21-cb4a9217898b', 'Thesaurus for Graphic Materials (TGM)', 'Subjects', 'id.loc.gov/vocabulary/graphicMaterials/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('837e2c7b-037b-4113-9dfd-b1b8aeeb1fb8', 'LC Subject Headings (LCSH)', 'Subjects', 'id.loc.gov/authorities/subjects/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('af045f2f-e851-4613-984c-4bc13430454a', 'LC Name Authority file (LCNAF)', 'Names', 'id.loc.gov/authorities/names/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('b0f38dbe-5bc0-477d-b1ee-6d9878a607f7', 'Rare Books and Manuscripts Section (RBMS)', 'Subjects', NULL, 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('b224845c-5026-4594-8b55-61d39ecf0541', 'GSAFD Genre Terms (GSAFD)', 'Subjects', 'vocabularyserver.com/gsafd/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('cb58492d-018e-442d-9ce3-35aabfc524aa', 'Art & architecture thesaurus (AAT)', 'Subjects', 'vocab.getty.edu/aat/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000'),
('ccebe5d8-5bfe-46f5-bfa2-79f257c249c9', 'LC Children''s Subject Headings', 'Subjects', 'id.loc.gov/authorities/childrensSubjects/', 'folio', now(), now(), '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000');

INSERT INTO ${tenant_name}_mod_entities_links.authority_source_file_code
(authority_source_file_id, code)
VALUES
('4b531a84-d4fe-44e5-b75f-542ec71b2f62', 'dg'),
('af045f2f-e851-4613-984c-4bc13430454a', 'n'),
('af045f2f-e851-4613-984c-4bc13430454a', 'nb'),
('af045f2f-e851-4613-984c-4bc13430454a', 'nr'),
('af045f2f-e851-4613-984c-4bc13430454a', 'no'),
('af045f2f-e851-4613-984c-4bc13430454a', 'ns'),
('b0f38dbe-5bc0-477d-b1ee-6d9878a607f7', 'rbmscv'),
('67d1ec4b-a19a-4324-9f19-473b49e370ac', 'gf'),
('6ddf21a6-bc2f-4cb0-ad96-473e1f82da23', 'D'),
('837e2c7b-037b-4113-9dfd-b1b8aeeb1fb8', 'sh'),
('b224845c-5026-4594-8b55-61d39ecf0541', 'gsafd'),
('2c0e41b5-8ffb-4856-aa64-76648a6f6b18', 'mp'),
('cb58492d-018e-442d-9ce3-35aabfc524aa', 'aat'),
('cb58492d-018e-442d-9ce3-35aabfc524aa', 'aatg'),
('ccebe5d8-5bfe-46f5-bfa2-79f257c249c9', 'sj'),
('70ff583b-b8c9-483e-ac21-cb4a9217898b', 'lcgtm'),
('70ff583b-b8c9-483e-ac21-cb4a9217898b', 'tgm'),
('191874a0-707a-4634-928e-374ee9103225', 'fst');


DELETE FROM ${tenant_name}_mod_entities_links.instance_authority_link
	WHERE authority_id IN ((SELECT id FROM ${tenant_name}_mod_entities_links.authority_data WHERE state = TRUE));
	
DELETE FROM ${tenant_name}_mod_entities_links.authority_data_stat
	WHERE authority_id IN ((SELECT id FROM ${tenant_name}_mod_entities_links.authority_data WHERE state = TRUE));
	

insert into ${tenant_name}_mod_entities_links.authority(id,
                                              natural_id,
                                              source_file_id,
                                              source,
                                              _version,
                                              subject_heading_code,
                                              heading,
                                              heading_type,
                                              sft_headings,
                                              saft_headings,
                                              identifiers,
                                              notes,
                                              created_date,
                                              updated_date,
                                              created_by_user_id,
                                              updated_by_user_id)
select id,
       natural_id,
       source_file_id,
       source,
       _version,
       subject_heading_code,
       heading_heading_type[1] heading,
       heading_heading_type[2] heading_type,
       sft_headings,
       saft_headings,
       identifiers,
       notes,
       created_date,
       updated_date,
       created_by_user_id,
       updated_by_user_id
from (select id,
             (jsonb ->> 'naturalId')::text                                           natural_id,
             (jsonb ->> 'sourceFileId')::uuid                                        source_file_id,
             'MARC'                                                                  source, -- (jsonb ->> 'source')::text source
             (jsonb ->> '_version')::int                                             _version,
             (jsonb ->> 'subjectHeadings')::char(1)                                  subject_heading_code,

             case
                 when jsonb ->> 'personalName' is not null then Array [jsonb ->> 'personalName', 'personalName']
                 when jsonb ->> 'personalNameTitle' is not null
                     then Array [jsonb ->> 'personalNameTitle', 'personalNameTitle']
                 when jsonb ->> 'corporateName' is not null then Array [jsonb ->> 'corporateName', 'corporateName']
                 when jsonb ->> 'corporateNameTitle' is not null
                     then Array [jsonb ->> 'corporateNameTitle', 'corporateNameTitle']
                 when jsonb ->> 'meetingName' is not null then Array [jsonb ->> 'meetingName', 'meetingName']
                 when jsonb ->> 'meetingNameTitle' is not null
                     then Array [jsonb ->> 'meetingNameTitle', 'meetingNameTitle']
                 when jsonb ->> 'uniformTitle' is not null then Array [jsonb ->> 'uniformTitle', 'uniformTitle']
                 when jsonb ->> 'topicalTerm' is not null then Array [jsonb ->> 'topicalTerm', 'topicalTerm']
                 when jsonb ->> 'geographicName' is not null then Array [jsonb ->> 'geographicName', 'geographicName']
                 when jsonb ->> 'genreTerm' is not null then Array [jsonb ->> 'genreTerm', 'genreTerm']
                 end                                                                 heading_heading_type,
             (select jsonb_agg(val)
              from (select jsonb_build_object('headingType', 'personalName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftPersonalName') x1
                    union all
                    select jsonb_build_object('headingType', 'personalNameTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftPersonalNameTitle') x2
                    union all
                    select jsonb_build_object('headingType', 'corporateName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftCorporateName') x3
                    union all
                    select jsonb_build_object('headingType', 'corporateNameTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftCorporateNameTitle') x4
                    union all
                    select jsonb_build_object('headingType', 'meetingName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftMeetingName') x5
                    union all
                    select jsonb_build_object('headingType', 'meetingNameTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftMeetingNameTitle') x6
                    union all
                    select jsonb_build_object('headingType', 'uniformTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftUniformTitle') x7
                    union all
                    select jsonb_build_object('headingType', 'topicalTerm', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftTopicalTerm') x8
                    union all
                    select jsonb_build_object('headingType', 'geographicName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftGeographicName') x9
                    union all
                    select jsonb_build_object('headingType', 'genreTerm', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'sftGenreTerm') x10) x)  sft_headings,
             (select jsonb_agg(val)
              from (select jsonb_build_object('headingType', 'personalName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftPersonalName') x1
                    union all
                    select jsonb_build_object('headingType', 'personalNameTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftPersonalNameTitle') x2
                    union all
                    select jsonb_build_object('headingType', 'corporateName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftCorporateName') x3
                    union all
                    select jsonb_build_object('headingType', 'corporateNameTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftCorporateNameTitle') x4
                    union all
                    select jsonb_build_object('headingType', 'meetingName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftMeetingName') x5
                    union all
                    select jsonb_build_object('headingType', 'meetingNameTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftMeetingNameTitle') x6
                    union all
                    select jsonb_build_object('headingType', 'uniformTitle', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftUniformTitle') x7
                    union all
                    select jsonb_build_object('headingType', 'topicalTerm', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftTopicalTerm') x8
                    union all
                    select jsonb_build_object('headingType', 'geographicName', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftGeographicName') x9
                    union all
                    select jsonb_build_object('headingType', 'genreTerm', 'heading', value) val
                    from jsonb_array_elements_text(jsonb -> 'saftGenreTerm') x10) x) saft_headings,
             (jsonb -> 'identifiers')                                                identifiers,
             (jsonb -> 'notes')                                                      notes,

             (jsonb -> 'metadata' ->> 'createdDate')::timestamp /*with time zone*/   created_date,
             (jsonb -> 'metadata' ->> 'updatedDate')::timestamp /*with time zone*/   updated_date,
             (jsonb -> 'metadata' ->> 'createdByUserId')::uuid                       created_by_user_id,
             (jsonb -> 'metadata' ->> 'updatedByUserId')::uuid                       updated_by_user_id
      from ${tenant_name}_mod_inventory_storage.authority) a;
  

Action timing:

Num RecordsTime (sec)
10000.5
60000.6
560004
1060007.8

According to the test results, the execution time of the script appears to follow a linear pattern, making it possible to predict the time for a larger quantity of records. Based on the provided graph, for 1 million records, the estimated execution time would be approximately 75 seconds.

Step 2

Action:

Verify that <tenant>_mod_inventory_storage.authority and <tenant>_mod_entities_links have the same amount of records. This will mean that migration was successful.

After that tables in <tenant>_mod_inventory_storage could be truncated.

Truncate script
DELETE FROM ${tenant_name}_mod_inventory_storage.authority;
DELETE FROM ${tenant_name}_mod_inventory_storage.authority_note_type;
DELETE FROM ${tenant_name}_mod_inventory_storage.authority_source_file;