Skip to end of banner
Go to start of banner

Clean up records with inconsistent matched id values (for now designed for Orchid)

Skip to end of metadata
Go to start of metadata

You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 17 Next »

MODSOURCE-666 - Getting issue details... STATUS

 if 999 ff s UUID is different from matched_id and status ACTUAL - get highest generation for matched_id and overwrite matched_id to be the same as 999 ff s with incremented generation and also If more than 1 ACTUAL record with the same matched_id - then to those records status will be set as 'OLD'.


The outlined scripts are aimed at fixing records on the Orchid version. 
The scripts implementation aimed at the Poppy version is currently in the testing stage and will be described later.



1. Update records containing inconsistent matched_id with generation and state

Update unmatched matched_id and generation and update status
DO
$$
    DECLARE
        rec record;
        updated_generation numeric := 0;
    BEGIN

--  temporary table is designated to hold records when matched_id and 999 ff $s values are mismatched
        CREATE TEMP TABLE value_matched_id_inconsistency ON COMMIT DROP
        AS
        SELECT rl.id, rl.matched_id, rl.generation, mi.value, mi.version
        FROM {tenantId}_mod_source_record_storage.records_lb rl,
             {tenantId}_mod_source_record_storage.marc_indexers mi,
             (
                 SELECT mi.value, max(mi.version) AS max_version
                 FROM {tenantId}_mod_source_record_storage.records_lb rl,
                      {tenantId}_mod_source_record_storage.marc_indexers mi
                 WHERE rl.id = mi.marc_id AND rl.state='ACTUAL'
                   AND mi.field_no='999' AND mi.ind1='f' AND mi.ind2='f' AND mi.subfield_no='s' AND rl.matched_id::text != mi.value
                 GROUP BY  mi.value
             ) AS nested_query
        WHERE rl.id = mi.marc_id AND rl.state='ACTUAL' AND mi.value = nested_query.value AND mi.field_no='999' AND mi.ind1='f' AND mi.ind2='f' AND mi.subfield_no='s' AND rl.matched_id::text != mi.value AND mi.version = nested_query.max_version;

--  temporary table (matched_id_generation_table) is for holding pair matched_id, generation
        CREATE TEMP TABLE matched_id_generation_table ON COMMIT DROP AS
        SELECT rl.matched_id, MAX(rl.generation)+1 AS max_gen
        FROM {tenantId}_mod_source_record_storage.records_lb rl,
             value_matched_id_inconsistency val_match
        WHERE rl.matched_id::text = val_match.value
        GROUP by rl.matched_id;

-- iteration through records with mismatched matched_id and value
        FOR rec IN SELECT * FROM value_matched_id_inconsistency
            LOOP
                updated_generation := (SELECT max_gen
                                       FROM matched_id_generation_table matched_gen
                                       WHERE matched_gen.matched_id = rec.value::uuid);

                if updated_generation is null then
                    updated_generation := 0;
                end if;

                UPDATE {tenantId}_mod_source_record_storage.records_lb rl
                SET
                    generation = updated_generation,
                    matched_id = rec.value::uuid,
                    state = 'OLD'
                WHERE rl.id = rec.id;

                UPDATE matched_id_generation_table matched_gen
                SET max_gen = updated_generation + 1
                WHERE matched_gen.matched_id = rec.value::uuid;
            END LOOP;

-- update records with greatest generation to 'ACTUAL' state
        UPDATE {tenantId}_mod_source_record_storage.records_lb rl
        SET state='ACTUAL'
        FROM matched_id_generation_table
        WHERE rl.matched_id = matched_id_generation_table.matched_id AND rl.generation = matched_id_generation_table.max_gen - 1;

    END;
$$
LANGUAGE plpgsql;


2. Update multiple ACTUAL records containing the same generation

DO
$$
    BEGIN
        CREATE TEMP TABLE external_id_max_created_date_table ON COMMIT DROP AS
        SELECT rl.external_id, MAX(rl.created_date) AS last_creation_date
        FROM {tenantId}_mod_source_record_storage.records_lb rl,
             (
                 SELECT external_id
                 FROM {tenantId}_mod_source_record_storage.records_lb
                 WHERE state = 'ACTUAL'
                 GROUP BY external_id, generation
                 HAVING COUNT(external_id) > 1
             ) AS external_id_of_duplicates_with_same_generation
        WHERE rl.external_id = external_id_of_duplicates_with_same_generation.external_id
        GROUP by rl.external_id;

        UPDATE {tenantId}_mod_source_record_storage.records_lb r
        SET state = 'OLD'
        FROM external_id_max_created_date_table
        WHERE r.external_id = external_id_max_created_date_table.external_id
          AND r.created_date < external_id_max_created_date_table.last_creation_date;
    END;
$$;



3. Update the remaining multiple ACTUAL records

DO
$$
    BEGIN
        CREATE TEMP TABLE external_id_max_generation_table ON COMMIT DROP AS
        SELECT r.external_id, MAX(r.generation) AS max_generation
        FROM {tenantId}_mod_source_record_storage.records_lb r,
             (
                 SELECT external_id
                 FROM {tenantId}_mod_source_record_storage.records_lb
                 WHERE state = 'ACTUAL'
                 GROUP BY external_id
                 HAVING COUNT(external_id) > 1
             ) AS external_id_of_duplicates
        WHERE r.external_id = external_id_of_duplicates.external_id
        GROUP BY r.external_id;

        UPDATE {tenantId}_mod_source_record_storage.records_lb r
        SET state = 'OLD'
        FROM external_id_max_generation_table
        WHERE r.external_id = external_id_max_generation_table.external_id
          AND r.generation < external_id_max_generation_table.max_generation;
    END;
$$;




  • No labels