Clean up records with inconsistent matched id values

MODSOURCE-666 - Getting issue details... STATUS

 if 999 ff s UUID is different from matched_id and status ACTUAL - get highest generation for matched_id and overwrite matched_id to be the same as 999 ff s with incremented generation and also If more than 1 ACTUAL record with the same matched_id - then to those records status will be set as 'OLD'.


1.  Script to update multiple ACTUAL records containing the same generation

DO
$$
    BEGIN
        CREATE TEMP TABLE external_id_max_created_date_table ON COMMIT DROP AS
        SELECT rl.external_id, MAX(rl.created_date) AS last_creation_date
        FROM {tenantId}_mod_source_record_storage.records_lb rl,
             (
                 SELECT external_id
                 FROM {tenantId}_mod_source_record_storage.records_lb
                 WHERE state = 'ACTUAL'
                 GROUP BY external_id, generation
                 HAVING COUNT(external_id) > 1
             ) AS external_id_of_duplicates_with_same_generation
        WHERE rl.external_id = external_id_of_duplicates_with_same_generation.external_id
        GROUP by rl.external_id;

        UPDATE {tenantId}_mod_source_record_storage.records_lb r
        SET state = 'OLD'
        FROM external_id_max_created_date_table
        WHERE r.external_id = external_id_max_created_date_table.external_id
          AND r.created_date < external_id_max_created_date_table.last_creation_date;

    END;
$$;


2. Script to update multiple ACTUAL records

DO
$$
    BEGIN
        CREATE TEMP TABLE external_id_max_generation_table ON COMMIT DROP AS
        SELECT r.external_id, MAX(r.generation) AS max_generation
        FROM {tenantId}_mod_source_record_storage.records_lb r,
             (
                 SELECT external_id
                 FROM {tenantId}_mod_source_record_storage.records_lb
                 WHERE state = 'ACTUAL'
                 GROUP BY external_id
                 HAVING COUNT(external_id) > 1
             ) AS external_id_of_duplicates
        WHERE r.external_id = external_id_of_duplicates.external_id
        GROUP BY r.external_id;

        UPDATE {tenantId}_mod_source_record_storage.records_lb r
        SET state = 'OLD'
        FROM external_id_max_generation_table
        WHERE r.external_id = external_id_max_generation_table.external_id
          AND r.generation < external_id_max_generation_table.max_generation;

    END;
$$;


3. Script to update records containing inconsistent matched_id with consistent matched_id value and generation

DO
$$
    BEGIN
        CREATE TEMP TABLE value_matched_id_inconsistency ON COMMIT DROP AS
        SELECT rl.id, rl.matched_id, mi.value
        FROM {tenantId}_mod_source_record_storage.records_lb rl,
             {tenantId}_mod_source_record_storage.marc_indexers mi,
             {tenantId}_mod_source_record_storage.marc_records_tracking rt
        WHERE rt.marc_id = mi.marc_id and rt.version = mi.version
          AND rl.id = mi.marc_id AND rl.state='ACTUAL'
          AND mi.field_no='999' AND mi.ind1='f' AND mi.ind2='f' AND mi.subfield_no='s' AND rl.matched_id::text != mi.value;

        CREATE TEMP TABLE matched_id_max_generation_table ON COMMIT DROP AS
        SELECT rl.matched_id, MAX(rl.generation) AS max_gen
        FROM {tenantId}_mod_source_record_storage.records_lb rl,
             value_matched_id_inconsistency val_match
        WHERE rl.matched_id = val_match.value::uuid
        GROUP BY rl.matched_id;

        UPDATE {tenantId}_mod_source_record_storage.records_lb r
        SET matched_id = val_match.value::uuid,
            generation = matched_id_max_generation_table.max_gen + 1
        FROM value_matched_id_inconsistency val_match, matched_id_max_generation_table
        WHERE r.id = val_match.id AND val_match.value::uuid = matched_id_max_generation_table.matched_id;
    END;
$$;