/
Data Import performance of retrieving source records
Data Import performance of retrieving source records
Draft
There is the view that I tested
CREATE OR REPLACE VIEW source_records_view AS SELECT records._id, json_build_object('recordId', records.jsonb ->> 'id', 'snapshotId', records.jsonb ->> 'snapshotId', 'recordType', records.jsonb ->> 'recordType', 'deleted', records.jsonb ->> 'deleted', 'additionalInfo', records.jsonb -> 'additionalInfo', 'metadata', records.jsonb -> 'metadata', 'rawRecord', raw_records.jsonb, 'parsedRecord', COALESCE(marc_records.jsonb)) AS jsonb FROM records JOIN raw_records ON records.jsonb ->> 'rawRecordId' = raw_records.jsonb ->> 'id' LEFT JOIN marc_records ON records.jsonb ->> 'parsedRecordId' = marc_records.jsonb ->> 'id' WHERE records.jsonb ->> 'parsedRecordId' IS NOT NULL;
Here is the query that I tested performance(it's changed OFFSET only)
SELECT * FROM diku_mod_source_record_storage.source_records_view WHERE lower(f_unaccent(diku_mod_source_record_storage.source_records_view.jsonb ->> 'recordType')) LIKE lower(f_unaccent('MARC')) LIMIT 50 OFFSET 0;
There are results 3 different cases: NO indexes, Btree indexes (see below) and Foreign keys(see Relationships records table with marc_recods and raw_records)
OFFSET | No indexes | Btree indexes | Foreign keys | Notes |
0 | 11s | 61ms | 134ms | |
50 | 25s | 177ms | 122ms | |
100 | 48s | 529ms | 598ms | |
150 | 51s | 580ms | 572ms | |
200 | 1 m 5s | 687ms | 577ms | |
600 | - | 839ms | 695ms | |
Insert | - | 10 ms | 14ms |
Indexes Btree
Btree indexes
CREATE INDEX idx_btree_raw_record_id ON records USING BTREE ((jsonb ->> 'rawRecordId')); CREATE INDEX idx_btree_parsed_record_id ON records USING BTREE ((jsonb ->> 'parsedRecordId')); CREATE INDEX idx_btree_raw_records_id ON raw_records USING BTREE ((jsonb ->> 'id')); CREATE INDEX idx_btree_marc_records_id ON marc_records USING BTREE ((jsonb ->> 'id'));
Relationships records table with marc_recods and raw_records
{ "tableName": "records", "withMetadata": true, "pkColumnName": "_id", "generateId": false, "withAuditing": false, "uniqueIndex": [ { "fieldName": "id", "tOps": "ADD" } ], "index": [ { "fieldName": "id", "tOps": "ADD" } ], "foreignKeys": [ { "fieldName": "rawRecordId", "targetTable": "raw_records", "tOps": "ADD" }, { "fieldName": "parsedRecordId", "targetTable": "marc_records", "tOps": "ADD" } ] }