-- ### Find records where the original filename indicates an H264 variant
-- Purpose: list distinct base records whose "original" filename starts with "FILE"
-- and contains the literal "_H264" marker somewhere after that prefix. This helps
-- locate master records that also have an H264-derived file present.
-- Columns returned:
--   base_id            : the parent/base record id (h_base_record_id)
--   file_type          : text value of the 'type' key in the JSON `file_type` column
--   original_file_name : the stored original filename (may include folder/prefix)
--   digital_file_name  : the current digital filename in the database
-- Why the ESCAPE clause: in LIKE, "_" matches ANY single character, so an unescaped
-- 'FILE%_H264%' would also match names such as 'FILE1XH264'. "!_" matches a literal
-- underscore only.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type,
    original_file_name,
    digital_file_name
FROM file
WHERE file_type ->> 'type' IS NOT NULL
    AND original_file_name LIKE 'FILE%!_H264%' ESCAPE '!';

-- ### Audio files (mp3) that are not in the FILE/ folder
-- Purpose: find distinct base records whose digital filename ends in "mp3" and whose
-- original filename does not start with "FILE". Useful to separate ingest/original
-- conservative copies (often under FILE/) from streaming or derivative objects.
-- Rows whose original_file_name or digital_file_name is NULL are not returned,
-- because LIKE / NOT LIKE on NULL is never true.
-- NOTE(review): '%mp3' has no leading dot and LIKE is case-sensitive in PostgreSQL,
-- so '.MP3' is missed and any name merely ending in "mp3" is included -- confirm
-- this is acceptable for the data.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type
FROM file
WHERE file_type ->> 'type' IS NOT NULL
    AND original_file_name NOT LIKE 'FILE%'
    AND digital_file_name LIKE '%mp3';

-- ### Video files (mp4) that are not in the FILE/ folder
-- Purpose: same as the mp3 query but for digital filenames ending in "mp4". This
-- helps identify which base records currently have mp4 derivatives recorded outside
-- the FILE... (master) namespace. The same NULL and case-sensitivity caveats apply.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type
FROM file
WHERE file_type ->> 'type' IS NOT NULL
    AND original_file_name NOT LIKE 'FILE%'
    AND digital_file_name LIKE '%mp4';

-- ### Records with non-image digital files
-- Purpose: list base records that have digital files which are not JPEG images and
-- whose original filename does not start with "FILE". The `NOT LIKE '%jpg'` filter
-- excludes typical image derivatives; this is useful for auditing non-image assets
-- attached to records.
-- NOTE(review): only names ending in lowercase "jpg" are excluded; '.jpeg', '.JPG'
-- and other image formats still pass the filter -- confirm whether they should.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type
FROM file
WHERE file_type ->> 'type' IS NOT NULL
    AND original_file_name NOT LIKE 'FILE%'
    AND digital_file_name NOT LIKE '%jpg';

-- ### Count of unique base records per file_type
-- Purpose: aggregate the number of distinct base records (h_base_record_id)
-- associated with each `file_type` value. This gives an overview of how many unique
-- objects have files recorded for each logical file type. Rows whose JSON has no
-- 'type' value are left out of the counts.
SELECT
    file_type ->> 'type' AS file_type,
    COUNT(DISTINCT h_base_record_id) AS file_type_unique_record_count
FROM file
WHERE file_type ->> 'type' IS NOT NULL
GROUP BY file_type ->> 'type';

-- ### Duplicate of the previous aggregate (kept for convenience)
-- Note: the query below is identical to the one above and will produce the same
-- counts; it may be intentional for running in a separate context or as a
-- copy-and-paste placeholder for further edits.
SELECT
    file_type ->> 'type' AS file_type,
    COUNT(DISTINCT h_base_record_id) AS file_type_unique_record_count
FROM file
WHERE file_type ->> 'type' IS NOT NULL
GROUP BY file_type ->> 'type';