82 lines
3.3 KiB
Markdown
82 lines
3.3 KiB
Markdown
### Find records where the original filename indicates an H264 variant
|
|
|
|
-- Purpose: list distinct base records whose "original" filename starts with "FILE"
-- (i.e. files stored under the FILE... folder or otherwise beginning with "FILE") and
-- contains the literal "_H264" marker somewhere after that prefix. This helps locate
-- master records that also have an H264-derived file present.
-- Columns returned:
--   base_id            : the parent/base record id (h_base_record_id)
--   file_type          : the logical file type extracted from the JSON `file_type` column
--   original_file_name : the stored original filename (may include folder/prefix)
--   digital_file_name  : the current digital filename in the database
-- Notes:
--   * In LIKE a bare `_` matches ANY single character, so the underscore is escaped
--     (escape character `!`, declared with ESCAPE) to match a literal "_H264" only.
--     Without the escape, names such as "FILE/aXH264.mov" would match as well.
--   * Rows whose `file_type` JSON yields no `type` value (NULL) are skipped.
--   * LIKE is case-sensitive here: "file..." or "_h264" will not match.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type,
    original_file_name,
    digital_file_name
FROM file
WHERE file_type ->> 'type' IS NOT NULL
  AND original_file_name LIKE 'FILE%!_H264%' ESCAPE '!';
|
|
|
|
### Audio files (mp3) that are not in the FILE/ folder
|
|
|
|
-- Purpose: find distinct base records for streaming audio (.mp3) where the original
-- filename does not start with "FILE", i.e. is not located in the FILE/... area.
-- Useful to separate ingest/original copies (often under FILE/) from streaming or
-- derivative objects.
-- Columns returned:
--   base_id   : the parent/base record id (h_base_record_id)
--   file_type : the logical file type extracted from the JSON `file_type` column
-- Notes:
--   * The extension is anchored with its dot ('%.mp3') so that only names ending in
--     ".mp3" match, not every name that merely ends in the letters "mp3".
--   * LIKE is case-sensitive: ".MP3" is not matched.
--   * NOT LIKE evaluates to NULL for a NULL original_file_name, so rows without an
--     original filename are excluded from the result.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type
FROM file
WHERE file_type ->> 'type' IS NOT NULL
  AND original_file_name NOT LIKE 'FILE%'
  AND digital_file_name LIKE '%.mp3';
|
|
|
|
### Video files (mp4) that are not in the FILE/ folder
|
|
|
|
-- Purpose: find distinct base records for mp4 streaming/derivative video files whose
-- original filename does not start with "FILE". This helps identify which base records
-- currently have mp4 derivatives recorded outside the FILE/... (master) namespace.
-- Columns returned:
--   base_id   : the parent/base record id (h_base_record_id)
--   file_type : the logical file type extracted from the JSON `file_type` column
-- Notes:
--   * The extension is anchored with its dot ('%.mp4') so that only names ending in
--     ".mp4" match, not every name that merely ends in the letters "mp4".
--   * LIKE is case-sensitive: ".MP4" is not matched.
--   * NOT LIKE evaluates to NULL for a NULL original_file_name, so rows without an
--     original filename are excluded from the result.
SELECT DISTINCT
    h_base_record_id AS base_id,
    file_type ->> 'type' AS file_type
FROM file
WHERE file_type ->> 'type' IS NOT NULL
  AND original_file_name NOT LIKE 'FILE%'
  AND digital_file_name LIKE '%.mp4';
|
|
|
|
### Records with non-JPEG digital files that are not in the FILE/ folder
|
|
|
|
-- Purpose: audit base records that carry digital files other than JPEG derivatives.
-- A row qualifies when all of the following hold:
--   * its digital filename does not end in "jpg" (case-sensitive; names ending in
--     ".jpeg" or ".JPG" are therefore NOT excluded),
--   * its original filename does not start with "FILE" (rows with a NULL original
--     filename drop out as well, because NOT LIKE yields NULL for them),
--   * its `file_type` JSON yields a non-NULL `type` value.
-- Columns returned:
--   base_id   : the parent/base record id (h_base_record_id)
--   file_type : the logical file type extracted from the JSON `file_type` column
SELECT DISTINCT
    f.h_base_record_id AS base_id,
    f.file_type ->> 'type' AS file_type
FROM file AS f
WHERE f.digital_file_name NOT LIKE '%jpg'
  AND f.original_file_name NOT LIKE 'FILE%'
  AND (f.file_type ->> 'type') IS NOT NULL;
|
|
|
|
### Count of unique base records per file_type
|
|
|
|
-- Purpose: overview of how many unique objects have files recorded for each logical
-- file type. For every non-NULL `type` value found in the JSON `file_type` column, the
-- number of distinct base records (h_base_record_id) is counted.
-- Columns returned:
--   file_type                     : the `type` value extracted from the JSON column
--   file_type_unique_record_count : distinct h_base_record_id values for that type
-- The derived table extracts the JSON value once so the outer query can filter and
-- group on a plain column.
SELECT
    typed.type_name AS file_type,
    COUNT(DISTINCT typed.h_base_record_id) AS file_type_unique_record_count
FROM (
    SELECT
        h_base_record_id,
        file_type ->> 'type' AS type_name
    FROM file
) AS typed
WHERE typed.type_name IS NOT NULL
GROUP BY typed.type_name;
|
|
|
|
### Duplicate of the previous aggregate (kept for convenience)
|
|
|
|
-- Note: this query is equivalent to the previous aggregate and produces the same
-- counts; it may be kept intentionally for running in a separate context or as a
-- copy-and-paste starting point for further edits.
-- Columns returned:
--   file_type                     : the `type` value extracted from the JSON column
--   file_type_unique_record_count : distinct h_base_record_id values for that type
WITH typed_file AS (
    -- Extract the logical type once per file row.
    SELECT
        h_base_record_id,
        file_type ->> 'type' AS type_name
    FROM file
)
SELECT
    type_name AS file_type,
    COUNT(DISTINCT h_base_record_id) AS file_type_unique_record_count
FROM typed_file
WHERE type_name IS NOT NULL
GROUP BY type_name;