# File: ACH-ARKIVO-ImportMedia/utils.py
# (repository viewer metadata at capture time: 133 lines, 7.5 KiB, Python)

# Description: Utility functions for the media validation service
# Art.c.hive 2024/09/30
# Standard libs
import logging
import os
# import logging_config
# Usage: result, message = check_video_info(video_json_content)
# Container extensions accepted for videos stored under a FILE/ directory.
_FILE_VIDEO_EXTENSIONS = (
    '.mov', '.avi', '.m4v', '.mp4', '.mxf', '.mpg', '.mpeg', '.wmv',
)

# Extension -> acceptable values of the General track's "Format" field.
_FILE_VIDEO_FORMATS = {
    '.avi': ['AVI'],
    '.mov': ['QuickTime', 'MOV', 'MPEG-4'],  # .mov may be reported as QuickTime, MOV or MPEG-4
    '.mp4': ['MPEG-4', 'MP4', 'QuickTime'],  # some .mp4 files are reported as QuickTime
    '.m4v': ['MPEG-4', 'MP4'],
    '.mxf': ['MXF'],
    '.mpg': ['MPEG', 'MPEG-PS'],
    '.mpeg': ['MPEG', 'MPEG-PS'],
    # .wmv is whitelisted above, so it needs a format entry too; without one
    # every .wmv was rejected as "Unsupported file extension".
    # NOTE(review): 'Windows Media' is the General format MediaInfo reports
    # for ASF/WMV containers - confirm against a real sample.
    '.wmv': ['Windows Media'],
}


def _check_file_video(file_name, tracks):
    """Validate a video stored under FILE/ (streaming / retrieval copy).

    The extension must be whitelisted and track 0 must be a 'General' track
    whose 'Format' is one of the formats expected for that extension.
    """
    if not file_name.lower().endswith(_FILE_VIDEO_EXTENSIONS):
        return False, "The file is not a .mov, .avi, .m4v, .mp4, .mxf, .mpg, .mpeg or .wmv file."

    file_ext = os.path.splitext(file_name)[1].lower()
    logging.info("File extension: %s", file_ext)
    expected_formats = _FILE_VIDEO_FORMATS.get(file_ext)
    logging.info("Expected formats for extension %s: %s", file_ext, expected_formats)
    if not expected_formats:
        # Still reachable, e.g. for a name like 'FILE/.mov' where splitext
        # yields an empty extension.
        return False, f"Unsupported file extension: {file_ext}"

    # A file without any track cannot be validated; do not let it pass.
    if not tracks:
        return False, "No track 0 found."

    track_0 = tracks[0]
    logging.info("Track 0: %s", track_0)
    actual_format = track_0.get('Format', '')
    if track_0.get('@type', '') != 'General' or actual_format not in expected_formats:
        return False, f"Track 0 format '{actual_format}' does not match any expected formats {expected_formats} for extension {file_ext}."

    logging.info(
        "File extension %s matches one of the expected formats %s (actual: %s).",
        file_ext, expected_formats, actual_format,
    )
    return True, "The file passed the video format checks."


def _check_master_video(file_name, tracks):
    """Validate a master video: a .mov whose track 1 is ProRes 4444 video."""
    if not file_name.lower().endswith('.mov'):
        return False, "The file is not a .mov file."
    if len(tracks) <= 1:
        return False, "No track 1 found."

    track_1 = tracks[1]  # track[1] should represent the video stream
    logging.info("Track 1: %s", track_1)
    if track_1.get('@type', '') != 'Video':
        return False, "Track 1 is not a video track."
    if track_1.get('Format', '') != 'ProRes':
        return False, "Track 1 format is not ProRes."
    if track_1.get('Format_Profile', '') != '4444':
        return False, "Track 1 format profile is not 4444."
    return True, "The file is a .mov master with ProRes track 1."


def check_video_info(media_info):
    """Check a video file's mediainfo JSON against the import rules.

    The file path is read from media_info['media']['@ref'] and the track
    list from media_info['media']['track'].

    * If the file's parent directory is named 'FILE' (case-insensitive), the
      file is treated as a streaming/retrieval copy: its extension must be
      whitelisted and track 0 must be a 'General' track with a matching
      'Format'.
    * Otherwise the file must be a .mov whose track 1 is a 'Video' track
      with Format 'ProRes' and Format_Profile '4444'.

    Args:
        media_info: dict-like parsed mediainfo JSON.

    Returns:
        A (bool, str) tuple: whether the file passed, and a message
        explaining the outcome. Any exception raised while inspecting the
        content is reported as (False, "Error processing the content: ...").
    """
    logging.info("Checking video info...")
    logging.info("Media info: %s", media_info)
    try:
        media = media_info.get('media', {})
        file_name = media.get('@ref', '')
        logging.info("File name in JSON: %s", file_name)

        # Parent directory is one level above the basename,
        # e.g. 'SOME/FOLDER/filename.mov' -> 'FOLDER'.
        parent_dir = os.path.basename(os.path.dirname(file_name))
        logging.info("Parent directory: %s", parent_dir)

        tracks = media.get('track', [])
        if parent_dir.lower() == 'file':
            return _check_file_video(file_name, tracks)
        return _check_master_video(file_name, tracks)
    except Exception as e:
        return False, f"Error processing the content: {e}"
# result, message = check_audio_info(json_content)
# Audio container extensions accepted for files stored under a FILE/ directory.
_FILE_AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.aif', '.aiff')


def _check_wav_track(tracks):
    """Apply the strict WAV rule to track 1.

    Track 1 must have @type 'Audio', Format 'PCM', SamplingRate '96000' and
    BitDepth '24' (all compared as strings).
    """
    # Ensure there are at least two track entries before accessing index 1.
    if len(tracks) <= 1:
        return False, "No track 1 found."

    track_1 = tracks[1]
    is_valid = (
        track_1.get('@type', '') == 'Audio'
        and track_1.get('Format', '') == 'PCM'
        and track_1.get('SamplingRate', '') == '96000'
        and track_1.get('BitDepth', '') == '24'
    )
    if is_valid:
        return True, "The file is a .wav file with Wave format in track 1."
    return False, f"Track 1 format is not Wave. Format: {track_1.get('Format', '')}, SamplingRate: {track_1.get('SamplingRate', '')}, BitDepth: {track_1.get('BitDepth', '')}"


def check_audio_info(media_info):
    """Check an audio file's mediainfo JSON against the import rules.

    The file path is read from media_info['media']['@ref'] and the track
    list from media_info['media']['track'].

    * If the file's parent directory is named 'FILE' (case-insensitive), the
      extension must be one of .wav, .mp3, .m4a, .aif, .aiff. Non-WAV files
      are then accepted as-is; WAV files get the strict track check.
    * Otherwise the file must be a .wav and pass the strict track check.

    Args:
        media_info: dict-like parsed mediainfo JSON.

    Returns:
        A (bool, str) tuple: whether the file passed, and a message
        explaining the outcome. Any exception raised while inspecting the
        content is reported as (False, "Error processing the content: ...").
    """
    try:
        media = media_info.get('media', {})
        file_name = media.get('@ref', '')
        parent_dir = os.path.basename(os.path.dirname(file_name))
        is_wav = file_name.lower().endswith('.wav')

        if parent_dir.lower() == 'file':
            # Streaming/retrieval path: whitelist of containers.
            if not file_name.lower().endswith(_FILE_AUDIO_EXTENSIONS):
                return False, f"The file is not one of the allowed audio containers: {', '.join(_FILE_AUDIO_EXTENSIONS)}."
            if not is_wav:
                # Non-WAV containers are accepted without track inspection.
                return True, "The file is an accepted audio container under FILE/ (mp3/m4a/wav)."
        elif not is_wav:
            # Outside FILE/ only .wav is accepted.
            return False, "The file is not a .wav file."

        # Both paths share the same strict WAV validation.
        return _check_wav_track(media.get('track', []))
    except Exception as e:
        return False, f"Error processing the content: {e}"