# v20251103 - Main script to import media files from S3 to the database
|
|
import logging
|
|
import time
|
|
from datetime import datetime
|
|
import pytz
|
|
import os
|
|
import re
|
|
from logging_config import setup_logging, CUSTOM_ERROR_LEVEL
|
|
from email_utils import handle_error, send_email_with_attachment
|
|
from s3_utils import create_s3_client, list_s3_bucket, parse_s3_files
|
|
from error_handler import handle_general_error, handle_file_not_found_error, handle_value_error
|
|
from file_utils import is_file_empty
|
|
from db_utils import count_files, get_distinct_filenames_from_db
|
|
from dotenv import load_dotenv
|
|
from validation_utils import validate_inventory_code, analyze_pattern_match, validate_icode_extension, list_s3_not_in_db, validate_mp4_file, validate_mp3_file
|
|
import config
|
|
import psycopg2
|
|
|
|
# Load environment variables from a local .env file before any os.getenv()
# lookups below (ACH_DRY_RUN, ACH_ENV, ACH_SAFE_RUN, email recipient lists).
load_dotenv()

# NOTE(review): redundant duplicate imports of re/logging/os removed here —
# all three are already imported at the top of this file.
|
|
|
|
|
|
# MAIN PROCESS
|
|
def main_process(aws_config, db_config, ach_config, bucket_name, ach_variables):
    """Import media files from an S3 bucket into the database.

    Workflow (mirrored by the PHASE log markers below):
      1. PHASE 1 - list S3 objects, filter by extension/folder, and validate
         inventory-code naming (base name pattern + folder-prefix vs code type).
      2. PHASE 2 - cross-reference against filenames already in the database
         and keep only the S3 keys that are new.
      3. PHASE 3 - parse the remaining objects and insert records (skipped
         when ACH_DRY_RUN is true), then log DB-vs-S3 reconciliation counts.
    Finally a summary email is sent; the error or warning log is attached when
    the corresponding log file is non-empty.

    Args:
        aws_config: mapping of AWS settings, passed to create_s3_client().
        db_config: mapping of psycopg2 connection keyword arguments
            (host, database, user, port, ...) — unpacked via **db_config.
        ach_config: not referenced in this function; accepted so the signature
            matches config.load_config()'s return tuple.
        bucket_name: name of the S3 bucket to scan.
        ach_variables: opaque settings object forwarded to parse_s3_files().

    Environment variables read: ACH_DRY_RUN, ACH_ENV, ACH_SAFE_RUN,
    EMAIL_RECIPIENTS, ERROR_EMAIL_RECIPIENTS, SUCCESS_EMAIL_RECIPIENTS.

    Returns:
        None. Exceptions from the main try-block are routed to the
        error_handler helpers; the outcome is reported via the summary email.
    """
    logging.info(f"bucket_name: {bucket_name}")

    # SECURITY CHECK: a real (non-dry-run) import against a development
    # environment requires the operator to retype the DB name interactively.
    dry_run_env = os.getenv('ACH_DRY_RUN', 'true').lower()
    ach_env = os.getenv('ACH_ENV', 'development').lower()

    if dry_run_env == 'false' and ach_env == 'development':
        print("\n" + "!"*60)
        print("!!! SECURITY CHECK: RUNNING IMPORT ON DEVELOPMENT ENVIRONMENT !!!")
        print(f"DB_HOST: {db_config.get('host')}")
        print(f"DB_NAME: {db_config.get('database')}")
        print(f"DB_USER: {db_config.get('user')}")
        print(f"DB_PORT: {db_config.get('port')}")
        print("!"*60 + "\n")

        user_input = input(f"Please type the DB_NAME '{db_config.get('database')}' to proceed: ")
        if user_input != db_config.get('database'):
            print("Action aborted by user. Database name did not match.")
            logging.error("Process aborted: User failed to confirm DB_NAME for development import.")
            return

    # Timing variables are initialized up-front so the email-body formatting
    # at the end cannot fail even if an exception fires before end_time /
    # elapsed_time are recomputed.
    start_time = time.time()
    logging.info(f"Process started at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")
    end_time = start_time
    elapsed_time = 0.0

    try:
        logging.info("Starting the main process...")

        # -----------------------------------------------------------------
        # PHASE 1: S3 OBJECT DISCOVERY + INITIAL VALIDATION
        #   1) List objects in the configured bucket.
        #   2) Filter by allowed extensions and excluded/included folders.
        #   3) Validate the inventory-code base name and check that the folder
        #      prefix matches the media type embedded in the code.
        #   4) Reject naming-convention violations before any DB interaction.
        # -----------------------------------------------------------------
        logging.info("PHASE 1: S3 object discovery + initial validation")

        # Make spaces visible in filenames for logging by substituting the
        # "open box" character (U+2423); falls back to the raw name on error.
        def _visible_spaces(name: str) -> str:
            try:
                return name.replace(' ', '\u2423')
            except Exception:
                return name

        s3_client = create_s3_client(aws_config)

        # Full object listing for the bucket (list of dicts with a 'Key').
        list_s3_files = list_s3_bucket(s3_client, bucket_name)

        # Extensions considered for import; used only for this Phase 1 filter.
        valid_extensions = {'.mp3', '.mp4', '.md5', '.json', '.pdf'}
        excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'TST/', 'UMT/'}
        # Defining `included_folders` (e.g. {'FILE/'}) switches the filter
        # below from exclusion mode to inclusion mode; it is intentionally
        # left undefined for normal runs.

        # Guard against NameError when `included_folders` is not defined.
        try:
            use_included = bool(included_folders)
        except NameError:
            use_included = False

        if use_included:
            # Inclusion mode: keep keys with a valid extension whose key
            # starts with one of the configured included folders.
            s3_file_names = [
                content['Key'] for content in list_s3_files
                if any(content['Key'].endswith(ext) for ext in valid_extensions)
                and any(content['Key'].startswith(folder) for folder in included_folders)
            ]
            logging.info(f"Using included_folders filter: {included_folders}")
        else:
            # Exclusion mode (default): keep keys with a valid extension that
            # do not live under an excluded folder.
            s3_file_names = [
                content['Key'] for content in list_s3_files
                if any(content['Key'].endswith(ext) for ext in valid_extensions)
                and not any(content['Key'].startswith(folder) for folder in excluded_folders)
            ]
            logging.info("Using excluded_folders filter")

        # Validate inventory-code syntax for every candidate key.
        s3_validated_contents = []

        for s3file in s3_file_names:
            # s3_file_names contains object keys (strings), not dicts.
            base_name = os.path.basename(s3file)
            logging.info(f"S3 Base name: {base_name}")

            # Folder prefix and the 3-character media type embedded in the
            # inventory code (characters 4-6 of the base name).
            folder_prefix = os.path.dirname(s3file).rstrip('/')
            media_type_in_code = base_name[3:6] if len(base_name) >= 6 else None

            # Sanity check: folder name should equal the media type in the code.
            is_valid_prefix = (folder_prefix == media_type_in_code)

            # Some folders legitimately hold multiple media types, so the
            # strict prefix == code-type rule is relaxed for them:
            #   - DVD may contain both DVD and BRD files
            #   - FILE is used for retrievals and holds many container types
            folder_allowances = {
                'DVD': ['DVD', 'BRD'],
                'FILE': ['M4V', 'AVI', 'MOV', 'MP4', 'MXF', 'AIF', 'WMV', 'M4A', 'MPG'],
            }

            if folder_prefix in folder_allowances:
                if media_type_in_code in folder_allowances[folder_prefix]:
                    is_valid_prefix = True

            if folder_prefix and media_type_in_code and not is_valid_prefix:
                logging.warning(f"Prefix mismatch for {s3file}: Folder '{folder_prefix}' does not match code type '{media_type_in_code}'")
                # Warning only — the file still goes through standard validation.

            if validate_inventory_code(base_name):  # validates the first 12 chars inside the helper
                logging.info(f"File {base_name} matches pattern.")
                # Extension-vs-code validation applies only to media files
                # (.mp4/.mp3); sidecars (.json/.pdf/.md5) need only the base check.
                if s3file.lower().endswith(('.mp4', '.mp3')):
                    if not validate_icode_extension(s3file):
                        logging.warning(f"File {s3file} has invalid extension for its inventory code.")
                        continue  # do not add this file to the validated list
                s3_validated_contents.append(s3file)
            else:
                # Report which part of the pattern failed, for diagnostics.
                base_issues = analyze_pattern_match(base_name, "Base name")
                logging.warning(f"Base name '{base_name}' does not match pattern. Issues: {base_issues}")
                folder_name = os.path.dirname(s3file)
                logging.warning(f"File {s3file} in folder {folder_name} does not match pattern.")

        # -----------------------------------------------------------------
        # PHASE 2: DATABASE CROSS-REFERENCE + FILTERING
        #   1) Fetch existing filenames from the database.
        #   2) Skip files already represented in the DB (incl. via sidecars).
        #   3) Produce the final list of S3 keys to parse/insert.
        # -----------------------------------------------------------------
        logging.info("PHASE 2: Database cross-reference + filtering")

        # All DB filenames in one call.
        db_file_names = get_distinct_filenames_from_db()

        # If the DB already holds a sidecar record (.md5/.json/.pdf) for a
        # basename, the asset is considered represented and the S3 file is
        # skipped by list_s3_not_in_db().
        sidecar_exts = ('.md5', '.json', '.pdf')
        db_sidecar_basenames = set()
        for dbf in db_file_names:
            for ext in sidecar_exts:
                if dbf.endswith(ext):
                    db_sidecar_basenames.add(dbf[:-len(ext)])
                    break

        filtered_file_names=list_s3_not_in_db(s3_validated_contents, db_file_names, db_sidecar_basenames)

        # Totals before and after the DB filter.
        total_files_s3 = len(s3_validated_contents)
        logging.info(f"Total number of the valid (mp3,mp4,md5,json,pdf) files in the S3 bucket before DB filter: {total_files_s3}")
        total_files = len(filtered_file_names)
        logging.info(f"Total number of the valid (mp3,mp4,md5,json,pdf) files after DB filter: {total_files}")

        # Log the files that will be inserted (those not yet in the DB).
        if total_files > 0:
            logging.info("List of files to be updated in the database:")
            for f in filtered_file_names:
                logging.info(f" - {f}")
        else:
            logging.info("No new files found to update in the database.")

        # Per-extension counts over the (pre-DB-filter) S3 key list.
        mp4_count = sum(1 for file in s3_file_names if file.endswith('.mp4'))
        mp3_count = sum(1 for file in s3_file_names if file.endswith('.mp3'))
        md5_count = sum(1 for file in s3_file_names if file.endswith('.md5'))
        pdf_count = sum(1 for file in s3_file_names if file.endswith('.pdf'))
        json_count = sum(1 for file in s3_file_names if file.endswith('.json'))
        mov_count = sum(1 for file in s3_file_names if file.endswith('.mov'))

        # FILE-directory container types.
        avi_count = sum(1 for file in s3_file_names if file.endswith('.avi'))
        m4v_count = sum(1 for file in s3_file_names if file.endswith('.m4v'))

        # NOTE(review): `logger` is assigned but never used below — all
        # logging goes through the module-level logging functions.
        logger = logging.getLogger()

        logging.info("Number of .mp4 files on S3 bucket (%s): %s", bucket_name, mp4_count)
        logging.info("Number of .mp3 files on S3 bucket (%s): %s", bucket_name, mp3_count)
        logging.info("Number of .md5 files on S3 bucket (%s): %s", bucket_name, md5_count)
        logging.info("Number of .pdf files on S3 bucket (%s): %s", bucket_name, pdf_count)
        logging.info("Number of .json files on S3 bucket (%s): %s", bucket_name, json_count)
        logging.info("Number of .mov files on S3 bucket (%s): %s", bucket_name, mov_count)

        # Per-file format validation (e.g. .mp4 names must end with _H264);
        # the helpers log their own warnings.
        for file in s3_file_names:
            if file.endswith('.mp4'):
                validate_mp4_file(file)  # validation_utils.py — also checks the _H264 suffix
            elif file.endswith('.mp3'):
                validate_mp3_file(file)  # validation_utils.py

        # Parity checks between media files and their sidecars.
        # NOTE(review): the checks below run when ACH_SAFE_RUN == 'true'
        # (the default) — an earlier comment claimed the opposite; the code
        # is authoritative.  On any mismatch the run aborts via ValueError.
        s3_files_filtered= []
        if os.getenv('ACH_SAFE_RUN', 'true') == 'true':
            if mp4_count != pdf_count:
                logging.error("Number of .mp4 files is not equal to number of .pdf files")
                # Identify the specific missing counterparts among the files
                # selected for import, storing (source, expected_counterpart)
                # tuples for clearer logging.
                missing_pdfs = []  # list of (mp4_file, expected_pdf)
                missing_mp4s = []  # list of (pdf_file, expected_mp4)
                for file in filtered_file_names:
                    if file.endswith('.mp4'):
                        # Strip the extension while preserving any path prefix.
                        base_name = os.path.splitext(file)[0]
                        # H264 variants (name_H264.mp4) drop the suffix before
                        # deriving the expected pdf name.
                        if base_name.endswith('_H264'):
                            # TODO: must check if has extra number for DBT and DVD and [FILE]
                            base_name = base_name[:-5]
                        expected_pdf = base_name + '.pdf'
                        if expected_pdf not in filtered_file_names:
                            missing_pdfs.append((file, expected_pdf))
                    elif file.endswith('.pdf'):
                        # For a pdf, expect the _H264 mp4 variant to exist.
                        base_name = os.path.splitext(file)[0]
                        expected_mp4 = base_name + '_H264.mp4'
                        if expected_mp4 not in filtered_file_names:
                            missing_mp4s.append((file, expected_mp4))
                    else:
                        # Neither mp4 nor pdf (mp3/json/md5): collect and move on.
                        # NOTE(review): indentation reconstructed — this branch
                        # is presumed to pair with the mp4/pdf chain; confirm
                        # against the original file.  `s3_files_filtered` is
                        # never read afterwards.
                        s3_files_filtered.append(file)
                        continue
                # Report missing .pdf counterparts (spaces made visible).
                if missing_pdfs:
                    logging.error("Missing .pdf files (mp4 -> expected pdf):")
                    for mp4_file, expected_pdf in missing_pdfs:
                        logging.error("%s -> %s", _visible_spaces(mp4_file), _visible_spaces(expected_pdf))
                # Report missing .mp4 counterparts.
                if missing_mp4s:
                    logging.error("Missing .mp4 files (pdf -> expected mp4):")
                    for pdf_file, expected_mp4 in missing_mp4s:
                        logging.error("%s -> %s", _visible_spaces(pdf_file), _visible_spaces(expected_mp4))

                logging.error("Abort Import Process due to missing files")
                raise ValueError("Inconsistent file counts mp4 vs pdf")

            if mp3_count + mp4_count != json_count:
                logging.error("Number of .mp3 files + number of .mp4 files is not equal to number of .json files")
                # TODO: report the specific mismatching files, as done above
                # for the mp4/pdf parity check.
                logging.error("Abort Import Process due to missing files")
                raise ValueError("Inconsistent file counts mp3+mp4 vs json")

            if mp3_count + mp4_count != md5_count:
                logging.error("Number of .mp3 files + number of .mp4 files is not equal to number of .md5 files")
                logging.error("Abort Import Process due to missing files")
                # TODO: report the specific mismatching files.
                raise ValueError("Inconsistent file counts mp3+mp4 vs md5")

        # -----------------------------------------------------------------
        # PHASE 3: PARSE & INSERT INTO DATABASE
        #   1) Process each remaining S3 object and validate its metadata.
        #   2) Insert new records (unless running in DRY_RUN).
        #   3) Report counts of uploads, warnings, and errors.
        # -----------------------------------------------------------------
        logging.info("PHASE 3: Parse S3 objects and insert new records into the database")

        try:
            # DRY RUN (the default) skips all database writes.
            if os.getenv('ACH_DRY_RUN', 'true') == 'false':
                uploaded_files_count, warning_files_count, error_files_count = parse_s3_files(s3_client, filtered_file_names, ach_variables, excluded_folders)
            else:
                logging.warning("DRY RUN is set to TRUE - No files will be added to the database")
                uploaded_files_count, warning_files_count, error_files_count = (0, 0, 0)
            logging.info("Total number of files (mp3+mp4) with warnings: %s. (Probably already existing in the DB)", warning_files_count)
            logging.info("Total number of files with errors: %s", error_files_count)
            logging.info("Total number of files uploaded: %s", uploaded_files_count)
            logging.info("All files parsed")
        except Exception as e:
            logging.error(f"An error occurred while parsing S3 files: {e}")
            handle_general_error(e)

        # Post-import reconciliation: count files by mime type in the DB.
        # NOTE(review): conn/cur are never closed — consider a context
        # manager or try/finally so the connection is released on error.
        conn = psycopg2.connect(**db_config)
        cur = conn.cursor()

        # Centralized mime types from config.
        # NOTE(review): EXTENSION_MIME_MAP is imported but not used here.
        from config import EXTENSION_MIME_MAP, MIME_TYPES

        logging.info(f"Mime types for counting files: {MIME_TYPES}")

        all_files_on_db = count_files(cur, MIME_TYPES,'*', False)
        mov_files_on_db = count_files(cur,['video/mov'],'.mov', False )
        mxf_files_on_db = count_files(cur,['application/mxf'],'.mxf', False )
        mpg_files_on_db = count_files(cur,['video/mpeg'],'.mpg', False )
        avi_files_on_db = count_files(cur,['video/x-msvideo'],'.avi', False )
        m4v_files_on_db = count_files(cur,['video/mp4'],'.m4v', False )
        mp4_files_on_db = count_files(cur,['video/mp4'],'.mp4', False )
        wav_files_on_db = count_files(cur,['audio/wav'],'.wav', False )
        mp3_files_on_db = count_files(cur,['audio/mp3'],'.mp3', False )

        # DB totals vs the S3 per-extension counts computed above.
        logging.info(f"Number of all video files in the database: {all_files_on_db}")
        logging.info(f"Number of .mov files in the database: {mov_files_on_db} and S3: {mov_count} ")
        logging.info(f"Number of .mp4 files in the database: {mp4_files_on_db} and S3: {mp4_count}")

        # S3 keys with no matching DB filename.
        # NOTE(review): this compares full S3 keys (with folder prefix)
        # against DB filenames — confirm the DB stores full keys, otherwise
        # every file is reported missing.
        missing_mp4s = [f for f in s3_file_names if f.endswith('.mp4') and f not in db_file_names]
        if missing_mp4s:
            logging.warning(f"Missing {len(missing_mp4s)} .mp4 files in DB compared to S3: {missing_mp4s}")

        logging.info(f"Number of .wav files in the database: {wav_files_on_db} ")
        logging.info(f"Number of .mp3 files in the database: {mp3_files_on_db} and S3: {mp3_count}")

        missing_mp3s = [f for f in s3_file_names if f.endswith('.mp3') and f not in db_file_names]
        if missing_mp3s:
            logging.warning(f"Missing {len(missing_mp3s)} .mp3 files in DB compared to S3: {missing_mp3s}")

        logging.info(f"Number of .avi files in the database: {avi_files_on_db} ")
        logging.info(f"Number of .m4v files in the database: {m4v_files_on_db} ")
        logging.info(f"Number of .mxf files in the database: {mxf_files_on_db} ")
        logging.info(f"Number of .mpg files in the database: {mpg_files_on_db} ")

        # NOTE(review): `total_files` is the post-DB-filter count computed in
        # Phase 2, not the raw S3 total — the message wording is misleading.
        logging.info(f"Total file in s3 before import {total_files}")

        # Elapsed time for the successful path.
        end_time = time.time()
        elapsed_time = end_time - start_time
        logging.info(f"Processing completed. Time taken: {elapsed_time:.2f} seconds")

    except FileNotFoundError as e:
        handle_file_not_found_error(e)
    except ValueError as e:
        handle_value_error(e)
    except Exception as e:
        handle_general_error(e)

    # ---------------------------------------------------------------------
    # Summary email: always sent, on success or failure.
    # ---------------------------------------------------------------------
    # CET timezone used for the timestamp appended to rotated log names.
    cet = pytz.timezone('CET')

    # Rename a log file by appending a timestamp; returns the new path, or
    # None when the file is missing/empty or the rename fails.
    def _rename_log_if_nonempty(path):
        try:
            if not path or not os.path.exists(path):
                return None
            # Empty files are neither renamed nor attached.
            if os.path.getsize(path) == 0:
                return None
            dir_name = os.path.dirname(path)
            base_name = os.path.splitext(os.path.basename(path))[0]
            timestamp = datetime.now(cet).strftime("%Y%m%d_%H%M%S")
            new_log_path = os.path.join(dir_name, f"{base_name}_{timestamp}.log")
            # Prefer the atomic replace; fall back to rename (which may raise
            # on Windows if the target exists).
            try:
                os.replace(path, new_log_path)
            except Exception:
                os.rename(path, new_log_path)
            return new_log_path
        except Exception as e:
            logging.error("Failed to rename log %s: %s", path, e)
            return None

    # Flush and close handlers before moving the log files.
    logging.shutdown()

    # NOTE(review): this logging call runs after logging.shutdown(); its
    # output may be lost or re-open handlers — confirm intent.
    logging.info("Preparing summary email")

    error_log = './logs/ACH_media_import_errors.log'
    warning_log = './logs/ACH_media_import_warning.log'

    # Determine severity from the presence/content of the dedicated logs.
    has_errors = False
    has_warnings = False
    try:
        if os.path.exists(error_log) and os.path.getsize(error_log) > 0:
            with open(error_log, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            # Any non-blank content in the error log counts as an error.
            if 'ERROR' in content or len(content.strip()) > 0:
                has_errors = True
        if os.path.exists(warning_log) and os.path.getsize(warning_log) > 0:
            with open(warning_log, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            if 'WARNING' in content or len(content.strip()) > 0:
                has_warnings = True
    except Exception as e:
        logging.error("Error while reading log files: %s", e)

    # Parse a comma-separated env var into a list, stripping whitespace and
    # dropping empty entries.
    def _split_env_list(name):
        raw = os.getenv(name, '')
        return [s.strip() for s in raw.split(',') if s.strip()]

    EMAIL_RECIPIENTS = _split_env_list('EMAIL_RECIPIENTS')
    ERROR_EMAIL_RECIPIENTS = _split_env_list('ERROR_EMAIL_RECIPIENTS') or EMAIL_RECIPIENTS
    SUCCESS_EMAIL_RECIPIENTS = _split_env_list('SUCCESS_EMAIL_RECIPIENTS') or EMAIL_RECIPIENTS

    # Choose subject, body, attachment, and recipients by severity.
    if has_errors:
        subject = "ARKIVO Import of Video/Audio Ran with Errors"
        attachment_to_send = _rename_log_if_nonempty(error_log) or error_log
        body = "Please find the attached error log file. Job started at %s and ended at %s, taking %.2f seconds." % (
            datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S'),
            datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S'),
            elapsed_time
        )
        email_recipients=ERROR_EMAIL_RECIPIENTS
    elif has_warnings:
        subject = "ARKIVO Import of Video/Audio Completed with Warnings"
        # Attach the warnings log for investigation.
        attachment_to_send = _rename_log_if_nonempty(warning_log) or warning_log
        body = "The import completed with warnings. Please find the attached warning log. Job started at %s and ended at %s, taking %.2f seconds." % (
            datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S'),
            datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S'),
            elapsed_time
        )
        email_recipients=ERROR_EMAIL_RECIPIENTS
    else:
        subject = "ARKIVO Video/Audio Import Completed Successfully"
        # No attachment for a clean success.
        attachment_to_send = None
        body = "The import of media (video/audio) completed successfully without any errors or warnings. Job started at %s and ended at %s, taking %.2f seconds." % (
            datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S'),
            datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S'),
            elapsed_time
        )
        email_recipients=SUCCESS_EMAIL_RECIPIENTS

    logging.info("Sending summary email: %s (attach: %s)", subject, bool(attachment_to_send))

    # Send the email; failures are logged but never raised to the caller.
    try:
        send_email_with_attachment(
            subject=subject,
            body=body,
            attachment_path=attachment_to_send,
            email_recipients=email_recipients
        )
    except Exception as e:
        logging.error("Failed to send summary email: %s", e)

    return
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    try:
        # Setup logging using standard TimedRotatingFileHandler handlers.
        # No manual doRollover calls — rely on the handler's built-in rotation.
        logger, rotating_handler, error_handler, warning_handler = setup_logging()

        # Load configuration settings (AWS, DB, ACH settings, bucket name).
        aws_config, db_config, ach_config, bucket_name, ach_variables = config.load_config()

        logging.info("Config loaded, logging setup done")

        # Run the main process
        main_process(aws_config, db_config, ach_config, bucket_name, ach_variables)

        logging.info("Main process completed at: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    except Exception as e:
        # logging.exception records the full traceback (not just the message),
        # so startup/config failures are diagnosable from the log file alone.
        logging.exception("An error occurred: %s", e)