diff --git a/.gitea/copilot-instructions.md b/.gitea/copilot-instructions.md deleted file mode 100644 index 9877b96..0000000 --- a/.gitea/copilot-instructions.md +++ /dev/null @@ -1,93 +0,0 @@ -# ACH Server Media Import - Agent Instructions - -Guidelines and standards for the ACH Media Import project. - -## Project Overview -This project is a Python-based utility that imports media files from an S3-compatible bucket into a PostgreSQL database, enforcing specific naming conventions and metadata validation. - -## Technical Stack -- **Language**: Python 3.8+ -- **Database**: PostgreSQL (via `psycopg2`) -- **Cloud Storage**: AWS S3/S3-compatible storage (via `boto3`) -- **Containerization**: Docker & Docker Compose -- **Environment**: Managed via `.env` and `config.py` - -## Architecture & Modular Design -The project uses a utility-based modular architecture orchestrated by `main.py`. -- [main.py](main.py): Entry point and workflow orchestrator. -- [s3_utils.py](s3_utils.py): S3 client operations and bucket listing. -- [db_utils.py](db_utils.py): Database connectivity and SQL execution. -- [validation_utils.py](validation_utils.py): Pattern matching and business logic validation. -- [logging_config.py](logging_config.py): Centralized logging configuration. -- [error_handler.py](error_handler.py): Error handling and notifications. -- [email_utils.py](email_utils.py): SMTP integration for alerts. - -## Domain Logic: Inventory Codes -The core validation revolves around "Inventory Codes" which MUST follow a strict 12-character format: -- `^[VA][OC]-[A-Z0-9]{3}-\d{5}$` -- Examples: `VA-C01-12345`, `OC-A99-67890`. -- Files not matching this pattern in S3 are logged but skipped. - -## Development Workflows - -### Environment Setup -- **Windows**: Use `. 
.venv\Scripts\Activate.ps1` -- **Linux/macOS**: Use `source .venv/bin/activate` -- **Dependency installation**: `pip install -r requirements.txt` - -### Local Execution -- **Run script**: `python main.py` -- **Verify Configuration**: Ensure `.env` is populated with `DB_`, `AWS_`, and `SMTP_` variables. - -### Docker Operations -- **Build/Up**: `docker compose up -d --build` -- **Logs**: `docker compose logs -f app` -- **Stop**: `docker compose stop` - -## Coding Standards & Conventions - -### Logging -- Use the custom logger from `logging_config.py`. -- **Log Levels**: Use `logging.INFO`, `logging.WARNING`, and the custom `CUSTOM_ERROR_LEVEL` (35) via `error_handler.py`. -- Logs are rotated and stored in the `logs/` directory. - -### Error Handling -- Wrap critical operations that should trigger notifications in try-except blocks that call `error_handler.notify_error()`. -- Avoid silent failures; ensure errors are logged to the appropriate file sync. - -### Configuration -- Access settings exclusively via the `config.py` module's dictionaries: `db_config`, `aws_config`, `ach_config`. -- Never hardcode credentials or endpoints. - -## Copilot / Agent Behavior - -This repository is used with an AI assistant. When interacting with the assistant, follow these principles: - -- **Do not modify code unless explicitly requested.** The assistant should not change files unless given a clear instruction to do so. -- **Ask before acting.** If a change is needed, the assistant should describe the required modification and confirm before applying it. -- **Prefer explanation over edits.** When debugging or answering questions, provide guidance and analysis rather than directly editing source files. -- **Keep changes minimal.** If a code change is approved, apply the smallest possible edit that resolves the issue. - -## Code Style & Maintainability - -When generating or modifying code, prioritize **maintainability and clarity over optimization**. 
- -This is **development-stage code**, so it must remain easy to read, understand, and modify by humans. - -Guidelines: - -- Prefer **clear, explicit implementations** rather than clever or overly compact solutions. -- Avoid **micro-optimizations** or complex patterns that reduce readability. -- Do **not introduce obscure algorithms or creative tricks** that make the code difficult to understand. -- Write code that a developer unfamiliar with the project can quickly follow. -- Use **meaningful variable and function names**. -- Add **thoughtful comments** explaining non-obvious logic, assumptions, and decisions. -- Favor **simple and conventional approaches** instead of experimental or highly abstract ones. -- Maintain a **consistent structure and formatting**. - -The goal is **clean, maintainable, well-documented code**, not maximum performance or cleverness. - -## Related Files -- [query-sql.md](query-sql.md): Reference for database schema and SQL logic. -- [requirements.txt](requirements.txt): Project dependencies. -- [docker-compose.yml](docker-compose.yml): Deployment configuration. 
diff --git a/.gitea/workflows/docker-build.yml b/.gitea/workflows/docker-build.yml index d09f4ad..99440e6 100644 --- a/.gitea/workflows/docker-build.yml +++ b/.gitea/workflows/docker-build.yml @@ -24,4 +24,4 @@ jobs: push: false load: true tags: | - ach-server-import-media02:latest + ach-server-import-media:latest diff --git a/docker-compose.harbor.yml b/docker-compose.harbor.yml new file mode 100644 index 0000000..6090900 --- /dev/null +++ b/docker-compose.harbor.yml @@ -0,0 +1,27 @@ +services: + app: + image: reg.neurareel.com/ach-server-import-media:latest + container_name: ACH_server_media_importer02 + volumes: + - logs:/app/logs + env_file: + - .env + environment: + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} + - AWS_REGION=${AWS_REGION} + - AWS_ENDPOINT_URL=${AWS_ENDPOINT_URL} + - BUCKET_NAME=${BUCKET_NAME} + - DB_HOST=${DB_HOST} + - DB_NAME=${DB_NAME} + - DB_USER=${DB_USER} + - SMTP_SERVER=${SMTP_SERVER} + - SMTP_PORT=${SMTP_PORT} + - SMTP_USER=${SMTP_USER} + - SMTP_PASSWORD=${SMTP_PASSWORD} + - SENDER_EMAIL=${SENDER_EMAIL} + - EMAIL_RECIPIENTS=${EMAIL_RECIPIENTS} + restart: unless-stopped + +volumes: + logs: diff --git a/email_utils.py b/email_utils.py index 59a9cb5..d831657 100644 --- a/email_utils.py +++ b/email_utils.py @@ -92,7 +92,7 @@ def send_error_email(subject, body, recipients): server.login(smtp_user, smtp_password) server.sendmail(sender_email, recipients, msg.as_string()) - logging.error("Error email sent successfully") + logging.info("Error notification email sent successfully") except Exception as e: logging.error(f"Failed to send error email: {e}") diff --git a/main.py b/main.py index 3530f91..7d19895 100644 --- a/main.py +++ b/main.py @@ -102,7 +102,8 @@ def main_process(aws_config, db_config, ach_config, bucket_name, ach_variables): valid_extensions = {'.mp3', '.mp4', '.md5', '.json', '.pdf'} # dont like this # excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'FILE/'} # 
excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'TST/', 'FILE/', 'DVD/', 'UMT/'} - excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'TST/', 'FILE/' ,'MCC/'} + # excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'TST/', 'FILE/' ,'MCC/'} + excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'TST/' } # excluded_folders = {'DOCUMENTAZIONE_FOTOGRAFICA/', 'TEST-FOLDER-DEV/', 'TST/',} # included_folders = {'FILE/'} # uncomment this to NOT use excluded folders # included_folders = {'TEST-FOLDER-DEV/'} # uncomment this to NOT use excluded folders diff --git a/s3_utils.py b/s3_utils.py index acf128c..b94a2b1 100644 --- a/s3_utils.py +++ b/s3_utils.py @@ -4,6 +4,7 @@ import logging # for logging import json # for json.loads import os # for os.path import psycopg2 # for PostgreSQL +from psycopg2 import sql # Import custom modules from file_utils import retrieve_file_contents, check_related_files, extract_and_validate_file_info # for file operations @@ -47,9 +48,21 @@ def parse_s3_files( s3, s3_files, ach_variables, excluded_folders=[], s3_listing raise ValueError("Database configuration is not loaded") # return # Exit the function if db_config is None + conn = None + cur = None + conn = psycopg2.connect(**db_config) cur = conn.cursor() + # Disable the trigger during bulk import to avoid repeated expensive materialized view maintenance. + # We'll re-enable it in the finally block below. 
+ try: + cur.execute("ALTER TABLE file DISABLE TRIGGER trg_refresh_file_view;") + conn.commit() + logging.info("Disabled trigger trg_refresh_file_view for bulk import.") + except Exception as e: + logging.warning("Could not disable trigger trg_refresh_file_view: %s", e) + # Filter files with the desired prefix # excluded_prefix = ['TEST-FOLDER-DEV/', 'DOCUMENTAZIONE_FOTOGRAFICA/', 'BTC/', 'VHS/', 'UMT/', 'OV2/', 'OA4/'] excluded_prefix = excluded_folders @@ -305,8 +318,6 @@ def parse_s3_files( s3, s3_files, ach_variables, excluded_folders=[], s3_listing except Exception: # Ignore release errors; rollback already cleaned up state if needed pass - cur.close() - conn.close() except ValueError as e: # Handle specific validation errors logging.error(f"Validation error: {e}") @@ -317,7 +328,53 @@ def parse_s3_files( s3, s3_files, ach_variables, excluded_folders=[], s3_listing import traceback notify_error("FATAL ERROR in Phase 3 process", e) raise e # Raise the exception to the calling function - + finally: + # Ensure trigger is re-enabled even if the loop fails or the transaction is aborted. + try: + if conn and getattr(conn, 'closed', 1) == 0: + # Clear any aborted transaction state so we can run the enable statement. + conn.rollback() + except Exception: + pass + + try: + if conn and getattr(conn, 'closed', 1) == 0: + if cur is None or getattr(cur, 'closed', True): + cur = conn.cursor() + cur.execute("ALTER TABLE file ENABLE TRIGGER trg_refresh_file_view;") + conn.commit() + logging.info("Re-enabled trigger trg_refresh_file_view.") + except Exception as e: + logging.critical("Failed to re-enable trigger trg_refresh_file_view: %s", e) + + try: + if conn and getattr(conn, 'closed', 1) == 0: + # Refresh a materialized view. Default to mv_last_file if not configured. 
+ view_name = os.getenv('REFRESH_MATERIALIZED_VIEW_NAME') + if not view_name or not view_name.strip(): + view_name = 'mv_last_file' + + if cur is None or getattr(cur, 'closed', False): + cur = conn.cursor() + cur.execute(sql.SQL("REFRESH MATERIALIZED VIEW {view};").format( + view=sql.Identifier(view_name) + )) + conn.commit() + logging.info("Refreshed materialized view: %s", view_name) + except Exception as e: + logging.critical("Failed to refresh materialized view: %s", e) + + try: + if cur: + cur.close() + except Exception: + pass + try: + if conn: + conn.close() + except Exception: + pass + # return the file saved return uploaded_files_count, warning_files_count, error_files_count