From 5dcc1bd31c12c309ef8267f38de648816e2d011e Mon Sep 17 00:00:00 2001 From: Michael Manganiello Date: Wed, 6 Aug 2025 17:33:04 -0300 Subject: [PATCH] feat: Migrate scheduler to native RQ process This change replaces our custom `scheduler.py` script with the `rqscheduler` command, allowing us to run the RQ scheduler as a separate, low-memory process, by avoiding the need to maintain the Python app in memory. * Remove `scheduler.py` script. * Move initialization of scheduled tasks to `worker.py`. * Update `docker/init_scripts/init` to start the `rqscheduler` command instead of the custom script. * Fix scheduled tasks' `func` paths to the new project structure. * Temporarily use a fork of `rq-scheduler` to support username and SSL settings in the `rqscheduler` command. --- backend/scheduler.py | 33 ------------------- backend/tasks/scheduled/scan_library.py | 2 +- .../scheduled/update_launchbox_metadata.py | 2 +- .../tasks/scheduled/update_switch_titledb.py | 2 +- backend/tasks/tests/test_scan_library.py | 7 ++-- .../tests/test_update_launchbox_metadata.py | 2 +- .../tasks/tests/test_update_switch_titledb.py | 5 ++- backend/worker.py | 25 ++++++++++++-- docker/init_scripts/init | 23 ++++++++++--- pyproject.toml | 4 ++- uv.lock | 10 ++---- 11 files changed, 61 insertions(+), 54 deletions(-) delete mode 100644 backend/scheduler.py diff --git a/backend/scheduler.py b/backend/scheduler.py deleted file mode 100644 index 44a357242..000000000 --- a/backend/scheduler.py +++ /dev/null @@ -1,33 +0,0 @@ -import sentry_sdk -from config import ( - ENABLE_SCHEDULED_RESCAN, - ENABLE_SCHEDULED_UPDATE_LAUNCHBOX_METADATA, - ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB, - SENTRY_DSN, -) -from logger.logger import log -from tasks.scheduled.scan_library import scan_library_task -from tasks.scheduled.update_launchbox_metadata import update_launchbox_metadata_task -from tasks.scheduled.update_switch_titledb import update_switch_titledb_task -from tasks.tasks import tasks_scheduler -from utils import get_version - -sentry_sdk.init(dsn=SENTRY_DSN, release=f"romm@{get_version()}") - - -if __name__ == "__main__": - # Initialize the tasks - if ENABLE_SCHEDULED_RESCAN: - log.info("Starting scheduled rescan") - scan_library_task.init() - - if ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB: - log.info("Starting scheduled update switch titledb") - update_switch_titledb_task.init() - - if ENABLE_SCHEDULED_UPDATE_LAUNCHBOX_METADATA: - log.info("Starting scheduled update launchbox metadata") - update_launchbox_metadata_task.init() - - # Start the scheduler - tasks_scheduler.run() diff --git a/backend/tasks/scheduled/scan_library.py b/backend/tasks/scheduled/scan_library.py index 47e21874d..25c0efae5 100644 --- a/backend/tasks/scheduled/scan_library.py +++ b/backend/tasks/scheduled/scan_library.py @@ -23,7 +23,7 @@ class ScanLibraryTask(PeriodicTask): enabled=ENABLE_SCHEDULED_RESCAN, manual_run=False, cron_string=SCHEDULED_RESCAN_CRON, - func="tasks.scan_library.scan_library_task.run", + func="tasks.scheduled.scan_library.scan_library_task.run", ) async def run(self): diff --git a/backend/tasks/scheduled/update_launchbox_metadata.py b/backend/tasks/scheduled/update_launchbox_metadata.py index 005fd4778..6ca1d0905 100644 --- a/backend/tasks/scheduled/update_launchbox_metadata.py +++ b/backend/tasks/scheduled/update_launchbox_metadata.py @@ -31,7 +31,7 @@ class UpdateLaunchboxMetadataTask(RemoteFilePullTask): enabled=ENABLE_SCHEDULED_UPDATE_LAUNCHBOX_METADATA, cron_string=SCHEDULED_UPDATE_LAUNCHBOX_METADATA_CRON, manual_run=True, - func="tasks.update_launchbox_metadata.update_launchbox_metadata_task.run", + func="tasks.scheduled.update_launchbox_metadata.update_launchbox_metadata_task.run", url="https://gamesdb.launchbox-app.com/Metadata.zip", ) diff --git a/backend/tasks/scheduled/update_switch_titledb.py b/backend/tasks/scheduled/update_switch_titledb.py index 3f7c7a519..8c49c4958 100644 --- a/backend/tasks/scheduled/update_switch_titledb.py +++ b/backend/tasks/scheduled/update_switch_titledb.py @@ -23,7 +23,7 @@ class UpdateSwitchTitleDBTask(RemoteFilePullTask): enabled=ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB, cron_string=SCHEDULED_UPDATE_SWITCH_TITLEDB_CRON, manual_run=True, - func="tasks.update_switch_titledb.update_switch_titledb_task.run", + func="tasks.scheduled.update_switch_titledb.update_switch_titledb_task.run", url="https://raw.githubusercontent.com/blawar/titledb/master/US.en.json", ) diff --git a/backend/tasks/tests/test_scan_library.py b/backend/tasks/tests/test_scan_library.py index 375ca337a..6e4eb2dcc 100644 --- a/backend/tasks/tests/test_scan_library.py +++ b/backend/tasks/tests/test_scan_library.py @@ -12,7 +12,7 @@ class TestScanLibraryTask: def test_init(self, task): """Test task initialization""" - assert task.func == "tasks.scan_library.scan_library_task.run" + assert task.func == "tasks.scheduled.scan_library.scan_library_task.run" assert task.description == "Rescans the entire library" @patch("tasks.scheduled.scan_library.ENABLE_SCHEDULED_RESCAN", True) @@ -57,4 +57,7 @@ class TestScanLibraryTask: def test_task_instance(self): """Test that the module-level task instance is created correctly""" assert isinstance(scan_library_task, ScanLibraryTask) - assert scan_library_task.func == "tasks.scan_library.scan_library_task.run" + assert ( + scan_library_task.func + == "tasks.scheduled.scan_library.scan_library_task.run" + ) diff --git a/backend/tasks/tests/test_update_launchbox_metadata.py b/backend/tasks/tests/test_update_launchbox_metadata.py index 03be882a2..a1e9a93e1 100644 --- a/backend/tasks/tests/test_update_launchbox_metadata.py +++ b/backend/tasks/tests/test_update_launchbox_metadata.py @@ -45,7 +45,7 @@ class TestUpdateLaunchboxMetadataTask: """Test task initialization with correct parameters""" assert ( task.func - == "tasks.update_launchbox_metadata.update_launchbox_metadata_task.run" + == "tasks.scheduled.update_launchbox_metadata.update_launchbox_metadata_task.run" ) assert task.description == "Updates the LaunchBox metadata store" assert task.url == "https://gamesdb.launchbox-app.com/Metadata.zip" diff --git a/backend/tasks/tests/test_update_switch_titledb.py b/backend/tasks/tests/test_update_switch_titledb.py index bda96723e..0df115a56 100644 --- a/backend/tasks/tests/test_update_switch_titledb.py +++ b/backend/tasks/tests/test_update_switch_titledb.py @@ -53,7 +53,10 @@ class TestUpdateSwitchTitleDBTask: def test_init(self, task): """Test task initialization""" - assert task.func == "tasks.update_switch_titledb.update_switch_titledb_task.run" + assert ( + task.func + == "tasks.scheduled.update_switch_titledb.update_switch_titledb_task.run" + ) assert task.description == "Updates the Nintendo Switch TitleDB file" assert ( task.url diff --git a/backend/worker.py b/backend/worker.py index 2c58fbd87..428735124 100644 --- a/backend/worker.py +++ b/backend/worker.py @@ -1,8 +1,16 @@ import sentry_sdk -from config import SENTRY_DSN +from config import ( + ENABLE_SCHEDULED_RESCAN, + ENABLE_SCHEDULED_UPDATE_LAUNCHBOX_METADATA, + ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB, + SENTRY_DSN, +) from handler.redis_handler import redis_client -from logger.logger import unify_logger +from logger.logger import log, unify_logger from rq import Queue, Worker +from tasks.scheduled.scan_library import scan_library_task +from tasks.scheduled.update_launchbox_metadata import update_launchbox_metadata_task +from tasks.scheduled.update_switch_titledb import update_switch_titledb_task from utils import get_version unify_logger("rq.worker") @@ -15,5 +23,18 @@ sentry_sdk.init( ) if __name__ == "__main__": + # Initialize scheduled tasks + if ENABLE_SCHEDULED_RESCAN: + log.info("Starting scheduled rescan") + scan_library_task.init() + + if ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB: + log.info("Starting scheduled update switch titledb") + update_switch_titledb_task.init() + + if ENABLE_SCHEDULED_UPDATE_LAUNCHBOX_METADATA: + log.info("Starting scheduled update launchbox metadata") + update_launchbox_metadata_task.init() + worker = Worker([Queue(name, connection=redis_client) for name in listen]) worker.work() diff --git a/docker/init_scripts/init b/docker/init_scripts/init index 645f53730..78c24bfe3 100755 --- a/docker/init_scripts/init +++ b/docker/init_scripts/init @@ -161,6 +161,21 @@ start_bin_valkey-server() { error_log "Internal valkey did not become ready after $((max_retries * 500))ms" } +# Commands to start RQ scheduler +start_bin_scheduler() { + info_log "Starting RQ scheduler" + + RQ_REDIS_HOST=${REDIS_HOST:-127.0.0.1} \ + RQ_REDIS_PORT=${REDIS_PORT:-6379} \ + RQ_REDIS_USERNAME=${REDIS_USERNAME:-""} \ + RQ_REDIS_PASSWORD=${REDIS_PASSWORD:-""} \ + RQ_REDIS_DB=${REDIS_DB:-0} \ + RQ_REDIS_SSL=${REDIS_SSL:-0} \ + rqscheduler \ + --path /backend \ + --pid /tmp/scheduler.pid & +} + # function that runs our independent python scripts and creates corresponding PID files, start_python() { SCRIPT="${1}" @@ -208,8 +223,8 @@ stop_process_pid() { shutdown() { # shutdown in reverse order - stop_process_pid scheduler stop_process_pid worker + stop_process_pid scheduler stop_process_pid watcher stop_process_pid nginx stop_process_pid gunicorn @@ -247,13 +262,13 @@ fi while ! ((exited)); do watchdog_process_pid bin gunicorn - watchdog_process_pid python worker - # only start the scheduler if enabled if [[ ${ENABLE_SCHEDULED_RESCAN} == "true" || ${ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB} == "true" || ${ENABLE_SCHEDULED_UPDATE_LAUNCHBOX_METADATA} == "true" ]]; then - watchdog_process_pid python scheduler + watchdog_process_pid bin scheduler fi + watchdog_process_pid python worker + # only start the watcher if enabled if [[ ${ENABLE_RESCAN_ON_FILESYSTEM_CHANGE} == "true" ]]; then watchdog_process_pid python watcher diff --git a/pyproject.toml b/pyproject.toml index b5630a66f..9aff21d29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,9 @@ dependencies = [ "python-socketio == 5.11.1", "redis ~= 6.2", "rq ~= 2.1", - "rq-scheduler ~= 0.14", + # TODO: Move back to upstream `rq-scheduler`, when support for username and SSL settings is added. + # Related PR: https://github.com/rq/rq-scheduler/pull/325 + "rq-scheduler @ git+https://github.com/adamantike/rq-scheduler.git@feat/script-options-username-ssl", "sentry-sdk ~= 2.32", "starlette-csrf ~= 3.0", "streaming-form-data ~= 1.19", diff --git a/uv.lock b/uv.lock index e47430c18..8365e35bf 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" resolution-markers = [ "platform_python_implementation != 'PyPy'", @@ -1844,7 +1844,7 @@ requires-dist = [ { name = "pyyaml", specifier = "==6.0.1" }, { name = "redis", specifier = "~=6.2" }, { name = "rq", specifier = "~=2.1" }, - { name = "rq-scheduler", specifier = "~=0.14" }, + { name = "rq-scheduler", git = "https://github.com/adamantike/rq-scheduler.git?rev=feat%2Fscript-options-username-ssl" }, { name = "sentry-sdk", specifier = "~=2.32" }, { name = "sqlalchemy", extras = ["mariadb-connector", "mysql-connector", "postgresql-psycopg"], specifier = "~=2.0" }, { name = "starlette-csrf", specifier = "~=3.0" }, @@ -1879,17 +1879,13 @@ wheels = [ [[package]] name = "rq-scheduler" version = "0.14.0" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/adamantike/rq-scheduler.git?rev=feat%2Fscript-options-username-ssl#42a7a2b8146179cf4578ab1751b80f76c5a0f09e" } dependencies = [ { name = "crontab" }, { name = "freezegun" }, { name = "python-dateutil" }, { name = "rq" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a0/4e/977bbcc1f3b25ed9ea60ec968b13f7147661defe5b2f9272b44fdb1c5549/rq-scheduler-0.14.0.tar.gz", hash = "sha256:2d5a14a1ab217f8693184ebaa1fe03838edcbc70b4f76572721c0b33058cd023", size = 16582, upload-time = "2024-10-29T13:30:32.641Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/d0/28cedca9f3b321f30e69d644c2dcd7097ec21570ec9606fde56750621300/rq_scheduler-0.14.0-py2.py3-none-any.whl", hash = "sha256:d4ec221a3d8c11b3ff55e041f09d9af1e17f3253db737b6b97e86ab20fc3dc0d", size = 13874, upload-time = "2024-10-29T13:30:30.449Z" }, -] [[package]] name = "sentry-sdk"