This commit is contained in:
Cody Cook 2025-06-15 21:20:30 -07:00
commit 095bf52a2f
29 changed files with 2494 additions and 758 deletions

View file

@ -12,19 +12,38 @@ from app.models.settings import Settings
# Set up logging
logger = logging.getLogger(__name__)
def download_episode(episode):
def download_episode(episode_id, progress_callback=None):
"""
Download a podcast episode.
Args:
episode: Episode model instance.
episode_id: ID of the Episode to download.
progress_callback (callable, optional): Callback function for progress updates.
Returns:
str: Path to the downloaded file.
"""
from app.models.podcast import Episode, Podcast
if progress_callback:
progress_callback(2, "Loading episode data")
# Load the episode with its podcast relationship
episode = Episode.query.get(episode_id)
if not episode:
raise ValueError(f"Episode with ID {episode_id} not found")
# Explicitly load the podcast to avoid lazy loading issues
podcast = Podcast.query.get(episode.podcast_id)
if not podcast:
raise ValueError(f"Podcast with ID {episode.podcast_id} not found")
if not episode.audio_url:
raise ValueError("Episode has no audio URL")
if progress_callback:
progress_callback(5, "Getting settings")
# Get settings
settings = Settings.query.first()
if not settings:
@ -39,20 +58,28 @@ def download_episode(episode):
download_path = settings.download_path
os.makedirs(download_path, exist_ok=True)
if progress_callback:
progress_callback(10, "Formatting filename")
# Use podcast's naming format if available, otherwise use global settings
naming_format = podcast.naming_format or settings.naming_format
# Format filename using the naming format
podcast = episode.podcast
filename = format_filename(settings.naming_format, podcast, episode)
filename = format_filename(naming_format, podcast, episode)
# Ensure the directory exists
file_dir = os.path.dirname(os.path.join(download_path, filename))
os.makedirs(file_dir, exist_ok=True)
# Add file extension based on content type
file_path = os.path.join(download_path, filename)
file_path = os.path.normpath(os.path.join(download_path, filename))
# Download the file
try:
response = requests.get(episode.audio_url, stream=True)
if progress_callback:
progress_callback(15, "Connecting to server")
response = requests.get(episode.audio_url, stream=True, timeout=30)
response.raise_for_status()
# Get content type and set appropriate extension
@ -70,17 +97,37 @@ def download_episode(episode):
else:
file_path += '.mp3' # Default to mp3
# Get file size if available
file_size = int(response.headers.get('Content-Length', 0))
episode.file_size = file_size
if progress_callback:
progress_callback(20, "Starting download")
# Write the file
downloaded_bytes = 0
with open(file_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
downloaded_bytes += len(chunk)
# Update progress if file size is known
if file_size > 0 and progress_callback:
progress = 20 + int((downloaded_bytes / file_size) * 70) # Scale to 20-90%
progress_callback(min(progress, 90), f"Downloading: {downloaded_bytes/1024/1024:.1f}MB / {file_size/1024/1024:.1f}MB")
if progress_callback:
progress_callback(95, "Updating database")
# Update episode in database
episode.downloaded = True
episode.file_path = file_path
db.session.commit()
if progress_callback:
progress_callback(100, "Download complete")
logger.info(f"Downloaded episode: {episode.title}")
return file_path
@ -100,22 +147,58 @@ def format_filename(format_string, podcast, episode):
Returns:
str: Formatted filename.
"""
# Calculate absolute number if needed
absolute_number = ''
if '{absolute_number}' in format_string:
from app.models.podcast import Episode
# Get all episodes for this podcast ordered by published date
episodes = Episode.query.filter_by(podcast_id=podcast.id).order_by(Episode.published_date.asc()).all()
# Find the position of the current episode in the ordered list
for i, ep in enumerate(episodes, 1):
if ep.id == episode.id:
absolute_number = str(i)
break
# Create a dictionary with all available variables
format_vars = {
'podcast_title': sanitize_filename(podcast.title),
'episode_title': sanitize_filename(episode.title),
'episode_number': sanitize_filename(str(episode.episode_number)) if episode.episode_number else '',
'season': sanitize_filename(str(episode.season)) if episode.season else '',
# Format season_episode as S01E01, ensuring season is always included
'season_episode': (
# If we have season and episode_number is a digit, format as S01E01
f"S{episode.season:02d}E{int(episode.episode_number):02d}"
if episode.season and episode.episode_number and episode.episode_number.isdigit()
# If episode_number exists but is not a digit, format as S01E{episode_number}
else f"S{episode.season or 1:02d}E{episode.episode_number}"
if episode.episode_number
# Otherwise, return empty string
else ''
),
'published_date': episode.published_date.strftime('%Y-%m-%d') if episode.published_date else '',
'author': sanitize_filename(podcast.author) if podcast.author else ''
'author': sanitize_filename(podcast.author) if podcast.author else '',
'explicit': 'explicit' if episode.explicit else '',
'absolute_number': sanitize_filename(absolute_number)
}
# Format the string
try:
return format_string.format(**format_vars)
formatted_path = format_string.format(**format_vars)
except KeyError as e:
logger.warning(f"Invalid format variable: {str(e)}")
# Fall back to a simple format
return f"{format_vars['podcast_title']}/{format_vars['episode_title']}"
formatted_path = f"{format_vars['podcast_title']}/{format_vars['episode_title']}"
# Replace forward slashes with OS-specific path separator
formatted_path = formatted_path.replace('/', os.path.sep)
# Handle empty path segments by removing them
path_parts = formatted_path.split(os.path.sep)
path_parts = [part for part in path_parts if part.strip()]
# Rejoin the path with proper separators
return os.path.sep.join(path_parts)
def sanitize_filename(filename):
"""
@ -127,15 +210,28 @@ def sanitize_filename(filename):
Returns:
str: Sanitized filename.
"""
if not filename:
return ""
# Replace invalid characters
invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
for char in invalid_chars:
filename = filename.replace(char, '_')
# Remove leading and trailing whitespace and periods
filename = filename.strip().strip('.')
# Replace multiple spaces with a single space
filename = ' '.join(filename.split())
# Limit length
if len(filename) > 100:
filename = filename[:97] + '...'
# If filename is empty after sanitization, provide a default
if not filename:
filename = "unnamed"
return filename
def delete_old_episodes(days=30):
@ -177,3 +273,141 @@ def delete_old_episodes(days=30):
db.session.commit()
logger.info(f"Deleted {count} old episodes")
return count
def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
"""
Verify that downloaded episodes still exist on disk and update their status.
Args:
podcast_id (int, optional): ID of the podcast to check. If None, check all podcasts.
progress_callback (callable, optional): Callback function for progress updates.
Returns:
dict: Statistics about the verification process.
"""
from app.models.podcast import Episode, Podcast
# Get episodes to check
query = Episode.query.filter(Episode.downloaded == True)
if podcast_id:
query = query.filter(Episode.podcast_id == podcast_id)
episodes = query.all()
total = len(episodes)
if progress_callback:
progress_callback(0, f"Verifying {total} downloaded episodes")
missing = 0
for i, episode in enumerate(episodes):
if progress_callback and total > 0:
progress = int((i / total) * 100)
progress_callback(progress, f"Verifying episode {i+1}/{total}")
if not episode.file_path or not os.path.exists(episode.file_path):
episode.downloaded = False
if episode.file_path:
logger.warning(f"Episode file not found: {episode.file_path}")
missing += 1
db.session.commit()
if progress_callback:
progress_callback(100, f"Verification complete. {missing} episodes marked as not downloaded.")
logger.info(f"Verified {total} episodes. {missing} were missing.")
return {
'total_checked': total,
'missing': missing
}
def rename_episode(episode_id, new_format=None, progress_callback=None):
"""
Rename a downloaded episode file using a new format.
Args:
episode_id: ID of the Episode to rename.
new_format (str, optional): New format string. If None, use the podcast's format or the global settings format.
progress_callback (callable, optional): Callback function for progress updates.
Returns:
str: New file path.
"""
from app.models.podcast import Episode, Podcast
if progress_callback:
progress_callback(5, "Loading episode data")
# Load the episode with its podcast relationship
episode = Episode.query.get(episode_id)
if not episode:
raise ValueError(f"Episode with ID {episode_id} not found")
if not episode.downloaded or not episode.file_path or not os.path.exists(episode.file_path):
raise ValueError("Episode is not downloaded or file does not exist")
if progress_callback:
progress_callback(10, "Getting podcast and format settings")
# Explicitly load the podcast to avoid lazy loading issues
podcast = Podcast.query.get(episode.podcast_id)
if not podcast:
raise ValueError(f"Podcast with ID {episode.podcast_id} not found")
settings = Settings.query.first()
if not settings:
settings = Settings(
download_path=current_app.config['DOWNLOAD_PATH'],
naming_format="{podcast_title}/{episode_title}"
)
db.session.add(settings)
db.session.commit()
# Use provided format, podcast's format, or global settings format
format_string = new_format or podcast.naming_format or settings.naming_format
if progress_callback:
progress_callback(20, "Formatting new filename")
# Format new filename
new_filename = format_filename(format_string, podcast, episode)
# Get file extension from current file
_, ext = os.path.splitext(episode.file_path)
# Create full path for new file
download_path = settings.download_path
new_file_path = os.path.normpath(os.path.join(download_path, new_filename + ext))
# Ensure the directory exists
new_file_dir = os.path.dirname(new_file_path)
os.makedirs(new_file_dir, exist_ok=True)
if progress_callback:
progress_callback(50, f"Renaming file to {new_file_path}")
# Rename the file
try:
# Check if the new path is different
if os.path.normpath(episode.file_path) != os.path.normpath(new_file_path):
os.rename(episode.file_path, new_file_path)
episode.file_path = new_file_path
db.session.commit()
if progress_callback:
progress_callback(100, "File renamed successfully")
logger.info(f"Renamed episode file: {episode.title} to {new_file_path}")
return new_file_path
else:
if progress_callback:
progress_callback(100, "File already has the correct name")
logger.info(f"Episode file already has the correct name: {new_file_path}")
return episode.file_path
except Exception as e:
if progress_callback:
progress_callback(100, f"Error renaming file: {str(e)}")
logger.error(f"Error renaming episode file: {str(e)}")
raise

View file

@ -142,9 +142,16 @@ def get_podcast_episodes(feed_url):
'published_date': _parse_date(entry.get('published')),
'guid': entry.get('id', ''),
'duration': _parse_duration(entry.get('itunes_duration', '')),
'episode_number': entry.get('itunes_episode', '')
'season': entry.get('itunes_season'), # Season number
'episode_number': entry.get('itunes_episode', ''), # Episode number within season
'explicit': False # Default to False
}
# Handle explicit flag safely
itunes_explicit = entry.get('itunes_explicit', '')
if isinstance(itunes_explicit, str) and itunes_explicit:
episode['explicit'] = itunes_explicit.lower() == 'yes'
# Generate a GUID if one is not provided
if not episode['guid']:
# Try to use a link as GUID

View file

@ -41,12 +41,13 @@ def update_all_podcasts():
return stats
def update_podcast(podcast_id):
def update_podcast(podcast_id, progress_callback=None):
"""
Update a specific podcast.
Args:
podcast_id (int): ID of the podcast to update.
progress_callback (callable, optional): Callback function for progress updates.
Returns:
dict: Statistics about the update process.
@ -63,12 +64,18 @@ def update_podcast(podcast_id):
logger.info(f"Updating podcast: {podcast.title} (ID: {podcast.id})")
logger.info(f"Feed URL: {podcast.feed_url}")
if progress_callback:
progress_callback(10, f"Fetching episodes for {podcast.title}")
# Get episodes from feed
episodes = get_podcast_episodes(podcast.feed_url)
# Update podcast last_checked timestamp
podcast.last_checked = datetime.utcnow()
if progress_callback:
progress_callback(30, f"Found {len(episodes)} episodes")
if not episodes:
logger.warning(f"No episodes found for podcast: {podcast.title}")
stats['feed_status'] = 'no_episodes'
@ -92,7 +99,11 @@ def update_podcast(podcast_id):
logger.error(f"Error refreshing feed URL: {str(e)}")
# Process each episode
for episode_data in episodes:
total_episodes = len(episodes)
for i, episode_data in enumerate(episodes):
if progress_callback and total_episodes > 0:
progress = 30 + int((i / total_episodes) * 60) # Scale from 30% to 90%
progress_callback(progress, f"Processing episode {i+1}/{total_episodes}")
# Skip episodes without required fields
if not episode_data.get('guid'):
logger.warning(f"Skipping episode without GUID: {episode_data.get('title', 'Unknown')}")
@ -129,7 +140,9 @@ def update_podcast(podcast_id):
# Auto-download if enabled
if podcast.auto_download and episode.audio_url:
try:
download_episode(episode)
# Need to commit first to ensure episode has an ID
db.session.commit()
download_episode(episode.id)
stats['episodes_downloaded'] += 1
logger.info(f"Auto-downloaded episode: {episode.title}")
except Exception as e:
@ -144,6 +157,9 @@ def update_podcast(podcast_id):
db.session.commit()
logger.info(f"Podcast update completed: {stats}")
if progress_callback:
progress_callback(100, f"Update complete. Found {stats['new_episodes']} new episodes.")
return stats
except Exception as e:
@ -151,6 +167,10 @@ def update_podcast(podcast_id):
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['feed_status'] = 'error'
stats['error'] = str(e)
if progress_callback:
progress_callback(100, f"Error: {str(e)}")
raise
def schedule_updates():

View file

@ -0,0 +1,200 @@
"""
Task manager service for Podcastrr.
Handles background tasks and provides status updates.
"""
import threading
import time
import uuid
import logging
from datetime import datetime
from enum import Enum
from flask import current_app
# Set up logging
logger = logging.getLogger(__name__)
class TaskStatus(Enum):
"""Task status enum"""
PENDING = "pending"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"
class Task:
"""
Represents a background task with status tracking.
"""
def __init__(self, task_type, description, target_func, target_args=None, target_kwargs=None):
self.id = str(uuid.uuid4())
self.type = task_type
self.description = description
self.status = TaskStatus.PENDING
self.progress = 0
self.message = "Task created"
self.result = None
self.error = None
self.created_at = datetime.utcnow()
self.started_at = None
self.completed_at = None
self.target_func = target_func
self.target_args = target_args or []
self.target_kwargs = target_kwargs or {}
def to_dict(self):
"""Convert task to dictionary for API responses."""
return {
'id': self.id,
'type': self.type,
'description': self.description,
'status': self.status.value,
'progress': self.progress,
'message': self.message,
'created_at': self.created_at.isoformat() if self.created_at else None,
'started_at': self.started_at.isoformat() if self.started_at else None,
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
'error': self.error
}
class TaskManager:
"""
Manages background tasks and provides status updates.
"""
_instance = None
def __new__(cls):
if cls._instance is None:
cls._instance = super(TaskManager, cls).__new__(cls)
cls._instance.tasks = {}
cls._instance.lock = threading.Lock()
return cls._instance
def create_task(self, task_type, description, target_func, *args, **kwargs):
"""
Create a new task and add it to the task manager.
Args:
task_type (str): Type of task (e.g., 'download', 'update')
description (str): Human-readable description of the task
target_func (callable): Function to execute in the background
*args: Arguments to pass to the target function
**kwargs: Keyword arguments to pass to the target function
Returns:
str: Task ID
"""
task = Task(task_type, description, target_func, args, kwargs)
with self.lock:
self.tasks[task.id] = task
# Get the current Flask app
app = current_app._get_current_object()
# Start the task in a background thread
thread = threading.Thread(target=self._run_task, args=(task.id, app))
thread.daemon = True
thread.start()
logger.info(f"Created task {task.id} of type {task_type}: {description}")
return task.id
def _run_task(self, task_id, app):
"""
Run a task in the background.
Args:
task_id (str): ID of the task to run
app: Flask application instance
"""
task = self.get_task(task_id)
if not task:
logger.error(f"Task {task_id} not found")
return
# Update task status
task.status = TaskStatus.RUNNING
task.started_at = datetime.utcnow()
task.message = "Task started"
try:
# Create a wrapper for the target function to update progress
def progress_callback(progress, message=None):
task.progress = progress
if message:
task.message = message
# Add progress_callback to kwargs
task.target_kwargs['progress_callback'] = progress_callback
# Run the target function within Flask application context
with app.app_context():
result = task.target_func(*task.target_args, **task.target_kwargs)
# Update task status
task.status = TaskStatus.COMPLETED
task.completed_at = datetime.utcnow()
task.progress = 100
task.result = result
task.message = "Task completed successfully"
logger.info(f"Task {task_id} completed successfully")
except Exception as e:
# Update task status on error
task.status = TaskStatus.FAILED
task.completed_at = datetime.utcnow()
task.error = str(e)
task.message = f"Task failed: {str(e)}"
logger.error(f"Task {task_id} failed: {str(e)}", exc_info=True)
# Clean up old tasks
self.clean_old_tasks()
def get_task(self, task_id):
"""
Get a task by ID.
Args:
task_id (str): ID of the task to get
Returns:
Task: Task object or None if not found
"""
with self.lock:
return self.tasks.get(task_id)
def get_all_tasks(self):
"""
Get all tasks.
Returns:
list: List of all tasks
"""
with self.lock:
return list(self.tasks.values())
def clean_old_tasks(self, max_age_seconds=60):
"""
Remove old completed or failed tasks.
Args:
max_age_seconds (int): Maximum age of tasks to keep in seconds
Returns:
int: Number of tasks removed
"""
now = datetime.utcnow()
to_remove = []
with self.lock:
for task_id, task in self.tasks.items():
if task.status in (TaskStatus.COMPLETED, TaskStatus.FAILED):
if task.completed_at and (now - task.completed_at).total_seconds() > max_age_seconds:
to_remove.append(task_id)
for task_id in to_remove:
del self.tasks[task_id]
return len(to_remove)
# Create a global task manager instance
task_manager = TaskManager()