""" Podcast downloader service for Podcastrr. """ import os import requests import logging from datetime import datetime, timedelta from flask import current_app from app.models.database import db from app.models.settings import Settings # Set up logging logger = logging.getLogger(__name__) def download_episode(episode_id, progress_callback=None): """ Download a podcast episode. Args: episode_id: ID of the Episode to download. progress_callback (callable, optional): Callback function for progress updates. Returns: str: Path to the downloaded file. """ from app.models.podcast import Episode, Podcast if progress_callback: progress_callback(2, "Loading episode data") # Load the episode with its podcast relationship episode = Episode.query.get(episode_id) if not episode: raise ValueError(f"Episode with ID {episode_id} not found") # Explicitly load the podcast to avoid lazy loading issues podcast = Podcast.query.get(episode.podcast_id) if not podcast: raise ValueError(f"Podcast with ID {episode.podcast_id} not found") if not episode.audio_url: raise ValueError("Episode has no audio URL") if progress_callback: progress_callback(5, "Getting settings") # Get settings settings = Settings.query.first() if not settings: settings = Settings( download_path=current_app.config['DOWNLOAD_PATH'], naming_format="{podcast_title}/{episode_title}" ) db.session.add(settings) db.session.commit() # Create download directory download_path = settings.download_path os.makedirs(download_path, exist_ok=True) if progress_callback: progress_callback(10, "Formatting filename") # Use podcast's naming format if available, otherwise use global settings naming_format = podcast.naming_format or settings.naming_format # Format filename using the naming format filename = format_filename(naming_format, podcast, episode) # Ensure the directory exists file_dir = os.path.dirname(os.path.join(download_path, filename)) os.makedirs(file_dir, exist_ok=True) # Add file extension based on content type file_path = os.path.normpath(os.path.join(download_path, filename)) # Download the file try: if progress_callback: progress_callback(15, "Connecting to server") response = requests.get(episode.audio_url, stream=True, timeout=30) response.raise_for_status() # Get content type and set appropriate extension content_type = response.headers.get('Content-Type', '') if 'mp3' in content_type: file_path += '.mp3' elif 'mpeg' in content_type: file_path += '.mp3' elif 'mp4' in content_type or 'm4a' in content_type: file_path += '.m4a' elif 'ogg' in content_type: file_path += '.ogg' elif 'wav' in content_type: file_path += '.wav' else: file_path += '.mp3' # Default to mp3 # Get file size if available file_size = int(response.headers.get('Content-Length', 0)) episode.file_size = file_size if progress_callback: progress_callback(20, "Starting download") # Write the file downloaded_bytes = 0 with open(file_path, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): if chunk: f.write(chunk) downloaded_bytes += len(chunk) # Update progress if file size is known if file_size > 0 and progress_callback: progress = 20 + int((downloaded_bytes / file_size) * 70) # Scale to 20-90% progress_callback(min(progress, 90), f"Downloading: {downloaded_bytes/1024/1024:.1f}MB / {file_size/1024/1024:.1f}MB") if progress_callback: progress_callback(95, "Updating database") # Update episode in database episode.downloaded = True episode.file_path = file_path db.session.commit() if progress_callback: progress_callback(100, "Download complete") logger.info(f"Downloaded episode: {episode.title}") return file_path except Exception as e: logger.error(f"Error downloading episode: {str(e)}") raise def format_filename(format_string, podcast, episode): """ Format a filename using the provided format string and podcast/episode data. Args: format_string (str): Format string with placeholders. podcast: Podcast model instance. episode: Episode model instance. Returns: str: Formatted filename. """ # Calculate absolute number if needed absolute_number = '' if '{absolute_number}' in format_string: from app.models.podcast import Episode # Get all episodes for this podcast ordered by published date episodes = Episode.query.filter_by(podcast_id=podcast.id).order_by(Episode.published_date.asc()).all() # Find the position of the current episode in the ordered list for i, ep in enumerate(episodes, 1): if ep.id == episode.id: absolute_number = str(i) break # Create a dictionary with all available variables format_vars = { 'podcast_title': sanitize_filename(podcast.title), 'episode_title': sanitize_filename(episode.title), 'episode_number': sanitize_filename(str(episode.episode_number)) if episode.episode_number else '', 'season': sanitize_filename(str(episode.season)) if episode.season else '', # Format season_episode as S01E01, ensuring season is always included 'season_episode': ( # If we have season and episode_number is a digit, format as S01E01 f"S{episode.season:02d}E{int(episode.episode_number):02d}" if episode.season and episode.episode_number and episode.episode_number.isdigit() # If episode_number exists but is not a digit, format as S01E{episode_number} else f"S{episode.season or 1:02d}E{episode.episode_number}" if episode.episode_number # Otherwise, return empty string else '' ), 'published_date': episode.published_date.strftime('%Y-%m-%d') if episode.published_date else '', 'author': sanitize_filename(podcast.author) if podcast.author else '', 'explicit': 'explicit' if episode.explicit else '', 'absolute_number': sanitize_filename(absolute_number) } # Format the string try: formatted_path = format_string.format(**format_vars) except KeyError as e: logger.warning(f"Invalid format variable: {str(e)}") # Fall back to a simple format formatted_path = f"{format_vars['podcast_title']}/{format_vars['episode_title']}" # Replace forward slashes with OS-specific path separator formatted_path = formatted_path.replace('/', os.path.sep) # Handle empty path segments by removing them path_parts = formatted_path.split(os.path.sep) path_parts = [part for part in path_parts if part.strip()] # Rejoin the path with proper separators return os.path.sep.join(path_parts) def sanitize_filename(filename): """ Sanitize a string to be used as a filename. Args: filename (str): Original filename. Returns: str: Sanitized filename. """ if not filename: return "" # Replace invalid characters invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*'] for char in invalid_chars: filename = filename.replace(char, '_') # Remove leading and trailing whitespace and periods filename = filename.strip().strip('.') # Replace multiple spaces with a single space filename = ' '.join(filename.split()) # Limit length if len(filename) > 100: filename = filename[:97] + '...' # If filename is empty after sanitization, provide a default if not filename: filename = "unnamed" return filename def delete_old_episodes(days=30): """ Delete episodes older than the specified number of days. Args: days (int): Number of days to keep episodes. Returns: int: Number of episodes deleted. """ from app.models.podcast import Episode settings = Settings.query.first() if settings: days = settings.delete_after_days # Calculate the cutoff date cutoff_date = datetime.utcnow() - timedelta(days=days) # Find episodes to delete episodes = Episode.query.filter( Episode.downloaded == True, Episode.published_date < cutoff_date ).all() count = 0 for episode in episodes: if episode.file_path and os.path.exists(episode.file_path): try: os.remove(episode.file_path) episode.file_path = None episode.downloaded = False count += 1 except Exception as e: logger.error(f"Error deleting episode file: {str(e)}") db.session.commit() logger.info(f"Deleted {count} old episodes") return count def verify_downloaded_episodes(podcast_id=None, progress_callback=None): """ Verify that downloaded episodes still exist on disk and update their status. Args: podcast_id (int, optional): ID of the podcast to check. If None, check all podcasts. progress_callback (callable, optional): Callback function for progress updates. Returns: dict: Statistics about the verification process. """ from app.models.podcast import Episode, Podcast # Get episodes to check query = Episode.query.filter(Episode.downloaded == True) if podcast_id: query = query.filter(Episode.podcast_id == podcast_id) episodes = query.all() total = len(episodes) if progress_callback: progress_callback(0, f"Verifying {total} downloaded episodes") missing = 0 for i, episode in enumerate(episodes): if progress_callback and total > 0: progress = int((i / total) * 100) progress_callback(progress, f"Verifying episode {i+1}/{total}") if not episode.file_path or not os.path.exists(episode.file_path): episode.downloaded = False if episode.file_path: logger.warning(f"Episode file not found: {episode.file_path}") missing += 1 db.session.commit() if progress_callback: progress_callback(100, f"Verification complete. {missing} episodes marked as not downloaded.") logger.info(f"Verified {total} episodes. {missing} were missing.") return { 'total_checked': total, 'missing': missing } def rename_episode(episode_id, new_format=None, progress_callback=None): """ Rename a downloaded episode file using a new format. Args: episode_id: ID of the Episode to rename. new_format (str, optional): New format string. If None, use the podcast's format or the global settings format. progress_callback (callable, optional): Callback function for progress updates. Returns: str: New file path. """ from app.models.podcast import Episode, Podcast if progress_callback: progress_callback(5, "Loading episode data") # Load the episode with its podcast relationship episode = Episode.query.get(episode_id) if not episode: raise ValueError(f"Episode with ID {episode_id} not found") if not episode.downloaded or not episode.file_path or not os.path.exists(episode.file_path): raise ValueError("Episode is not downloaded or file does not exist") if progress_callback: progress_callback(10, "Getting podcast and format settings") # Explicitly load the podcast to avoid lazy loading issues podcast = Podcast.query.get(episode.podcast_id) if not podcast: raise ValueError(f"Podcast with ID {episode.podcast_id} not found") settings = Settings.query.first() if not settings: settings = Settings( download_path=current_app.config['DOWNLOAD_PATH'], naming_format="{podcast_title}/{episode_title}" ) db.session.add(settings) db.session.commit() # Use provided format, podcast's format, or global settings format format_string = new_format or podcast.naming_format or settings.naming_format if progress_callback: progress_callback(20, "Formatting new filename") # Format new filename new_filename = format_filename(format_string, podcast, episode) # Get file extension from current file _, ext = os.path.splitext(episode.file_path) # Create full path for new file download_path = settings.download_path new_file_path = os.path.normpath(os.path.join(download_path, new_filename + ext)) # Ensure the directory exists new_file_dir = os.path.dirname(new_file_path) os.makedirs(new_file_dir, exist_ok=True) if progress_callback: progress_callback(50, f"Renaming file to {new_file_path}") # Rename the file try: # Check if the new path is different if os.path.normpath(episode.file_path) != os.path.normpath(new_file_path): os.rename(episode.file_path, new_file_path) episode.file_path = new_file_path db.session.commit() if progress_callback: progress_callback(100, "File renamed successfully") logger.info(f"Renamed episode file: {episode.title} to {new_file_path}") return new_file_path else: if progress_callback: progress_callback(100, "File already has the correct name") logger.info(f"Episode file already has the correct name: {new_file_path}") return episode.file_path except Exception as e: if progress_callback: progress_callback(100, f"Error renaming file: {str(e)}") logger.error(f"Error renaming episode file: {str(e)}") raise