"""
Podcast downloader service for Podcastrr.

Downloads episode audio files, derives on-disk names from a configurable
naming format, and keeps the database in sync with what is on disk.
"""
import logging
import os
import re
from datetime import datetime, timedelta

import requests
from flask import current_app

from app.models.database import db
from app.models.settings import Settings

# Set up logging
logger = logging.getLogger(__name__)

# Naming format used when neither the podcast nor the settings provide one.
_DEFAULT_NAMING_FORMAT = "{podcast_title}/{episode_title}"

# Extensions probed when looking for already-present episode files.
_AUDIO_EXTENSIONS = ('.mp3', '.m4a', '.ogg', '.wav')

# (substring of the Content-Type header, extension); first match wins, so the
# order mirrors the precedence of the checks this table replaces.
_CONTENT_TYPE_EXTENSIONS = (
    ('mp3', '.mp3'),
    ('mpeg', '.mp3'),
    ('mp4', '.m4a'),
    ('m4a', '.m4a'),
    ('ogg', '.ogg'),
    ('wav', '.wav'),
)

# Characters that are not allowed in file names; each becomes an underscore.
_INVALID_FILENAME_CHARS = str.maketrans({char: '_' for char in '<>:"/\\|?*'})

# Path segments that are only a label whose value turned out to be empty.
_VALUELESS_SEGMENTS = frozenset(["Season", "Episode", "E", "S"])

# A season marker with a dangling "E" and no episode number, e.g. "S01E".
# Deliberately requires digits so ordinary titles such as "SCIENCE" survive.
_SEASON_WITHOUT_EPISODE = re.compile(r'(?:S|Season)\s*\d+\s*E')


def _report(progress_callback, percent, message):
    """Forward a progress update to ``progress_callback`` if one was given."""
    if progress_callback:
        progress_callback(percent, message)


def _get_or_create_settings():
    """
    Return the first Settings row, creating and committing a default one
    (download path from the Flask config, default naming format) if none exists.
    """
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format=_DEFAULT_NAMING_FORMAT
        )
        db.session.add(settings)
        db.session.commit()
    return settings


def _extension_for_content_type(content_type):
    """Map a (lower-cased) Content-Type header value to a file extension."""
    for marker, extension in _CONTENT_TYPE_EXTENSIONS:
        if marker in content_type:
            return extension
    return '.mp3'  # Default to mp3


def _parse_content_length(raw_value):
    """Return the Content-Length as a non-negative int, or 0 if absent/invalid."""
    try:
        return max(int(raw_value), 0)
    except (TypeError, ValueError):
        return 0


def _stream_to_file(response, file_path, file_size, progress_callback):
    """
    Write the streamed response body to ``file_path`` and return the number
    of bytes written. Progress is reported in the 20-90% range, and only when
    the total size is known.
    """
    downloaded_bytes = 0
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            if not chunk:
                continue
            f.write(chunk)
            downloaded_bytes += len(chunk)

            if file_size > 0 and progress_callback:
                progress = 20 + int((downloaded_bytes / file_size) * 70)  # Scale to 20-90%
                progress_callback(
                    min(progress, 90),
                    f"Downloading: {downloaded_bytes/1024/1024:.1f}MB / {file_size/1024/1024:.1f}MB"
                )
    return downloaded_bytes


def _remove_partial_file(path):
    """Best-effort removal of an incomplete download."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing was written before the failure; nothing to clean up.
        pass
    except OSError as e:
        logger.warning("Could not remove partial download %s: %s", path, e)


def download_episode(episode_id, progress_callback=None):
    """
    Download a podcast episode.

    The target path is ``<download_path>/<formatted name><ext>`` where the
    name comes from the podcast's naming format (falling back to the global
    settings) and the extension is derived from the response Content-Type.
    On success the episode row is marked as downloaded and committed.

    Args:
        episode_id: ID of the Episode to download.
        progress_callback (callable, optional): Callback function for progress
            updates, called as ``progress_callback(percent, message)``.

    Returns:
        str: Path to the downloaded file.

    Raises:
        ValueError: If the episode or its podcast does not exist, or the
            episode has no audio URL.
        Exception: Any error raised while downloading or committing is logged
            and re-raised.
    """
    # Imported here rather than at module level (kept from the original layout).
    from app.models.podcast import Episode, Podcast

    _report(progress_callback, 2, "Loading episode data")

    # Load the episode with its podcast relationship
    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    if not episode.audio_url:
        raise ValueError("Episode has no audio URL")

    _report(progress_callback, 5, "Getting settings")
    settings = _get_or_create_settings()

    # Create download directory
    download_path = settings.download_path
    os.makedirs(download_path, exist_ok=True)

    _report(progress_callback, 10, "Formatting filename")

    # Use podcast's naming format if available, otherwise use global settings
    naming_format = podcast.naming_format or settings.naming_format
    filename = format_filename(naming_format, podcast, episode)

    # Ensure the directory exists
    file_dir = os.path.dirname(os.path.join(download_path, filename))
    os.makedirs(file_dir, exist_ok=True)

    # The extension is appended once the server's content type is known
    file_path = os.path.normpath(os.path.join(download_path, filename))

    # True only while the body is being written, so that a failure at any
    # other stage (e.g. the final commit) never deletes a complete file.
    download_in_progress = False
    try:
        _report(progress_callback, 15, "Connecting to server")

        response = requests.get(episode.audio_url, stream=True, timeout=30)
        try:
            response.raise_for_status()

            # Get content type and set appropriate extension
            content_type = (response.headers.get('Content-Type') or '').lower()
            file_path += _extension_for_content_type(content_type)

            # Get file size if available (0 means unknown)
            file_size = _parse_content_length(response.headers.get('Content-Length'))

            _report(progress_callback, 20, "Starting download")

            download_in_progress = True
            downloaded_bytes = _stream_to_file(
                response, file_path, file_size, progress_callback
            )
            download_in_progress = False
        finally:
            # A streamed response holds its connection until explicitly closed.
            response.close()

        _report(progress_callback, 95, "Updating database")

        # Update episode in database. The size is recorded only now so a
        # failed download leaves the row untouched; fall back to the number
        # of bytes received when the server sent no Content-Length.
        episode.file_size = file_size if file_size > 0 else downloaded_bytes
        episode.downloaded = True
        episode.file_path = file_path
        db.session.commit()

        _report(progress_callback, 100, "Download complete")

        logger.info("Downloaded episode: %s", episode.title)
        return file_path

    except Exception as e:
        logger.error("Error downloading episode: %s", e)
        if download_in_progress:
            _remove_partial_file(file_path)
        raise


def _season_episode_label(episode, published_date):
    """
    Build the value of the ``{season_episode}`` placeholder.

    - season and numeric episode number -> ``S01E01``
    - any other episode number -> ``S<season or 1>E<number>`` (number sanitized)
    - no episode number -> the published date, or '' if that is missing too
    """
    raw_number = episode.episode_number
    # str() so that an integer episode number does not break .isdigit()
    number_text = str(raw_number) if raw_number else ''

    if not number_text:
        return published_date

    if episode.season and number_text.isdigit():
        return f"S{episode.season:02d}E{int(number_text):02d}"

    # Sanitized because this value may contain path separators (e.g. "12/13").
    return f"S{episode.season or 1:02d}E{sanitize_filename(number_text)}"


def format_filename(format_string, podcast, episode):
    """
    Format a filename using the provided format string and podcast/episode data.

    Available placeholders: ``podcast_title``, ``episode_title``,
    ``episode_number``, ``season``, ``season_episode``, ``published_date``,
    ``author``, ``explicit`` and ``absolute_number`` (1-based position of the
    episode among the podcast's episodes ordered by published date).

    Forward slashes in the result separate directories. Segments that end up
    empty, or that are only a label without a value ("Season", "S01E", ...),
    are removed.

    Args:
        format_string (str): Format string with placeholders. A falsy value
            falls back to ``{podcast_title}/{episode_title}``.
        podcast: Podcast model instance.
        episode: Episode model instance.

    Returns:
        str: Formatted relative path, without file extension.
    """
    format_string = format_string or _DEFAULT_NAMING_FORMAT

    # Calculate absolute number if needed
    absolute_number = ''
    if '{absolute_number}' in format_string:
        from app.models.podcast import Episode
        # Get all episodes for this podcast ordered by published date
        episodes = Episode.query.filter_by(podcast_id=podcast.id).order_by(
            Episode.published_date.asc()
        ).all()
        # Find the position of the current episode in the ordered list
        for position, candidate in enumerate(episodes, 1):
            if candidate.id == episode.id:
                absolute_number = str(position)
                break

    published_date = (
        episode.published_date.strftime('%Y-%m-%d') if episode.published_date else ''
    )

    # Create a dictionary with all available variables
    format_vars = {
        'podcast_title': sanitize_filename(podcast.title),
        'episode_title': sanitize_filename(episode.title),
        'episode_number': (
            sanitize_filename(str(episode.episode_number)) if episode.episode_number else ''
        ),
        'season': sanitize_filename(str(episode.season)) if episode.season else '',
        'season_episode': _season_episode_label(episode, published_date),
        'published_date': published_date,
        'author': sanitize_filename(podcast.author) if podcast.author else '',
        'explicit': 'explicit' if episode.explicit else '',
        'absolute_number': sanitize_filename(absolute_number)
    }

    # Format the string. Besides unknown names (KeyError), str.format raises
    # IndexError for positional fields, ValueError for unbalanced braces, and
    # AttributeError/TypeError for bad attribute or index access on a field.
    try:
        formatted_path = format_string.format(**format_vars)
    except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
        logger.warning("Invalid format variable: %s", e)
        # Fall back to a simple format
        formatted_path = f"{format_vars['podcast_title']}/{format_vars['episode_title']}"

    # Replace forward slashes with OS-specific path separator
    formatted_path = formatted_path.replace('/', os.path.sep)

    # Remove empty segments and segments that would be just placeholders without values
    cleaned_parts = []
    for part in formatted_path.split(os.path.sep):
        part = part.strip()
        if not part:
            continue
        if part in _VALUELESS_SEGMENTS:
            continue
        if _SEASON_WITHOUT_EPISODE.fullmatch(part):
            continue
        cleaned_parts.append(part)

    # Rejoin the path with proper separators
    return os.path.sep.join(cleaned_parts)


def sanitize_filename(filename):
    """
    Sanitize a string to be used as a filename.

    Invalid characters (``< > : " / \\ | ? *``) become underscores, surrounding
    whitespace and periods are stripped, runs of whitespace collapse to a
    single space, and the result is capped at 100 characters.

    Args:
        filename (str): Original filename.

    Returns:
        str: Sanitized filename; "" for a falsy input, "unnamed" if nothing
        is left after sanitization.
    """
    if not filename:
        return ""

    # Replace invalid characters
    filename = filename.translate(_INVALID_FILENAME_CHARS)

    # Remove leading and trailing whitespace and periods
    filename = filename.strip().strip('.')

    # Replace multiple spaces with a single space
    filename = ' '.join(filename.split())

    # Limit length
    if len(filename) > 100:
        filename = filename[:97] + '...'

    # If filename is empty after sanitization, provide a default
    return filename or "unnamed"


def delete_old_episodes(days=30):
    """
    Delete downloaded episode files published more than ``days`` days ago.

    Args:
        days (int): Number of days to keep episodes. When a Settings row
            exists and has ``delete_after_days`` set, that value takes
            precedence over this argument.

    Returns:
        int: Number of episodes deleted.
    """
    from app.models.podcast import Episode

    settings = Settings.query.first()
    # An unset value must not reach timedelta(), which rejects None.
    if settings and settings.delete_after_days is not None:
        days = settings.delete_after_days

    # Calculate the cutoff date (naive UTC, as in the rest of this module)
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    # Find episodes to delete
    episodes = Episode.query.filter(
        Episode.downloaded == True,  # noqa: E712 - SQLAlchemy column expression
        Episode.published_date < cutoff_date
    ).all()

    count = 0
    for episode in episodes:
        if not episode.file_path or not os.path.exists(episode.file_path):
            continue
        try:
            os.remove(episode.file_path)
        except Exception as e:
            # Keep going: one undeletable file should not block the rest.
            logger.error("Error deleting episode file: %s", e)
            continue
        episode.file_path = None
        episode.downloaded = False
        count += 1

    db.session.commit()

    logger.info("Deleted %s old episodes", count)
    return count


def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
    """
    Verify that downloaded episodes still exist on disk and update their status.
    Also checks for existing files for episodes that aren't marked as downloaded.

    The second step (matching existing files) only runs when ``podcast_id``
    is given, because the expected file names depend on the podcast.

    Args:
        podcast_id (int, optional): ID of the podcast to check. If None, check all podcasts.
        progress_callback (callable, optional): Callback function for progress updates.

    Returns:
        dict: Statistics about the verification process, with the keys
        ``total_checked``, ``missing`` and ``found``.
    """
    from app.models.podcast import Episode, Podcast

    # First, verify episodes that are marked as downloaded
    query = Episode.query.filter(Episode.downloaded == True)  # noqa: E712
    if podcast_id:
        query = query.filter(Episode.podcast_id == podcast_id)

    downloaded_episodes = query.all()
    total_downloaded = len(downloaded_episodes)

    _report(progress_callback, 0, f"Verifying {total_downloaded} downloaded episodes")

    missing = 0
    for i, episode in enumerate(downloaded_episodes):
        if progress_callback and total_downloaded > 0:
            # Use first half of progress for verification
            progress = int((i / total_downloaded) * 50)
            progress_callback(progress, f"Verifying episode {i+1}/{total_downloaded}")

        if not episode.file_path or not os.path.exists(episode.file_path):
            episode.downloaded = False
            if episode.file_path:
                logger.warning("Episode file not found: %s", episode.file_path)
            missing += 1

    db.session.commit()

    # Now check for existing files for episodes that aren't marked as downloaded
    query = Episode.query.filter(Episode.downloaded == False)  # noqa: E712
    if podcast_id:
        query = query.filter(Episode.podcast_id == podcast_id)

    undownloaded_episodes = query.all()
    total_undownloaded = len(undownloaded_episodes)

    _report(
        progress_callback, 50,
        f"Checking for existing files for {total_undownloaded} undownloaded episodes"
    )

    found = 0
    if total_undownloaded > 0 and podcast_id:
        # Get the podcast
        podcast = Podcast.query.get(podcast_id)
        if not podcast:
            logger.error("Podcast with ID %s not found", podcast_id)
            return {
                'total_checked': total_downloaded,
                'missing': missing,
                'found': 0
            }

        settings = _get_or_create_settings()

        # Use podcast's naming format if available, otherwise use global settings
        naming_format = podcast.naming_format or settings.naming_format
        download_path = settings.download_path

        # Check each undownloaded episode for existing files
        for i, episode in enumerate(undownloaded_episodes):
            if progress_callback:
                # Use second half of progress for file matching
                progress = 50 + int((i / total_undownloaded) * 50)
                progress_callback(
                    progress, f"Checking for file for episode {i+1}/{total_undownloaded}"
                )

            try:
                # Format filename using the naming format
                filename = format_filename(naming_format, podcast, episode)

                # Check for common audio file extensions
                for ext in _AUDIO_EXTENSIONS:
                    file_path = os.path.normpath(os.path.join(download_path, filename + ext))
                    if os.path.exists(file_path):
                        logger.info("Found existing file for episode: %s", file_path)
                        episode.downloaded = True
                        episode.file_path = file_path
                        found += 1
                        break
            except Exception as e:
                # One unformattable episode must not abort the whole scan.
                logger.error(
                    "Error checking for existing file for episode %s: %s", episode.title, e
                )

    db.session.commit()

    _report(
        progress_callback, 100,
        f"Verification complete. {missing} episodes marked as not downloaded, {found} files matched."
    )

    logger.info(
        "Verified %s episodes. %s were missing. Found files for %s undownloaded episodes.",
        total_downloaded, missing, found
    )
    return {
        'total_checked': total_downloaded,
        'missing': missing,
        'found': found
    }


def download_all_episodes(podcast_id, progress_callback=None):
    """
    Download all episodes of a podcast that haven't been downloaded yet.

    A failure on one episode is logged and counted; the remaining episodes
    are still attempted.

    Args:
        podcast_id: ID of the Podcast to download all episodes for.
        progress_callback (callable, optional): Callback function for progress updates.

    Returns:
        dict: Statistics about the download process, with the keys
        ``total``, ``downloaded`` and ``failed``.

    Raises:
        ValueError: If the podcast does not exist.
    """
    from app.models.podcast import Podcast, Episode

    _report(progress_callback, 2, "Loading podcast data")

    # Load the podcast
    podcast = Podcast.query.get(podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {podcast_id} not found")

    # Get all episodes that haven't been downloaded yet
    episodes = Episode.query.filter_by(podcast_id=podcast_id, downloaded=False).all()
    total_episodes = len(episodes)

    _report(progress_callback, 5, f"Found {total_episodes} episodes to download")

    if total_episodes == 0:
        _report(progress_callback, 100, "No episodes to download")
        return {"total": 0, "downloaded": 0, "failed": 0}

    stats = {"total": total_episodes, "downloaded": 0, "failed": 0}

    # Download each episode
    for i, episode in enumerate(episodes):
        if progress_callback:
            progress = 5 + int((i / total_episodes) * 90)  # Scale from 5% to 95%
            progress_callback(
                progress, f"Downloading episode {i+1}/{total_episodes}: {episode.title}"
            )

        try:
            download_episode(episode.id)
        except Exception as e:
            stats["failed"] += 1
            logger.error("Error downloading episode %s: %s", episode.title, e)
        else:
            stats["downloaded"] += 1
            logger.info("Downloaded episode %s/%s: %s", i + 1, total_episodes, episode.title)

    _report(
        progress_callback, 100,
        f"Download complete. Downloaded {stats['downloaded']} episodes, {stats['failed']} failed."
    )

    logger.info("Podcast archive download completed: %s", stats)
    return stats


def rename_episode(episode_id, new_format=None, progress_callback=None):
    """
    Rename a downloaded episode file using a new format.

    The file keeps its current extension and is placed under the configured
    download path. An existing, different file at the target path is never
    overwritten.

    Args:
        episode_id: ID of the Episode to rename.
        new_format (str, optional): New format string. If None, use the podcast's format
                                   or the global settings format.
        progress_callback (callable, optional): Callback function for progress updates.

    Returns:
        str: New file path (the current path if the name is already correct).

    Raises:
        ValueError: If the episode or podcast does not exist, or the episode
            is not downloaded / its file is missing.
        FileExistsError: If a different file already exists at the new path.
        OSError: If the rename itself fails.
    """
    from app.models.podcast import Episode, Podcast

    _report(progress_callback, 5, "Loading episode data")

    # Load the episode with its podcast relationship
    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    if not episode.downloaded or not episode.file_path or not os.path.exists(episode.file_path):
        raise ValueError("Episode is not downloaded or file does not exist")

    _report(progress_callback, 10, "Getting podcast and format settings")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    settings = _get_or_create_settings()

    # Use provided format, podcast's format, or global settings format
    format_string = new_format or podcast.naming_format or settings.naming_format

    _report(progress_callback, 20, "Formatting new filename")

    # Format new filename
    new_filename = format_filename(format_string, podcast, episode)

    # Get file extension from current file
    _, ext = os.path.splitext(episode.file_path)

    # Create full path for new file
    download_path = settings.download_path
    new_file_path = os.path.normpath(os.path.join(download_path, new_filename + ext))

    # Ensure the directory exists
    new_file_dir = os.path.dirname(new_file_path)
    os.makedirs(new_file_dir, exist_ok=True)

    _report(progress_callback, 50, f"Renaming file to {new_file_path}")

    # Rename the file
    try:
        # Nothing to do if the path is unchanged
        if os.path.normpath(episode.file_path) == os.path.normpath(new_file_path):
            _report(progress_callback, 100, "File already has the correct name")
            logger.info("Episode file already has the correct name: %s", new_file_path)
            return episode.file_path

        # os.rename silently replaces an existing target on POSIX, which
        # would destroy another episode's file when two names collide.
        # samefile() lets a case-only rename through on case-insensitive
        # file systems, where the "existing" target is the source itself.
        if os.path.exists(new_file_path) and not os.path.samefile(
            episode.file_path, new_file_path
        ):
            raise FileExistsError(f"Target file already exists: {new_file_path}")

        os.rename(episode.file_path, new_file_path)
        episode.file_path = new_file_path
        db.session.commit()

        _report(progress_callback, 100, "File renamed successfully")

        logger.info("Renamed episode file: %s to %s", episode.title, new_file_path)
        return new_file_path
    except Exception as e:
        _report(progress_callback, 100, f"Error renaming file: {str(e)}")

        logger.error("Error renaming episode file: %s", e)
        raise