# podcastrr/app/services/podcast_downloader.py
# Cody Cook 095bf52a2f Updates
# 2025-06-15 21:20:30 -07:00
#
# 413 lines
# 14 KiB
# Python

"""
Podcast downloader service for Podcastrr.
"""
import os
import requests
import logging
from datetime import datetime, timedelta
from flask import current_app
from app.models.database import db
from app.models.settings import Settings
# Set up logging
logger = logging.getLogger(__name__)
def _extension_for_content_type(content_type):
    """Return the file extension to append for a Content-Type header value."""
    # Media types are case-insensitive, so compare in lower case.
    normalized = (content_type or '').lower()
    if 'mp3' in normalized or 'mpeg' in normalized:
        return '.mp3'
    if 'mp4' in normalized or 'm4a' in normalized:
        return '.m4a'
    if 'ogg' in normalized:
        return '.ogg'
    if 'wav' in normalized:
        return '.wav'
    return '.mp3'  # Default to mp3


def _parse_content_length(raw_value):
    """Return the Content-Length header as a non-negative int, or 0 if missing/invalid."""
    try:
        return max(int(raw_value), 0)
    except (TypeError, ValueError):
        return 0


def download_episode(episode_id, progress_callback=None):
    """
    Download a podcast episode.

    The audio is streamed into a temporary ``<target>.part`` file and moved to
    its final name only after the transfer finished, so a failed or interrupted
    download never leaves a truncated file at the final path. The episode row
    is only modified after the file is in place.

    Args:
        episode_id: ID of the Episode to download.
        progress_callback (callable, optional): Callback function for progress
            updates, called as ``progress_callback(percent, message)``.

    Returns:
        str: Path to the downloaded file.

    Raises:
        ValueError: If the episode or its podcast cannot be found, or the
            episode has no audio URL.
        Exception: Any error raised while downloading, writing the file or
            committing the database update is logged and re-raised.
    """
    from app.models.podcast import Episode, Podcast

    def report(percent, message):
        # Progress reporting is optional; do nothing without a callback.
        if progress_callback:
            progress_callback(percent, message)

    report(2, "Loading episode data")

    # Load the episode with its podcast relationship
    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    if not episode.audio_url:
        raise ValueError("Episode has no audio URL")

    report(5, "Getting settings")

    # Create and persist default settings on first use
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format="{podcast_title}/{episode_title}"
        )
        db.session.add(settings)
        db.session.commit()

    # Create download directory
    download_path = settings.download_path
    os.makedirs(download_path, exist_ok=True)

    report(10, "Formatting filename")

    # Use podcast's naming format if available, otherwise use global settings
    naming_format = podcast.naming_format or settings.naming_format
    filename = format_filename(naming_format, podcast, episode)

    # The naming format may contain sub-directories; make sure they exist
    target_base = os.path.join(download_path, filename)
    os.makedirs(os.path.dirname(target_base), exist_ok=True)

    # The extension is appended once the Content-Type is known
    file_path = os.path.normpath(target_base)

    partial_path = None
    try:
        report(15, "Connecting to server")

        # The context manager releases the streamed connection on every path
        with requests.get(episode.audio_url, stream=True, timeout=30) as response:
            response.raise_for_status()

            file_path += _extension_for_content_type(
                response.headers.get('Content-Type', '')
            )
            file_size = _parse_content_length(response.headers.get('Content-Length'))

            report(20, "Starting download")

            partial_path = file_path + '.part'
            downloaded_bytes = 0
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded_bytes += len(chunk)

                    # Update progress if file size is known
                    if file_size > 0:
                        progress = 20 + int((downloaded_bytes / file_size) * 70)  # Scale to 20-90%
                        report(
                            min(progress, 90),
                            f"Downloading: {downloaded_bytes/1024/1024:.1f}MB / {file_size/1024/1024:.1f}MB"
                        )

        # Publish the finished file under its final name
        os.replace(partial_path, file_path)
        partial_path = None

        report(95, "Updating database")

        # Fall back to the number of bytes written when no size was announced
        episode.file_size = file_size or downloaded_bytes
        episode.downloaded = True
        episode.file_path = file_path
        db.session.commit()

        report(100, "Download complete")

        logger.info(f"Downloaded episode: {episode.title}")
        return file_path
    except Exception as e:
        logger.error(f"Error downloading episode: {str(e)}")
        raise
    finally:
        # Remove a leftover partial file after a failed download
        if partial_path and os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial download {partial_path}: {cleanup_error}")
def _format_season_episode(season, episode_number):
    """Build the ``S01E01``-style value for the ``{season_episode}`` placeholder."""
    if not episode_number:
        return ''
    number = str(episode_number).strip()
    if not number:
        return ''
    # Season is always included; a missing season is rendered as season 1
    season_number = season or 1
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits that int() rejects.
    if number.isdecimal():
        return f"S{season_number:02d}E{int(number):02d}"
    # Free-form episode numbers may contain path separators or other
    # characters that are invalid in filenames, so sanitize them.
    return f"S{season_number:02d}E{sanitize_filename(number)}"


def format_filename(format_string, podcast, episode):
    """
    Format a filename using the provided format string and podcast/episode data.

    Supported placeholders: ``{podcast_title}``, ``{episode_title}``,
    ``{episode_number}``, ``{season}``, ``{season_episode}``,
    ``{published_date}`` (``YYYY-MM-DD``), ``{author}``, ``{explicit}`` and
    ``{absolute_number}`` (1-based position of the episode among the podcast's
    episodes ordered by published date). Placeholders without a value expand
    to an empty string.

    ``/`` in the format string separates path segments; segments that end up
    empty or whitespace-only are dropped. If the format string cannot be
    applied, ``{podcast_title}/{episode_title}`` is used instead.

    Args:
        format_string (str): Format string with placeholders.
        podcast: Podcast model instance.
        episode: Episode model instance.

    Returns:
        str: Formatted relative path, without a file extension, using the
            OS-specific path separator.
    """
    # Calculate absolute number only if the format asks for it (needs a query)
    absolute_number = ''
    if '{absolute_number}' in format_string:
        from app.models.podcast import Episode

        # Get all episodes for this podcast ordered by published date
        ordered_episodes = (
            Episode.query
            .filter_by(podcast_id=podcast.id)
            .order_by(Episode.published_date.asc())
            .all()
        )
        # 1-based position of the current episode; empty if it is not found
        absolute_number = next(
            (
                str(position)
                for position, candidate in enumerate(ordered_episodes, 1)
                if candidate.id == episode.id
            ),
            '',
        )

    # Create a dictionary with all available variables
    format_vars = {
        'podcast_title': sanitize_filename(podcast.title),
        'episode_title': sanitize_filename(episode.title),
        'episode_number': sanitize_filename(str(episode.episode_number)) if episode.episode_number else '',
        'season': sanitize_filename(str(episode.season)) if episode.season else '',
        'season_episode': _format_season_episode(episode.season, episode.episode_number),
        'published_date': episode.published_date.strftime('%Y-%m-%d') if episode.published_date else '',
        'author': sanitize_filename(podcast.author) if podcast.author else '',
        'explicit': 'explicit' if episode.explicit else '',
        'absolute_number': sanitize_filename(absolute_number)
    }

    # Format the string. str.format raises KeyError for unknown names,
    # IndexError for positional fields such as "{}" and ValueError for
    # malformed braces or format specs; all mean the format is unusable.
    try:
        formatted_path = format_string.format(**format_vars)
    except (KeyError, IndexError, ValueError) as e:
        logger.warning(f"Invalid naming format {format_string!r}: {str(e)}")
        # Fall back to a simple format
        formatted_path = f"{format_vars['podcast_title']}/{format_vars['episode_title']}"

    # Replace forward slashes with OS-specific path separator
    formatted_path = formatted_path.replace('/', os.path.sep)

    # Handle empty path segments by removing them
    path_parts = [part for part in formatted_path.split(os.path.sep) if part.strip()]

    # Rejoin the path with proper separators
    return os.path.sep.join(path_parts)
# Characters that are not allowed in file names on common filesystems
_INVALID_FILENAME_CHARS = '<>:"/\\|?*'

# Translation table: invalid characters become "_"; control characters that
# are not whitespace are removed (whitespace ones are collapsed to a single
# space by the split/join step below).
_FILENAME_TRANSLATION = {
    **{ord(char): '_' for char in _INVALID_FILENAME_CHARS},
    **{code: None for code in (*range(32), 127) if not chr(code).isspace()},
}

_MAX_FILENAME_LENGTH = 100


def sanitize_filename(filename):
    """
    Sanitize a string to be used as a filename.

    Invalid characters (``< > : " / \\ | ? *``) are replaced with ``_``,
    non-whitespace control characters are removed, runs of whitespace are
    collapsed to a single space, and leading/trailing spaces and periods are
    stripped. The result is limited to 100 characters and never ends with a
    period or a space, including after truncation.

    Args:
        filename (str): Original filename.

    Returns:
        str: Sanitized filename. ``""`` for empty/``None`` input, and
            ``"unnamed"`` if nothing is left after sanitization.
    """
    if not filename:
        return ""

    # Replace invalid characters and drop control characters in one pass
    cleaned = str(filename).translate(_FILENAME_TRANSLATION)

    # Replace multiple whitespace characters with a single space
    cleaned = ' '.join(cleaned.split())

    # Remove leading and trailing spaces and periods
    cleaned = cleaned.strip(' .')

    # Limit length; strip again so the cut never leaves a trailing period or
    # space behind.
    if len(cleaned) > _MAX_FILENAME_LENGTH:
        cleaned = cleaned[:_MAX_FILENAME_LENGTH].rstrip(' .')

    # If filename is empty after sanitization, provide a default
    return cleaned or "unnamed"
def delete_old_episodes(days=30):
    """
    Delete downloaded files of episodes older than the specified number of days.

    Episodes are selected by published date. For every episode whose file was
    removed, ``file_path`` is cleared and ``downloaded`` is reset; the episode
    rows themselves are kept. Files that cannot be removed are logged and
    skipped.

    Args:
        days (int): Number of days to keep episodes. Only used when no
            settings row exists or its ``delete_after_days`` is not set;
            otherwise the stored setting takes precedence.

    Returns:
        int: Number of episode files deleted.
    """
    from app.models.podcast import Episode

    settings = Settings.query.first()
    # An unset (NULL) setting must not replace the argument: timedelta(days=None)
    # raises TypeError.
    if settings and settings.delete_after_days is not None:
        days = settings.delete_after_days

    # Calculate the cutoff date
    # NOTE(review): naive UTC; presumably published_date is stored as naive UTC - confirm.
    # NOTE(review): a value of 0 or less makes every published episode eligible - confirm intended.
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    # Find episodes to delete
    expired_episodes = Episode.query.filter(
        Episode.downloaded == True,  # noqa: E712 - SQLAlchemy column expression
        Episode.published_date < cutoff_date
    ).all()

    count = 0
    for episode in expired_episodes:
        path = episode.file_path
        if not path or not os.path.exists(path):
            continue
        try:
            os.remove(path)
        except OSError as e:
            # Keep the database state untouched for files that are still on disk
            logger.error(f"Error deleting episode file: {str(e)}")
            continue
        episode.file_path = None
        episode.downloaded = False
        count += 1

    db.session.commit()
    logger.info(f"Deleted {count} old episodes")
    return count
def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
    """
    Check that episodes flagged as downloaded still have their file on disk.

    Every episode whose ``file_path`` is empty or points to a file that no
    longer exists is marked as not downloaded. All changes are committed once
    at the end.

    Args:
        podcast_id (int, optional): Restrict the check to this podcast. When
            omitted (or falsy), episodes of all podcasts are checked.
        progress_callback (callable, optional): Called as
            ``progress_callback(percent, message)`` for progress updates.

    Returns:
        dict: ``{'total_checked': <episodes examined>, 'missing': <episodes
            marked as not downloaded>}``.
    """
    from app.models.podcast import Episode, Podcast  # noqa: F401

    def notify(percent, message):
        # Progress reporting is optional
        if progress_callback:
            progress_callback(percent, message)

    # Select the episodes to examine
    downloaded_query = Episode.query.filter(Episode.downloaded == True)  # noqa: E712
    if podcast_id:
        downloaded_query = downloaded_query.filter(Episode.podcast_id == podcast_id)
    candidates = downloaded_query.all()
    total = len(candidates)

    notify(0, f"Verifying {total} downloaded episodes")

    missing = 0
    # The loop body only runs when total >= 1, so the division is safe
    for position, episode in enumerate(candidates, start=1):
        percent = int(((position - 1) / total) * 100)
        notify(percent, f"Verifying episode {position}/{total}")

        path = episode.file_path
        if path and os.path.exists(path):
            continue

        # File is gone (or was never recorded): reset the downloaded flag
        episode.downloaded = False
        if path:
            logger.warning(f"Episode file not found: {path}")
        missing += 1

    db.session.commit()

    notify(100, f"Verification complete. {missing} episodes marked as not downloaded.")
    logger.info(f"Verified {total} episodes. {missing} were missing.")

    return {
        'total_checked': total,
        'missing': missing
    }
def rename_episode(episode_id, new_format=None, progress_callback=None):
    """
    Rename a downloaded episode file using a new format.

    The file keeps its current extension. An existing file that belongs to
    something else is never overwritten, and if the database update fails the
    file is moved back so the stored path and the file on disk stay in sync.

    Args:
        episode_id: ID of the Episode to rename.
        new_format (str, optional): New format string. If None, use the
            podcast's format or the global settings format.
        progress_callback (callable, optional): Callback function for progress
            updates, called as ``progress_callback(percent, message)``.

    Returns:
        str: New file path (the current path if the name is already correct).

    Raises:
        ValueError: If the episode or its podcast cannot be found, or the
            episode is not downloaded / its file does not exist.
        FileExistsError: If a different file already exists at the new path.
        Exception: Any other error raised while renaming or committing is
            logged, reported through the callback and re-raised.
    """
    from app.models.podcast import Episode, Podcast

    def report(percent, message):
        # Progress reporting is optional; do nothing without a callback.
        if progress_callback:
            progress_callback(percent, message)

    report(5, "Loading episode data")

    # Load the episode with its podcast relationship
    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    if not episode.downloaded or not episode.file_path or not os.path.exists(episode.file_path):
        raise ValueError("Episode is not downloaded or file does not exist")

    report(10, "Getting podcast and format settings")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    # Create and persist default settings on first use
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format="{podcast_title}/{episode_title}"
        )
        db.session.add(settings)
        db.session.commit()

    # Use provided format, podcast's format, or global settings format
    format_string = new_format or podcast.naming_format or settings.naming_format

    report(20, "Formatting new filename")

    # Format new filename and keep the extension of the current file
    new_filename = format_filename(format_string, podcast, episode)
    _, ext = os.path.splitext(episode.file_path)

    # Create full path for new file
    new_file_path = os.path.normpath(os.path.join(settings.download_path, new_filename + ext))

    # Ensure the directory exists
    os.makedirs(os.path.dirname(new_file_path), exist_ok=True)

    report(50, f"Renaming file to {new_file_path}")

    old_file_path = episode.file_path
    try:
        # Nothing to do if the file already has the requested name
        if os.path.normpath(old_file_path) == new_file_path:
            report(100, "File already has the correct name")
            logger.info(f"Episode file already has the correct name: {new_file_path}")
            return old_file_path

        # os.rename silently replaces an existing target on POSIX. Refuse to
        # overwrite another file; samefile() still allows renames where the
        # target resolves to the same file (e.g. case-only changes on
        # case-insensitive filesystems).
        if os.path.exists(new_file_path) and not os.path.samefile(old_file_path, new_file_path):
            raise FileExistsError(f"Target file already exists: {new_file_path}")

        os.rename(old_file_path, new_file_path)
        episode.file_path = new_file_path
        try:
            db.session.commit()
        except Exception:
            # Undo the rename so the stored path still points at the file
            db.session.rollback()
            try:
                os.rename(new_file_path, old_file_path)
            except OSError as undo_error:
                logger.error(f"Could not move file back to {old_file_path}: {undo_error}")
            raise

        report(100, "File renamed successfully")
        logger.info(f"Renamed episode file: {episode.title} to {new_file_path}")
        return new_file_path
    except Exception as e:
        report(100, f"Error renaming file: {str(e)}")
        logger.error(f"Error renaming episode file: {str(e)}")
        raise