413 lines
14 KiB
Python
413 lines
14 KiB
Python
"""
|
|
Podcast downloader service for Podcastrr.
|
|
"""
|
|
import os
|
|
import requests
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from flask import current_app
|
|
from app.models.database import db
|
|
from app.models.settings import Settings
|
|
|
|
# Set up logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def download_episode(episode_id, progress_callback=None):
    """
    Download a podcast episode.

    Loads the episode and its podcast, builds the target path from the
    podcast's naming format (falling back to the global settings), streams
    the audio to disk and marks the episode as downloaded.

    Args:
        episode_id: ID of the Episode to download.
        progress_callback (callable, optional): Callback function for progress
            updates, called as ``progress_callback(percent, message)``.

    Returns:
        str: Path to the downloaded file.

    Raises:
        ValueError: If the episode or its podcast cannot be found, or the
            episode has no audio URL.
        Exception: Any error raised while downloading, writing the file or
            committing is logged and re-raised. A partially written file is
            removed first.
    """
    from app.models.podcast import Episode, Podcast

    # Checked in order; the first entry with a marker contained in the
    # Content-Type header decides the extension.
    extension_markers = (
        (('mp3', 'mpeg'), '.mp3'),
        (('mp4', 'm4a'), '.m4a'),
        (('ogg',), '.ogg'),
        (('wav',), '.wav'),
    )

    def report(percent, message):
        """Forward a progress update when a callback was supplied."""
        if progress_callback:
            progress_callback(percent, message)

    report(2, "Loading episode data")

    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    if not episode.audio_url:
        raise ValueError("Episode has no audio URL")

    report(5, "Getting settings")

    # Create and persist default settings on first use
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format="{podcast_title}/{episode_title}"
        )
        db.session.add(settings)
        db.session.commit()

    download_path = settings.download_path
    os.makedirs(download_path, exist_ok=True)

    report(10, "Formatting filename")

    # Use podcast's naming format if available, otherwise use global settings
    naming_format = podcast.naming_format or settings.naming_format
    filename = format_filename(naming_format, podcast, episode)

    # The naming format may contain sub-directories; make sure they exist
    file_dir = os.path.dirname(os.path.join(download_path, filename))
    os.makedirs(file_dir, exist_ok=True)

    # The extension is appended once the Content-Type is known
    file_path = os.path.normpath(os.path.join(download_path, filename))

    # Path of a file that has been opened for writing but is not complete yet
    partial_path = None

    try:
        report(15, "Connecting to server")

        response = requests.get(episode.audio_url, stream=True, timeout=30)
        try:
            response.raise_for_status()

            content_type = response.headers.get('Content-Type', '')
            file_path += next(
                (
                    extension
                    for markers, extension in extension_markers
                    if any(marker in content_type for marker in markers)
                ),
                '.mp3',  # Default to mp3
            )

            # A missing or malformed Content-Length only disables the
            # percentage reporting; it must not abort the download.
            try:
                file_size = int(response.headers.get('Content-Length', 0))
            except (TypeError, ValueError):
                file_size = 0

            report(20, "Starting download")

            downloaded_bytes = 0
            partial_path = file_path
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded_bytes += len(chunk)

                    # Update progress if file size is known
                    if file_size > 0 and progress_callback:
                        progress = 20 + int((downloaded_bytes / file_size) * 70)  # Scale to 20-90%
                        progress_callback(min(progress, 90), f"Downloading: {downloaded_bytes/1024/1024:.1f}MB / {file_size/1024/1024:.1f}MB")
            partial_path = None
        finally:
            # With stream=True the connection stays open until the response
            # is closed, so release it on success and on failure alike.
            response.close()

        report(95, "Updating database")

        # Record the advertised size, or the bytes written when the server
        # did not advertise one.
        episode.file_size = file_size or downloaded_bytes
        episode.downloaded = True
        episode.file_path = file_path
        db.session.commit()

        report(100, "Download complete")

        logger.info(f"Downloaded episode: {episode.title}")
        return file_path

    except Exception as e:
        logger.error(f"Error downloading episode: {str(e)}")
        # Do not leave a truncated audio file behind
        if partial_path and os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial download {partial_path}: {cleanup_error}")
        raise
|
|
|
|
def format_filename(format_string, podcast, episode):
    """
    Format a filename using the provided format string and podcast/episode data.

    Available placeholders: ``podcast_title``, ``episode_title``,
    ``episode_number``, ``season``, ``season_episode``, ``published_date``,
    ``author``, ``explicit`` and ``absolute_number``. Forward slashes in the
    format string separate directories; empty path segments are dropped.

    Args:
        format_string (str): Format string with placeholders.
        podcast: Podcast model instance.
        episode: Episode model instance.

    Returns:
        str: Formatted relative path, without a file extension, using the
            OS-specific path separator.
    """
    # Calculate absolute number if needed
    absolute_number = ''
    if '{absolute_number}' in format_string:
        from app.models.podcast import Episode
        # Position of this episode among the podcast's episodes, oldest first
        episodes = Episode.query.filter_by(podcast_id=podcast.id).order_by(Episode.published_date.asc()).all()
        for i, ep in enumerate(episodes, 1):
            if ep.id == episode.id:
                absolute_number = str(i)
                break

    # Format season_episode as S01E01, ensuring season is always included
    number = episode.episode_number
    if not number:
        season_episode = ''
    elif episode.season and str(number).isdecimal():
        # isdecimal() matches exactly what int() can parse; isdigit() also
        # accepts characters such as superscripts that int() rejects.
        season_episode = f"S{episode.season:02d}E{int(number):02d}"
    else:
        # A free-form episode number may contain path separators or other
        # characters that are invalid in a filename, so sanitize it.
        season_episode = f"S{episode.season or 1:02d}E{sanitize_filename(str(number))}"

    # Create a dictionary with all available variables
    format_vars = {
        'podcast_title': sanitize_filename(podcast.title),
        'episode_title': sanitize_filename(episode.title),
        'episode_number': sanitize_filename(str(episode.episode_number)) if episode.episode_number else '',
        'season': sanitize_filename(str(episode.season)) if episode.season else '',
        'season_episode': season_episode,
        'published_date': episode.published_date.strftime('%Y-%m-%d') if episode.published_date else '',
        'author': sanitize_filename(podcast.author) if podcast.author else '',
        'explicit': 'explicit' if episode.explicit else '',
        'absolute_number': sanitize_filename(absolute_number)
    }

    # Format the string
    try:
        formatted_path = format_string.format(**format_vars)
    except (KeyError, IndexError, ValueError, AttributeError) as e:
        # KeyError: unknown placeholder; IndexError: positional placeholder;
        # ValueError: malformed format string; AttributeError: attribute
        # lookup on a placeholder. All fall back to a simple format.
        logger.warning(f"Invalid format variable: {str(e)}")
        formatted_path = f"{format_vars['podcast_title']}/{format_vars['episode_title']}"

    # Replace forward slashes with OS-specific path separator
    formatted_path = formatted_path.replace('/', os.path.sep)

    # Handle empty path segments by removing them
    path_parts = [part for part in formatted_path.split(os.path.sep) if part.strip()]

    # Rejoin the path with proper separators
    return os.path.sep.join(path_parts)
|
|
|
|
def sanitize_filename(filename):
    """
    Sanitize a string to be used as a filename.

    Reserved characters (``< > : " / \\ | ? *``) and non-whitespace control
    characters are replaced with underscores, surrounding whitespace and
    periods are removed, runs of whitespace collapse to a single space, and
    the result is limited to 100 characters.

    Args:
        filename (str): Original filename.

    Returns:
        str: Sanitized filename. An empty or None input yields ``""``; a
            name that becomes empty during sanitization yields ``"unnamed"``.
    """
    if not filename:
        return ""

    # Replace invalid characters in a single pass
    replacements = {ord(char): '_' for char in '<>:"/\\|?*'}
    # NUL makes open()/os.makedirs raise and the other control characters are
    # invalid on Windows. Whitespace controls (tab, newline, ...) are left
    # alone because the whitespace collapsing below turns them into spaces.
    replacements.update(
        (code, '_') for code in (*range(32), 127) if not chr(code).isspace()
    )
    filename = filename.translate(replacements)

    # Remove leading and trailing whitespace and periods
    filename = filename.strip().strip('.')

    # Replace multiple spaces with a single space
    filename = ' '.join(filename.split())

    # Limit length
    if len(filename) > 100:
        filename = filename[:97] + '...'

    # If filename is empty after sanitization, provide a default
    if not filename:
        filename = "unnamed"

    return filename
|
|
|
|
def delete_old_episodes(days=30):
    """
    Delete episodes older than the specified number of days.

    Removes the audio files of downloaded episodes published before the
    cutoff and marks those episodes as not downloaded. Episodes whose file is
    already missing from disk are left untouched.

    Args:
        days (int): Number of days to keep episodes. When a Settings row
            exists and its ``delete_after_days`` is set, that value takes
            precedence over this argument.

    Returns:
        int: Number of episodes deleted.
    """
    from app.models.podcast import Episode

    settings = Settings.query.first()
    # An unset value would make timedelta(days=None) raise, so only override
    # the argument when the setting actually holds a number.
    # NOTE(review): a value of 0 deletes every downloaded episode published
    # before now - confirm that 0 is not meant to disable cleanup.
    if settings and settings.delete_after_days is not None:
        days = settings.delete_after_days

    # Calculate the cutoff date
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    # Find episodes to delete
    episodes = Episode.query.filter(
        Episode.downloaded == True,
        Episode.published_date < cutoff_date
    ).all()

    count = 0
    for episode in episodes:
        if episode.file_path and os.path.exists(episode.file_path):
            try:
                os.remove(episode.file_path)
            except Exception as e:
                # Best effort: keep going with the remaining episodes
                logger.error(f"Error deleting episode file: {str(e)}")
            else:
                episode.file_path = None
                episode.downloaded = False
                count += 1

    db.session.commit()
    logger.info(f"Deleted {count} old episodes")
    return count
|
|
|
|
def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
    """
    Check that every episode flagged as downloaded still has its file on disk.

    Episodes without a stored path, or whose stored path no longer exists,
    are flagged as not downloaded. All changes are committed in one
    transaction after the scan.

    Args:
        podcast_id (int, optional): Restrict the check to this podcast. If
            None, episodes of all podcasts are checked.
        progress_callback (callable, optional): Called as
            ``progress_callback(percent, message)`` while the scan runs.

    Returns:
        dict: ``total_checked`` (episodes examined) and ``missing`` (episodes
            flagged as not downloaded).
    """
    from app.models.podcast import Episode, Podcast

    # Select the episodes to examine
    candidates = Episode.query.filter(Episode.downloaded == True)
    if podcast_id:
        candidates = candidates.filter(Episode.podcast_id == podcast_id)
    episodes = candidates.all()
    total = len(episodes)

    if progress_callback:
        progress_callback(0, f"Verifying {total} downloaded episodes")

    missing = 0
    for position, episode in enumerate(episodes):
        if progress_callback and total > 0:
            percent = int((position / total) * 100)
            progress_callback(percent, f"Verifying episode {position+1}/{total}")

        stored_path = episode.file_path
        if stored_path and os.path.exists(stored_path):
            continue  # file is still present, nothing to update

        episode.downloaded = False
        # Only a path that was recorded but has vanished is worth a warning
        if stored_path:
            logger.warning(f"Episode file not found: {stored_path}")
        missing += 1

    db.session.commit()

    if progress_callback:
        progress_callback(100, f"Verification complete. {missing} episodes marked as not downloaded.")

    logger.info(f"Verified {total} episodes. {missing} were missing.")
    return {'total_checked': total, 'missing': missing}
|
|
|
|
def rename_episode(episode_id, new_format=None, progress_callback=None):
    """
    Rename a downloaded episode file using a new format.

    The new path is built under the configured download path from the chosen
    naming format, keeping the current file's extension. The episode's stored
    path is updated and committed after the file has been moved.

    Args:
        episode_id: ID of the Episode to rename.
        new_format (str, optional): New format string. If None, use the podcast's format or the global settings format.
        progress_callback (callable, optional): Callback function for progress
            updates, called as ``progress_callback(percent, message)``.

    Returns:
        str: New file path, or the current path when the file already has
            the correct name.

    Raises:
        ValueError: If the episode or its podcast cannot be found, or the
            episode is not downloaded / its file does not exist.
        FileExistsError: If a different file already exists at the new path.
        Exception: Any other error raised while moving the file or
            committing is logged and re-raised.
    """
    # Local import: shutil is only needed by this function
    import shutil

    from app.models.podcast import Episode, Podcast

    def report(percent, message):
        """Forward a progress update when a callback was supplied."""
        if progress_callback:
            progress_callback(percent, message)

    report(5, "Loading episode data")

    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    if not episode.downloaded or not episode.file_path or not os.path.exists(episode.file_path):
        raise ValueError("Episode is not downloaded or file does not exist")

    report(10, "Getting podcast and format settings")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    # Create and persist default settings on first use
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format="{podcast_title}/{episode_title}"
        )
        db.session.add(settings)
        db.session.commit()

    # Use provided format, podcast's format, or global settings format
    format_string = new_format or podcast.naming_format or settings.naming_format

    report(20, "Formatting new filename")

    new_filename = format_filename(format_string, podcast, episode)

    # Keep the extension of the current file
    _, ext = os.path.splitext(episode.file_path)

    # Create full path for new file
    download_path = settings.download_path
    new_file_path = os.path.normpath(os.path.join(download_path, new_filename + ext))

    # Ensure the directory exists
    new_file_dir = os.path.dirname(new_file_path)
    os.makedirs(new_file_dir, exist_ok=True)

    report(50, f"Renaming file to {new_file_path}")

    try:
        # Nothing to do when the file already has the requested name
        if os.path.normpath(episode.file_path) == os.path.normpath(new_file_path):
            report(100, "File already has the correct name")
            logger.info(f"Episode file already has the correct name: {new_file_path}")
            return episode.file_path

        # Renaming onto an existing file would silently replace it on POSIX
        # and destroy another episode's audio. The samefile() check still
        # allows case-only renames on case-insensitive filesystems.
        if os.path.exists(new_file_path) and not os.path.samefile(episode.file_path, new_file_path):
            raise FileExistsError(f"Target file already exists: {new_file_path}")

        # shutil.move also works when the target is on another filesystem,
        # where a plain os.rename would fail.
        shutil.move(episode.file_path, new_file_path)
        episode.file_path = new_file_path
        db.session.commit()

        report(100, "File renamed successfully")
        logger.info(f"Renamed episode file: {episode.title} to {new_file_path}")
        return new_file_path

    except Exception as e:
        report(100, f"Error renaming file: {str(e)}")
        logger.error(f"Error renaming episode file: {str(e)}")
        raise
|