# podcastrr/app/services/podcast_downloader.py

"""
Podcast downloader service for Podcastrr.
"""
import os
import requests
import logging
from datetime import datetime, timedelta
from flask import current_app
from app.models.database import db
from app.models.settings import Settings

# Set up logging
logger = logging.getLogger(__name__)

def _extension_for_content_type(content_type):
    """
    Map an HTTP Content-Type header value to an audio file extension.

    Args:
        content_type (str): Raw Content-Type header value (may be empty or None).

    Returns:
        str: Extension including the leading dot; '.mp3' when nothing matches.
    """
    content_type = (content_type or '').lower()
    if 'mp3' in content_type or 'mpeg' in content_type:
        return '.mp3'
    if 'mp4' in content_type or 'm4a' in content_type:
        return '.m4a'
    if 'ogg' in content_type:
        return '.ogg'
    if 'wav' in content_type:
        return '.wav'
    return '.mp3'  # Default to mp3


def download_episode(episode_id, progress_callback=None):
    """
    Download a podcast episode.

    The audio is streamed into a temporary '<target>.part' file and moved to
    its final name only after the transfer finished, so a failed download never
    leaves a truncated file at the final path (or replaces an earlier copy).

    Args:
        episode_id: ID of the Episode to download.
        progress_callback (callable, optional): Callback function for progress
            updates, called as progress_callback(percent, message).

    Returns:
        str: Path to the downloaded file.

    Raises:
        ValueError: If the episode or its podcast does not exist, or the
            episode has no audio URL.
        Exception: Any error raised while downloading, writing the file or
            committing is logged and re-raised unchanged.
    """
    from app.models.podcast import Episode, Podcast

    if progress_callback:
        progress_callback(2, "Loading episode data")

    # Load the episode with its podcast relationship
    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    if not episode.audio_url:
        raise ValueError("Episode has no audio URL")

    if progress_callback:
        progress_callback(5, "Getting settings")

    # Get settings, creating a default row on first use
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format="{podcast_title}/{episode_title}"
        )
        db.session.add(settings)
        db.session.commit()

    # Create download directory
    download_path = settings.download_path
    os.makedirs(download_path, exist_ok=True)

    if progress_callback:
        progress_callback(10, "Formatting filename")

    # Use podcast's naming format if available, otherwise use global settings
    naming_format = podcast.naming_format or settings.naming_format

    # Format filename using the naming format
    filename = format_filename(naming_format, podcast, episode)

    # Ensure the directory exists
    file_dir = os.path.dirname(os.path.join(download_path, filename))
    os.makedirs(file_dir, exist_ok=True)

    # Target path without extension; the extension is appended once the
    # server's Content-Type is known
    file_path = os.path.normpath(os.path.join(download_path, filename))

    response = None
    temp_path = None
    try:
        if progress_callback:
            progress_callback(15, "Connecting to server")

        response = requests.get(episode.audio_url, stream=True, timeout=30)
        response.raise_for_status()

        # Get content type and set appropriate extension
        file_path += _extension_for_content_type(response.headers.get('Content-Type', ''))

        # Get file size if available; a malformed header only disables the
        # percentage display instead of aborting the download
        try:
            file_size = int(response.headers.get('Content-Length', 0))
        except (TypeError, ValueError):
            file_size = 0

        if progress_callback:
            progress_callback(20, "Starting download")

        # Write to a temporary file first
        temp_path = file_path + '.part'
        downloaded_bytes = 0
        with open(temp_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    downloaded_bytes += len(chunk)

                    # Update progress if file size is known
                    if file_size > 0 and progress_callback:
                        progress = 20 + int((downloaded_bytes / file_size) * 70)  # Scale to 20-90%
                        progress_callback(min(progress, 90), f"Downloading: {downloaded_bytes/1024/1024:.1f}MB / {file_size/1024/1024:.1f}MB")

        # The transfer finished: move the file to its final name
        os.replace(temp_path, file_path)

        if progress_callback:
            progress_callback(95, "Updating database")

        # Update episode in database. The model is only touched after a
        # successful download so a failure leaves no pending changes behind.
        # Without a Content-Length header, record the number of bytes written.
        episode.file_size = file_size or downloaded_bytes
        episode.downloaded = True
        episode.file_path = file_path
        db.session.commit()

        if progress_callback:
            progress_callback(100, "Download complete")

        logger.info(f"Downloaded episode: {episode.title}")
        return file_path

    except Exception as e:
        logger.error(f"Error downloading episode: {str(e)}")
        # Remove the partial file, if one was left behind
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial download {temp_path}: {cleanup_error}")
        raise
    finally:
        # A streamed response keeps its connection open until it is closed
        if response is not None:
            response.close()

def format_filename(format_string, podcast, episode):
    """
    Format a filename using the provided format string and podcast/episode data.

    Supported placeholders: podcast_title, episode_title, episode_number,
    season, season_episode, published_date, author, explicit, absolute_number.
    '/' in the format string separates directories; empty path segments are
    dropped. If the format string is empty, cannot be formatted, or produces
    an empty path, "{podcast_title}/{episode_title}" is used instead.

    Args:
        format_string (str): Format string with placeholders.
        podcast: Podcast model instance.
        episode: Episode model instance.

    Returns:
        str: Formatted relative path (without file extension), using the
            OS-specific path separator.
    """
    default_format = "{podcast_title}/{episode_title}"
    if not format_string:
        format_string = default_format

    # Calculate absolute number if needed
    absolute_number = ''
    if '{absolute_number}' in format_string:
        from app.models.podcast import Episode
        # Get all episodes for this podcast ordered by published date
        episodes = Episode.query.filter_by(podcast_id=podcast.id).order_by(Episode.published_date.asc()).all()
        # Find the position of the current episode in the ordered list
        for i, ep in enumerate(episodes, 1):
            if ep.id == episode.id:
                absolute_number = str(i)
                break

    # Build season_episode as S01E01. The season defaults to 1 so it is always
    # included, and a numeric episode number is zero-padded whether or not a
    # season is set.
    # NOTE(review): assumes episode.season is an int (required by ':02d') - confirm against the model.
    raw_episode_number = str(episode.episode_number).strip() if episode.episode_number else ''
    season_episode = ''
    if raw_episode_number:
        season = episode.season or 1
        if raw_episode_number.isdecimal():
            season_episode = f"S{season:02d}E{int(raw_episode_number):02d}"
        else:
            # Non-numeric episode numbers are sanitized so they cannot
            # introduce path separators
            season_episode = f"S{season:02d}E{sanitize_filename(raw_episode_number)}"

    # Create a dictionary with all available variables
    format_vars = {
        'podcast_title': sanitize_filename(podcast.title),
        'episode_title': sanitize_filename(episode.title),
        'episode_number': sanitize_filename(str(episode.episode_number)) if episode.episode_number else '',
        'season': sanitize_filename(str(episode.season)) if episode.season else '',
        'season_episode': season_episode,
        'published_date': episode.published_date.strftime('%Y-%m-%d') if episode.published_date else '',
        'author': sanitize_filename(podcast.author) if podcast.author else '',
        'explicit': 'explicit' if episode.explicit else '',
        'absolute_number': sanitize_filename(absolute_number)
    }

    fallback_path = default_format.format(**format_vars)

    # Format the string. Besides unknown placeholders (KeyError), str.format
    # raises ValueError for malformed braces or format specs, IndexError for
    # positional fields and AttributeError for attribute access on a value.
    try:
        formatted_path = format_string.format(**format_vars)
    except (KeyError, IndexError, ValueError, AttributeError) as e:
        logger.warning(f"Invalid format variable: {str(e)}")
        # Fall back to a simple format
        formatted_path = fallback_path

    def split_segments(path):
        # Use the OS-specific separator and drop empty path segments
        segments = path.replace('/', os.path.sep).split(os.path.sep)
        return [part for part in segments if part.strip()]

    path_parts = split_segments(formatted_path)
    if not path_parts:
        # Every placeholder was empty; without this, all such episodes would
        # resolve to the same (empty) relative path
        path_parts = split_segments(fallback_path)

    # Rejoin the path with proper separators
    return os.path.sep.join(path_parts)

# Characters replaced by '_': the reserved set < > : " / \ | ? *, DEL, and
# every ASCII control character that is not whitespace (whitespace is
# collapsed to single spaces instead).
_INVALID_FILENAME_CHARS = '<>:"/\\|?*\x7f' + ''.join(
    chr(code) for code in range(32) if not chr(code).isspace()
)
_FILENAME_TRANSLATION = str.maketrans(
    _INVALID_FILENAME_CHARS, '_' * len(_INVALID_FILENAME_CHARS)
)


def sanitize_filename(filename):
    """
    Sanitize a string to be used as a filename.

    Invalid and control characters are replaced with '_', runs of whitespace
    are collapsed to a single space, and leading/trailing spaces and periods
    are removed. Names longer than 100 characters are cut to 97 characters
    plus '...'.

    Args:
        filename (str): Original filename. Other non-empty values are
            converted with str().

    Returns:
        str: Sanitized filename; "" for empty/None input and "unnamed" when
            nothing is left after sanitization.
    """
    if not filename:
        return ""

    # Replace invalid characters in a single pass
    filename = str(filename).translate(_FILENAME_TRANSLATION)

    # Replace multiple spaces (and other whitespace) with a single space
    filename = ' '.join(filename.split())

    # Remove leading and trailing whitespace and periods. Stripping both at
    # once also handles interleaved input such as " .a. ."
    filename = filename.strip(' .')

    # Limit length, marking the cut with an ellipsis
    if len(filename) > 100:
        filename = filename[:97] + '...'

    # If filename is empty after sanitization, provide a default
    if not filename:
        filename = "unnamed"

    return filename

def delete_old_episodes(days=30):
    """
    Delete the files of downloaded episodes published more than `days` days ago.

    When a Settings row exists and its delete_after_days value is set, that
    value replaces the `days` argument. Each episode whose file is removed has
    its file_path cleared and is marked as not downloaded; episodes whose file
    is missing or cannot be removed are left unchanged.

    Args:
        days (int): Number of days to keep episodes. Used when no Settings row
            exists or its delete_after_days is not set.

    Returns:
        int: Number of episodes deleted.
    """
    from app.models.podcast import Episode

    settings = Settings.query.first()
    # A Settings row may exist without a retention value; keep the argument
    # then instead of passing None to timedelta().
    if settings and settings.delete_after_days is not None:
        days = settings.delete_after_days

    # Calculate the cutoff date
    # NOTE(review): utcnow() is naive; presumably published_date is stored as naive UTC - confirm.
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    # Find episodes to delete
    episodes = Episode.query.filter(
        Episode.downloaded == True,
        Episode.published_date < cutoff_date
    ).all()

    count = 0
    for episode in episodes:
        if not episode.file_path or not os.path.exists(episode.file_path):
            continue

        try:
            os.remove(episode.file_path)
        except OSError as e:
            # Best effort: log the failure and continue with the next episode
            logger.error(f"Error deleting episode file: {str(e)}")
            continue

        episode.file_path = None
        episode.downloaded = False
        count += 1

    db.session.commit()
    logger.info(f"Deleted {count} old episodes")
    return count

def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
    """
    Check that every episode flagged as downloaded still has its file on disk.

    Episodes whose stored path is empty or no longer exists are flagged as not
    downloaded (the stored path itself is left as it is), and the changes are
    committed in one go at the end.

    Args:
        podcast_id (int, optional): Restrict the check to this podcast. If None, check all podcasts.
        progress_callback (callable, optional): Called as progress_callback(percent, message).

    Returns:
        dict: 'total_checked' (episodes examined) and 'missing' (episodes
            that were marked as not downloaded).
    """
    from app.models.podcast import Episode, Podcast

    # Select the episodes currently flagged as downloaded
    downloaded_query = Episode.query.filter(Episode.downloaded == True)
    if podcast_id:
        downloaded_query = downloaded_query.filter(Episode.podcast_id == podcast_id)

    candidates = downloaded_query.all()
    total = len(candidates)

    if progress_callback:
        progress_callback(0, f"Verifying {total} downloaded episodes")

    missing = 0
    for index, episode in enumerate(candidates):
        if progress_callback and total > 0:
            percent = int((index / total) * 100)
            progress_callback(percent, f"Verifying episode {index+1}/{total}")

        stored_path = episode.file_path
        if stored_path and os.path.exists(stored_path):
            # File is still there, nothing to update
            continue

        episode.downloaded = False
        if stored_path:
            logger.warning(f"Episode file not found: {stored_path}")
        missing += 1

    db.session.commit()

    if progress_callback:
        progress_callback(100, f"Verification complete. {missing} episodes marked as not downloaded.")

    logger.info(f"Verified {total} episodes. {missing} were missing.")
    return dict(total_checked=total, missing=missing)

def rename_episode(episode_id, new_format=None, progress_callback=None):
    """
    Rename a downloaded episode file using a new format.

    The new name is built with format_filename() below the configured download
    path and keeps the current file's extension. An existing file that belongs
    to something else is never overwritten, and if the database update fails
    the file is moved back to its original name.

    Args:
        episode_id: ID of the Episode to rename.
        new_format (str, optional): New format string. If None, use the podcast's format or the global settings format.
        progress_callback (callable, optional): Callback function for progress
            updates, called as progress_callback(percent, message).

    Returns:
        str: New file path (the current path when the name is already correct).

    Raises:
        ValueError: If the episode or its podcast does not exist, or the
            episode is not downloaded / its file is missing.
        FileExistsError: If a different file already exists at the new path.
        Exception: Any error raised while renaming or committing is logged,
            reported through progress_callback and re-raised.
    """
    from app.models.podcast import Episode, Podcast

    if progress_callback:
        progress_callback(5, "Loading episode data")

    # Load the episode with its podcast relationship
    episode = Episode.query.get(episode_id)
    if not episode:
        raise ValueError(f"Episode with ID {episode_id} not found")

    if not episode.downloaded or not episode.file_path or not os.path.exists(episode.file_path):
        raise ValueError("Episode is not downloaded or file does not exist")

    if progress_callback:
        progress_callback(10, "Getting podcast and format settings")

    # Explicitly load the podcast to avoid lazy loading issues
    podcast = Podcast.query.get(episode.podcast_id)
    if not podcast:
        raise ValueError(f"Podcast with ID {episode.podcast_id} not found")

    # Get settings, creating a default row on first use
    settings = Settings.query.first()
    if not settings:
        settings = Settings(
            download_path=current_app.config['DOWNLOAD_PATH'],
            naming_format="{podcast_title}/{episode_title}"
        )
        db.session.add(settings)
        db.session.commit()

    # Use provided format, podcast's format, or global settings format
    format_string = new_format or podcast.naming_format or settings.naming_format

    if progress_callback:
        progress_callback(20, "Formatting new filename")

    # Format new filename
    new_filename = format_filename(format_string, podcast, episode)

    # Get file extension from current file
    current_path = episode.file_path
    _, ext = os.path.splitext(current_path)

    # Create full path for new file
    download_path = settings.download_path
    new_file_path = os.path.normpath(os.path.join(download_path, new_filename + ext))

    # Ensure the directory exists
    new_file_dir = os.path.dirname(new_file_path)
    os.makedirs(new_file_dir, exist_ok=True)

    if progress_callback:
        progress_callback(50, f"Renaming file to {new_file_path}")

    # Rename the file
    try:
        # Nothing to do when the path is unchanged
        if os.path.normpath(current_path) == os.path.normpath(new_file_path):
            if progress_callback:
                progress_callback(100, "File already has the correct name")

            logger.info(f"Episode file already has the correct name: {new_file_path}")
            return episode.file_path

        # Refuse to overwrite a different file (e.g. another episode whose
        # name formats identically). samefile() still allows case-only
        # renames on case-insensitive filesystems.
        if os.path.exists(new_file_path) and not os.path.samefile(current_path, new_file_path):
            raise FileExistsError(f"Target file already exists: {new_file_path}")

        os.rename(current_path, new_file_path)
        episode.file_path = new_file_path
        try:
            db.session.commit()
        except Exception:
            # Keep disk and database consistent: undo the pending change and
            # move the file back to where the database says it is
            db.session.rollback()
            try:
                os.rename(new_file_path, current_path)
            except OSError as restore_error:
                logger.error(f"Could not restore original file name after failed commit: {restore_error}")
            raise

        if progress_callback:
            progress_callback(100, "File renamed successfully")

        logger.info(f"Renamed episode file: {episode.title} to {new_file_path}")
        return new_file_path
    except Exception as e:
        if progress_callback:
            progress_callback(100, f"Error renaming file: {str(e)}")

        logger.error(f"Error renaming episode file: {str(e)}")
        raise