podcastrr/app/services/podcast_updater.py
2025-06-16 23:07:36 -07:00

256 lines
10 KiB
Python

"""
Podcast updater service for Podcastrr.
"""
import logging
from datetime import datetime, timedelta
from flask import current_app
from app.models.database import db
from app.models.podcast import Podcast, Episode
from app.models.settings import Settings
from app.services.podcast_search import get_podcast_episodes
from app.services.podcast_downloader import download_episode
# Set up logging
logger = logging.getLogger(__name__)
def update_all_podcasts():
    """
    Update every podcast in the database.

    Delegates each podcast to update_podcast() and aggregates the results.
    A failure for one podcast is logged and counted but does not stop the
    remaining podcasts from being updated.

    Returns:
        dict: Aggregate statistics with keys 'podcasts_updated',
            'new_episodes', 'episodes_downloaded', and 'errors'.
    """
    podcasts = Podcast.query.all()
    stats = {
        'podcasts_updated': 0,
        'new_episodes': 0,
        'episodes_downloaded': 0,
        'errors': 0
    }

    for podcast in podcasts:
        try:
            result = update_podcast(podcast.id)
        except Exception:
            # logger.exception records the full traceback, which the
            # original f-string-only message discarded; lazy %-args avoid
            # formatting work when the log level is disabled.
            logger.exception("Error updating podcast %s", podcast.title)
            stats['errors'] += 1
        else:
            # Only count a podcast as updated when update_podcast() returned.
            stats['podcasts_updated'] += 1
            stats['new_episodes'] += result['new_episodes']
            stats['episodes_downloaded'] += result['episodes_downloaded']

    return stats
def update_podcast(podcast_id, progress_callback=None):
    """
    Update a specific podcast.

    Fetches the podcast's feed, records any new episodes, marks episodes
    whose audio file already exists on disk as downloaded, and optionally
    auto-downloads new episodes. Commits happen incrementally (per episode)
    so partial progress survives a later failure; on an unexpected error the
    current transaction is rolled back and the exception re-raised.

    Args:
        podcast_id (int): ID of the podcast to update. Aborts with 404 via
            Flask-SQLAlchemy's get_or_404 if no such podcast exists.
        progress_callback (callable, optional): Called as
            ``progress_callback(percent, message)`` with percent in 0-100.

    Returns:
        dict: Statistics with keys 'new_episodes', 'episodes_downloaded',
            'feed_status' ('success', 'no_episodes',
            'all_episodes_have_errors', or 'error'), plus 'error_message' /
            'status_code' / 'error' in the corresponding failure cases.

    Raises:
        Exception: Re-raises whatever get_podcast_episodes / the DB layer
            raised, after rolling back the session.
    """
    podcast = Podcast.query.get_or_404(podcast_id)
    stats = {
        'new_episodes': 0,
        'episodes_downloaded': 0,
        'feed_status': 'success'
    }
    try:
        logger.info(f"Updating podcast: {podcast.title} (ID: {podcast.id})")
        logger.info(f"Feed URL: {podcast.feed_url}")
        if progress_callback:
            progress_callback(10, f"Fetching episodes for {podcast.title}")
        # Get episodes from feed
        episodes = get_podcast_episodes(podcast.feed_url)
        # Update podcast last_checked timestamp (persisted by a later commit)
        podcast.last_checked = datetime.utcnow()
        if progress_callback:
            progress_callback(30, f"Found {len(episodes)} episodes")
        if not episodes:
            logger.warning(f"No episodes found for podcast: {podcast.title}")
            stats['feed_status'] = 'no_episodes'
        else:
            # Check if all episodes have download errors
            error_episodes = [ep for ep in episodes if ep.get('download_error')]
            if len(error_episodes) == len(episodes):
                logger.warning(f"All {len(episodes)} episodes have download errors for podcast: {podcast.title}")
                stats['feed_status'] = 'all_episodes_have_errors'
                # Store the most common error for reporting
                # NOTE(review): this actually reports the FIRST episode's
                # error, not the most common one — confirm intent.
                if error_episodes:
                    stats['error_message'] = error_episodes[0].get('download_error', 'Unknown error')
                    stats['status_code'] = error_episodes[0].get('status_code')
                # Check if we need to refresh the feed URL from iTunes
                # (only attempted when every episode errored — a stale feed
                # URL is the likely cause in that case)
                if podcast.external_id:
                    try:
                        from app.services.podcast_search import search_podcasts
                        logger.info(f"Trying to refresh feed URL from iTunes for podcast ID: {podcast.external_id}")
                        podcast_data = search_podcasts(podcast_id=podcast.external_id)
                        if podcast_data and podcast_data.get('feed_url') and podcast_data['feed_url'] != podcast.feed_url:
                            logger.info(f"Updated feed URL from {podcast.feed_url} to {podcast_data['feed_url']}")
                            podcast.feed_url = podcast_data['feed_url']
                            db.session.commit()
                            # Try again with the new feed URL
                            # NOTE(review): stats['feed_status'] is not reset
                            # here even if the retry succeeds — confirm.
                            episodes = get_podcast_episodes(podcast.feed_url)
                            logger.info(f"Found {len(episodes)} episodes with updated feed URL")
                    except Exception as e:
                        # Best-effort refresh: keep going with the old feed data.
                        logger.error(f"Error refreshing feed URL: {str(e)}")
        # Process each episode
        total_episodes = len(episodes)
        for i, episode_data in enumerate(episodes):
            if progress_callback and total_episodes > 0:
                progress = 30 + int((i / total_episodes) * 60)  # Scale from 30% to 90%
                progress_callback(progress, f"Processing episode {i+1}/{total_episodes}")
            # Skip episodes without required fields
            if not episode_data.get('guid'):
                logger.warning(f"Skipping episode without GUID: {episode_data.get('title', 'Unknown')}")
                continue
            if not episode_data.get('audio_url'):
                logger.warning(f"Skipping episode without audio URL: {episode_data.get('title', 'Unknown')}")
                continue
            # Check if episode already exists
            # NOTE(review): lookup is by GUID alone, not (podcast_id, guid) —
            # assumes GUIDs are globally unique across feeds; verify.
            existing = Episode.query.filter_by(guid=episode_data['guid']).first()
            if not existing:
                # Create new episode
                try:
                    episode = Episode(
                        podcast_id=podcast.id,
                        title=episode_data.get('title', ''),
                        description=episode_data.get('description', ''),
                        audio_url=episode_data.get('audio_url', ''),
                        image_url=episode_data.get('image_url', podcast.image_url),  # Use podcast image if episode has none
                        published_date=episode_data.get('published_date'),
                        duration=episode_data.get('duration'),
                        file_size=episode_data.get('file_size'),
                        season=episode_data.get('season'),  # Season number
                        episode_number=episode_data.get('episode_number'),
                        guid=episode_data['guid'],
                        downloaded=False,
                        explicit=episode_data.get('explicit'),  # Explicit flag
                        download_error=episode_data.get('download_error'),  # Error message if download failed
                        status_code=episode_data.get('status_code')  # HTTP status code
                    )
                    db.session.add(episode)
                    stats['new_episodes'] += 1
                    logger.info(f"Added new episode: {episode.title}")
                    # Need to commit first to ensure episode has an ID
                    db.session.commit()
                    # Check if file already exists for this episode
                    try:
                        from app.services.podcast_downloader import format_filename
                        import os
                        from app.models.settings import Settings
                        settings = Settings.query.first()
                        if not settings:
                            # Bootstrap a default Settings row on first run.
                            settings = Settings(
                                download_path=current_app.config['DOWNLOAD_PATH'],
                                naming_format="{podcast_title}/{episode_title}"
                            )
                            db.session.add(settings)
                            db.session.commit()
                        # Use podcast's naming format if available, otherwise use global settings
                        naming_format = podcast.naming_format or settings.naming_format
                        # Format filename using the naming format
                        filename = format_filename(naming_format, podcast, episode)
                        download_path = settings.download_path
                        # Check for common audio file extensions
                        extensions = ['.mp3', '.m4a', '.ogg', '.wav']
                        for ext in extensions:
                            file_path = os.path.normpath(os.path.join(download_path, filename + ext))
                            if os.path.exists(file_path):
                                logger.info(f"Found existing file for episode: {file_path}")
                                episode.downloaded = True
                                episode.file_path = file_path
                                db.session.commit()
                                break
                        logger.info(f"Checked for existing files for episode: {episode.title}")
                    except Exception as e:
                        # Non-fatal: episode stays marked as not downloaded.
                        logger.error(f"Error checking for existing files for episode {episode.title}: {str(e)}")
                    # Auto-download if enabled and not already downloaded
                    if podcast.auto_download and episode.audio_url and not episode.downloaded:
                        try:
                            download_episode(episode.id)
                            stats['episodes_downloaded'] += 1
                            logger.info(f"Auto-downloaded episode: {episode.title}")
                        except Exception as e:
                            logger.error(f"Error auto-downloading episode {episode.title}: {str(e)}")
                except Exception as e:
                    # One bad episode does not abort the rest of the feed.
                    logger.error(f"Error adding episode: {str(e)}")
        # Update podcast last_updated timestamp if new episodes were found
        if stats['new_episodes'] > 0:
            podcast.last_updated = datetime.utcnow()
        # Final commit also persists last_checked set above.
        db.session.commit()
        logger.info(f"Podcast update completed: {stats}")
        if progress_callback:
            progress_callback(100, f"Update complete. Found {stats['new_episodes']} new episodes.")
        return stats
    except Exception as e:
        db.session.rollback()
        logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
        stats['feed_status'] = 'error'
        stats['error'] = str(e)
        if progress_callback:
            progress_callback(100, f"Error: {str(e)}")
        raise
def schedule_updates():
    """
    Run one update pass over every podcast on behalf of a scheduler.

    Intended to be invoked periodically (e.g. by APScheduler). Any failure
    is logged rather than propagated, so the scheduler keeps running.
    """
    logger.info("Starting scheduled podcast updates")
    try:
        logger.info(f"Scheduled update completed: {update_all_podcasts()}")
    except Exception as e:
        logger.error(f"Error during scheduled update: {str(e)}")
def clean_old_downloads():
    """
    Remove downloaded episodes that have aged out.

    Intended to be invoked periodically (e.g. by APScheduler). Any failure
    is logged rather than propagated, so the scheduler keeps running.
    """
    # Imported lazily to avoid a circular import at module load time.
    from app.services.podcast_downloader import delete_old_episodes

    logger.info("Starting cleanup of old downloads")
    try:
        removed = delete_old_episodes()
        logger.info(f"Deleted {removed} old episodes")
    except Exception as e:
        logger.error(f"Error during cleanup: {str(e)}")