Initial commit

Cody Cook 2025-06-14 22:53:38 -07:00
commit e86ab53de5
35 changed files with 2638 additions and 0 deletions

app/services/__init__.py Normal file

@@ -0,0 +1,3 @@
"""
Services package for Podcastrr.
"""

app/services/podcast_downloader.py Normal file

@@ -0,0 +1,179 @@
"""
Podcast downloader service for Podcastrr.
"""
import os
import requests
import logging
from datetime import datetime, timedelta
from flask import current_app
from app.models.database import db
from app.models.settings import Settings
# Set up logging
logger = logging.getLogger(__name__)
def download_episode(episode):
"""
Download a podcast episode.
Args:
episode: Episode model instance.
Returns:
str: Path to the downloaded file.
"""
if not episode.audio_url:
raise ValueError("Episode has no audio URL")
# Get settings
settings = Settings.query.first()
if not settings:
settings = Settings(
download_path=current_app.config['DOWNLOAD_PATH'],
naming_format="{podcast_title}/{episode_title}"
)
db.session.add(settings)
db.session.commit()
# Create download directory
download_path = settings.download_path
os.makedirs(download_path, exist_ok=True)
# Format filename using the naming format
podcast = episode.podcast
filename = format_filename(settings.naming_format, podcast, episode)
# Ensure the directory exists
file_dir = os.path.dirname(os.path.join(download_path, filename))
os.makedirs(file_dir, exist_ok=True)
# Add file extension based on content type
file_path = os.path.join(download_path, filename)
# Download the file
try:
        # Stream with a timeout so a stalled server cannot hang the worker
        response = requests.get(episode.audio_url, stream=True, timeout=30)
response.raise_for_status()
# Get content type and set appropriate extension
content_type = response.headers.get('Content-Type', '')
        # MP3 is commonly served as either audio/mp3 or audio/mpeg
        if 'mp3' in content_type or 'mpeg' in content_type:
            file_path += '.mp3'
elif 'mp4' in content_type or 'm4a' in content_type:
file_path += '.m4a'
elif 'ogg' in content_type:
file_path += '.ogg'
elif 'wav' in content_type:
file_path += '.wav'
else:
file_path += '.mp3' # Default to mp3
# Write the file
with open(file_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
# Update episode in database
episode.downloaded = True
episode.file_path = file_path
db.session.commit()
logger.info(f"Downloaded episode: {episode.title}")
return file_path
except Exception as e:
logger.error(f"Error downloading episode: {str(e)}")
raise
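# Example usage (a sketch, not part of this module; assumes a Flask app
# context and a persisted Episode with a valid audio_url):
#     with app.app_context():
#         episode = Episode.query.get(1)
#         download_episode(episode)  # returns the saved file path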
def format_filename(format_string, podcast, episode):
"""
Format a filename using the provided format string and podcast/episode data.
Args:
format_string (str): Format string with placeholders.
podcast: Podcast model instance.
episode: Episode model instance.
Returns:
str: Formatted filename.
"""
# Create a dictionary with all available variables
format_vars = {
'podcast_title': sanitize_filename(podcast.title),
'episode_title': sanitize_filename(episode.title),
'episode_number': sanitize_filename(str(episode.episode_number)) if episode.episode_number else '',
'published_date': episode.published_date.strftime('%Y-%m-%d') if episode.published_date else '',
'author': sanitize_filename(podcast.author) if podcast.author else ''
}
# Format the string
try:
return format_string.format(**format_vars)
except KeyError as e:
logger.warning(f"Invalid format variable: {str(e)}")
# Fall back to a simple format
return f"{format_vars['podcast_title']}/{format_vars['episode_title']}"
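# Illustrative example (hypothetical titles "My Show" and "Episode 1: Pilot"):
#     format_filename("{podcast_title}/{episode_title}", podcast, episode)
#     -> "My Show/Episode 1_ Pilot"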
def sanitize_filename(filename):
"""
Sanitize a string to be used as a filename.
Args:
filename (str): Original filename.
Returns:
str: Sanitized filename.
"""
# Replace invalid characters
invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
for char in invalid_chars:
filename = filename.replace(char, '_')
# Limit length
if len(filename) > 100:
filename = filename[:97] + '...'
return filename
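# Illustrative example:
#     sanitize_filename('AC/DC: Live') -> 'AC_DC_ Live'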
def delete_old_episodes(days=30):
"""
Delete episodes older than the specified number of days.
Args:
days (int): Number of days to keep episodes.
Returns:
int: Number of episodes deleted.
"""
from app.models.podcast import Episode
    settings = Settings.query.first()
    if settings and settings.delete_after_days:
        # A configured retention period overrides the days argument
        days = settings.delete_after_days
# Calculate the cutoff date
cutoff_date = datetime.utcnow() - timedelta(days=days)
# Find episodes to delete
episodes = Episode.query.filter(
Episode.downloaded == True,
Episode.published_date < cutoff_date
).all()
count = 0
for episode in episodes:
if episode.file_path and os.path.exists(episode.file_path):
try:
os.remove(episode.file_path)
episode.file_path = None
episode.downloaded = False
count += 1
except Exception as e:
logger.error(f"Error deleting episode file: {str(e)}")
db.session.commit()
logger.info(f"Deleted {count} old episodes")
return count
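# Example usage (sketch; assumes an app context). Note that a configured
# Settings.delete_after_days overrides the days argument:
#     removed = delete_old_episodes(days=14)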

app/services/podcast_search.py Normal file

@@ -0,0 +1,317 @@
"""
Podcast search service for Podcastrr.
"""
import requests
import feedparser
from datetime import datetime
import logging
# Set up logging
logger = logging.getLogger(__name__)
def search_podcasts(query=None, podcast_id=None):
"""
Search for podcasts using the iTunes API.
Args:
query (str): Search query for podcasts.
podcast_id (str): iTunes podcast ID to get specific podcast.
Returns:
list: List of podcast dictionaries if query is provided.
dict: Podcast dictionary if podcast_id is provided.
"""
    if not query and not podcast_id:
        # Mirror the success return types: empty list for a search, None for a lookup
        return [] if query is not None else None
try:
if podcast_id:
# Get specific podcast by ID
url = f"https://itunes.apple.com/lookup?id={podcast_id}&entity=podcast"
            response = requests.get(url, timeout=10)
data = response.json()
if data['resultCount'] == 0:
return None
podcast = data['results'][0]
return _format_podcast(podcast)
else:
# Search for podcasts
url = f"https://itunes.apple.com/search?term={query}&entity=podcast&limit=20"
response = requests.get(url)
data = response.json()
results = []
for podcast in data['results']:
results.append(_format_podcast(podcast))
return results
except Exception as e:
logger.error(f"Error searching podcasts: {str(e)}")
return [] if query is not None else None
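# Example usage (a sketch; performs live iTunes API requests, and the ID
# below is hypothetical):
#     results = search_podcasts(query="python")          # list of dicts
#     podcast = search_podcasts(podcast_id="123456789")  # dict or None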
def _format_podcast(podcast):
"""
Format podcast data from iTunes API.
Args:
podcast (dict): Podcast data from iTunes API.
Returns:
dict: Formatted podcast data.
"""
feed_url = podcast.get('feedUrl', '')
# Log feed URL for debugging
logger.info(f"Podcast: {podcast.get('collectionName', '')}, Feed URL: {feed_url}")
if not feed_url:
logger.warning(f"No feed URL found for podcast: {podcast.get('collectionName', '')}")
return {
'title': podcast.get('collectionName', ''),
'author': podcast.get('artistName', ''),
'description': podcast.get('description', ''),
'image_url': podcast.get('artworkUrl600', podcast.get('artworkUrl100', '')),
'feed_url': feed_url,
'external_id': str(podcast.get('collectionId', '')),
'genre': podcast.get('primaryGenreName', ''),
'country': podcast.get('country', '')
}
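# Illustrative mapping (abridged, hypothetical iTunes response):
#     _format_podcast({'collectionName': 'My Show', 'artistName': 'Jane Doe',
#                      'feedUrl': 'https://example.com/rss', 'collectionId': 42})
#     -> {'title': 'My Show', 'author': 'Jane Doe',
#         'feed_url': 'https://example.com/rss', 'external_id': '42', ...}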
def get_podcast_episodes(feed_url):
"""
Get podcast episodes from RSS feed.
Args:
feed_url (str): URL of the podcast RSS feed.
Returns:
list: List of episode dictionaries.
"""
try:
if not feed_url:
logger.error("Empty feed URL provided")
return []
logger.info(f"Fetching episodes from feed: {feed_url}")
# Check if the feed URL is valid and follow redirects
try:
response = requests.head(feed_url, allow_redirects=True, timeout=10)
if response.status_code != 200:
logger.error(f"Feed URL returned status code {response.status_code}: {feed_url}")
if response.url != feed_url:
logger.info(f"Feed URL redirected from {feed_url} to {response.url}")
feed_url = response.url
except Exception as e:
logger.warning(f"Error checking feed URL: {str(e)}")
# Parse the feed
feed = feedparser.parse(feed_url)
# Check for parsing errors
if hasattr(feed, 'bozo_exception') and feed.bozo_exception:
logger.error(f"Error parsing feed: {feed.bozo_exception}")
# Try to parse the feed with requests if feedparser fails
if len(feed.entries) == 0:
try:
logger.info("Trying alternative method to fetch feed")
response = requests.get(feed_url, timeout=10)
feed = feedparser.parse(response.content)
logger.info(f"Alternative method found {len(feed.entries)} entries")
except Exception as e:
logger.error(f"Alternative method also failed: {str(e)}")
logger.info(f"Found {len(feed.entries)} entries in feed")
episodes = []
for entry in feed.entries:
# Log entry details for debugging
logger.debug(f"Processing entry: {entry.get('title', 'No title')}")
# Extract basic episode info
episode = {
'title': entry.get('title', ''),
'description': entry.get('description', ''),
'published_date': _parse_date(entry.get('published')),
'guid': entry.get('id', ''),
'duration': _parse_duration(entry.get('itunes_duration', '')),
'episode_number': entry.get('itunes_episode', '')
}
# Generate a GUID if one is not provided
if not episode['guid']:
# Try to use a link as GUID
for link in entry.get('links', []):
if link.get('rel') == 'alternate' or link.get('type') == 'text/html':
episode['guid'] = link.get('href', '')
logger.debug(f"Generated GUID from link: {episode['guid']}")
break
# If still no GUID, generate one from title and date
if not episode['guid'] and episode['title']:
import hashlib
# Create a hash from the title and published date (if available)
hash_input = episode['title']
if episode['published_date']:
hash_input += episode['published_date'].isoformat()
episode['guid'] = hashlib.md5(hash_input.encode('utf-8')).hexdigest()
logger.debug(f"Generated GUID from title and date: {episode['guid']}")
# If still no GUID (no title), skip this episode
if not episode['guid']:
logger.warning("Could not generate GUID for episode, skipping")
continue
# Get audio URL
audio_found = False
# Method 1: Check links
for link in entry.get('links', []):
if link.get('type', '').startswith('audio/'):
episode['audio_url'] = link.get('href', '')
episode['file_size'] = link.get('length', 0)
audio_found = True
logger.debug(f"Found audio URL in links: {episode['audio_url']}")
break
# Method 2: Check enclosures
if not audio_found and hasattr(entry, 'enclosures') and entry.enclosures:
for enclosure in entry.enclosures:
if enclosure.get('type', '').startswith('audio/'):
episode['audio_url'] = enclosure.get('href', '')
episode['file_size'] = enclosure.get('length', 0)
audio_found = True
logger.debug(f"Found audio URL in enclosure: {episode['audio_url']}")
break
# Method 3: Check media:content
if not audio_found and hasattr(entry, 'media_content'):
for media in entry.media_content:
if media.get('type', '').startswith('audio/'):
episode['audio_url'] = media.get('url', '')
episode['file_size'] = media.get('fileSize', 0)
audio_found = True
logger.debug(f"Found audio URL in media:content: {episode['audio_url']}")
break
# Method 4: Check for generic enclosure
if not audio_found and hasattr(entry, 'enclosures') and entry.enclosures:
# Try any enclosure if we haven't found an audio URL yet
enclosure = entry.enclosures[0]
episode['audio_url'] = enclosure.get('href', '')
episode['file_size'] = enclosure.get('length', 0)
audio_found = True
logger.debug(f"Found audio URL in generic enclosure: {episode['audio_url']}")
if not audio_found:
logger.warning(f"No audio URL found for episode: {episode['title']}")
# Get image URL
if 'image' in entry and 'href' in entry.image:
episode['image_url'] = entry.image.href
# Only add episodes with audio URLs
if audio_found and 'audio_url' in episode and episode['audio_url']:
# Validate the audio URL
try:
# Check if the URL is valid
if not episode['audio_url'].startswith(('http://', 'https://')):
logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
continue
# Try to validate the URL without downloading the file
head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
# Check if the URL is accessible
if head_response.status_code >= 400:
logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
continue
# Check if the content type is audio
content_type = head_response.headers.get('Content-Type', '')
if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
logger.warning(f"Audio URL has non-audio content type: {content_type}")
# Don't skip here as some servers might not report the correct content type
# If we got here, the audio URL is probably valid
episodes.append(episode)
logger.debug(f"Added episode with valid audio URL: {episode['title']}")
except Exception as e:
# If we can't validate the URL, still add the episode but log a warning
logger.warning(f"Could not validate audio URL: {str(e)}")
episodes.append(episode)
logger.debug(f"Added episode with unvalidated audio URL: {episode['title']}")
else:
logger.warning(f"Skipping episode without audio URL: {episode['title']}")
logger.info(f"Processed {len(episodes)} valid episodes")
return episodes
except Exception as e:
logger.error(f"Error getting podcast episodes: {str(e)}")
return []
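# Example usage (a sketch; fetches and parses a live feed, URL hypothetical):
#     episodes = get_podcast_episodes("https://example.com/feed.xml")
#     for ep in episodes:
#         print(ep['title'], ep['audio_url'])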
def _parse_date(date_str):
"""
Parse date string to datetime object.
Args:
date_str (str): Date string from RSS feed.
Returns:
datetime: Parsed datetime object or None.
"""
if not date_str:
return None
    # Try each known feed date format in turn
    for fmt in ('%a, %d %b %Y %H:%M:%S %z',
                '%a, %d %b %Y %H:%M:%S %Z',
                '%Y-%m-%dT%H:%M:%S%z'):
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    logger.warning(f"Could not parse date: {date_str}")
    return None
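# Illustrative examples:
#     _parse_date("Mon, 02 Jan 2006 15:04:05 +0000")  -> aware datetime(2006, 1, 2, 15, 4, 5)
#     _parse_date("not a date")                       -> None (with a warning logged)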
def _parse_duration(duration_str):
"""
Parse duration string to seconds.
Args:
duration_str (str): Duration string from RSS feed.
Returns:
int: Duration in seconds or None.
"""
if not duration_str:
return None
try:
# Try to parse as seconds
return int(duration_str)
except ValueError:
try:
# Try to parse as HH:MM:SS
parts = duration_str.split(':')
if len(parts) == 3:
hours, minutes, seconds = parts
return int(hours) * 3600 + int(minutes) * 60 + int(seconds)
elif len(parts) == 2:
minutes, seconds = parts
return int(minutes) * 60 + int(seconds)
else:
return None
except ValueError:
logger.warning(f"Could not parse duration: {duration_str}")
return None
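# Illustrative examples (all three forms yield the same duration):
#     _parse_duration("3723")     -> 3723
#     _parse_duration("01:02:03") -> 3723
#     _parse_duration("62:03")    -> 3723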

app/services/podcast_updater.py Normal file

@@ -0,0 +1,184 @@
"""
Podcast updater service for Podcastrr.
"""
import logging
from datetime import datetime, timedelta
from flask import current_app
from app.models.database import db
from app.models.podcast import Podcast, Episode
from app.models.settings import Settings
from app.services.podcast_search import get_podcast_episodes
from app.services.podcast_downloader import download_episode
# Set up logging
logger = logging.getLogger(__name__)
def update_all_podcasts():
"""
Update all podcasts in the database.
Returns:
dict: Statistics about the update process.
"""
podcasts = Podcast.query.all()
stats = {
'podcasts_updated': 0,
'new_episodes': 0,
'episodes_downloaded': 0,
'errors': 0
}
for podcast in podcasts:
try:
result = update_podcast(podcast.id)
stats['podcasts_updated'] += 1
stats['new_episodes'] += result['new_episodes']
stats['episodes_downloaded'] += result['episodes_downloaded']
except Exception as e:
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['errors'] += 1
return stats
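# Example usage (sketch; assumes an app context, values illustrative):
#     stats = update_all_podcasts()
#     # -> {'podcasts_updated': 3, 'new_episodes': 7,
#     #     'episodes_downloaded': 2, 'errors': 0}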
def update_podcast(podcast_id):
"""
Update a specific podcast.
Args:
podcast_id (int): ID of the podcast to update.
Returns:
dict: Statistics about the update process.
"""
podcast = Podcast.query.get_or_404(podcast_id)
stats = {
'new_episodes': 0,
'episodes_downloaded': 0,
'feed_status': 'success'
}
try:
logger.info(f"Updating podcast: {podcast.title} (ID: {podcast.id})")
logger.info(f"Feed URL: {podcast.feed_url}")
# Get episodes from feed
episodes = get_podcast_episodes(podcast.feed_url)
# Update podcast last_checked timestamp
podcast.last_checked = datetime.utcnow()
if not episodes:
logger.warning(f"No episodes found for podcast: {podcast.title}")
stats['feed_status'] = 'no_episodes'
# Check if we need to refresh the feed URL from iTunes
if podcast.external_id:
try:
from app.services.podcast_search import search_podcasts
logger.info(f"Trying to refresh feed URL from iTunes for podcast ID: {podcast.external_id}")
podcast_data = search_podcasts(podcast_id=podcast.external_id)
if podcast_data and podcast_data.get('feed_url') and podcast_data['feed_url'] != podcast.feed_url:
logger.info(f"Updated feed URL from {podcast.feed_url} to {podcast_data['feed_url']}")
podcast.feed_url = podcast_data['feed_url']
db.session.commit()
# Try again with the new feed URL
episodes = get_podcast_episodes(podcast.feed_url)
logger.info(f"Found {len(episodes)} episodes with updated feed URL")
except Exception as e:
logger.error(f"Error refreshing feed URL: {str(e)}")
# Process each episode
for episode_data in episodes:
# Skip episodes without required fields
if not episode_data.get('guid'):
logger.warning(f"Skipping episode without GUID: {episode_data.get('title', 'Unknown')}")
continue
if not episode_data.get('audio_url'):
logger.warning(f"Skipping episode without audio URL: {episode_data.get('title', 'Unknown')}")
continue
# Check if episode already exists
existing = Episode.query.filter_by(guid=episode_data['guid']).first()
if not existing:
# Create new episode
try:
episode = Episode(
podcast_id=podcast.id,
title=episode_data.get('title', ''),
description=episode_data.get('description', ''),
audio_url=episode_data.get('audio_url', ''),
image_url=episode_data.get('image_url', podcast.image_url), # Use podcast image if episode has none
published_date=episode_data.get('published_date'),
duration=episode_data.get('duration'),
file_size=episode_data.get('file_size'),
episode_number=episode_data.get('episode_number'),
guid=episode_data['guid'],
downloaded=False
)
db.session.add(episode)
stats['new_episodes'] += 1
logger.info(f"Added new episode: {episode.title}")
# Auto-download if enabled
if podcast.auto_download and episode.audio_url:
try:
download_episode(episode)
stats['episodes_downloaded'] += 1
logger.info(f"Auto-downloaded episode: {episode.title}")
except Exception as e:
logger.error(f"Error auto-downloading episode {episode.title}: {str(e)}")
except Exception as e:
logger.error(f"Error adding episode: {str(e)}")
# Update podcast last_updated timestamp if new episodes were found
if stats['new_episodes'] > 0:
podcast.last_updated = datetime.utcnow()
db.session.commit()
logger.info(f"Podcast update completed: {stats}")
return stats
except Exception as e:
db.session.rollback()
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['feed_status'] = 'error'
stats['error'] = str(e)
raise
def schedule_updates():
"""
Schedule podcast updates based on settings.
This function is meant to be called by a scheduler (e.g., APScheduler).
"""
logger.info("Starting scheduled podcast updates")
try:
stats = update_all_podcasts()
logger.info(f"Scheduled update completed: {stats}")
except Exception as e:
logger.error(f"Error during scheduled update: {str(e)}")
def clean_old_downloads():
"""
Clean up old downloaded episodes.
This function is meant to be called by a scheduler (e.g., APScheduler).
"""
from app.services.podcast_downloader import delete_old_episodes
logger.info("Starting cleanup of old downloads")
try:
count = delete_old_episodes()
logger.info(f"Deleted {count} old episodes")
except Exception as e:
logger.error(f"Error during cleanup: {str(e)}")