Docker and more calendar work

2025-06-17 16:00:46 -07:00 · 2025-06-17 16:00:46 -07:00 · f7a919ebf2
commit f7a919ebf2
parent 4527504c80
22 changed files with 2036 additions and 79 deletions
--- a/app/services/podcast_search.py
+++ b/app/services/podcast_search.py
@@ -88,12 +88,12 @@ def get_podcast_episodes(feed_url):
        feed_url (str): URL of the podcast RSS feed.

    Returns:
-        list: List of episode dictionaries.
+        tuple: (list of episode dictionaries, podcast metadata dictionary)
    """
    try:
        if not feed_url:
            logger.error("Empty feed URL provided")
-            return []
+            return [], {}

        logger.info(f"Fetching episodes from feed: {feed_url}")

@@ -130,6 +130,27 @@ def get_podcast_episodes(feed_url):

        logger.info(f"Found {len(feed.entries)} entries in feed")

+        # Extract podcast metadata
+        podcast_metadata = {
+            'title': feed.feed.get('title', ''),
+            'description': feed.feed.get('description', feed.feed.get('subtitle', '')),
+            'author': feed.feed.get('author', feed.feed.get('itunes_author', '')),
+            'image_url': None  # Default to None, will try to extract below
+        }
+
+        # Try to get podcast image URL from various locations in the feed
+        if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
+            podcast_metadata['image_url'] = feed.feed.image.href
+            logger.debug(f"Found podcast image in feed.image.href: {podcast_metadata['image_url']}")
+        elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
+            podcast_metadata['image_url'] = feed.feed.itunes_image.href
+            logger.debug(f"Found podcast image in feed.itunes_image.href: {podcast_metadata['image_url']}")
+        elif 'image' in feed.feed and 'href' in feed.feed.image:
+            podcast_metadata['image_url'] = feed.feed.image.href
+            logger.debug(f"Found podcast image in feed.image['href']: {podcast_metadata['image_url']}")
+
+        logger.info(f"Extracted podcast metadata: title='{podcast_metadata['title']}', image_url={podcast_metadata['image_url']}")
+
        episodes = []
        for entry in feed.entries:
            # Log entry details for debugging
@@ -344,27 +365,14 @@ def get_podcast_episodes(feed_url):
                        logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
                        continue

-                    # Try to validate the URL without downloading the file
-                    import requests
-                    head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
-
-                    # Check if the URL is accessible
-                    if head_response.status_code >= 400:
-                        logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
-                        # Instead of skipping, add the episode with error information
-                        episode['download_error'] = f"Server returned status code {head_response.status_code}"
-                        episode['status_code'] = head_response.status_code
-                    else:
-                        # Check if the content type is audio
-                        content_type = head_response.headers.get('Content-Type', '')
-                        if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
-                            logger.warning(f"Audio URL has non-audio content type: {content_type}")
-                            # Don't skip here as some servers might not report the correct content type
-                            episode['download_error'] = f"Non-audio content type: {content_type}"
-                        else:
-                            # If we got here, the audio URL is valid with no issues
-                            episode['download_error'] = None
-                            episode['status_code'] = head_response.status_code
+                    # Skip validation for now - we'll validate when downloading
+                    # This prevents the import process from getting stuck on slow HEAD requests
+                    # The previous implementation made a HEAD request for each episode, which could
+                    # cause timeouts or hanging connections with feeds containing many episodes
+                    # Validation will happen when the episode is actually downloaded instead
+                    logger.debug(f"Skipping audio URL validation for {episode['title']}")
+                    episode['download_error'] = None
+                    episode['status_code'] = 200  # Assume success

                    # Add the episode regardless of status code
                    episodes.append(episode)
@@ -380,10 +388,10 @@ def get_podcast_episodes(feed_url):
                logger.warning(f"Skipping episode without audio URL: {episode['title']}")

        logger.info(f"Processed {len(episodes)} valid episodes")
-        return episodes
+        return episodes, podcast_metadata
    except Exception as e:
        logger.error(f"Error getting podcast episodes: {str(e)}")
-        return []
+        return [], {}

 def _parse_date(date_str):
    """