Docker and more calendar work

Cody Cook 2025-06-17 16:00:46 -07:00
commit f7a919ebf2
22 changed files with 2036 additions and 79 deletions


@@ -22,16 +22,16 @@ def parse_opml(opml_content):
"""
try:
root = ET.fromstring(opml_content)
# Find all outline elements that represent podcasts
podcasts = []
# Look for outlines in the body
body = root.find('body')
if body is None:
logger.error("OPML file has no body element")
return []
# Process all outline elements
for outline in body.findall('.//outline'):
# Check if this is a podcast outline (has xmlUrl attribute)
@@ -44,7 +44,7 @@ def parse_opml(opml_content):
'html_url': outline.get('htmlUrl', '')
}
podcasts.append(podcast)
logger.info(f"Parsed OPML file and found {len(podcasts)} podcasts")
return podcasts
except Exception as e:
@@ -65,17 +65,17 @@ def generate_opml(podcasts):
# Create the root element
root = ET.Element('opml')
root.set('version', '2.0')
# Create the head element
head = ET.SubElement(root, 'head')
title = ET.SubElement(head, 'title')
title.text = 'Podcastrr Subscriptions'
date_created = ET.SubElement(head, 'dateCreated')
date_created.text = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
# Create the body element
body = ET.SubElement(root, 'body')
# Add each podcast as an outline element
for podcast in podcasts:
outline = ET.SubElement(body, 'outline')
@@ -85,24 +85,25 @@ def generate_opml(podcasts):
outline.set('xmlUrl', podcast.feed_url)
if podcast.description:
outline.set('description', podcast.description)
# Convert to pretty-printed XML
xml_str = ET.tostring(root, encoding='utf-8')
parsed_xml = minidom.parseString(xml_str)
pretty_xml = parsed_xml.toprettyxml(indent=" ")
logger.info(f"Generated OPML file with {len(podcasts)} podcasts")
return pretty_xml
except Exception as e:
logger.error(f"Error generating OPML file: {str(e)}")
return ""
-def import_podcasts_from_opml(opml_content):
+def import_podcasts_from_opml(opml_content, progress_callback=None):
"""
Import podcasts from OPML content into the database.
Args:
opml_content (str): OPML file content.
+progress_callback (callable, optional): Function to call with progress updates.
Returns:
dict: Statistics about the import process.
@@ -110,46 +111,59 @@ def import_podcasts_from_opml(opml_content):
from app.models.podcast import Podcast
from app.models.database import db
from app.services.podcast_updater import update_podcast
podcasts = parse_opml(opml_content)
stats = {
'total': len(podcasts),
'imported': 0,
'skipped': 0,
'errors': 0
}
-for podcast_data in podcasts:
+# Initial progress update
+if progress_callback:
+progress_callback(0, f"Starting import of {len(podcasts)} podcasts")
+for i, podcast_data in enumerate(podcasts):
try:
# Check if podcast already exists
existing = Podcast.query.filter_by(feed_url=podcast_data['feed_url']).first()
if existing:
logger.info(f"Podcast already exists: {podcast_data['title']}")
stats['skipped'] += 1
continue
# Create new podcast
podcast = Podcast(
title=podcast_data['title'],
description=podcast_data.get('description', ''),
feed_url=podcast_data['feed_url']
)
db.session.add(podcast)
db.session.commit()
# Update podcast to fetch episodes
try:
update_podcast(podcast.id)
except Exception as e:
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['imported'] += 1
logger.info(f"Imported podcast: {podcast.title}")
except Exception as e:
stats['errors'] += 1
logger.error(f"Error importing podcast: {str(e)}")
-return stats
+# Update progress during the loop
+if progress_callback and len(podcasts) > 0:
+progress = int((i + 1) / len(podcasts) * 100)
+progress_callback(progress, f"Processed {i + 1}/{len(podcasts)} podcasts")
+# Final progress update
+if progress_callback:
+progress_callback(100, f"Import completed. Imported: {stats['imported']}, Skipped: {stats['skipped']}, Errors: {stats['errors']}")
+return stats
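The new progress_callback hook receives a percentage and a message. Below is a minimal sketch of a compatible caller, assuming only the (percent, message) signature visible in this diff; the logging wiring is illustrative and not part of the commit.

import logging

logging.basicConfig(level=logging.INFO)

def make_progress_logger(name):
    """Build a callback matching the (percent, message) signature used above."""
    log = logging.getLogger(name)
    def callback(percent, message):
        # A real app might persist this to a task table or push it over SSE;
        # this sketch just logs it.
        log.info("[%3d%%] %s", percent, message)
    return callback

# Hypothetical usage:
# with open("subscriptions.opml", encoding="utf-8") as f:
#     stats = import_podcasts_from_opml(f.read(),
#                                       progress_callback=make_progress_logger("opml-import"))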


@@ -88,12 +88,12 @@ def get_podcast_episodes(feed_url):
feed_url (str): URL of the podcast RSS feed.
Returns:
-list: List of episode dictionaries.
+tuple: (list of episode dictionaries, podcast metadata dictionary)
"""
try:
if not feed_url:
logger.error("Empty feed URL provided")
-return []
+return [], {}
logger.info(f"Fetching episodes from feed: {feed_url}")
@@ -130,6 +130,27 @@ def get_podcast_episodes(feed_url):
logger.info(f"Found {len(feed.entries)} entries in feed")
+# Extract podcast metadata
+podcast_metadata = {
+'title': feed.feed.get('title', ''),
+'description': feed.feed.get('description', feed.feed.get('subtitle', '')),
+'author': feed.feed.get('author', feed.feed.get('itunes_author', '')),
+'image_url': None # Default to None, will try to extract below
+}
+# Try to get podcast image URL from various locations in the feed
+if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
+podcast_metadata['image_url'] = feed.feed.image.href
+logger.debug(f"Found podcast image in feed.image.href: {podcast_metadata['image_url']}")
+elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
+podcast_metadata['image_url'] = feed.feed.itunes_image.href
+logger.debug(f"Found podcast image in feed.itunes_image.href: {podcast_metadata['image_url']}")
+elif 'image' in feed.feed and 'href' in feed.feed.image:
+podcast_metadata['image_url'] = feed.feed.image.href
+logger.debug(f"Found podcast image in feed.image['href']: {podcast_metadata['image_url']}")
+logger.info(f"Extracted podcast metadata: title='{podcast_metadata['title']}', image_url={podcast_metadata['image_url']}")
episodes = []
for entry in feed.entries:
# Log entry details for debugging
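The image lookup added above tries feed.image.href, then feed.itunes_image.href; the final elif is effectively the same check as the first, since feedparser exposes feed fields both as attributes and as dictionary keys. A standalone sketch of the same fallback chain, with a placeholder feed URL:

import feedparser

def extract_image_url(parsed):
    """Mirror the lookup order above: image.href, then itunes_image.href."""
    channel = parsed.feed
    for key in ("image", "itunes_image"):
        node = channel.get(key)
        if node and node.get("href"):
            return node["href"]
    return None

# parsed = feedparser.parse("https://example.com/feed.xml")  # placeholder URL
# print(extract_image_url(parsed))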
@@ -344,27 +365,14 @@ def get_podcast_episodes(feed_url):
logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
continue
-# Try to validate the URL without downloading the file
-import requests
-head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
-# Check if the URL is accessible
-if head_response.status_code >= 400:
-logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
-# Instead of skipping, add the episode with error information
-episode['download_error'] = f"Server returned status code {head_response.status_code}"
-episode['status_code'] = head_response.status_code
-else:
-# Check if the content type is audio
-content_type = head_response.headers.get('Content-Type', '')
-if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
-logger.warning(f"Audio URL has non-audio content type: {content_type}")
-# Don't skip here as some servers might not report the correct content type
-episode['download_error'] = f"Non-audio content type: {content_type}"
-else:
-# If we got here, the audio URL is valid with no issues
-episode['download_error'] = None
-episode['status_code'] = head_response.status_code
+# Skip validation for now - we'll validate when downloading
+# This prevents the import process from getting stuck on slow HEAD requests
+# The previous implementation made a HEAD request for each episode, which could
+# cause timeouts or hanging connections with feeds containing many episodes
+# Validation will happen when the episode is actually downloaded instead
+logger.debug(f"Skipping audio URL validation for {episode['title']}")
+episode['download_error'] = None
+episode['status_code'] = 200 # Assume success
# Add the episode regardless of status code
episodes.append(episode)
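Because the per-episode HEAD validation is now skipped, those checks have to happen at download time instead. One way that could look, reusing the removed status-code and content-type logic; this helper is a hypothetical sketch, not part of the commit:

import requests

def open_validated_stream(audio_url, timeout=10):
    """Open a streaming GET and run the checks the old HEAD pass performed.
    Returns (response, error_message); error_message is None when clean."""
    response = requests.get(audio_url, stream=True, timeout=timeout, allow_redirects=True)
    if response.status_code >= 400:
        response.close()
        return None, f"Server returned status code {response.status_code}"
    content_type = response.headers.get("Content-Type", "")
    if not content_type.startswith("audio/") and "application/octet-stream" not in content_type:
        # Keep the stream open: some servers mislabel audio content.
        return response, f"Non-audio content type: {content_type}"
    return response, None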
@@ -380,10 +388,10 @@ def get_podcast_episodes(feed_url):
logger.warning(f"Skipping episode without audio URL: {episode['title']}")
logger.info(f"Processed {len(episodes)} valid episodes")
-return episodes
+return episodes, podcast_metadata
except Exception as e:
logger.error(f"Error getting podcast episodes: {str(e)}")
-return []
+return [], {}
def _parse_date(date_str):
"""


@@ -13,14 +13,18 @@ from app.services.podcast_downloader import download_episode
# Set up logging
logger = logging.getLogger(__name__)
-def update_all_podcasts():
+def update_all_podcasts(progress_callback=None):
"""
Update all podcasts in the database.
+Args:
+progress_callback (callable, optional): Callback function for progress updates.
Returns:
dict: Statistics about the update process.
"""
podcasts = Podcast.query.all()
+total_podcasts = len(podcasts)
stats = {
'podcasts_updated': 0,
@@ -29,16 +33,32 @@
'errors': 0
}
-for podcast in podcasts:
+if progress_callback:
+progress_callback(0, f"Starting update of {total_podcasts} podcasts")
+for i, podcast in enumerate(podcasts):
try:
+if progress_callback:
+progress = int((i / total_podcasts) * 100)
+progress_callback(progress, f"Updating podcast {i+1}/{total_podcasts}: {podcast.title}")
result = update_podcast(podcast.id)
stats['podcasts_updated'] += 1
stats['new_episodes'] += result['new_episodes']
stats['episodes_downloaded'] += result['episodes_downloaded']
+if progress_callback:
+progress_callback(progress, f"Updated podcast {i+1}/{total_podcasts}: {podcast.title} - Found {result['new_episodes']} new episodes")
except Exception as e:
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['errors'] += 1
+if progress_callback:
+progress_callback(progress, f"Error updating podcast {i+1}/{total_podcasts}: {podcast.title} - {str(e)}")
+if progress_callback:
+progress_callback(100, f"Update complete. Updated {stats['podcasts_updated']} podcasts, found {stats['new_episodes']} new episodes.")
return stats
def update_podcast(podcast_id, progress_callback=None):
@@ -67,12 +87,40 @@ def update_podcast(podcast_id, progress_callback=None):
if progress_callback:
progress_callback(10, f"Fetching episodes for {podcast.title}")
-# Get episodes from feed
-episodes = get_podcast_episodes(podcast.feed_url)
+# Get episodes and podcast metadata from feed
+episodes, podcast_metadata = get_podcast_episodes(podcast.feed_url)
# Update podcast last_checked timestamp
podcast.last_checked = datetime.utcnow()
+# Update podcast metadata if available
+updated = False
+# Update image URL if available
+if podcast_metadata.get('image_url'):
+if podcast.image_url != podcast_metadata['image_url']:
+logger.info(f"Updating podcast image URL from {podcast.image_url} to {podcast_metadata['image_url']}")
+podcast.image_url = podcast_metadata['image_url']
+updated = True
+# Update author if available
+if podcast_metadata.get('author'):
+if podcast.author != podcast_metadata['author']:
+logger.info(f"Updating podcast author from '{podcast.author}' to '{podcast_metadata['author']}'")
+podcast.author = podcast_metadata['author']
+updated = True
+# Update description if available
+if podcast_metadata.get('description'):
+if podcast.description != podcast_metadata['description']:
+logger.info(f"Updating podcast description")
+podcast.description = podcast_metadata['description']
+updated = True
+# Commit changes if any updates were made
+if updated:
+db.session.commit()
if progress_callback:
progress_callback(30, f"Found {len(episodes)} episodes")
@@ -103,8 +151,36 @@ def update_podcast(podcast_id, progress_callback=None):
db.session.commit()
# Try again with the new feed URL
-episodes = get_podcast_episodes(podcast.feed_url)
+episodes, updated_metadata = get_podcast_episodes(podcast.feed_url)
logger.info(f"Found {len(episodes)} episodes with updated feed URL")
+# Update podcast metadata with the new feed
+updated_from_new_feed = False
+# Update image URL if available
+if updated_metadata.get('image_url'):
+if podcast.image_url != updated_metadata['image_url']:
+logger.info(f"Updating podcast image URL from new feed: {updated_metadata['image_url']}")
+podcast.image_url = updated_metadata['image_url']
+updated_from_new_feed = True
+# Update author if available
+if updated_metadata.get('author'):
+if podcast.author != updated_metadata['author']:
+logger.info(f"Updating podcast author from new feed: '{updated_metadata['author']}'")
+podcast.author = updated_metadata['author']
+updated_from_new_feed = True
+# Update description if available
+if updated_metadata.get('description'):
+if podcast.description != updated_metadata['description']:
+logger.info(f"Updating podcast description from new feed")
+podcast.description = updated_metadata['description']
+updated_from_new_feed = True
+# Commit changes if any updates were made
+if updated_from_new_feed:
+db.session.commit()
except Exception as e:
logger.error(f"Error refreshing feed URL: {str(e)}")