Add podgrab featureset
parent 095bf52a2f
commit 233dd5b5c0
33 changed files with 2315 additions and 125 deletions
155 app/services/opml_handler.py Normal file
@@ -0,0 +1,155 @@
"""
OPML import/export functionality for Podcastrr.
"""
import xml.etree.ElementTree as ET
from xml.dom import minidom
import logging
from datetime import datetime
from flask import current_app

# Set up logging
logger = logging.getLogger(__name__)


def parse_opml(opml_content):
    """
    Parse OPML content and extract podcast feed URLs.

    Args:
        opml_content (str): OPML file content.

    Returns:
        list: List of dictionaries containing podcast information.
    """
    try:
        root = ET.fromstring(opml_content)

        # Find all outline elements that represent podcasts
        podcasts = []

        # Look for outlines in the body
        body = root.find('body')
        if body is None:
            logger.error("OPML file has no body element")
            return []

        # Process all outline elements
        for outline in body.findall('.//outline'):
            # Check if this is a podcast outline (has xmlUrl attribute)
            xml_url = outline.get('xmlUrl')
            if xml_url:
                podcast = {
                    'feed_url': xml_url,
                    'title': outline.get('title') or outline.get('text', 'Unknown Podcast'),
                    'description': outline.get('description', ''),
                    'html_url': outline.get('htmlUrl', '')
                }
                podcasts.append(podcast)

        logger.info(f"Parsed OPML file and found {len(podcasts)} podcasts")
        return podcasts
    except Exception as e:
        logger.error(f"Error parsing OPML file: {str(e)}")
        return []


def generate_opml(podcasts):
    """
    Generate OPML content from a list of podcasts.

    Args:
        podcasts (list): List of Podcast model instances.

    Returns:
        str: OPML file content.
    """
    try:
        # Create the root element
        root = ET.Element('opml')
        root.set('version', '2.0')

        # Create the head element
        head = ET.SubElement(root, 'head')
        title = ET.SubElement(head, 'title')
        title.text = 'Podcastrr Subscriptions'
        date_created = ET.SubElement(head, 'dateCreated')
        date_created.text = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')

        # Create the body element
        body = ET.SubElement(root, 'body')

        # Add each podcast as an outline element
        for podcast in podcasts:
            outline = ET.SubElement(body, 'outline')
            outline.set('type', 'rss')
            outline.set('text', podcast.title)
            outline.set('title', podcast.title)
            outline.set('xmlUrl', podcast.feed_url)
            if podcast.description:
                outline.set('description', podcast.description)

        # Convert to pretty-printed XML
        xml_str = ET.tostring(root, encoding='utf-8')
        parsed_xml = minidom.parseString(xml_str)
        pretty_xml = parsed_xml.toprettyxml(indent="  ")

        logger.info(f"Generated OPML file with {len(podcasts)} podcasts")
        return pretty_xml
    except Exception as e:
        logger.error(f"Error generating OPML file: {str(e)}")
        return ""


def import_podcasts_from_opml(opml_content):
    """
    Import podcasts from OPML content into the database.

    Args:
        opml_content (str): OPML file content.

    Returns:
        dict: Statistics about the import process.
    """
    from app.models.podcast import Podcast
    from app.models.database import db
    from app.services.podcast_updater import update_podcast

    podcasts = parse_opml(opml_content)

    stats = {
        'total': len(podcasts),
        'imported': 0,
        'skipped': 0,
        'errors': 0
    }

    for podcast_data in podcasts:
        try:
            # Check if podcast already exists
            existing = Podcast.query.filter_by(feed_url=podcast_data['feed_url']).first()

            if existing:
                logger.info(f"Podcast already exists: {podcast_data['title']}")
                stats['skipped'] += 1
                continue

            # Create new podcast
            podcast = Podcast(
                title=podcast_data['title'],
                description=podcast_data.get('description', ''),
                feed_url=podcast_data['feed_url']
            )

            db.session.add(podcast)
            db.session.commit()

            # Update podcast to fetch episodes
            try:
                update_podcast(podcast.id)
            except Exception as e:
                logger.error(f"Error updating podcast {podcast.title}: {str(e)}")

            stats['imported'] += 1
            logger.info(f"Imported podcast: {podcast.title}")
        except Exception as e:
            stats['errors'] += 1
            logger.error(f"Error importing podcast: {str(e)}")

    return stats
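Note that parse_opml only picks up outline elements carrying an xmlUrl attribute, and generate_opml emits the matching attributes back out. A minimal round-trip check outside the app might look like the sketch below; the feed URL and titles are placeholders, and FakePodcast stands in for the real SQLAlchemy Podcast model, since only title, feed_url, and description are read here.

# Minimal sketch: exercise parse_opml/generate_opml without the Flask app.
# The sample feed URL and titles are made up for illustration.
from app.services.opml_handler import parse_opml, generate_opml

SAMPLE_OPML = """<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
  <head><title>Example subscriptions</title></head>
  <body>
    <outline type="rss" text="Example Show" title="Example Show"
             xmlUrl="https://example.com/feed.xml"
             description="A placeholder feed"/>
  </body>
</opml>
"""

class FakePodcast:
    """Stand-in for the Podcast model; only the attributes generate_opml reads."""
    def __init__(self, title, feed_url, description=""):
        self.title = title
        self.feed_url = feed_url
        self.description = description

parsed = parse_opml(SAMPLE_OPML)
print(parsed)  # [{'feed_url': 'https://example.com/feed.xml', 'title': 'Example Show', ...}]

# Round-trip: regenerate OPML from the parsed entries.
print(generate_opml([FakePodcast(p['title'], p['feed_url'], p['description']) for p in parsed]))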
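As a usage sketch, these helpers would typically be wired to Flask routes along the following lines. The blueprint name, URL paths, and the podcasts.index endpoint are assumptions for illustration and are not part of this commit.

# Hypothetical Flask routes wiring up the OPML helpers; blueprint, paths,
# and the 'podcasts.index' endpoint are illustrative assumptions only.
from flask import Blueprint, request, Response, flash, redirect, url_for
from app.models.podcast import Podcast
from app.services.opml_handler import generate_opml, import_podcasts_from_opml

opml_bp = Blueprint('opml', __name__)

@opml_bp.route('/podcasts/import', methods=['POST'])
def import_opml():
    # Read the uploaded OPML file and hand it to the import service.
    file = request.files.get('opml_file')
    if not file:
        flash('No OPML file uploaded')
        return redirect(url_for('podcasts.index'))
    stats = import_podcasts_from_opml(file.read().decode('utf-8'))
    flash(f"Imported {stats['imported']} of {stats['total']} podcasts "
          f"({stats['skipped']} skipped, {stats['errors']} errors)")
    return redirect(url_for('podcasts.index'))

@opml_bp.route('/podcasts/export')
def export_opml():
    # Serve all subscribed podcasts as a downloadable OPML document.
    opml = generate_opml(Podcast.query.all())
    return Response(
        opml,
        mimetype='text/x-opml',
        headers={'Content-Disposition': 'attachment; filename=podcastrr.opml'}
    )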