Docker and more calendar work

Cody Cook 2025-06-17 16:00:46 -07:00
commit f7a919ebf2
22 changed files with 2036 additions and 79 deletions


@@ -22,16 +22,16 @@ def parse_opml(opml_content):
"""
try:
root = ET.fromstring(opml_content)
# Find all outline elements that represent podcasts
podcasts = []
# Look for outlines in the body
body = root.find('body')
if body is None:
logger.error("OPML file has no body element")
return []
# Process all outline elements
for outline in body.findall('.//outline'):
# Check if this is a podcast outline (has xmlUrl attribute)
@@ -44,7 +44,7 @@ def parse_opml(opml_content):
'html_url': outline.get('htmlUrl', '')
}
podcasts.append(podcast)
logger.info(f"Parsed OPML file and found {len(podcasts)} podcasts")
return podcasts
except Exception as e:
@@ -65,17 +65,17 @@ def generate_opml(podcasts):
# Create the root element
root = ET.Element('opml')
root.set('version', '2.0')
# Create the head element
head = ET.SubElement(root, 'head')
title = ET.SubElement(head, 'title')
title.text = 'Podcastrr Subscriptions'
date_created = ET.SubElement(head, 'dateCreated')
date_created.text = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
# Create the body element
body = ET.SubElement(root, 'body')
# Add each podcast as an outline element
for podcast in podcasts:
outline = ET.SubElement(body, 'outline')
@@ -85,24 +85,25 @@ def generate_opml(podcasts):
outline.set('xmlUrl', podcast.feed_url)
if podcast.description:
outline.set('description', podcast.description)
# Convert to pretty-printed XML
xml_str = ET.tostring(root, encoding='utf-8')
parsed_xml = minidom.parseString(xml_str)
pretty_xml = parsed_xml.toprettyxml(indent=" ")
logger.info(f"Generated OPML file with {len(podcasts)} podcasts")
return pretty_xml
except Exception as e:
logger.error(f"Error generating OPML file: {str(e)}")
return ""
-def import_podcasts_from_opml(opml_content):
+def import_podcasts_from_opml(opml_content, progress_callback=None):
"""
Import podcasts from OPML content into the database.
Args:
opml_content (str): OPML file content.
+progress_callback (callable, optional): Function to call with progress updates.
Returns:
dict: Statistics about the import process.
@@ -110,46 +111,59 @@ def import_podcasts_from_opml(opml_content):
from app.models.podcast import Podcast
from app.models.database import db
from app.services.podcast_updater import update_podcast
podcasts = parse_opml(opml_content)
stats = {
'total': len(podcasts),
'imported': 0,
'skipped': 0,
'errors': 0
}
-for podcast_data in podcasts:
+# Initial progress update
+if progress_callback:
+progress_callback(0, f"Starting import of {len(podcasts)} podcasts")
+for i, podcast_data in enumerate(podcasts):
try:
# Check if podcast already exists
existing = Podcast.query.filter_by(feed_url=podcast_data['feed_url']).first()
if existing:
logger.info(f"Podcast already exists: {podcast_data['title']}")
stats['skipped'] += 1
continue
# Create new podcast
podcast = Podcast(
title=podcast_data['title'],
description=podcast_data.get('description', ''),
feed_url=podcast_data['feed_url']
)
db.session.add(podcast)
db.session.commit()
# Update podcast to fetch episodes
try:
update_podcast(podcast.id)
except Exception as e:
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['imported'] += 1
logger.info(f"Imported podcast: {podcast.title}")
except Exception as e:
stats['errors'] += 1
logger.error(f"Error importing podcast: {str(e)}")
-return stats
+# Update progress during the loop
+if progress_callback and len(podcasts) > 0:
+progress = int((i + 1) / len(podcasts) * 100)
+progress_callback(progress, f"Processed {i + 1}/{len(podcasts)} podcasts")
+# Final progress update
+if progress_callback:
+progress_callback(100, f"Import completed. Imported: {stats['imported']}, Skipped: {stats['skipped']}, Errors: {stats['errors']}")
+return stats
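The new progress_callback hook receives a percentage and a message. Below is a minimal sketch of a compatible caller, assuming only the (percent, message) signature visible in this diff; the logging wiring is illustrative and not part of the commit.

import logging

logging.basicConfig(level=logging.INFO)

def make_progress_logger(name):
    """Build a callback matching the (percent, message) signature used above."""
    log = logging.getLogger(name)
    def callback(percent, message):
        # A real app might persist this to a task table or push it over SSE;
        # this sketch just logs it.
        log.info("[%3d%%] %s", percent, message)
    return callback

# Hypothetical usage:
# with open("subscriptions.opml", encoding="utf-8") as f:
#     stats = import_podcasts_from_opml(f.read(),
#                                       progress_callback=make_progress_logger("opml-import"))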


@@ -88,12 +88,12 @@ def get_podcast_episodes(feed_url):
feed_url (str): URL of the podcast RSS feed.
Returns:
-list: List of episode dictionaries.
+tuple: (list of episode dictionaries, podcast metadata dictionary)
"""
try:
if not feed_url:
logger.error("Empty feed URL provided")
-return []
+return [], {}
logger.info(f"Fetching episodes from feed: {feed_url}")
@@ -130,6 +130,27 @@ def get_podcast_episodes(feed_url):
logger.info(f"Found {len(feed.entries)} entries in feed")
+# Extract podcast metadata
+podcast_metadata = {
+'title': feed.feed.get('title', ''),
+'description': feed.feed.get('description', feed.feed.get('subtitle', '')),
+'author': feed.feed.get('author', feed.feed.get('itunes_author', '')),
+'image_url': None # Default to None, will try to extract below
+}
+# Try to get podcast image URL from various locations in the feed
+if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
+podcast_metadata['image_url'] = feed.feed.image.href
+logger.debug(f"Found podcast image in feed.image.href: {podcast_metadata['image_url']}")
+elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
+podcast_metadata['image_url'] = feed.feed.itunes_image.href
+logger.debug(f"Found podcast image in feed.itunes_image.href: {podcast_metadata['image_url']}")
+elif 'image' in feed.feed and 'href' in feed.feed.image:
+podcast_metadata['image_url'] = feed.feed.image.href
+logger.debug(f"Found podcast image in feed.image['href']: {podcast_metadata['image_url']}")
+logger.info(f"Extracted podcast metadata: title='{podcast_metadata['title']}', image_url={podcast_metadata['image_url']}")
episodes = []
for entry in feed.entries:
# Log entry details for debugging
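The image lookup added above tries feed.image.href, then feed.itunes_image.href; the final elif is effectively the same check as the first, since feedparser exposes feed fields both as attributes and as dictionary keys. A standalone sketch of the same fallback chain, with a placeholder feed URL:

import feedparser

def extract_image_url(parsed):
    """Mirror the lookup order above: image.href, then itunes_image.href."""
    channel = parsed.feed
    for key in ("image", "itunes_image"):
        node = channel.get(key)
        if node and node.get("href"):
            return node["href"]
    return None

# parsed = feedparser.parse("https://example.com/feed.xml")  # placeholder URL
# print(extract_image_url(parsed))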
@@ -344,27 +365,14 @@ def get_podcast_episodes(feed_url):
logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
continue
-# Try to validate the URL without downloading the file
-import requests
-head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
-# Check if the URL is accessible
-if head_response.status_code >= 400:
-logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
-# Instead of skipping, add the episode with error information
-episode['download_error'] = f"Server returned status code {head_response.status_code}"
-episode['status_code'] = head_response.status_code
-else:
-# Check if the content type is audio
-content_type = head_response.headers.get('Content-Type', '')
-if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
-logger.warning(f"Audio URL has non-audio content type: {content_type}")
-# Don't skip here as some servers might not report the correct content type
-episode['download_error'] = f"Non-audio content type: {content_type}"
-else:
-# If we got here, the audio URL is valid with no issues
-episode['download_error'] = None
-episode['status_code'] = head_response.status_code
+# Skip validation for now - we'll validate when downloading
+# This prevents the import process from getting stuck on slow HEAD requests
+# The previous implementation made a HEAD request for each episode, which could
+# cause timeouts or hanging connections with feeds containing many episodes
+# Validation will happen when the episode is actually downloaded instead
+logger.debug(f"Skipping audio URL validation for {episode['title']}")
+episode['download_error'] = None
+episode['status_code'] = 200 # Assume success
# Add the episode regardless of status code
episodes.append(episode)
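Because the per-episode HEAD validation is now skipped, those checks have to happen at download time instead. One way that could look, reusing the removed status-code and content-type logic; this helper is a hypothetical sketch, not part of the commit:

import requests

def open_validated_stream(audio_url, timeout=10):
    """Open a streaming GET and run the checks the old HEAD pass performed.
    Returns (response, error_message); error_message is None when clean."""
    response = requests.get(audio_url, stream=True, timeout=timeout, allow_redirects=True)
    if response.status_code >= 400:
        response.close()
        return None, f"Server returned status code {response.status_code}"
    content_type = response.headers.get("Content-Type", "")
    if not content_type.startswith("audio/") and "application/octet-stream" not in content_type:
        # Keep the stream open: some servers mislabel audio content.
        return response, f"Non-audio content type: {content_type}"
    return response, None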
@@ -380,10 +388,10 @@ def get_podcast_episodes(feed_url):
logger.warning(f"Skipping episode without audio URL: {episode['title']}")
logger.info(f"Processed {len(episodes)} valid episodes")
-return episodes
+return episodes, podcast_metadata
except Exception as e:
logger.error(f"Error getting podcast episodes: {str(e)}")
-return []
+return [], {}
def _parse_date(date_str):
"""


@@ -13,14 +13,18 @@ from app.services.podcast_downloader import download_episode
# Set up logging
logger = logging.getLogger(__name__)
-def update_all_podcasts():
+def update_all_podcasts(progress_callback=None):
"""
Update all podcasts in the database.
+Args:
+progress_callback (callable, optional): Callback function for progress updates.
Returns:
dict: Statistics about the update process.
"""
podcasts = Podcast.query.all()
+total_podcasts = len(podcasts)
stats = {
'podcasts_updated': 0,
@@ -29,16 +33,32 @@
'errors': 0
}
-for podcast in podcasts:
+if progress_callback:
+progress_callback(0, f"Starting update of {total_podcasts} podcasts")
+for i, podcast in enumerate(podcasts):
try:
+if progress_callback:
+progress = int((i / total_podcasts) * 100)
+progress_callback(progress, f"Updating podcast {i+1}/{total_podcasts}: {podcast.title}")
result = update_podcast(podcast.id)
stats['podcasts_updated'] += 1
stats['new_episodes'] += result['new_episodes']
stats['episodes_downloaded'] += result['episodes_downloaded']
+if progress_callback:
+progress_callback(progress, f"Updated podcast {i+1}/{total_podcasts}: {podcast.title} - Found {result['new_episodes']} new episodes")
except Exception as e:
logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
stats['errors'] += 1
+if progress_callback:
+progress_callback(progress, f"Error updating podcast {i+1}/{total_podcasts}: {podcast.title} - {str(e)}")
+if progress_callback:
+progress_callback(100, f"Update complete. Updated {stats['podcasts_updated']} podcasts, found {stats['new_episodes']} new episodes.")
return stats
def update_podcast(podcast_id, progress_callback=None):
@@ -67,12 +87,40 @@ def update_podcast(podcast_id, progress_callback=None):
if progress_callback:
progress_callback(10, f"Fetching episodes for {podcast.title}")
-# Get episodes from feed
-episodes = get_podcast_episodes(podcast.feed_url)
+# Get episodes and podcast metadata from feed
+episodes, podcast_metadata = get_podcast_episodes(podcast.feed_url)
# Update podcast last_checked timestamp
podcast.last_checked = datetime.utcnow()
+# Update podcast metadata if available
+updated = False
+# Update image URL if available
+if podcast_metadata.get('image_url'):
+if podcast.image_url != podcast_metadata['image_url']:
+logger.info(f"Updating podcast image URL from {podcast.image_url} to {podcast_metadata['image_url']}")
+podcast.image_url = podcast_metadata['image_url']
+updated = True
+# Update author if available
+if podcast_metadata.get('author'):
+if podcast.author != podcast_metadata['author']:
+logger.info(f"Updating podcast author from '{podcast.author}' to '{podcast_metadata['author']}'")
+podcast.author = podcast_metadata['author']
+updated = True
+# Update description if available
+if podcast_metadata.get('description'):
+if podcast.description != podcast_metadata['description']:
+logger.info(f"Updating podcast description")
+podcast.description = podcast_metadata['description']
+updated = True
+# Commit changes if any updates were made
+if updated:
+db.session.commit()
if progress_callback:
progress_callback(30, f"Found {len(episodes)} episodes")
@@ -103,8 +151,36 @@ def update_podcast(podcast_id, progress_callback=None):
db.session.commit()
# Try again with the new feed URL
-episodes = get_podcast_episodes(podcast.feed_url)
+episodes, updated_metadata = get_podcast_episodes(podcast.feed_url)
logger.info(f"Found {len(episodes)} episodes with updated feed URL")
+# Update podcast metadata with the new feed
+updated_from_new_feed = False
+# Update image URL if available
+if updated_metadata.get('image_url'):
+if podcast.image_url != updated_metadata['image_url']:
+logger.info(f"Updating podcast image URL from new feed: {updated_metadata['image_url']}")
+podcast.image_url = updated_metadata['image_url']
+updated_from_new_feed = True
+# Update author if available
+if updated_metadata.get('author'):
+if podcast.author != updated_metadata['author']:
+logger.info(f"Updating podcast author from new feed: '{updated_metadata['author']}'")
+podcast.author = updated_metadata['author']
+updated_from_new_feed = True
+# Update description if available
+if updated_metadata.get('description'):
+if podcast.description != updated_metadata['description']:
+logger.info(f"Updating podcast description from new feed")
+podcast.description = updated_metadata['description']
+updated_from_new_feed = True
+# Commit changes if any updates were made
+if updated_from_new_feed:
+db.session.commit()
except Exception as e:
logger.error(f"Error refreshing feed URL: {str(e)}")