Docker and more calendar work

Cody Cook 2025-06-17 16:00:46 -07:00
commit f7a919ebf2
22 changed files with 2036 additions and 79 deletions


@@ -88,12 +88,12 @@ def get_podcast_episodes(feed_url):
         feed_url (str): URL of the podcast RSS feed.
 
     Returns:
-        list: List of episode dictionaries.
+        tuple: (list of episode dictionaries, podcast metadata dictionary)
     """
     try:
         if not feed_url:
            logger.error("Empty feed URL provided")
-            return []
+            return [], {}
 
         logger.info(f"Fetching episodes from feed: {feed_url}")
@@ -130,6 +130,27 @@ def get_podcast_episodes(feed_url):
         logger.info(f"Found {len(feed.entries)} entries in feed")
 
+        # Extract podcast metadata
+        podcast_metadata = {
+            'title': feed.feed.get('title', ''),
+            'description': feed.feed.get('description', feed.feed.get('subtitle', '')),
+            'author': feed.feed.get('author', feed.feed.get('itunes_author', '')),
+            'image_url': None  # Default to None, will try to extract below
+        }
+
+        # Try to get podcast image URL from various locations in the feed
+        if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
+            podcast_metadata['image_url'] = feed.feed.image.href
+            logger.debug(f"Found podcast image in feed.image.href: {podcast_metadata['image_url']}")
+        elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
+            podcast_metadata['image_url'] = feed.feed.itunes_image.href
+            logger.debug(f"Found podcast image in feed.itunes_image.href: {podcast_metadata['image_url']}")
+        elif 'image' in feed.feed and 'href' in feed.feed.image:
+            podcast_metadata['image_url'] = feed.feed.image.href
+            logger.debug(f"Found podcast image in feed.image['href']: {podcast_metadata['image_url']}")
+
+        logger.info(f"Extracted podcast metadata: title='{podcast_metadata['title']}', image_url={podcast_metadata['image_url']}")
+
         episodes = []
         for entry in feed.entries:
             # Log entry details for debugging
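
A note on the image lookup above: feedparser's FeedParserDict supports both attribute access (feed.feed.image) and key access (feed.feed['image']), so the third elif branch is effectively the same test as the first and is unlikely to ever fire on its own. A standalone sketch of the same lookup order, runnable outside the diff; the feed URL is a placeholder, and which keys actually exist depends on the feed:

import feedparser

feed = feedparser.parse("https://example.com/feed.xml")
channel = feed.feed
image_url = None
if hasattr(channel, 'image') and hasattr(channel.image, 'href'):
    image_url = channel.image.href         # RSS <image> channel artwork
elif hasattr(channel, 'itunes_image') and hasattr(channel.itunes_image, 'href'):
    image_url = channel.itunes_image.href  # <itunes:image href="...">
print(image_url)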
@@ -344,27 +365,14 @@ def get_podcast_episodes(feed_url):
                     logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
                     continue
 
-                # Try to validate the URL without downloading the file
-                import requests
-                head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
-
-                # Check if the URL is accessible
-                if head_response.status_code >= 400:
-                    logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
-                    # Instead of skipping, add the episode with error information
-                    episode['download_error'] = f"Server returned status code {head_response.status_code}"
-                    episode['status_code'] = head_response.status_code
-                else:
-                    # Check if the content type is audio
-                    content_type = head_response.headers.get('Content-Type', '')
-                    if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
-                        logger.warning(f"Audio URL has non-audio content type: {content_type}")
-                        # Don't skip here as some servers might not report the correct content type
-                        episode['download_error'] = f"Non-audio content type: {content_type}"
-                    else:
-                        # If we got here, the audio URL is valid with no issues
-                        episode['download_error'] = None
-                    episode['status_code'] = head_response.status_code
+                # Skip validation for now - we'll validate when downloading
+                # This prevents the import process from getting stuck on slow HEAD requests
+                # The previous implementation made a HEAD request for each episode, which could
+                # cause timeouts or hanging connections with feeds containing many episodes
+                # Validation will happen when the episode is actually downloaded instead
+                logger.debug(f"Skipping audio URL validation for {episode['title']}")
+                episode['download_error'] = None
+                episode['status_code'] = 200  # Assume success
 
                 # Add the episode regardless of status code
                 episodes.append(episode)
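
With the HEAD pre-check removed, per-URL validation has to happen at download time, as the new comments say. A sketch of what that deferred check could look like; this helper is an assumption for illustration, not part of this commit, though it reuses the download_error and status_code fields the diff sets:

import requests

def download_episode(episode, dest_path, timeout=30):
    # Validate while downloading: one streamed GET instead of a separate HEAD per episode
    try:
        with requests.get(episode['audio_url'], stream=True,
                          timeout=timeout, allow_redirects=True) as resp:
            episode['status_code'] = resp.status_code
            if resp.status_code >= 400:
                episode['download_error'] = f"Server returned status code {resp.status_code}"
                return False
            content_type = resp.headers.get('Content-Type', '')
            if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
                # Record but don't abort: some servers misreport Content-Type
                episode['download_error'] = f"Non-audio content type: {content_type}"
            with open(dest_path, 'wb') as fh:
                for chunk in resp.iter_content(chunk_size=8192):
                    fh.write(chunk)
        return True
    except requests.RequestException as exc:
        episode['download_error'] = str(exc)
        return False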
@@ -380,10 +388,10 @@ def get_podcast_episodes(feed_url):
                 logger.warning(f"Skipping episode without audio URL: {episode['title']}")
         logger.info(f"Processed {len(episodes)} valid episodes")
-        return episodes
+        return episodes, podcast_metadata
 
     except Exception as e:
         logger.error(f"Error getting podcast episodes: {str(e)}")
-        return []
+        return [], {}
 
 def _parse_date(date_str):
     """