Docker and more calendar work
This commit is contained in:
parent
4527504c80
commit
f7a919ebf2
22 changed files with 2036 additions and 79 deletions
|
@ -88,12 +88,12 @@ def get_podcast_episodes(feed_url):
|
|||
feed_url (str): URL of the podcast RSS feed.
|
||||
|
||||
Returns:
|
||||
list: List of episode dictionaries.
|
||||
tuple: (list of episode dictionaries, podcast metadata dictionary)
|
||||
"""
|
||||
try:
|
||||
if not feed_url:
|
||||
logger.error("Empty feed URL provided")
|
||||
return []
|
||||
return [], {}
|
||||
|
||||
logger.info(f"Fetching episodes from feed: {feed_url}")
|
||||
|
||||
|
@ -130,6 +130,27 @@ def get_podcast_episodes(feed_url):
|
|||
|
||||
logger.info(f"Found {len(feed.entries)} entries in feed")
|
||||
|
||||
# Extract podcast metadata
|
||||
podcast_metadata = {
|
||||
'title': feed.feed.get('title', ''),
|
||||
'description': feed.feed.get('description', feed.feed.get('subtitle', '')),
|
||||
'author': feed.feed.get('author', feed.feed.get('itunes_author', '')),
|
||||
'image_url': None # Default to None, will try to extract below
|
||||
}
|
||||
|
||||
# Try to get podcast image URL from various locations in the feed
|
||||
if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
|
||||
podcast_metadata['image_url'] = feed.feed.image.href
|
||||
logger.debug(f"Found podcast image in feed.image.href: {podcast_metadata['image_url']}")
|
||||
elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
|
||||
podcast_metadata['image_url'] = feed.feed.itunes_image.href
|
||||
logger.debug(f"Found podcast image in feed.itunes_image.href: {podcast_metadata['image_url']}")
|
||||
elif 'image' in feed.feed and 'href' in feed.feed.image:
|
||||
podcast_metadata['image_url'] = feed.feed.image.href
|
||||
logger.debug(f"Found podcast image in feed.image['href']: {podcast_metadata['image_url']}")
|
||||
|
||||
logger.info(f"Extracted podcast metadata: title='{podcast_metadata['title']}', image_url={podcast_metadata['image_url']}")
|
||||
|
||||
episodes = []
|
||||
for entry in feed.entries:
|
||||
# Log entry details for debugging
|
||||
|
@ -344,27 +365,14 @@ def get_podcast_episodes(feed_url):
|
|||
logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
|
||||
continue
|
||||
|
||||
# Try to validate the URL without downloading the file
|
||||
import requests
|
||||
head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
|
||||
|
||||
# Check if the URL is accessible
|
||||
if head_response.status_code >= 400:
|
||||
logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
|
||||
# Instead of skipping, add the episode with error information
|
||||
episode['download_error'] = f"Server returned status code {head_response.status_code}"
|
||||
episode['status_code'] = head_response.status_code
|
||||
else:
|
||||
# Check if the content type is audio
|
||||
content_type = head_response.headers.get('Content-Type', '')
|
||||
if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
|
||||
logger.warning(f"Audio URL has non-audio content type: {content_type}")
|
||||
# Don't skip here as some servers might not report the correct content type
|
||||
episode['download_error'] = f"Non-audio content type: {content_type}"
|
||||
else:
|
||||
# If we got here, the audio URL is valid with no issues
|
||||
episode['download_error'] = None
|
||||
episode['status_code'] = head_response.status_code
|
||||
# Skip validation for now - we'll validate when downloading
|
||||
# This prevents the import process from getting stuck on slow HEAD requests
|
||||
# The previous implementation made a HEAD request for each episode, which could
|
||||
# cause timeouts or hanging connections with feeds containing many episodes
|
||||
# Validation will happen when the episode is actually downloaded instead
|
||||
logger.debug(f"Skipping audio URL validation for {episode['title']}")
|
||||
episode['download_error'] = None
|
||||
episode['status_code'] = 200 # Assume success
|
||||
|
||||
# Add the episode regardless of status code
|
||||
episodes.append(episode)
|
||||
|
@ -380,10 +388,10 @@ def get_podcast_episodes(feed_url):
|
|||
logger.warning(f"Skipping episode without audio URL: {episode['title']}")
|
||||
|
||||
logger.info(f"Processed {len(episodes)} valid episodes")
|
||||
return episodes
|
||||
return episodes, podcast_metadata
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting podcast episodes: {str(e)}")
|
||||
return []
|
||||
return [], {}
|
||||
|
||||
def _parse_date(date_str):
|
||||
"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue