Docker and more calendar work

parent 4527504c80 · commit f7a919ebf2
22 changed files with 2036 additions and 79 deletions
@@ -22,16 +22,16 @@ def parse_opml(opml_content):
     """
     try:
         root = ET.fromstring(opml_content)

         # Find all outline elements that represent podcasts
         podcasts = []

         # Look for outlines in the body
         body = root.find('body')
         if body is None:
             logger.error("OPML file has no body element")
             return []

         # Process all outline elements
         for outline in body.findall('.//outline'):
             # Check if this is a podcast outline (has xmlUrl attribute)
@@ -44,7 +44,7 @@ def parse_opml(opml_content):
                 'html_url': outline.get('htmlUrl', '')
             }
             podcasts.append(podcast)

         logger.info(f"Parsed OPML file and found {len(podcasts)} podcasts")
         return podcasts
     except Exception as e:
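For reviewers unfamiliar with the OPML shape parse_opml expects, a minimal input/output sketch follows. The dict keys ('title', 'description', 'feed_url', 'html_url') come from the hunks above; the sample feed values are invented for illustration.

sample_opml = """<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
  <head><title>Subscriptions</title></head>
  <body>
    <outline type="rss" text="Example Show" title="Example Show"
             xmlUrl="https://example.com/feed.xml"
             htmlUrl="https://example.com/" />
  </body>
</opml>"""

# parse_opml(sample_opml) should return something like:
# [{'title': 'Example Show', 'description': '',
#   'feed_url': 'https://example.com/feed.xml',
#   'html_url': 'https://example.com/'}]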
@@ -65,17 +65,17 @@ def generate_opml(podcasts):
         # Create the root element
         root = ET.Element('opml')
         root.set('version', '2.0')

         # Create the head element
         head = ET.SubElement(root, 'head')
         title = ET.SubElement(head, 'title')
         title.text = 'Podcastrr Subscriptions'
         date_created = ET.SubElement(head, 'dateCreated')
         date_created.text = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')

         # Create the body element
         body = ET.SubElement(root, 'body')

         # Add each podcast as an outline element
         for podcast in podcasts:
             outline = ET.SubElement(body, 'outline')
@@ -85,24 +85,25 @@ def generate_opml(podcasts):
             outline.set('xmlUrl', podcast.feed_url)
             if podcast.description:
                 outline.set('description', podcast.description)

         # Convert to pretty-printed XML
         xml_str = ET.tostring(root, encoding='utf-8')
         parsed_xml = minidom.parseString(xml_str)
         pretty_xml = parsed_xml.toprettyxml(indent=" ")

         logger.info(f"Generated OPML file with {len(podcasts)} podcasts")
         return pretty_xml
     except Exception as e:
         logger.error(f"Error generating OPML file: {str(e)}")
         return ""

-def import_podcasts_from_opml(opml_content):
+def import_podcasts_from_opml(opml_content, progress_callback=None):
     """
     Import podcasts from OPML content into the database.

     Args:
         opml_content (str): OPML file content.
+        progress_callback (callable, optional): Function to call with progress updates.

     Returns:
         dict: Statistics about the import process.
@@ -110,46 +111,59 @@ def import_podcasts_from_opml(opml_content):
     from app.models.podcast import Podcast
     from app.models.database import db
     from app.services.podcast_updater import update_podcast

     podcasts = parse_opml(opml_content)

     stats = {
         'total': len(podcasts),
         'imported': 0,
         'skipped': 0,
         'errors': 0
     }

-    for podcast_data in podcasts:
+    # Initial progress update
+    if progress_callback:
+        progress_callback(0, f"Starting import of {len(podcasts)} podcasts")
+
+    for i, podcast_data in enumerate(podcasts):
         try:
             # Check if podcast already exists
             existing = Podcast.query.filter_by(feed_url=podcast_data['feed_url']).first()

             if existing:
                 logger.info(f"Podcast already exists: {podcast_data['title']}")
                 stats['skipped'] += 1
                 continue

             # Create new podcast
             podcast = Podcast(
                 title=podcast_data['title'],
                 description=podcast_data.get('description', ''),
                 feed_url=podcast_data['feed_url']
             )

             db.session.add(podcast)
             db.session.commit()

             # Update podcast to fetch episodes
             try:
                 update_podcast(podcast.id)
             except Exception as e:
                 logger.error(f"Error updating podcast {podcast.title}: {str(e)}")

             stats['imported'] += 1
             logger.info(f"Imported podcast: {podcast.title}")
         except Exception as e:
             stats['errors'] += 1
             logger.error(f"Error importing podcast: {str(e)}")

-    return stats
+        # Update progress during the loop
+        if progress_callback and len(podcasts) > 0:
+            progress = int((i + 1) / len(podcasts) * 100)
+            progress_callback(progress, f"Processed {i + 1}/{len(podcasts)} podcasts")
+
+    # Final progress update
+    if progress_callback:
+        progress_callback(100, f"Import completed. Imported: {stats['imported']}, Skipped: {stats['skipped']}, Errors: {stats['errors']}")
+
+    return stats
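Neither hunk pins the callback down beyond a (percent, message) call signature, so any callable works. A minimal consumer, with report_progress and the in-memory import_status dict invented for illustration:

import_status = {'percent': 0, 'message': ''}  # invented in-memory store

def report_progress(percent, message):
    # Record the latest progress so a polling endpoint or log can surface it
    import_status['percent'] = percent
    import_status['message'] = message

# Assuming this module's import_podcasts_from_opml is importable:
# with open('subscriptions.opml', encoding='utf-8') as f:
#     stats = import_podcasts_from_opml(f.read(), progress_callback=report_progress)
# stats would look like {'total': 10, 'imported': 8, 'skipped': 2, 'errors': 0}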
@@ -88,12 +88,12 @@ def get_podcast_episodes(feed_url):
         feed_url (str): URL of the podcast RSS feed.

     Returns:
-        list: List of episode dictionaries.
+        tuple: (list of episode dictionaries, podcast metadata dictionary)
     """
     try:
         if not feed_url:
             logger.error("Empty feed URL provided")
-            return []
+            return [], {}

         logger.info(f"Fetching episodes from feed: {feed_url}")
@@ -130,6 +130,27 @@ def get_podcast_episodes(feed_url):
         logger.info(f"Found {len(feed.entries)} entries in feed")

+        # Extract podcast metadata
+        podcast_metadata = {
+            'title': feed.feed.get('title', ''),
+            'description': feed.feed.get('description', feed.feed.get('subtitle', '')),
+            'author': feed.feed.get('author', feed.feed.get('itunes_author', '')),
+            'image_url': None  # Default to None, will try to extract below
+        }
+
+        # Try to get podcast image URL from various locations in the feed
+        if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
+            podcast_metadata['image_url'] = feed.feed.image.href
+            logger.debug(f"Found podcast image in feed.image.href: {podcast_metadata['image_url']}")
+        elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
+            podcast_metadata['image_url'] = feed.feed.itunes_image.href
+            logger.debug(f"Found podcast image in feed.itunes_image.href: {podcast_metadata['image_url']}")
+        elif 'image' in feed.feed and 'href' in feed.feed.image:
+            podcast_metadata['image_url'] = feed.feed.image.href
+            logger.debug(f"Found podcast image in feed.image['href']: {podcast_metadata['image_url']}")
+
+        logger.info(f"Extracted podcast metadata: title='{podcast_metadata['title']}', image_url={podcast_metadata['image_url']}")
+
         episodes = []
         for entry in feed.entries:
             # Log entry details for debugging
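The fallback chain above exists because feeds expose artwork in different places (RSS <image> vs. itunes:image), and which attribute feedparser populates can vary by feed and feedparser version. A small smoke-test sketch, with the feed XML invented:

import feedparser

sample_feed = """<?xml version="1.0"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  <channel>
    <title>Example Show</title>
    <itunes:image href="https://example.com/art.jpg"/>
  </channel>
</rss>"""

feed = feedparser.parse(sample_feed)
image_url = None
# Mirror the hasattr chain from the hunk above
if hasattr(feed.feed, 'image') and hasattr(feed.feed.image, 'href'):
    image_url = feed.feed.image.href
elif hasattr(feed.feed, 'itunes_image') and hasattr(feed.feed.itunes_image, 'href'):
    image_url = feed.feed.itunes_image.href
print(feed.feed.get('title'), image_url)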
@@ -344,27 +365,14 @@ def get_podcast_episodes(feed_url):
                     logger.warning(f"Invalid audio URL format: {episode['audio_url']}")
                     continue

-                # Try to validate the URL without downloading the file
-                import requests
-                head_response = requests.head(episode['audio_url'], timeout=5, allow_redirects=True)
-
-                # Check if the URL is accessible
-                if head_response.status_code >= 400:
-                    logger.warning(f"Audio URL returned status code {head_response.status_code}: {episode['audio_url']}")
-                    # Instead of skipping, add the episode with error information
-                    episode['download_error'] = f"Server returned status code {head_response.status_code}"
-                    episode['status_code'] = head_response.status_code
-                else:
-                    # Check if the content type is audio
-                    content_type = head_response.headers.get('Content-Type', '')
-                    if not content_type.startswith('audio/') and 'application/octet-stream' not in content_type:
-                        logger.warning(f"Audio URL has non-audio content type: {content_type}")
-                        # Don't skip here as some servers might not report the correct content type
-                        episode['download_error'] = f"Non-audio content type: {content_type}"
-                    else:
-                        # If we got here, the audio URL is valid with no issues
-                        episode['download_error'] = None
-                        episode['status_code'] = head_response.status_code
+                # Skip validation for now - we'll validate when downloading
+                # This prevents the import process from getting stuck on slow HEAD requests
+                # The previous implementation made a HEAD request for each episode, which could
+                # cause timeouts or hanging connections with feeds containing many episodes
+                # Validation will happen when the episode is actually downloaded instead
+                logger.debug(f"Skipping audio URL validation for {episode['title']}")
+                episode['download_error'] = None
+                episode['status_code'] = 200  # Assume success

                 # Add the episode regardless of status code
                 episodes.append(episode)
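With the per-episode HEAD check gone, validation is deferred to download time. One way the downloader could perform the same check, sketched under the assumption that it uses requests (validate_audio_url is a hypothetical helper, not part of this commit):

import requests

def validate_audio_url(audio_url, timeout=10):
    """Hypothetical download-time check mirroring the removed HEAD validation."""
    try:
        response = requests.get(audio_url, timeout=timeout, stream=True, allow_redirects=True)
        content_type = response.headers.get('Content-Type', '')
        ok = response.status_code < 400 and (
            content_type.startswith('audio/') or 'application/octet-stream' in content_type
        )
        response.close()  # stream=True: close without reading the body
        return ok, response.status_code, content_type
    except requests.RequestException as exc:
        return False, None, str(exc)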
@@ -380,10 +388,10 @@ def get_podcast_episodes(feed_url):
                 logger.warning(f"Skipping episode without audio URL: {episode['title']}")

         logger.info(f"Processed {len(episodes)} valid episodes")
-        return episodes
+        return episodes, podcast_metadata
     except Exception as e:
         logger.error(f"Error getting podcast episodes: {str(e)}")
-        return []
+        return [], {}

 def _parse_date(date_str):
     """
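Note that switching the return value from a list to a tuple is a breaking change for every caller of get_podcast_episodes; update_podcast below is adjusted accordingly, and any other call site needs the same unpacking:

# Before: episodes = get_podcast_episodes(feed_url)
# After:
episodes, podcast_metadata = get_podcast_episodes(feed_url)
episodes, _ = get_podcast_episodes(feed_url)  # when only episodes are needed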
@@ -13,14 +13,18 @@ from app.services.podcast_downloader import download_episode
 # Set up logging
 logger = logging.getLogger(__name__)

-def update_all_podcasts():
+def update_all_podcasts(progress_callback=None):
     """
     Update all podcasts in the database.

+    Args:
+        progress_callback (callable, optional): Callback function for progress updates.
+
     Returns:
         dict: Statistics about the update process.
     """
     podcasts = Podcast.query.all()
+    total_podcasts = len(podcasts)

     stats = {
         'podcasts_updated': 0,
@@ -29,16 +33,32 @@ def update_all_podcasts():
         'errors': 0
     }

-    for podcast in podcasts:
+    if progress_callback:
+        progress_callback(0, f"Starting update of {total_podcasts} podcasts")
+
+    for i, podcast in enumerate(podcasts):
         try:
+            if progress_callback:
+                progress = int((i / total_podcasts) * 100)
+                progress_callback(progress, f"Updating podcast {i+1}/{total_podcasts}: {podcast.title}")
+
             result = update_podcast(podcast.id)
             stats['podcasts_updated'] += 1
             stats['new_episodes'] += result['new_episodes']
             stats['episodes_downloaded'] += result['episodes_downloaded']
+
+            if progress_callback:
+                progress_callback(progress, f"Updated podcast {i+1}/{total_podcasts}: {podcast.title} - Found {result['new_episodes']} new episodes")
         except Exception as e:
             logger.error(f"Error updating podcast {podcast.title}: {str(e)}")
             stats['errors'] += 1
+
+            if progress_callback:
+                progress_callback(progress, f"Error updating podcast {i+1}/{total_podcasts}: {podcast.title} - {str(e)}")
+
+    if progress_callback:
+        progress_callback(100, f"Update complete. Updated {stats['podcasts_updated']} podcasts, found {stats['new_episodes']} new episodes.")

     return stats

 def update_podcast(podcast_id, progress_callback=None):
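Progress callbacks are most useful when the update runs off the request thread. A sketch of one possible wiring, with progress_state and run_update_in_background invented for illustration (a real Flask app would also need an application context around the DB work):

import threading

progress_state = {'percent': 0, 'message': ''}  # invented shared store

def on_progress(percent, message):
    progress_state['percent'] = percent
    progress_state['message'] = message

def run_update_in_background():
    # Daemon thread so a long-running update cannot block shutdown
    worker = threading.Thread(
        target=update_all_podcasts,
        kwargs={'progress_callback': on_progress},
        daemon=True,
    )
    worker.start()
    return worker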
@@ -67,12 +87,40 @@ def update_podcast(podcast_id, progress_callback=None):
     if progress_callback:
         progress_callback(10, f"Fetching episodes for {podcast.title}")

-    # Get episodes from feed
-    episodes = get_podcast_episodes(podcast.feed_url)
+    # Get episodes and podcast metadata from feed
+    episodes, podcast_metadata = get_podcast_episodes(podcast.feed_url)

     # Update podcast last_checked timestamp
     podcast.last_checked = datetime.utcnow()

+    # Update podcast metadata if available
+    updated = False
+
+    # Update image URL if available
+    if podcast_metadata.get('image_url'):
+        if podcast.image_url != podcast_metadata['image_url']:
+            logger.info(f"Updating podcast image URL from {podcast.image_url} to {podcast_metadata['image_url']}")
+            podcast.image_url = podcast_metadata['image_url']
+            updated = True
+
+    # Update author if available
+    if podcast_metadata.get('author'):
+        if podcast.author != podcast_metadata['author']:
+            logger.info(f"Updating podcast author from '{podcast.author}' to '{podcast_metadata['author']}'")
+            podcast.author = podcast_metadata['author']
+            updated = True
+
+    # Update description if available
+    if podcast_metadata.get('description'):
+        if podcast.description != podcast_metadata['description']:
+            logger.info(f"Updating podcast description")
+            podcast.description = podcast_metadata['description']
+            updated = True
+
+    # Commit changes if any updates were made
+    if updated:
+        db.session.commit()
+
     if progress_callback:
         progress_callback(30, f"Found {len(episodes)} episodes")
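The three compare-and-set blocks above, and their duplicates in the feed-refresh path below, repeat one pattern; a possible consolidation, offered purely as a refactoring sketch and not part of this commit:

def _sync_field(podcast, field, new_value):
    """Set podcast.<field> to new_value if it is non-empty and different.

    Returns True when a change was made. Sketch only; not in this commit.
    """
    if not new_value or getattr(podcast, field) == new_value:
        return False
    logger.info(f"Updating podcast {field} to {new_value!r}")
    setattr(podcast, field, new_value)
    return True

# Building the list first ensures every field is checked (any() alone
# would short-circuit):
# updated = any([
#     _sync_field(podcast, 'image_url', podcast_metadata.get('image_url')),
#     _sync_field(podcast, 'author', podcast_metadata.get('author')),
#     _sync_field(podcast, 'description', podcast_metadata.get('description')),
# ])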
@@ -103,8 +151,36 @@ def update_podcast(podcast_id, progress_callback=None):
                 db.session.commit()

                 # Try again with the new feed URL
-                episodes = get_podcast_episodes(podcast.feed_url)
+                episodes, updated_metadata = get_podcast_episodes(podcast.feed_url)
                 logger.info(f"Found {len(episodes)} episodes with updated feed URL")
+
+                # Update podcast metadata with the new feed
+                updated_from_new_feed = False
+
+                # Update image URL if available
+                if updated_metadata.get('image_url'):
+                    if podcast.image_url != updated_metadata['image_url']:
+                        logger.info(f"Updating podcast image URL from new feed: {updated_metadata['image_url']}")
+                        podcast.image_url = updated_metadata['image_url']
+                        updated_from_new_feed = True
+
+                # Update author if available
+                if updated_metadata.get('author'):
+                    if podcast.author != updated_metadata['author']:
+                        logger.info(f"Updating podcast author from new feed: '{updated_metadata['author']}'")
+                        podcast.author = updated_metadata['author']
+                        updated_from_new_feed = True
+
+                # Update description if available
+                if updated_metadata.get('description'):
+                    if podcast.description != updated_metadata['description']:
+                        logger.info(f"Updating podcast description from new feed")
+                        podcast.description = updated_metadata['description']
+                        updated_from_new_feed = True
+
+                # Commit changes if any updates were made
+                if updated_from_new_feed:
+                    db.session.commit()
         except Exception as e:
             logger.error(f"Error refreshing feed URL: {str(e)}")