Add podgrab featureset

This commit is contained in:
Cody Cook 2025-06-16 22:55:39 -07:00
commit 233dd5b5c0
33 changed files with 2315 additions and 125 deletions

View file

@ -173,6 +173,8 @@ def format_filename(format_string, podcast, episode):
# If episode_number exists but is not a digit, format as S01E{episode_number}
else f"S{episode.season or 1:02d}E{episode.episode_number}"
if episode.episode_number
# If neither season nor episode_number are available, use published date
else episode.published_date.strftime('%Y-%m-%d') if episode.published_date
# Otherwise, return empty string
else ''
),
@ -195,10 +197,23 @@ def format_filename(format_string, podcast, episode):
# Handle empty path segments by removing them
path_parts = formatted_path.split(os.path.sep)
path_parts = [part for part in path_parts if part.strip()]
# Remove empty segments and segments that would be just placeholders without values
cleaned_parts = []
for part in path_parts:
part = part.strip()
if not part:
continue
# Check for common placeholders without values
if part in ["Season ", "Season", "Episode ", "Episode", "E", "S"]:
continue
# Check for patterns like "S01E" without an episode number
if part.startswith("S") and part.endswith("E") and len(part) > 2:
continue
cleaned_parts.append(part)
# Rejoin the path with proper separators
return os.path.sep.join(path_parts)
return os.path.sep.join(cleaned_parts)
def sanitize_filename(filename):
"""
@ -277,6 +292,7 @@ def delete_old_episodes(days=30):
def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
"""
Verify that downloaded episodes still exist on disk and update their status.
Also checks for existing files for episodes that aren't marked as downloaded.
Args:
podcast_id (int, optional): ID of the podcast to check. If None, check all podcasts.
@ -286,23 +302,24 @@ def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
dict: Statistics about the verification process.
"""
from app.models.podcast import Episode, Podcast
from app.models.settings import Settings
# Get episodes to check
# First, verify episodes that are marked as downloaded
query = Episode.query.filter(Episode.downloaded == True)
if podcast_id:
query = query.filter(Episode.podcast_id == podcast_id)
episodes = query.all()
total = len(episodes)
downloaded_episodes = query.all()
total_downloaded = len(downloaded_episodes)
if progress_callback:
progress_callback(0, f"Verifying {total} downloaded episodes")
progress_callback(0, f"Verifying {total_downloaded} downloaded episodes")
missing = 0
for i, episode in enumerate(episodes):
if progress_callback and total > 0:
progress = int((i / total) * 100)
progress_callback(progress, f"Verifying episode {i+1}/{total}")
for i, episode in enumerate(downloaded_episodes):
if progress_callback and total_downloaded > 0:
progress = int((i / total_downloaded) * 50) # Use first half of progress for verification
progress_callback(progress, f"Verifying episode {i+1}/{total_downloaded}")
if not episode.file_path or not os.path.exists(episode.file_path):
episode.downloaded = False
@ -312,15 +329,133 @@ def verify_downloaded_episodes(podcast_id=None, progress_callback=None):
db.session.commit()
if progress_callback:
progress_callback(100, f"Verification complete. {missing} episodes marked as not downloaded.")
# Now check for existing files for episodes that aren't marked as downloaded
query = Episode.query.filter(Episode.downloaded == False)
if podcast_id:
query = query.filter(Episode.podcast_id == podcast_id)
logger.info(f"Verified {total} episodes. {missing} were missing.")
undownloaded_episodes = query.all()
total_undownloaded = len(undownloaded_episodes)
if progress_callback:
progress_callback(50, f"Checking for existing files for {total_undownloaded} undownloaded episodes")
found = 0
if total_undownloaded > 0 and podcast_id:
# Get the podcast
podcast = Podcast.query.get(podcast_id)
if not podcast:
logger.error(f"Podcast with ID {podcast_id} not found")
return {
'total_checked': total_downloaded,
'missing': missing,
'found': 0
}
# Get settings
settings = Settings.query.first()
if not settings:
settings = Settings(
download_path=current_app.config['DOWNLOAD_PATH'],
naming_format="{podcast_title}/{episode_title}"
)
db.session.add(settings)
db.session.commit()
# Use podcast's naming format if available, otherwise use global settings
naming_format = podcast.naming_format or settings.naming_format
download_path = settings.download_path
# Check each undownloaded episode for existing files
for i, episode in enumerate(undownloaded_episodes):
if progress_callback:
progress = 50 + int((i / total_undownloaded) * 50) # Use second half of progress for file matching
progress_callback(progress, f"Checking for file for episode {i+1}/{total_undownloaded}")
try:
# Format filename using the naming format
filename = format_filename(naming_format, podcast, episode)
# Check for common audio file extensions
extensions = ['.mp3', '.m4a', '.ogg', '.wav']
for ext in extensions:
file_path = os.path.normpath(os.path.join(download_path, filename + ext))
if os.path.exists(file_path):
logger.info(f"Found existing file for episode: {file_path}")
episode.downloaded = True
episode.file_path = file_path
found += 1
break
except Exception as e:
logger.error(f"Error checking for existing file for episode {episode.title}: {str(e)}")
db.session.commit()
if progress_callback:
progress_callback(100, f"Verification complete. {missing} episodes marked as not downloaded, {found} files matched.")
logger.info(f"Verified {total_downloaded} episodes. {missing} were missing. Found files for {found} undownloaded episodes.")
return {
'total_checked': total,
'missing': missing
'total_checked': total_downloaded,
'missing': missing,
'found': found
}
def download_all_episodes(podcast_id, progress_callback=None):
    """
    Fetch every episode of one podcast that is not yet marked as downloaded.

    Episodes are processed one at a time; a failure on one episode is logged
    and counted, and the remaining episodes are still attempted.

    Args:
        podcast_id: ID of the Podcast whose pending episodes should be downloaded.
        progress_callback (callable, optional): Invoked as
            ``progress_callback(percent, message)`` to report progress.

    Returns:
        dict: Counters with the keys "total", "downloaded" and "failed".

    Raises:
        ValueError: If no podcast exists for ``podcast_id``.
    """
    from app.models.podcast import Podcast, Episode

    if progress_callback:
        progress_callback(2, "Loading podcast data")

    # Only the existence of the podcast matters here; the row itself is unused.
    if not Podcast.query.get(podcast_id):
        raise ValueError(f"Podcast with ID {podcast_id} not found")

    # Pending work: episodes of this podcast still flagged as not downloaded.
    pending = Episode.query.filter_by(podcast_id=podcast_id, downloaded=False).all()
    pending_count = len(pending)

    if progress_callback:
        progress_callback(5, f"Found {pending_count} episodes to download")

    # Guard clause: nothing to fetch, report completion and bail out early.
    if not pending_count:
        if progress_callback:
            progress_callback(100, "No episodes to download")
        return {"total": 0, "downloaded": 0, "failed": 0}

    succeeded = 0
    failed = 0

    for position, episode in enumerate(pending, start=1):
        if progress_callback:
            # The loop occupies the 5%..95% band of the progress range.
            percent = 5 + int(((position - 1) / pending_count) * 90)
            progress_callback(
                percent,
                f"Downloading episode {position}/{pending_count}: {episode.title}",
            )

        try:
            download_episode(episode.id)
            succeeded += 1
            logger.info(f"Downloaded episode {position}/{pending_count}: {episode.title}")
        except Exception as e:
            # Best-effort batch: record the failure and keep going.
            failed += 1
            logger.error(f"Error downloading episode {episode.title}: {str(e)}")

    stats = {"total": pending_count, "downloaded": succeeded, "failed": failed}

    if progress_callback:
        progress_callback(
            100,
            f"Download complete. Downloaded {stats['downloaded']} episodes, {stats['failed']} failed.",
        )

    logger.info(f"Podcast archive download completed: {stats}")
    return stats
def rename_episode(episode_id, new_format=None, progress_callback=None):
"""
Rename a downloaded episode file using a new format.