Add podgrab featureset
This commit is contained in:
parent
095bf52a2f
commit
233dd5b5c0
33 changed files with 2315 additions and 125 deletions
|
@ -142,15 +142,126 @@ def get_podcast_episodes(feed_url):
|
|||
'published_date': _parse_date(entry.get('published')),
|
||||
'guid': entry.get('id', ''),
|
||||
'duration': _parse_duration(entry.get('itunes_duration', '')),
|
||||
'season': entry.get('itunes_season'), # Season number
|
||||
'episode_number': entry.get('itunes_episode', ''), # Episode number within season
|
||||
'season': None, # Default to None
|
||||
'episode_number': None, # Default to None, will try to extract from various sources
|
||||
'explicit': False # Default to False
|
||||
}
|
||||
|
||||
# Handle explicit flag safely
|
||||
itunes_explicit = entry.get('itunes_explicit', '')
|
||||
if isinstance(itunes_explicit, str) and itunes_explicit:
|
||||
episode['explicit'] = itunes_explicit.lower() == 'yes'
|
||||
# Handle season tag - try multiple ways to access it
|
||||
try:
|
||||
# Try as attribute first
|
||||
if hasattr(entry, 'itunes_season'):
|
||||
episode['season'] = int(entry.itunes_season) if entry.itunes_season else None
|
||||
logger.debug(f"Found season as attribute: {episode['season']}")
|
||||
# Try as dictionary key
|
||||
elif entry.get('itunes_season'):
|
||||
episode['season'] = int(entry.get('itunes_season')) if entry.get('itunes_season') else None
|
||||
logger.debug(f"Found season as dict key: {episode['season']}")
|
||||
# Try looking in tags
|
||||
elif hasattr(entry, 'tags'):
|
||||
for tag in entry.tags:
|
||||
if tag.get('term', '').startswith('Season'):
|
||||
try:
|
||||
episode['season'] = int(tag.get('term').replace('Season', '').strip())
|
||||
logger.debug(f"Found season in tags: {episode['season']}")
|
||||
break
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Error parsing season: {str(e)}")
|
||||
|
||||
# Handle episode number - try multiple ways to access it
|
||||
try:
|
||||
# Try as attribute first (itunes_episode)
|
||||
if hasattr(entry, 'itunes_episode') and entry.itunes_episode:
|
||||
episode['episode_number'] = entry.itunes_episode
|
||||
logger.debug(f"Found episode number as attribute: {episode['episode_number']}")
|
||||
# Try as dictionary key
|
||||
elif entry.get('itunes_episode'):
|
||||
episode['episode_number'] = entry.get('itunes_episode')
|
||||
logger.debug(f"Found episode number as dict key: {episode['episode_number']}")
|
||||
# Try to extract from title if it contains "Episode X" or "Ep X" or "#X"
|
||||
elif episode['title']:
|
||||
import re
|
||||
# Common patterns for episode numbers in titles
|
||||
patterns = [
|
||||
r'Episode\s+(\d+)', # "Episode 123"
|
||||
r'Ep\s*(\d+)', # "Ep123" or "Ep 123"
|
||||
r'#(\d+)', # "#123"
|
||||
r'E(\d+)', # "E123" or "S1E123"
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, episode['title'], re.IGNORECASE)
|
||||
if match:
|
||||
episode['episode_number'] = match.group(1)
|
||||
logger.debug(f"Extracted episode number from title: {episode['episode_number']}")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"Error parsing episode number: {str(e)}")
|
||||
|
||||
# Handle explicit flag - try multiple ways to access it
|
||||
try:
|
||||
# Try as attribute first
|
||||
if hasattr(entry, 'itunes_explicit'):
|
||||
explicit_value = entry.itunes_explicit
|
||||
if isinstance(explicit_value, str):
|
||||
episode['explicit'] = explicit_value.lower() in ('yes', 'true')
|
||||
logger.debug(f"Found explicit as attribute: {episode['explicit']}")
|
||||
# Try as dictionary key
|
||||
elif entry.get('itunes_explicit'):
|
||||
explicit_value = entry.get('itunes_explicit')
|
||||
if isinstance(explicit_value, str):
|
||||
episode['explicit'] = explicit_value.lower() in ('yes', 'true')
|
||||
logger.debug(f"Found explicit as dict key: {episode['explicit']}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error parsing explicit flag: {str(e)}")
|
||||
|
||||
# Handle the different combinations of season and episode numbers
|
||||
# Case 1: No season, no episode - use published date to create a sequential order
|
||||
if episode['season'] is None and (episode['episode_number'] is None or episode['episode_number'] == ''):
|
||||
if episode['published_date']:
|
||||
# Use the publication date to create a pseudo-episode number
|
||||
# Format: YYYYMMDD (e.g., 20230101 for January 1, 2023)
|
||||
episode['episode_number'] = episode['published_date'].strftime('%Y%m%d')
|
||||
logger.debug(f"No season or episode number, using date as episode number: {episode['episode_number']}")
|
||||
else:
|
||||
# If no publication date, use a placeholder
|
||||
episode['episode_number'] = "unknown"
|
||||
logger.debug("No season, episode number, or date available")
|
||||
|
||||
# Case 2: No season, but episode number exists - keep episode number as is
|
||||
elif episode['season'] is None and episode['episode_number'] is not None:
|
||||
logger.debug(f"Using episode number without season: {episode['episode_number']}")
|
||||
|
||||
# Case 3: Season exists, no episode number - use season as prefix for ordering
|
||||
elif episode['season'] is not None and (episode['episode_number'] is None or episode['episode_number'] == ''):
|
||||
if episode['published_date']:
|
||||
# Use the publication date with season prefix
|
||||
# Format: S01_YYYYMMDD
|
||||
episode['episode_number'] = f"S{episode['season']:02d}_{episode['published_date'].strftime('%Y%m%d')}"
|
||||
logger.debug(f"Season without episode number, using season+date: {episode['episode_number']}")
|
||||
else:
|
||||
# If no publication date, use season with unknown suffix
|
||||
episode['episode_number'] = f"S{episode['season']:02d}_unknown"
|
||||
logger.debug(f"Season without episode number or date: {episode['episode_number']}")
|
||||
|
||||
# Case 4: Both season and episode exist - format as S01E02
|
||||
elif episode['season'] is not None and episode['episode_number'] is not None:
|
||||
# Check if episode_number is already formatted as S01E02
|
||||
import re
|
||||
if not re.match(r'^S\d+E\d+$', str(episode['episode_number']), re.IGNORECASE):
|
||||
try:
|
||||
# Try to convert episode_number to integer for proper formatting
|
||||
ep_num = int(episode['episode_number'])
|
||||
episode['episode_number'] = f"S{episode['season']:02d}E{ep_num:02d}"
|
||||
logger.debug(f"Formatted season and episode as: {episode['episode_number']}")
|
||||
except (ValueError, TypeError):
|
||||
# If episode_number can't be converted to int, use as is with season prefix
|
||||
episode['episode_number'] = f"S{episode['season']:02d}_{episode['episode_number']}"
|
||||
logger.debug(f"Using season prefix with non-numeric episode: {episode['episode_number']}")
|
||||
else:
|
||||
logger.debug(f"Episode already formatted correctly: {episode['episode_number']}")
|
||||
|
||||
# Generate a GUID if one is not provided
|
||||
if not episode['guid']:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue