Implement image fetching and storage functionality with logging
This commit is contained in:
@@ -0,0 +1,83 @@
|
||||
import aiohttp
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import logging
|
||||
import xml.etree.ElementTree as ET
|
||||
import re
|
||||
import uuid
|
||||
from urllib.parse import urlparse
|
||||
|
||||
async def get_latest_hot_posts(subreddits):
    """Fetch hot image posts from the given subreddits' Atom/RSS feeds.

    Args:
        subreddits: Comma-separated subreddit names, e.g. "bunnies,bnuuy".

    Returns:
        A list of (title, image_url, subreddit) tuples for feed entries
        whose content links to a .jpg/.jpeg/.png/.gif image. Feeds that
        fail to download or parse are logged and skipped.
    """
    # POST_LIMIT arrives from the environment as a string; coerce so the
    # env value and the default are the same type.
    post_limit = int(os.getenv("POST_LIMIT", 20))
    headers = {"User-Agent": "Mozilla/5.0 (compatible; bnuy-api/0.0.1)"}
    posts = []

    logging.info(f"SUBREDDITS raw: {subreddits!r}, split: {subreddits.split(',')!r}")

    # Reuse one HTTP session (and its connection pool) for all subreddits
    # instead of opening a fresh session per loop iteration.
    async with aiohttp.ClientSession() as session:
        for subreddit in subreddits.split(","):
            name = subreddit.strip()
            url = f"https://www.reddit.com/r/{name}/hot.rss?limit={post_limit}"
            logging.info(f"Fetching hot posts from r/{name}...")

            try:
                async with session.get(url, headers=headers) as response:
                    if response.status != 200:
                        logging.error(f"Failed to fetch RSS feed: {response.status}")
                        continue
                    # Renamed from `xml` to avoid shadowing the stdlib
                    # package name.
                    feed_xml = await response.text()
            except Exception as e:
                logging.error(f"Error fetching RSS feed: {e}")
                continue

            try:
                root = ET.fromstring(feed_xml)
                ns = {"atom": "http://www.w3.org/2005/Atom"}
                for entry in root.findall("atom:entry", ns):
                    post_title = entry.find("atom:title", ns)
                    content = entry.find("atom:content", ns)
                    # Skip entries with missing title or empty content.
                    if post_title is None or content is None or content.text is None:
                        continue
                    # Reddit embeds the target URL as an HTML anchor
                    # labelled "[link]" inside the entry's content blob.
                    link_match = re.search(r'<a href="([^"]+)">\[link\]</a>', content.text)
                    if link_match and link_match.group(1).lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):
                        logging.debug(f"Found image post: {post_title.text} - {link_match.group(1)}")
                        posts.append((post_title.text, link_match.group(1), name))
            except ET.ParseError as e:
                logging.error(f"Error parsing RSS feed: {e}")
                continue

    return posts
|
||||
|
||||
async def save_picture(pool):
    """Download new image posts and record them in the database.

    Args:
        pool: Async DB connection pool (acquire() yields a connection
            whose cursor() is an async context manager, aiomysql-style —
            assumed from usage; confirm against caller).

    Side effects:
        Writes image files under data/images/ and inserts
        (url, filename, subreddit) rows into the `images` table. Posts
        whose URL is already stored are skipped.
    """
    os.makedirs("data/images", exist_ok=True)

    subreddits = os.getenv("SUBREDDITS", "bunnies,bnuuy")
    posts = await get_latest_hot_posts(subreddits)

    if not posts:
        logging.info("No image posts found.")
        return

    for title, url, subreddit in posts:
        # Dedupe by URL before downloading anything.
        async with pool.acquire() as conn:
            async with conn.cursor() as cursor:
                await cursor.execute("SELECT COUNT(*) FROM images WHERE url = %s", (url,))
                result = await cursor.fetchone()
                if result[0] > 0:
                    logging.info(f"Post already exists in database: {title} - {url}")
                    continue

        logging.info(f"Saving post: {title} - {url}")
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        logging.error(f"Failed to download image: {response.status}")
                        continue
                    content = await response.read()

            # Random name avoids collisions while keeping the original
            # file extension from the URL path.
            generate_filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[1]
            filename = os.path.join("data/images", generate_filename)

            with open(filename, "wb") as f:
                f.write(content)

            # Bug fix: previously logged a literal placeholder instead of
            # the actual saved path.
            logging.info(f"Saved image to {filename}")

            async with pool.acquire() as conn:
                async with conn.cursor() as cursor:
                    await cursor.execute(
                        "INSERT INTO images (url, filename, subreddit) VALUES (%s, %s, %s)",
                        (url, generate_filename, subreddit)
                    )
                    await conn.commit()
        except Exception as e:
            logging.error(f"Error saving image: {e}")
|
||||
|
||||
Reference in New Issue
Block a user