Implement image fetching and storage functionality with logging

2026-05-11 15:59:49 +02:00
parent a43ce47339
commit 9dc13fb9be
3 changed files with 156 additions and 8 deletions
@@ -0,0 +1,83 @@
+import aiohttp
+import os
+from dotenv import load_dotenv
+import logging
+import xml.etree.ElementTree as ET
+import re
+import uuid
+from urllib.parse import urlparse
+
+async def get_latest_hot_posts(subreddits):
+    post_limit = os.getenv("POST_LIMIT", 20)
+    headers = {"User-Agent": "Mozilla/5.0 (compatible; bnuy-api/0.0.1)"}
+    posts = []
+    logging.info(f"SUBREDDITS raw: {subreddits!r}, split: {subreddits.split(',')!r}")
+
+    for subreddit in subreddits.split(","):
+        url = f"https://www.reddit.com/r/{subreddit.strip()}/hot.rss?limit={post_limit}"
+        logging.info(f"Fetching hot posts from r/{subreddit.strip()}...")
+        async with aiohttp.ClientSession() as session:
+            try:
+                async with session.get(url, headers=headers) as response:
+                    if response.status != 200:
+                        logging.error(f"Failed to fetch RSS feed: {response.status}")
+                        continue
+                    xml = await response.text()
+            except Exception as e:
+                logging.error(f"Error fetching RSS feed: {e}")
+                continue
+        try:
+            root = ET.fromstring(xml)
+            ns = {"atom": "http://www.w3.org/2005/Atom"}
+            for entry in root.findall("atom:entry", ns):
+                post_title = entry.find("atom:title", ns)
+                content = entry.find("atom:content", ns)
+                if post_title is None or content is None or content.text is None:
+                    continue
+                link_match = re.search(r'<a href="([^"]+)">\[link\]</a>', content.text)
+                if link_match and link_match.group(1).lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):
+                    logging.debug(f"Found image post: {post_title.text} - {link_match.group(1)}")
+                    posts.append((post_title.text, link_match.group(1), subreddit.strip()))
+        except ET.ParseError as e:
+            logging.error(f"Error parsing RSS feed: {e}")
+            continue
+    return posts
+
+async def save_picture(pool):
+    os.makedirs("data/images", exist_ok=True)
+    subreddits = os.getenv("SUBREDDITS", "bunnies,bnuuy")
+    posts = await get_latest_hot_posts(subreddits)
+    if not posts:
+        logging.info("No image posts found.")
+        return
+    for title, url, subreddit in posts:
+        async with pool.acquire() as conn:
+                async with conn.cursor() as cursor:
+                    await cursor.execute("SELECT COUNT(*) FROM images WHERE url = %s", (url,))
+                    result = await cursor.fetchone()
+                    if result[0] > 0:
+                        logging.info(f"Post already exists in database: {title} - {url}")
+                        continue
+        logging.info(f"Saving post: {title} - {url}")
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url) as response:
+                    if response.status != 200:
+                        logging.error(f"Failed to download image: {response.status}")
+                        continue
+                    content = await response.read()
+                    generate_filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[1]
+                    filename = os.path.join("data/images", generate_filename)
+                    with open(filename, "wb") as f:
+                        f.write(content)
+                    logging.info(f"Saved image to {filename}")
+                    async with pool.acquire() as conn:
+                        async with conn.cursor() as cursor:
+                            await cursor.execute(
+                                "INSERT INTO images (url, filename, subreddit) VALUES (%s, %s, %s)",
+                                (url, generate_filename, subreddit)
+                            )
+                            await conn.commit()
+        except Exception as e:
+            logging.error(f"Error saving image: {e}")
+