From 9dc13fb9be485ca69ef238db14a7181c581fb279 Mon Sep 17 00:00:00 2001
From: Michelle
Date: Mon, 11 May 2026 15:59:49 +0200
Subject: [PATCH] Implement image fetching and storage functionality with
 logging

---
 collector.py     | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
 main.py          | 78 +++++++++++++++++++++++++++++++++++++++++----
 requirements.txt |  3 +-
 3 files changed, 156 insertions(+), 8 deletions(-)
 create mode 100644 collector.py

diff --git a/collector.py b/collector.py
new file mode 100644
index 0000000..f168219
--- /dev/null
+++ b/collector.py
@@ -0,0 +1,83 @@
+import aiohttp
+import os
+from dotenv import load_dotenv
+import logging
+import xml.etree.ElementTree as ET
+import re
+import uuid
+from urllib.parse import urlparse
+
+async def get_latest_hot_posts(subreddits):
+    # Fetch the hot.rss Atom feed for each comma-separated subreddit and
+    # return (title, image_url, subreddit) tuples for direct-image posts.
+    post_limit = os.getenv("POST_LIMIT", 20)
+    headers = {"User-Agent": "Mozilla/5.0 (compatible; bnuy-api/0.0.1)"}
+    posts = []
+    logging.info(f"SUBREDDITS raw: {subreddits!r}, split: {subreddits.split(',')!r}")
+
+    for subreddit in subreddits.split(","):
+        url = f"https://www.reddit.com/r/{subreddit.strip()}/hot.rss?limit={post_limit}"
+        logging.info(f"Fetching hot posts from r/{subreddit.strip()}...")
+        async with aiohttp.ClientSession() as session:
+            try:
+                async with session.get(url, headers=headers) as response:
+                    if response.status != 200:
+                        logging.error(f"Failed to fetch RSS feed: {response.status}")
+                        continue
+                    xml = await response.text()
+            except Exception as e:
+                logging.error(f"Error fetching RSS feed: {e}")
+                continue
+        try:
+            root = ET.fromstring(xml)
+            ns = {"atom": "http://www.w3.org/2005/Atom"}
+            for entry in root.findall("atom:entry", ns):
+                post_title = entry.find("atom:title", ns)
+                content = entry.find("atom:content", ns)
+                if post_title is None or content is None or content.text is None:
+                    continue
+                link_match = re.search(r'href="([^"]+)">\[link\]', content.text)  # capture group is required: group(1) is read below — NOTE(review): pattern reconstructed, confirm against Reddit's Atom content markup
+                if link_match and link_match.group(1).lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):
+                    logging.debug(f"Found image post: {post_title.text} - {link_match.group(1)}")
+                    posts.append((post_title.text, link_match.group(1), subreddit.strip()))
+        except ET.ParseError as e:
+            logging.error(f"Error parsing RSS feed: {e}")
+            continue
+    return posts
+
+async def save_picture(pool):
+    # Download every new image post to data/images and record it in the
+    # `images` table; posts whose URL is already recorded are skipped.
+    os.makedirs("data/images", exist_ok=True)
+    subreddits = os.getenv("SUBREDDITS", "bunnies,bnuuy")
+    posts = await get_latest_hot_posts(subreddits)
+    if not posts:
+        logging.info("No image posts found.")
+        return
+    for title, url, subreddit in posts:
+        async with pool.acquire() as conn:
+            async with conn.cursor() as cursor:
+                await cursor.execute("SELECT COUNT(*) FROM images WHERE url = %s", (url,))
+                result = await cursor.fetchone()
+                if result[0] > 0:
+                    logging.info(f"Post already exists in database: {title} - {url}")
+                    continue
+        logging.info(f"Saving post: {title} - {url}")
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url) as response:
+                    if response.status != 200:
+                        logging.error(f"Failed to download image: {response.status}")
+                        continue
+                    content = await response.read()
+            generate_filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[1]  # random name, original extension kept
+            filename = os.path.join("data/images", generate_filename)
+            with open(filename, "wb") as f:
+                f.write(content)
+            logging.info(f"Saved image to {filename}")
+            async with pool.acquire() as conn:
+                async with conn.cursor() as cursor:
+                    await cursor.execute(
+                        "INSERT INTO images (url, filename, subreddit) VALUES (%s, %s, %s)",
+                        (url, generate_filename, subreddit)
+                    )
+                    await conn.commit()
+        except Exception as e:
+            logging.error(f"Error saving image: {e}")
+
diff --git a/main.py b/main.py
index 17ff2f7..ec29ab8 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,31 @@
-from fastapi import FastAPI
+import aiohttp
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
 import asyncmy
 import asyncio
 import os
+from dotenv import load_dotenv
 from contextlib import asynccontextmanager
+import logging
+
+load_dotenv()
+
+log_level = os.getenv("LOG_LEVEL", "INFO").upper()
+log_formatter = logging.Formatter(
+    fmt="%(asctime)s [%(levelname)s] %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+
+file_handler = logging.FileHandler("data/api.log", encoding="utf-8")
+file_handler.setFormatter(log_formatter)
+
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(log_formatter)
+
+logging.basicConfig(
+    level=getattr(logging, log_level, logging.INFO),
+    handlers=[file_handler, console_handler],
+)
 
 @asynccontextmanager
 async def connect_db(app: FastAPI):
@@ -15,18 +38,34 @@ async def connect_db(app: FastAPI):
         minsize=5,
         maxsize=20
     )
+    await create_tables(app.state.pool)
+    task = asyncio.create_task(fetch_images())
     try:
         yield
     finally:
+        task.cancel()
+        try:
+            await task
+        except asyncio.CancelledError:
+            pass
         app.state.pool.close()
         await app.state.pool.wait_closed()
 
 app = FastAPI(lifespan=connect_db)
 
-@asynccontextmanager
-async def get_connection():
-    async with app.state.pool.acquire() as conn:
-        yield conn
+async def create_tables(pool):
+    async with pool.acquire() as conn:
+        async with conn.cursor() as cursor:
+            await cursor.execute("""
+                CREATE TABLE IF NOT EXISTS images (
+                    id INT AUTO_INCREMENT PRIMARY KEY,
+                    url VARCHAR(255) NOT NULL,
+                    filename VARCHAR(255) NOT NULL,
+                    subreddit VARCHAR(255) NOT NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            """)
+            await conn.commit()
 
 @app.get("/")
 async def root():
@@ -34,5 +73,30 @@ async def root():
 
 @app.get("/random")
 async def get_random_bnuy():
-    async with get_connection() as conn:
-        return {"message": "here could be a bnuy, if I would've implemented it"}
\ No newline at end of file
+    return {"message": "here could be a bnuy, if I would've implemented it"}
+
+@app.get("/images/{filename}")
+async def get_image(filename: str):
+    async with app.state.pool.acquire() as conn:
+        async with conn.cursor() as cursor:
+            await cursor.execute("SELECT filename FROM images WHERE filename = %s", (filename,))
+            result = await cursor.fetchone()
+            if result:
+                filepath = os.path.join("data/images", result[0])
+                if os.path.exists(filepath):
+                    return FileResponse(filepath)
+                else:
+                    raise HTTPException(status_code=404, detail="Image file not found")
+            else:
+                raise HTTPException(status_code=404, detail="Image not found")
+
+async def fetch_images():
+    from collector import save_picture
+    while True:
+        try:
+            logging.info("Starting image collection...")
+            await save_picture(app.state.pool)
+            logging.info("Image collection completed. Sleeping for 24 hours...")
+        except Exception as e:
+            logging.error(f"Error during image collection: {e}")
+        await asyncio.sleep(86400)  # Sleep for 24 hours
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 36d1d38..5ef1b0d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 fastapi[standard]
-asyncmy
\ No newline at end of file
+asyncmy
+aiohttp
\ No newline at end of file