Implement image fetching and storage functionality with logging

This commit is contained in:
2026-05-11 15:59:49 +02:00
parent a43ce47339
commit 9dc13fb9be
3 changed files with 156 additions and 8 deletions
+83
View File
@@ -0,0 +1,83 @@
import aiohttp
import os
from dotenv import load_dotenv
import logging
import xml.etree.ElementTree as ET
import re
import uuid
from urllib.parse import urlparse
async def get_latest_hot_posts(subreddits):
    """Fetch hot image posts from the given subreddits via Reddit's RSS feeds.

    Args:
        subreddits: comma-separated subreddit names, e.g. "bunnies,bnuuy".

    Returns:
        A list of (title, image_url, subreddit) tuples for entries whose
        content links to a .jpg/.jpeg/.png/.gif image. Subreddits whose
        feed cannot be fetched or parsed are logged and skipped.
    """
    # Keep the default as a string for consistency with env values; it is
    # only interpolated into the URL below.
    post_limit = os.getenv("POST_LIMIT", "20")
    headers = {"User-Agent": "Mozilla/5.0 (compatible; bnuy-api/0.0.1)"}
    ns = {"atom": "http://www.w3.org/2005/Atom"}
    # Reddit's RSS content embeds the target as an <a href="...">[link]</a>
    # anchor; compile once instead of per entry.
    link_pattern = re.compile(r'<a href="([^"]+)">\[link\]</a>')
    posts = []
    logging.info(f"SUBREDDITS raw: {subreddits!r}, split: {subreddits.split(',')!r}")
    # Reuse one HTTP session for all subreddit requests instead of opening
    # a new session per loop iteration.
    async with aiohttp.ClientSession() as session:
        for subreddit in subreddits.split(","):
            name = subreddit.strip()
            url = f"https://www.reddit.com/r/{name}/hot.rss?limit={post_limit}"
            logging.info(f"Fetching hot posts from r/{name}...")
            try:
                async with session.get(url, headers=headers) as response:
                    if response.status != 200:
                        logging.error(f"Failed to fetch RSS feed: {response.status}")
                        continue
                    xml = await response.text()
            except Exception as e:
                logging.error(f"Error fetching RSS feed: {e}")
                continue
            try:
                root = ET.fromstring(xml)
            except ET.ParseError as e:
                logging.error(f"Error parsing RSS feed: {e}")
                continue
            for entry in root.findall("atom:entry", ns):
                post_title = entry.find("atom:title", ns)
                content = entry.find("atom:content", ns)
                if post_title is None or content is None or content.text is None:
                    continue
                link_match = link_pattern.search(content.text)
                if link_match and link_match.group(1).lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):
                    logging.debug(f"Found image post: {post_title.text} - {link_match.group(1)}")
                    posts.append((post_title.text, link_match.group(1), name))
    return posts
async def save_picture(pool):
    """Download new image posts and record them in the `images` table.

    Fetches the current image posts for the configured subreddits, skips
    URLs already present in the database, downloads each new image into
    data/images/ under a UUID-based filename, and inserts a row per image.

    Args:
        pool: async DB connection pool (asyncmy-style, %s placeholders).
    """
    os.makedirs("data/images", exist_ok=True)
    subreddits = os.getenv("SUBREDDITS", "bunnies,bnuuy")
    posts = await get_latest_hot_posts(subreddits)
    if not posts:
        logging.info("No image posts found.")
        return
    for title, url, subreddit in posts:
        # Skip URLs we have already stored.
        async with pool.acquire() as conn:
            async with conn.cursor() as cursor:
                await cursor.execute("SELECT COUNT(*) FROM images WHERE url = %s", (url,))
                result = await cursor.fetchone()
                if result[0] > 0:
                    logging.info(f"Post already exists in database: {title} - {url}")
                    continue
        logging.info(f"Saving post: {title} - {url}")
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        logging.error(f"Failed to download image: {response.status}")
                        continue
                    content = await response.read()
            # Collision-free local name, preserving the URL's extension.
            stored_name = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[1]
            filename = os.path.join("data/images", stored_name)
            with open(filename, "wb") as f:
                f.write(content)
            logging.info(f"Saved image to {filename}")
            async with pool.acquire() as conn:
                async with conn.cursor() as cursor:
                    await cursor.execute(
                        "INSERT INTO images (url, filename, subreddit) VALUES (%s, %s, %s)",
                        (url, stored_name, subreddit)
                    )
                    await conn.commit()
        except Exception as e:
            logging.error(f"Error saving image: {e}")
+71 -7
View File
@@ -1,8 +1,31 @@
from fastapi import FastAPI import aiohttp
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import asyncmy import asyncmy
import asyncio import asyncio
import os import os
from dotenv import load_dotenv
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
import logging
# Load environment configuration before reading any settings.
load_dotenv()

# Configure logging to both a file and the console.
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
log_formatter = logging.Formatter(
    fmt="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# FileHandler does not create parent directories; ensure data/ exists so
# opening data/api.log cannot raise FileNotFoundError on first run.
os.makedirs("data", exist_ok=True)
file_handler = logging.FileHandler("data/api.log", encoding="utf-8")
file_handler.setFormatter(log_formatter)
console_handler = logging.StreamHandler()
console_handler.setFormatter(log_formatter)
logging.basicConfig(
    # Fall back to INFO if LOG_LEVEL does not name a valid level.
    level=getattr(logging, log_level, logging.INFO),
    handlers=[file_handler, console_handler],
)
@asynccontextmanager @asynccontextmanager
async def connect_db(app: FastAPI): async def connect_db(app: FastAPI):
@@ -15,18 +38,34 @@ async def connect_db(app: FastAPI):
minsize=5, minsize=5,
maxsize=20 maxsize=20
) )
await create_tables(app.state.pool)
task = asyncio.create_task(fetch_images())
try: try:
yield yield
finally: finally:
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
app.state.pool.close() app.state.pool.close()
await app.state.pool.wait_closed() await app.state.pool.wait_closed()
app = FastAPI(lifespan=connect_db) app = FastAPI(lifespan=connect_db)
async def create_tables(pool):
    """Create the `images` table if it does not already exist.

    Args:
        pool: async DB connection pool (asyncmy-style).
    """
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            await cursor.execute("""
                CREATE TABLE IF NOT EXISTS images (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    url VARCHAR(255) NOT NULL,
                    filename VARCHAR(255) NOT NULL,
                    subreddit VARCHAR(255) NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
            await conn.commit()
@app.get("/") @app.get("/")
async def root(): async def root():
@@ -34,5 +73,30 @@ async def root():
@app.get("/random")
async def get_random_bnuy():
    """Placeholder endpoint — random image selection is not implemented yet."""
    return {"message": "here could be a bnuy, if I would've implemented it"}
@app.get("/images/{filename}")
async def get_image(filename: str):
    """Serve a stored image by its filename.

    Args:
        filename: the stored filename, as recorded in the `images` table.

    Raises:
        HTTPException: 404 when the filename is not in the database, or
            the database row exists but the file is missing on disk.
    """
    async with app.state.pool.acquire() as conn:
        async with conn.cursor() as cursor:
            # Only serve filenames present in the DB — arbitrary/traversal
            # paths never match a row, so they fall through to 404.
            await cursor.execute("SELECT filename FROM images WHERE filename = %s", (filename,))
            result = await cursor.fetchone()
            if result:
                filepath = os.path.join("data/images", result[0])
                if os.path.exists(filepath):
                    return FileResponse(filepath)
                else:
                    raise HTTPException(status_code=404, detail="Image file not found")
            else:
                raise HTTPException(status_code=404, detail="Image not found")
async def fetch_images():
    """Background task: periodically collect images from Reddit.

    Runs forever; an error in one collection pass is logged and the loop
    continues after the next sleep rather than killing the task.
    """
    # Imported lazily to avoid a circular import at module load time.
    from collector import save_picture
    while True:
        try:
            logging.info("Starting image collection...")
            await save_picture(app.state.pool)
            # Message now matches the actual 86400 s interval below
            # (previously claimed "1 hour").
            logging.info("Image collection completed. Sleeping for 24 hours...")
        except Exception as e:
            logging.error(f"Error during image collection: {e}")
        await asyncio.sleep(86400)  # Sleep for 24 hours
+2 -1
View File
@@ -1,2 +1,3 @@
fastapi[standard] fastapi[standard]
asyncmy asyncmy
aiohttp
python-dotenv