Implement image fetching and storage functionality with logging
This commit is contained in:
@@ -0,0 +1,83 @@
|
|||||||
|
import aiohttp
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import logging
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import re
|
||||||
|
import uuid
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
async def get_latest_hot_posts(subreddits):
|
||||||
|
post_limit = os.getenv("POST_LIMIT", 20)
|
||||||
|
headers = {"User-Agent": "Mozilla/5.0 (compatible; bnuy-api/0.0.1)"}
|
||||||
|
posts = []
|
||||||
|
logging.info(f"SUBREDDITS raw: {subreddits!r}, split: {subreddits.split(',')!r}")
|
||||||
|
|
||||||
|
for subreddit in subreddits.split(","):
|
||||||
|
url = f"https://www.reddit.com/r/{subreddit.strip()}/hot.rss?limit={post_limit}"
|
||||||
|
logging.info(f"Fetching hot posts from r/{subreddit.strip()}...")
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
try:
|
||||||
|
async with session.get(url, headers=headers) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
logging.error(f"Failed to fetch RSS feed: {response.status}")
|
||||||
|
continue
|
||||||
|
xml = await response.text()
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error fetching RSS feed: {e}")
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
root = ET.fromstring(xml)
|
||||||
|
ns = {"atom": "http://www.w3.org/2005/Atom"}
|
||||||
|
for entry in root.findall("atom:entry", ns):
|
||||||
|
post_title = entry.find("atom:title", ns)
|
||||||
|
content = entry.find("atom:content", ns)
|
||||||
|
if post_title is None or content is None or content.text is None:
|
||||||
|
continue
|
||||||
|
link_match = re.search(r'<a href="([^"]+)">\[link\]</a>', content.text)
|
||||||
|
if link_match and link_match.group(1).lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):
|
||||||
|
logging.debug(f"Found image post: {post_title.text} - {link_match.group(1)}")
|
||||||
|
posts.append((post_title.text, link_match.group(1), subreddit.strip()))
|
||||||
|
except ET.ParseError as e:
|
||||||
|
logging.error(f"Error parsing RSS feed: {e}")
|
||||||
|
continue
|
||||||
|
return posts
|
||||||
|
|
||||||
|
async def save_picture(pool):
|
||||||
|
os.makedirs("data/images", exist_ok=True)
|
||||||
|
subreddits = os.getenv("SUBREDDITS", "bunnies,bnuuy")
|
||||||
|
posts = await get_latest_hot_posts(subreddits)
|
||||||
|
if not posts:
|
||||||
|
logging.info("No image posts found.")
|
||||||
|
return
|
||||||
|
for title, url, subreddit in posts:
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
async with conn.cursor() as cursor:
|
||||||
|
await cursor.execute("SELECT COUNT(*) FROM images WHERE url = %s", (url,))
|
||||||
|
result = await cursor.fetchone()
|
||||||
|
if result[0] > 0:
|
||||||
|
logging.info(f"Post already exists in database: {title} - {url}")
|
||||||
|
continue
|
||||||
|
logging.info(f"Saving post: {title} - {url}")
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(url) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
logging.error(f"Failed to download image: {response.status}")
|
||||||
|
continue
|
||||||
|
content = await response.read()
|
||||||
|
generate_filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[1]
|
||||||
|
filename = os.path.join("data/images", generate_filename)
|
||||||
|
with open(filename, "wb") as f:
|
||||||
|
f.write(content)
|
||||||
|
logging.info(f"Saved image to {filename}")
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
async with conn.cursor() as cursor:
|
||||||
|
await cursor.execute(
|
||||||
|
"INSERT INTO images (url, filename, subreddit) VALUES (%s, %s, %s)",
|
||||||
|
(url, generate_filename, subreddit)
|
||||||
|
)
|
||||||
|
await conn.commit()
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error saving image: {e}")
|
||||||
|
|
||||||
@@ -1,8 +1,31 @@
|
|||||||
from fastapi import FastAPI
|
import aiohttp
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
import asyncmy
|
import asyncmy
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
import logging
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
|
||||||
|
log_formatter = logging.Formatter(
|
||||||
|
fmt="%(asctime)s [%(levelname)s] %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
|
file_handler = logging.FileHandler("data/api.log", encoding="utf-8")
|
||||||
|
file_handler.setFormatter(log_formatter)
|
||||||
|
|
||||||
|
console_handler = logging.StreamHandler()
|
||||||
|
console_handler.setFormatter(log_formatter)
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, log_level, logging.INFO),
|
||||||
|
handlers=[file_handler, console_handler],
|
||||||
|
)
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def connect_db(app: FastAPI):
|
async def connect_db(app: FastAPI):
|
||||||
@@ -15,18 +38,34 @@ async def connect_db(app: FastAPI):
|
|||||||
minsize=5,
|
minsize=5,
|
||||||
maxsize=20
|
maxsize=20
|
||||||
)
|
)
|
||||||
|
await create_tables(app.state.pool)
|
||||||
|
task = asyncio.create_task(fetch_images())
|
||||||
try:
|
try:
|
||||||
yield
|
yield
|
||||||
finally:
|
finally:
|
||||||
|
task.cancel()
|
||||||
|
try:
|
||||||
|
await task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
app.state.pool.close()
|
app.state.pool.close()
|
||||||
await app.state.pool.wait_closed()
|
await app.state.pool.wait_closed()
|
||||||
|
|
||||||
app = FastAPI(lifespan=connect_db)
|
app = FastAPI(lifespan=connect_db)
|
||||||
|
|
||||||
@asynccontextmanager
|
async def create_tables(pool):
|
||||||
async def get_connection():
|
async with pool.acquire() as conn:
|
||||||
async with app.state.pool.acquire() as conn:
|
async with conn.cursor() as cursor:
|
||||||
yield conn
|
await cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS images (
|
||||||
|
id INT AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
url VARCHAR(255) NOT NULL,
|
||||||
|
filename VARCHAR(255) NOT NULL,
|
||||||
|
subreddit VARCHAR(255) NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
await conn.commit()
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
@@ -34,5 +73,30 @@ async def root():
|
|||||||
|
|
||||||
@app.get("/random")
|
@app.get("/random")
|
||||||
async def get_random_bnuy():
|
async def get_random_bnuy():
|
||||||
async with get_connection() as conn:
|
|
||||||
return {"message": "here could be a bnuy, if I would've implemented it"}
|
return {"message": "here could be a bnuy, if I would've implemented it"}
|
||||||
|
|
||||||
|
@app.get("/images/{filename}")
|
||||||
|
async def get_image(filename: str):
|
||||||
|
async with app.state.pool.acquire() as conn:
|
||||||
|
async with conn.cursor() as cursor:
|
||||||
|
await cursor.execute("SELECT filename FROM images WHERE filename = %s", (filename,))
|
||||||
|
result = await cursor.fetchone()
|
||||||
|
if result:
|
||||||
|
filepath = os.path.join("data/images", result[0])
|
||||||
|
if os.path.exists(filepath):
|
||||||
|
return FileResponse(filepath)
|
||||||
|
else:
|
||||||
|
raise HTTPException(status_code=404, detail="Image file not found")
|
||||||
|
else:
|
||||||
|
raise HTTPException(status_code=404, detail="Image not found")
|
||||||
|
|
||||||
|
async def fetch_images():
|
||||||
|
from collector import save_picture
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
logging.info("Starting image collection...")
|
||||||
|
await save_picture(app.state.pool)
|
||||||
|
logging.info("Image collection completed. Sleeping for 1 hour...")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error during image collection: {e}")
|
||||||
|
await asyncio.sleep(86400) # Sleep for 24 hours
|
||||||
@@ -1,2 +1,3 @@
|
|||||||
fastapi[standard]
|
fastapi[standard]
|
||||||
asyncmy
|
asyncmy
|
||||||
|
aiohttp
|
||||||
Reference in New Issue
Block a user