Python November 19, 2025

Table of Contents

A. To-Do List App (File-based persistence)
B. Student Management System (OOP-based)
C. Web Scraper using BeautifulSoup
D. CSV Data Analyzer using Pandas
E. Chatbot using if-else logic
F. Weather App using APIs
G. Multi-threaded Downloader

Python Project Work 

Practical mini-projects combining concepts:

A. To-Do List App (File-based persistence)

Theory / design
Goal: small CLI app to add, list, update, and remove to-do items, storing data on disk so tasks persist between runs. Core concepts: file I/O, JSON serialization, simple program structure, error handling, basic CLI interaction. Use JSON (human readable) or CSV for persistence. Keep operations atomic (write to a temp file then rename) to avoid corruption.

Practical (file: todo.py)

import json
import os
from datetime import datetime, timezone
from tempfile import NamedTemporaryFile

DB_PATH = "todos.json"

def load_todos():
    if not os.path.exists(DB_PATH):
        return []
    with open(DB_PATH, "r", encoding="utf-8") as f:
        return json.load(f)

def save_todos(todos):
    # atomic write: dump to a temp file, then rename it over the real file
    tmp = NamedTemporaryFile("w", delete=False, dir=".", encoding="utf-8")
    try:
        json.dump(todos, tmp, indent=2, ensure_ascii=False)
        tmp.close()
        os.replace(tmp.name, DB_PATH)
    except Exception:
        tmp.close()
        os.remove(tmp.name)
        raise

def add_task(title, due=None):
    todos = load_todos()
    item = {
        "id": int(datetime.now().timestamp() * 1000),
        "title": title,
        "done": False,
        "created": datetime.utcnow().isoformat(),
        "due": due
    }
    todos.append(item)
    save_todos(todos)
    return item

def list_tasks(show_all=False):
    todos = load_todos()
    for t in todos:
        if not show_all and t["done"]:
            continue
        status = "✓" if t["done"] else " "
        print(f'[{status}] {t["id"]} - {t["title"]} (due: {t["due"]})')

def mark_done(task_id):
    todos = load_todos()
    for t in todos:
        if t["id"] == task_id:
            t["done"] = True
            save_todos(todos)
            return True
    return False

def delete_task(task_id):
    todos = load_todos()
    new = [t for t in todos if t["id"] != task_id]
    if len(new) != len(todos):
        save_todos(new)
        return True
    return False

# Simple CLI
if __name__ == "__main__":
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument("cmd", choices=["add","list","done","del"])
    p.add_argument("--title", "-t")
    p.add_argument("--id", type=int)
    p.add_argument("--all", action="store_true")
    args = p.parse_args()

    if args.cmd == "add" and args.title:
        item = add_task(args.title)
        print("Added:", item)
    elif args.cmd == "list":
        list_tasks(show_all=args.all)
    elif args.cmd == "done" and args.id:
        ok = mark_done(args.id)
        print("Marked done" if ok else "Not found")
    elif args.cmd == "del" and args.id:
        ok = delete_task(args.id)
        print("Deleted" if ok else "Not found")
    else:
        print("Invalid usage. See --help")

Extensions: add categories/tags, sort by due date, simple TUI (curses), export/import, synchronize with remote storage.
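
As a starting point for the "sort by due date" extension, here is a minimal sketch that builds on the load_todos() helper from todo.py above. It assumes due dates are stored as ISO strings (e.g. "2025-12-01"), so a plain string sort matches date order; tasks without a due date are listed last.

def list_by_due(show_all=False):
    # Sketch: list tasks ordered by due date; tasks without one go last.
    todos = load_todos()
    todos.sort(key=lambda t: (t["due"] is None, t["due"] or ""))
    for t in todos:
        if not show_all and t["done"]:
            continue
        status = "✓" if t["done"] else " "
        print(f'[{status}] {t["id"]} - {t["title"]} (due: {t["due"]})')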

B. Student Management System (OOP-based)

Theory / design
Goal: a small console program to manage students and courses. Core concepts: classes, encapsulation, composition, methods, simple persistence (JSON or pickle; the example uses JSON), input validation. Design classes: Student, Course, and optionally a StudentManager (the example below keeps students in a plain dict). Show common OOP operations: add/remove students, enroll in a course, compute GPA.

Practical (file: sms.py)

import json
import os
from dataclasses import dataclass, asdict, field
from typing import List

DB = "students.json"

@dataclass
class Course:
    name: str
    credits: int
    grade: float  # 0.0 - 10.0 or similar

@dataclass
class Student:
    student_id: str
    name: str
    courses: List[Course] = field(default_factory=list)

    def enroll(self, course: Course):
        self.courses.append(course)

    def gpa(self):
        if not self.courses:
            return 0.0
        total_credits = sum(c.credits for c in self.courses)
        weighted = sum(c.credits * c.grade for c in self.courses)
        return weighted / total_credits if total_credits else 0.0

def load_students():
    if not os.path.exists(DB):
        return {}
    with open(DB, "r", encoding="utf-8") as f:
        raw = json.load(f)
    students = {}
    for sid, info in raw.items():
        courses = [Course(**c) for c in info.get("courses", [])]
        students[sid] = Student(student_id=sid, name=info["name"], courses=courses)
    return students

def save_students(students):
    raw = {sid: {"name": s.name, "courses": [asdict(c) for c in s.courses]} for sid, s in students.items()}
    with open(DB, "w", encoding="utf-8") as f:
        json.dump(raw, f, indent=2)

# Example usage
if __name__ == "__main__":
    students = load_students()
    s = Student("S100", "Alice")
    s.enroll(Course("Math", 4, 9.0))
    s.enroll(Course("Physics", 3, 8.0))
    students[s.student_id] = s
    save_students(students)
    print("Saved. GPA:", s.gpa())

Extensions: CLI interface, CSV import/export, search, validations, unit tests for GPA calculation, integrate SQLite for larger scale.
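
For the "unit tests for GPA calculation" idea, a minimal pytest-style sketch could live in a file next to sms.py (the module and file names are assumptions):

# test_sms.py -- minimal checks for the GPA logic in sms.py (run with pytest)
from sms import Student, Course

def test_gpa_empty():
    # a student with no courses should report 0.0, not divide by zero
    assert Student("S1", "Test").gpa() == 0.0

def test_gpa_weighted_by_credits():
    s = Student("S2", "Test")
    s.enroll(Course("Math", 4, 9.0))
    s.enroll(Course("Physics", 2, 6.0))
    # (4*9.0 + 2*6.0) / (4 + 2) = 8.0
    assert abs(s.gpa() - 8.0) < 1e-9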

C. Web Scraper using BeautifulSoup

Theory / design
Goal: fetch web pages and extract structured data. Core concepts: HTTP requests, HTML parsing, CSS selectors/XPath, rate limiting, politeness (robots.txt), error handling, saving results (CSV/JSON). Use requests + BeautifulSoup. Respect website rules and do not scrape protected content.
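
The robots.txt point can be checked with the standard library before fetching anything; a minimal sketch using urllib.robotparser (the site URL and user-agent string are placeholders):

from urllib.robotparser import RobotFileParser

# Sketch: consult robots.txt before scraping (placeholder site and user agent).
rp = RobotFileParser()
rp.set_url("https://example.com/robots.txt")
rp.read()

if rp.can_fetch("MyScraperBot/1.0", "https://example.com/articles"):
    print("Allowed to fetch")
else:
    print("Disallowed by robots.txt")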

Practical (file: scraper.py)

import requests
from bs4 import BeautifulSoup
import time
import csv

BASE = "https://example.com/articles"

def fetch(url):
    r = requests.get(url, timeout=10)
    r.raise_for_status()
    return r.text

def parse_list(html):
    soup = BeautifulSoup(html, "html.parser")
    links = []
    for a in soup.select("article h2 a"):
        links.append(a.get("href"))
    return links

def parse_article(html):
    soup = BeautifulSoup(html, "html.parser")
    title = soup.select_one("h1").get_text(strip=True)
    date = soup.select_one(".publish-date").get_text(strip=True) if soup.select_one(".publish-date") else ""
    body = "\n".join(p.get_text(strip=True) for p in soup.select(".content p"))
    return {"title": title, "date": date, "body": body}

def run():
    page = fetch(BASE)
    article_urls = parse_list(page)
    rows = []
    for rel in article_urls:
        url = rel if rel.startswith("http") else "https://example.com" + rel
        try:
            ahtml = fetch(url)
            rows.append(parse_article(ahtml))
            time.sleep(1)  # politeness
        except Exception as e:
            print("Failed", url, e)
    with open("articles.csv", "w", newline='', encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["title","date","body"])
        writer.writeheader()
        writer.writerows(rows)

if __name__ == "__main__":
    run()

Extensions: add CLI, proxy support, user-agent header, incremental scraping, use Scrapy for scale and concurrency, save to DB, handle JavaScript via Selenium or Playwright if needed.
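
For the user-agent header extension, one option is to reuse a single requests Session with a descriptive User-Agent; this is a drop-in variant of fetch() (the UA string is an example):

import requests

# Sketch: one shared Session with a descriptive User-Agent for every request.
session = requests.Session()
session.headers.update({"User-Agent": "MyScraperBot/1.0 (+https://example.com/contact)"})

def fetch(url, timeout=10):
    r = session.get(url, timeout=timeout)
    r.raise_for_status()
    return r.text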

D. CSV Data Analyzer using Pandas

Theory / design
Goal: read CSV dataset(s), perform cleaning, summary statistics, filtering, groupby/aggregation and produce reports (CSV/Excel/plots). Core concepts: pandas DataFrame, reading/writing, handling missing values, vectorized ops, pivot tables, visualization using matplotlib/seaborn.
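
The pivot-table concept mentioned above is essentially one call in pandas; a minimal sketch assuming the CSV has category, region and price columns (hypothetical names):

import pandas as pd

# Sketch: mean price per category/region combination (column names are examples).
df = pd.read_csv("data.csv")
pivot = pd.pivot_table(df, values="price", index="category",
                       columns="region", aggfunc="mean")
print(pivot)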

Practical (file: analyzer.py)

import pandas as pd

def load(path):
    return pd.read_csv(path)

def basic_report(df):
    print("Shape:", df.shape)
    print("Columns:", df.columns.tolist())
    print("Missing values:\n", df.isna().sum())
    print("\nDescriptive stats:\n", df.describe(include='all'))

def clean(df):
    # example: fill numeric NaN with the column median
    for col in df.select_dtypes(include="number").columns:
        df[col] = df[col].fillna(df[col].median())
    # drop duplicate rows
    df = df.drop_duplicates()
    return df

def group_summary(df, group_col, agg_col):
    return df.groupby(group_col)[agg_col].agg(["count","mean","median","sum"]).reset_index()

if __name__ == "__main__":
    df = load("data.csv")
    basic_report(df)
    df = clean(df)
    summary = group_summary(df, "category", "price")
    summary.to_csv("summary.csv", index=False)
    print("Saved summary.csv")

Extensions: interactive dashboard with Streamlit, plotting histograms/boxplots, outlier detection, CLI options, unit tests for transformation functions.
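
For the plotting extension, a histogram of a numeric column with matplotlib could look like this (the price column is an assumption):

import pandas as pd
import matplotlib.pyplot as plt

# Sketch: save a histogram of a numeric column to a PNG file.
df = pd.read_csv("data.csv")
df["price"].plot(kind="hist", bins=30, title="Price distribution")
plt.xlabel("price")
plt.tight_layout()
plt.savefig("price_hist.png")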

E. Chatbot using if-else logic

Theory / design
Goal: a rule-based conversational bot for simple interactions. Core concepts: string matching, basic NLP techniques (lowercasing, tokenization), regex, finite state for multi-turn dialogs, modular response handlers. This project demonstrates control flow, dictionaries, file I/O for logs, and simple pattern matching — not ML.
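
To illustrate the finite-state idea for multi-turn dialogs, here is a minimal sketch independent of bot.py; the states and prompts are made up for the example:

class OrderDialog:
    # Sketch: a tiny finite-state dialog that collects an item and a quantity.
    def __init__(self):
        self.state = "ASK_ITEM"
        self.order = {}

    def handle(self, text):
        if self.state == "ASK_ITEM":
            self.order["item"] = text
            self.state = "ASK_QTY"
            return "How many?"
        if self.state == "ASK_QTY":
            self.order["qty"] = text
            self.state = "DONE"
            return f"Got it: {self.order['qty']} x {self.order['item']}."
        return "Order already complete."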

Practical (file: bot.py)

import re

RESPONSES = {
    "hello": "Hello! How can I help you today?",
    "help": "I can tell the time, greet you, or answer simple questions. Try: 'time' or 'about'.",
    "about": "I am a simple rule-based chatbot built in Python."
}

def respond(text):
    t = text.strip().lower()
    if re.search(r"\bhello\b|\bhi\b|\bhey\b", t):
        return RESPONSES["hello"]
    if "time" in t:
        from datetime import datetime
        return "Current time: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    if "help" in t:
        return RESPONSES["help"]
    if "about" in t:
        return RESPONSES["about"]
    if "bye" in t or "exit" in t:
        return "Goodbye!"
    return "Sorry, I didn't understand. Type 'help' to see options."

if __name__ == "__main__":
    print("Chatbot (type 'exit' to quit)")
    while True:
        try:
            msg = input("> ")
        except (EOFError, KeyboardInterrupt):
            break
        reply = respond(msg)
        print(reply)
        if reply == "Goodbye!":
            break

Extensions: add small knowledge base (JSON lookup), fuzzy matching, confidence scoring, limited context memory, integrate with Telegram or Slack bot APIs.
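
The fuzzy-matching extension can start from the standard library: difflib.get_close_matches tolerates small typos when mapping input words to known keywords. A minimal sketch reusing the RESPONSES dict from bot.py:

from difflib import get_close_matches

# Sketch: map a possibly misspelled word to a known keyword with difflib.
def fuzzy_respond(text, responses):
    for word in text.lower().split():
        match = get_close_matches(word, responses.keys(), n=1, cutoff=0.8)
        if match:
            return responses[match[0]]
    return "Sorry, I didn't understand."

# e.g. fuzzy_respond("helo there", RESPONSES) returns RESPONSES["hello"]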

F. Weather App using APIs

Theory / design
Goal: fetch current weather for a city using a public API (e.g., OpenWeatherMap). Core concepts: HTTP requests, JSON parsing, API key management (env vars), error handling, formatting output. Respect API rate limits and secure keys (do not hardcode).

Practical (file: weather.py) — you need an API key from a weather provider

import os
import requests

API_KEY = os.getenv("OPENWEATHER_API_KEY")
BASE = "https://api.openweathermap.org/data/2.5/weather"

def get_weather(city):
    if not API_KEY:
        raise RuntimeError("Set OPENWEATHER_API_KEY env variable")
    params = {"q": city, "appid": API_KEY, "units": "metric"}
    r = requests.get(BASE, params=params, timeout=10)
    r.raise_for_status()
    data = r.json()
    return {
        "city": data["name"],
        "temp": data["main"]["temp"],
        "desc": data["weather"][0]["description"],
        "humidity": data["main"]["humidity"],
        "wind": data["wind"]["speed"]
    }

if __name__ == "__main__":
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument("city")
    args = p.parse_args()
    try:
        w = get_weather(args.city)
        print(f'{w["city"]}: {w["temp"]}°C, {w["desc"]}, Humidity: {w["humidity"]}%, Wind: {w["wind"]} m/s')
    except Exception as e:
        print("Error:", e)

Run with:

export OPENWEATHER_API_KEY=yourkey
python weather.py London

Extensions: cache results, build web front-end with Flask/FastAPI, show 5-day forecast, map integration.
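
For the caching extension, a minimal in-memory, time-based cache wrapped around get_weather() might look like this (the 10-minute TTL is an arbitrary choice):

import time

# Sketch: cache get_weather() results in memory for a short TTL.
_cache = {}  # city (lowercased) -> (timestamp, data)
CACHE_TTL = 600  # seconds

def get_weather_cached(city):
    now = time.time()
    hit = _cache.get(city.lower())
    if hit and now - hit[0] < CACHE_TTL:
        return hit[1]
    data = get_weather(city)  # from weather.py above
    _cache[city.lower()] = (now, data)
    return data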

G. Multi-threaded Downloader

Theory / design
Goal: download many files (images, PDFs) concurrently to speed up I/O-bound tasks. Core concepts: threading vs multiprocessing (I/O bound → threads), ThreadPoolExecutor, safe file writes, retries, progress reporting, limiting concurrency (semaphore), handling timeouts. Handle large files by streaming in chunks to avoid memory blow-ups.

Practical (file: downloader.py)

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

URLS = [
    "https://example.com/file1.jpg",
    "https://example.com/file2.jpg",
    # add more
]

DOWNLOAD_DIR = "downloads"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

def download(url, timeout=15):
    local = os.path.join(DOWNLOAD_DIR, os.path.basename(url))
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(local, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return url, True, None
    except Exception as e:
        return url, False, str(e)

def run(urls, workers=5):
    results = []
    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {ex.submit(download, u): u for u in urls}
        for fut in as_completed(futures):
            url, ok, err = fut.result()
            print(f"{'OK' if ok else 'FAIL'} - {url} {err or ''}")
            results.append((url, ok, err))
    return results

if __name__ == "__main__":
    run(URLS, workers=8)

Extensions: retry logic with backoff, per-host concurrency limits, progress bars (tqdm), asynchronous version using asyncio + aiohttp for even higher concurrency, resume support for partial downloads.
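
The retry-with-backoff extension can wrap the existing download() function; a minimal sketch with exponential backoff (the attempt count and delays are arbitrary):

import time

# Sketch: retry a flaky download with exponential backoff (1s, 2s, 4s, ...).
def download_with_retry(url, attempts=3, base_delay=1.0):
    err = None
    for attempt in range(1, attempts + 1):
        url, ok, err = download(url)  # download() from downloader.py above
        if ok:
            return url, True, None
        if attempt < attempts:
            time.sleep(base_delay * 2 ** (attempt - 1))
    return url, False, err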

Advice for using these projects

• Start small: implement core functionality first, then add features.
• Add unit tests for pure logic (GPA calculation, data cleaning).
• Use virtual environments and requirements.txt for reproducibility.
• Use logging instead of print for production-grade behavior (a minimal setup is sketched after this list).
• Add simple CLI arg parsing (argparse) to make tools flexible.
• For web scraping and API usage, respect robots.txt and API terms.
• Version control each project in its own Git repo or subfolder.
• Document README with how to run, dependencies, and extension ideas.
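
For the logging advice above, a minimal setup that works for any of these scripts (the format string is just one common choice):

import logging

# Sketch: basic logging configuration instead of bare print() calls.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)
log = logging.getLogger("todo")

log.info("Added task %s", "buy milk")
log.warning("Task %s not found", 12345)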

 

Next Blog: Python Interview Preparation

 

Sanjiv
