On this page

Files and JSON

12 min read · Text · Ch. 4 — OOP and Files

Files and JSON

Almost every real-world program reads or writes data to persistent storage — configuration files, user data, logs, exports. Python makes file I/O straightforward, and the pathlib module provides an elegant, object-oriented API for working with file system paths. The json and csv modules handle the two most common data interchange formats.

Reading Text Files

The built-in open() function opens a file and returns a file object. Always use the with statement to ensure the file is closed automatically:

# Write a test file first
with open("sample.txt", "w", encoding="utf-8") as f:
    f.write("Line 1: Hello, Python!\n")
    f.write("Line 2: File I/O is easy.\n")
    f.write("Line 3: Use 'with' for safety.\n")

# Read the entire file as a single string
with open("sample.txt", "r", encoding="utf-8") as f:
    content = f.read()
    print(content)

# Read line by line (memory-efficient for large files)
with open("sample.txt", encoding="utf-8") as f:
    for line in f:                     # f is an iterator!
        print(line.rstrip("\n"))       # strip the trailing newline

# Read all lines into a list
with open("sample.txt", encoding="utf-8") as f:
    lines = f.readlines()
    print(lines)
    # ['Line 1: Hello, Python!\n', ...]

# Read lines without trailing newlines
with open("sample.txt", encoding="utf-8") as f:
    lines_clean = [line.rstrip() for line in f]
    print(lines_clean)
    # ['Line 1: Hello, Python!', ...]

> **Warning — Always specify encoding**

Always pass encoding="utf-8" when opening text files. Without it, Python uses the system default encoding, which differs between operating systems (UTF-8 on Linux/macOS, often CP1252 on Windows). Specifying utf-8 explicitly makes your code portable.

Writing and Appending to Files

# Mode "w" — overwrite (creates the file if it does not exist)
# NOTE: "w" truncates any existing content the moment the file is opened.
with open("log.txt", "w", encoding="utf-8") as f:
    f.write("Application started\n")
    f.writelines(["Event 1\n", "Event 2\n", "Event 3\n"])  # writelines does NOT add newlines itself

# Mode "a" — append (adds to the end, creates if needed)
with open("log.txt", "a", encoding="utf-8") as f:
    f.write("New event added\n")

# Mode "x" — exclusive creation (raises FileExistsError if file exists)
# Useful when accidentally clobbering an existing file must be impossible.
try:
    with open("new_file.txt", "x", encoding="utf-8") as f:
        f.write("This file is brand new\n")
except FileExistsError:
    print("File already exists!")

# Read the result
with open("log.txt", encoding="utf-8") as f:
    print(f.read())

Binary Files

For images, PDFs, and other binary data, use mode "rb" (read binary) or "wb" (write binary):

# Copy a binary file
def copy_file(src: str, dst: str) -> None:
    """Copy *src* to *dst* byte-for-byte, reading in fixed-size chunks.

    Chunked reads keep memory usage flat regardless of file size.
    """
    chunk_size = 65_536  # 64 KB per read
    with open(src, "rb") as fin, open(dst, "wb") as fout:
        # iter(callable, sentinel) calls fin.read until it returns b"" (EOF)
        for block in iter(lambda: fin.read(chunk_size), b""):
            fout.write(block)

# Example: copy log.txt as binary (works for any file type)
copy_file("log.txt", "log_copy.txt")

# Read a file and compute its checksum
import hashlib  # NOTE: MD5 is fine for integrity checks, not for security

def file_md5(path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is consumed in 64 KB chunks so arbitrarily large files
    can be hashed without loading them into memory.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        for piece in iter(lambda: stream.read(65_536), b""):
            digest.update(piece)
    return digest.hexdigest()

print(file_md5("log.txt"))

`pathlib` — Object-Oriented Paths

pathlib.Path represents a file system path and provides methods for every common operation:

from pathlib import Path

# Creating path objects
cwd = Path.cwd()
home = Path.home()
docs = home / "Documents"            # / operator joins paths
config = docs / "myapp" / "config.json"

# Path components
print(config.name)        # config.json
print(config.stem)        # config
print(config.suffix)      # .json
print(config.parent)      # ~/Documents/myapp
print(config.parts)       # ('/', 'home', '...', 'Documents', 'myapp', 'config.json')

# Checking existence
print(config.exists())    # True or False
print(config.is_file())   # True if it's a file
print(config.is_dir())    # True if it's a directory

# Creating directories
output_dir = Path("output")
output_dir.mkdir(parents=True, exist_ok=True)   # create if missing, no error if exists

# Reading and writing (convenient methods)
config_file = output_dir / "settings.txt"
config_file.write_text("debug=true\nport=8000\n", encoding="utf-8")
content = config_file.read_text(encoding="utf-8")
print(content)

# Listing files
# iterdir() yields the direct children only — no recursion
for path in output_dir.iterdir():
    print(path)

# Glob patterns
py_files = list(Path(".").glob("**/*.py"))     # all .py files recursively
txt_files = list(output_dir.glob("*.txt"))     # .txt files in output_dir only

# File stats
stat = config_file.stat()
print(f"Size: {stat.st_size} bytes")

# Renaming and deleting
new_path = config_file.rename(output_dir / "settings_v1.txt")   # rename() returns the new Path
new_path.unlink()   # delete the file

# Remove an empty directory
# (rmdir raises OSError if the directory still has contents)
output_dir.rmdir()

# Remove a non-empty directory tree
# rmdir() above only handles EMPTY directories; shutil.rmtree deletes a
# directory and everything inside it in one call.
import shutil
Path("temp_dir").mkdir(exist_ok=True)
# Explicit encoding keeps the snippet portable across platforms.
(Path("temp_dir") / "file.txt").write_text("test", encoding="utf-8")
shutil.rmtree("temp_dir")   # removes everything recursively

Working with JSON

JSON (JavaScript Object Notation) is the most common data interchange format for APIs and configuration files. Python's json module maps JSON types to Python types:

| JSON          | Python           |
| ------------- | ---------------- |
| object `{}`   | `dict`           |
| array `[]`    | `list`           |
| string        | `str`            |
| number        | `int` or `float` |
| `true`/`false` | `True`/`False`  |
| `null`        | `None`          |

import json

# Python dict → JSON string
data = {
    "name": "Alice",
    "age": 30,
    "skills": ["Python", "SQL", "Docker"],
    "active": True,
    "address": None,
    "score": 9.5,
}

json_str = json.dumps(data)
print(json_str)
# {"name": "Alice", "age": 30, "skills": ["Python", "SQL", "Docker"], ...}

# Pretty-print with indentation
# ensure_ascii=False keeps non-ASCII characters readable instead of \uXXXX escapes
pretty = json.dumps(data, indent=2, ensure_ascii=False)
print(pretty)

# JSON string → Python dict
parsed = json.loads(json_str)
print(parsed["skills"])   # ['Python', 'SQL', 'Docker']
print(type(parsed))       # <class 'dict'>

# Write JSON to a file
# dump() (no "s") writes straight to an open file object — no intermediate string
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# Read JSON from a file
with open("data.json", encoding="utf-8") as f:
    loaded = json.load(f)

print(loaded["name"])   # Alice

Serializing Custom Objects

from datetime import datetime, date
import json

class DateEncoder(json.JSONEncoder):
    """JSON encoder that serializes date/datetime values as ISO-8601 strings."""

    def default(self, obj):
        # Dates are not JSON-native: emit their ISO representation instead
        # of letting the base class raise TypeError.
        if not isinstance(obj, (date, datetime)):
            return super().default(obj)
        return obj.isoformat()

event = {
    "title": "PyCon 2025",
    "start": date(2025, 5, 15),     # not JSON-serializable by default
    "created_at": datetime.now(),   # not JSON-serializable by default
}

# Use the custom encoder
json_str = json.dumps(event, cls=DateEncoder, indent=2)
print(json_str)

# Alternative: use a default function
def json_default(obj):
    """Fallback serializer for json.dumps: ISO-format dates, reject anything else."""
    if not isinstance(obj, (date, datetime)):
        # Mirror the error json itself raises for unsupported types.
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
    return obj.isoformat()

# default= is invoked only for values json cannot serialize natively
json_str2 = json.dumps(event, default=json_default, indent=2)
print(json_str2)

> **Tip — Use pathlib for JSON files**

pathlib.Path has a convenient read_text() / write_text() API. You can load JSON with: data = json.loads(Path("config.json").read_text(encoding="utf-8")) — no with open() needed.

Working with CSV

CSV (Comma-Separated Values) is a common format for tabular data. Python's csv module handles the quoting and delimiter rules correctly:

import csv
from pathlib import Path

# Write CSV
employees = [
    {"name": "Alice", "department": "Engineering", "salary": 95000},
    {"name": "Bob", "department": "Marketing", "salary": 72000},
    {"name": "Carol", "department": "Engineering", "salary": 105000},
    {"name": "Dave", "department": "HR", "salary": 68000},
]

csv_path = Path("employees.csv")

# newline="" lets the csv module control line endings itself (per the csv docs)
with csv_path.open("w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "department", "salary"])
    writer.writeheader()
    writer.writerows(employees)

# Read CSV
# NOTE: DictReader yields every field as a string — convert numbers yourself
with csv_path.open(encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']} ({row['department']}): ${int(row['salary']):,}")
# Alice (Engineering): $95,000
# Bob (Marketing): $72,000
# Carol (Engineering): $105,000
# Dave (HR): $68,000

# Read CSV into a list of dicts
with csv_path.open(encoding="utf-8") as f:
    all_employees = list(csv.DictReader(f))

# Compute average salary
total = sum(int(e["salary"]) for e in all_employees)
avg = total / len(all_employees)
print(f"Average salary: ${avg:,.0f}")   # Average salary: $85,000

# Clean up
csv_path.unlink()
Path("data.json").unlink(missing_ok=True)   # missing_ok avoids FileNotFoundError

Temporary Files

For testing and intermediate processing, use tempfile:

import tempfile
from pathlib import Path

# Named temporary file — deleted when closed
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=True, encoding="utf-8") as tf:
    tf.write("Temporary data\n")
    print(f"Temp file: {tf.name}")
    # File is deleted when the with block exits

# Temporary directory — deleted with all contents on exit
with tempfile.TemporaryDirectory() as tmpdir:
    tmp_path = Path(tmpdir)
    # Explicit encoding keeps the snippet portable across platforms.
    (tmp_path / "output.json").write_text('{"result": 42}', encoding="utf-8")
    files = list(tmp_path.iterdir())
    print(f"Files in temp dir: {files}")
    # Entire directory deleted on exit

Next steps

  • http-and-requests