On this page
Files and JSON
Files and JSON
Almost every real-world program reads or writes data to persistent storage — configuration files, user data, logs, exports. Python makes file I/O straightforward, and the pathlib module provides an elegant, object-oriented API for working with file system paths. The json and csv modules handle the two most common data interchange formats.
Reading Text Files
The built-in open() function opens a file and returns a file object. Always use the with statement to ensure the file is closed automatically:
# Write a test file first
with open("sample.txt", "w", encoding="utf-8") as f:
    f.write("Line 1: Hello, Python!\n")
    f.write("Line 2: File I/O is easy.\n")
    f.write("Line 3: Use 'with' for safety.\n")

# Read the entire file as a single string
with open("sample.txt", "r", encoding="utf-8") as f:
    content = f.read()
print(content)

# Read line by line (memory-efficient for large files)
with open("sample.txt", encoding="utf-8") as f:
    for line in f:  # f is an iterator!
        print(line.rstrip("\n"))  # strip the trailing newline

# Read all lines into a list
with open("sample.txt", encoding="utf-8") as f:
    lines = f.readlines()
print(lines)
# ['Line 1: Hello, Python!\n', ...]

# Read lines without trailing newlines
with open("sample.txt", encoding="utf-8") as f:
    lines_clean = [line.rstrip() for line in f]
print(lines_clean)
# ['Line 1: Hello, Python!', ...]

tip type: warning title: "Always specify encoding"
Always pass `encoding="utf-8"` when opening text files. Without it, Python uses the system default encoding, which differs between operating systems (UTF-8 on Linux/macOS, often CP1252 on Windows). Specifying `utf-8` explicitly makes your code portable.
Writing and Appending to Files
# Mode "w" — overwrite (creates the file if it does not exist)
with open("log.txt", "w", encoding="utf-8") as f:
f.write("Application started\n")
f.writelines(["Event 1\n", "Event 2\n", "Event 3\n"])
# Mode "a" — append (adds to the end, creates if needed)
with open("log.txt", "a", encoding="utf-8") as f:
f.write("New event added\n")
# Mode "x" — exclusive creation (raises FileExistsError if file exists)
try:
with open("new_file.txt", "x", encoding="utf-8") as f:
f.write("This file is brand new\n")
except FileExistsError:
print("File already exists!")
# Read the result
with open("log.txt", encoding="utf-8") as f:
    print(f.read())

Binary Files
For images, PDFs, and other binary data, use mode "rb" (read binary) or "wb" (write binary):
# Copy a binary file
def copy_file(src: str, dst: str) -> None:
    """Copy the file at *src* to *dst* in 64 KB binary chunks.

    Works for any file type because the bytes are never decoded.
    """
    with open(src, "rb") as source, open(dst, "wb") as destination:
        while chunk := source.read(65_536):  # read 64KB at a time
            destination.write(chunk)
# Example: copy log.txt (created in the previous snippet) as binary — works for any file type
copy_file("log.txt", "log_copy.txt")
# Read a file and compute its checksum
import hashlib

def file_md5(path: str) -> str:
    """Return the hex MD5 digest of the file at *path*, read in 64 KB chunks.

    NOTE: MD5 is fine as an integrity checksum, but it is NOT suitable for
    security-sensitive uses (use hashlib.sha256 there).
    """
    hasher = hashlib.md5()
    with open(path, "rb") as f:
        while chunk := f.read(65_536):
            hasher.update(chunk)
    return hasher.hexdigest()
print(file_md5("log.txt"))

`pathlib` — Object-Oriented Paths
pathlib.Path represents a file system path and provides methods for every common operation:
from pathlib import Path

# Creating path objects
cwd = Path.cwd()
home = Path.home()
docs = home / "Documents"  # / operator joins paths
config = docs / "myapp" / "config.json"

# Path components
print(config.name)    # config.json
print(config.stem)    # config
print(config.suffix)  # .json
print(config.parent)  # ~/Documents/myapp
print(config.parts)   # ('/', 'home', '...', 'Documents', 'myapp', 'config.json')

# Checking existence
print(config.exists())   # True or False
print(config.is_file())  # True if it's a file
print(config.is_dir())   # True if it's a directory

# Creating directories
output_dir = Path("output")
output_dir.mkdir(parents=True, exist_ok=True)  # create if missing, no error if exists

# Reading and writing (convenient methods)
config_file = output_dir / "settings.txt"
config_file.write_text("debug=true\nport=8000\n", encoding="utf-8")
content = config_file.read_text(encoding="utf-8")
print(content)

# Listing files
for path in output_dir.iterdir():
    print(path)

# Glob patterns
py_files = list(Path(".").glob("**/*.py"))   # all .py files recursively
txt_files = list(output_dir.glob("*.txt"))   # .txt files in output_dir only

# File stats
stat = config_file.stat()
print(f"Size: {stat.st_size} bytes")

# Renaming and deleting
new_path = config_file.rename(output_dir / "settings_v1.txt")
new_path.unlink()  # delete the file

# Remove an empty directory
output_dir.rmdir()

# Remove a non-empty directory tree
import shutil
Path("temp_dir").mkdir(exist_ok=True)
# Pass encoding explicitly, consistent with the warning earlier on this page
(Path("temp_dir") / "file.txt").write_text("test", encoding="utf-8")
shutil.rmtree("temp_dir")  # removes everything recursively

Working with JSON
JSON (JavaScript Object Notation) is the most common data interchange format for APIs and configuration files. Python's json module maps JSON types to Python types:
| JSON | Python |
|---|---|
| object `{}` | `dict` |
| array `[]` | `list` |
| string | `str` |
| number | `int` or `float` |
| true/false | `True`/`False` |
| null | `None` |
import json

# Python dict → JSON string
data = {
    "name": "Alice",
    "age": 30,
    "skills": ["Python", "SQL", "Docker"],
    "active": True,
    "address": None,
    "score": 9.5,
}
json_str = json.dumps(data)
print(json_str)
# {"name": "Alice", "age": 30, "skills": ["Python", "SQL", "Docker"], ...}

# Pretty-print with indentation
pretty = json.dumps(data, indent=2, ensure_ascii=False)
print(pretty)

# JSON string → Python dict
parsed = json.loads(json_str)
print(parsed["skills"])  # ['Python', 'SQL', 'Docker']
print(type(parsed))      # <class 'dict'>

# Write JSON to a file
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# Read JSON from a file
with open("data.json", encoding="utf-8") as f:
    loaded = json.load(f)
print(loaded["name"]) # AliceSerializing Custom Objects
from datetime import datetime, date
import json

class DateEncoder(json.JSONEncoder):
    """Custom encoder that handles date and datetime objects."""

    def default(self, obj):
        # date/datetime are not JSON-native; serialize them as ISO-8601 strings
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()
        return super().default(obj)  # unknown types still raise TypeError

event = {
    "title": "PyCon 2025",
    "start": date(2025, 5, 15),
    "created_at": datetime.now(),
}

# Use the custom encoder
json_str = json.dumps(event, cls=DateEncoder, indent=2)
print(json_str)

# Alternative: use a default function
def json_default(obj):
    """Fallback serializer passed as json.dumps(default=...)."""
    if isinstance(obj, (date, datetime)):
        return obj.isoformat()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

json_str2 = json.dumps(event, default=json_default, indent=2)
print(json_str2)

tip type: tip title: "Use pathlib for JSON files"
`pathlib.Path` has a convenient `read_text()`/`write_text()` API. You can load JSON with `data = json.loads(Path("config.json").read_text(encoding="utf-8"))` — no `with open()` needed.
Working with CSV
CSV (Comma-Separated Values) is a common format for tabular data. Python's csv module handles the quoting and delimiter rules correctly:
import csv
from pathlib import Path

# Write CSV
employees = [
    {"name": "Alice", "department": "Engineering", "salary": 95000},
    {"name": "Bob", "department": "Marketing", "salary": 72000},
    {"name": "Carol", "department": "Engineering", "salary": 105000},
    {"name": "Dave", "department": "HR", "salary": 68000},
]
csv_path = Path("employees.csv")
# newline="" is required so the csv module controls line endings itself
with csv_path.open("w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "department", "salary"])
    writer.writeheader()
    writer.writerows(employees)

# Read CSV
with csv_path.open(encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']} ({row['department']}): ${int(row['salary']):,}")
# Alice (Engineering): $95,000
# Bob (Marketing): $72,000
# Carol (Engineering): $105,000
# Dave (HR): $68,000

# Read CSV into a list of dicts
with csv_path.open(encoding="utf-8") as f:
    all_employees = list(csv.DictReader(f))

# Compute average salary (CSV values are strings, so convert to int first)
total = sum(int(e["salary"]) for e in all_employees)
avg = total / len(all_employees)
print(f"Average salary: ${avg:,.0f}")  # Average salary: $85,000

# Clean up
csv_path.unlink()
Path("data.json").unlink(missing_ok=True)Temporary Files
For testing and intermediate processing, use tempfile:
import tempfile
from pathlib import Path

# Named temporary file — deleted when closed
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=True, encoding="utf-8") as tf:
    tf.write("Temporary data\n")
    print(f"Temp file: {tf.name}")
# File is deleted when the with block exits

# Temporary directory — deleted with all contents on exit
with tempfile.TemporaryDirectory() as tmpdir:
    tmp_path = Path(tmpdir)
    # Pass encoding explicitly, consistent with the warning earlier on this page
    (tmp_path / "output.json").write_text('{"result": 42}', encoding="utf-8")
    files = list(tmp_path.iterdir())
    print(f"Files in temp dir: {files}")
# Entire directory deleted on exit

nextSteps
- http-and-requests
Sign in to track your progress