1. Region-Based CNN

1. R-CNN Implementation

Below is an example of an R-CNN (Region-based Convolutional Neural Network) style detector in PyTorch. torchvision does not ship the original R-CNN as a standalone model; its region-based detection machinery lives in more advanced successors such as Faster R-CNN (for example torchvision.models.detection.fasterrcnn_resnet50_fpn). For demonstration purposes, this example is therefore assembled from torchvision's detection components on top of a pre-trained ResNet-50 backbone.

Sample Code for R-CNN using PyTorch

import torch
import torchvision.transforms as T
from torchvision.models.detection import rcnn, fasterrcnn_resnet50_fpn
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.roi_heads import RoIHeads
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Define a basic R-CNN model
def create_rcnn_model(num_classes):
    """Build a Faster R-CNN style detector on top of a ResNet-50 backbone.

    Args:
        num_classes: Number of classes the box predictor distinguishes,
            including the background class.

    Returns:
        A ``torchvision.models.detection.FasterRCNN`` module. Only the
        backbone is loaded with pre-trained (ImageNet) weights; the RPN and
        the box heads are freshly initialised, so the model has to be trained
        or fine-tuned before its detections are meaningful.
    """
    # Function-scope imports: these names are not part of the file-level
    # import block, and importing them here keeps the function self-contained.
    import torchvision
    from torchvision.models.detection import FasterRCNN
    from torchvision.ops import MultiScaleRoIAlign

    # Keep only the convolutional trunk: the last two children (global
    # average pool and fully connected classifier) are dropped so that the
    # backbone outputs a spatial feature map.
    resnet = torchvision.models.resnet50(weights="IMAGENET1K_V1")
    backbone = torch.nn.Sequential(*list(resnet.children())[:-2])
    # FasterRCNN requires the backbone to expose its output channel count;
    # the final ResNet-50 stage produces 2048 channels.
    backbone.out_channels = 2048

    # The backbone yields a single feature map, so exactly one tuple of
    # sizes and one tuple of aspect ratios is supplied.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    # A backbone that returns a plain tensor is exposed to the RoI pooler
    # under the feature-map name "0".
    roi_pool = MultiScaleRoIAlign(
        featmap_names=["0"],
        output_size=7,
        sampling_ratio=2,
    )

    # FasterRCNN wires the RPN and the RoI heads together internally.
    return FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pool,
    )

# Define the transformations
def transform_image(image_path):
    """Load the image stored at ``image_path`` and return it as a tensor.

    The file is opened with PIL, converted to RGB, and passed through a
    pipeline whose only step is ``T.ToTensor()``.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    to_tensor = T.Compose([T.ToTensor()])
    return to_tensor(rgb_image)

# Perform object detection
def detect_objects(image_path):
    """Run a freshly built detector on one image and return its prediction.

    A model for 91 classes (the COCO label count) is created on every call,
    switched to evaluation mode, and applied to the image as a one-element
    batch with gradient tracking disabled. The first (and only) entry of the
    model output is returned.
    """
    detector = create_rcnn_model(num_classes=91)
    detector.eval()

    batch = [transform_image(image_path)]
    with torch.no_grad():
        outputs = detector(batch)

    return outputs[0]

# Draw bounding boxes on the image
def draw_boxes(image_path, predictions, threshold=0.5):
    """Display the image with its confident detections drawn on top.

    Args:
        image_path: Path of the image the predictions were computed for.
        predictions: Mapping with ``'boxes'``, ``'labels'`` and ``'scores'``
            tensors for one image. Each box is read as corner coordinates
            ``(x_min, y_min, x_max, y_max)``.
        threshold: Minimum score a detection needs in order to be drawn.
    """
    # Show the decoded image directly instead of reading the file a second
    # time and converting it tensor -> uint8 array.
    image = Image.open(image_path).convert("RGB")

    # Apply the same mask to all three tensors so every box stays paired
    # with its own label and score.
    keep = predictions['scores'] >= threshold
    boxes = predictions['boxes'][keep].cpu().numpy()
    labels = predictions['labels'][keep].cpu().numpy()
    scores = predictions['scores'][keep].cpu().numpy()

    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(image)
    for (x_min, y_min, x_max, y_max), label, score in zip(boxes, labels, scores):
        # Rectangle takes the top-left corner plus width and height.
        rect = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(rect)
        ax.text(x_min, y_min, f'{int(label)}: {score:.2f}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')

    plt.axis('off')
    plt.show()

# Example usage: guarded so that importing this code does not run inference
# and open a plot window as a side effect.
if __name__ == "__main__":
    image_path = 'path_to_your_image.jpg'
    predictions = detect_objects(image_path)
    draw_boxes(image_path, predictions)

Explanation:

Define the R-CNN Model:

def create_rcnn_model(num_classes):
    """Build a Faster R-CNN style detector on top of a ResNet-50 backbone.

    Args:
        num_classes: Number of classes the box predictor distinguishes,
            including the background class.

    Returns:
        A ``torchvision.models.detection.FasterRCNN`` module whose backbone
        carries pre-trained (ImageNet) weights; the RPN and box heads are
        freshly initialised and need training before use.
    """
    # Function-scope imports keep the excerpt self-contained.
    import torchvision
    from torchvision.models.detection import FasterRCNN
    from torchvision.ops import MultiScaleRoIAlign

    # Drop the last two children (average pool and classifier) so the
    # backbone outputs a spatial feature map.
    resnet = torchvision.models.resnet50(weights="IMAGENET1K_V1")
    backbone = torch.nn.Sequential(*list(resnet.children())[:-2])
    # FasterRCNN requires the backbone to expose its output channel count.
    backbone.out_channels = 2048

    # One feature map -> one tuple of sizes and one tuple of aspect ratios.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    # A plain-tensor backbone output is exposed under the name "0".
    roi_pool = MultiScaleRoIAlign(
        featmap_names=["0"],
        output_size=7,
        sampling_ratio=2,
    )

    return FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pool,
    )

Uses a ResNet-50 backbone for feature extraction.
Configures Region Proposal Network (RPN) and RoI heads for object detection.

Image Transformation:

def transform_image(image_path):
    """Open ``image_path`` with PIL, convert to RGB, and return a tensor."""
    rgb_image = Image.open(image_path).convert("RGB")
    to_tensor = T.Compose([
        T.ToTensor(),
    ])
    return to_tensor(rgb_image)

Object Detection:

def detect_objects(image_path):
    """Build a 91-class detector, run it on one image, return its prediction."""
    detector = create_rcnn_model(num_classes=91)
    detector.eval()

    # The model takes a list of image tensors; this is a one-element batch.
    batch = [transform_image(image_path)]
    with torch.no_grad():
        outputs = detector(batch)

    return outputs[0]

Drawing Bounding Boxes:

def draw_boxes(image_path, predictions, threshold=0.5):
    """Display the image with its confident detections drawn on top.

    Args:
        image_path: Path of the image the predictions were computed for.
        predictions: Mapping with ``'boxes'``, ``'labels'`` and ``'scores'``
            tensors for one image. Each box is read as corner coordinates
            ``(x_min, y_min, x_max, y_max)``.
        threshold: Minimum score a detection needs in order to be drawn.
    """
    # Show the decoded image directly rather than re-reading the file and
    # converting it tensor -> uint8 array.
    image = Image.open(image_path).convert("RGB")

    # One mask for all three tensors keeps box, label and score aligned.
    keep = predictions['scores'] >= threshold
    boxes = predictions['boxes'][keep].cpu().numpy()
    labels = predictions['labels'][keep].cpu().numpy()
    scores = predictions['scores'][keep].cpu().numpy()

    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(image)
    for (x_min, y_min, x_max, y_max), label, score in zip(boxes, labels, scores):
        # Rectangle takes the top-left corner plus width and height.
        rect = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(rect)
        ax.text(x_min, y_min, f'{int(label)}: {score:.2f}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')

    plt.axis('off')
    plt.show()

2. Faster R-CNN Implementation

This example demonstrates how to use a pre-trained Faster R-CNN model for inference on an image. It includes loading the model, processing the image, and displaying the results with bounding boxes.

2.1 Sample Code for Faster R-CNN using PyTorch

import torch
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Load a pre-trained Faster R-CNN model (ResNet-50 backbone with FPN).
# The `weights` argument replaces the deprecated `pretrained=True` flag.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
model.eval()  # Set the model to evaluation mode

# Define a function to transform the input image
def transform_image(image_path):
    """Load the image stored at ``image_path`` and return it as a tensor.

    The file is opened with PIL, converted to RGB, and passed through a
    pipeline whose only step is ``T.ToTensor()``.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    to_tensor = T.Compose([T.ToTensor()])
    return to_tensor(rgb_image)

# Perform object detection
def detect_objects(image_path):
    """Run the module-level ``model`` on one image and return its prediction.

    The image is loaded with ``transform_image`` and handed to the model as a
    one-element list with gradient tracking disabled; the first (and only)
    entry of the model output is returned.
    """
    batch = [transform_image(image_path)]
    with torch.no_grad():
        outputs = model(batch)
    return outputs[0]

# Draw bounding boxes on the image
def draw_boxes(image_path, predictions, threshold=0.5):
    """Display the image with its confident detections drawn on top.

    Args:
        image_path: Path of the image the predictions were computed for.
        predictions: Mapping with ``'boxes'``, ``'labels'`` and ``'scores'``
            tensors for one image. Each box is read as corner coordinates
            ``(x_min, y_min, x_max, y_max)``.
        threshold: Minimum score a detection needs in order to be drawn.
    """
    # Show the decoded image directly instead of reading the file a second
    # time and converting it tensor -> uint8 array.
    image = Image.open(image_path).convert("RGB")

    # Apply the same mask to all three tensors so every box stays paired
    # with its own label and score.
    keep = predictions['scores'] >= threshold
    boxes = predictions['boxes'][keep].cpu().numpy()
    labels = predictions['labels'][keep].cpu().numpy()
    scores = predictions['scores'][keep].cpu().numpy()

    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(image)
    for (x_min, y_min, x_max, y_max), label, score in zip(boxes, labels, scores):
        # Rectangle takes the top-left corner plus width and height.
        rect = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(rect)
        ax.text(x_min, y_min, f'{int(label)}: {score:.2f}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')

    plt.axis('off')
    plt.show()

# Example usage: guarded so that importing this code does not run inference
# and open a plot window as a side effect.
if __name__ == "__main__":
    image_path = 'path_to_your_image.jpg'
    predictions = detect_objects(image_path)
    draw_boxes(image_path, predictions)

2.2 Explanation:

Loading the Model:
```
# `weights` replaces the deprecated `pretrained=True` flag.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
model.eval()  # Set the model to evaluation mode
```
- Loads a pre-trained Faster R-CNN model with a ResNet-50 backbone and Feature Pyramid Network (FPN).