Mask 75% of an image's patches, ask a transformer to reconstruct the missing ones — and you get representations that transfer. Here's how MAE (Masked Autoencoder) works, built from scratch.
# Randomly mask 75% of patches, keep the rest
def random_masking(x, mask_ratio=0.75):
    """Keep a random subset of patches from each sample and drop the rest.

    Each sample gets its own random permutation of patch positions (the
    argsort of uniform noise); the first ``keep`` positions of that
    permutation are the patches that survive.

    Args:
        x: Tensor with three dimensions ``(N, L, D)``. The random selection
            is made along dimension 1, i.e. over the ``L`` patches of each
            of the ``N`` samples.
        mask_ratio: Fraction of the ``L`` patches to mask out, in ``[0, 1]``.

    Returns:
        Tensor of shape ``(N, keep, D)`` with
        ``keep = int(L * (1 - mask_ratio))``, holding the kept patches of
        each sample in random order.

    Raises:
        ValueError: If ``mask_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= mask_ratio <= 1:
        # A ratio above 1 would make `keep` negative and silently slice
        # the wrong number of patches.
        raise ValueError(f"mask_ratio must be in [0, 1], got {mask_ratio}")

    N, L, D = x.shape
    keep = int(L * (1 - mask_ratio))

    # Draw the noise on x's device so the gather index lives alongside x
    # (torch.gather needs the index on the same device as the input).
    noise = torch.rand(N, L, device=x.device)
    idx = torch.argsort(noise, dim=1)

    # Broadcast the kept positions across the feature dimension for gather.
    keep_idx = idx[:, :keep, None].expand(-1, -1, D)
    return torch.gather(x, 1, keep_idx)