In-Class Programming 1
Implement vanilla autoregressive text generation: repeatedly sample the next token and append it to the sequence.
PyTorch Functions You’ll Need
-
Converting Logits to Probabilities
# logits: 1D tensor of shape [vocab_size]
probs = torch.softmax(logits, dim=-1)
# probs: 1D tensor of shape [vocab_size], sums to 1.0
Sampling from a Probability Distribution
# probs: 1D tensor of probabilities
# num_samples: how many samples to draw
sampled_token_id = torch.multinomial(probs, num_samples=1)
# Returns: tensor of shape [1] containing the sampled index

# To get the actual integer value:
token_id = sampled_token_id.item()  # Returns: Python int
Tensor Concatenation
# Append a new token to a sequence
# input_ids: 1D tensor [10, 20, 30]
# new_token: Python int, e.g., 40
new_input_ids = torch.cat([
    input_ids,
    torch.tensor([new_token])
])
# Result: 1D tensor [10, 20, 30, 40]
Creating Tensors
# From a list of integers
tokens = torch.tensor([100, 200, 300])

# From a single integer (useful for appending)
single_token = torch.tensor([42])
Getting Tensor Length
# input_ids: 1D tensor
length = len(input_ids)  # Python int
Encoding Text to Token IDs
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B-Base")

# Encode a string to token IDs
token_ids = tokenizer.encode("Hello world", return_tensors="pt")
# Returns: 2D tensor of shape [1, seq_len]

# Get 1D tensor (remove batch dimension)
token_ids = tokenizer.encode("Hello world", return_tensors="pt")[0]
# Returns: 1D tensor of shape [seq_len]
Decoding Token IDs to Text
# Decode a list of token IDs back to text
text = tokenizer.decode([100, 200, 300])  # Returns: string

# Decode a single token
token_text = tokenizer.decode([42])
# Returns: string (usually a single character or word piece)

# Skip special tokens (like <|endoftext|>)
text = tokenizer.decode(token_ids, skip_special_tokens=True)
End-of-sequence token (used to stop generation)
# End-of-sequence token (used to stop generation)
eos_token_id = tokenizer.eos_token_id  # Returns: int (e.g., 151643)

# Check if a token is EOS
if sampled_token_id == tokenizer.eos_token_id:
    break  # Stop generation
Access the Logit Server
import torch
from logit_client import LogitClient
from transformers import AutoTokenizer
# Connect to the remote logit server and load the tokenizer that matches
# the model the server is running.
client = LogitClient("https://nerc.guha-anderson.com")
MODEL_NAME = "Qwen/Qwen3-8B-Base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Encode the prompt and drop the batch dimension so we have a 1D tensor
# of token ids, as the logit server expects.
input_ids = tokenizer.encode("Shakespeare was a great", return_tensors="pt")[0]

# Fetch next-token logits from the server and greedily pick the single
# most likely continuation token.
logits = client.get_logits(MODEL_NAME, input_ids)
next_token = torch.argmax(logits).item()
print(tokenizer.decode(next_token))