In-Class Programming 1

Implement vanilla autoregressive text generation: repeatedly sample the next token and append it to the sequence.

PyTorch Functions You’ll Need

  1. Converting Logits to Probabilities

     # logits: 1D tensor of shape [vocab_size]
     probs = torch.softmax(logits, dim=-1)
     # probs: 1D tensor of shape [vocab_size], sums to 1.0
    
  2. Sampling from a Probability Distribution

     # probs: 1D tensor of probabilities
     # num_samples: how many samples to draw
     sampled_token_id = torch.multinomial(probs, num_samples=1)
     # Returns: tensor of shape [1] containing the sampled index
    
     # To get the actual integer value:
     token_id = sampled_token_id.item()  # Returns: Python int
    
  3. Tensor Concatenation

     # Append a new token to a sequence
     # input_ids: 1D tensor [10, 20, 30]
     # new_token: Python int, e.g., 40
    
     new_input_ids = torch.cat([
         input_ids,
         torch.tensor([new_token])
     ])
     # Result: 1D tensor [10, 20, 30, 40]
    
  4. Creating Tensors

     # From a list of integers
     tokens = torch.tensor([100, 200, 300])
    
     # From a single integer (useful for appending)
     single_token = torch.tensor([42])
    
  5. Getting Tensor Length

     # input_ids: 1D tensor
     length = len(input_ids)  # Python int
    
  6. Encoding Text to Token IDs

     from transformers import AutoTokenizer
    
     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B-Base")
    
     # Encode a string to token IDs
     token_ids = tokenizer.encode("Hello world", return_tensors="pt")
     # Returns: 2D tensor of shape [1, seq_len]
    
     # Get 1D tensor (remove batch dimension)
     token_ids = tokenizer.encode("Hello world", return_tensors="pt")[0]
     # Returns: 1D tensor of shape [seq_len]
    
  7. Decoding Token IDs to Text

     # Decode a list of token IDs back to text
     text = tokenizer.decode([100, 200, 300])
     # Returns: string
    
     # Decode a single token
     token_text = tokenizer.decode([42])
     # Returns: string (usually a single character or word piece)
    
     # Skip special tokens (like <|endoftext|>)
     text = tokenizer.decode(token_ids, skip_special_tokens=True)
    
  8. End-of-sequence token (used to stop generation)

     # The tokenizer exposes the model's EOS token ID as a plain int
     eos_token_id = tokenizer.eos_token_id
     # Returns: int (e.g., 151643)
    
     # Check if a token is EOS (compare the Python int from .item(),
     # not the shape-[1] tensor returned by torch.multinomial)
     if token_id == tokenizer.eos_token_id:
         break  # Stop generation
    

Access the Logit Server

# Demo: ask the logit server for next-token logits and print the greedy
# (argmax) prediction for a short prompt.
import torch
from logit_client import LogitClient
from transformers import AutoTokenizer

client = LogitClient("https://nerc.guha-anderson.com")

MODEL_NAME = "Qwen/Qwen3-8B-Base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# encode(..., return_tensors="pt") yields a 2D [1, seq_len] tensor;
# [0] drops the batch dimension to get a 1D [seq_len] tensor.
input_ids = tokenizer.encode("Shakespeare was a great", return_tensors="pt")[0]

# Logits for the next-token position; per the handout's contract this is a
# 1D tensor of shape [vocab_size].
logits = client.get_logits(MODEL_NAME, input_ids)

# torch.argmax returns a 0-d tensor; convert to a plain Python int with
# .item() and pass a list of IDs to decode — the form decode is documented
# to accept — instead of a raw 0-d tensor.
next_token_id = torch.argmax(logits).item()
print(tokenizer.decode([next_token_id]))