In-Class Programming 1

Implement vanilla autoregressive text generation: repeatedly sample the next token and append it to the sequence.

PyTorch Functions You’ll Need

  1. Converting Logits to Probabilities

     # logits: 1D tensor of shape [vocab_size]
     probs = torch.softmax(logits, dim=-1)
     # probs: 1D tensor of shape [vocab_size], sums to 1.0
    
  2. Sampling from a Probability Distribution

     # probs: 1D tensor of probabilities
     # num_samples: how many samples to draw
     sampled_token_id = torch.multinomial(probs, num_samples=1)
     # Returns: tensor of shape [1] containing the sampled index
    
     # To get the actual integer value:
     token_id = sampled_token_id.item()  # Returns: Python int
    
  3. Tensor Concatenation

     # Append a new token to a sequence
     # input_ids: 1D tensor [10, 20, 30]
     # new_token: Python int, e.g., 40
    
     new_input_ids = torch.cat([
         input_ids,
         torch.tensor([new_token])
     ])
     # Result: 1D tensor [10, 20, 30, 40]
    
  4. Creating Tensors

     # From a list of integers
     tokens = torch.tensor([100, 200, 300])
    
     # From a single integer (useful for appending)
     single_token = torch.tensor([42])
    
  5. Getting Tensor Length

     # input_ids: 1D tensor
     length = len(input_ids)  # Python int
    
  6. Encoding Text to Token IDs

     from transformers import AutoTokenizer
    
     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B-Base")
    
     # Encode a string to token IDs
     token_ids = tokenizer.encode("Hello world", return_tensors="pt")
     # Returns: 2D tensor of shape [1, seq_len]
    
     # Get 1D tensor (remove batch dimension)
     token_ids = tokenizer.encode("Hello world", return_tensors="pt")[0]
     # Returns: 1D tensor of shape [seq_len]
    
  7. Decoding Token IDs to Text

     # Decode a list of token IDs back to text
     text = tokenizer.decode([100, 200, 300])
     # Returns: string
    
     # Decode a single token
     token_text = tokenizer.decode([42])
     # Returns: string (usually a single character or word piece)
    
     # Skip special tokens (like <|endoftext|>)
     text = tokenizer.decode(token_ids, skip_special_tokens=True)
    
  8. End-of-sequence token (used to stop generation)

     # The tokenizer exposes the model's EOS token ID as a plain int
     eos_token_id = tokenizer.eos_token_id
     # Returns: int (e.g., 151643)
    
     # Check if a token is EOS (compare the Python int from .item(),
     # not the shape-[1] tensor returned by torch.multinomial)
     if token_id == tokenizer.eos_token_id:
         break  # Stop generation
    

Access the Logit Server

# Demo: ask the logit server for next-token logits and print the greedy
# (argmax) prediction for a short prompt.
import torch
from logit_client import LogitClient
from transformers import AutoTokenizer

client = LogitClient("https://nerc.guha-anderson.com")

MODEL_NAME = "Qwen/Qwen3-8B-Base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# encode(..., return_tensors="pt") yields a 2D [1, seq_len] tensor;
# [0] drops the batch dimension to get a 1D [seq_len] tensor.
input_ids = tokenizer.encode("Shakespeare was a great", return_tensors="pt")[0]

# Logits for the next-token position; per the handout's contract this is a
# 1D tensor of shape [vocab_size].
logits = client.get_logits(MODEL_NAME, input_ids)

# torch.argmax returns a 0-d tensor; convert to a plain Python int with
# .item() and pass a list of IDs to decode — the form decode is documented
# to accept — instead of a raw 0-d tensor.
next_token_id = torch.argmax(logits).item()
print(tokenizer.decode([next_token_id]))