# Clear any existing invalid HF_TOKEN environment variable so that the
# interactive huggingface_hub login below is not shadowed by a stale token.
import os

if 'HF_TOKEN' in os.environ:
    del os.environ['HF_TOKEN']
    print("Cleared HF_TOKEN environment variable")

from huggingface_hub import login

# Prompt for Hugging Face credentials interactively (required for gated
# models such as meta-llama/Llama-3.2-1B).
login()

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch.nn.functional as F

# Llama 3.2 1B is a gated checkpoint: the login() performed earlier in this
# script must have succeeded for these downloads to be authorized.
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Llama tokenizers ship without a pad token; reuse EOS so that
# tokenizer(..., padding=True) works.
tokenizer.pad_token = tokenizer.eos_token
# Tokenize the prompt. padding/truncation are no-ops for a single sequence,
# but keep the call robust if more prompts are batched in later.
inputs = tokenizer(
    "Fact: The capital of the country containing Manchester is",
    padding=True,
    truncation=True,
    return_tensors="pt"
)
print(f"Tokenized {inputs['input_ids'].shape[0]} sequences with max length {inputs['input_ids'].shape[1]}.")

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        output_hidden_states=True,
    )

# The logits at the final position predict the token that follows the prompt.
last_token_logits = outputs.logits[0, -1, :]
probs = F.softmax(last_token_logits, dim=-1)

# Keep the k most probable next-token candidates.
top_k = 5
top_probs, top_indices = torch.topk(probs, k=top_k)
# Display the prompt and the model's top-k next-token candidates.
# BUG FIX: the header previously read "the capital of the state containing
# Dallas", copy-pasted from another example; it now matches the prompt that
# was actually fed to the model above.
print("Fact: The capital of the country containing Manchester is")
print("-" * 30)
for i in range(top_k):
    token = tokenizer.decode(top_indices[i])
    probability = top_probs[i].item() * 100
    print(f"{i+1}. {token:10} | Confidence: {probability:.2f}%")
# Captured notebook output (from the tokenization step above):
# Tokenized 1 sequences with max length 11.
# Captured notebook output (commented out so the file is valid Python;
# note the candidates — London, Birmingham, Manchester — correspond to the
# "country containing Manchester" prompt, not the Dallas header printed here):
# Fact: the capital of the state containing Dallas is
# ------------------------------
# 1.  London    | Confidence: 12.89%
# 2.  Birmingham | Confidence: 7.81%
# 3.  not       | Confidence: 6.08%
# 4.  Manchester | Confidence: 5.37%
# 5.  called    | Confidence: 4.74%