# Drop any stale HF_TOKEN from the environment so that the interactive
# login below is the authentication source actually used.
import os

_stale_token = os.environ.pop('HF_TOKEN', None)
if _stale_token is not None:
    print("Cleared HF_TOKEN environment variable")

from huggingface_hub import login

# Prompts for a Hugging Face access token (required for gated Llama weights).
login()
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Gated checkpoint: requires an authenticated account with Llama access.
model_name = "meta-llama/Llama-3.2-1B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Llama ships without a dedicated pad token; reuse EOS so that
# tokenizer(..., padding=True) works.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)
import torch.nn.functional as F

# Prompt probing multi-hop factual recall (Manchester -> UK -> London).
prompt = "Fact: The capital of the country containing Manchester is"

inputs = tokenizer(
    prompt,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Report the batch shape produced by the tokenizer.
n_seqs, max_len = inputs["input_ids"].shape
print(f"Tokenized {n_seqs} sequences with max length {max_len}.")

# Single forward pass; no gradients needed for inference.
# (output_hidden_states=True was removed: outputs.hidden_states was never
# read, and requesting it materializes every layer's activations for nothing.)
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )

# Logits at the final position of the (only) sequence: the model's
# next-token prediction for the prompt.
last_token_logits = outputs.logits[0, -1, :]

# Normalize logits into a probability distribution over the vocabulary.
probs = F.softmax(last_token_logits, dim=-1)

# Show the k most likely continuations with their probabilities.
top_k = 5
top_probs, top_indices = torch.topk(probs, k=top_k)

# BUG FIX: the banner previously echoed an unrelated prompt
# ("...the capital of the state containing Dallas...") — it now echoes
# the prompt that was actually fed to the model above.
print("Fact: The capital of the country containing Manchester is")
print("-" * 30)
for rank in range(top_k):
    token = tokenizer.decode(top_indices[rank])
    probability = top_probs[rank].item() * 100
    print(f"{rank+1}. {token:10} | Confidence: {probability:.2f}%")
# Sample output from a previous run (commented out — raw pasted terminal
# output is not valid Python and prevented this file from being imported/run):
#
#   Tokenized 1 sequences with max length 11.
#   Fact: the capital of the state containing Dallas is
#   ------------------------------
#   1.  London    | Confidence: 12.89%
#   2.  Birmingham | Confidence: 7.81%
#   3.  not       | Confidence: 6.08%
#   4.  Manchester | Confidence: 5.37%
#   5.  called    | Confidence: 4.74%