Website: https://huggingface.co/models
Hugging Face is like GitHub for models. Dominik does not recommend searching for a model on Hugging Face directly; instead, he recommends following blogs/Twitter etc. to find good model names. The models themselves are still hosted on Hugging Face.
Hugging Face provides the Transformers library, which handles downloading (and later loading) these models.
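For example, a minimal sketch of that default behavior (using gpt2 as a stand-in model; the cache location is the library default, nothing configured here):

```python
from transformers import AutoTokenizer

# On first use, from_pretrained() downloads the files from the Hub and
# caches them (by default under ~/.cache/huggingface/hub); subsequent
# calls load from that cache instead of re-downloading.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
```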
Personally I want to:
- Download the model locally once
- Load it from disk at startup or on first usage
![[utils_llm.py]]
Download a model from Hugging Face
```python
from huggingface_hub import login
from pathlib import Path
import os

from transformers import AutoModelForCausalLM, AutoTokenizer

current_file_directory = Path(__file__).parent
model_name = "mistralai/Mistral-7B-v0.3"
model_save_directory = current_file_directory / "downloaded_models" / model_name


def download_model_locally():
    if not os.path.isdir(model_save_directory):
        # Replace "YOURTOKEN" with your actual Hugging Face token
        login(token="YOURTOKEN")
        # token=True reuses the token stored by login() above
        model = AutoModelForCausalLM.from_pretrained(model_name, token=True)
        # save_pretrained() creates the target directory if needed
        model.save_pretrained(model_save_directory)
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
        tokenizer.save_pretrained(model_save_directory)
    else:
        raise Exception(
            f"""Model folder at path {model_save_directory} already exists.
            Not doing anything. If you do want to download the model again,
            delete it before running this method."""
        )


if __name__ == "__main__":
    download_model_locally()
```
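Alternatively, if you just want a byte-for-byte copy of the repository files on disk, huggingface_hub's snapshot_download can replace the from_pretrained()/save_pretrained() round trip. A sketch (the local_dir value is just an example; a token is only needed for gated repos):

```python
from huggingface_hub import snapshot_download

# Downloads all files of the repo into local_dir in one call;
# pass token="YOURTOKEN" if the model repo is gated.
snapshot_download(
    repo_id="mistralai/Mistral-7B-v0.3",
    local_dir="downloaded_models/mistralai/Mistral-7B-v0.3",
)
```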
Use said model
@skip("requires to download the model locally first via executing utils_llm.py")
def test_invoice_extraction(self):
model_fp = Path(... / "mistralai" / "Mistral-7B-v0.3")
model = AutoModelForCausalLM.from_pretrained(model_fp, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(model_fp, local_files_only=True)
text_generation_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
# ---------
llm_input = "..."
llm_output = text_generation_pipeline(llm_input, max_length=10, truncation=True, num_return_sequences=1)
print(llm_output[0]["generated_text"])
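To get the "load at startup or on first usage" behavior from the list at the top, one option is to memoize the pipeline so the model is read from disk only once per process. A sketch (get_text_generation_pipeline is a hypothetical helper, not part of the code above):

```python
from functools import lru_cache

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


@lru_cache(maxsize=1)
def get_text_generation_pipeline(model_fp: str):
    # First call loads model + tokenizer from disk; later calls
    # return the cached pipeline instance immediately.
    model = AutoModelForCausalLM.from_pretrained(model_fp, local_files_only=True)
    tokenizer = AutoTokenizer.from_pretrained(model_fp, local_files_only=True)
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
```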