Website: https://huggingface.co/models

Hugging Face is like GitHub for models. Dominik does not recommend searching for a model directly on Hugging Face; instead, he recommends following blogs/Twitter etc. to find good model names. The models themselves are still hosted on Hugging Face.

Hugging Face provides the Transformers library, which handles downloading and loading these models.
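For context, a minimal sketch of the default behaviour: calling from_pretrained with a model name downloads the weights into the shared Hugging Face cache (typically ~/.cache/huggingface/hub) and reuses them on later calls.

from transformers import AutoModelForCausalLM, AutoTokenizer

# First call downloads into the shared Hugging Face cache, later calls reuse the cached files.
# Gated repos (such as the Mistral models) additionally need an access token,
# e.g. via huggingface_hub.login() or the HF_TOKEN environment variable.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.3")
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.3")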

Personally I want to:

  1. Download the model locally once
  2. Load it at startup or first usage from disk (sketched right below)
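A minimal sketch of point 2 (the helper name get_model and the module-level cache are my own, not from the note): load the files saved by utils_llm.py from disk on first use and keep them in memory for later calls.

from pathlib import Path
from transformers import AutoModelForCausalLM, AutoTokenizer

_model = None
_tokenizer = None

def get_model():
    """Load the locally saved model/tokenizer on first call, reuse them afterwards."""
    global _model, _tokenizer
    if _model is None:
        # Same folder layout that utils_llm.py writes to (assumption)
        model_fp = Path(__file__).parent / "downloaded_models" / "mistralai" / "Mistral-7B-v0.3"
        _model = AutoModelForCausalLM.from_pretrained(model_fp, local_files_only=True)
        _tokenizer = AutoTokenizer.from_pretrained(model_fp, local_files_only=True)
    return _model, _tokenizer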

![[utils_llm.py]]

Download a model from Hugging Face

from huggingface_hub import login
from pathlib import Path
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

current_file_directory = Path(__file__).parent
model_name = "mistralai/Mistral-7B-v0.3"
model_save_directory = current_file_directory / "downloaded_models" / model_name

def download_model_locally():
    if not os.path.isdir(model_save_directory):
        # Replace 'YOURTOKEN' with your actual Hugging Face access token
        login(token="YOURTOKEN")

        os.makedirs(model_save_directory, exist_ok=True)

        # Download model and tokenizer from the Hub and save them next to this file
        model = AutoModelForCausalLM.from_pretrained(model_name, token=True)
        model.save_pretrained(model_save_directory)

        tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
        tokenizer.save_pretrained(model_save_directory)
    else:
        raise Exception(
            f"Model folder at path {model_save_directory} already exists. Not doing anything. "
            "If you do want to download the model again, delete it before running this method."
        )

if __name__ == "__main__":
    download_model_locally()
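Running python utils_llm.py once is enough; after that, everything can be loaded offline with local_files_only=True, as in the test below.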

Use said model

    # Module-level imports needed for this test: from unittest import skip, from pathlib import Path,
    # and from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
    @skip("requires to download the model locally first via executing utils_llm.py")
    def test_invoice_extraction(self):
        model_fp = Path(... / "mistralai" / "Mistral-7B-v0.3")

        model = AutoModelForCausalLM.from_pretrained(model_fp, local_files_only=True)
        tokenizer = AutoTokenizer.from_pretrained(model_fp, local_files_only=True)

        text_generation_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        # ---------

        llm_input = "..."
        llm_output = text_generation_pipeline(llm_input, max_length=10, truncation=True, num_return_sequences=1)
        
        print(llm_output[0]["generated_text"])
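Note that max_length=10 also counts the prompt tokens, so the completion gets cut off almost immediately; max_new_tokens is usually the clearer parameter when you only care about how much new text is generated.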