@@ -5,27 +5,51 @@ model_name = "Qwen/Qwen2.5-7B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype="auto",
+    torch_dtype=torch.bfloat16,
     device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-def generate_response(prompt, max_new_tokens=256):
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    input_ids = input_ids.to('cuda')
-    with torch.no_grad():
-        output = model.generate(input_ids, max_new_tokens=max_new_tokens, num_return_sequences=1, pad_token_id=50256)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    return response
-
-print("Chatbot: Hi there! How can I help you?")
+print("Model initialized: " + model_name)
+print("To quit, type \"exit\"")
+print("")
+
+def generate_response(prompt, max_new_tokens=512):
+    # Keep the full conversation history so each reply stays in context.
+    chat.append({"role": "user", "content": prompt})
+    formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+    # The chat template already inserts the special tokens, so don't add them twice.
+    inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
+    inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
+    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.1)
+    # generate() echoes the prompt; decode only the newly generated tokens.
+    decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
+    chat.append({"role": "assistant", "content": decoded_output})
+    return decoded_output
+
+chat = [
+    {"role": "system", "content": "You are a chatbot called Kirk. Your goal is to converse as naturally as possible with users. Start by greeting the user."},
+]
+# Generate the opening greeting requested by the system prompt.
+formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
+inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
+outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.1)
+decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
+chat.append({"role": "assistant", "content": decoded_output})
+print(decoded_output)
 
 while True:
     user_input = input("You: ")
     if user_input.lower() == "exit":
         print("Chatbot: Goodbye!")
         break
     response = generate_response(user_input)
     print("Chatbot:", response)