<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>IT 세계의 후아</title>
    <link>https://hoooa.tistory.com/</link>
    <description></description>
    <language>ko</language>
    <pubDate>Sun, 5 Jul 2026 09:05:31 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>후__아</managingEditor>
    <item>
      <title>[프로그래머스]LV1_완주하지 못한 선수(Hash)</title>
      <link>https://hoooa.tistory.com/71</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;706&quot; data-origin-height=&quot;286&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/9WLa8/btsJQkfxUkL/aMkSJ75XnrXIUrVrwbeAL0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/9WLa8/btsJQkfxUkL/aMkSJ75XnrXIUrVrwbeAL0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/9WLa8/btsJQkfxUkL/aMkSJ75XnrXIUrVrwbeAL0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F9WLa8%2FbtsJQkfxUkL%2FaMkSJ75XnrXIUrVrwbeAL0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;706&quot; height=&quot;286&quot; data-origin-width=&quot;706&quot; data-origin-height=&quot;286&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;participant: 참가자 목록&lt;br /&gt;completion: 완주자 목록&lt;br /&gt;return: 완주하지 못한 참가자 한 명의 이름&lt;br /&gt;*중복된 이름이 있을 수 있음&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1727448444305&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 내 첫 풀이
def solution(participant, completion):
    names_dic = dict(zip(participant, [0]*len(participant)))
    
    for i in participant:
        names_dic[i] += 1
    
    for j in completion:
        names_dic[j] -= 1
    
    return [k for k,v in names_dic.items() if v!=0][0]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1727448577946&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 다른 사람 풀이 참고-Hash 활용
def solution(participant, completion):
    answer = ''
    temp = 0
    dic = {}
    
    for part in participant:
        dic[hash(part)] = part
        temp += int(hash(part))
    for com in completion:
        temp -= hash(com)
    answer = dic[temp]

    return answer&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Coding/Algorithm</category>
      <category>LV1</category>
      <category>코테</category>
      <category>프로그래머스</category>
      <category>해시</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/71</guid>
      <comments>https://hoooa.tistory.com/71#entry71comment</comments>
      <pubDate>Fri, 27 Sep 2024 23:49:50 +0900</pubDate>
    </item>
    <item>
      <title>[논문]QLoRA: Efficient Finetuning of Quantized LLMs</title>
      <link>https://hoooa.tistory.com/70</link>
      <description>&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;LLM Fine-Tuning에 대해 찾아보다 QLoRA를 접한 후 공부가 필요하다 느껴 관련 논문을 리뷰해보고자 한다&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;하지만 그 전에 'Quantization' 양자화에 대한 것도 공부해야 한다...(역시 공부는 공부를 부르고...)&lt;/span&gt;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;들어가기에 앞서..&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;※&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;Quantization 양자화&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;정확하고 세밀한 단위의 입력값 &amp;rarr; 단순화한 단위값(경량화)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;즉, 정보를 표현하는 데 필요한&amp;nbsp;&lt;b&gt;비트의 수를 줄여주는&lt;/b&gt;&amp;nbsp;것&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ex) 인공신경망에서, 가중치 매개변수(weight) &amp;amp; 활성 노드 연산(activation function) 양자화&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;rarr; lower-bit의 수학연산 &amp;amp; 신경망 중간 계산값 양자화&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;※ 장단점&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;메모리 액세스&amp;darr;&amp;nbsp;연산량&amp;darr; 전력 효율성&amp;uarr;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;but 압축되는 과정에서 채널의 수가 줄어드는 만큼 정보가 손실됨&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;&lt;b&gt;정확도&lt;/b&gt;가 기존 모델에 비해&amp;nbsp;&lt;b&gt;낮아질 수밖에&lt;/b&gt;&amp;nbsp;없음&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;there4; 모델을 손상시키지 않으면서 크기와 계산 비용을 줄이는 것이 목표&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;※ 종류&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;보통 tensorflow/pytorch의 파라미터는 32bit 부동소수점 연산, FP32(float32) 형태로 저장됨&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;이를 INT8/INT4 or FP8/FP4로 변환하게 됨&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- Dynamic Quantization 동적 양자화&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;weight에 대해 먼저 양자화, 계산 수행 직전에 동적으로 양자화 됨&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- Static Quantization 정적 양자화(Post Training Quantization)&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;훈련 이후 양자화 적용, parameter size가 큰 모델에서 효과적&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- Quantization Aware Training 양자화 인식 교육&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;훈련 도중 양자화를 고려하여 모델을 조정, 가중치 양자화에 대한 학습(fake quantization node)을 포함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;rarr; 원본 모델을 보다 양자화에 robust하게 만듦&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;보다 높은 accuracy&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;참고) 머신러닝 효율화 기법&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;양자화에 대해.. 그리고 관련 다른 기법들과도 헷갈려서(알던 것도 다 까먹었기 때문에)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;더 가닥을 잘 잡기 위해 기본개념 복습!!&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://www.youtube.com/watch?v=2ySpRWvUShI&quot;&gt;https://www.youtube.com/watch?v=2ySpRWvUShI&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;학습을 효율적으로 도와주는 기법&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;(1) 정규화 Normalization&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;데이터 x값 간에 차이가 너무 나면 정규화 과정이 필요함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;가중치값을 조절하는 것은 굉장히 어렵고 비효율적임&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;표준정규분포를 따르도록 하는 StandardScaler(표준화)가 대표적&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- Batch Normalization&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;대규모 학습 데이터셋을 작은 batch로 나누어 학습시킬 수 있음&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;예를 들어, 데이터가 15개일 때 batch size=15이면 1 epoch(전체를 한 번 학습시키는 경우)당 iteration(가중치 업데이트 횟수)=1, batch size=5이면 iteration=3, batch size=1이면 iteration=15가 됨&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Batch별&amp;nbsp;&lt;b&gt;같은 feature에 해당하는 값들(위치가 똑같은 값들&lt;/b&gt;)을 정규화&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;&amp;there4; batch size에 따라 성능이 좌우될 수 있음&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&lt;b&gt;&amp;nbsp;Layer Normalization&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;하나의 batch/token의 값(한번에 들어온 값)을 정규화&amp;nbsp;&amp;rarr; batch normalization보다 계산이 쉬움&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;트랜스포머에서 사용 多(트랜스포머 이후의 언어모델에 대부분 사용)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;(2) 최적화 Optimization&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- Gradient Descent&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;1) Batch&amp;nbsp;Gradient Descent&amp;nbsp;: 모든 데이터를 한번에 다 넣어서 가중치를 업데이트&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2) Mini-Batch&amp;nbsp;Gradient Descent&amp;nbsp;:&amp;nbsp;데이터를 조금씩 쪼개서 업데이트&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;3)&amp;nbsp;Stochastic Gradient Descent: batch size가 1인 경우, 하나의 배치당 가중치 업데이트 한 번&amp;nbsp;&amp;rarr; 랜덤으로 가중치가 바뀔 수 있음&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- Momentum&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;직전 가중치의 업데이트 방향을 반영&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;update(t) = r * update(t-1) + n&amp;nabla;w&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;W(t+1) = W(t) - update(t)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- RMSprop&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;학습률(learning rate)을 각 가중치별로 조정&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;GD가 상대적으로 큰 가중치에는 작은 학습률, 작으면 큰 학습률을 적용하여 수렴 속도를 향상시킴&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;there4;&amp;nbsp;가중치 업데이트가 많을수록 덜 학습을 하도록 함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;&lt;b&gt;Adam&lt;/b&gt;&lt;/span&gt;(Adaptive moment estimator)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Gradient와 learning rate를 모두 조정하는 방식(모멘텀 + RMSprop)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;(3) Dropout&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;과적합 해소를 위한 방식, regularization의 대표 방법&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;모델을 만든 후 노드 몇 개를 의도적으로 삭제함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;특정 노드의 의존도&amp;darr; 여러 개의 다른 신경망 모델을 앙상블하는 효과&amp;nbsp;&amp;there4;&amp;nbsp;보다 일반화된 패턴&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://arxiv.org/pdf/2305.14314&quot;&gt;https://arxiv.org/pdf/2305.14314&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;오늘 읽어보려는 논문은&lt;b&gt;&lt;i&gt;&amp;nbsp;&quot;QLoRA: Efficient Finetuning of Quantized LLMs&quot;&lt;/i&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;(QLoRA를 알기 전에 LoRA를 공부해야 한다는 걸 잊고 시작한 나란 바보가 QLoRA 읽다 말고 LoRA를 읽은 리뷰&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;rarr;&amp;nbsp;)&lt;/span&gt;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;0. Abstract&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- QLoRA,&amp;nbsp;an efficient finetuning approach that&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;reduces memory usage&lt;/span&gt;&amp;nbsp;enough to finetune a&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;65B parameter model on a single 48GB GPU&lt;/span&gt;&amp;nbsp;while preserving full 16-bit finetuning task performance&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #333333; font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- backpropagates gradients through a frozen, 4-bit quantized pretrained language model into Low Rank Adapters (LoRA)&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- introduces a number of innovations&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;(a) 4-bit NormalFloat (&lt;b&gt;NF4&lt;/b&gt;), a new data type that is information theoretically optimal for normally distributed weights&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;(b)&amp;nbsp;&lt;b&gt;Double Quantization&lt;/b&gt;&amp;nbsp;to reduce the average memory footprint by quantizing &lt;span style=&quot;background-color: #f6e199;&quot;&gt;the quantization constants*&lt;/span&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;(c)&amp;nbsp;&lt;b&gt;Paged Optimizers&lt;/b&gt;&amp;nbsp;to manage memory spikes&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- provide a detailed analysis of chatbot performance based on both human and GPT-4 evaluations showing that&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;GPT-4 evaluations are a cheap and reasonable alternative to human evaluation&lt;/span&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- find that current chatbot benchmarks are not trustworthy to accurately evaluate the performance levels of chatbots.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;* the quantization constant&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- scaling factor로서 양자화 과정 가운데 값들이 어떻게 quantized format의 range에 scale될지를 결정함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- scaling 이후 값들의 상대적 차이를 유지시킴으로써, neural network의 행동을 보존시킴&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- dequantization에서 정확하게 기존 값을 복원시키는 데에 사용됨&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;1. Introduction&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- LLM 모델을 finetuning 하는 건 성능을 올리는 데에 효과적 but GPU 메모리를 필요로 하기에 굉장히 비쌈&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- QLoRA를 통해,&amp;nbsp;&lt;i&gt;it is possible to finetune a quantized 4-bit model without any performance degradation&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;i&gt;QLORA&amp;rsquo;s efficiency enables us to&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;perform an in-depth study of instruction finetuning and chatbot performance&lt;/span&gt;&amp;nbsp;on model scales that would be impossible using regular finetuning due to memory overhead&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;Guanaco&lt;/span&gt;(LLaMA 7B 기반 학습된 언어모델)를 학습시킴으로써&amp;nbsp;&lt;b&gt;trained model에 관한 trend&lt;/b&gt;를 발견함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;first,&amp;nbsp;&lt;b&gt;data quality&lt;/b&gt;&amp;nbsp;is far more important than dataset size&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;second,&amp;nbsp;&lt;b&gt;dataset suitability&lt;/b&gt;&amp;nbsp;matters more than size for a given task.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;also provide a&amp;nbsp;&lt;b&gt;extensive analysis of chatbot performance&lt;/b&gt;&amp;nbsp;that uses both human raters and GPT-4 for evaluation&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 토너먼트 형식으로 모델을 비교 &amp;amp;&amp;nbsp;&lt;i&gt;Elo scores(determine the ranking of chatbot performance)&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;786&quot; data-origin-height=&quot;391&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ConWg/btsI3P9DVPw/CYTC8n3b9ZCaNgzGis9s41/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ConWg/btsI3P9DVPw/CYTC8n3b9ZCaNgzGis9s41/img.png&quot; data-alt=&quot;Finetuning 기법 차이&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ConWg/btsI3P9DVPw/CYTC8n3b9ZCaNgzGis9s41/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FConWg%2FbtsI3P9DVPw%2FCYTC8n3b9ZCaNgzGis9s41%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;786&quot; height=&quot;391&quot; data-origin-width=&quot;786&quot; data-origin-height=&quot;391&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;Finetuning 기법 차이&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;2. Background&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Block-wise k-bit Quantization&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;To ensure that the entire range of the low-bit data type is used,&amp;nbsp;&lt;b&gt;the input data type is commonly rescaled into the target data type&lt;/b&gt;&amp;nbsp;range through&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;normalization by the absolute maximum of the input elements&lt;/span&gt;, which are usually structured as a&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;tensor*&lt;/span&gt;.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;750&quot; data-origin-height=&quot;157&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bR6Bw8/btsI3OJJ2aR/WrVdtzRtm5d9CauO4ciCq0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bR6Bw8/btsI3OJJ2aR/WrVdtzRtm5d9CauO4ciCq0/img.png&quot; data-alt=&quot;quantizing a FP32 tensor into a Int8 tensor with range [-127, 127]&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bR6Bw8/btsI3OJJ2aR/WrVdtzRtm5d9CauO4ciCq0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbR6Bw8%2FbtsI3OJJ2aR%2FWrVdtzRtm5d9CauO4ciCq0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;750&quot; height=&quot;157&quot; data-origin-width=&quot;750&quot; data-origin-height=&quot;157&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;quantizing a FP32 tensor into a Int8 tensor with range [-127, 127]&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;The problem with this approach is that if a large magnitude value (i.e., an&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;outlier) occurs in the input tensor&lt;/span&gt;, then&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;the quantization bins**&lt;/span&gt;&amp;mdash;certain bit combinations&amp;mdash;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;are not utilized well&lt;/span&gt;&amp;nbsp;with few or no numbers quantized in some bins.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;To prevent the outlier issue, a common approach is to chunk the input tensor into blocks that are independently quantized, each with their own quantization constant c. We chunk the input tensor&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;X &amp;isin; R&lt;sup&gt;b&amp;times;h&lt;/sup&gt; into n contiguous blocks of size B&lt;/span&gt;&amp;nbsp;by&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;flattening the input tensor and slicing the linear segment into n = (b &amp;times; h)/B blocks.&lt;/span&gt;&amp;nbsp;We&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;quantize these blocks independently with Equation 1&lt;/span&gt;&amp;nbsp;to create a quantized tensor and n quantization constants c&lt;sub&gt;i&lt;/sub&gt;.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;즉, outlier 문제를 해결하기 위해 input tensor X를 flattening하고 크기가 B인 n개의 연속적인 블록으로 나누었고,&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;결국 각각의 블록이 양자화된 값(ci)을 만들어낸다는 뜻.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;* tensor: 데이터의 배열 ex_scalar - vector - matrix - 3d tensor - nd tensor&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;** quantization bin: 양자화 함수 y=Q(x)에서 K-level scalar quantizer는 K+1개의 decision level(d0, d1, ... ... , dK)과 K개의 output level(y0, y1, ... ... , yK-1)로 구성됨. 이때 di부터 di+1까지의 region을 quantization bin이라고 칭함.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Low-rank Adapters&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;Low-rank&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;Adapter*&lt;/span&gt;&amp;nbsp;(LoRA) finetuning is a method that&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;reduces memory requirements&lt;/span&gt;&amp;nbsp;by&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;using a small set of trainable parameters&lt;/span&gt;, often termed adapters, while&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;not updating the full model parameters&lt;/span&gt;&amp;nbsp;which remain fixed.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;LoRA augments a linear projection through an additional factorized projection.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;+ LLM의 가중치 행렬에 근사화하는 두 개의 작은 행렬을 파인튜닝함&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;741&quot; data-origin-height=&quot;95&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wG6s3/btsI5adWuxx/4anzHigYXhPmu8qhMnEOR0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wG6s3/btsI5adWuxx/4anzHigYXhPmu8qhMnEOR0/img.png&quot; data-alt=&quot;XW=y를 받을 때 LoRA의 계산법&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wG6s3/btsI5adWuxx/4anzHigYXhPmu8qhMnEOR0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FwG6s3%2FbtsI5adWuxx%2F4anzHigYXhPmu8qhMnEOR0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;741&quot; height=&quot;95&quot; data-origin-width=&quot;741&quot; data-origin-height=&quot;95&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;XW=y를 받을 때 LoRA의 계산법&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;* adapter: 기존에 학습이 완료된 모델 사이사이에 학습가능한 작은 feed-forward networks를 삽입하는 구조&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Memory Requirement of Parameter-Efficient Finetuning&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;While LoRA was designed as a&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;Parameter Efficient Finetuning (PEFT) method*&lt;/span&gt;, most of the&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;memory footprint**&lt;/span&gt;&amp;nbsp;for LLM finetuning comes from activation gradients and not from the learned LoRA parameters. ... ...&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;gradient checkpointing is important but also that aggressively reducing the amount of LoRA parameter yields only minor memory benefits.&lt;/span&gt;&amp;nbsp;This means we&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;can use more adapters&amp;nbsp;without significantly increasing the overall training memory footprint&lt;/span&gt;. As discussed later, this is crucial for recovering full 16-bit precision performance.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;* PEFT: 사전학습된 LLM의 대부분의 파라미터는 고정, 필요한 일부 파라미터만 파인튜닝함&amp;nbsp;&amp;rarr;&amp;nbsp;저장공간&amp;amp;계산능력&amp;darr; Catastrophic Forgetting 극복&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;** memory footprint: the amount of main memory that a program uses or references while running&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;3. QLoRA Finetuning&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;i&gt;QLoRA&amp;nbsp;achieves high-fidelity 4-bit finetuning via two techniques we propose&amp;mdash;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;4-bit NormalFloat (NF4) quantization&lt;/span&gt;&amp;nbsp;and&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;Double&amp;nbsp;Quantization&lt;/span&gt;&lt;/span&gt;.&amp;nbsp;Additionally, we introduce&amp;nbsp;&lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;Paged Optimizers&lt;/span&gt;, to prevent memory spikes during gradient checkpointing from causing out-of-memory errors that have traditionally made finetuning on a single machine difficult for large models.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;4-bit NormalFloat Quantization&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;i&gt;NormalFloat (NF) data type builds on&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;Quantile Quantization&lt;/span&gt;&amp;nbsp;which is an information-theoretically optimal data type that ensures&amp;nbsp;&lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;each quantization bin has an equal number of values assigned from the input tensor&lt;/span&gt;.&amp;nbsp;Quantile quantization works by estimating the quantile of the input tensor through the&amp;nbsp;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;empirical cumulative distribution function&lt;/span&gt;.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;즉, Quantile Quantization 기법은 누적분포 함수의 quantile을 추적하여 4-bit quantization 수행하도록 함&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- SRAM quantile&lt;/i&gt;과 같은&amp;nbsp;&lt;i&gt;fast quantile approximation algorithm&lt;/i&gt;을 사용하지만,&amp;nbsp;&lt;i&gt;large quantization errors for outliers&lt;/i&gt;가 발생하는 한계 O&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;i&gt;Expensive quantile estimates and approximation errors can be avoided when input tensors come from a distribution fixed up to a quantization constant.&lt;/i&gt; &lt;i&gt;... ... transform all weights to a single fixed distribution by &lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;scaling &amp;sigma; such that the distribution fits exactly into the range of our data type. &lt;/span&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;데이터 타입과 neural network weights를 [-1, 1]로 정규화함&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;602&quot; data-origin-height=&quot;78&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/rDXDq/btsI8HKwh7y/9M25vUj6BLVa2gV9kYnXj1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/rDXDq/btsI8HKwh7y/9M25vUj6BLVa2gV9kYnXj1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/rDXDq/btsI8HKwh7y/9M25vUj6BLVa2gV9kYnXj1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FrDXDq%2FbtsI8HKwh7y%2F9M25vUj6BLVa2gV9kYnXj1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;602&quot; height=&quot;78&quot; data-origin-width=&quot;602&quot; data-origin-height=&quot;78&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Double Quantization&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&lt;i&gt; the process of quantizing the quantization constants for additional memory savings.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- treats quantization constants c2&lt;sup&gt;FP32&lt;/sup&gt; of the first quantization as inputs to a second quantization.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;i&gt;On average, for a blocksize of 64, this quantization reduces the memory footprint per parameter from 32/64 = 0.5 bits, to 8/64 + 32/(64 &amp;middot; 256) = 0.127 bits, a reduction of 0.373 bits per parameter.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt; &lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;4bit NFQ로 압축된 c2를 8bit로 한 번 더 압축시켜 c1을 계산함 &lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&amp;rarr; 파라미터 당 0.373bit의 리소스 절약&lt;/span&gt; &lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Paged Optimizers&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&lt;i&gt; The feature works like regular &lt;span style=&quot;background-color: #f6e199;&quot;&gt;memory paging*&lt;/span&gt; between CPU RAM and the disk. We use this feature to allocate paged memory for the optimizer states which are then automatically evicted to CPU RAM when the GPU runs out-of-memory and paged back into GPU memory when the memory is needed in the optimizer update step. &lt;/i&gt;&lt;i&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;* memory paging: 프로세스의 논리 주소 공간을 page 단위로 자르고, 메모리의 물리적 주소 공간을 frame 단위로 자른 뒤, page를 frame에 할당하는 가상 메모리 관리 기법&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;QLoRA&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&lt;i&gt; QLORA has one storage data type (usually 4-bit NormalFloat) and a computation data type (16-bit BrainFloat). We dequantize the storage data type to the computation data type to perform the forward and backward pass, but we only compute weight gradients for the LoRA parameters which use &lt;span style=&quot;background-color: #f6e199;&quot;&gt;16-bit BrainFloat*&lt;/span&gt;. &lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;736&quot; data-origin-height=&quot;132&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/EsbMq/btsJclzkv9u/c2m0uMBFIJR3X3kphZN1g1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/EsbMq/btsJclzkv9u/c2m0uMBFIJR3X3kphZN1g1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/EsbMq/btsJclzkv9u/c2m0uMBFIJR3X3kphZN1g1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FEsbMq%2FbtsJclzkv9u%2Fc2m0uMBFIJR3X3kphZN1g1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;736&quot; height=&quot;132&quot; data-origin-width=&quot;736&quot; data-origin-height=&quot;132&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;데이터를 4bit로 압축 저장하지만, weight gradient를 계산할 때는 16bit BrainFloat으로 압축 해제하여 수행함&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;* 16-bit BrainFloat(BF16): 32비트 부동 소수점 형식보다 정확도&amp;darr; 메모리 요구 사항&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&amp;darr; &amp;there4;모델 학습에 용이&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;cf) FP16 역시 메모리 사용량&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&amp;darr; 모델 훈련에는 일반적으로 FP32를 사용하고, 추론 단계에서 FP16을 사용해 연산 속도를 높이는 편.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;≫ Mixed Precision: FP16, FP32를 혼합하며 모델학습에 사용하는 방식&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;4. QLoRA&amp;nbsp; vs&amp;nbsp; Standard Finetuning&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- whether QLoRA can perform as well as full-model finetuning.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- want to analyze the components of QLoRA including the impact of NormalFloat4 over standard Float4. &lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;741&quot; data-origin-height=&quot;201&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/WsPQa/btsJbHXeDIw/ZikKoAE69gZpfsxmhcRz71/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/WsPQa/btsJbHXeDIw/ZikKoAE69gZpfsxmhcRz71/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/WsPQa/btsJbHXeDIw/ZikKoAE69gZpfsxmhcRz71/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FWsPQa%2FbtsJbHXeDIw%2FZikKoAE69gZpfsxmhcRz71%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;741&quot; height=&quot;201&quot; data-origin-width=&quot;741&quot; data-origin-height=&quot;201&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- &lt;i&gt;Our results consistently show that 4-bit QLORA with NF4 data type matches 16-bit full finetuning and 16-bit LoRA finetuning performance on academic benchmarks with well-established evaluation setups. We have also shown that NF4 is more effective than FP4 and that double quantization does not degrade performance.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;5. Pushing the Chatbot State-of-the-art with QLoRA &amp;amp; &lt;/b&gt;&lt;b&gt;6. Qualitative Analysis&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;-&amp;nbsp;&lt;i&gt;we use the &lt;span style=&quot;background-color: #f6e199;&quot;&gt;MMLU&lt;/span&gt; (Massively Multitask Language Understanding) benchmark to measure performance on a range of language understanding tasks. This is &lt;span style=&quot;background-color: #f6e199;&quot;&gt;a multiple-choice benchmark covering 57 tasks&lt;/span&gt; including elementary mathematics, US history, computer science, law, and more.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- also test generative language capabilities through both automated and human evaluations.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;735&quot; data-origin-height=&quot;135&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/c1bvTK/btsJcGwxXRe/5X9i0qTmne6OAkWmXmtOk0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/c1bvTK/btsJcGwxXRe/5X9i0qTmne6OAkWmXmtOk0/img.png&quot; data-alt=&quot;MMLU 테스트 정확도 비교&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/c1bvTK/btsJcGwxXRe/5X9i0qTmne6OAkWmXmtOk0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fc1bvTK%2FbtsJcGwxXRe%2F5X9i0qTmne6OAkWmXmtOk0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;735&quot; height=&quot;135&quot; data-origin-width=&quot;735&quot; data-origin-height=&quot;135&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;MMLU 테스트 정확도 비교&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;667&quot; data-origin-height=&quot;277&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/9g6Jp/btsJaIiAmm5/Yen0fkPlMkeIyh5CGDFqy1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/9g6Jp/btsJaIiAmm5/Yen0fkPlMkeIyh5CGDFqy1/img.png&quot; data-alt=&quot;Elo rating for a tournament between models where models compete to generate the best response for a prompt, judged by human raters or GPT-4.&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/9g6Jp/btsJaIiAmm5/Yen0fkPlMkeIyh5CGDFqy1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F9g6Jp%2FbtsJaIiAmm5%2FYen0fkPlMkeIyh5CGDFqy1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;667&quot; height=&quot;277&quot; data-origin-width=&quot;667&quot; data-origin-height=&quot;277&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;Elo rating for a tournament between models where models compete to generate the best response for a prompt, judged by human raters or GPT-4.&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;7. Related Works&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- Quantization of Large Language Models&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- Finetuning with Adapters&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;- Instruction Finetuning&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt; To help a pretrained LLM follow the instructions provided in a prompt, instruction finetuning uses input-output pairs of various data sources to finetune a pretrained LLM to generate the output given the input as a prompt.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;i&gt;- &lt;/i&gt;Chatbots&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #666666; font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;We do not use reinforcement learning, but our best model, Guanaco, is finetuned on multi-turn chat interactions from the Open Assistant dataset which was designed to be used for &lt;span style=&quot;background-color: #f6e199;&quot;&gt;RLHF training*&lt;/span&gt;.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #666666; font-family: 'Noto Sans Light';&quot;&gt;* RLHF training(Reinforcement Learning from Human Feedback): 사람의 피드백을 기반으로 ML 모델을 최적화함으로써, 자가학습을 보다 효율적으로 수행하는 ML 기법. AI 시스템이 더 인간적으로 보이도록 훈련시킴.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;8. Limitations and Discussion&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- &lt;i&gt;we did &lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;not establish that QLORA can match full 16-bit finetuning performance at 33B and 65B scales&lt;/span&gt;. &lt;/i&gt;&lt;i&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- &lt;i&gt;we did not evaluate on other benchmarks such as BigBench, RAFT, and HELM, and it is &lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;not ensured that our evaluations generalize to these benchmarks&lt;/span&gt;. On the other hand, we perform a very broad study on MMLU and develop new methods for evaluating chatbots.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- &lt;i&gt;another limitation is that we &lt;span style=&quot;background-color: #c0d1e7;&quot;&gt;only do a limited responsible AI evaluation of Guanaco&lt;/span&gt;.&lt;/i&gt;&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;i&gt;- we did not evaluate different bit-precisions, such as using 3-bit base models, or different adapter methods&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;9. Broader Impacts&lt;/b&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #666666;&quot;&gt;- &lt;span style=&quot;color: #333333;&quot;&gt;&lt;i&gt;Our QLORA finetuning method is the first method that &lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;enables the finetuning of 33B parameter models on a single consumer GPU and 65B parameter models on a single professional GPU&lt;/span&gt;, while not degrading performance relative to a full finetuning baseline. We have demonstrated that our best 33B model trained on the Open Assistant dataset can rival ChatGPT on the Vicuna benchmark.&lt;/i&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;color: #333333;&quot;&gt;&lt;i&gt;- Another potential source of impact is deployment to mobile phones. We believe our QLORA method might enable the critical milestone of enabling the finetuning of LLMs on phones and other low resource settings.&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;cf)&amp;nbsp;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://aws.amazon.com/ko/what-is/reinforcement-learning-from-human-feedback/&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://aws.amazon.com/ko/what-is/reinforcement-learning-from-human-feedback/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://huggingface.co/blog/4bit-transformers-bitsandbytes&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://huggingface.co/blog/4bit-transformers-bitsandbytes&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://jaeyung1001.tistory.com/entry/bf16-fp16-fp32%EC%9D%98-%EC%B0%A8%EC%9D%B4%EC%A0%90&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://jaeyung1001.tistory.com/entry/bf16-fp16-fp32%EC%9D%98-%EC%B0%A8%EC%9D%B4%EC%A0%90&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://training.continuumlabs.ai/training/the-fine-tuning-process/parameter-efficient-fine-tuning/the-quantization-constant&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://training.continuumlabs.ai/training/the-fine-tuning-process/parameter-efficient-fine-tuning/the-quantization-constant&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/232761&quot;&gt;https://wikidocs.net/232761&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://devocean.sk.com/blog/techBoardDetail.do?ID=164779&amp;amp;boardType=techBlog&quot;&gt;https://devocean.sk.com/blog/techBoardDetail.do?ID=164779&amp;amp;boardType=techBlog&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://www.databricks.com/kr/blog/efficient-fine-tuning-lora-guide-llms&quot;&gt;https://www.databricks.com/kr/blog/efficient-fine-tuning-lora-guide-llms&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://www.sciencedirect.com/topics/engineering/quantization-bin&quot;&gt;https://www.sciencedirect.com/topics/engineering/quantization-bin&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://guanaco-model.github.io/&quot;&gt;https://guanaco-model.github.io/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://pytorch.org/blog/introduction-to-quantization-on-pytorch/&quot;&gt;https://pytorch.org/blog/introduction-to-quantization-on-pytorch/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://pytorch.org/docs/stable/quantization.html&quot;&gt;https://pytorch.org/docs/stable/quantization.html&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://m.post.naver.com/viewer/postView.nhn?volumeNo=19437431&amp;amp;memberNo=20717909&quot;&gt;https://m.post.naver.com/viewer/postView.nhn?volumeNo=19437431&amp;amp;memberNo=20717909&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/70</guid>
      <comments>https://hoooa.tistory.com/70#entry70comment</comments>
      <pubDate>Thu, 22 Aug 2024 15:13:22 +0900</pubDate>
    </item>
    <item>
      <title>[AI]RAG 기본 이론&amp;실습(3)</title>
      <link>https://hoooa.tistory.com/69</link>
      <description>&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://hoooa.tistory.com/67&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://hoooa.tistory.com/67&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;figure id=&quot;og_1722838817910&quot; contenteditable=&quot;false&quot; data-ke-type=&quot;opengraph&quot; data-ke-align=&quot;alignCenter&quot; data-og-type=&quot;article&quot; data-og-title=&quot;[AI]RAG 기본 이론&amp;amp;실습(2)&quot; data-og-description=&quot;https://hoooa.tistory.com/65 에서 정리했던 RAG의 기본 파이프라인(Data Load, Text Split, Indexing, Retrieval, Generation)을 한층 자세하게 들어가보자~&amp;nbsp;[AI]RAG 기본 이론&amp;amp;실습(1)RAG에 대해선 이전에 아주 짧게 다뤄&quot; data-og-host=&quot;hoooa.tistory.com&quot; data-og-source-url=&quot;https://hoooa.tistory.com/67&quot; data-og-url=&quot;https://hoooa.tistory.com/67&quot; data-og-image=&quot;https://scrap.kakaocdn.net/dn/eFCBpm/hyWKuJpZky/29btsuKIM2HR8s589uY30K/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/rMVlP/hyWKzKJnuO/7XmUkpIxi70KlpM1sX3ZP1/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/rRQ5y/hyWKFKWt1w/qbPyY1qNPYho2kcILVI6k1/img.png?width=400&amp;amp;height=400&amp;amp;face=0_0_400_400&quot;&gt;&lt;a href=&quot;https://hoooa.tistory.com/67&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot; data-source-url=&quot;https://hoooa.tistory.com/67&quot;&gt;
&lt;div class=&quot;og-image&quot; style=&quot;background-image: url('https://scrap.kakaocdn.net/dn/eFCBpm/hyWKuJpZky/29btsuKIM2HR8s589uY30K/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/rMVlP/hyWKzKJnuO/7XmUkpIxi70KlpM1sX3ZP1/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/rRQ5y/hyWKFKWt1w/qbPyY1qNPYho2kcILVI6k1/img.png?width=400&amp;amp;height=400&amp;amp;face=0_0_400_400');&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;og-text&quot;&gt;
&lt;p class=&quot;og-title&quot; data-ke-size=&quot;size16&quot;&gt;[AI]RAG 기본 이론&amp;amp;실습(2)&lt;/p&gt;
&lt;p class=&quot;og-desc&quot; data-ke-size=&quot;size16&quot;&gt;https://hoooa.tistory.com/65 에서 정리했던 RAG의 기본 파이프라인(Data Load, Text Split, Indexing, Retrieval, Generation)을 한층 자세하게 들어가보자~&amp;nbsp;[AI]RAG 기본 이론&amp;amp;실습(1)RAG에 대해선 이전에 아주 짧게 다뤄&lt;/p&gt;
&lt;p class=&quot;og-host&quot; data-ke-size=&quot;size16&quot;&gt;hoooa.tistory.com&lt;/p&gt;
&lt;/div&gt;
&lt;/a&gt;&lt;/figure&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;에 이어서&amp;nbsp; 마지막 RAG 정리글!&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;4. Vector Store&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;text-align: center;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;임베딩 벡터를 효율적으로 저장&amp;amp;검색하는 시스템(DB)&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: center;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 벡터 저장: 고차원 임베딩 벡터(텍스트/이미지/소리 등)를 처리 가능한 데이터 저장 구조 필요&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 벡터 검색: 저장벡터 중 사용자 쿼리에 가장 유사한 벡터를 찾는 과정, ex) 코사인 유사도/유클리드 거리 등&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 결과 반환&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;1. Chroma&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 임베딩/메타데이터 저장, 문서/쿼리 임베딩, 임베딩 검색 가능&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;(1) 유사도 기반 검색&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1722842785000&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;### Chroma - 유사도 기반
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

loader = TextLoader(path + 'test.txt')
data = loader.load()

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250,
    chunk_overlap=50,
    encoding_name='cl100k_base'
)

texts = text_splitter.split_text(data[0].page_content)
embeddings_model = OpenAIEmbeddings()
db = Chroma.from_texts(
    texts, 
    embeddings_model,
    collection_name = 'test',
    persist_directory = path,
    collection_metadata = {'hnsw:space': 'cosine'}, # l2 is the default
)

print(texts[0])
db&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722842816680&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;한국의 역사는 수천 년에 걸쳐 이어져 온 긴 여정 속에서 다양한 문화와 전통이 형성되고 발전해 왔습니다. 고조선에서 시작해 삼국 시대의 경쟁, 그리고 통일 신라와 고려를 거쳐 조선까지, 한반도는 많은 변화를 겪었습니다.

고조선은 기원전 2333년 단군왕검에 의해 세워졌다고 전해집니다. 이는 한국 역사상 최초의 국가로, 한민족의 시원이라 할 수 있습니다. 이후 기원전 1세기경에는 한반도와 만주 일대에서 여러 소국이 성장하며 삼한 시대로 접어듭니다.
&amp;lt;langchain_community.vectorstores.chroma.Chroma at 0x7ac8a7e18ac0&amp;gt;&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722842826835&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;query = '한국의 최초 국가는 어디인가요?'
docs = db.similarity_search(query)
print(docs[0].page_content)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722842863888&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;WARNING:chromadb.segment.impl.vector.local_persistent_hnsw:Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
한국의 역사는 수천 년에 걸쳐 이어져 온 긴 여정 속에서 다양한 문화와 전통이 형성되고 발전해 왔습니다. 고조선에서 시작해 삼국 시대의 경쟁, 그리고 통일 신라와 고려를 거쳐 조선까지, 한반도는 많은 변화를 겪었습니다.

고조선은 기원전 2333년 단군왕검에 의해 세워졌다고 전해집니다. 이는 한국 역사상 최초의 국가로, 한민족의 시원이라 할 수 있습니다. 이후 기원전 1세기경에는 한반도와 만주 일대에서 여러 소국이 성장하며 삼한 시대로 접어듭니다.&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;(2) MMR&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;최대 한계 관련성(Maximum Marginal Relevance) 검색 방식&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;유사성과 다양성의 균형 &amp;rarr; 검색 결과 품질 향상&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;쿼리와 관련성이 높으면서, 서로 다른 측면/정보를 제공하도록 설정(유사도 상위 fetch_k개의 문서)&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722842999066&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;### Chroma - MMR

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings


loader = PyMuPDFLoader(path+'SPRI_AI_Brief_2023년12월호_F.pdf')
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=200,
    encoding_name='cl100k_base'
)

documents = text_splitter.split_documents(data)
print(len(documents))

embeddings_model = OpenAIEmbeddings()
db2 = Chroma.from_documents(
    documents, 
    embeddings_model,
    collection_name = 'esg',
    persist_directory = path,
    collection_metadata = {'hnsw:space': 'cosine'}, # l2 is the default
)

db2&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843007320&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;&amp;lt;langchain_community.vectorstores.chroma.Chroma at 0x7ac8a426e980&amp;gt;&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843052655&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# MMR
# 상위 10개의 유사 문서 중 서로 다른 정보를 제공하는 4개 문서 선택
mmr_docs = db2.max_marginal_relevance_search(query, k=4, fetch_k=10)
print(len(mmr_docs))
print(mmr_docs[0].page_content)


## 유사도 검색으로 하면
# query = '통이치엔원의 세부내용을 알려줘?'
# docs = db2.similarity_search(query)
# print(docs[0].page_content)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843067824&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;4
1. 정책/법제  
2. 기업/산업 
3. 기술/연구 
 4. 인력/교육
알리바바 클라우드, 최신 LLM &amp;lsquo;통이치엔원 2.0&amp;rsquo; 공개
n 알리바바 클라우드가 복잡한 지침 이해, 광고문구 작성, 추론, 암기 등에서 성능이 향상된 최신 
LLM &amp;lsquo;통이치엔원 2.0&amp;rsquo;을 공개
n 알리바바 클라우드는 산업별로 특화된 생성 AI 모델을 공개하는 한편, 모델 개발과 애플리케이션 
구축 절차를 간소화하는 올인원 AI 모델 구축 플랫폼도 출시
KEY Contents
&amp;pound; 알리바바의 통이치엔원 2.0, 주요 벤치마크 테스트에서 여타 LLM 능가
n 중국의 알리바바 클라우드가 2023년 10월 31일 열린 연례 기술 컨퍼런스에서 최신 LLM &amp;lsquo;통이
치엔원(Tongyi Qianwen) 2.0&amp;rsquo;을 공개
∙알리바바 클라우드는 통이치엔원 2.0이 2023년 4월 출시된 1.0 버전보다 복잡한 지침 이해, 
광고문구 작성, 추론, 암기 등에서 성능이 향상되었다고 설명
∙통이치엔원 2.0은 언어 이해 테스트(MMLU), 수학(GSM8k), 질문 답변(ARC-C)과 같은 벤치마크 
테스트에서 라마(Llama-2-70B)와 GPT-3.5를 비롯한 주요 AI 모델을 능가 
∙통이치엔원 2.0은 알리바바 클라우드의 웹사이트와 모바일 앱을 통해 대중에 제공되며 개발자는 
API를 통해 사용 가능 
n 알리바바 클라우드는 여러 산업 영역에서 생성 AI를 활용해 사업 성과를 개선할 수 있도록 지원
하는 산업별 모델도 출시
∙산업 영역은 고객지원, 법률 상담, 의료, 금융, 문서관리, 오디오와 동영상 관리, 코드 개발, 캐릭터 
제작을 포함
n 알리바바 클라우드는 급증하는 생성 AI 수요에 대응해 모델 개발과 애플리케이션 구축 절차를 
간소화하는 올인원 AI 모델 구축 플랫폼 &amp;lsquo;젠AI(GenAI)&amp;rsquo;도 공개
∙이 플랫폼은 데이터 관리, 모델 배포와 평가, 신속한 엔지니어링을 위한 종합 도구 모음을 제공하여 
다양한 기업들이 맞춤형 AI 모델을 한층 쉽게 개발할 수 있도록 지원&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843176824&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;print(mmr_docs[-1].page_content)   # 가장 유사도가 낮은 문서&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843195263&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;&amp;pound; AI 에이전트가 의료와 교육, 생산성, 엔터테인먼트&amp;middot;쇼핑 영역의 서비스 대중화를 주도할 것
n 에이전트로 인해 주목할 만한 변화는 고비용 서비스의 대중화로 특히 △의료 △교육 △생산성 △
엔터테인먼트&amp;middot;쇼핑의 4개 영역에서 대규모 변화 예상
∙(의료) 에이전트가 환자 분류를 지원하고 건강 문제에 대한 조언을 제공하며 치료의 필요 여부를 결정하면서 
의료진의 의사결정과 생산성 향상에 기여
∙(교육) 에이전트가 1대 1 가정교사의 역할을 맡아 모든 학생에게 평등한 교육 기회를 제공할 수 있으며, 
아이가 좋아하는 게임이나 노래 등을 활용해 시청각 기반의 풍부한 맞춤형 교육 경험을 제공
∙(생산성) 사용자의 아이디어를 기반으로 에이전트가 사업계획과 발표 자료 작성, 제품 이미지 생성을 
지원하며, 임원의 개인 비서와 같은 역할도 수행 
∙(엔터테인먼트&amp;middot;쇼핑) 쇼핑 시 에이전트가 모든 리뷰를 읽고 요약해 최적의 제품을 추천하고 사용자 대신 
주문할 수 있으며 사용자의 관심사에 맞춤화된 뉴스와 엔터테인먼트를 구독 가능
☞ 출처 : GatesNotes, AI is about to completely change how you use computers, 2023.11.09.&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2. FAISS&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Facebook AI Similarity Search, 벡터의 압축된 표현 사용 - 메모리 사용 &amp;darr; 검색 속도&amp;uarr;&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;(1) 유사도 기반 검색&lt;br /&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;- l2(default): 유클리디안 거리&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;- ip(내적): 두 벡터의 방향성&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;- cosine: 두 벡터 사이의 각도가 작을수록 유사도가 높음&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722843841128&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# pip install faiss-cpu sentence-transformers

### FAISS - 유사도 기반
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(
    model_name='jhgan/ko-sbert-nli',
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True},
)

vectorstore = FAISS.from_documents(documents,
                                   embedding = embeddings_model,
                                   distance_strategy = DistanceStrategy.COSINE
                                  )

query = '질문~~'
docs = vectorstore.similarity_search(query)
print(len(docs))
print(docs[0].page_content)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843851848&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;mmr_docs = vectorstore.max_marginal_relevance_search(query, k=4, fetch_k=10)
print(len(mmr_docs))
print(mmr_docs[0].page_content)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722843860816&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;## FAISS DB 로컬 저장
vectorstore.save_local(path+'faiss')

db3 = FAISS.load_local(path+'faiss', embeddings_model)   # 저장 시 사용된 임베딩 모델과 동일해야 함&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;5. Retriever&lt;/span&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;text-align: center;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Retrieval Augmented Generation의 검색도구&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;LangChain이 제공하는 다양한 검색도구&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;1. Vector Store Retriever&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;대량의 텍스트 데이터에서 효율적 검색&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722844965705&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_community.embeddings import HuggingFaceEmbeddings

# 데이터 로드 및 chunk 분할
loader = PyMuPDFLoader(path+'SPRI_AI_Brief_2023년12월호_F.pdf')
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=200,
    encoding_name='cl100k_base'
)

documents = text_splitter.split_documents(data)

# 임베딩 후 저장
embeddings_model = HuggingFaceEmbeddings(
    model_name='jhgan/ko-sbert-nli',
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True},
)
vectorstore = FAISS.from_documents(documents,
                                   embedding = embeddings_model,
                                   distance_strategy = DistanceStrategy.COSINE  
                                  )

# 단일 검색
query = '통이치엔원의 세부내용을 알려줘'
retriever = vectorstore.as_retriever(search_kwargs={'k': 1}   )# 가장 유사도가 높은 문장 하나
docs = retriever.get_relevant_documents(query)
print(&quot;*******************단일 검색*******************&quot;)
print(len(docs))
print(docs[0])

# MMR 검색
retriever = vectorstore.as_retriever(
    search_type='mmr',
    search_kwargs={'k': 5, 'fetch_k': 50}
)
docs = retriever.get_relevant_documents(query)
print(&quot;*******************MMR 검색*******************&quot;)
print(len(docs))
print(docs[0])

# MMR 검색2
retriever = vectorstore.as_retriever(
    search_type='mmr',
    search_kwargs={'k': 5, 'lambda_mult': 0.15}   # lambda_mult: 관련성-다양성 균형(0~1), 작을수록 다양성이 커짐
)
docs = retriever.get_relevant_documents(query)
print(len(docs))
print(docs[-1])

# 유사도 점수 임계값 기반 검색
# Similarity score threshold (기준 스코어 이상인 문서를 대상으로 추출)
retriever = vectorstore.as_retriever(
    search_type='similarity_score_threshold',
    search_kwargs={'score_threshold': 0.3}  # 쿼리와 최소 0.3 이상의 유사도인 문서만
)
docs = retriever.get_relevant_documents(query)
print(len(docs))

# 메타데이터 필터링
retriever = vectorstore.as_retriever(
    search_kwargs={'filter': {'format':'PDF 1.4'}}
)
docs = retriever.get_relevant_documents(query)
print(len(docs))&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722844982537&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 실제 답변 생성
# 검색 - 프롬프트 생성 - 모델 - 문서 포맷팅 - 체인 - 실행

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# retrieval
retriever = vectorstore.as_retriever(
    search_type = 'mmr',
    search_kwargs = {'k': 5, 'lambda_mult': 0.15}
)
docs = retriever.get_relevant_documents(query)

# prompt
template = '''Answer the question based only on the following context:
{context}
Question: {question}
'''
prompt = ChatPromptTemplate.from_template(template)

# model
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo-0125',
    temperature = 0,
    max_tokens = 500,
)

def format_docs(docs):
  return '\n\n'.join([d.page_content for d in docs])

# chain
chain = prompt | llm | StrOutputParser()

# run
response = chain.invoke({'context': (format_docs(docs)), 'question': query})&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2. Multi Query Retriever&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;VSRetriever의 한계 극복&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;입력된 쿼리의 의미를 다각도로 포착 == 단일 쿼리 기반 다양한 관점의 멀티 쿼리를 자동 생성&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ㄴLLM을 통해 입력 문장을 Paraphrasing&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;# 예시에서는 임베딩 모델을 huggingface에서 따로 다운받고 FAISS를 활용했는데, 본인이 테스트한 문서에서 맞지 않는 듯 하여 설정을 바꿨다&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722845614396&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma


# 데이터 로드 및 chunk 분할
loader = PyMuPDFLoader(path+'SPRI_AI_Brief_2023년12월호_F.pdf')
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=200,
    encoding_name='cl100k_base'
)

documents = text_splitter.split_documents(data)

embeddings_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents, 
    embeddings_model,
    collection_name = 'test',
    persist_directory = path,
    collection_metadata = {'hnsw:space': 'cosine'}, # l2 is the default
)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722845666073&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain.retrievers.multi_query import MultiQueryRetriever

quest = '통이치엔원에 대해 알려줘'

llm = ChatOpenAI(
    model='gpt-3.5-turbo-0125',
    temperature=0,
    max_tokens=500,
)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever = vectorstore.as_retriever(), llm = llm
)

# 로깅 설정: multiquery에 대한 정보를 로그로 기록&amp;amp;확인
import logging
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)

unique_docs = retriever_from_llm.get_relevant_documents(query=quest)
print(len(unique_docs))
print(unique_docs[0])&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722845675145&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;INFO:langchain.retrievers.multi_query:Generated queries: ['1. 어떤 정보가 통이치엔원에 대해 있는가?', '2. 통이치엔원에 관련된 자료를 찾아볼까요?', '3. 통이치엔원에 대한 내용을 알고 싶어요.']
7
page_content='1. 정책/법제  
2. 기업/산업 
3. 기술/연구 
 4. 인력/교육
알리바바 클라우드, 최신 LLM &amp;lsquo;통이치엔원 2.0&amp;rsquo; 공개
n 알리바바 클라우드가 복잡한 지침 이해, 광고문구 작성, 추론, 암기 등에서 성능이 향상된 최신 
LLM &amp;lsquo;통이치엔원 2.0&amp;rsquo;을 공개
n 알리바바 클라우드는 산업별로 특화된 생성 AI 모델을 공개하는 한편, 모델 개발과 애플리케이션 
구축 절차를 간소화하는 올인원 AI 모델 구축 플랫폼도 출시
KEY Contents
&amp;pound; 알리바바의 통이치엔원 2.0, 주요 벤치마크 테스트에서 여타 LLM 능가
n 중국의 알리바바 클라우드가 2023년 10월 31일 열린 연례 기술 컨퍼런스에서 최신 LLM &amp;lsquo;통이
치엔원(Tongyi Qianwen) 2.0&amp;rsquo;을 공개
∙알리바바 클라우드는 통이치엔원 2.0이 2023년 4월 출시된 1.0 버전보다 복잡한 지침 이해, 
광고문구 작성, 추론, 암기 등에서 성능이 향상되었다고 설명
∙통이치엔원 2.0은 언어 이해 테스트(MMLU), 수학(GSM8k), 질문 답변(ARC-C)과 같은 벤치마크 
테스트에서 라마(Llama-2-70B)와 GPT-3.5를 비롯한 주요 AI 모델을 능가 
∙통이치엔원 2.0은 알리바바 클라우드의 웹사이트와 모바일 앱을 통해 대중에 제공되며 개발자는 
API를 통해 사용 가능 
n 알리바바 클라우드는 여러 산업 영역에서 생성 AI를 활용해 사업 성과를 개선할 수 있도록 지원
하는 산업별 모델도 출시
∙산업 영역은 고객지원, 법률 상담, 의료, 금융, 문서관리, 오디오와 동영상 관리, 코드 개발, 캐릭터 
제작을 포함
n 알리바바 클라우드는 급증하는 생성 AI 수요에 대응해 모델 개발과 애플리케이션 구축 절차를 
간소화하는 올인원 AI 모델 구축 플랫폼 &amp;lsquo;젠AI(GenAI)&amp;rsquo;도 공개
∙이 플랫폼은 데이터 관리, 모델 배포와 평가, 신속한 엔지니어링을 위한 종합 도구 모음을 제공하여 
다양한 기업들이 맞춤형 AI 모델을 한층 쉽게 개발할 수 있도록 지원' metadata={'author': 'dj', 'creationDate': &quot;D:20231208132838+09'00'&quot;, 'creator': 'Hwp 2018 10.0.0.13462', 'file_path': '/content/drive/MyDrive/재정정보경진대회/SPRI_AI_Brief_2023년12월호_F.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': &quot;D:20231208132838+09'00'&quot;, 'page': 11, 'producer': 'Hancom PDF 1.3.0.542', 'source': '/content/drive/MyDrive/재정정보경진대회/SPRI_AI_Brief_2023년12월호_F.pdf', 'subject': '', 'title': '', 'total_pages': 23, 'trapped': ''}&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722845688441&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough


# Prompt
template = '''Answer the question based only on the following context:
{context}

Question: {question}
'''

prompt = ChatPromptTemplate.from_template(template)

# Model
llm = ChatOpenAI(
    model='gpt-3.5-turbo-0125',
    temperature=0,
)

def format_docs(docs):
    return '\n\n'.join([d.page_content for d in docs])

# Chain
chain = (
    {'context': retriever_from_llm | format_docs, 'question': RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run
response = chain.invoke('통이치엔원에 대해 요약해서 알려주세요.')
response&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722845699561&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;INFO:langchain.retrievers.multi_query:Generated queries: ['1. 요약해서 통이치엔원에 대한 정보를 알려드릴까요?', '2. 통이치엔원에 대한 간략한 설명을 드릴까요?', '3. 통이치엔원에 대한 요약 정보를 제공해 드릴까요?']
알리바바 클라우드가 최신 LLM '통이치엔원 2.0'을 공개했는데, 이는 복잡한 지침 이해, 광고문구 작성, 추론, 암기 등에서 성능이 향상된 AI 모델이다. 이 모델은 다양한 벤치마크 테스트에서 다른 주요 AI 모델을 능가하며, 산업별로 특화된 생성 AI 모델을 제공하고 올인원 AI 모델 구축 플랫폼도 출시했다.&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;3. Contextual compression&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;검색된 문서 중 쿼리와 관련된 정보만 추출하여 반환&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;무관한 정보를 제거하는 방식&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;우선 기본 검색기를 먼저 설정한 후&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722845824673&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;quest = '통이치엔원에 대해 알려줘'

llm = ChatOpenAI(
    model='gpt-3.5-turbo-0125',
    temperature=0,
    max_tokens=500,
)
base_retriever = vectorstore.as_retriever(
                                search_type='mmr',
                                search_kwargs={'k':7, 'fetch_k': 20})
docs = base_retriever.get_relevant_documents(quest)&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;해당 문서들을 효율적으로 압축함&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722845957620&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=base_retriever
)

compressed_docs = compression_retriever.get_relevant_documents(quest)
print(len(compressed_docs))&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722845969634&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;[Document(metadata={'author': 'dj', 'creationDate': &quot;D:20231208132838+09'00'&quot;, 'creator': 'Hwp 2018 10.0.0.13462', 'file_path': '/content/drive/MyDrive/재정정보경진대회/SPRI_AI_Brief_2023년12월호_F.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': &quot;D:20231208132838+09'00'&quot;, 'page': 11, 'producer': 'Hancom PDF 1.3.0.542', 'source': '/content/drive/MyDrive/재정정보경진대회/SPRI_AI_Brief_2023년12월호_F.pdf', 'subject': '', 'title': '', 'total_pages': 23, 'trapped': ''}, page_content='알리바바 클라우드, 최신 LLM &amp;lsquo;통이치엔원 2.0&amp;rsquo; 공개\n알리바바 클라우드가 복잡한 지침 이해, 광고문구 작성, 추론, 암기 등에서 성능이 향상된 최신 \nLLM &amp;lsquo;통이치엔원 2.0&amp;rsquo;을 공개\n알리바바 클라우드는 산업별로 특화된 생성 AI 모델을 공개하는 한편, 모델 개발과 애플리케이션 \n구축 절차를 간소화하는 올인원 AI 모델 구축 플랫폼도 출시\n알리바바의 통이치엔원 2.0, 주요 벤치마크 테스트에서 여타 LLM 능가\n중국의 알리바바 클라우드가 2023년 10월 31일 열린 연례 기술 컨퍼런스에서 최신 LLM &amp;lsquo;통이\n치엔원(Tongyi Qianwen) 2.0&amp;rsquo;을 공개\n알리바바 클라우드는 통이치엔원 2.0이 2023년 4월 출시된 1.0 버전보다 복잡한 지침 이해, \n광고문구 작성, 추론, 암기 등에서 성능이 향상되었다고 설명\n통이치엔원 2.0은 언어 이해 테스트(MMLU), 수학(GSM8k), 질문 답변(ARC-C)과 같은 벤치마크 \n테스트에서 라마(Llama-2-70B)와 GPT-3.5를 비롯한 주요 AI 모델을'),
 Document(page_content='한국전쟁이 발발하여 큰 피해를 입었습니다. 전쟁 후 남한은 빠른 경제 발전을 이루며 오늘날에 이르렀습니다.'),
 Document(metadata={'author': 'dj', 'creationDate': &quot;D:20231208132838+09'00'&quot;, 'creator': 'Hwp 2018 10.0.0.13462', 'file_path': '/content/drive/MyDrive/재정정보경진대회/SPRI_AI_Brief_2023년12월호_F.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': &quot;D:20231208132838+09'00'&quot;, 'page': 7, 'producer': 'Hancom PDF 1.3.0.542', 'source': '/content/drive/MyDrive/재정정보경진대회/SPRI_AI_Brief_2023년12월호_F.pdf', 'subject': '', 'title': '', 'total_pages': 23, 'trapped': ''}, page_content='FTC는 아마존 AI 비서 &amp;lsquo;알렉사(Alexa)&amp;rsquo;와 스마트홈 보안 기기 &amp;lsquo;링(Ring)&amp;rsquo;이 소비자의 사적 \n정보를 알고리즘 훈련에 사용하여 프라이버시를 침해한 혐의를 조사하는 등 법적 권한을 활용해 AI \n관련 불법 행위에 대처하고 있음\n* FTC는 2023년 5월 31일 동의를 받지 않고 어린이들의 음성과 위치 정보를 활용한 &amp;lsquo;알렉사&amp;rsquo;와 고객의 사적 영상에 대하여 \n직원에게 무제한 접근 권한을 부여한 &amp;lsquo;링&amp;rsquo;에 3,080만 달러(약 420억 원)의 과징금을 부과')]&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;cf)&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/231364&quot;&gt;https://wikidocs.net/231364&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>langchain</category>
      <category>rag</category>
      <category>랭체인</category>
      <category>위키독스</category>
      <category>파이썬</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/69</guid>
      <comments>https://hoooa.tistory.com/69#entry69comment</comments>
      <pubDate>Mon, 5 Aug 2024 17:26:02 +0900</pubDate>
    </item>
    <item>
      <title>[error]ModuleNotFoundError: No module named 'pillow_heif'</title>
      <link>https://hoooa.tistory.com/68</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://hoooa.tistory.com/67&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://hoooa.tistory.com/67&lt;/a&gt; 에서 UnstructuredPDFLoader 실습하던 중 만난 오류,,&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722501233020&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_community.document_loaders import UnstructuredPDFLoader

pdf = '/content/drive/MyDrive/재정정보경진대회/data/train_source/1-1 2024 주요 재정통계 1권.pdf'
loader = UnstructuredPDFLoader(pdf, mode='elements')
pages = loader.load()

print(len(pages))
pages[20].page_content[:10]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;pillow_heif 처음 들어보는 모듈인데 뭘까..찾아봐도 모르겠던 와중&lt;/span&gt;&lt;/p&gt;
&lt;div style=&quot;background-color: #1e1e1e; color: #d4d4d4;&quot;&gt;
&lt;div&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #d4d4d4;&quot;&gt;pip install unstructured&lt;/span&gt;&lt;span style=&quot;color: #dcdcdc;&quot;&gt;[&lt;/span&gt;&lt;span style=&quot;color: #d4d4d4;&quot;&gt;all-docs&lt;/span&gt;&lt;span style=&quot;color: #dcdcdc;&quot;&gt;]&lt;/span&gt;&lt;span style=&quot;color: #dcdcdc;&quot;&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;이 아이를 다시 설치해보라는 얘기가 있어서 했더니 성공,,,ㅎㅎㅎ&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;파이썬 &lt;b&gt;Unstructured&lt;/b&gt; 라이브러리&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;unstructured data &amp;rarr; structured data로 변환&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;PDF, HTML, JSON, XML 등&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;'pip install unstructured[파일 형태]'&amp;nbsp; &amp;nbsp;# or [all-docs]&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- Data Loader로 다양하게 쓰임&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ex) from langchain_unstructured import UnstructuredLoader&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;from langchain_community.document_loaders import UnstructuredCSVLoader&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;cf)&amp;nbsp;&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://python.langchain.com/v0.2/docs/integrations/providers/unstructured/&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://python.langchain.com/v0.2/docs/integrations/providers/unstructured/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>error</category>
      <category>langchain</category>
      <category>unstructured</category>
      <category>파이썬</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/68</guid>
      <comments>https://hoooa.tistory.com/68#entry68comment</comments>
      <pubDate>Thu, 1 Aug 2024 17:46:00 +0900</pubDate>
    </item>
    <item>
      <title>[AI]RAG 기본 이론&amp;amp;실습(2)</title>
      <link>https://hoooa.tistory.com/67</link>
      <description>&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://hoooa.tistory.com/65&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://hoooa.tistory.com/65&lt;/a&gt; 에서 정리했던 RAG의 기본 파이프라인(Data Load, Text Split, Indexing, Retrieval, Generation)을 한층 자세하게 들어가보자~&lt;/span&gt;&lt;/p&gt;
&lt;figure id=&quot;og_1722496745018&quot; contenteditable=&quot;false&quot; data-ke-type=&quot;opengraph&quot; data-ke-align=&quot;alignCenter&quot; data-og-type=&quot;article&quot; data-og-title=&quot;[AI]RAG 기본 이론&amp;amp;실습(1)&quot; data-og-description=&quot;RAG에 대해선 이전에 아주 짧게 다뤄봤어서 공부가 필요한 상황,,,경진대회 문제라도 제대로 풀려면 해야된다 아자아자!!! (이제는 더 이상 물러설 곳이 없다)&amp;nbsp; https://hoooa.tistory.com/58에 이어서 Lang&quot; data-og-host=&quot;hoooa.tistory.com&quot; data-og-source-url=&quot;https://hoooa.tistory.com/65&quot; data-og-url=&quot;https://hoooa.tistory.com/65&quot; data-og-image=&quot;https://scrap.kakaocdn.net/dn/000e4/hyWKBOJDkF/f2rtjSkqrMvMHhF8g3sdXk/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/dlr0dV/hyWKy5xILT/0oTDSCrtXdqNzClfnSmvxk/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/cPBNiv/hyWKAhZpLc/JU4lYy3POxypnD1dvDV271/img.png?width=400&amp;amp;height=400&amp;amp;face=0_0_400_400&quot;&gt;&lt;a href=&quot;https://hoooa.tistory.com/65&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot; data-source-url=&quot;https://hoooa.tistory.com/65&quot;&gt;
&lt;div class=&quot;og-image&quot; style=&quot;background-image: url('https://scrap.kakaocdn.net/dn/000e4/hyWKBOJDkF/f2rtjSkqrMvMHhF8g3sdXk/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/dlr0dV/hyWKy5xILT/0oTDSCrtXdqNzClfnSmvxk/img.png?width=800&amp;amp;height=800&amp;amp;face=0_0_800_800,https://scrap.kakaocdn.net/dn/cPBNiv/hyWKAhZpLc/JU4lYy3POxypnD1dvDV271/img.png?width=400&amp;amp;height=400&amp;amp;face=0_0_400_400');&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;og-text&quot;&gt;
&lt;p class=&quot;og-title&quot; data-ke-size=&quot;size16&quot;&gt;[AI]RAG 기본 이론&amp;amp;실습(1)&lt;/p&gt;
&lt;p class=&quot;og-desc&quot; data-ke-size=&quot;size16&quot;&gt;RAG에 대해선 이전에 아주 짧게 다뤄봤어서 공부가 필요한 상황,,,경진대회 문제라도 제대로 풀려면 해야된다 아자아자!!! (이제는 더 이상 물러설 곳이 없다)&amp;nbsp; https://hoooa.tistory.com/58에 이어서 Lang&lt;/p&gt;
&lt;p class=&quot;og-host&quot; data-ke-size=&quot;size16&quot;&gt;hoooa.tistory.com&lt;/p&gt;
&lt;/div&gt;
&lt;/a&gt;&lt;/figure&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;1. Data Load&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;text-align: center;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;불러오고자 하는 데이터의 형태에 따라 다양한 Document Loader를 활용할 수 있음!&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;웹 문서 WebBaseLoader &lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;특정 웹 페이지에서 문서를 가져오기&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722497295879&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import bs4
from langchain_community.document_loaders import WebBaseLoader

url1 = &quot;https://blog.langchain.dev/week-of-1-22-24-langchain-release-notes/&quot;
url2 = &quot;https://blog.langchain.dev/week-of-2-5-24-langchain-release-notes/&quot;

loader = WebBaseLoader(
    web_paths=(url1, url2),	# 로드할 웹페이지 url - 단일 문자열 or 시퀀스 배열
    bs_kwargs=dict(
        parse_only = bs4.SoupStrainer( # 특정 클래스 이름의 HTML 요소만 추출
            class_ = (&quot;article-header&quot;, &quot;article-content&quot;)
        )
    ),
)
docs = loader.load()
print(len(docs))
docs[0]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;텍스트 문서&amp;nbsp;TextLoader&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1722497958822&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 텍스트 문서
path = '/content/drive/MyDrive/재정정보경진대회/'
from langchain_community.document_loaders import TextLoader

loader = TextLoader(path + 'test.txt')
data = loader.load()

print(type(data))
data[0].page_content&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722498269883&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;&amp;lt;class 'list'&amp;gt;
안녕하세요~ 테스트 load 테스트 하고 있습니다~&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;폴더 DirectoryLoader &lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;csv 파일 CSVLoader&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;PDF&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- PyPDFLoader(PDF문서 페이지별) / UnstructuredPDFLoader(형식없는 PDF) / PyMuPDFLoader(상세한 MetaData) / OnlinePDFLoader(온라인에 업로드된 PDF) / PyPDFDirectoryLoader(특정 폴더의 모든 PDF)&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722501037646&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;!pip install -q pypdf
#!pip install unstructured unstructured-inference
!pip install unstructured[all-docs]
!pip install pymupdf

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader

pdf = 'test.pdf'

# PyPDFLoader
loader = PyPDFLoader(pdf)
pages = loader.load()

print(len(pages))
print(pages[20])

# UnstructuredPDFLoader
loader = UnstructuredPDFLoader(pdf, mode='elements')	# elements: 텍스트 청크가 분리된 채로 유지 - 원본 레이아웃과 유사함
pages = loader.load()

# PyMuPDFLoader
loader = PyMuPDFLoader(pdf)
pages = loader.load()

# OnlinePDFLoader
loader = OnlinePDFLoader(&quot;https://arxiv.org/pdf/1706.03762.pdf&quot;)    # Transformer 논문
pages = loader.load()
pages[0].page_content[:1000]

# PyPDFDirectoryLoader
loader = PyPDFDirectoryLoader('./')
data = loader.load()&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;+UnstructuredPDFLoader 오류 해결~&amp;nbsp; &lt;a href=&quot;https://hoooa.tistory.com/68&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://hoooa.tistory.com/68&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;2. Text Split&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: center;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;LLM의 입력 토큰 한도에 맞추기 위해 긴 문서 &amp;rarr; Chunk로 분리&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 각 청크가 독립적 의미를 갖도록 나눠야함&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: left;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- LLM 모델의 입력 크기/비용을 고려하여 적합한 최적 크기를 조정할 수 있음&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;CharacterTextSplitter&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;개별 문자(Separator)를 기준으로 청크 분리&amp;nbsp;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722835760473&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_community.document_loaders import TextLoader

loader = TextLoader(path+'test.txt')
data = loader.load()

from langchain_text_splitters import CharacterTextSplitter
ts = CharacterTextSplitter(
    separator = '',       # 청크 나누는 기준
    chunk_size = 500,     # 청크 최대 길이
    chunk_overlap = 100,  # 인접 청크 사이 중복으로 포함될 문자 수
    length_function = len,  # 청크 길이 계산 함수
)

texts = ts.split_text(data[0].page_content)
print(len(texts))
print(len(texts[0]))


text_splitter = CharacterTextSplitter(
    separator = '\n',   # 줄바꿈 문자 기준으로 청크 나누기
    chunk_size = 500,
    chunk_overlap  = 100,
    length_function = len,
)

texts = text_splitter.split_text(data[0].page_content)
texts[0]&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722835781078&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;한국의 역사는 수천 년에 걸쳐 이어져 온 긴 여정 속에서 다양한 문화와 전통이 형성되고 발전해 왔습니다. 고조선에서 시작해 삼국 시대의 경쟁, 그리고 통일 신라와 고려를 거쳐 조선까지, 한반도는 많은 변화를 겪었습니다.\n고조선은 기원전 2333년 단군왕검에 의해 세워졌다고 전해집니다. 이는 한국 역사상 최초의 국가로, 한민족의 시원이라 할 수 있습니다. 이후 기원전 1세기경에는 한반도와 만주 일대에서 여러 소국이 성장하며 삼한 시대로 접어듭니다.\n...&amp;lt;중략&amp;gt;...\n해방 후 한반도는 남북으로 분단되어 각각 다른 정부가 수립되었고, 1950년에는 한국전쟁이 발발하여 큰 피해를 입었습니다. 전쟁 후 남한은 빠른 경제 발전을 이루며 오늘날에 이르렀습니다.&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;RecursiveCharacterTextSplitter&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;재귀적으로 텍스트 분할, 의미적으로 관련있는 청크 조각들이 모이도록 함&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722835841695&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 100,
    length_function = len,
)

texts = text_splitter.split_text(data[0].page_content)
print(len(texts[0]), len(texts[1]))
texts[0]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Tokenizer&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;토큰 수 기준으로 분할&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722835883095&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=200,
    encoding_name='cl100k_base'
)

docs = text_splitter.split_documents(data)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style1&quot;&gt;&lt;span style=&quot;font-family: 'Noto Serif KR';&quot;&gt;&lt;b&gt;3. Embedding&lt;/b&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: center;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;텍스트 &amp;rarr; 숫자 벡터&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 텍스트 데이터를 벡터 공간 내에서 다룸: 텍스트 간 유사성 계산, 머신러닝/자연어처리 가능&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 활용&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ㄴ의미 검색: 의미적 유사 텍스트 검색, 관련도 높은 문서/정보&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ㄴ문서 분류: 특정 카테고리/주제에 분류&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ㄴ텍스트 유사도 계산&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;임베딩 모델 1. OpenAIEmbeddings&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;embed_documents(문서), embed_query(단일 쿼리)&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722837084011&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_openai import OpenAIEmbeddings

embeddings_model = OpenAIEmbeddings()
embeddings = embeddings_model.embed_documents(
    [
        '안녕하세요!',
        '어! 오랜만이에요',
        '이름이 어떻게 되세요?',
        '날씨가 추워요',
        'Hello LLM!'
    ]
)

# 텍스트 리스트 개수(임베딩 과정을 거친 총 문서 수), 첫번째 문서의 벡터 차원
print(len(embeddings), len(embeddings[0]))
embeddings[0][:10]&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722837098576&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;(5, 1536)
[-0.010432514362037182,
 -0.013580637983977795,
 -0.0064862752333283424,
 -0.018673377111554146,
 -0.018267985433340073,
 0.01667175441980362,
 -0.009222672320902348,
 0.003898732829838991,
 -0.00743641285225749,
 0.010071462020277977]&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722837112865&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# embed_query: 단일 쿼리 문자열 - 임베딩
embedded_query = embeddings_model.embed_query('첫인사를 하고 이름을 물어봤나요?')
embedded_query[:10]&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722837126185&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;[0.003605559002608061,
 -0.024263586848974228,
 0.010929940268397331,
 -0.04110211506485939,
 -0.004533691331744194,
 0.021859880536794662,
 -0.004130976274609566,
 0.020613981410861015,
 -0.006814695429056883,
 0.007387306075543165]&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;해당 문서와 쿼리 간의 유사도를 측정해보면,&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722837134544&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 코사인 유사도(-1 ~ 1)
# 상위 문서와 쿼리 간 유사도 측정
import numpy as np
from numpy import dot
from numpy.linalg import norm

def cos_sim(a, b):
  return dot(a,b) / (norm(a) * norm(b))

for embedding in embeddings:
  print(cos_sim(embedding, embedded_query))&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722837216760&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;0.8348635137337618
0.8153783857089105
0.8844739248939817
0.7899103053431074
0.7468845030598241&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;세번째 문서('이름이 어떻게 되세요?')와 쿼리('첫인사를 하고 이름을 물어봤나요?')의 유사도가 가장 높게 나옴&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2. HuggingFaceEmbeddings&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;sentence-transformers 라이브러리를 통해 사전훈련된 임베딩 모델 활용&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722838505239&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;### HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(
    model_name = 'jhgan/ko-sroberta-nli',   # 사용할 모델: 자연어 추론NLI에 적합한 ko-sroberta
    model_kwargs = {'device': 'cpu'},       # 'cuda'는 GPU
    # 임베딩 정규화하여 모든 벡터가 같은 범위 값을 갖도록 -&amp;gt; 유사도 계산 시 일관성 높임
    encode_kwargs = {'normalize_embeddings': True},
)

embeddings_model&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722838526376&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: RobertaModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
), model_name='jhgan/ko-sroberta-nli', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True}, multi_process=False, show_progress=False)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722838531807&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;embeddings = embeddings_model.embed_documents(
    [
        '안녕하세요!',
        '어! 오랜만이에요',
        '이름이 어떻게 되세요?',
        '날씨가 추워요',
        'Hello LLM!'
    ]
)
embedded_query = embeddings_model.embed_query('첫인사를 하고 이름을 물어봤나요?')

for embedding in embeddings:
    print(cos_sim(embedding, embedded_query))&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722838541288&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;0.5899016189601531
0.4182631225980652
0.7240604521610333
0.05702662997392148
0.4316418328113528&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;cf)&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/231364&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://wikidocs.net/231364&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>AI</category>
      <category>langchain</category>
      <category>rag</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/67</guid>
      <comments>https://hoooa.tistory.com/67#entry67comment</comments>
      <pubDate>Thu, 1 Aug 2024 17:31:33 +0900</pubDate>
    </item>
    <item>
      <title>[AI]RAG 기본 이론&amp;amp;실습(1)</title>
      <link>https://hoooa.tistory.com/65</link>
      <description>&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;RAG에 대해선 이전에 아주 짧게 다뤄봤어서 공부가 필요한 상황,,,&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;경진대회 문제라도 제대로 풀려면 해야된다 아자아자!!! &lt;s&gt;(이제는 더 이상 물러설 곳이 없다)&lt;/s&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt; &lt;a href=&quot;https://hoooa.tistory.com/58&quot;&gt;https://hoooa.tistory.com/58&lt;/a&gt;에 이어서 LangChain과 관련된 RAG를 공부 및 실습해보고자 한다! &lt;/span&gt;&lt;/p&gt;
&lt;figure id=&quot;og_1722492049052&quot; contenteditable=&quot;false&quot; data-ke-type=&quot;opengraph&quot; data-ke-align=&quot;alignCenter&quot; data-og-type=&quot;article&quot; data-og-title=&quot;[AI]LangChain 기본 이론&amp;amp;실습(1)&quot; data-og-description=&quot;※ Langchain 대규모 언어 모델(LLM)과 애플리케이션의 통합을 간소화하는 SDK API를 노출하여 기본 LLM의 구현 세부 사항을 요약 =&amp;gt; 코드를 크게 변경하지 않고도 모델 교체/대체 가능≫ 언어모델 용도&quot; data-og-host=&quot;hoooa.tistory.com&quot; data-og-source-url=&quot;https://hoooa.tistory.com/58&quot; data-og-url=&quot;https://hoooa.tistory.com/58&quot; data-og-image=&quot;https://scrap.kakaocdn.net/dn/bWidDH/hyWKJ63dRE/Mntw8oAFaNmz7RRjm8rSYK/img.png?width=800&amp;amp;height=98&amp;amp;face=0_0_800_98,https://scrap.kakaocdn.net/dn/Un0QY/hyWGYdK24C/UR2qTmN9vMYvocMeu6ibnK/img.png?width=800&amp;amp;height=98&amp;amp;face=0_0_800_98,https://scrap.kakaocdn.net/dn/cNov0W/hyWG0QaaTc/txGsu9XmrC7p8hhhePDJv0/img.png?width=400&amp;amp;height=400&amp;amp;face=0_0_400_400&quot;&gt;&lt;a href=&quot;https://hoooa.tistory.com/58&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot; data-source-url=&quot;https://hoooa.tistory.com/58&quot;&gt;
&lt;div class=&quot;og-image&quot; style=&quot;background-image: url('https://scrap.kakaocdn.net/dn/bWidDH/hyWKJ63dRE/Mntw8oAFaNmz7RRjm8rSYK/img.png?width=800&amp;amp;height=98&amp;amp;face=0_0_800_98,https://scrap.kakaocdn.net/dn/Un0QY/hyWGYdK24C/UR2qTmN9vMYvocMeu6ibnK/img.png?width=800&amp;amp;height=98&amp;amp;face=0_0_800_98,https://scrap.kakaocdn.net/dn/cNov0W/hyWG0QaaTc/txGsu9XmrC7p8hhhePDJv0/img.png?width=400&amp;amp;height=400&amp;amp;face=0_0_400_400');&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;og-text&quot;&gt;
&lt;p class=&quot;og-title&quot; data-ke-size=&quot;size16&quot;&gt;[AI]LangChain 기본 이론&amp;amp;실습(1)&lt;/p&gt;
&lt;p class=&quot;og-desc&quot; data-ke-size=&quot;size16&quot;&gt;※ Langchain 대규모 언어 모델(LLM)과 애플리케이션의 통합을 간소화하는 SDK API를 노출하여 기본 LLM의 구현 세부 사항을 요약 =&amp;gt; 코드를 크게 변경하지 않고도 모델 교체/대체 가능≫ 언어모델 용도&lt;/p&gt;
&lt;p class=&quot;og-host&quot; data-ke-size=&quot;size16&quot;&gt;hoooa.tistory.com&lt;/p&gt;
&lt;/div&gt;
&lt;/a&gt;&lt;/figure&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;※ RAG(Retrieval-Augmented Generation)&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;기존의 LLM을 확장, 더욱 정확하고 풍부한 정보를 제공하기 위함&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;학습 데이터에 &lt;b&gt;불포함된 외부 데이터&lt;/b&gt;를 &lt;span style=&quot;background-color: #ffc1c8;&quot;&gt;실시간으로 검색(retrieval) &amp;amp; 답변 생성(generation)&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;≫ Hallucination 방지 &amp;amp; 최신 정보 반영&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&amp;gt;&amp;gt; 기본 구조&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 검색 단계(Retrieval Phase): 질문/컨텍스트 in &amp;rarr; 관련 외부 데이터 검색 from 검색 엔진/DB 등&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 생성 단계(Generation Phase): 검색 정보+기존 지식&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&amp;nbsp;&amp;rarr; 주어진 질문에 대한 답변 생성&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;1. Load Data&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;RAG에 사용할 데이터 불러오기&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722494582658&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 데이터 로드
from langchain_community.document_loaders import WebBaseLoader
url = 'https://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EC%A0%95%EC%B1%85%EA%B3%BC_%EC%A7%80%EC%B9%A8'
loader = WebBaseLoader(url)

docs = loader.load()    # 웹페이지 텍스트 -&amp;gt; Documents
print(len(docs))
print(len(docs[0].page_content))
print(docs[0].page_content[5000:6000])&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722494592209&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;1
13153
좀 더 빠르게 강력한 수단을 이용해야 합니다. 특히 정책 문서에 명시된 원칙을 지키지 않는 것은 대부분의 경우 다른 사용자에게 받아들여지지 않습니다 (다른 분들에게 예외 상황임을 설득할 수 있다면 가능하기는 하지만요). 이는 당신을 포함해서 편집자 개개인이 정책과 지침을 직접 집행 및 적용한다는 것을 의미합니다.
특정 사용자가 명백히 정책에 반하는 행동을 하거나 정책과 상충되는 방식으로 지침을 어기는 경우, 특히 의도적이고 지속적으로 그런 행위를 하는 경우 해당 사용자는 관리자의 제재 조치로 일시적, 혹은 영구적으로 편집이 차단될 수 있습니다. 영어판을 비롯한 타 언어판에서는 일반적인 분쟁 해결 절차로 끝낼 수 없는 사안은 중재위원회가 개입하기도 합니다.

문서 내용
정책과 지침의 문서 내용은 처음 읽는 사용자라도 원칙과 규범을 잘 이해할 수 있도록 다음 원칙을 지켜야 합니다.

명확하게 작성하세요. 소수만 알아듣거나 준법률적인 단어, 혹은 지나치게 단순한 표현은 피해야 합니다. 명확하고, 직접적이고, 모호하지 않고, 구체적으로 작성하세요. 지나치게 상투적인 표현이나 일반론은 피하세요. 지침, 도움말 문서 및 기타 정보문 문서에서도 &quot;해야 합니다&quot; 혹은 &quot;하지 말아야 합니다&quot; 같이 직접적인 표현을 굳이 꺼릴 필요는 없습니다.
가능한 간결하게, 너무 단순하지는 않게. 정책이 중언부언하면 오해를 부릅니다. 불필요한 말은 생략하세요. 직접적이고 간결한 설명이 마구잡이식 예시 나열보다 더 이해하기 쉽습니다. 각주나 관련 문서 링크를 이용하여 더 상세히 설명할 수도 있습니다.
규칙을 만든 의도를 강조하세요. 사용자들이 상식대로 행동하리라 기대하세요. 정책의 의도가 명료하다면, 추가 설명은 필요 없죠. 즉 규칙을 '어떻게' 지키는지와 더불어 '왜' 지켜야 하는지 확실하게 밝혀야 합니다.
범위는 분명히, 중복은 피하기. 되도록 앞부분에서 정책 및 지침의 목적과 범위를 분명하게 밝혀야 합니다. 독자 대부분은 도입부 초반만 읽고 나가버리니까요. 각 정책 문서의 내용은 해당 정&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2. Text Split&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;데이터를 Chunk로 분할&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;검색 효율성 &amp;uarr;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722494840226&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 텍스트 분할
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = splitter.split_documents(docs)

print(len(splits))
print(splits[10])   # page_content: 분할된 텍스트 조각 / metadata: 원본 문서의 정보&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722494848970&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;18
page_content='제안과 채택
 백:아님 &amp;sect; 관료주의  문서를 참고하십시오. 단축백:제안
제안 문서란 정책과 지침으로 채택하자고 의견을 묻는 문서이나 아직 위키백과 내에 받아들여지는 원칙으로 확립되지는 않은 문서입니다. {{제안}} 틀을 붙여 공동체 내에서 정책이나 지침으로 채택할 지 의견을 물을 수 있습니다. 제안 문서는 정책과 지침이 아니므로 아무리 실제 있는 정책이나 지침을 요약하거나 인용해서 다른 문서에 쓴다고 해도 함부로 정책이나 지침 틀을 붙여서는 안 됩니다.
'제안'은 완전 새로운 원칙이라기보다, 기존의 불문율이나 토론 총의의 문서를 통한 구체화에 가깝습니다. 많은 사람들이 쉽게 제안을 받아들이도록 하기 위해서는, 기초적인 원칙을 우선 정하고 기본 틀을 짜야 합니다. 정책과 지침의 기본 원칙은 &quot;왜 지켜야 하는가?&quot;, &quot;어떻게 지켜야 하는가?&quot; 두 가지입니다. 특정 원칙을 정책이나 지침으로 확립하기 위해서는 우선 저 두 가지 물음에 성실하게 답하는 제안 문서를 작성해야 합니다.
좋은 아이디어를 싣기 위해 사랑방이나 관련 위키프로젝트에 도움을 구해 피드백을 요청할 수 있습니다. 이 과정에서 공동체가 어느 정도 받아들일 수 있는 원칙이 구체화됩니다. 많은 이와의 토론을 통해 공감대가 형성되고 제안을 개선할 수 있습니다.
정책이나 지침은 위키백과 내의 모든 편집자들에게 적용되는 원칙이므로 높은 수준의 총의가 요구됩니다. 제안 문서가 잘 짜여졌고 충분히 논의되었다면, 더 많은 공동체의 편집자와 논의를 하기 위해 승격 제안을 올려야 합니다. 제안 문서 맨 위에 {{제안}}을 붙여 제안 안건임을 알려주고, 토론 문서에 {{의견 요청}}을 붙인 뒤 채택 제안에 관한 토론 문단을 새로 만들면 됩니다. 많은 편집자들에게 알리기 위해 관련 내용을 {{위키백과 소식}}에 올리고 사랑방에 이를 공지해야 하며, 합의가 있을 경우 미디어위키의 sitenotice(위키백과 최상단에 노출되는 구역)에 공지할 수도 있습니다.' metadata={'source': 'https://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EC%A0%95%EC%B1%85%EA%B3%BC_%EC%A7%80%EC%B9%A8', 'title': '위키백과:정책과 지침 - 위키백과, 우리 모두의 백과사전', 'language': 'ko'}&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;3. Indexing&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;분할 텍스트 &amp;rarr; 검색 가능한 형태로 변환: 텍스트 - 임베딩 - 벡터저장소에 저장 - 유사성 검색&lt;br /&gt;검색 시간&amp;darr; 정확도&amp;uarr;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722495472682&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# indexing
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

vs = Chroma.from_documents(documents=splits,
                           embedding=OpenAIEmbeddings())

docs = vs.similarity_search(&quot;격하 과정에 대해 설명해주세요.&quot;)
print(len(docs))  # 저장된 문서 중 가장 유사한 문서들 개수
print(docs[0].page_content)   # 그 중 가장 유사도가 높은 첫 번째 문서&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722495480618&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;4
격하
특정 정책이나 지침이 편집 관행이나 공동체 규범이 바뀌며 쓸모없어질 수 있고, 다른 문서가 개선되어 내용이 중복될 수 있으며, 불필요한 내용이 증식할 수도 있습니다. 이 경우 편집자들은 정책을 지침으로 격하하거나, 정책 또는 지침을 보충 설명, 정보문, 수필 또는 중단 문서로 격하할 것을 제안할 수 있습니다. 
격하 과정은 채택 과정과 비슷합니다. 일반적으로 토론 문서 내 논의가 시작되고 프로젝트 문서 상단에 {{새로운 토론|문단=진행 중인 토론 문단}} 틀을 붙여 공동체의 참여를 요청합니다. 논의가 충분히 이루어진 후, 제3의 편집자가 토론을 종료하고 평가한 후 상태 변경 총의가 형성되었는지 판단해야 합니다. 폐지된 정책이나 지침은 최상단에 {{중단}} 틀을 붙여 더 이상 사용하지 않는 정책/지침임을 알립니다.
소수의 공동체 인원만 지지하는 수필, 정보문 및 기타 비공식 문서는 일반적으로 주된 작성자의 사용자 이름공간으로 이동합니다. 이러한 논의는 일반적으로 해당 문서의 토론란에서 이루어지며, 간혹 위키백과:의견 요청을 통해 처리되기도 합니다.

같이 보기
위키백과:위키백과의 정책과 지침 목록
위키백과:의견 요청
수필

위키백과:제품, 절차, 정책
위키백과:위키백과 공동체의 기대와 규범
기타 링크&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;4. Retrieval &amp;amp; Generation&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;사용자 입력을 바탕으로 쿼리 생성 후, 인덱싱된 데이터에서 가장 관련성 높은 정보 검색 by LangChain의 Retriever()&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722495968421&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 검색&amp;amp;생성까지 포함된 전체 코드

from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):  # doc 결합
  return '\n\n'.join(doc.page_content for doc in docs)

url = 'https://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EC%A0%95%EC%B1%85%EA%B3%BC_%EC%A7%80%EC%B9%A8'
loader = WebBaseLoader(url)

docs = loader.load()    # 웹페이지 텍스트 -&amp;gt; Documents

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = splitter.split_documents(docs)

vs = Chroma.from_documents(documents=splits,
                           embedding=OpenAIEmbeddings())

template = '''Answer the question based only on the following context:
{context}
Question: {question}
'''

prom = ChatPromptTemplate.from_template(template)
model = ChatOpenAI(model='gpt-3.5-turbo-0125', temperature=0)
retriever = vs.as_retriever()   # 검색

rag_chain = (
    {'context': retriever | format_docs,
     'question': RunnablePassthrough()}
    | prom
    | model
    | StrOutputParser()
)

rag_chain.invoke(&quot;격하 과정에 대해 설명해주세요.&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722495980388&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;격하 과정은 특정 정책이나 지침이 더 이상 필요하지 않거나 개선이 필요한 경우에 해당 정책이나 지침을 수정하거나 중단하는 과정을 말합니다. 이를 위해 편집자들은 해당 정책이나 지침을 다른 형태로 변형하거나 중단할 것을 제안하고, 이에 대한 토론을 거친 후 결정이 내려집니다. 격하 과정은 채택 과정과 유사하며, 토론을 통해 공동체의 참여를 유도하고, 결정이 내려진 후에는 해당 정책이나 지침이 중단되었음을 알리는 틀을 붙여줍니다.&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;cf)&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/231364&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://wikidocs.net/231364&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://aws.amazon.com/ko/what-is/langchain/&quot;&gt;https://aws.amazon.com/ko/what-is/langchain/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;&lt;a href=&quot;https://www.samsungsds.com/kr/insights/what-is-langchain.html&quot;&gt;https://www.samsungsds.com/kr/insights/what-is-langchain.html&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>AI</category>
      <category>langchain</category>
      <category>rag</category>
      <category>데이콘경진대회</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/65</guid>
      <comments>https://hoooa.tistory.com/65#entry65comment</comments>
      <pubDate>Thu, 1 Aug 2024 16:06:59 +0900</pubDate>
    </item>
    <item>
      <title>[error]ValidationError: 1 validation error for ChatOpenAI</title>
      <link>https://hoooa.tistory.com/64</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/231375&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://wikidocs.net/231375&lt;/a&gt; &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;랭체인 LLM 실습 도중 오류 발생&lt;/span&gt;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Parameters {'presence_penalty', 'frequency_penalty', 'stop'} should be specified explicitly. Instead they were passed in as part of `model_kwargs` parameter. (type=value_error)&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1722489634813&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# LLM
from langchain_openai import ChatOpenAI

params = {# 기본 파라미터
    &quot;temperature&quot;: 0.7,
    &quot;max_tokens&quot;: 100,
}

kwargs = {# 선택 파라미터
    &quot;frequency_penalty&quot;: 0.5,
    &quot;presence_penalty&quot;: 0.5,
    &quot;stop&quot;: ['\n']
}

model = ChatOpenAI(model = &quot;gpt-3.5-turbo-0125&quot;, **params, model_kwargs = kwargs)
quest = &quot;태양계에서 가장 큰 행성은 무엇인가요?&quot;
resp = model.invoke(input = quest)

resp&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;857&quot; data-origin-height=&quot;462&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/DWWpJ/btsISpWYOzJ/PO1P57xBWPKUme20GuM9tk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/DWWpJ/btsISpWYOzJ/PO1P57xBWPKUme20GuM9tk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/DWWpJ/btsISpWYOzJ/PO1P57xBWPKUme20GuM9tk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FDWWpJ%2FbtsISpWYOzJ%2FPO1P57xBWPKUme20GuM9tk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;857&quot; height=&quot;462&quot; data-origin-width=&quot;857&quot; data-origin-height=&quot;462&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;파라미터 오류인 듯 싶어 랭체인 LLM 모델의 파라미터를 수정해보려고 했다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://api.python.langchain.com/en/latest/llms/langchain_openai.llms.base.OpenAI.html&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://api.python.langchain.com/en/latest/llms/langchain_openai.llms.base.OpenAI.html&lt;/a&gt; 에서 찾아보니까&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt; '해당 파라미터가 specified explicitly 해야 된다'고 오류에 나와있는데&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;687&quot; data-origin-height=&quot;86&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MNlYH/btsIRLF9d2Y/fKfo2DS7OsjlIIR4OuW1x0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MNlYH/btsIRLF9d2Y/fKfo2DS7OsjlIIR4OuW1x0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MNlYH/btsIRLF9d2Y/fKfo2DS7OsjlIIR4OuW1x0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMNlYH%2FbtsIRLF9d2Y%2FfKfo2DS7OsjlIIR4OuW1x0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;687&quot; height=&quot;86&quot; data-origin-width=&quot;687&quot; data-origin-height=&quot;86&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;model_kwargs로 설정할 때는 아니라고 돼있어서&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;'presence_penalty' 인자 설정을 확인해봤더니&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;563&quot; data-origin-height=&quot;78&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/S64g0/btsIRNDY4RJ/Fjuiowri2XOZdJjWPyrnV0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/S64g0/btsIRNDY4RJ/Fjuiowri2XOZdJjWPyrnV0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/S64g0/btsIRNDY4RJ/Fjuiowri2XOZdJjWPyrnV0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FS64g0%2FbtsIRNDY4RJ%2FFjuiowri2XOZdJjWPyrnV0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;563&quot; height=&quot;78&quot; data-origin-width=&quot;563&quot; data-origin-height=&quot;78&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;params에 기본 인자로 넣어도 되게 생겨서 넣어봤는데 됐다..얏호~&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;# 수정 이후 코드&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722489966227&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_openai import ChatOpenAI

params = {# 기본 파라미터
    &quot;temperature&quot;: 0.7,
    &quot;max_tokens&quot;: 100,
    &quot;frequency_penalty&quot;: 0.5,
    &quot;presence_penalty&quot;: 0.5,
    &quot;stop&quot;: ['\n']
}

model = ChatOpenAI(model = &quot;gpt-3.5-turbo-0125&quot;, **params)
quest = &quot;태양계에서 가장 큰 행성은 무엇인가요?&quot;
resp = model.invoke(input = quest)

resp.content&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;835&quot; data-origin-height=&quot;58&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/k3xiO/btsITO2mdNY/jZePk56nVqWHk0DCVTaqc0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/k3xiO/btsITO2mdNY/jZePk56nVqWHk0DCVTaqc0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/k3xiO/btsITO2mdNY/jZePk56nVqWHk0DCVTaqc0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fk3xiO%2FbtsITO2mdNY%2FjZePk56nVqWHk0DCVTaqc0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;835&quot; height=&quot;58&quot; data-origin-width=&quot;835&quot; data-origin-height=&quot;58&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>chatopenai</category>
      <category>langchain</category>
      <category>llm</category>
      <category>오류</category>
      <category>파이썬</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/64</guid>
      <comments>https://hoooa.tistory.com/64#entry64comment</comments>
      <pubDate>Thu, 1 Aug 2024 14:28:25 +0900</pubDate>
    </item>
    <item>
      <title>[AI]LangChain 기본 이론&amp;amp;실습(3)</title>
      <link>https://hoooa.tistory.com/63</link>
      <description>&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;LangChain이 제공하는 언어 모델 두 가지&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;※ LLM&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;단일 요청에 대한 복잡한 출력 생성 ex) 문서 요약, 질문 답변 생성, etc&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;텍스트 문자열 in &amp;rarr; 텍스트 문자열 out&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;+표준화된 인터페이스 &amp;rarr; 다양한 LLM 제공 업체 간 호환성 &amp;rarr; 유연한 모델 전환/다중 LLM 통합 가능&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;※ ChatModel&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;사용자와의 상호작용을 통한 연속적 대화 관리 ex) 챗봇&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;메시지 리스트 in &amp;rarr; 하나의 메시지 out&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;대화의 맥락을 유지하며 적절한 응답 생성&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;+다양한 모델 제공 업체/작동 모드&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-style=&quot;style7&quot; data-ke-type=&quot;horizontalRule&quot; /&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;※ LLM 모델 파라미터&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;-Temperature: 생성된 텍스트의 다양성 조정&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;- Max Tokens: 생성할 최대 토큰 수(텍스트 길이 제한)&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;- Top P(Probability): 생성 과정에서 특정 확률 분포 내 상위 P% 토큰만 고려&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;- Frequency Penalty: 값이 클수록 재등장할 확률 감소시키기, 반복&amp;darr; 다양성&amp;uarr;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;- Presence Penalty: 텍스트 내 단어의 존재 유무에 따른 해당 단어의 선택 확률 조정&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;- Stop Sequences: 특정 단어/구절이 등장하면 생성을 멈추도록 설정&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;LLM 모델 만들기&lt;br /&gt;1. 파라미터 직접 전달&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1722411848231&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;from langchain_openai import ChatOpenAI

params = {# 기본 파라미터
    &quot;temperature&quot;: 0.7,
    &quot;max_tokens&quot;: 100,
    &quot;frequency_penalty&quot;: 0.5,
    &quot;presence_penalty&quot;: 0.5,
    &quot;stop&quot;: ['\n']
}

model = ChatOpenAI(model = &quot;gpt-3.5-turbo-0125&quot;, **params)
quest = &quot;태양계에서 가장 큰 행성은 무엇인가요?&quot;
resp = model.invoke(input = quest)

resp.content&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;835&quot; data-origin-height=&quot;58&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dLHU8g/btsISOCcRCO/BUFZfsgDiKxkR3s0ziDwZ0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dLHU8g/btsISOCcRCO/BUFZfsgDiKxkR3s0ziDwZ0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dLHU8g/btsISOCcRCO/BUFZfsgDiKxkR3s0ziDwZ0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdLHU8g%2FbtsISOCcRCO%2FBUFZfsgDiKxkR3s0ziDwZ0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;835&quot; height=&quot;58&quot; data-origin-width=&quot;835&quot; data-origin-height=&quot;58&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2. 모델 파라미터 추가-bind 메소드&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;특정 모델 설정을 기본값으로 사용할 때 or 일부 파라미터만 다르게 적용할 때 bind 활용&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;코드의 가독성&amp;amp;재사용성 &amp;uarr;&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722411848235&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;# bind
from langchain_core.prompts import ChatPromptTemplate

prom = ChatPromptTemplate.from_messages([
    ('system', &quot;이 시스템은 역사학 질문에 답변할 수 있습니다.&quot;),
    ('user', '{user_input}'),
])

model = ChatOpenAI(model='gpt-3.5-turbo-0125', max_tokens=100)
messages = prom.format_messages(user_input = &quot;한국의 독립기념일은 언제인가요?&quot;)
answer1 = model.invoke(messages)
print(answer1)    # binding 이전

chain = prom | model.bind(max_tokens = 10)
answer2 = chain.invoke({'user_input': &quot;한국의 독립기념일은 언제인가요?&quot;})
print(answer2)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722490737550&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;content='한국의 독립기념일은 3월 1일입니다. 이 날은 1919년 3월 1일 대한민국의 광복을 위한 독립운동이 시작된 날로 기념됩니다. 현재 대한민국에서는 3월 1일을 독립운동 기념일로 지' response_metadata={'token_usage': {'completion_tokens': 100, 'prompt_tokens': 56, 'total_tokens': 156}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'length', 'logprobs': None} id='run-861a8964-0965-4bdf-a1ad-79fab9a4c1e0-0' usage_metadata={'input_tokens': 56, 'output_tokens': 100, 'total_tokens': 156}
content='대한민국의 독' response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 56, 'total_tokens': 65}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'length', 'logprobs': None} id='run-bc0f9ec8-d128-46ca-8e74-522bc6b1f6a2-0' usage_metadata={'input_tokens': 56, 'output_tokens': 9, 'total_tokens': 65}&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style7&quot; /&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;※ 출력 파서 Output Parser&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;-출력 포맷 변경: 원하는 형식으로 출력&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;-정보 추출: 필요한 정보만 추출&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;-결과 정제: 후처리 작업 수행&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;-조건부 로직 적용: 출력 데이터 기반 다른 처리 수행&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;1. CSV Parser&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;랭체인의 CommaSeparatedListOutputParser&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;모델이 생성한 텍스트에서 ','로 구분된 항목 추출 &amp;amp; 리스트로 파싱&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;get_format_instructions(): 모델에 전달할 포맷 지시사항&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722411848239&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import CommaSeparatedListOutputParser

output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

prom = PromptTemplate(
    template = &quot;다섯 명의 {인물}을 나열해주세요. \n{format_instructions}&quot;,
    input_variables = [&quot;인물&quot;],
    partial_variables = {'format_instructions': format_instructions},
)

llm = ChatOpenAI(model='gpt-3.5-turbo-0125', temperature = 0)  # temperature=0: 일관된 출력 생성
chain = prom | llm | output_parser
chain.invoke({&quot;인물&quot;: &quot;세계 최고의 부자&quot;})&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722491443541&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;['Jeff Bezos', 'Elon Musk', 'Bernard Arnault', 'Bill Gates', 'Mark Zuckerberg']&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;2. JSON Parser&lt;/span&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;span style=&quot;&quot;&gt;다음 예제에선 JsonOutputParser와 Pydantic 사용 &amp;rarr; 모델 출력(JSON 파싱) 후 Pydantic 모델로 구조화&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722411848242&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;#JSON
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# 자료구조 정의 (pydantic)
class CusineRecipe(BaseModel):
    name: str = Field(description=&quot;name of a cusine&quot;)
    recipe: str = Field(description=&quot;recipe to cook the cusine&quot;)

# 출력 파서 정의
output_parser = JsonOutputParser(pydantic_object=CusineRecipe)
format_instructions = output_parser.get_format_instructions()

print(format_instructions)

prompt = PromptTemplate(
    template=&quot;Answer the user query.\n{format_instructions}\n{query}\n&quot;,
    input_variables=[&quot;query&quot;],
    partial_variables={&quot;format_instructions&quot;: format_instructions},
)
chain = prompt | model | output_parser

chain.invoke({&quot;query&quot;: &quot;Let me know how to cook Bibimbap&quot;})&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722491789913&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {&quot;properties&quot;: {&quot;foo&quot;: {&quot;title&quot;: &quot;Foo&quot;, &quot;description&quot;: &quot;a list of strings&quot;, &quot;type&quot;: &quot;array&quot;, &quot;items&quot;: {&quot;type&quot;: &quot;string&quot;}}}, &quot;required&quot;: [&quot;foo&quot;]}
the object {&quot;foo&quot;: [&quot;bar&quot;, &quot;baz&quot;]} is a well-formatted instance of the schema. The object {&quot;properties&quot;: {&quot;foo&quot;: [&quot;bar&quot;, &quot;baz&quot;]}} is not well-formatted.

Here is the output schema:
```
{&quot;properties&quot;: {&quot;name&quot;: {&quot;title&quot;: &quot;Name&quot;, &quot;description&quot;: &quot;name of a cusine&quot;, &quot;type&quot;: &quot;string&quot;}, &quot;recipe&quot;: {&quot;title&quot;: &quot;Recipe&quot;, &quot;description&quot;: &quot;recipe to cook the cusine&quot;, &quot;type&quot;: &quot;string&quot;}}, &quot;required&quot;: [&quot;name&quot;, &quot;recipe&quot;]}
```

{'name': 'Bibimbap',
 'recipe': 'Bibimbap is a Korean mixed rice dish made with warm white rice topped with saut&amp;eacute;ed and seasoned vegetables, chili pepper paste, soy sauce, or doenjang, and a raw or fried egg. The ingredients are stirred together just before eating. It can be served either cold or hot.'}&lt;/code&gt;&lt;/pre&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;cf)&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/231346&quot;&gt;https://wikidocs.net/231346&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://aws.amazon.com/ko/what-is/langchain/&quot;&gt;https://aws.amazon.com/ko/what-is/langchain/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;&lt;a href=&quot;https://www.samsungsds.com/kr/insights/what-is-langchain.html&quot;&gt;https://www.samsungsds.com/kr/insights/what-is-langchain.html&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>langchain</category>
      <category>llm</category>
      <category>Parser</category>
      <category>랭체인</category>
      <category>파이썬</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/63</guid>
      <comments>https://hoooa.tistory.com/63#entry63comment</comments>
      <pubDate>Wed, 31 Jul 2024 18:25:10 +0900</pubDate>
    </item>
    <item>
      <title>[AI]LangChain 이론&amp;amp;실습(2)</title>
      <link>https://hoooa.tistory.com/61</link>
      <description>&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;※ 프롬프트&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;사용자와 언어 모델 간의 대화 속 질문/요청 형태의 입력문&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;rarr; 프롬프트 템플릿 중요&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000; text-align: start;&quot;&gt;※ 작성 원칙&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000; text-align: start;&quot;&gt;- 명확성&amp;amp;구체성: 질문이 모호해서는 안 됨&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #000000; text-align: start;&quot;&gt;- 배경 정보 포함: 문맥을 이해할 수 있도록 정보 제공 &lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&amp;rarr; Hallucination&amp;darr; 응답 관련도&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;color: #000000; text-align: start;&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&amp;uarr;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 간결성: 불필요한 정보 B, 최대한 간결하게 G&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 열린 질문: 예/아니오 B, 많은 정보를 제공받을 수 있도록 열린 질문 G&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 명확한 목표: 얻고자 하는 정보/결과를 정확하게 정의&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;- 언어/문체: 맥락에 적합하게&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt; &lt;span style=&quot;color: #000000; text-align: start;&quot;&gt;※&lt;/span&gt;&lt;span style=&quot;color: #000000; text-align: start;&quot;&gt; 프롬프트 템플릿(PromptTemplate)&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;단일문장 or 간단한 명령 == 문자열 기반&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&quot;langchain_core.prompts&quot; 모듈의 &quot;PromptTemplate&quot; 클래스 사용&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-style=&quot;style7&quot; data-ke-type=&quot;horizontalRule&quot; /&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;&quot;&gt;PromptTemplate&lt;/span&gt;&lt;/span&gt;&lt;/blockquote&gt;
&lt;pre id=&quot;code_1722411848231&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;from langchain_core.prompts import PromptTemplate
text = &quot;안녕하세요, 제 이름은 {name}이고, 나이는 {age}살입니다.&quot;
prom_temp = PromptTemplate.from_template(text)    # PromptTemplate 인스턴스
print(prom_temp.format(name = '홍길동', age = 30))&lt;/code&gt;&lt;/pre&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;PromptTemplate + PromptTemplate&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;여러 개의 프롬프트 템플릿을 결합하여 format을 만들 수 있음&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722411848235&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;# 프롬프트 템플릿 결합
prom_temp2 = (
    prom_temp
    + PromptTemplate.from_template(&quot;\n아버지를 아버지라 부를 수 없습니다.&quot;)
    + &quot;위의 문장을 \n{language}로 번역해주세요&quot;
)
print(prom_temp2.format(name = '홍길동', age = 30, language = '중국어'))&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;522&quot; data-origin-height=&quot;97&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/YSJXb/btsIRw9LLaR/kFgKWqlkBESvqIfENyVUyk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/YSJXb/btsIRw9LLaR/kFgKWqlkBESvqIfENyVUyk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/YSJXb/btsIRw9LLaR/kFgKWqlkBESvqIfENyVUyk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FYSJXb%2FbtsIRw9LLaR%2FkFgKWqlkBESvqIfENyVUyk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;522&quot; height=&quot;97&quot; data-origin-width=&quot;522&quot; data-origin-height=&quot;97&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;체인까지 만들어보면 최종적으로 다음과 같은 코드가 됨&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722411848239&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;from langchain.chat_models import ChatOpenAI    # from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model = &quot;gpt-3.5-turbo-0125&quot;)
chain = prom_temp2 | llm | StrOutputParser()
chain.invoke({&quot;name&quot;: '홍길동', &quot;age&quot;: 30, &quot;language&quot;: '중국어'})&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;513&quot; data-origin-height=&quot;45&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bvfLSe/btsITiWONqI/DcUCYtU5fOK751KIqV5pg1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bvfLSe/btsITiWONqI/DcUCYtU5fOK751KIqV5pg1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bvfLSe/btsITiWONqI/DcUCYtU5fOK751KIqV5pg1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbvfLSe%2FbtsITiWONqI%2FDcUCYtU5fOK751KIqV5pg1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;513&quot; height=&quot;45&quot; data-origin-width=&quot;513&quot; data-origin-height=&quot;45&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style7&quot; /&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #000000; text-align: start;&quot;&gt;※&lt;/span&gt;&lt;span style=&quot;color: #000000; text-align: start;&quot;&gt; 챗&amp;nbsp;프롬프트 템플릿(ChatPromptTemplate)&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;대화 상황에서 여러 메시지 기반 단일 메시지 응답을 생성 &amp;rarr; 대화형 모델/챗봇 개발&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;입력: 여러 메시지를 원소로 갖는 리스트&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;메시지: role &amp;amp; content로 구성&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;ㄴMessage 유형: System/Human/AI/Function/Tool&lt;/span&gt;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;nbsp;1. ChatPromptTemplate.from_messages 형식&lt;/span&gt;&lt;/blockquote&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;전달된 메시지 기반으로 프롬프트 구성&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1722411848242&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42; text-align: start;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;# ChatPromptTemplate
# 2-튜플 형태의 메시지 리스트(역할, 내용)
from langchain_core.prompts import ChatPromptTemplate

chat_prom = ChatPromptTemplate.from_messages([
    (&quot;system&quot;, &quot;이 시스템은 음식 질문에 답변할 수 있습니다.&quot;),
    (&quot;user&quot;, &quot;{user_input}&quot;),
])

messages = chat_prom.format_messages(user_input = &quot;대한민국 음식 중 조리과정이 3분 이내인 요리는 무엇이 있나요?&quot;)
# messages&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;797&quot; data-origin-height=&quot;67&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/2ke1T/btsIRfNZHaN/UbjLzUyBKaYBcv3IsEKSrK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/2ke1T/btsIRfNZHaN/UbjLzUyBKaYBcv3IsEKSrK/img.png&quot; data-alt=&quot;System에 '역할', User에 '질문'을 지정&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/2ke1T/btsIRfNZHaN/UbjLzUyBKaYBcv3IsEKSrK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F2ke1T%2FbtsIRfNZHaN%2FUbjLzUyBKaYBcv3IsEKSrK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;797&quot; height=&quot;67&quot; data-origin-width=&quot;797&quot; data-origin-height=&quot;67&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;System에 '역할', User에 '질문'을 지정&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;pre id=&quot;code_1722416865958&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;from langchain_core.output_parsers import StrOutputParser

chain = chat_prom | llm | StrOutputParser()
chain.invoke({&quot;user_input&quot;: &quot;대한민국 음식 중 조리과정이 3분 이내인 요리는 무엇이 있나요?&quot;})&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;786&quot; data-origin-height=&quot;92&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dZanN7/btsIS63lOYb/cktMcpEd2KIkt3yn2QulfK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dZanN7/btsIS63lOYb/cktMcpEd2KIkt3yn2QulfK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dZanN7/btsIS63lOYb/cktMcpEd2KIkt3yn2QulfK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdZanN7%2FbtsIS63lOYb%2FcktMcpEd2KIkt3yn2QulfK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;786&quot; height=&quot;92&quot; data-origin-width=&quot;786&quot; data-origin-height=&quot;92&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&amp;nbsp; 2. MessagePromptTemplate 형식&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;메시지 리스트의 Role(&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #333333; text-align: start;&quot;&gt;System/Human/AI/Function/Tool)&lt;/span&gt;과 Content를 명확하게 표현&lt;/span&gt;&lt;/p&gt;
&lt;pre class=&quot;python&quot; style=&quot;background-color: #383838; color: #d5d5d5; text-align: start;&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;[SystemMessage(content='이 시스템은 음식 질문에 답변할 수 있습니다.'),
 HumanMessage(content='대한민국 음식 중 조리과정이 3분 이내인 요리는 무엇이 있나요?')]&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1722417202684&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# MessagePromptTemplate
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate

chat_prom2 = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(&quot;이 시스템은 음식 질문에 답변할 수 있습니다.&quot;),
    HumanMessagePromptTemplate.from_template(&quot;{user_input}&quot;),
])

chain2 = chat_prom2 | llm | StrOutputParser()
chain2.invoke({&quot;user_input&quot;: &quot;대한민국 음식 중 조리과정이 3분 이내인 요리는 무엇이 있나요?&quot;})&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;790&quot; data-origin-height=&quot;88&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MGRV9/btsITvIquMD/3cizdpVakGET6yZf3mpfaK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MGRV9/btsITvIquMD/3cizdpVakGET6yZf3mpfaK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MGRV9/btsITvIquMD/3cizdpVakGET6yZf3mpfaK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMGRV9%2FbtsITvIquMD%2F3cizdpVakGET6yZf3mpfaK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;790&quot; height=&quot;88&quot; data-origin-width=&quot;790&quot; data-origin-height=&quot;88&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;cf)&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;a href=&quot;https://blog.naver.com/htk1019/223413412145&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://blog.naver.com/htk1019/223413412145&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://wikidocs.net/231346&quot;&gt;https://wikidocs.net/231346&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;a href=&quot;https://aws.amazon.com/ko/what-is/langchain/&quot;&gt;https://aws.amazon.com/ko/what-is/langchain/&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;color: #333333; text-align: start;&quot; data-ke-size=&quot;size14&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light'; color: #000000;&quot;&gt;&lt;a href=&quot;https://www.samsungsds.com/kr/insights/what-is-langchain.html&quot;&gt;https://www.samsungsds.com/kr/insights/what-is-langchain.html&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>AI</category>
      <category>langchain</category>
      <category>OpenAI</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/61</guid>
      <comments>https://hoooa.tistory.com/61#entry61comment</comments>
      <pubDate>Wed, 31 Jul 2024 15:39:33 +0900</pubDate>
    </item>
    <item>
      <title>[error]ModuleNotFoundError: No module named 'langchain_community' / 'langchain_openai'</title>
      <link>https://hoooa.tistory.com/60</link>
      <description>&lt;blockquote data-ke-style=&quot;style2&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;Langchain 설치하고 import 오류&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;1. langchain_community&lt;/span&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;822&quot; data-origin-height=&quot;602&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/lA4P9/btsIS8GIRlR/nlGoAXgSPNwZ5e2bVn5hKk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/lA4P9/btsIS8GIRlR/nlGoAXgSPNwZ5e2bVn5hKk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/lA4P9/btsIS8GIRlR/nlGoAXgSPNwZ5e2bVn5hKk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FlA4P9%2FbtsIS8GIRlR%2FnlGoAXgSPNwZ5e2bVn5hKk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;822&quot; height=&quot;602&quot; data-origin-width=&quot;822&quot; data-origin-height=&quot;602&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div style=&quot;background-color: #1e1e1e; color: #d4d4d4;&quot;&gt;
&lt;div&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;&lt;span style=&quot;color: #82c6ff;&quot;&gt;!&lt;/span&gt;&lt;span style=&quot;color: #d4d4d4;&quot;&gt;pip install langchain-community langchain-core&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Noto Sans Light';&quot;&gt;로 해결!&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;2. langchain_openai&lt;/blockquote&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;752&quot; data-origin-height=&quot;457&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/JgnZ9/btsIRx2fZNI/3H2kWzkEfNz3O9H2WQnke0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/JgnZ9/btsIRx2fZNI/3H2kWzkEfNz3O9H2WQnke0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/JgnZ9/btsIRx2fZNI/3H2kWzkEfNz3O9H2WQnke0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FJgnZ9%2FbtsIRx2fZNI%2F3H2kWzkEfNz3O9H2WQnke0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;752&quot; height=&quot;457&quot; data-origin-width=&quot;752&quot; data-origin-height=&quot;457&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;div style=&quot;background-color: #1e1e1e; color: #d4d4d4;&quot;&gt;
&lt;div&gt;&lt;span style=&quot;color: #82c6ff;&quot;&gt;!&lt;/span&gt;&lt;span style=&quot;color: #d4d4d4;&quot;&gt;pip install langchain-openai&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;해보거나&lt;/p&gt;
&lt;div style=&quot;background-color: #1e1e1e; color: #d4d4d4;&quot;&gt;
&lt;div&gt;&lt;span style=&quot;color: #c586c0;&quot;&gt;from&lt;/span&gt;&lt;span style=&quot;color: #d4d4d4;&quot;&gt; langchain.chat_models &lt;/span&gt;&lt;span style=&quot;color: #c586c0;&quot;&gt;import&lt;/span&gt;&lt;span style=&quot;color: #d4d4d4;&quot;&gt; ChatOpenAI&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;아예 다른 패키지를 사용해보기!&lt;/p&gt;</description>
      <category>Coding/Study</category>
      <category>Import</category>
      <category>langchain</category>
      <category>오류</category>
      <author>후__아</author>
      <guid isPermaLink="true">https://hoooa.tistory.com/60</guid>
      <comments>https://hoooa.tistory.com/60#entry60comment</comments>
      <pubDate>Wed, 31 Jul 2024 14:19:25 +0900</pubDate>
    </item>
  </channel>
</rss>