Scikit-Learn 和大模型 LLM 强强联手!
↓推荐关注↓
作者:Fareed Khan
我们以前介绍过Pandas和ChatGPT的整合,这样可以在不了解Pandas的情况下对DataFrame进行操作,比如pandas-ai的出现。
Scikit-learn
LLM
安装
# Install the scikit-llm package with pip (shell command, not Python)
pip install scikit-llm
# Configure scikit-llm with OpenAI credentials.
# NOTE(review): presumably this must run before any estimator calls the OpenAI API — confirm.
from skllm.config import SKLLMConfig
# Register the OpenAI API key; replace the "<YOUR_KEY>" placeholder with a real key.
# NOTE(review): avoid committing a real key to source control — consider loading it from the environment.
SKLLMConfig.set_openai_key("<YOUR_KEY>")
# Register the OpenAI organization (marked optional by the original author);
# replace the "<YOUR_ORGANIZATION>" placeholder.
SKLLMConfig.set_openai_org("<YOUR_ORGANIZATION>")
ZeroShotGPTClassifier
# Zero-shot text classification using a labelled demo dataset.
from skllm import ZeroShotGPTClassifier
from skllm.datasets import get_classification_dataset
# Load the demo classification dataset from skllm.datasets (not sklearn).
# X and y are presumably the texts and their labels — confirm against the skllm docs.
X, y = get_classification_dataset()
# Build the classifier on top of the "gpt-3.5-turbo" OpenAI model
clf = ZeroShotGPTClassifier(openai_model="gpt-3.5-turbo")
# Fit on X and y.
# NOTE(review): for a zero-shot model this presumably only records the candidate labels seen in y — confirm.
clf.fit(X, y)
# Predict labels for X (the same data that was passed to fit)
labels = clf.predict(X)
# Zero-shot classification without labelled training data:
# import the classifier and the demo dataset loader.
from skllm import ZeroShotGPTClassifier
from skllm.datasets import get_classification_dataset
# Load the demo dataset from skllm.datasets (not sklearn); only X is kept,
# the second returned value is discarded.
X, _ = get_classification_dataset()
# Build the classifier with its default constructor arguments
clf = ZeroShotGPTClassifier()
# No training data is supplied: pass None for X and only the list of candidate labels
clf.fit(None, ['positive', 'negative', 'neutral'])
# Predict labels for X
labels = clf.predict(X)
MultiLabelZeroShotGPTClassifier
多标签也类似
# Multi-label zero-shot classification using a labelled demo dataset:
# import the multi-label classifier and its dataset loader.
from skllm import MultiLabelZeroShotGPTClassifier
from skllm.datasets import get_multilabel_classification_dataset
# Load the demo multi-label dataset from skllm.datasets (not sklearn)
X, y = get_multilabel_classification_dataset()
# Build the classifier.
# NOTE(review): max_labels=3 presumably caps the number of labels assigned per sample — confirm.
clf = MultiLabelZeroShotGPTClassifier(max_labels=3)
# Fit on X and y
clf.fit(X, y)
# Predict labels for X (the same data that was passed to fit)
labels = clf.predict(X)
# Multi-label zero-shot classification without labelled training data.
# Load the demo dataset; only X is kept, the second returned value is discarded.
X, _ = get_multilabel_classification_dataset()
# All candidate labels the classifier may choose from
candidate_labels = [
"Quality",
"Price",
"Delivery",
"Service",
"Product Variety"
]
# Build the classifier.
# NOTE(review): max_labels=3 presumably caps the number of labels assigned per sample — confirm.
clf = MultiLabelZeroShotGPTClassifier(max_labels=3)
# No training data is supplied: pass None for X and the candidate labels wrapped in an
# outer list (a list containing one list of labels)
clf.fit(None, [candidate_labels])
# Predict labels for X
labels = clf.predict(X)
文本向量化
# Text vectorization: embed texts with GPTVectorizer and classify them with XGBoost.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in this snippet;
# they must be provided beforehand (e.g. from a train/test split of a text dataset).
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
# GPTVectorizer is used in the pipeline below but was never imported in the original
# snippet, which made it fail with a NameError.
from skllm.preprocessing import GPTVectorizer
# Label encoder that maps the class labels to integer codes
le = LabelEncoder()
# Learn the label-to-integer mapping from the training labels and encode them
y_train_encoded = le.fit_transform(y_train)
# Encode the test labels with the mapping learned from the training labels
y_test_encoded = le.transform(y_test)
# Pipeline steps as (name, estimator) tuples: vectorize the text first, then classify
steps = [('GPT', GPTVectorizer()), ('Clf', XGBClassifier())]
# Chain the steps into a single estimator
clf = Pipeline(steps)
# Fit the whole pipeline on the training texts and the encoded training labels
clf.fit(X_train, y_train_encoded)
# Predict for the test texts; predictions are in the encoded label space
# (le.inverse_transform(yh) would map them back to the original labels)
yh = clf.predict(X_test)
文本摘要
# Text summarization with GPTSummarizer from skllm.preprocessing.
from skllm.preprocessing import GPTSummarizer
# Loader for the demo summarization dataset in skllm.datasets
from skllm.datasets import get_summarization_dataset
# Load the data to summarize
X = get_summarization_dataset()
# Build the summarizer on the 'gpt-3.5-turbo' model.
# NOTE(review): max_words=15 presumably limits the summary length in words — confirm whether the cap is hard or soft.
s = GPTSummarizer(openai_model='gpt-3.5-turbo', max_words=15)
# Run fit_transform on X and keep the result in `summaries`
# (presumably one summary per input text — confirm against the skllm docs).
summaries = s.fit_transform(X)
总结
- EOF -
加主页君微信,不仅Python技能+1
主页君日常还会在个人微信分享Python相关工具、资源和精选技术文章,不定期分享一些有意思的活动、岗位内推以及如何用技术做业余项目
加个微信,打开一扇窗
觉得本文对你有帮助?请分享给更多人
推荐关注「Python开发者」,提升Python技能
点赞和在看就是最大的支持❤️