大家好,这里是七七。今天是Python学习专题的最后一期,要介绍两段代码:一段是名称字符串匹配代码,另一段是时间序列分解实现代码。
首先介绍名称字符串匹配代码。
一、名称字符串匹配代码
# For every purchase date, fuzzy-match the product names bought on that day
# and report pairs that look like near-duplicates of each other.
import re

import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Minimum fuzz.ratio score (0-100) for two names to be reported as similar.
THRESHOLD = 80
# A parenthesised number such as "(1)"; both names of a pair must contain one.
NUMBERED_SUFFIX = re.compile(r'\(\d+\)')

info_data = pd.read_excel("./data/附件1.xlsx")
sale_data = pd.read_excel("./data/附件2.xlsx")  # loaded but not used below
buy_data = pd.read_excel("./data/附件3.xlsx")
loss_data = pd.read_excel("./data/附件4.xlsx")  # loaded but not used below

data = pd.merge(buy_data, info_data, on="单品编码", how="left")
# .copy() so the column assignment below works on an independent frame
# instead of triggering SettingWithCopyWarning.
data = data[["日期", "单品名称"]].copy()
data["日期"] = pd.to_datetime(data["日期"])
data = data.set_index("日期")

grouped = data.groupby("日期")
for group_name, group_data in grouped:
    # NaN names (possible after the left merge) cannot be sliced or scored
    # as strings, so they are skipped.
    names = group_data["单品名称"].dropna().tolist()
    remaining = names
    similar_strings = {}
    for name in names:
        candidates = [s for s in remaining if s != name]
        if not candidates:
            # Nothing to compare against (single name on this date, or all
            # names identical); extractOne would return None here.
            continue
        best_match = process.extractOne(name, candidates, scorer=fuzz.ratio)
        if best_match is None:
            continue
        matched, score = best_match[0], best_match[1]
        # Require a high score and the same first two characters.
        if score < THRESHOLD or matched[:2] != name[:2]:
            continue
        if NUMBERED_SUFFIX.search(matched) and NUMBERED_SUFFIX.search(name):
            similar_strings[name] = matched
            # Remove the recorded name from later candidate lists so the
            # reverse pair is not reported a second time.
            remaining = candidates
    if similar_strings:
        print(group_name)
        for original, similar in similar_strings.items():
            print(f"主要相同的字符串:'{original}'和'{similar}'")

# DataFrame.info() prints its summary itself; printing the attribute without
# calling it only shows the bound-method repr.
data.info()
二、时间序列分解实现代码
import matplotlib.pyplot as plt
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

plt.rcParams['font.sans-serif'] = [u'simHei']
plt.rcParams['axes.unicode_minus'] = False


def time_series_3d(pd_list: list, name, period: int = 365):
    """Plot and export the additive-decomposition trend of several series.

    Each series in ``pd_list`` is decomposed with ``seasonal_decompose``
    (additive model); its trend component is drawn on one shared figure and
    stored as a column of a DataFrame that is written to
    ``/trend/{name}.csv`` (GBK encoded), printed, and then the figure is shown.

    Args:
        pd_list: List of named pandas Series to decompose; each Series'
            ``name`` becomes its CSV column and legend label.
        name: Used as the figure title and the CSV file name.
        period: Seasonal period handed to ``seasonal_decompose``.
            Defaults to 365, the value that was previously hard-coded.
    """
    plt.figure(figsize=(8, 6))
    trend_df = pd.DataFrame()
    for df in pd_list:
        result = seasonal_decompose(df, model='additive', period=period)
        # Label each curve with its series name so the legend can tell the
        # trends apart.
        plt.plot(result.trend, label=df.name)
        trend_df[df.name] = result.trend
    plt.legend(loc='upper left')
    # Keep only rows where every trend has a value (the moving-average trend
    # is missing near both ends of a series).
    trend_df.dropna(inplace=True)
    # NOTE(review): this path is anchored at the filesystem root while the
    # inputs use "./data/"; confirm that "/trend/" is intended and exists.
    trend_df.to_csv(f"/trend/{name}.csv", encoding="GBK")
    print(trend_df)
    plt.title(name, fontsize=16)
    plt.tight_layout()
    plt.show()


######################
# Load data
info_data = pd.read_excel("./data/附件1.xlsx")
sale_data = pd.read_excel("./data/附件2.xlsx")
buy_data = pd.read_excel("./data/附件3.xlsx")
loss_data = pd.read_excel("./data/附件4.xlsx")
print(sale_data)
sale_data["销售日期"] = pd.to_datetime(sale_data["销售日期"])
# TODO: the workbook path is empty in the original; fill it in before running.
rst_data = pd.read_excel("")

####################
# Process
grouped = rst_data.groupby("品类")
for groupe_name, group_data in grouped:
    # Work on a copy so the column assignment does not raise
    # SettingWithCopyWarning on the groupby slice.
    group_data = group_data.copy()
    group_data["销售日期"] = pd.to_datetime(group_data["销售日期"])
    group_data = group_data.set_index("销售日期")
    # time_series_3d takes ONE list of series, so the four columns are wrapped
    # in a list instead of being passed as separate positional arguments.
    time_series_3d(
        [
            group_data["销量(千克)"],
            group_data["利润率"],
            group_data["批发价格(元/千克)"],
            group_data["销售单价(元/千克)"],
        ],
        name=groupe_name,
    )