wechatAnaly/pro_script/wechat_group.py

137 lines
5.2 KiB
Python
Raw Permalink Normal View History

2022-01-18 08:07:24 +00:00
#!/usr/bin/env python3.4
# encoding: utf-8
"""
Created on 19-3-25
@title: '微信群好友信息分析'
@author: Xusl
"""
import logging.config
import os
import re
from collections import Counter

import itchat
import matplotlib.pyplot as plt
import pandas as pd

from config import logger_path
from pro_script.wechat_login import wc_login
# Load the logging configuration from the file referenced by logger_path
# (imported from the project's config module).
logging.config.fileConfig(logger_path)
# Every function in this module logs through the logger named "root".
logger = logging.getLogger("root")
def grp_sex_ratio(grp_nm, ):
    """
    Plot the gender distribution of a group's members as a pie chart.

    Reads ``df_member.csv`` from the current working directory, computes the
    share of each value in its ``Sex`` column, draws a pie chart, saves it
    under ``<parent of cwd>/res/`` and finally displays it.

    :param grp_nm: name of the group chat; used in the chart title and in
                   the name of the saved image
    :return: None
    """
    func_name = "群好友性别"
    logger.info('start %s ', func_name)

    df_member = pd.read_csv("df_member.csv")

    # Each code needs its own label: if two codes share a label they collapse
    # into a single category once value_counts() runs.
    # NOTE(review): labels assume itchat reports Sex as 1 = male, 2 = female,
    # 0 = not specified -- confirm against the itchat member schema.
    sex_labels = {1: '男', 2: '女', 0: '其他'}
    sex_share = df_member['Sex'].replace(sex_labels).value_counts(normalize=True)

    sex_li = list(sex_share.index)
    # Keep real floats; autopct below takes care of the display formatting.
    proportion = [float(share) for share in sex_share.values]
    logger.debug('proportion:%s', proportion)

    if not sex_li:
        # Nothing to draw (empty file or an all-NaN Sex column).
        logger.warning('no Sex data found, skip %s', func_name)
        logger.info('end %s ', func_name)
        return

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
    plt.figure(figsize=(5, 5))  # square canvas so the pie is a true circle

    # plt.pie requires one explode offset per wedge, so size the list from
    # the number of categories actually present instead of assuming three.
    explode = [0.01] * len(sex_li)
    colors = ['red', 'yellowgreen', 'lightskyblue']
    plt.pie(proportion, explode=explode, labels=sex_li, colors=colors, autopct='%.2f%%')

    # Legend anchored slightly outside the upper-right corner of the axes.
    plt.legend(loc="upper right", fontsize=10, bbox_to_anchor=(1.1, 1.1), borderaxespad=0.3)
    plt.title(grp_nm + '群好友性别比例')

    # Images go to the "res" directory next to the working directory.
    out_dir = os.path.join(os.path.abspath(os.path.dirname(os.getcwd())), 'res')
    os.makedirs(out_dir, exist_ok=True)  # savefig fails if the directory is missing
    plt.savefig(os.path.join(out_dir, grp_nm + '好友性别比例'))
    plt.show()

    logger.info('end %s ', func_name)
# Translation table used to normalise group display names: plain spaces are
# dropped and every separator variant becomes an ASCII '-'.
# NOTE(review): the original separator literals were lost (they appear as
# empty strings in the source); the non-ASCII entries below are the usual
# full-width / dash variants and should be confirmed against real data.
_SEPARATOR_TABLE = str.maketrans({
    ' ': None,
    '_': '-',
    '~': '-',
    '+': '-',
    '\u3000': '-',  # ideographic (full-width) space
    '\u2014': '-',  # em dash
    '\u2013': '-',  # en dash
    '\uff0d': '-',  # full-width hyphen-minus
    '\uff5e': '-',  # full-width tilde
    '\uff0b': '-',  # full-width plus sign
})


def _normalize_display_name(raw_name):
    """Return the name with spaces removed and separator runs reduced to one '-'."""
    unified = str(raw_name).translate(_SEPARATOR_TABLE)
    return re.sub(r'-{2,}', '-', unified)


def _count_cities(display_names):
    """Count the middle field of every name shaped like ``name-city-industry``."""
    city_counts = Counter()
    for name in display_names:
        fields = name.split('-')
        # Exactly two separators and a non-empty city field.
        if len(fields) == 3 and fields[1]:
            city_counts[fields[1]] += 1
    return city_counts


def grp_city(grp_nm):
    """
    Plot the 20 most common cities among a group's members as a bar chart.

    Reads ``df_member.csv`` from the current working directory and parses the
    ``DisplayName`` column, keeping only names of the form
    ``name-city-industry`` (after separator normalisation). The chart is
    saved under ``<parent of cwd>/res/``; it is not shown on screen.

    :param grp_nm: name of the group chat; used in the chart title and in
                   the name of the saved image
    :return: None
    """
    func_name = "群好友城市比例"
    logger.info('start %s ', func_name)

    # dropna() keeps missing display names from turning into the string 'nan'.
    display_names = pd.read_csv("df_member.csv")['DisplayName'].dropna()
    mem_nm_li = [_normalize_display_name(name) for name in display_names if name]
    logger.debug('mem_nm_li%s', mem_nm_li)

    city_dict = _count_cities(mem_nm_li)
    logger.debug('city_dict%s', dict(city_dict))

    city_dict_top20 = city_dict.most_common(20)
    city_nm = [city for city, _ in city_dict_top20]  # city names
    city_num = [count for _, count in city_dict_top20]  # members per city

    palette = ['#00FFFF', '#7FFFD4', '#F08080', '#90EE90', '#AFEEEE',
               '#98FB98', '#B0E0E6', '#00FF7F', '#FFFF00', '#9ACD32']
    colors = palette * 2  # one colour per bar for up to 20 bars

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    # Start a fresh figure so the bars are never drawn on top of a chart
    # left over from an earlier plotting call.
    plt.figure()
    positions = range(len(city_num))
    plt.bar(positions, city_num, color=colors, width=0.5, align='center')
    plt.xticks(positions, city_nm, fontsize=8)  # x-axis tick labels
    for x, y in enumerate(city_num):
        # Print each count 1.0 above the top of its bar.
        plt.text(x, y + 1.0, '%s' % y, ha='center', fontsize=10)
    plt.ylabel('群好友人数')

    # Build the title / file name from grp_nm instead of a hard-coded group.
    prov_title = grp_nm + '群好友人数Top20'
    plt.title(prov_title)

    # Images go to the "res" directory next to the working directory.
    out_dir = os.path.join(os.path.abspath(os.path.dirname(os.getcwd())), 'res')
    os.makedirs(out_dir, exist_ok=True)  # savefig fails if the directory is missing
    plt.savefig(os.path.join(out_dir, prov_title))

    logger.info('end %s ', func_name)
def deal():
    """
    Entry point: export one group's member list and draw both charts.

    Calls ``wc_login()``, looks up the target chatroom by name, writes its
    detailed member list to ``df_member.csv`` in the current working
    directory, then runs ``grp_sex_ratio`` and ``grp_city`` on that file.
    If the chatroom cannot be found, an error is logged and nothing is
    written or plotted.

    :return: None
    """
    wc_login()  # presumably performs the WeChat login -- defined in pro_script.wechat_login

    # itchat.get_chatrooms() can be used instead to list every chatroom.
    grp_nm = '中产之路2群(新)'
    mid_road = itchat.search_chatrooms(name=grp_nm)  # chatrooms matching the name
    if not mid_road:
        # Without this guard the lookup below fails with a bare IndexError.
        logger.error('chatroom not found: %s', grp_nm)
        return

    # update_chatroom with detailedMember=True returns the chatroom record
    # including the detailed per-member information under 'MemberList'.
    member_list = itchat.update_chatroom(mid_road[0]['UserName'], detailedMember=True)
    df_member = pd.DataFrame(member_list['MemberList'])  # member records as a DataFrame
    df_member.to_csv('df_member.csv')

    grp_sex_ratio(grp_nm)
    grp_city(grp_nm)
# Run the export-and-plot pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    deal()