137 lines
5.2 KiB
Python
137 lines
5.2 KiB
Python
|
#!/usr/bin/env python3.4
|
|||
|
# encoding: utf-8
|
|||
|
"""
|
|||
|
Created on 19-3-25
|
|||
|
@title: '微信群好友信息分析'
|
|||
|
@author: Xusl
|
|||
|
"""
|
|||
|
import logging.config
import os
from collections import Counter

import itchat
import matplotlib.pyplot as plt
import pandas as pd

from config import logger_path
from pro_script.wechat_login import wc_login
|
|||
|
|
|||
|
# Apply the logging configuration file referenced by the project config,
# then bind the module-level logger that the functions below write to.
logging.config.fileConfig(logger_path)
logger = logging.getLogger("root")
|
|||
|
|
|||
|
|
|||
|
def grp_sex_ratio(grp_nm):
    """Draw and save a pie chart of the sex distribution of a group chat.

    Reads the member table from ``df_member.csv`` in the current working
    directory, maps the ``Sex`` codes 1 / 2 / 0 to the labels '男' / '女' /
    '其他', and plots the share of each label.  The image is written to the
    ``res`` directory next to the parent of the working directory and is
    also shown on screen.

    :param grp_nm: name of the group chat; used in the chart title and in
        the name of the saved image
    :return: None
    """
    func_name = "群好友性别"
    logger.info('start %s ', func_name)

    df_member = pd.read_csv("df_member.csv")
    # value_counts(normalize=True) yields each label's share, most frequent
    # label first.
    sex_share = (
        df_member['Sex']
        .replace({1: '男', 2: '女', 0: '其他'})
        .value_counts(normalize=True)
    )
    sex_li = list(sex_share.index)
    # Keep full-precision floats: rounding to two significant digits would
    # make the two-decimal percentage labels on the wedges meaningless.
    proportion = [float(share) for share in sex_share.values]
    logger.debug('proportion:%s', proportion)

    if not proportion:
        logger.warning('no Sex values found for %s, chart skipped', grp_nm)
        logger.info('end %s ', func_name)
        return

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign
    plt.figure(figsize=(5, 5))  # square canvas so the pie is a circle

    # pie() requires one explode entry per wedge, so size the offsets (and
    # the colours) from the data instead of assuming three categories.
    palette = ['red', 'yellowgreen', 'lightskyblue']
    explode = [0.01] * len(proportion)
    colors = [palette[i % len(palette)] for i in range(len(proportion))]

    plt.pie(proportion, explode=explode, labels=sex_li, colors=colors,
            autopct='%.2f%%')
    # Legend sits just outside the upper-right corner of the axes.
    plt.legend(loc="upper right", fontsize=10, bbox_to_anchor=(1.1, 1.1),
               borderaxespad=0.3)
    plt.title(grp_nm + '群好友性别比例')

    # Images go to <parent of the working directory>/res.
    pwd_path = os.path.abspath(os.path.dirname(os.getcwd()))
    desc_full = os.path.join(pwd_path, 'res')
    os.makedirs(desc_full, exist_ok=True)  # savefig does not create it
    plt.savefig(os.path.join(desc_full, grp_nm + '好友性别比例'))
    plt.show()
    logger.info('end %s ', func_name)
|
|||
|
|
|||
|
|
|||
|
# Characters that members type in place of '-' when writing a display name
# in the "name-city-industry" form.  '\uff0d', '\uff5e' and '\u3000' are the
# full-width hyphen, the full-width tilde and the ideographic space.
# NOTE(review): the previous replace chain contained no-op pairs such as
# replace('-', '-'), which look like full-width characters lost in transit;
# confirm that the full-width entries match the real input.
_CITY_SEPARATORS = '一_~—+\uff0d\uff5e\u3000'
_CITY_NAME_TABLE = str.maketrans(
    _CITY_SEPARATORS, '-' * len(_CITY_SEPARATORS), ' ')

# Bar colours, repeated cyclically when there are more bars than colours.
_CITY_PALETTE = ['#00FFFF', '#7FFFD4', '#F08080', '#90EE90', '#AFEEEE',
                 '#98FB98', '#B0E0E6', '#00FF7F', '#FFFF00', '#9ACD32']


def _normalize_display_name(raw_name):
    """Return *raw_name* without spaces and with separators unified to '-'."""
    name = str(raw_name).translate(_CITY_NAME_TABLE)
    # Collapse runs of any length; a single replace would leave '--' behind
    # for three or more consecutive separators.
    while '--' in name:
        name = name.replace('--', '-')
    return name


def _count_cities(display_names):
    """Count the middle field of every name shaped like "a-city-b"."""
    counts = Counter()
    for name in display_names:
        fields = name.split('-')
        if len(fields) != 3:  # exactly two separators are required
            continue
        city = fields[1]
        if city:
            counts[city] += 1
    return counts


def grp_city(grp_nm):
    """Draw and save a bar chart of the 20 most common member cities.

    Reads the member table from ``df_member.csv`` in the current working
    directory, normalises each ``DisplayName`` and counts the city field of
    the names written as "name-city-industry".  The image is written to the
    ``res`` directory next to the parent of the working directory.

    :param grp_nm: name of the group chat; used in the chart title and in
        the name of the saved image
    :return: None
    """
    func_name = "群好友城市比例"
    logger.info('start %s ', func_name)

    df_member = pd.read_csv("df_member.csv")
    # Missing display names carry no city information, so drop them.
    mem_nm_li = [_normalize_display_name(raw)
                 for raw in df_member['DisplayName'].dropna()]
    logger.debug('mem_nm_li:%s', mem_nm_li)

    city_counts = _count_cities(mem_nm_li)
    logger.debug('city_dict:%s', dict(city_counts))

    top_cities = city_counts.most_common(20)
    if not top_cities:
        logger.warning('no city found in display names of %s, chart skipped',
                       grp_nm)
        logger.info('end %s ', func_name)
        return
    city_nm = [city for city, _ in top_cities]
    city_num = [count for _, count in top_cities]

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign
    # Start from a fresh figure so the bars are not drawn on top of whatever
    # figure happens to be current (e.g. a previously drawn pie chart).
    plt.figure()

    positions = range(len(city_num))
    colors = [_CITY_PALETTE[i % len(_CITY_PALETTE)] for i in positions]
    plt.bar(positions, city_num, color=colors, width=0.5, align='center')
    plt.xticks(positions, city_nm, fontsize=8)
    for x, y in enumerate(city_num):
        # Print each count 1.0 above the top of its bar.
        plt.text(x, y + 1.0, '%s' % y, ha='center', fontsize=10)
    plt.ylabel('群好友人数')

    prov_title = grp_nm + '群好友人数Top20'
    plt.title(prov_title)

    # Images go to <parent of the working directory>/res.
    pwd_path = os.path.abspath(os.path.dirname(os.getcwd()))
    desc_full = os.path.join(pwd_path, 'res')
    os.makedirs(desc_full, exist_ok=True)  # savefig does not create it
    plt.savefig(os.path.join(desc_full, prov_title))
    logger.info('end %s ', func_name)
|
|||
|
|
|||
|
|
|||
|
def deal():
    """Entry point: export one group chat's members and draw both charts.

    Calls ``wc_login()``, looks up the group chat by name, writes its member
    list to ``df_member.csv`` in the current working directory, and then
    runs ``grp_sex_ratio`` and ``grp_city``, which read that file back.

    :return: None
    :raises IndexError: if no group chat with the configured name is found
    """
    wc_login()

    grp_nm = '中产之路2群(新)'
    mid_road = itchat.search_chatrooms(name=grp_nm)
    if not mid_road:
        # Fail with a readable message instead of a bare
        # "list index out of range" from the lookup below.
        logger.error('group chat %s not found', grp_nm)
        raise IndexError('group chat not found: %s' % grp_nm)

    # update_chatroom is the call that fetches the member list;
    # detailedMember=True asks for the per-member information as well.
    member_list = itchat.update_chatroom(mid_road[0]['UserName'],
                                         detailedMember=True)
    df_member = pd.DataFrame(member_list['MemberList'])
    df_member.to_csv('df_member.csv')

    grp_sex_ratio(grp_nm)
    grp_city(grp_nm)
|
|||
|
|
|||
|
|
|||
|
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    deal()
|