严选作业数

#!/usr/bin/env python
# coding: utf-8

# In[16]:


import pandas as pd
import pymysql
from pyhive import presto


# In[17]:


conn = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn1 = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[13]:


sql1 = '''
SELECT
        sb.subject_name,
        count(hwd.quiz_id) quiz_count,
        count(distinct hwd.home_work_id)hwk_count
FROM
        home_work_detail_info hwd
LEFT JOIN
        home_work hw ON hw.home_work_id  = hwd.home_work_id
LEFT JOIN
        subject sb ON sb.subject_id = hw.subject_id
WHERE
        hw.start_time >= '2019-06-17'
        AND hw.start_time < '2019-06-23'
        AND hwd.type = 8
GROUP BY
        sb.subject_name
'''

df1 = pd.read_sql(sql1,conn)

df1.head()


# In[14]:


df1.to_excel(r'E:/Desktop/6月.xlsx')


# In[ ]:


sql2 = '''
SELECT
        sb.subject_name,
        count(hwd.quiz_id) 
FROM
        home_work_detail_info hwd
LEFT JOIN
        home_work hw ON hw.home_work_id  = hwd.home_work_id
LEFT JOIN
    view_hwl_quiz_knowledge hqk ON hqk.quiztempid = hwd.quiz_id
LEFT JOIN
    bidata.hwl_paper_quiz_info hpqi ON hpqi.quiz_id = hqk.hwlquizid
LEFT JOIN
        subject sb ON sb.subject_id = hw.subject_id
WHERE
        hw.start_time >= '2019-06-17'
        AND hw.start_time < '2019-06-23'
        AND hwd.type = 8
        AND hpqi.logic_ques_type_name in ('单选','多选','多选多','填空','判断','配对','复合','完形填空','连词成句')
GROUP BY
        sb.subject_name
'''


# In[18]:


sql2_bk = '''
SELECT
        sb.subject_name,
        count(hwd.quiz_id) 
FROM
        home_work_detail_info hwd
LEFT JOIN
        home_work hw ON hw.home_work_id  = hwd.home_work_id
LEFT JOIN
        subject sb ON sb.subject_id = hw.subject_id
WHERE
        hw.start_time >= '2019-06-17'
        AND hw.start_time < '2019-06-23'
        AND hwd.type = 8
        and hwd.quiz_type = 2
 GROUP BY
        sb.subject_name
'''

df4 = pd.read_sql(sql2_bk,conn)

df4.head()


# In[ ]:





# In[ ]:





# In[ ]:





# In[5]:


df2 = pd.read_sql(sql2_bk,conn)

df2.head()


# In[6]:


df_sub = pd.merge(df1,df2,how='left',on=['subject_name'])

df_sub.head()


# In[7]:


df_sub.rename(columns={'subject_name':'学科学段','count(hwd.quiz_id)_x':'题目总数','count(hwd.quiz_id)_y':'客观题总数'},inplace=True)

df_sub.head()


# In[8]:


df_sub.to_excel(r'E:/Desktop/6yue.xlsx')

销售数据集明细日维度

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import sys
sys.path.append(r'e:\Desktop\新环境\DataMachine_for_timers\src')

from utils import mysqlhelper,emailhelper
from config import config
import re
import pandas as pd
mysqlinstance_tm_new_bidata = mysqlhelper(**config.tm_new_bidata)
mysqlinstance_tm_new_hfjy = mysqlhelper(**config.tm_new_hfjy)
mysqlinstance_dmart_bidata = mysqlhelper(**config.dmart_bidata)
mysqlinstance_hrm_hrm_sale = mysqlhelper(**config.hrm_hrm_sale)
mysqlinstance_dmart_dmart = mysqlhelper(**config.dmart_dmart)


# In[2]:


def get_data():
    sql  ='''
    SELECT
    re.id ID,
    re.name 姓名,
    s.name 毕业学校,
    re.phone 电话,
    re.email 邮箱,
    re.referee 推荐人,
        (select name from view_user where re.director_id = view_user.id) 渠道跟进人,
    date_format(re.add_time,'%Y-%m-%d') resume_addtime,
    date_format(re.first_allot_time,'%Y-%m-%d') 第一次分配时间,
    re.second_allot_time 最后一次分配时间,
    re.next_flow_time 下次沟通时间,
        (case re.status when 1 then '待处理' when 2 then '再联系' when 3 then '未接通' when 4 then '已邀约' when 5 then '未参加面试' when 6 then '面试通过' when 7 then '已入培' when 8 then '出培' when 9 then '待定' when 10 then '淘汰' end)应聘状态,
        (select name from out_reason where out_reason.id = re.out_reason_id) 淘汰原因,
        (select name from out_reason where out_reason.id = re.second_reason) 淘汰详细原因,
    re.work_place 工作地点,
    re.station 面试岗位,
    ir.interview_name 面试官姓名,
    case st.level when 1 then '专员' when 2 then '助理' when 3 then '主管' when 4 then '经理' when 5 then '总监' end  级别,
    s.area 地区,
        (select name from channel where channel.id = re.first_channel) one_channel_name,
        (select name from channel where channel.id = re.second_channel) second_channel_name,
    date_format(re.interview_date,'%Y-%m-%d') interview_date,
    date_format(re.offer_time,'%Y-%m-%d') offer_time,
    date_format(re.entry_time,'%Y-%m-%d') entry_time,
    date_format(ir.face_time,'%Y-%m-%d')  face_time,
        rr.traindate 预约培训时间,
        rr.next_time 培训next_time,
        rr.add_time 培训add_time
from
    hrm_sale.view_resume re
    left join hrm_sale.view_user u on u.id = re.id 
    left join hrm_sale.interviewer_record ir on ir.r_id = re.id
    left join hrm_sale.stations st on st.id = re.id
    left join hrm_sale.school s on s.id = re.id
        left join hrm_sale.romm_record rr on rr.r_id = re.id
where
    re.add_time >= '2018-01-01' and re.add_time < CURDATE()
group by
    re.id
    '''

    df2 = mysqlinstance_hrm_hrm_sale.get_df(sql)
    return df2

def data_modify(df2):

    sub_data = df2.groupby(by=['one_channel_name', 'second_channel_name', 'resume_addtime'])[
        'ID'].count().reset_index().rename(columns={'ID': 'num', 'resume_addtime': 'resume_addtime'})

    data1 = df2.groupby(by=['one_channel_name', 'second_channel_name', 'interview_date'])[
        'ID'].count().reset_index().rename(columns={'ID': 'interview_date', 'interview_date': 'resume_addtime'})

    data2 = df2.groupby(by=['one_channel_name', 'second_channel_name', 'offer_time'])[
        'ID'].count().reset_index().rename(columns={'ID': 'offer_time', 'offer_time': 'resume_addtime'})

    data3 = df2.groupby(by=['one_channel_name', 'second_channel_name', '第一次分配时间'])[
        'ID'].count().reset_index().rename(columns={'ID': 'first_allot_time', '第一次分配时间': 'resume_addtime'})

    data4 = df2.groupby(by=['one_channel_name', 'second_channel_name', 'face_time'])['ID'].count().reset_index().rename(
        columns={'ID': 'face_time', 'face_time': 'resume_addtime'})

    sub_data = pd.merge(sub_data, data1, how='left', on=['one_channel_name', 'second_channel_name', 'resume_addtime'])
    sub_data = pd.merge(sub_data, data4, how='left', on=['one_channel_name', 'second_channel_name', 'resume_addtime'])
    sub_data = pd.merge(sub_data, data2, how='left', on=['one_channel_name', 'second_channel_name', 'resume_addtime'])
    sub_data = pd.merge(sub_data, data3, how='left', on=['one_channel_name', 'second_channel_name', 'resume_addtime'])


    data_index = ['one_channel_name', 'second_channel_name', 'resume_addtime', 'num', 'interview_date', 'face_time',
                  'offer_time', 'first_allot_time']
    wgq_sale = pd.DataFrame(data=sub_data, columns=data_index)
    return wgq_sale


# In[3]:


df = get_data()
df.head()


# In[4]:


df1 = data_modify(df)
df1.head()


# In[ ]:





# In[ ]:





# In[5]:


df1.to_excel(r'E:/Desktop/销售面试明细日维度3.xlsx')



文档更新时间: 2019-08-19 11:44   作者:admin