zxj排课率

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# # 排课率 = 当月排课人数/上月底在读人数

# ### =============================月维度=============================

# In[2]:


sql1 = '''
SELECT
        '201907' as '统计时间',
        a.学段,
        count(DISTINCT student_no) as '在读学生总数量'
FROM
        (
        SELECT
                stat_date,
                student_no,
                (case when slm.exam_year >= 2025 and slm.exam_year < 2031 then '小学'
             when slm.exam_year >= 2022 and slm.exam_year < 2025 then '初中'
             when slm.exam_year >= 2020 and slm.exam_year < 2022 then '高中' else '' end) '学段',
                remaining_period as '剩余总课时',
                status
        FROM
                yxy_curriculum_status_log_monthly slm
        WHERE
                slm.stat_date = '2019-06-30'
                and remaining_period > 0
                and slm.exam_year <> 2019
        ) a
WHERE
        a.学段 <> ''
GROUP BY
        a.学段
'''

df1 = pd.read_sql(sql1,conn_dmart)

df1.head()


# In[3]:


sql2 = '''
SELECT
        '201907' as '统计时间',
        a.学段,
        count(a.student_id) as '排课学生总数量'
FROM
(
SELECT
        lp.student_id,
        DATE(lp.adjust_start_time) as 'stat_date',
                (case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
             when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
             when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '' end) '学段'
FROM
        tms_lesson_plan_history lph
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
        view_student s on s.student_id=lp.student_id
WHERE
        year(lp.adjust_start_time) = 2019
        and month(lp.adjust_start_time)= 7
        and lph.opt_type=1
        and lp.lesson_type=1
        and s.account_type=1
        and s.exam_year <> 2019
GROUP BY
        lp.student_id
) a
WHERE
        a.学段 <> ''
GROUP BY
        a.学段
'''

df2 = pd.read_sql(sql2,conn_mysql)

df2.head()


# In[4]:


df_sub = pd.merge(df1,df2,on=['统计时间','学段'],how='left')

df_sub.head()


# In[5]:


df_sub['排课率'] = df_sub['排课学生总数量']/df_sub['在读学生总数量']
df_sub['排课率'] = df_sub['排课率'].apply(lambda x: format(x, '.2%')) 
df_sub.head()


# ### =============================周维度=============================

# In[6]:


sql3 = '''
SELECT
        a.周数,
        a.学段,
        count(DISTINCT a.student_id) as '排课总人数'
FROM
(
SELECT
        lp.student_id,
        DATE(lp.adjust_start_time) as 'stat_date',
        (case when DAY(lp.adjust_start_time) in (1,2,3,4,5,6,7) then '第一周' when DAY(lp.adjust_start_time) in (8,9,10,11,12,13,14) then '第二周'
        when DAY(lp.adjust_start_time) in (15,16,17,18,19,20,21) then '第三周' when DAY(lp.adjust_start_time) in (22,23,24,25,26,27,28) then '第四周'
        when DAY(lp.adjust_start_time) in (29,30,31) then '第五周' else '' end) as '周数',
                (case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
             when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
             when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '' end) '学段'

FROM
        tms_lesson_plan_history lph
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
        view_student s on s.student_id=lp.student_id
WHERE
        year(lp.adjust_start_time) = 2019
        and month(lp.adjust_start_time)= 7
        and lph.opt_type=1
        and lp.lesson_type=1
        and s.account_type=1
        and s.exam_year <> 2019
) a
WHERE
        a.学段 <> ''
GROUP BY
        a.周数,a.学段
ORDER BY
        field(a.周数,'第一周','第二周','第三周','第四周','第五周'),
        field(a.学段,'小学','初中','高中')
'''

df3 = pd.read_sql(sql3,conn_mysql)

df3.head()


# In[7]:


df4 = pd.merge(df3,df1,on='学段',how='inner')
df4


# In[8]:


df4.drop('统计时间',axis=1,inplace=True)
df4.head()


# In[9]:


df4['排课率'] = df4['排课总人数']/df4['在读学生总数量']
df4['排课率'] = df4['排课率'].apply(lambda x: format(x, '.2%')) 
df4.head()


# ### =================================日维度======================================

# In[10]:


sql5 = '''
SELECT
        a.天数,
        a.学段,
        count(DISTINCT a.student_id) as '排课学生总数量'
FROM
(
SELECT
        lp.student_id,
        DAY(lp.adjust_start_time) as '天数',
                (case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
             when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
             when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '' end) '学段'

FROM
        tms_lesson_plan_history lph
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
        view_student s on s.student_id=lp.student_id
WHERE
        year(lp.adjust_start_time) = 2019
        and month(lp.adjust_start_time)= 7
        and lph.opt_type=1
        and lp.lesson_type=1
        and s.account_type=1
        and s.exam_year <> 2019
) a
WHERE
        a.学段 <> ''
GROUP BY
        a.天数,a.学段
'''

df5 = pd.read_sql(sql5,conn_mysql)

df5.head()


# In[11]:


df5 = pd.merge(df5,df1,on='学段',how='inner')
df5


# In[12]:


df5.drop('统计时间',axis=1,inplace=True)


# In[13]:


df5['排课率'] = df5['排课学生总数量']/df5['在读学生总数量']
df5['排课率'] = df5['排课率'].apply(lambda x: format(x, '.2%')) 
df5.head()


# In[14]:


writer=pd.ExcelWriter(r'C:\Users\hp\Desktop\19年7月排课率004.xlsx')
df_sub.to_excel(writer,sheet_name='月维度')
df4.to_excel(writer,sheet_name='周维度')
df5.to_excel(writer,sheet_name='日维度')
writer.save()


# In[ ]:




文档更新时间: 2019-08-19 11:58   作者:admin