课程使用讲义占比

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[2]:


sql1 = '''
SELECT
        max(b.subject_name) subject_name,
        count(b.lesson_plan_id) count_plan
FROM
(
        SELECT
                max(a.subject_name) subject_name,
                a.lesson_plan_id
        FROM
        (
                SELECT
                        max(sb.subject_name) subject_name,
                        lp.lesson_plan_id,
                        lpqh.paperid,
                        count(lpq.quiz_id) quiz_count

                FROM
                        dwd_db_hfjydb.lesson_plan lp
                LEFT JOIN
                        dwd_db_hfjydb.lesson_plan_quiz_total lpq ON lpq.lesson_plan_id = lp.lesson_plan_id
                LEFT JOIN
                        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
                LEFT JOIN
                        dwd_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
                WHERE
                        lp.adjust_start_time >= '2019-05-01'
                        AND lp.adjust_start_time < '2019-06-01'
                        AND lp.solve_status <> 6
                        AND lp.status in (3,5)
                        AND lp.lesson_type = 1
                GROUP BY
                        lp.lesson_plan_id,lpqh.paperid
                HAVING
                        count(lpq.quiz_id) >= 5
        ) a
        GROUP BY
                a.lesson_plan_id
        HAVING
                count(a.paperid) >= 2
) b
GROUP BY
         b.subject_name
'''

df1 = pd.read_sql(sql1,conn_hive)

df1


# In[3]:


sql2 = '''
SELECT
        sb.subject_name,
        count(lp.lesson_plan_id) count_all
FROM
        dwd_db_hfjydb.lesson_plan lp
LEFT JOIN
        dwd_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
WHERE
        lp.adjust_start_time >= '2019-05-01'
        AND lp.adjust_start_time < '2019-06-01'
        AND lp.solve_status <> 6
        AND lp.status in (3,5)
        AND lp.lesson_type in (1,2)
GROUP BY
        sb.subject_name
'''
df2 = pd.read_sql(sql2,conn_hive)

df2.head()


# In[4]:


df_sub = pd.merge(df2,df1,how='left',on=['subject_name'])

df_sub


# In[5]:


df_sub['使用率'] = df_sub['count_plan']/df_sub['count_all']
df_sub['使用率'] = df_sub['使用率'].apply(lambda x: format(x, '.2%')) 
df_sub.head()


# In[6]:


df_sub


# In[7]:


df_sub.to_excel(r'C:\Users\hp\Desktop\5月正式课讲义使用率.xlsx')
文档更新时间: 2019-08-19 11:58   作者:admin