讲义中的题目在一节课使用时常大于40%

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[2]:


#### 每节课实际上课时间和使用的各讲义时间

sql1 = '''
SELECT
        max(sub.subject_name) subject_name,
        lp.lesson_plan_id,
        lpqh.paperid,
        max(date_diff('second', (cast (substring(real_start_time,1,19) as TIMESTAMP)), (cast (substring(real_end_time,1,19) as TIMESTAMP)))) lesson_time,
        sum(quiz.stay_time) paper_time
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
LEFT JOIN
        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
LEFT JOIN
        (select ilrqi.lesson_plan_id,ilrqi.quiz_id,ilrqi.stay_time from edw.ls_learning_record_quiz_info ilrqi where ilrqi.real_start_time >= '2019-07-01' and ilrqi.real_start_time < '2019-08-01') quiz ON quiz.lesson_plan_id= lpq.lesson_plan_id and lpq.quiz_id = cast (quiz.quiz_id as int)
LEFT JOIN
        dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
        lp.adjust_start_time >= '2019-07-01'
        AND lp.adjust_start_time < '2019-08-01'
        AND lp.solve_status <> 6
        AND lp.status in (3,5)
        AND lp.lesson_type = 2
GROUP BY
        lp.lesson_plan_id,lpqh.paperid
HAVING
        paperid is not null
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[3]:


df1.shape


# In[4]:


df1['pro'] = df1['paper_time']/df1['lesson_time']
df1.head()


# In[5]:


df1.dtypes


# In[6]:


df1.shape


# In[7]:


df4 = df1[df1['pro']>=0.4]
df4.head()


# In[8]:


df4.shape


# In[9]:


df4.drop_duplicates(subset='lesson_plan_id',inplace=True)
df4.head()


# In[10]:


df4.shape


# In[11]:


df5 = df4.groupby(['subject_name'])['lesson_plan_id'].count().reset_index()
df5.head()


# In[12]:


df5.rename(columns={'lesson_plan_id':'课程数'},inplace=True)
df5.head()


# In[13]:


#### 成单数量

sql3 = '''
SELECT
        sub.subject_name,
        sum((case when (select count(contract_id) from view_tms_contract tc 
        where tc.student_intention_id = s.student_intention_id 
        and status in (3,4,5,9) and submit_time >lp.adjust_start_time)>0 then 1 
        else 0 end)) '成单数',
            case when opt_user=972038 then 'AI直排' 
      when opt_user=381322 and lpo.inform_status = 5 then 'app排课' 
      when opt_user=381322 and (select count(*) from hfjydb.data_teacher_requirements where order_id = lpo.order_id) > 0 then '指定老师排课' 
      when opt_user is null then '未排课' when opt_user not in  (381322, 32547,972038) then '教务手动' else '系统未知'  end 排课方式
FROM
        lesson_plan_order lpo
LEFT JOIN
        lesson_relation lr on lr.order_id = lpo.order_id
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id = lr.plan_id
LEFT JOIN
        tms_lesson_plan_history tlph on tlph.lesson_plan_id = lr.plan_id
LEFT JOIN
        view_user_info u on u.user_id = lp.teacher_id
INNER JOIN
        subject sub on sub.subject_id = lp.subject_id
LEFT JOIN
        view_student s on s.student_id = lp.student_id
WHERE
        lp.lesson_plan_id in {0}
GROUP BY
        sub.subject_name
HAVING
        排课方式 <> '指定老师排课'
'''.format(tuple(df4['lesson_plan_id']))

df33 = pd.read_sql(sql3,conn_mysql)

df33.head()


# In[14]:


df33.shape


# In[15]:


df_sub = pd.merge(df5,df33,on='subject_name',how='left')
df_sub


# In[16]:


df_sub['贡献率'] = df_sub['成单数']/df_sub['课程数']
df_sub['贡献率'] = df_sub['贡献率'].apply(lambda x: format(x, '.2%')) 
df_sub.head()


# In[17]:


df_sub.drop('排课方式', axis=1,inplace=True)
df_sub


# In[ ]:


df_sub.to_excel(r'C:\Users\hp\Desktop\7月试听课贡献率006.xlsx')


# In[ ]:


#### 8-16


# In[ ]:


#### 成单数


# In[ ]:


sql04 = '''
select
        lp.lesson_plan_id,
        tc.student_intention_id,
        tc.contract_id
from
        view_tms_contract tc
LEFT JOIN
        view_student s on s.student_intention_id = tc.student_intention_id
LEFT JOIN
        lesson_plan lp on lp.student_id= s.student_id
where 
        lp.lesson_plan_id in {0}
        and tc.status in (3,4,5,9)
        and tc.submit_time >lp.adjust_start_time
GROUP BY
        tc.student_intention_id
'''.format(tuple(df4['lesson_plan_id']))

df04 = pd.read_sql(sql04,conn_mysql)

df04.head()


# In[ ]:


df04.shape


# In[ ]:


df05 = pd.merge(df04,df4,on='lesson_plan_id',how='left')
df05.head()


# In[ ]:


df05.shape


# In[ ]:


df05.to_excel(r'C:\Users\hp\Desktop\df05.xlsx')


# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:


df_sub = pd.merge(df33,df4,on=['lesson_plan_id','subject_name'],how='left')
df_sub


# In[ ]:


df_sub.to_excel(r'C:\Users\hp\Desktop\7月试听课贡献率007.xlsx')


# In[ ]:


df4.to_excel(r'C:\Users\hp\Desktop\df4.xlsx')


# In[ ]:


sql01 = '''
SELECT
        s.student_intention_id,
        lp.lesson_plan_id
FROM
        lesson_plan lp
LEFT JOIN
        view_student s on s.student_id = lp.student_id
WHERE
        lp.lesson_plan_id in {0}

'''.format(tuple(df4['lesson_plan_id']))

df01 = pd.read_sql(sql01,conn_mysql)

df01.head()


# In[ ]:


sql02 = '''
select
        tc.student_intention_id,
        tc.contract_id
from
        view_tms_contract tc
LEFT JOIN
        view_student s on s.student_intention_id = tc.student_intention_id
LEFT JOIN
        lesson_plan lp on lp.student_id= s.student_id
where 
        tc.student_intention_id in {0}
        and tc.status in (3,4,5,9)
        and tc.submit_time >lp.adjust_start_time
GROUP BY
        tc.student_intention_id
'''.format(tuple(df01['student_intention_id']))

df02 = pd.read_sql(sql02,conn_mysql)

df02.head()


# In[ ]:


df02.shape

文档更新时间: 2019-08-19 11:58   作者:admin