讲义数据

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto


# In[2]:


conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[11]:


sql1 = '''
SELECT  
        temp.paperid paper_id,
        count(temp.more15_lesson_plan_id) more15_lesson_count,
        count(CASE WHEN temp.more15_lesson_plan_id is not null AND temp.deal_lesson_plan_id is not null then temp.lesson_plan_id end) more15_deal_lesson_count,
        count(temp.more10_lesson_plan_id) more10_lesson_count,
        count(CASE WHEN temp.more10_lesson_plan_id is not null AND temp.deal_lesson_plan_id is not null then temp.lesson_plan_id end) more10_deal_lesson_count
FROM
(SELECT 
        Lp.lesson_plan_id ,    
        lpq.paperid,
         (case when (select count(1) 
                from ods_hfjydb.view_tms_contract tc 
                where vs.student_intention_id = tc.student_intention_id 
                        and tc.status in (4,5) 
                        and tc.submit_time > lp.adjust_start_time 
                        and contract_id like 'X%' 
                        and new_sign = 1) > 0 then lp.lesson_plan_id end) deal_lesson_plan_id,
        (case when count(queid) >= 15 then Lp.lesson_plan_id  end) more15_lesson_plan_id,
        (case when count(queid) >= 10 then Lp.lesson_plan_id  end) more10_lesson_plan_id,
        count(queid) lesson_quiz_count
FROM
        ods_hfjydb.lesson_plan lp
LEFT JOIN
        ods_hfjydb.lesson_plan_quiz_total lpqt on lp.lesson_plan_id = lpqt.lesson_plan_id
LEFT JOIN
        ods_hfjydb.lesson_plan_quiz_hwlinfo_bak lpq on lp.lesson_plan_id = lpq.lessonplanid and lpqt.quiz_id=lpq.quizid
LEFT JOIN
        ods_hfjydb.jx_handouts jh on jh.paper_id = lpq.paperid
LEFT JOIN
        ods_hfjydb.view_student vs on vs.student_id = lp.student_id
where vs.name not like '%测试%'
    and lp.lesson_type = 2
    and lpqt.type=8
    and lp.status in(3,5) 
    and lp.solve_status!=6 
    and lp.adjust_start_time >= '2019-01-01'
    and lp.adjust_start_time < '2019-07-01'
group BY Lp.lesson_plan_id,lpq.paperid,vs.student_intention_id, lp.adjust_start_time) temp
GROUP BY
        temp.paperid
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[12]:


sql2 = '''
SELECT
      jh.directory_id as '讲义id',
      jh.handouts_name as '讲义名称',
      (SELECT `name` FROM view_user_info WHERE user_id=jh.principal) as '讲义负责人',
      jh.create_time as '讲义创建时间',
      jh.paper_id as 'paper_id',
      jd.dir_name as '讲义类型',
      (SELECT `value` FROM tms_ddic WHERE `code`=jd.subject_id and type='JY003') as '学科',
      (SELECT `value` FROM tms_ddic WHERE `code`=jd.grade_id and type='JY002' ) as '年级'
FROM
      jx_handouts jh
LEFT JOIN
        jx_directory jd ON jd.id = jh.directory_id
LEFT JOIn
        view_user_info u on u.user_id=jh.principal
WHERE
        account_type = 1 #正式
        and jh.`status` = 5 #讲义状态为已上线
        and jh.is_del = 0 #讲义没有删除
        and jd.course_type = 1
'''

df2 = pd.read_sql(sql2,conn_mysql)

df2.head()


# In[13]:


df_sub = pd.merge(df2,df1,how='left',on=['paper_id'])


# In[14]:


df_sub.head()


# In[15]:


df_sub.to_excel(r'E:/Desktop/6月讲义数据V4.xlsx')

文档更新时间: 2019-08-19 11:54   作者:admin