贡献率

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import sys
sys.path.append(r'e:\Desktop\新环境\DataMachine_for_timer\src')
import pandas as pd
import numpy as np
import pymysql
from ML import data_suit,plot_suit
from utils import mysqlhelper,elapse,prestohelper
from config import config
from datetime import datetime , time
from pyhive import presto

## 脱敏库
mysqlinstance_tm_new_hfjy = mysqlhelper(**config.tm_new_hfjy)
mysqlinstance_tm_new_bidata = mysqlhelper(**config.tm_new_bidata)

## dmart库
mysqlinstance_dmart_dmart = mysqlhelper(**config.dmart_dmart)
mysqlinstance_dmart_bidata = mysqlhelper(**config.dmart_bidata)

## hrm库
mysqlinstance_hrm_hrm_sale = mysqlhelper(**config.hrm_hrm_sale)
mysqlinstance_hrm_new_hrm = mysqlhelper(**config.hrm_new_hrm)
mysqlinstance_hrm_jobhunt = mysqlhelper(**config.hrm_jobhunt)

## hive库
prestoinstance = prestohelper(**config.hive_prosto)


# In[2]:


sql1 = '''
SELECT 
    max(sub.subject_name) subject_name,
    lp.lesson_plan_id,
    count(distinct lpq.quiz_id) quiz_count,
    count(distinct (case when lpq.type <> 6 then lpq.quiz_id  end)) sys_quiz_count,
  max(case when (select count(1)
                        from ods_hfjydb.view_tms_contract tc
                        where s.student_intention_id = tc.student_intention_id
                        and tc.status in (4,5)
                        and    tc.submit_time > lp.adjust_start_time
                        and contract_id like 'X%'
                        and new_sign = 1) > 0 then 1 else 0 end) is_deal
FROM ods_hfjydb.lesson_plan lp 
    INNER JOIN ods_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id 
     left join ods_hfjydb.subject sub on sub.subject_id = lp.subject_id
    INNER JOIN ods_hfjydb.view_student s on s.student_id = lp.student_id
WHERE    lp.status in (3,5)
    AND lp.solve_status <> 6
    and lp.lesson_type = 2
    AND s.account_type = 1
    -- and lpq.type <> 6
GROUP BY lp.lesson_plan_id
'''

df1 = prestoinstance.get_df(sql1)
df1.head()


# In[4]:


sql2 = '''
SELECT
        DISTINCT lp.lesson_plan_id,
        (
        date_diff (
        'minute',
        cast( IF ( lp.real_start_time IS NULL, lp.adjust_start_time, lp.real_start_time ) AS TIMESTAMP ),
        cast( IF ( lp.real_end_time IS NULL, lp.adjust_end_time, lp.real_end_time ) AS TIMESTAMP ) 
        ) 
        ) * 60 lesson_period,
        hpqi.paper_id,
        lrqi.quiz_id,
        lrqi.stay_time 
FROM
        ods_hfjydb.lesson_plan lp
INNER JOIN 
        edw.ls_learning_record_quiz_info lrqi ON lrqi.lesson_plan_id = lp.lesson_plan_id
INNER JOIN 
        ods_hfjydb.hwl_quiz_knowledge hqk ON hqk.quiztempid = cast( lrqi.quiz_id AS INTEGER )
INNER JOIN 
        ods.hwl_paper_quiz_info hpqi ON hpqi.quiz_id = hqk.hwlquizid 
WHERE
        lp.STATUS IN ( 3, 5 ) 
        AND lp.solve_status <> 6 
        AND lp.lesson_type = 2 -- and lp.lesson_plan_id =13130255
        AND lrqi.type = 8 
'''

df2 = prestoinstance.get_df(sql2)
df2.head()


# In[10]:


df2_1 = df2.groupby(['lesson_plan_id','paper_id'])['quiz_id'].count().reset_index()

df2_1.head()


# In[18]:


df2_1.dtypes


# In[19]:


df2_1[df2_1['quiz_id'] >= 6]


# In[21]:


df2_2 = df2.groupby(['lesson_plan_id','paper_id'])['stay_time'].sum().reset_index()

df2_2


# In[22]:


df2_3 = pd.merge(df2_1,df2_2,how='left',on=['lesson_plan_id','paper_id'])
df2_3.head()


# In[23]:


df2_4 = df2_3[df2_3['quiz_id'] >= 6]

df2_4


# In[29]:


df2_1.loc[:, ['lesson_plan_id','lesson_period']]
df2_1.head()



文档更新时间: 2019-08-19 11:41   作者:admin