# Leetcode-SQL-rewrite-using-Python-
This is a repository created by Lei Huang to record LeetCode SQL practice.

# 175. Combine Two Tables

sql
```sql
select FirstName, LastName, City, State
from Person P left join Address A on P.PersonId = A.PersonId
```
Python
```python
table = Person.merge(Address, on = 'PersonId', how = 'left')
table[['FirstName', 'LastName', 'City', 'State']]
```

# 176. Second Highest Salary
sql
```sql
#if there is no tie, and null must be returned when there is no second highest salary
V1: select (select distinct salary from Employee order by salary desc limit 1,1) as SecondHighestSalary

V2:
#ifnull(x,y) returns x if x is not null, otherwise y; in this problem y = null
#limit x,y skips x rows, returns the next y rows, and stops scanning once they are found
#distinct filters out duplicate salaries

select
ifnull
(
  (select distinct Salary
   from Employee
   order by Salary desc
   limit 1,1),
  null
) as 'SecondHighestSalary'

#if there is a tie
V1:
select max(salary) SecondHighestSalary
from employee
where salary < (select max(salary) from employee)

V2:
SELECT max(salary) SecondHighestSalary
FROM (SELECT salary, DENSE_RANK() OVER (ORDER BY salary DESC) rank_num
      FROM Employee) t
WHERE rank_num = 2
```

python
```python
employee['salary_rank'] = employee['Salary'].rank(method = 'dense', ascending = False)
employee.loc[employee['salary_rank'] == 2, 'Salary'].drop_duplicates()
```
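A self-contained sketch that also returns None when no second-highest salary exists (mirroring SQL's NULL), assuming an `employee` DataFrame with a `Salary` column:

```python
import pandas as pd

def second_highest_salary(employee: pd.DataFrame):
    # Collapse ties first, then sort descending.
    distinct = employee['Salary'].drop_duplicates().sort_values(ascending=False)
    # The second row is the second-highest salary; None mirrors SQL's NULL.
    return distinct.iloc[1] if len(distinct) >= 2 else None
```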
# 177. Nth Highest Salary (create functions)
sql
```sql
# V1: group by to drop duplicate values, or you can use distinct
CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
set N := N-1;
RETURN (
    # Write your MySQL query statement below.
    select Salary
    from Employee
    group by Salary
    order by Salary desc
    limit N,1
);
end

#v2: find the Nth salary by requiring exactly N-1 distinct salaries above it
CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
set N := N-1;
RETURN (
    # Write your MySQL query statement below.
    select distinct e1.Salary
    from Employee e1
    where (select count(distinct Salary) from Employee e2 where e2.Salary > e1.Salary) = N
);
end

#v3: count how many distinct salaries are higher than this salary
CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
set N := N-1;
RETURN (
    # Write your MySQL query statement below.
    select distinct e1.Salary
    from Employee e1 left join Employee e2 on e1.Salary < e2.Salary
    group by e1.Salary
    having count(distinct e2.Salary) = N
);
end

#v4: window function
#row_number(): ties get distinct ranks (plain row numbers), e.g. 3000, 2000, 2000, 1000 rank as 1, 2, 3, 4
#rank(): ties share a rank and the next rank skips, e.g. 3000, 2000, 2000, 1000 rank as 1, 2, 2, 4
#dense_rank(): ties share a rank with no gaps, e.g. 3000, 2000, 2000, 1000 rank as 1, 2, 2, 3

CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
RETURN (
    # Write your MySQL query statement below.
    select distinct Salary
    from (select Salary, dense_rank() over (order by Salary Desc) as RN
          from Employee) E
    where RN = N
);
end
```

Python
```python
def getNthHighestSalary(N):
    employee['salary_rank'] = employee['Salary'].rank(method = 'dense', ascending = False)
    return employee.loc[employee['salary_rank'] == N, 'Salary'].drop_duplicates()
```
# 178. Rank Scores
sql
```sql
select Score, RN as 'Rank'
from (select Score,
             dense_rank() over (order by Score desc) as RN
      from Scores) s
order by RN

#v2
select a.Score as Score,
(select count(distinct b.Score) from Scores b where b.Score >= a.Score) as 'Rank'
from Scores a
order by a.Score DESC
```
python
```python
Scores['rank'] = Scores['Score'].rank(method = 'dense', ascending = False)
Scores[['Score', 'rank']].sort_values(by = ['rank'])
```

# 180. Consecutive Numbers
sql
```sql
select distinct l1.Num as ConsecutiveNums
from Logs l1, Logs l2, Logs l3
where l1.Id = l2.Id - 1 and l2.Id = l3.Id - 1 and l1.Num = l2.Num and l2.Num = l3.Num

#window function
select distinct(num) ConsecutiveNums
from (
    select num, (row_number() over(order by id) - row_number() over(partition by num order by id)) as rn
    from Logs
) tmp
group by rn, num
having count(rn) >= 3;

#lead and lag solution
with x as
(select id,
        lead(Num,1,0) over (order by id asc) Lead,
        lag(Num,1,0) over (order by id asc) Lag
 from Logs
)

select
distinct Num as "ConsecutiveNums"
from
Logs L
join x y on L.id = y.id and y.Lead = L.Num and y.Lag = L.Num;
```

python
```python
#translating the window-function solution
Logs['rn'] = Logs['Id'].rank(method = 'first')
Logs['rn1'] = Logs.groupby(['Num'])['Id'].rank(method = 'first')
Logs['dif'] = Logs['rn'] - Logs['rn1']
Logs1 = Logs.groupby(['dif', 'Num'])['dif'].count().to_frame('count').reset_index()
Logs1[Logs1['count'] >= 3]['Num'].drop_duplicates()
```
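The three-table self-join can also be translated; a sketch using shift() to compare each row's Num with the next two rows (this assumes the ids are consecutive, which is the same assumption the SQL join on Id - 1 makes):

```python
logs_sorted = Logs.sort_values('Id')
# A number is "consecutive" if it equals the Num of the next two rows.
mask = (logs_sorted['Num'] == logs_sorted['Num'].shift(-1)) & \
       (logs_sorted['Num'] == logs_sorted['Num'].shift(-2))
logs_sorted.loc[mask, 'Num'].drop_duplicates().to_frame('ConsecutiveNums')
```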
# 181. Employees Earning More Than Their Managers

sql
```sql
select e2.Name as Employee
from Employee e1 join Employee e2 on e1.Id = e2.ManagerId and e1.Salary < e2.Salary
```
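python

The python block was left empty; a minimal sketch, assuming an `Employee` DataFrame with `Id`, `Name`, `Salary`, and `ManagerId` columns, is a self-merge that mirrors the SQL self-join:

```python
# Attach each employee's manager row, then keep employees who out-earn their manager.
merged = Employee.merge(Employee, left_on = 'ManagerId', right_on = 'Id',
                        suffixes = ('', '_mgr'))
merged.loc[merged['Salary'] > merged['Salary_mgr'], 'Name'].to_frame('Employee')
```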
# 182. Duplicate Emails
sql
```sql
select Email
from Person
group by Email
having count(*) > 1
```
python
```python
New = Person.groupby(['Email'])['Id'].count().to_frame('count').reset_index()
New[New['count'] > 1]['Email']
```
# 183. Customers Who Never Order
sql
```sql
select c.Name as Customers
from Customers c
left join Orders o on o.CustomerId = c.Id
where o.Id is null
```

python
```python
new_df = pd.merge(Customers, Orders, how = 'left', left_on = ['Id'], right_on = ['CustomerId'], suffixes = ('_t1','_t2'))
new_df[new_df['Id_t2'].isna()]['Name']
```
# 184. Department Highest Salary
sql
```sql
select D.Name as Department, E.Name as Employee, E.Salary
from (select *,
             dense_rank() over (partition by DepartmentId order by Salary desc) as rn
      from Employee) E join
Department D on E.DepartmentId = D.Id
where rn = 1
order by E.Salary

#v2:
select d.Name Department, e.Name Employee, Salary
from Employee e
join Department d
on e.DepartmentId = d.Id
where (e.DepartmentId, Salary) IN (
    select DepartmentId, max(salary)
    from Employee
    group by DepartmentId
);
```

python
```python
Employee['rn'] = Employee.groupby(['DepartmentId'])['Salary'].rank(method = 'dense', ascending = False)
dat = pd.merge(Employee, Department, how = 'inner', left_on = 'DepartmentId', right_on = 'Id', suffixes = ('_t1','_t2'))
result = dat[dat['rn'] == 1][['Name_t2','Name_t1','Salary']]
result.columns = ['Department', 'Employee','Salary']
```

# 185. Department Top Three Salaries
sql
```sql
select D.Name as Department, E.Name as Employee, E.Salary
from (select *,
             dense_rank() over (partition by DepartmentId order by Salary desc) as rn
      from Employee) E join
Department D on E.DepartmentId = D.Id
where rn <= 3
order by E.Salary
```

python
```python
Employee['rn'] = Employee.groupby(['DepartmentId'])['Salary'].rank(method = 'dense', ascending = False)
dat = pd.merge(Employee, Department, how = 'inner', left_on = 'DepartmentId', right_on = 'Id', suffixes = ('_t1','_t2'))
result = dat[dat['rn'] <= 3][['Name_t2','Name_t1','Salary']]
result.columns = ['Department', 'Employee','Salary']
```

# 196. Delete Duplicate Emails
sql
```sql
DELETE p1 FROM Person p1,
    Person p2
WHERE
    p1.Email = p2.Email AND p1.Id > p2.Id

#v2: not in subquery
DELETE from Person
Where Id not in (
    Select Id
    From(
        Select MIN(Id) as id
        From Person
        Group by Email
    ) t
)
```

python
```python
P1 = Person.groupby(['Email'])['Id'].min().to_frame('id').reset_index()
Person[~Person.Id.isin(P1['id'])]
# Person.Id is a Series, so .isin() applies element-wise;
# the argument to .isin() must be list-like, e.g. the Series P1['id'].
# The ~ selects the duplicate rows that the SQL DELETE removes.
```

# 197. Rising Temperature
sql
```sql
select w1.Id
from Weather w1, Weather w2
where datediff(w1.RecordDate, w2.RecordDate) = 1
and w1.Temperature > w2.Temperature
```

python
```python
#datediff in python
from datetime import datetime

def daysdiff(d1, d2):
    d1 = datetime.strptime(d1, "%Y-%m-%d")
    d2 = datetime.strptime(d2, "%Y-%m-%d")
    return (d1 - d2).days
```
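daysdiff only covers the datediff building block; a sketch of the full translation, assuming a `Weather` DataFrame with `Id`, `RecordDate`, and `Temperature` columns, is a self-merge on the previous calendar day:

```python
import pandas as pd

w = Weather.copy()
w['RecordDate'] = pd.to_datetime(w['RecordDate'])
# Shift each record forward one day so the merge pairs day d with day d-1.
prev = w.assign(RecordDate = w['RecordDate'] + pd.Timedelta(days=1))
merged = w.merge(prev, on = 'RecordDate', suffixes = ('', '_prev'))
merged.loc[merged['Temperature'] > merged['Temperature_prev'], 'Id']
```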
# 262. Trips and Users
sql
```sql
SELECT
    request_at 'Day', round(avg(Status != 'completed'), 2) 'Cancellation Rate'
FROM
    trips t JOIN users u1 ON (t.client_id = u1.users_id AND u1.banned = 'No')
    JOIN users u2 ON (t.driver_id = u2.users_id AND u2.banned = 'No')
WHERE
    request_at BETWEEN '2013-10-01' AND '2013-10-03'
GROUP BY
    request_at
```

Python
```python
Trips = Trips[~Trips.Client_Id.isin(User[User['Banned'] == 'Yes']['Users_Id'])]
Trips = Trips[~Trips.Driver_Id.isin(User[User['Banned'] == 'Yes']['Users_Id'])]
Trips = Trips[(Trips.Request_at >= '2013-10-01') & (Trips.Request_at <= '2013-10-03')]
#case when: flag cancelled trips
Trips['cancelled'] = (Trips.Status != 'completed').astype(int)
dat = Trips.groupby('Request_at')['cancelled'].agg(['sum', 'count']).reset_index()
dat['Cancellation Rate'] = round(dat['sum'] / dat['count'], 2)
```
# 511. Game Play Analysis I
sql
```sql
select player_id, event_date as first_login
from (select *,
             row_number() over (partition by player_id order by event_date) as rn
      from Activity) S
where rn = 1
```

python
```python
Activity['rn'] = Activity.groupby(['player_id'])['event_date'].rank(method = 'first')
Activity[Activity['rn'] == 1][['player_id', 'event_date']]
```

# 512. Game Play Analysis II
sql
```sql
select player_id, device_id
from activity
where (player_id, event_date) in
    (select player_id, min(event_date)
     from activity
     group by player_id)
```
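python

512 has no translation here; a minimal sketch, assuming an `Activity` frame with `player_id`, `device_id`, and `event_date` columns, mirrors the IN subquery with a merge on each player's first date:

```python
first = Activity.groupby('player_id', as_index = False)['event_date'].min()
# Inner merge on (player_id, event_date) keeps only each player's first login row.
Activity.merge(first, on = ['player_id', 'event_date'])[['player_id', 'device_id']]
```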
# 534. Game Play Analysis III
sql
```sql
select A1.player_id, A1.event_date, sum(A2.games_played) as games_played_so_far
from Activity A1 left join Activity A2 on A1.player_id = A2.player_id and A1.event_date >= A2.event_date
group by A1.player_id, A1.event_date
order by A1.player_id, A1.event_date
```
python

One option is an asof merge, which is similar to a left join except that it matches on the nearest key rather than on equal keys.
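In pandas, though, this running total is easier to express as a grouped cumulative sum than as an asof merge; a minimal sketch, assuming an `Activity` frame with `player_id`, `event_date`, and `games_played` columns:

```python
act = Activity.sort_values(['player_id', 'event_date']).copy()
# Running total of games_played within each player, in date order.
act['games_played_so_far'] = act.groupby('player_id')['games_played'].cumsum()
act[['player_id', 'event_date', 'games_played_so_far']]
```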
# 550. Game Play Analysis IV
sql
```sql
-- Note: this problem asks for players who logged in again on the day right after their first login, not just any two consecutive login days.
#V1:
select round((select count(distinct(A1.player_id))
              from (select player_id, min(event_date) as event_date from Activity group by player_id) A1
              join Activity A2 on A1.player_id = A2.player_id and datediff(A2.event_date, A1.event_date) = 1)
             / count(distinct player_id), 2) as fraction
from Activity

#V2: compute each player's day-after-first-login date, check whether it appears in the data, count the players for which it does, and divide by the total number of players.
SELECT
    ROUND(COUNT(DISTINCT player_id)/(SELECT COUNT(distinct player_id) FROM Activity),
    2) AS fraction
FROM
    Activity
WHERE
    (player_id, event_date)
IN
    (SELECT
        player_id,
        Date(min(event_date) + 1)
     FROM Activity
     GROUP BY player_id);
```
python
```python
import pandas as pd
from datetime import datetime, timedelta

A1 = Activity.groupby(['player_id'])['event_date'].min().to_frame('Date').reset_index()
A1['Date'] = A1['Date'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d'))
A1['Date1'] = A1['Date'] + timedelta(days=1)
# Parse event_date too, so the MultiIndex membership test compares datetimes.
Activity['event_date'] = pd.to_datetime(Activity['event_date'])
index1 = pd.MultiIndex.from_arrays([A1[col] for col in ['player_id', 'Date1']])
index2 = pd.MultiIndex.from_arrays([Activity[col] for col in ['player_id', 'event_date']])
New = Activity.loc[index2.isin(index1)]
fraction = round(New.player_id.nunique() / Activity.player_id.nunique(), 2)
```

# 595. Big Countries
sql
```sql
select name, population, area
from World
where area > 3000000 or population > 25000000
order by name
```
python
```python
world = World[(World['area'] > 3000000) | (World['population'] > 25000000)]
world.sort_values(by = ['name'])
```

# 569. Median Employee Salary
sql
```sql
SELECT Id, Company, Salary FROM
    (SELECT Id, Company, Salary, COUNT(Salary) OVER (PARTITION BY Company) AS CN,
            ROW_NUMBER() OVER (PARTITION BY Company ORDER BY Salary) AS RN FROM Employee) T
WHERE RN = (CN+1)/2 OR RN = (CN+2)/2
```
# 570. Managers with at Least 5 Direct Reports
sql
```sql
select Name
from Employee
where Id in (select ManagerId
             from Employee
             group by ManagerId
             having count(distinct Id) >= 5)

#v2: self join
select e2.Name
from employee e1, employee e2
where e1.ManagerId = e2.Id
group by e2.Id
having count(*) >= 5
```
python
```python
dat = Employee.groupby(['ManagerId'])['Id'].nunique().to_frame('count')
dat1 = Employee[Employee.Id.isin(dat[dat['count'] >= 5].index)].Name
```
# 571. Find Median Given Frequency of Numbers
sql
```sql
# Build two cumulative frequencies and reuse the median-employee-salary idea.
select
avg(t.number) as median
from
(
    select
    n1.number,
    n1.frequency,
    (select sum(frequency) from numbers n2 where n2.number <= n1.number) as asc_frequency,
    (select sum(frequency) from numbers n3 where n3.number >= n1.number) as desc_frequency
    from numbers n1
) t
where t.asc_frequency >= (select sum(frequency) from numbers)/2
and t.desc_frequency >= (select sum(frequency) from numbers)/2
```

# 574. Winning Candidate
sql
```sql
select Name
from Candidate
where id in (
    select CandidateId
    from Vote
    group by CandidateId
    having count(distinct id) >= all(select count(distinct id)
                                     from Vote
                                     group by CandidateId))
```
python
```python
dat = Vote.groupby(['CandidateId'])['id'].nunique().to_frame('count').reset_index()
max_count = dat['count'].max()
Candidate[Candidate.id.isin(dat[dat['count'] >= max_count].CandidateId)].Name
```
# 578. Get Highest Answer Rate Question
sql
```sql
select question_id as survey_log
from
    (select question_id, sum(case when action = 'answer' then 1 else 0 end)/sum(case when action = 'show' then 1 else 0 end) as rate
     from survey_log
     group by question_id) s
order by rate desc
limit 1
```

python
```python
dat = survey_log.groupby(['question_id'])['action'].value_counts().to_frame('count').reset_index()
```
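The value_counts frame above still needs to be pivoted into per-question answer and show counts; one way to finish the translation (a sketch, assuming the actions are 'answer' and 'show' as in the SQL):

```python
# Pivot actions into columns, compute the answer/show rate, take the top question.
pivot = dat.pivot_table(index = 'question_id', columns = 'action',
                        values = 'count', fill_value = 0)
pivot['rate'] = pivot['answer'] / pivot['show']
pivot['rate'].idxmax()
```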
# 579. Find Cumulative Salary of an Employee
sql
```sql
#v1: window function
select Id, Month, total_salary as salary
from (select *,
             sum(Salary) over (partition by Id order by Month rows 2 preceding) as total_salary,
             row_number() over (partition by Id order by Month desc) as RN
      from Employee) E
where RN > 1
order by Id, Month desc

#v2: self join and exclude the max month
select e1.Id, e1.Month, sum(e2.Salary) as Salary
from Employee e1 join Employee e2
on e1.Id = e2.Id
and e1.Month >= e2.Month
and e1.Month < e2.Month + 3
where (e1.Id, e1.Month) not in (select Id, max(Month) as Month from Employee group by Id)
group by e1.Id, e1.Month
order by e1.Id, e1.Month desc
```

python
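The translation was left as an open question in the original notes; a sketch using a grouped rolling window, assuming an `Employee` frame with `Id`, `Month`, `Salary` and consecutive months per employee (the same assumption the `rows 2 preceding` window makes):

```python
emp = Employee.sort_values(['Id', 'Month']).copy()
# 3-month rolling sum within each employee: current month plus up to 2 preceding.
emp['salary'] = (emp.groupby('Id')['Salary']
                    .rolling(window = 3, min_periods = 1).sum()
                    .reset_index(level = 0, drop = True))
# Drop each employee's most recent month, mirroring RN > 1 in the SQL.
last = emp.groupby('Id')['Month'].transform('max')
result = emp[emp['Month'] < last].sort_values(['Id', 'Month'], ascending = [True, False])
result[['Id', 'Month', 'salary']]
```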
# 580. Count Student Number in Departments
sql
```sql
select d.dept_name, ifnull(count(distinct s.student_id), 0) as student_number
from student s right join department d on d.dept_id = s.dept_id
group by d.dept_id
order by student_number desc
```

python
```python
dat = student.merge(department, how = 'right', left_on = 'dept_id', right_on = 'dept_id')
dat1 = dat.groupby(['dept_name'])['student_id'].nunique().to_frame('student_number').reset_index()
dat1 = dat1.sort_values(by = ['student_number'], ascending = False)
```

# 585. Investments in 2016
sql
```sql
select sum(TIV_2016) as TIV_2016
from insurance
where concat(lat, lon) in (select concat(lat, lon)
                           from insurance
                           group by concat(lat, lon)
                           having count(concat(lat, lon)) <= 1)
and
tiv_2015 in (select tiv_2015
             from insurance
             group by tiv_2015
             having count(tiv_2015) > 1)

```

python
```python
insurance['concat'] = insurance['lat'].astype(str) + insurance['lon'].astype(str)
#v2: insurance['concat'] = insurance.lat.astype(str).str.cat(insurance.lon.astype(str))
dat1 = insurance.groupby(['concat']).size().to_frame('count').reset_index()
dat1 = dat1[dat1['count'] <= 1]
dat2 = insurance.groupby(['tiv_2015']).size().to_frame('count').reset_index()
dat2 = dat2[dat2['count'] > 1]
dat3 = insurance[(insurance['concat'].isin(dat1['concat'])) & (insurance.tiv_2015.isin(dat2['tiv_2015']))]
dat3.TIV_2016.sum()
```
# 597. Friend Requests I: Overall Acceptance Rate
sql
```sql
select round(ifnull(count(distinct b.requester_id, b.accepter_id)/count(distinct a.sender_id, a.send_to_id), 0), 2) accept_rate
from friend_request a, request_accepted b

select round(
    ifnull(
        (select count(distinct requester_id, accepter_id) from request_accepted) /
        (select count(distinct sender_id, send_to_id) from friend_request)
    , 0)
, 2) as accept_rate;
```


# 596. Classes More Than 5 Students
sql
```sql
select class
from courses
group by class
having count(distinct(student)) >= 5
```
python
```python
dat = courses.groupby(['class'])['student'].nunique().to_frame('count').reset_index()
dat[dat['count'] >= 5]['class']
```
# 601. Human Traffic of Stadium
sql
```sql
select
id, visit_date, people
from
    (select
     id, visit_date, people,
     count(*) over (partition by offset) cnt
     from
        (select
         id, visit_date, people,
         (row_number() over (order by id) - id) offset
         from stadium
         where people >= 100
        ) R -- get consecutive ids
    ) R1
where cnt >= 3
order by id
```
python
```python
dat = stadium[stadium['people'] >= 100].copy()
dat['rn'] = dat['id'].rank(method = 'first')
dat['offset'] = dat['rn'] - dat['id']
dat1 = dat.groupby(['offset'])['id'].count().to_frame('count').reset_index()
data = pd.merge(dat, dat1, how = 'left', left_on = 'offset', right_on = 'offset')
data[data['count'] >= 3].sort_values(by = ['id'])
```

# 602. Friend Requests II: Who Has the Most Friends
sql
```sql
select accepter_id as id, sum(num) as num
from
    ((select accepter_id, count(*) as num
      from request_accepted
      group by accepter_id)
     union all
     (select requester_id, count(*) as num
      from request_accepted
      group by requester_id)) a
group by accepter_id
order by sum(num) desc
limit 1
```

python
```python
dat1 = request_accepted.groupby(['accepter_id'])['accepter_id'].count().to_frame('num').reset_index()
dat2 = request_accepted.groupby(['requester_id'])['requester_id'].count().to_frame('num').reset_index()
dat = pd.concat([dat1, dat2.rename(columns = {'requester_id': 'accepter_id'})])
dat = dat.groupby(['accepter_id'])['num'].sum().to_frame('num').reset_index()
dat = dat.sort_values(by = ['num'], ascending = False)
dat.head(1)
```

# 608. Tree Node
sql
```sql
select id, (case when p_id is null then 'Root'
                 when id in (select p_id from tree) then 'Inner'
                 else 'Leaf' end) as Type
from tree
```
python
```python
tree.loc[tree['p_id'].isnull(), 'type'] = 'Root'
tree.loc[(tree.id.isin(tree['p_id'])) & (tree['p_id'].notnull()), 'type'] = 'Inner'
tree.loc[tree['type'].isnull(), 'type'] = 'Leaf'
```

# 612. Shortest Distance in a Plane
sql
```sql
select round(min(sqrt(POWER((p1.x-p2.x),2) + POWER((p1.y-p2.y),2))),2) as shortest
from point_2d p1 join point_2d p2 on (p1.x,p1.y) <> (p2.x, p2.y)
```

# 615. Average Salary: Departments VS Company
sql
```sql
select c.pay_month, c.department_id,
       case
           when c.salary_d > d.salary_c then 'higher'
           when c.salary_d < d.salary_c then 'lower'
           else 'same'
       end as comparison
from (select date_format(pay_date, '%Y-%m') pay_month, department_id, avg(amount) salary_d
      from salary s join employee e on s.employee_id = e.employee_id
      group by pay_month, department_id) c
join (select date_format(pay_date, '%Y-%m') pay_month, avg(amount) salary_c
      from salary
      group by pay_month) d
on c.pay_month = d.pay_month
```

# 620. Not Boring Movies
sql
```sql
select *
from cinema
where id % 2 <> 0 and description <> 'boring'
order by rating desc
```

python
```python
cinema[(cinema['id'] % 2 != 0) & (cinema['description'] != 'boring')].sort_values(by = ['rating'], ascending = False)
```

# 626. Exchange Seats
sql
```sql
select
    if(id % 2 = 0,
       id - 1,
       if(id = (select count(distinct id) from seat),
          id,
          id + 1))
    as id, student
from seat
order by id
```
This problem is actually simple: select id and student, subtracting 1 when the id is even and adding 1 when it is odd. The catch is that when the total count is odd, the last id must stay unchanged, because adding 1 would leave an empty seat. So we find the last row and keep it as it is.

Author: fan-lu-5
Link: https://leetcode-cn.com/problems/exchange-seats/solution/jian-dan-yi-dong-xiao-lu-ji-bai-suo-you-by-fan-lu-/
Source: LeetCode (力扣). Copyright belongs to the author; contact the author for commercial reuse and credit the source for non-commercial reuse.

```sql
SELECT
    (CASE
        WHEN MOD(id, 2) != 0 AND counts != id THEN id + 1 -- MOD returns the remainder
        WHEN MOD(id, 2) != 0 AND counts = id THEN id
        ELSE id - 1
    END) AS id,
    student
FROM
    seat,
    (SELECT
        COUNT(*) AS counts
     FROM
        seat) AS seat_counts
ORDER BY id ASC;
```
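python

The python block here was empty; a minimal sketch, assuming a `seat` DataFrame with integer `id` and `student` columns:

```python
n = len(seat)
swapped = seat.copy()
# Even ids move up one, odd ids move down one, and an odd last id stays put.
swapped['id'] = swapped['id'].apply(
    lambda i: i - 1 if i % 2 == 0 else (i if i == n else i + 1))
swapped.sort_values('id')
```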
# 627. Swap Salary
sql
```sql
UPDATE salary
SET sex = if(sex = 'm', 'f', 'm')
```

python
```python
# Swap in one pass; two sequential assignments would overwrite each other.
salary['sex'] = salary['sex'].map({'m': 'f', 'f': 'm'})
```

# 1045. Customers Who Bought All Products
sql
```sql
select t1.customer_id
from (select customer_id, count(distinct product_key) cnt from customer group by customer_id) t1,
     (select count(*) cnt from product) t2
where t1.cnt = t2.cnt

#v2: hard-coded to the sample data's two product keys
select c.customer_id
from Product p left join Customer c on p.product_key = c.product_key
where p.product_key = 5
and c.customer_id in (select c.customer_id
                      from Product p left join Customer c on p.product_key = c.product_key
                      where p.product_key = 6)
order by customer_id
```
# 1098. Unpopular Books
sql
```sql
select b.book_id, b.name
from Books b left join Orders o on b.book_id = o.book_id
where b.book_id in (select book_id
                    from Books
                    where datediff(month, available_from, '2019-06-23') > 1)
group by b.book_id, b.name
having isnull(sum(case when o.dispatch_date < '2018-06-23' then 0 else quantity end), 0) < 10
order by b.book_id
```

# 1107. New Users Daily Count
sql
```sql
#mysql
select login_date, count(user_id) user_count
from (select user_id, min(activity_date) login_date
      from Traffic
      where activity = 'login'
      group by user_id) t
where datediff('2019-06-30', login_date) <= 90
group by login_date;


#sql server
SELECT
    login_date,
    COUNT(DISTINCT user_id) user_count
FROM
    (SELECT
        user_id,
        MIN(activity_date) OVER(PARTITION BY user_id) login_date
     FROM Traffic
     WHERE activity = 'login') t
WHERE datediff(day, login_date, '2019-06-30') <= 90
GROUP BY login_date
ORDER BY login_date

select activity_date as login_date, count(distinct user_id) as user_count
from (select *,
             row_number() over (partition by user_id order by activity_date) as rn
      from (select * from Traffic where activity = 'login') a) A
where rn = 1 and datediff(day, activity_date, '2019-06-30') <= 90
group by activity_date
```
We can't use row_number over all of Traffic to pick each user's first activity, because some users' first activity is not 'login'.
wrong answer:
```sql
select activity_date as login_date, count(distinct user_id) as user_count
from (select *,
             row_number() over (partition by user_id order by activity_date) as rn
      from Traffic) A
where rn = 1 and datediff(day, activity_date, '2019-06-30') <= 90
group by activity_date
```

# 1112. Highest Grade For Each Student
sql
```sql
#window function
select student_id, course_id, grade
from (select *,
             rank() over (partition by student_id order by grade desc, course_id) as rn
      from Enrollments) S
where rn = 1
order by student_id

#sql
SELECT student_id, MIN(course_id) AS course_id, grade
FROM Enrollments
WHERE (student_id, grade) IN (SELECT student_id, MAX(grade)
                              FROM Enrollments
                              GROUP BY student_id)
GROUP BY student_id
ORDER BY student_id
```

python
```python
Enrollments['rn'] = Enrollments.groupby(['student_id'])['grade'].rank(method = 'dense', ascending = False)
top = Enrollments[Enrollments['rn'] == 1]
# For each student, keep the row with the smallest course_id among top grades.
indices = top.groupby(['student_id'])['course_id'].idxmin()
top.loc[indices, ['student_id', 'course_id', 'grade']]
# Reference on displaying the whole matching row after grouping to a minimum:
# https://datascience.stackexchange.com/questions/26308/after-grouping-to-minimum-value-in-pandas-how-to-display-the-matching-row-resul
```

# 1126. Active Businesses
sql
```sql
#v1
select business_id
from (select e1.business_id, e1.event_type, (case when e1.occurences > e2.avg then 1 else 0 end) as count
      from Events e1 left join (select event_type, avg(occurences) as avg from Events group by event_type) e2
      on e1.event_type = e2.event_type) e
group by business_id
having sum(count) > 1

#v2
select business_id
from events e,
     (select event_type, avg(occurences) avg_occ
      from events
      group by event_type) temp
where e.event_type = temp.event_type and e.occurences > temp.avg_occ
group by e.business_id
having count(*) > 1

#v3
select business_id
from Events e left join (
    select event_type, avg(occurences) as tavg
    from Events
    group by event_type
) t on e.event_type = t.event_type
group by business_id
having sum(case when e.occurences > t.tavg then 1 else 0 end) > 1
```
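python

The python block was left empty; a minimal sketch mirroring v2, assuming an `Events` frame with `business_id`, `event_type`, and `occurences` columns (spelled as in the problem):

```python
# Average occurences per event type, aligned back onto each row.
avg_occ = Events.groupby('event_type')['occurences'].transform('mean')
above = Events[Events['occurences'] > avg_occ]
counts = above.groupby('business_id').size().to_frame('count').reset_index()
counts.loc[counts['count'] > 1, 'business_id']
```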
# 1127. User Purchase Platform
sql
```sql
select t2.spend_date, t2.platform,
       ifnull(sum(amount), 0) total_amount, ifnull(count(user_id), 0) total_users
from
    (select distinct spend_date, "desktop" as platform from Spending
     union
     select distinct spend_date, "mobile" as platform from Spending
     union
     select distinct spend_date, "both" as platform from Spending
    ) t2
left join
    (select spend_date, sum(amount) amount, user_id,
            case when count(*) = 1 then platform else "both" end as platform
     from Spending
     group by spend_date, user_id) t1
on t1.spend_date = t2.spend_date
and t1.platform = t2.platform
group by t2.spend_date, t2.platform
```

python
```python
# Note: this sketch uses member_id/date/channel/spend instead of the problem's column names.
spending['mobile_spend'] = spending.loc[spending.channel == 'mobile', 'spend']
spending['desktop_spend'] = spending.loc[spending.channel == 'desktop', 'spend']
member_spend = (spending.groupby(['date', 'member_id'])[['mobile_spend', 'desktop_spend']]
                        .sum().reset_index())

member_spend.loc[(member_spend.mobile_spend > 0) & (member_spend.desktop_spend == 0), 'channel'] = 'mobile'
member_spend.loc[(member_spend.mobile_spend == 0) & (member_spend.desktop_spend > 0), 'channel'] = 'desktop'
member_spend.loc[(member_spend.mobile_spend > 0) & (member_spend.desktop_spend > 0), 'channel'] = 'both'

tot_members = member_spend.groupby(['date', 'channel']).size().to_frame('tot_members').reset_index()
tot_spend = member_spend.groupby(['date', 'channel'])[['mobile_spend', 'desktop_spend']].sum().reset_index()
tot_spend['tot_spend'] = tot_spend['mobile_spend'] + tot_spend['desktop_spend']
output = tot_members.merge(tot_spend[['date', 'channel', 'tot_spend']], on = ['date', 'channel'])
```
# 1132. Reported Posts II
sql
```sql
# count() does not count null values
select round(100*avg(percent), 2) as average_daily_percent
from (select a.action_date, count(distinct(r.post_id))/count(distinct(a.post_id)) as percent
      from Removals r right join Actions a on r.post_id = a.post_id
      where extra = 'spam'
      group by a.action_date) P

#the version below is wrong, since one day can contain duplicate post_ids: counting with case when cannot count distinct post_ids.
select round(100*avg(percent), 2) as average_daily_percent
from (select a.action_date, sum(case when r.post_id is null then 0 else 1 end)/count(distinct a.post_id) as percent
      from Removals r right join Actions a on r.post_id = a.post_id
      where extra = 'spam'
      group by a.action_date) P
```

python
```python
#count distinct with group by: two solutions
df.groupby("date").agg({"duration": np.sum, "user_id": pd.Series.nunique})
df.groupby("date").agg({"duration": np.sum, "user_id": lambda x: x.nunique()})
```

# 1141. User Activity for the Past 30 Days I
sql
```sql
# Careful: the date difference must be at most 29 days (< 30), so that 2019-07-27 itself is included.
select activity_date day, count(distinct user_id) active_users
from activity
where datediff('2019-07-27', activity_date) < 30
group by activity_date

select activity_date day, count(distinct user_id) active_users
from activity
where activity_date > date_sub('2019-07-27', interval 30 day)
and activity_date <= '2019-07-27'
group by activity_date
```

python
```python
import pandas as pd

activity['activity_date'] = pd.to_datetime(activity['activity_date'])
activity['day'] = (pd.Timestamp('2019-07-27') - activity['activity_date']).dt.days
# Bound both sides, as in the second SQL version.
dat = activity[(activity['day'] >= 0) & (activity['day'] < 30)]
dat.groupby(['activity_date'])['user_id'].nunique().to_frame('active_users').reset_index()
```



# 1142. User Activity for the Past 30 Days II
sql
```sql
# Note: when there are no sessions, 0 must still be returned instead of null.
select ifnull(round(count(distinct session_id)/count(distinct user_id), 2), 0) average_sessions_per_user
from activity
where datediff('2019-07-27', activity_date) < 30

#v2
select ifnull(round(sum(t.amount)/count(user_id), 2), 0) average_sessions_per_user
from
    (select user_id, count(distinct session_id) amount
     from activity
     where datediff('2019-07-27', activity_date) < 30
     group by user_id) t
```

# 1149. Article Views II
oracle sql
```sql
#v1: exceeds the time limit
select distinct viewer_id as id
from Views
group by viewer_id, view_date
having count(distinct article_id) > 1
order by viewer_id
```

python
```python
dat = Views.groupby(['viewer_id', 'view_date'])['article_id'].nunique().to_frame('count').reset_index()
dat[dat['count'] > 1]['viewer_id'].drop_duplicates().sort_values()
```
# 1158. Market Analysis I
sql
```sql
select u.user_id as buyer_id, u.join_date, count(distinct order_id) as orders_in_2019
from Users u left join Orders o on u.user_id = o.buyer_id and year(o.order_date) = 2019
group by u.user_id

#v2
select user_id buyer_id, join_date,
       sum(if(year(order_date) = '2019', 1, 0)) orders_in_2019
from users left join orders
on user_id = buyer_id
group by user_id
```

```python
df = pd.merge(Users, Orders, left_on = ['user_id'], right_on = ['buyer_id'], how = 'left')
df['date'] = pd.to_datetime(df['order_date'])
df['year'] = df['date'].dt.year
# Null out orders outside 2019 instead of dropping rows, so users with no
# 2019 orders are kept with a count of 0, as in the SQL left join.
df.loc[df['year'] != 2019, 'order_id'] = None
df.groupby(['user_id', 'join_date'])['order_id'].nunique().to_frame('orders_in_2019').reset_index()
```

# 1159. Market Analysis II
error message I met for this query:
```sql
select o.seller_id, (case when u.favorite_brand = i.item_brand then 'yes' else 'no' end) as '2nd_item_fav_brand'
from
    (select u.user_id, o.seller_id, u.favorite_brand, i.item_brand,
            row_number() over (partition by seller_id order by order_date) as rn
     from Users u left join Orders o on u.user_id = o.seller_id join Items i
     on o.item_id = i.item_id) s
where rn = 2
```
error message: [SQL Server] The multi-part identifier "o.seller_id" could not be bound. (4104)
reasons: https://stackoverflow.com/questions/7314134/the-multi-part-identifier-could-not-be-bound

sql
```sql
SELECT
    u.user_id seller_id,
    (case when u.favorite_brand = i.item_brand
          then 'yes'
          else 'no'
     end) '2nd_item_fav_brand'
FROM
    Users u left join
    (SELECT seller_id, item_id, row_number() over (partition by seller_id order by order_date) as ranking
     FROM Orders) t
ON
    u.user_id = t.seller_id and ranking = 2
LEFT JOIN Items i
ON
    t.item_id = i.item_id
# Frustrating problem: it looks simple but keeps erroring out. Revisit later.
```
python
```python
Orders['rn'] = Orders.groupby(['seller_id'])['order_date'].rank(method = 'first')
dat = pd.merge(Users, Orders, left_on = 'user_id', right_on = 'seller_id', how = 'left')
dat = dat[dat['rn'] == 2]
dat1 = pd.merge(dat, Items, left_on = 'item_id', right_on = 'item_id', how = 'left')
#case when
dat1.loc[(dat1.favorite_brand == dat1.item_brand), '2nd_item_fav_brand'] = 'Yes'
dat1.loc[(dat1.favorite_brand != dat1.item_brand), '2nd_item_fav_brand'] = 'No'

dat1[['seller_id', '2nd_item_fav_brand']]
```
# 1164. Product Price at a Given Date
sql
```sql
(select product_id, new_price as price
 from (select *,
              row_number() over (partition by product_id order by datediff(day, change_date, '2019-08-16')) as rn
       from Products
       where datediff(day, change_date, '2019-08-16') >= 0) s
 where rn = 1)
union
(select product_id as product_id, 10 as price
 from Products
 group by product_id
 having min(change_date) > '2019-08-16')

#v2:
(select product_id as product_id, 10 as price
 from Products
 group by product_id
 having min(change_date) > '2019-08-16')
union
(select product_id, new_price as price
 from Products
 where (product_id, change_date) in
     (select product_id, max(change_date) as max_date
      from Products
      where change_date <= '2019-08-16'
      group by product_id))
```

python
```python
import pandas as pd

Products['change_date'] = pd.to_datetime(Products['change_date'])
Products['day'] = (pd.Timestamp('2019-08-16') - Products['change_date']).dt.days
Products1 = Products[Products['day'] >= 0].copy()
Products1['rn'] = Products1.groupby(['product_id'])['day'].rank(method = 'first')
dat1 = Products1[Products1['rn'] == 1][['product_id', 'new_price']].rename(columns = {'new_price': 'price'})

dat2 = Products.groupby(['product_id'])['change_date'].min().to_frame('min').reset_index()
dat2 = dat2[dat2['min'] > pd.Timestamp('2019-08-16')][['product_id']]
dat2['price'] = 10
pd.concat([dat1, dat2])
```

# 1174. Immediate Food Delivery II
sql
```sql
#v1:
select round(100*sum(case when order_date = customer_pref_delivery_date then 1 else 0 end)/count(*), 2) immediate_percentage
from Delivery
where (customer_id, order_date) in
    (select customer_id, min(order_date) mindate
     from Delivery
     group by customer_id)
```

# 1193. Monthly Transactions I
sql
```sql
v1: mysql
select DATE_FORMAT(trans_date, "%Y-%m") as month, country, count(*) as trans_count,
       sum(case when state = 'approved' then 1 else 0 end) as approved_count,
       sum(amount) as trans_total_amount,
       sum(case when state = 'approved' then amount else 0 end) as approved_total_amount
from Transactions
group by country, DATE_FORMAT(trans_date, "%Y-%m")
```

python
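The python block was left empty; a minimal sketch of the same aggregation, assuming a `Transactions` frame with `trans_date`, `country`, `state`, and `amount` columns:

```python
import pandas as pd

t = Transactions.copy()
t['month'] = pd.to_datetime(t['trans_date']).dt.strftime('%Y-%m')
t['approved'] = (t['state'] == 'approved').astype(int)
t['approved_amount'] = t['amount'] * t['approved']
(t.groupby(['month', 'country'])
  .agg(trans_count = ('state', 'size'),
       approved_count = ('approved', 'sum'),
       trans_total_amount = ('amount', 'sum'),
       approved_total_amount = ('approved_amount', 'sum'))
  .reset_index())
```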