Datasets:
answer (string) | context (string) | question (string) |
---|---|---|
"SELECT COUNT(*) FROM head WHERE age > 56" | "CREATE TABLE head (age INTEGER)" | "How many heads of the departments are older than 56 ?" |
"SELECT name, born_state, age FROM head ORDER BY age" | "CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)" | "List the name, born state and age of the heads of departments ordered by age." |
"SELECT creation, name, budget_in_billions FROM department" | "CREATE TABLE department (creation VARCHAR, name VARCHAR, budget_in_billions VARCHAR)" | "List the creation year, name and budget of each department." |
"SELECT MAX(budget_in_billions), MIN(budget_in_billions) FROM department" | "CREATE TABLE department (budget_in_billions INTEGER)" | "What are the maximum and minimum budget of the departments?" |
"SELECT AVG(num_employees) FROM department WHERE ranking BETWEEN 10 AND 15" | "CREATE TABLE department (num_employees INTEGER, ranking INTEGER)" | "What is the average number of employees of the departments whose rank is between 10 and 15?" |
"SELECT name FROM head WHERE born_state <> 'California'" | "CREATE TABLE head (name VARCHAR, born_state VARCHAR)" | "What are the names of the heads who are born outside the California state?" |
"SELECT DISTINCT T1.creation FROM department AS T1 JOIN management AS T2 ON T1.department_id = T2.department_id JOIN head AS T3 ON T2.head_id = T3.head_id WHERE T3.born_state = 'Alabama'" | "CREATE TABLE department (creation VARCHAR, department_id VARCHAR); CREATE TABLE management (department_id VARCHAR, head_id VARCHAR); CREATE TABLE head (head_id VARCHAR, born_state VARCHAR)" | "What are the distinct creation years of the departments managed by a secretary born in state 'Alabama'?" |
"SELECT born_state FROM head GROUP BY born_state HAVING COUNT(*) >= 3" | "CREATE TABLE head (born_state VARCHAR)" | "What are the names of the states where at least 3 heads were born?" |
"SELECT creation FROM department GROUP BY creation ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE department (creation VARCHAR)" | "In which year were most departments established?" |
"SELECT T1.name, T1.num_employees FROM department AS T1 JOIN management AS T2 ON T1.department_id = T2.department_id WHERE T2.temporary_acting = 'Yes'" | "CREATE TABLE management (department_id VARCHAR, temporary_acting VARCHAR); CREATE TABLE department (name VARCHAR, num_employees VARCHAR, department_id VARCHAR)" | "Show the name and number of employees for the departments managed by heads whose temporary acting value is 'Yes'?" |
"SELECT COUNT(DISTINCT temporary_acting) FROM management" | "CREATE TABLE management (temporary_acting VARCHAR)" | "How many acting statuses are there?" |
"SELECT COUNT(*) FROM department WHERE NOT department_id IN (SELECT department_id FROM management)" | "CREATE TABLE management (department_id VARCHAR); CREATE TABLE department (department_id VARCHAR)" | "How many departments are led by heads who are not mentioned?" |
"SELECT DISTINCT T1.age FROM management AS T2 JOIN head AS T1 ON T1.head_id = T2.head_id WHERE T2.temporary_acting = 'Yes'" | "CREATE TABLE head (age VARCHAR, head_id VARCHAR); CREATE TABLE management (head_id VARCHAR, temporary_acting VARCHAR)" | "What are the distinct ages of the heads who are acting?" |
"SELECT T3.born_state FROM department AS T1 JOIN management AS T2 ON T1.department_id = T2.department_id JOIN head AS T3 ON T2.head_id = T3.head_id WHERE T1.name = 'Treasury' INTERSECT SELECT T3.born_state FROM department AS T1 JOIN management AS T2 ON T1.department_id = T2.department_id JOIN head AS T3 ON T2.head_id = T3.head_id WHERE T1.name = 'Homeland Security'" | "CREATE TABLE management (department_id VARCHAR, head_id VARCHAR); CREATE TABLE head (born_state VARCHAR, head_id VARCHAR); CREATE TABLE department (department_id VARCHAR, name VARCHAR)" | "List the states where both the secretary of 'Treasury' department and the secretary of 'Homeland Security' were born." |
"SELECT T1.department_id, T1.name, COUNT(*) FROM management AS T2 JOIN department AS T1 ON T1.department_id = T2.department_id GROUP BY T1.department_id HAVING COUNT(*) > 1" | "CREATE TABLE management (department_id VARCHAR); CREATE TABLE department (department_id VARCHAR, name VARCHAR)" | "Which department has more than 1 head at a time? List the id, name and the number of heads." |
"SELECT head_id, name FROM head WHERE name LIKE '%Ha%'" | "CREATE TABLE head (head_id VARCHAR, name VARCHAR)" | "Which head's name has the substring 'Ha'? List the id and name." |
"SELECT COUNT(*) FROM farm" | "CREATE TABLE farm (Id VARCHAR)" | "How many farms are there?" |
"SELECT Total_Horses FROM farm ORDER BY Total_Horses" | "CREATE TABLE farm (Total_Horses VARCHAR)" | "List the total number of horses on farms in ascending order." |
"SELECT Hosts FROM farm_competition WHERE Theme <> 'Aliens'" | "CREATE TABLE farm_competition (Hosts VARCHAR, Theme VARCHAR)" | "What are the hosts of competitions whose theme is not "Aliens"?" |
"SELECT Theme FROM farm_competition ORDER BY YEAR" | "CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)" | "What are the themes of farm competitions sorted by year in ascending order?" |
"SELECT AVG(Working_Horses) FROM farm WHERE Total_Horses > 5000" | "CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)" | "What is the average number of working horses of farms with more than 5000 total number of horses?" |
"SELECT MAX(Cows), MIN(Cows) FROM farm" | "CREATE TABLE farm (Cows INTEGER)" | "What are the maximum and minimum number of cows across all farms." |
"SELECT COUNT(DISTINCT Status) FROM city" | "CREATE TABLE city (Status VARCHAR)" | "How many different statuses do cities have?" |
"SELECT Official_Name FROM city ORDER BY Population DESC" | "CREATE TABLE city (Official_Name VARCHAR, Population VARCHAR)" | "List official names of cities in descending order of population." |
"SELECT Official_Name, Status FROM city ORDER BY Population DESC LIMIT 1" | "CREATE TABLE city (Official_Name VARCHAR, Status VARCHAR, Population VARCHAR)" | "List the official name and status of the city with the largest population." |
"SELECT T2.Year, T1.Official_Name FROM city AS T1 JOIN farm_competition AS T2 ON T1.City_ID = T2.Host_city_ID" | "CREATE TABLE city (Official_Name VARCHAR, City_ID VARCHAR); CREATE TABLE farm_competition (Year VARCHAR, Host_city_ID VARCHAR)" | "Show the years and the official names of the host cities of competitions." |
"SELECT T1.Official_Name FROM city AS T1 JOIN farm_competition AS T2 ON T1.City_ID = T2.Host_city_ID GROUP BY T2.Host_city_ID HAVING COUNT(*) > 1" | "CREATE TABLE farm_competition (Host_city_ID VARCHAR); CREATE TABLE city (Official_Name VARCHAR, City_ID VARCHAR)" | "Show the official names of the cities that have hosted more than one competition." |
"SELECT T1.Status FROM city AS T1 JOIN farm_competition AS T2 ON T1.City_ID = T2.Host_city_ID GROUP BY T2.Host_city_ID ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE city (Status VARCHAR, City_ID VARCHAR); CREATE TABLE farm_competition (Host_city_ID VARCHAR)" | "Show the status of the city that has hosted the greatest number of competitions." |
"SELECT T2.Theme FROM city AS T1 JOIN farm_competition AS T2 ON T1.City_ID = T2.Host_city_ID WHERE T1.Population > 1000" | "CREATE TABLE city (City_ID VARCHAR, Population INTEGER); CREATE TABLE farm_competition (Theme VARCHAR, Host_city_ID VARCHAR)" | "Please show the themes of competitions with host cities having populations larger than 1000." |
"SELECT Status, AVG(Population) FROM city GROUP BY Status" | "CREATE TABLE city (Status VARCHAR, Population INTEGER)" | "Please show the different statuses of cities and the average population of cities with each status." |
"SELECT Status FROM city GROUP BY Status ORDER BY COUNT(*)" | "CREATE TABLE city (Status VARCHAR)" | "Please show the different statuses, ordered by the number of cities that have each." |
"SELECT Status FROM city GROUP BY Status ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE city (Status VARCHAR)" | "List the most common type of Status across cities." |
"SELECT Official_Name FROM city WHERE NOT City_ID IN (SELECT Host_city_ID FROM farm_competition)" | "CREATE TABLE farm_competition (Official_Name VARCHAR, City_ID VARCHAR, Host_city_ID VARCHAR); CREATE TABLE city (Official_Name VARCHAR, City_ID VARCHAR, Host_city_ID VARCHAR)" | "List the official names of cities that have not held any competition." |
"SELECT Status FROM city WHERE Population > 1500 INTERSECT SELECT Status FROM city WHERE Population < 500" | "CREATE TABLE city (Status VARCHAR, Population INTEGER)" | "Show the status shared by cities with population bigger than 1500 and smaller than 500." |
"SELECT Official_Name FROM city WHERE Population > 1500 OR Population < 500" | "CREATE TABLE city (Official_Name VARCHAR, Population VARCHAR)" | "Find the official names of cities with population bigger than 1500 or smaller than 500." |
"SELECT Census_Ranking FROM city WHERE Status <> "Village"" | "CREATE TABLE city (Census_Ranking VARCHAR, Status VARCHAR)" | "Show the census ranking of cities whose status are not "Village"." |
"SELECT T1.course_name FROM courses AS T1 JOIN student_course_registrations AS T2 ON T1.course_id = T2.course_Id GROUP BY T1.course_id ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE courses (course_name VARCHAR, course_id VARCHAR); CREATE TABLE student_course_registrations (course_Id VARCHAR)" | "which course has most number of registered students?" |
"SELECT student_id FROM student_course_registrations GROUP BY student_id ORDER BY COUNT(*) LIMIT 1" | "CREATE TABLE student_course_registrations (student_id VARCHAR)" | "what is id of students who registered some courses but the least number of courses in these students?" |
"SELECT T2.first_name, T2.last_name FROM candidates AS T1 JOIN people AS T2 ON T1.candidate_id = T2.person_id" | "CREATE TABLE candidates (candidate_id VARCHAR); CREATE TABLE people (first_name VARCHAR, last_name VARCHAR, person_id VARCHAR)" | "what are the first name and last name of all candidates?" |
"SELECT student_id FROM students WHERE NOT student_id IN (SELECT student_id FROM student_course_attendance)" | "CREATE TABLE student_course_attendance (student_id VARCHAR); CREATE TABLE students (student_id VARCHAR)" | "List the id of students who never attends courses?" |
"SELECT student_id FROM student_course_attendance" | "CREATE TABLE student_course_attendance (student_id VARCHAR)" | "List the id of students who attended some courses?" |
"SELECT T1.student_id, T2.course_name FROM student_course_registrations AS T1 JOIN courses AS T2 ON T1.course_id = T2.course_id" | "CREATE TABLE courses (course_name VARCHAR, course_id VARCHAR); CREATE TABLE student_course_registrations (student_id VARCHAR, course_id VARCHAR)" | "What are the ids of all students for courses and what are the names of those courses?" |
"SELECT T2.student_details FROM student_course_registrations AS T1 JOIN students AS T2 ON T1.student_id = T2.student_id ORDER BY T1.registration_date DESC LIMIT 1" | "CREATE TABLE student_course_registrations (student_id VARCHAR, registration_date VARCHAR); CREATE TABLE students (student_details VARCHAR, student_id VARCHAR)" | "What is detail of the student who most recently registered course?" |
"SELECT COUNT(*) FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = "English"" | "CREATE TABLE student_course_attendance (course_id VARCHAR); CREATE TABLE courses (course_id VARCHAR, course_name VARCHAR)" | "How many students attend course English?" |
"SELECT COUNT(*) FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T2.student_id = 171" | "CREATE TABLE courses (course_id VARCHAR); CREATE TABLE student_course_attendance (course_id VARCHAR, student_id VARCHAR)" | "How many courses do the student whose id is 171 attend?" |
"SELECT T2.candidate_id FROM people AS T1 JOIN candidates AS T2 ON T1.person_id = T2.candidate_id WHERE T1.email_address = "[email protected]"" | "CREATE TABLE candidates (candidate_id VARCHAR); CREATE TABLE people (person_id VARCHAR, email_address VARCHAR)" | "Find id of the candidate whose email is [email protected]?" |
"SELECT candidate_id FROM candidate_assessments ORDER BY assessment_date DESC LIMIT 1" | "CREATE TABLE candidate_assessments (candidate_id VARCHAR, assessment_date VARCHAR)" | "Find id of the candidate who most recently accessed the course?" |
"SELECT T1.student_details FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE students (student_details VARCHAR, student_id VARCHAR); CREATE TABLE student_course_registrations (student_id VARCHAR)" | "What is detail of the student who registered the most number of courses?" |
"SELECT T1.student_id, COUNT(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id" | "CREATE TABLE students (student_id VARCHAR); CREATE TABLE student_course_registrations (student_id VARCHAR)" | "List the id of students who registered some courses and the number of their registered courses?" |
"SELECT T3.course_name, COUNT(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id JOIN courses AS T3 ON T2.course_id = T3.course_id GROUP BY T2.course_id" | "CREATE TABLE students (student_id VARCHAR); CREATE TABLE courses (course_name VARCHAR, course_id VARCHAR); CREATE TABLE student_course_registrations (course_id VARCHAR, student_id VARCHAR)" | "How many registed students do each course have? List course name and the number of their registered students?" |
"SELECT candidate_id FROM candidate_assessments WHERE asessment_outcome_code = "Pass"" | "CREATE TABLE candidate_assessments (candidate_id VARCHAR, asessment_outcome_code VARCHAR)" | "Find id of candidates whose assessment code is "Pass"?" |
"SELECT T3.cell_mobile_number FROM candidates AS T1 JOIN candidate_assessments AS T2 ON T1.candidate_id = T2.candidate_id JOIN people AS T3 ON T1.candidate_id = T3.person_id WHERE T2.asessment_outcome_code = "Fail"" | "CREATE TABLE candidates (candidate_id VARCHAR); CREATE TABLE people (cell_mobile_number VARCHAR, person_id VARCHAR); CREATE TABLE candidate_assessments (candidate_id VARCHAR, asessment_outcome_code VARCHAR)" | "Find the cell mobile number of the candidates whose assessment code is "Fail"?" |
"SELECT student_id FROM student_course_attendance WHERE course_id = 301" | "CREATE TABLE student_course_attendance (student_id VARCHAR, course_id VARCHAR)" | "What are the id of students who registered course 301?" |
"SELECT student_id FROM student_course_attendance WHERE course_id = 301 ORDER BY date_of_attendance DESC LIMIT 1" | "CREATE TABLE student_course_attendance (student_id VARCHAR, course_id VARCHAR, date_of_attendance VARCHAR)" | "What is the id of the student who most recently registered course 301?" |
"SELECT DISTINCT T1.city FROM addresses AS T1 JOIN people_addresses AS T2 ON T1.address_id = T2.address_id" | "CREATE TABLE addresses (city VARCHAR, address_id VARCHAR); CREATE TABLE people_addresses (address_id VARCHAR)" | "Find distinct cities of addresses of people?" |
"SELECT DISTINCT T1.city FROM addresses AS T1 JOIN people_addresses AS T2 ON T1.address_id = T2.address_id JOIN students AS T3 ON T2.person_id = T3.student_id" | "CREATE TABLE students (student_id VARCHAR); CREATE TABLE addresses (city VARCHAR, address_id VARCHAR); CREATE TABLE people_addresses (address_id VARCHAR, person_id VARCHAR)" | "Find distinct cities of address of students?" |
"SELECT course_name FROM courses ORDER BY course_name" | "CREATE TABLE courses (course_name VARCHAR)" | "List the names of courses in alphabetical order?" |
"SELECT first_name FROM people ORDER BY first_name" | "CREATE TABLE people (first_name VARCHAR)" | "List the first names of people in alphabetical order?" |
"SELECT student_id FROM student_course_registrations UNION SELECT student_id FROM student_course_attendance" | "CREATE TABLE student_course_attendance (student_id VARCHAR); CREATE TABLE student_course_registrations (student_id VARCHAR)" | "What are the id of students who registered courses or attended courses?" |
"SELECT course_id FROM student_course_registrations WHERE student_id = 121 UNION SELECT course_id FROM student_course_attendance WHERE student_id = 121" | "CREATE TABLE student_course_attendance (course_id VARCHAR, student_id VARCHAR); CREATE TABLE student_course_registrations (course_id VARCHAR, student_id VARCHAR)" | "Find the id of courses which are registered or attended by student whose id is 121?" |
"SELECT * FROM student_course_registrations WHERE NOT student_id IN (SELECT student_id FROM student_course_attendance)" | "CREATE TABLE student_course_attendance (student_id VARCHAR); CREATE TABLE student_course_registrations (student_id VARCHAR)" | "What are all info of students who registered courses but not attended courses?" |
"SELECT T2.student_id FROM courses AS T1 JOIN student_course_registrations AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = "statistics" ORDER BY T2.registration_date" | "CREATE TABLE student_course_registrations (student_id VARCHAR, course_id VARCHAR, registration_date VARCHAR); CREATE TABLE courses (course_id VARCHAR, course_name VARCHAR)" | "List the id of students who registered course statistics in the order of registration date." |
"SELECT T2.student_id FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = "statistics" ORDER BY T2.date_of_attendance" | "CREATE TABLE student_course_attendance (student_id VARCHAR, course_id VARCHAR, date_of_attendance VARCHAR); CREATE TABLE courses (course_id VARCHAR, course_name VARCHAR)" | "List the id of students who attended statistics courses in the order of attendance date." |
"SELECT date FROM weather WHERE max_temperature_f > 85" | "CREATE TABLE weather (date VARCHAR, max_temperature_f INTEGER)" | "Give me the dates when the max temperature was higher than 85." |
"SELECT name FROM station WHERE lat < 37.5" | "CREATE TABLE station (name VARCHAR, lat INTEGER)" | "What are the names of stations that have latitude lower than 37.5?" |
"SELECT city, MAX(lat) FROM station GROUP BY city" | "CREATE TABLE station (city VARCHAR, lat INTEGER)" | "For each city, return the highest latitude among its stations." |
"SELECT start_station_name, end_station_name FROM trip ORDER BY id LIMIT 3" | "CREATE TABLE trip (start_station_name VARCHAR, end_station_name VARCHAR, id VARCHAR)" | "Give me the start station and end station for the trips with the three oldest id." |
"SELECT AVG(lat), AVG(long) FROM station WHERE city = "San Jose"" | "CREATE TABLE station (lat INTEGER, long INTEGER, city VARCHAR)" | "What is the average latitude and longitude of stations located in San Jose city?" |
"SELECT id FROM trip ORDER BY duration LIMIT 1" | "CREATE TABLE trip (id VARCHAR, duration VARCHAR)" | "What is the id of the trip that has the shortest duration?" |
"SELECT SUM(duration), MAX(duration) FROM trip WHERE bike_id = 636" | "CREATE TABLE trip (duration INTEGER, bike_id VARCHAR)" | "What is the total and maximum duration of trips with bike id 636?" |
"SELECT zip_code, AVG(mean_temperature_f) FROM weather WHERE date LIKE "8/%" GROUP BY zip_code" | "CREATE TABLE weather (zip_code VARCHAR, mean_temperature_f INTEGER, date VARCHAR)" | "For each zip code, return the average mean temperature of August there." |
"SELECT COUNT(DISTINCT bike_id) FROM trip" | "CREATE TABLE trip (bike_id VARCHAR)" | "From the trip record, find the number of unique bikes." |
"SELECT COUNT(DISTINCT city) FROM station" | "CREATE TABLE station (city VARCHAR)" | "What is the number of distinct cities the stations are located at?" |
"SELECT COUNT(*) FROM station WHERE city = "Mountain View"" | "CREATE TABLE station (city VARCHAR)" | "How many stations does Mountain View city has?" |
"SELECT DISTINCT T1.name FROM station AS T1 JOIN status AS T2 ON T1.id = T2.station_id WHERE T2.bikes_available = 7" | "CREATE TABLE station (name VARCHAR, id VARCHAR); CREATE TABLE status (station_id VARCHAR, bikes_available VARCHAR)" | "Return the unique name for stations that have ever had 7 bikes available." |
"SELECT start_station_name, start_station_id FROM trip WHERE start_date LIKE "8/%" GROUP BY start_station_name ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE trip (start_station_name VARCHAR, start_station_id VARCHAR, start_date VARCHAR)" | "Which start station had the most trips starting from August? Give me the name and id of the station." |
"SELECT bike_id FROM trip WHERE zip_code = 94002 GROUP BY bike_id ORDER BY COUNT(*) DESC LIMIT 1" | "CREATE TABLE trip (bike_id VARCHAR, zip_code VARCHAR)" | "Which bike traveled the most often in zip code 94002?" |
"SELECT COUNT(*) FROM weather WHERE mean_humidity > 50 AND mean_visibility_miles > 8" | "CREATE TABLE weather (mean_humidity VARCHAR, mean_visibility_miles VARCHAR)" | "How many days had both mean humidity above 50 and mean visibility above 8?" |
"SELECT T1.lat, T1.long, T1.city FROM station AS T1 JOIN trip AS T2 ON T1.id = T2.start_station_id ORDER BY T2.duration LIMIT 1" | "CREATE TABLE trip (start_station_id VARCHAR, duration VARCHAR); CREATE TABLE station (lat VARCHAR, long VARCHAR, city VARCHAR, id VARCHAR)" | "What is the latitude, longitude, city of the station from which the shortest trip started?" |
"SELECT id FROM station WHERE city = "San Francisco" INTERSECT SELECT station_id FROM status GROUP BY station_id HAVING AVG(bikes_available) > 10" | "CREATE TABLE status (id VARCHAR, station_id VARCHAR, city VARCHAR, bikes_available INTEGER); CREATE TABLE station (id VARCHAR, station_id VARCHAR, city VARCHAR, bikes_available INTEGER)" | "What are the ids of stations that are located in San Francisco and have average bike availability above 10." |
"SELECT T1.name, T1.id FROM station AS T1 JOIN status AS T2 ON T1.id = T2.station_id GROUP BY T2.station_id HAVING AVG(T2.bikes_available) > 14 UNION SELECT name, id FROM station WHERE installation_date LIKE "12/%"" | "CREATE TABLE station (name VARCHAR, id VARCHAR); CREATE TABLE station (name VARCHAR, id VARCHAR, installation_date VARCHAR); CREATE TABLE status (station_id VARCHAR, bikes_available INTEGER)" | "What are the names and ids of stations that had more than 14 bikes available on average or were installed in December?" |
"SELECT cloud_cover FROM weather WHERE zip_code = 94107 GROUP BY cloud_cover ORDER BY COUNT(*) DESC LIMIT 3" | "CREATE TABLE weather (cloud_cover VARCHAR, zip_code VARCHAR)" | "What is the 3 most common cloud cover rates in the region of zip code 94107?" |
"SELECT zip_code FROM weather GROUP BY zip_code ORDER BY AVG(mean_sea_level_pressure_inches) LIMIT 1" | "CREATE TABLE weather (zip_code VARCHAR, mean_sea_level_pressure_inches INTEGER)" | "What is the zip code in which the average mean sea level pressure is the lowest?" |
"SELECT AVG(bikes_available) FROM status WHERE NOT station_id IN (SELECT id FROM station WHERE city = "Palo Alto")" | "CREATE TABLE status (bikes_available INTEGER, station_id VARCHAR, id VARCHAR, city VARCHAR); CREATE TABLE station (bikes_available INTEGER, station_id VARCHAR, id VARCHAR, city VARCHAR)" | "What is the average bike availability in stations that are not located in Palo Alto?" |
"SELECT AVG(long) FROM station WHERE NOT id IN (SELECT station_id FROM status GROUP BY station_id HAVING MAX(bikes_available) > 10)" | "CREATE TABLE station (long INTEGER, id VARCHAR, station_id VARCHAR, bikes_available INTEGER); CREATE TABLE status (long INTEGER, id VARCHAR, station_id VARCHAR, bikes_available INTEGER)" | "What is the average longitude of stations that never had bike availability more than 10?" |
"SELECT date, zip_code FROM weather WHERE max_temperature_f >= 80" | "CREATE TABLE weather (date VARCHAR, zip_code VARCHAR, max_temperature_f VARCHAR)" | "When and in what zip code did max temperature reach 80?" |
"SELECT T1.id FROM trip AS T1 JOIN weather AS T2 ON T1.zip_code = T2.zip_code GROUP BY T2.zip_code HAVING AVG(T2.mean_temperature_f) > 60" | "CREATE TABLE trip (id VARCHAR, zip_code VARCHAR); CREATE TABLE weather (zip_code VARCHAR, mean_temperature_f INTEGER)" | "Give me ids for all the trip that took place in a zip code area with average mean temperature above 60." |
"SELECT zip_code, COUNT(*) FROM weather WHERE max_wind_Speed_mph >= 25 GROUP BY zip_code" | "CREATE TABLE weather (zip_code VARCHAR, max_wind_Speed_mph VARCHAR)" | "For each zip code, return how many times max wind speed reached 25?" |
"SELECT date, zip_code FROM weather WHERE min_dew_point_f < (SELECT MIN(min_dew_point_f) FROM weather WHERE zip_code = 94107)" | "CREATE TABLE weather (date VARCHAR, zip_code VARCHAR, min_dew_point_f INTEGER)" | "On which day and in which zip code was the min dew point lower than any day in zip code 94107?" |
"SELECT T1.id, T2.installation_date FROM trip AS T1 JOIN station AS T2 ON T1.end_station_id = T2.id" | "CREATE TABLE station (installation_date VARCHAR, id VARCHAR); CREATE TABLE trip (id VARCHAR, end_station_id VARCHAR)" | "For each trip, return its ending station's installation date." |
"SELECT T1.id FROM trip AS T1 JOIN station AS T2 ON T1.start_station_id = T2.id ORDER BY T2.dock_count DESC LIMIT 1" | "CREATE TABLE trip (id VARCHAR, start_station_id VARCHAR); CREATE TABLE station (id VARCHAR, dock_count VARCHAR)" | "Which trip started from the station with the largest dock count? Give me the trip id." |
"SELECT COUNT(*) FROM trip AS T1 JOIN station AS T2 ON T1.end_station_id = T2.id WHERE T2.city <> "San Francisco"" | "CREATE TABLE trip (end_station_id VARCHAR); CREATE TABLE station (id VARCHAR, city VARCHAR)" | "Count the number of trips that did not end in San Francisco city." |
"SELECT date FROM weather WHERE zip_code = 94107 AND EVENTS <> "Fog" AND EVENTS <> "Rain"" | "CREATE TABLE weather (date VARCHAR, EVENTS VARCHAR, zip_code VARCHAR)" | "In zip code 94107, on which day neither Fog nor Rain was not observed?" |
"SELECT id FROM station WHERE lat > 37.4 EXCEPT SELECT station_id FROM status GROUP BY station_id HAVING MIN(bikes_available) < 7" | "CREATE TABLE status (id VARCHAR, station_id VARCHAR, lat INTEGER, bikes_available INTEGER); CREATE TABLE station (id VARCHAR, station_id VARCHAR, lat INTEGER, bikes_available INTEGER)" | "What are the ids of stations that have latitude above 37.4 and never had bike availability below 7?" |
"SELECT T1.name FROM station AS T1 JOIN status AS T2 ON T1.id = T2.station_id GROUP BY T2.station_id HAVING AVG(bikes_available) > 10 EXCEPT SELECT name FROM station WHERE city = "San Jose"" | "CREATE TABLE station (name VARCHAR, id VARCHAR); CREATE TABLE status (station_id VARCHAR); CREATE TABLE station (name VARCHAR, city VARCHAR, bikes_available INTEGER)" | "What are names of stations that have average bike availability above 10 and are not located in San Jose city?" |
"SELECT name, lat, city FROM station ORDER BY lat LIMIT 1" | "CREATE TABLE station (name VARCHAR, lat VARCHAR, city VARCHAR)" | "What are the name, latitude, and city of the station with the lowest latitude?" |
"SELECT date, mean_temperature_f, mean_humidity FROM weather ORDER BY max_gust_speed_mph DESC LIMIT 3" | "CREATE TABLE weather (date VARCHAR, mean_temperature_f VARCHAR, mean_humidity VARCHAR, max_gust_speed_mph VARCHAR)" | "What are the date, mean temperature and mean humidity for the top 3 days with the largest max gust speeds?" |
"SELECT city, COUNT(*) FROM station GROUP BY city HAVING COUNT(*) >= 15" | "CREATE TABLE station (city VARCHAR)" | "List the name and the number of stations for all the cities that have at least 15 stations." |
"SELECT start_station_id, start_station_name FROM trip GROUP BY start_station_name HAVING COUNT(*) >= 200" | "CREATE TABLE trip (start_station_id VARCHAR, start_station_name VARCHAR)" | "Find the ids and names of stations from which at least 200 trips started." |
"SELECT zip_code FROM weather GROUP BY zip_code HAVING AVG(mean_visibility_miles) < 10" | "CREATE TABLE weather (zip_code VARCHAR, mean_visibility_miles INTEGER)" | "Find the zip code in which the average mean visibility is lower than 10." |
Overview
This dataset builds from WikiSQL and Spider.
There are 78,577 examples of natural language queries, SQL CREATE TABLE statements, and SQL Query answering the question using the CREATE statement as context. This dataset was built with text-to-sql LLMs in mind, intending to prevent hallucination of column and table names often seen when trained on text-to-sql datasets. The CREATE TABLE statement can often be copy and pasted from different DBMS and provides table names, column names and their data types. By providing just the CREATE TABLE statement as context, we can hopefully provide better grounding for models without having to provide actual rows of data, limiting token usage and exposure to private, sensitive, or proprietary data.
Cleansing and Augmentation
Cleansing and data augmentation has been done on the combined WikiSQL and Spider data. I used SQLGlot on queries from Spider and WikiSQL and parsed them into different tables and columns, I then inferred column data types based on usage of >
<
operators as well as the use of MIN()
MAX()
AVG()
SUM()
on columns. While this isn't perfect, it increases the likelihood of inferring the correct datatype for a column, the columns otherwise default to VARCHAR type. These tables and columns are then used to generate CREATE TABLE statements using the inferred types. SQLGlot is used again to ensure both the SQL queries and CREATE TABLE statements parse without errors.
Some queries that do not have column names, e.g. SELECT * FROM table, have a default Id column added to the CREATE TABLE statement. Some other queries which use the generic table
as the FROM table have instead been changed to a variation of table_name_1
or some other number which is also reflected in the CREATE TABLE statement.
TODO
- Further augment the data by converting queries and CREATE TABLE statements into different SQL dialects, this can be done with SQLGlot. Reference to the dialect might also be added to the question.
- Support other informative contexts beyond CREATE TABLE
Random sample:
{
"question": "Please show the themes of competitions with host cities having populations larger than 1000.",
"context": "CREATE TABLE city (City_ID VARCHAR, Population INTEGER); CREATE TABLE farm_competition (Theme VARCHAR, Host_city_ID VARCHAR)",
"answer": "SELECT T2.Theme FROM city AS T1 JOIN farm_competition AS T2 ON T1.City_ID = T2.Host_city_ID WHERE T1.Population > 1000"
},
{
"question": "Please show the different statuses of cities and the average population of cities with each status.",
"context": "CREATE TABLE city (Status VARCHAR, Population INTEGER)",
"answer": "SELECT Status, AVG(Population) FROM city GROUP BY Status"
},
- Downloads last month
- 2,985