Java Minimal Perfect Hash Table Implementation Using Cichelli’s Method

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Stack;

public class Driver {

    public static ArrayList<String> words = new ArrayList<>();
    public static HashMap<String, Integer> wordsValuesMap = new HashMap<>();
    public static HashMap<String, Integer> testValuesMap = new HashMap<>();
    public static HashMap<Character, Integer> letterValuesMap = new HashMap<>();
    public static HashMap<Character, Integer> gValuesMap = new HashMap<>();

    // g is the value associated with a given letter. The value ranges between 0 and
    // some maximum value. The max value is one of the inputs to the Cichelli
    // algorithm.
    public static int g = 0;
    public static int max = 0;

    public static void main(String[] args) {
        CountLetters();

        System.out.println("Letters Values Map " + letterValuesMap.toString());
        System.out.println("--------------------------------------------------");
        System.out.println("Words Values Map " + wordsValuesMap.toString());
        System.out.println("--------------------------------------------------");
        System.out.println("G Values Map " + gValuesMap.toString());
    }

    // NOTE: Cichelli's method normally uses per-letter values here, i.e.
    // gValuesMap.get(first) + gValuesMap.get(last), rather than a single
    // global g; see the sketch after this class.
    public static int h(String word) {
        char first = word.charAt(0);
        char last = word.charAt(word.length() - 1);
        return (word.length() + (g * first) + (g * last)) % words.size();
    }

    public static void CountLetters() {
        String[] str = { "alabama", "maine", "montana", "nevada", "idaho" };
        for (int i = 0; i < str.length; i++) {
            words.add(str[i]); // keep the word list populated so h() can mod by words.size()
            char first = str[i].charAt(0);
            char last = str[i].charAt(str[i].length() - 1);

            if (!letterValuesMap.containsKey(first)) {
                letterValuesMap.put(first, 1);
            } else {
                letterValuesMap.put(first, letterValuesMap.get(first) + 1);
            }

            if (!letterValuesMap.containsKey(last)) {
                letterValuesMap.put(last, 1);
            } else {
                letterValuesMap.put(last, letterValuesMap.get(last) + 1);
            }
        } // end of for loop

        // sort the map ... highest values first
        letterValuesMap = sortByValues(letterValuesMap);

        // calculate the first+last word value
        for (int i = 0; i < str.length; i++) {
            char first = str[i].charAt(0);
            char last = str[i].charAt(str[i].length() - 1);
            int sum = letterValuesMap.get(first) + letterValuesMap.get(last);
            wordsValuesMap.put(str[i], sum);
        }
        wordsValuesMap = sortByValues(wordsValuesMap);

        // pick max value (max occurrence of a letter / 2)
        max = letterValuesMap.get(letterValuesMap.keySet().iterator().next()) / 2;
    }

    public static boolean cichelli(Stack<String> wordStack) {
        initGValues();
        while (!wordStack.isEmpty()) {
            String word = wordStack.pop();
            char first = word.charAt(0);
            char last = word.charAt(word.length() - 1);
            int wordLength = word.length();

            // TODO: the backtracking search over g values is unfinished here.
            // (The original condition tested `first` twice; the second test
            // was presumably meant to be `last`.)
            if (!letterValuesMap.containsKey(first) && !letterValuesMap.containsKey(last)) {

            }
        }
        return false;
    }

    public static void initGValues() {
        // init g map
        for (char c : letterValuesMap.keySet()) {
            gValuesMap.put(c, 0);
        }

        for (String word : wordsValuesMap.keySet()) {
            testValuesMap.put(word, h(word));
        }
    }

    private static <K, V extends Comparable<V>> HashMap<K, V> sortByValues(HashMap<K, V> map) {
        List<Map.Entry<K, V>> list = new LinkedList<>(map.entrySet());
        // Custom comparator: highest values first
        Collections.sort(list, new Comparator<Map.Entry<K, V>>() {
            public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        // Copy the sorted entries into a LinkedHashMap to preserve the new order
        HashMap<K, V> sortedHashMap = new LinkedHashMap<>();
        for (Map.Entry<K, V> entry : list) {
            sortedHashMap.put(entry.getKey(), entry.getValue());
        }
        return sortedHashMap;
    }

    public static void readFile() {
        String file = "data.txt";
        BufferedReader br = null;
        String line = "";

        try {
            br = new BufferedReader(new FileReader(file));
            while ((line = br.readLine()) != null) {
                String[] studentData = line.split(",");
                // addStudent(Long.parseLong(studentData[0].trim()), studentData[1].trim(),
                //            studentData[2].trim());
            } // end of while
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } // end of if
        }
    } // end of readFile
}
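The cichelli method above is left unfinished: only the skeleton of its loop is present, and the backtracking search that assigns g values is missing. For orientation, here is a minimal, hedged sketch of that search. It is not the original author's code; the class name, the recursive place helper, and the maxG bound are illustrative choices, and the word list is simply the one hard-coded in CountLetters.

import java.util.*;

public class CichelliSketch {
    static Map<Character, Integer> g = new HashMap<>();   // per-letter g values
    static Set<Integer> used = new HashSet<>();           // occupied hash slots

    // Cichelli's hash: h(w) = (length + g(first) + g(last)) mod table size.
    static int hashOf(String w, int size) {
        return (w.length() + g.get(w.charAt(0)) + g.get(w.charAt(w.length() - 1))) % size;
    }

    // Try to place words[i..] into distinct slots; backtrack on collision.
    static boolean place(List<String> words, int i, int maxG) {
        if (i == words.size()) return true;
        String w = words.get(i);
        char first = w.charAt(0), last = w.charAt(w.length() - 1);
        boolean freshFirst = !g.containsKey(first);
        boolean freshLast = !g.containsKey(last);

        // A letter whose g value is already fixed gets a one-iteration "loop";
        // a fresh letter tries every value 0..maxG.
        int loF = freshFirst ? 0 : g.get(first), hiF = freshFirst ? maxG : g.get(first);
        for (int a = loF; a <= hiF; a++) {
            g.put(first, a);
            boolean lastIsFree = freshLast && last != first;
            int loL = lastIsFree ? 0 : g.get(last), hiL = lastIsFree ? maxG : g.get(last);
            for (int b = loL; b <= hiL; b++) {
                g.put(last, b);
                int slot = hashOf(w, words.size());
                if (used.add(slot)) {                     // slot free: take it and recurse
                    if (place(words, i + 1, maxG)) return true;
                    used.remove(slot);                    // undo on failure
                }
            }
        }
        if (freshFirst) g.remove(first);                  // undo fresh assignments
        if (freshLast && last != first) g.remove(last);
        return false;
    }

    public static void main(String[] args) {
        List<String> words = Arrays.asList("alabama", "maine", "montana", "nevada", "idaho");
        int maxG = 4;                                     // assumed small search bound
        if (place(words, 0, maxG)) {
            System.out.println("g values: " + g);
            for (String w : words)
                System.out.println(w + " -> slot " + hashOf(w, words.size()));
        } else {
            System.out.println("No perfect hash found with maxG = " + maxG);
        }
    }
}

The point is the shape of the search: assign trial g values for any first/last letters not yet fixed, test the forced slot, and undo the assignment on collision. Cichelli's original method also orders the words by letter frequency first (which CountLetters above already computes) so the most constrained words are placed early.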

 

Operational : 4

1. Assignment (4): don't select Facebook or Apple; select small companies.

[Horizontal Integration is a type of strategy pursued by a company in order to strengthen its position in the industry. Within your Thompson (2020) text, read the Chapter 6 Assurance of Learning Exercise #1 related to Live Nation (https://www.livenationentertainment.com/) and respond to the following questions:

  • How has the company used horizontal mergers and acquisitions to strengthen its competitive position?
  • Are these moves primarily offensive or defensive? Please explain.
  • Has either Live Nation or Ticketmaster achieved any type of advantage based on the timing of its strategic moves?
  • Relate your response to each of the above to our coursework (Thompson text) from this week.

Submission Details:

  • Your analysis must be driven by facts, research, and data.
  • Your analysis should be 350 words.
  • Incorporate at least one course and one non-course scholarly/peer-reviewed source in your paper. All written assignments must include a cover page, introductory and concluding paragraphs, a reference page, and proper in-text citations using APA guidelines. ] – 350 words

NOTE: Make sure you choose a company that is unique.

--------------------------------------------------------------------------------

2. Discussion (4): don't select Facebook or Apple; select small companies.

[ Provide an example of a vertical or horizontal integration strategy that a firm applied, based on the reading from our Thompson text and the associated other material. How did the integration aid the company in building competitive advantage? Explain the advantages and disadvantages of applying that integration strategy in the context of the company and given our coursework during the week.

Your initial response to the discussion question should be 250 words.

You must have at least one course (our text) and one non-course scholarly/peer reviewed source in your initial posting.

Sources require in-text citations and must be incorporated into the body of the post in addition to a full APA citation at the end of the post.] – 250 words

 

Excel 2013 Homework


Independent Project 1-6

You have been hired as the accounts receivable clerk for a privately owned accounting company called Livingood Income Tax & Accounting. It is your job to track all the payments from clients every day. Your supervisor has requested that you convert your payment table to an Excel spreadsheet.

Skills Covered in This Project

• Create and save a workbook.
• Enter text and numbers.
• Change font size and attributes.
• Use AutoSum.
• Adjust column width and row height.
• Spell check.
• Apply Freeze Panes.
• Change zoom level.
• Apply a theme and Cell Styles.
• Apply page layout options.
• Hide a row.
• Rename and apply color to sheet tabs.

 

1. Open the EX2013-IndependentProject-1-6 start file. The file will be renamed automatically to include your name. Change the project file name if directed to do so by your instructor, and save it.

Note: If the workbook opens in Protected View, click the Enable Editing button in the Message Bar at the top of the workbook so you can modify it.

Important: The start file for this project is intentionally blank.

2. Apply the Organic theme to the worksheet. Change the theme font to Calibri.

Important: To ensure accurate grading for later instructions involving applying colors, instruction 2 must be completed.

3. Select A1 and type Livingood Income Tax and Accounting, press Enter, type Payment Schedule, and press Enter again.

4. Type in the remaining worksheet data from Figure 1-108.

5. Edit the title in A1 to replace the word "and" with the symbol &.

6. Edit the value in cell B5 to 451.25. Change "Over Due" in cell F4 to Overdue.

7. Apply the Title style to A1.

8. Apply formatting to cell ranges.

a. Increase the font size of A4:G11 to 12 pt.

b. Select cells B5:B11 and display the Format Cells dialog box. Select the Accounting format and change the Symbol to None.

9. Add the title Total in cell A13 and calculate the total for B13 using AutoSum. Adjust the cell range reference in the Formula bar.

10. Apply additional formatting.

a. Apply the Total cell style to cells A13:G13.

b. Select A13:G13 and increase the font size to 12 pt.

c. Bold the entries in A4:G4.

d. Center the data in A4:G4, A5:A13, D5:D13, and F5:F13



e. Select A4:G4 and open the Format Cells dialog box. Add a thick Green, Accent 1, Darker 50% bottom border and a Green, Accent 1, Lighter 80% fill color using the second color in the fifth column.

f. Select the cells in rows 6, 8, and 10 and apply the same fill color.

g. Use the Border button and apply a bottom border to cells A2:G2.

11. Adjust column width and row height.

a. Change the width of columns A:G to 14.0.

b. Change the row height for rows 4 and 13 to 19.50.

12. Hide row 12.

13. Rename Sheet1 10-27-2015 and color the sheet tab Green, Accent 1 (first color in the fifth Theme Color column).

14. Spell check the worksheet.

15. Apply Freeze Panes to B5.

16. Increase the magnification of the view to 125%.

17. Apply page layout options.

a. Change the orientation to Landscape and scale the page to fit on one page.

b. Center the worksheet horizontally on the page.

c. Click the Custom Header button and add the Insert Sheet Name field in the Left Header section. Click the Format Text button and apply the font color Green, Accent 1, Darker 50% to the header field.

d. Add the page number to the right section of the footer.

e. Select print preview in the Page Setup dialog box to view your settings.

18. Save and close the workbook (Figure 1-109).

19. Upload and save your project file.

20. Submit project for grading.

Figure 1-109: Excel 1-6 completed

 

JAVA 2.3

[IFT 102] Introduction to Java Technologies

 

Lab 2: Control Flow & Arrays

Score: 50 pts (10 pts * 5)

I. Prelab Exercises (10 pts)

 

A. Textbook Sections 5.1-5.3

 

1. Rewrite each condition below in valid Java syntax (give a boolean expression):

a. x > y > z

b. x and y are both less than 0

c. neither x nor y is less than 0

d. x is equal to y but not equal to z

 

2. Suppose gpa is a variable containing the grade point average of a student. Suppose the goal of a program is to let a student know if he/she made the Dean's List (the gpa must be 3.5 or above). Write an if... else... statement that prints out the appropriate message (either "Congratulations—you made the Dean's List" or "Sorry, you didn't make the Dean's List").

 

3. Complete the following program to determine the raise and new salary for an employee by adding if … else statements to compute the raise. The input to the program includes the current annual salary for the employee and a number indicating the performance rating (1=excellent, 2=good, and 3=poor). An employee with a rating of 1 will receive a 6% raise, an employee with a rating of 2 will receive a 4% raise, and one with a rating of 3 will receive a 1.5% raise.

 

// ************************************************************
// Salary.java
// Computes the raise and new salary for an employee
// ************************************************************
import java.util.Scanner;

public class Salary
{
    public static void main (String[] args)
    {
        double currentSalary; // current annual salary
        double rating;        // performance rating
        double raise;         // dollar amount of the raise

        Scanner scan = new Scanner(System.in);

        // Get the current salary and performance rating
        System.out.print ("Enter the current salary: ");
        currentSalary = scan.nextDouble();
        System.out.print ("Enter the performance rating: ");
        rating = scan.nextDouble();

        // Compute the raise -- use if ... else ...

        // Print the results
        System.out.println ("Amount of your raise: $" + raise);
        // Parentheses are needed so + adds the numbers before concatenating:
        System.out.println ("Your new salary: $" + (currentSalary + raise));
    }
}
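Not part of the handout, but as a hedged illustration, the missing "Compute the raise" section might be filled in along these lines (a sketch assuming the 6% / 4% / 1.5% rates listed above; it drops into the marked spot in Salary.java):

// One possible completion of the raise computation:
if (rating == 1)
    raise = currentSalary * 0.06;        // excellent: 6% raise
else if (rating == 2)
    raise = currentSalary * 0.04;        // good: 4% raise
else
    raise = currentSalary * 0.015;       // poor: 1.5% raise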

B. Textbook Section 5.4

 

In a while loop, execution of a set of statements (the body of the loop) continues until the boolean expression controlling the loop (the condition) becomes false. As for an if statement, the condition must be enclosed in parentheses. For example, the loop below prints the numbers from 1 to LIMIT:

 

final int LIMIT = 100;           // setup
int count = 1;
while (count <= LIMIT)           // condition
{                                // body
    System.out.println(count);   // -- perform task
    count = count + 1;           // -- update condition
}

 

There are three parts to a loop:

 

· The setup, or initialization. This comes before the actual loop, and is where variables are initialized in preparation for the first time through the loop.

· The condition, which is the boolean expression that controls the loop. This expression is evaluated each time through the loop. If it evaluates to true, the body of the loop is executed, and then the condition is evaluated again; if it evaluates to false, the loop terminates.

· The body of the loop. The body typically needs to do two things:

· Do some work toward the task that the loop is trying to accomplish. This might involve printing, calculation, input and output, method calls—this code can be arbitrarily complex.

· Update the condition. Something has to happen inside the loop so that the condition will eventually be false — otherwise the loop will go on forever (an infinite loop). This code can also be complex, but often it simply involves incrementing a counter or reading in a new value.

Sometimes doing the work and updating the condition are related. For example, in the loop above, the print statement is doing work, while the statement that increments count is both doing work (since the loop’s task is to print the values of count) and updating the condition (since the loop stops when count hits a certain value).

 

The loop above is an example of a count-controlled loop, that is, a loop that contains a counter (a variable that increases or decreases by a fixed value—usually 1—each time through the loop) and that stops when the counter reaches a certain value.

Not all loops with counters are count-controlled; consider the example below, which determines how many even numbers must be added together, starting at 2, to reach or exceed a given limit.

 

final int LIMIT = 16;    // (a TRACE table with columns sum, nextVal,
int count = 1;           //  and count accompanies this code in the
int sum = 0;             //  original handout; see Exercise 2a below)
int nextVal = 2;
while (sum < LIMIT)
{
    sum = sum + nextVal;
    nextVal = nextVal + 2;
    count = count + 1;
}
System.out.println("Had to add together " + (count-1) + " even numbers " +
                   "to reach value " + LIMIT + ". Sum is " + sum);

 

Note that although this loop counts how many times the body is executed, the condition does not depend on the value of count.

 

Not all loops have counters. For example, if the task in the loop above were simply to add together even numbers until the sum reached a certain limit and then print the sum (as opposed to printing the number of things added together), there would be no need for the counter. Similarly, the loop below sums integers input by the user and prints the sum; it contains no counter.

 

int sum = 0;                 // setup
String keepGoing = "y";
int nextVal;
// (assumes a Scanner named scan, as in the earlier examples)
while (keepGoing.equals("y") || keepGoing.equals("Y"))
{
    System.out.print("Enter the next integer: ");    // do work
    nextVal = scan.nextInt();
    sum = sum + nextVal;
    System.out.println("Type y or Y to keep going"); // update condition
    keepGoing = scan.next();
}
System.out.println("The sum of your integers is " + sum);

 

Exercises

 

1. In the first loop above, the println statement comes before the value of count is incremented. What would happen if you reversed the order of these statements so that count was incremented before its value was printed? Would the loop still print the same values? Explain.

 

2. Consider the second loop above.

a. Trace this loop, that is, in the table next to the code show values for variables nextVal, sum and count at each iteration. Then show what the code prints.

b. Note that when the loop terminates, the number of even numbers added together before reaching the limit is count-1, not count. How could you modify the code so that when the loop terminates, the number of things added together is simply count?

 

3. Write a while loop that will print “I love computer science!!” 100 times. Is this loop count-controlled?

 

4. Add a counter to the third example loop above (the one that reads and sums integers input by the user). After the loop, print the number of integers read as well as the sum. Just note your changes on the example code. Is your loop now count-controlled?

 

5. The code below is supposed to print the integers from 10 to 1 backwards. What is wrong with it? (Hint: there are two problems!) Correct the code so it does the right thing.

 

count = 10;

while (count >= 0)

{

System.out.println(count);

count = count + 1;

}

II. Two Meanings of Plus (10 pts)

 

In Java, the symbol + can be used to add numbers or to concatenate strings. This part illustrates both uses.

When using a string literal (a sequence of characters enclosed in double quotation marks) in Java the complete string must fit on one line. The following is NOT legal (it would result in a compile-time error).

 

System.out.println ("It is NOT okay to go to the next line
                     in a LONG string!!!");

 

The solution is to break the long string up into two shorter strings that are joined using the concatenation operator (which is the + symbol). This is discussed in Section 2.1 in the text. So the following would be legal

 

System.out.println ("It is OKAY to break a long string into " +
                    "parts and join them with a + symbol.");

 

So, when working with strings the + symbol means to concatenate the strings (join them). BUT, when working with numbers the + means what it has always meant—add!

 

1. Observing the Behavior of +

To see the behavior of + in different settings do the following:

 

a. Study the program below, which is in file PlusTest.java.

 

// ************************************************************
// PlusTest.java
//
// Demonstrate the different behaviors of the + operator
// ************************************************************
public class PlusTest
{
    // -------------------------------------------------
    // main prints some expressions using the + operator
    // -------------------------------------------------
    public static void main (String[] args)
    {
        System.out.println ("This is a long string that is the " +
                            "concatenation of two shorter strings.");
        System.out.println ("The first computer was invented about" + 55 +
                            "years ago.");
        System.out.println ("8 plus 5 is " + 8 + 5);
        System.out.println ("8 plus 5 is " + (8 + 5));
        System.out.println (8 + 5 + " equals 8 plus 5.");
    }
}

 

b. Save PlusTest.java to your directory.

 

c. Compile and run the program. For each of the last three output statements (the ones dealing with 8 plus 5) write down what was printed. Now for each explain why the computer printed what it did given that the following rules are used for +. Write out complete explanations.

· If both operands are numbers + is treated as ordinary addition. (NOTE: in the expression a + b the a and b are called the operands.)

· If at least one operand is a string the other operand is converted to a string and + is the concatenation operator.

· If an expression contains more than one operation expressions inside parentheses are evaluated first. If there are no parentheses the expression is evaluated left to right.

 

d. The statement about when the computer was invented is too scrunched up. How should that be fixed?

 

2. Writing Your Own Program With +

Now write a complete Java program that prints out the following sentence:

 

Ten robins plus 13 canaries is 23 birds.

 

Your program must use only one statement that invokes the println method. It must use the + operator both to do arithmetic and string concatenation.
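For reference, one hedged sketch of a program meeting these requirements (the class name Birds is an arbitrary choice, not part of the assignment):

public class Birds
{
    public static void main (String[] args)
    {
        // (10 + 13) is evaluated arithmetically because of the parentheses;
        // every other + here concatenates strings.
        System.out.println ("Ten robins plus " + 13 + " canaries is " +
                            (10 + 13) + " birds.");
    }
}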


III. Tracking Sales (10 pts)

 

File Sales.java contains a Java program that prompts for and reads in the sales for each of 5 salespeople in a company. It then prints out the id and amount of sales for each salesperson and the total sales. Study the code, then compile and run the program to see how it works. Now modify the program as follows:

 

1. Compute and print the average sale. (You can compute this directly from the total; no loop is necessary.)

 

2. Find and print the maximum sale. Print both the id of the salesperson with the max sale and the amount of the sale, e.g., “Salesperson 3 had the highest sale with $4500.” Note that you don’t need another loop for this; you can do it in the same loop where the values are read and the sum is computed.

 

3. Do the same for the minimum sale.

 

4. After the list, sum, average, max and min have been printed, ask the user to enter a value. Then print the id of each salesperson who exceeded that amount, and the amount of their sales. Also print the total number of salespeople whose sales exceeded the value entered.

 

5. The salespeople are objecting to having an id of 0—no one wants that designation. Modify your program so that the ids run from 1-5 instead of 0-4. Do not modify the array—just make the information for salesperson 1 reside in array location 0, and so on.

 

6. Instead of always reading in 5 sales amounts, at the beginning ask the user for the number of sales people and then create an array that is just the right size. The program can then proceed as before.

 

// ***************************************************************
// Sales.java
//
// Reads in and stores sales for each of 5 salespeople. Displays
// sales entered by salesperson id and total sales for all salespeople.
// ***************************************************************
import java.util.Scanner;

public class Sales
{
    public static void main(String[] args)
    {
        final int SALESPEOPLE = 5;
        int[] sales = new int[SALESPEOPLE];
        int sum;
        Scanner scan = new Scanner(System.in);

        for (int i=0; i<sales.length; i++)
        {
            System.out.print("Enter sales for salesperson " + i + ": ");
            sales[i] = scan.nextInt();
        }

        System.out.println("\nSalesperson   Sales");
        System.out.println("-------------------");
        sum = 0;
        for (int i=0; i<sales.length; i++)
        {
            System.out.println("     " + i + "         " + sales[i]);
            sum += sales[i];
        }
        System.out.println("\nTotal sales: " + sum);
    }
}
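To make modifications 2 and 5 concrete, here is a hedged sketch of how the input loop might track the maximum sale while reporting ids 1-5. It is an illustration under those assumptions, not the official solution:

import java.util.Scanner;

public class SalesMaxSketch
{
    public static void main(String[] args)
    {
        Scanner scan = new Scanner(System.in);
        final int SALESPEOPLE = 5;
        int[] sales = new int[SALESPEOPLE];
        int sum = 0;
        int maxSale = Integer.MIN_VALUE;   // setup before the loop
        int maxId = 0;

        for (int i=0; i<sales.length; i++)
        {
            // ids run 1-5, but the data still lives in array slots 0-4
            System.out.print("Enter sales for salesperson " + (i + 1) + ": ");
            sales[i] = scan.nextInt();
            sum += sales[i];
            if (sales[i] > maxSale)        // max tracked in the same input loop
            {
                maxSale = sales[i];
                maxId = i + 1;
            }
        }
        System.out.println("Total sales: " + sum);
        System.out.println("Average sale: " + (double) sum / SALESPEOPLE);
        System.out.println("Salesperson " + maxId + " had the highest sale with $" + maxSale);
    }
}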


IV. User and Item Average Ratings (20 pts: 10pts for problem analysis & 10 pts for Java code)

 

An online store shows a collection of products that users can purchase. Each product (or item) has a unique ID. Similarly, each user has a unique ID. Users are allowed to rate items by giving them a value between 1 and 5, with 1 representing the worst rating and 5 representing the best rating.

You are given a file that contains the ratings that different users gave for various items. Each line in this file holds three values: the user ID, the item ID, and the rating value. For example, the line

173 215 3

shows that the user with ID 173 has rated the item with ID 215, and she gave that item a rating of 3. You can assume that the user IDs run in sequence starting at 0, and the same applies to the item IDs.

You are required to write a Java program to read in all the user-item ratings, compute the average rating per item, and the average rating per user, and print the average rating for each user and for each item.

Start by analyzing your problem. Your problem analysis should clearly state:

1. The input to your program

2. The output of your program

3. The different modules of your program. To define your modules, think about how to divide your problem into sub-problems, each of which needs to be solved.

4. The algorithm for each module, and for the whole program. The algorithm for the program should simply list the sequence in which the various modules will be invoked.

 

You will find the user-item-ratings file in the assignment folder on BlackBoard. You will also find the code for reading a file, which was discussed in class, on BlackBoard.
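As a starting point for the problem analysis, here is a hedged sketch of one possible module decomposition in code. The file name ratings.txt and the accumulate helper are assumptions for illustration; the real data file comes from BlackBoard:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class RatingAverages
{
    public static void main(String[] args) throws IOException
    {
        Map<Integer, int[]> userTotals = new HashMap<>(); // id -> {sum, count}
        Map<Integer, int[]> itemTotals = new HashMap<>();

        // Module 1: read "userId itemId rating" triples, one per line.
        try (BufferedReader br = new BufferedReader(new FileReader("ratings.txt")))
        {
            String line;
            while ((line = br.readLine()) != null)
            {
                String[] parts = line.trim().split("\\s+");
                if (parts.length != 3) continue;
                int user = Integer.parseInt(parts[0]);
                int item = Integer.parseInt(parts[1]);
                int rating = Integer.parseInt(parts[2]);
                accumulate(userTotals, user, rating);     // Module 2: per-user sums
                accumulate(itemTotals, item, rating);     // Module 3: per-item sums
            }
        }

        // Module 4: print the average rating for each user and each item.
        for (Map.Entry<Integer, int[]> e : userTotals.entrySet())
            System.out.printf("user %d: average %.2f%n", e.getKey(),
                              (double) e.getValue()[0] / e.getValue()[1]);
        for (Map.Entry<Integer, int[]> e : itemTotals.entrySet())
            System.out.printf("item %d: average %.2f%n", e.getKey(),
                              (double) e.getValue()[0] / e.getValue()[1]);
    }

    private static void accumulate(Map<Integer, int[]> totals, int key, int rating)
    {
        int[] t = totals.computeIfAbsent(key, k -> new int[2]);
        t[0] += rating;  // running sum of ratings
        t[1]++;          // running count of ratings
    }
}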

 


Deliverables

1. Complete all the programming activities in sections II through IV in this lab; then zip all your Java SOURCE CODE FILES for submission.

 

2. Write a lab report in Word or PDF document. The report should be named lab2.docx or lab2.pdf and must contain the following:

 

a. The first page should be a cover page that includes the Class Number, Lab Activity Number, Date, and Instructor’s name.

 

b. Answers to all the questions in Section I: Prelab Exercises. No code file is required here. All the code involved, if any, must be copied and pasted into this report.

 

c. Provide a 1-paragraph conclusion about your experience; how long you spent completing the lab, the challenges you faced, and what you learned.

 

3. Upload your lab report and the zip file of your source code to Blackboard. DO NOT SUBMIT separate source files. If you do, they will be ignored.

 

CIS 122L

CIS-122L WEEK 8 ASSIGNMENTS

STARTS: Thursday, March 28, 2019

DUE: Midnight Wednesday, April 3, 2019

 

Part I: Textbook Microsoft Excel 2016

Read Module 8 – Working with Advanced Functions – pp. EX463-EX522 in your textbook.

 

In the buff colored sections of the textbook lesson you are to do the hands-on exercises along with the book.

 

The files you need to follow along with the book are in the Student Data Files—Excel 2016 folder in the Assignments area of the Blackboard course site.

 

In the Course Documents area, locate the Additional Instructions & Corrections for Week 8. They contain corrections and additional information for the textbook module.

 

Complete the 4 parts of the Week 8 assignments as listed below. Use the link provided on the Assignments page to upload your two completed files.

 

· Assignment #1 — Pages EX463-EX522. Read Module 8 and complete the steps in the buff colored sections. Do not submit that workbook.

 

· Optional Assignment #2 — Pages EX523. Review Exercise – Optionally complete steps 1 – 14. Do not submit that workbook. A solution will be posted in the Assignments section of Blackboard.

 

· Assignment #3 — Page EX524-EX525. Case Problem 1 – Complete steps 1 – 13 in the textbook then complete the steps 14-16 below in the same workbook. Before starting, be sure to read the “Notes for Assignment #3” below.

 

14. Select J1:L2 and give those cells a range name of your first initial and last name.

15. Save the file with your name and Rickys Popcorn (e.g. SmithJ Rickys Popcorn).

16. Submit that file using the provided link on the Assignments page by the deadline.

Notes for Assignment #3 – Module 8 Case 1:

· Step #5 – In E16:E26, you do not create a list from the values in B33:E41, just the Input Message. Whoever fills out the form then types a flavor from the list in B33:E41 manually.

 

· Step #13 – When you have entered all of the data, you should get the results shown below.

 

Part II: Certification Skills

· Assignment #4 – Complete the 15 steps below:

1. Open the AdvancedFunctions.xlsx workbook.

2. On the OrderOfOperations sheet, in B50, type Completed by followed by your name.

3. Test your understanding of order of operations by completing the OrderOfOperations sheet.

4. On the Dollar Store sheet, calculate in D7 the Total for an order with 2 items. Select D7:D9 and drag the Fill Handle down to D21 to copy the formula into the remaining four rows. If the order with 27 items is not $23.10, try again as you did not get the order of operations right in D7.

5. On the Food Pantry sheet, calculate the Value Per Person in F6. Copy down to F9. Calculate the Average of the Value Per Person in F11. The value should be $14.28.

 

6. On the Absolute sheet, calculate the Credit Due in D7 by reducing the Sale Amount by the Restocking Fee in cell G2. You must use G2 in your formula; typing 17% or 0.17 in the formula will not get any credit. Copy down to D25. The value in D25 should be $199.30.

7. On the TotalInterest sheet, calculate the total interest cost in B5 by multiplying the Rate in B4 by the Loan Amount in A5 by the Period (years) in B2. However, since you wish to copy that formula to the remainder of the table, each of the three cell addresses in that formula will need either absolute or mixed addresses. Adjust the formula with dollar signs as needed. Remember the F4 key can do that for you. Finally copy down and then across (or vice-versa) to complete the data table. The final value in J23 should be $125,000.00. If it isn’t, you need to fix the dollar signs in B5 and re-copy.

8. On the COUNTIF sheet, in M2:M5, use the COUNTIF( ) function to calculate the number of students in each of the four grades (9th, 10th, 11th, and 12th). The grade for each student is in column I. Then sum the number of students in M6. The value in M6 should be 97.

NOTE: For this and the next five problems, do not use the entire column address inside the COUNTIF( ) function, as in =COUNTIF(I:I,L2). Although it often works, it will cause problems if there is data further down the sheet. For instance, assuming the student data occupies rows 2 through 98 (the exact extent depends on the workbook), a bounded version would look like =COUNTIF($I$2:$I$98,L2).

9. On the COUNTIF sheet, use the COUNTIF( ) function to calculate the number of female and male students in M10:M11. The gender of each student is in column B. Then sum the number of students in M12. The value in M12 should be 97.

10. On the SUMIF sheet, in M2:M5, use the SUMIF( ) function to calculate the total club dues for the students in each of the four grades (9th, 10th, 11th, and 12th). The grade for each student is in column I. The Club Dues are in column J. Then sum the Total Dues in M6. The value in M6 should be 4542.

11. On the SUMIF sheet, use the SUMIF( ) function to calculate the total club dues for the female and male students in M10:M11. The gender of each student is in column B. The Club Dues are in column J. Then sum the Total Dues in M12. The value in M12 should be 4542.

12. On the AVERAGEIF sheet, in M2:M5, calculate the Average GPA for the students in each of the four grades (9th, 10th, 11th, and 12th). Format to 2 decimal places. You should get 2.96, 2.92, 2.98, and 2.97.

13. On the AVERAGEIF sheet, calculate the Average GPA for the students who do or don’t pay club dues in M10:M11. The Yes or No for whether they pay club dues is in column D. Format to 2 decimal places. You should get 2.97 and 2.94.

14. Save the file with your name and AdvancedFunctions (e.g. SmithJ AdvancedFunctions).

15. Submit this workbook using the provided link on the assignments page by the deadline.

 

Introduction to Machine Learning - Python

Introduction to Machine Learning (CS 5710)

Assignment 2

Due by 26th September (Thursday) 11:59pm

In this assignment, you need to complete the following four sections:

  1. KNN
  2. Linear regression
  3. Logistic regression
  4. Regularization

Submission guideline

  1. Open this notebook with jupyter notebook and start writing codes
  2. After finishing writing your codes, click the Save button at the top of the Jupyter Notebook.
  3. Please make sure to have entered your UCM ID below.
  4. Select Cell -> All Output -> Clear. This will clear all the outputs from all cells (but will keep the content of all cells).
  5. Select Cell -> Run All. This will run all the cells in order.
  6. Once you’ve rerun everything, select File -> Download as -> HTML or PDF via LaTeX
  7. Look at the HTML/PDF file and make sure all your solutions are there, displayed correctly.
  8. Zip BOTH the HTML/PDF file and this .ipynb notebook (updated with your codes). Rem
  9. Submit your zipped file.

# Please Write Your UCM ID Here:

Section 1. KNN [30 pts]

The following KNN assignment is modified from Stanford CS231n. Please complete this worksheet and hand it in.

In [1]:

# Run some setup code for this notebook.

from __future__ import print_function
import random
import numpy as np
from data_utils import load_CIFAR10
import matplotlib.pyplot as plt


# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

Download data:

Once you have the starter code (regardless of which method you choose above), you will need to download the CIFAR-10 dataset. Run the following from the assignment1 directory:

cd data
./get_datasets.sh

In [2]:

# Load the raw CIFAR-10 data.
cifar10_dir = 'data/cifar-10-batches-py'

# Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
try:
   del X_train, y_train
   del X_test, y_test
   print('Clear previously loaded data.')
except:
   pass

X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
Training data shape:  (50000, 32, 32, 3)
Training labels shape:  (50000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)

In [3]:

# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()

 In [4]:

# Subsample the data for more efficient code execution in this exercise
num_training = 5000
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

In [5]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)
(5000, 3072) (500, 3072)

To make things more structured, we now put everything together into the KNearestNeighbor class. You don't need to implement any function in this class now. Later you will need to come back here and implement the requested functions, per the instructions.

In [6]:

import numpy as np
class KNearestNeighbor(object):
  """ a kNN classifier with L2 distance """

  def __init__(self):
    pass

  def train(self, X, y):
    """
    Train the classifier. For k-nearest neighbors this is just 
    memorizing the training data.

    Inputs:
    - X: A numpy array of shape (num_train, D) containing the training data
      consisting of num_train samples each of dimension D.
    - y: A numpy array of shape (N,) containing the training labels, where
         y[i] is the label for X[i].
    """
    self.X_train = X
    self.y_train = y
    
  def predict(self, X, k=1, num_loops=0):
    """
    Predict labels for test data using this classifier.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data consisting
         of num_test samples each of dimension D.
    - k: The number of nearest neighbors that vote for the predicted labels.
    - num_loops: Determines which implementation to use to compute distances
      between training points and testing points.

    Returns:
    - y: A numpy array of shape (num_test,) containing predicted labels for the
      test data, where y[i] is the predicted label for the test point X[i].  
    """
    if num_loops == 0:
      dists = self.compute_distances_no_loops(X)
    elif num_loops == 1:
      dists = self.compute_distances_one_loop(X)
    elif num_loops == 2:
      dists = self.compute_distances_two_loops(X)
    else:
      raise ValueError('Invalid value %d for num_loops' % num_loops)

    return self.predict_labels(dists, k=k)

  def compute_distances_two_loops(self, X):
    """
    Compute the distance between each test point in X and each training point
    in self.X_train using a nested loop over both the training data and the 
    test data.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth training
      point.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
      for j in range(num_train):
        #####################################################################
        # TODO:                                                             #
        # Compute the l2 distance between the ith test point and the jth    #
        # training point, and store the result in dists[i, j]. You should   #
        # not use a loop over dimension.                                    #
        #####################################################################

        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################
    return dists

  def compute_distances_one_loop(self, X):
    """
    Compute the distance between each test point in X and each training point
    in self.X_train using a single loop over the test data.

    Input / Output: Same as compute_distances_two_loops
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
      #######################################################################
      # TODO:                                                               #
      # Compute the l2 distance between the ith test point and all training #
      # points, and store the result in dists[i, :].                        #
      #######################################################################
      #######################################################################
      #######################################################################
    return dists

  def compute_distances_no_loops(self, X):
    """
    Compute the distance between each test point in X and each training point
    in self.X_train using no explicit loops.

    Input / Output: Same as compute_distances_two_loops
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train)) 
    #########################################################################
    # TODO:                                                                 #
    # Compute the l2 distance between all test points and all training      #
    # points without using any explicit loops, and store the result in      #
    # dists.                                                                #
    #                                                                       #
    # You should implement this function using only basic array operations; #
    # in particular you should not use functions from scipy.                #
    #                                                                       #
    # HINT: Try to formulate the l2 distance using matrix multiplication    #
    #       and two broadcast sums.                                         #
    #########################################################################

    #########################################################################
    #                         END OF YOUR CODE                              #
    #########################################################################
    return dists

  def predict_labels(self, dists, k=1):
    """
    Given a matrix of distances between test points and training points,
    predict a label for each test point.

    Inputs:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      gives the distance between the ith test point and the jth training point.

    Returns:
    - y: A numpy array of shape (num_test,) containing predicted labels for the
      test data, where y[i] is the predicted label for the test point X[i].  
    """
    num_test = dists.shape[0] #num_test has same no of elements as testing data
    y_pred = np.zeros(num_test)
    for i in range(num_test):
      # A list of length k storing the labels of the k nearest neighbors to
      # the ith test point.

      #########################################################################
      # TODO:                                                                 #
      # Use the distance matrix to find the k nearest neighbors of the ith    #
      # testing point, and use self.y_train to find the labels of these       #
      # neighbors. Store these labels in closest_y.                           #
      # Hint: Look up the function numpy.argsort.                             #
      #########################################################################
      #########################################################################
      # TODO:                                                                 #
      # Now that you have found the labels of the k nearest neighbors, you    #
      # need to find the most common label in the list closest_y of labels.   #
      # Store this label in y_pred[i]. Break ties by choosing the smaller     #
      # label.                                                                #
      #########################################################################
      #########################################################################
      #                           END OF YOUR CODE                            # 
      #########################################################################

    return y_pred
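The TODO blocks in the class above are left for you to fill in. For orientation only, here is a hedged sketch of how the fully vectorized distance (the compute_distances_no_loops TODO) is commonly written, using the expansion ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2 from the hint; it is a sketch, not the official solution:

import numpy as np

def l2_distances_no_loops(X_test, X_train):
    # ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2: one matmul plus two broadcast sums
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)    # shape (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                 # shape (num_train,)
    cross = X_test @ X_train.T                              # shape (num_test, num_train)
    sq = np.maximum(test_sq - 2.0 * cross + train_sq, 0.0)  # clamp tiny negative rounding
    return np.sqrt(sq)

# Quick sanity check against the naive definition on random data:
a = np.random.rand(4, 6)
b = np.random.rand(5, 6)
naive = np.sqrt(((a[:, None, :] - b[None, :, :]) ** 2).sum(axis=2))
assert np.allclose(l2_distances_no_loops(a, b), naive)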

In [7]:

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:

  1. First we must compute the distances between all test examples and all train examples.
  2. Given these distances, for each test example we find the k nearest examples and have them vote for the label

Let's begin with computing the distance matrix between all training and test examples. For example, if there are Ntr training examples and Nte test examples, this stage should result in a Nte x Ntr matrix where each element (i,j) is the distance between the i-th test and j-th train example.

First, open k_nearest_neighbor.py and implement the function compute_distances_two_loops that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

In [8]:

# Implement compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
(500, 5000)

In [9]:

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

Second, open k_nearest_neighbor.py and implement the function predict_labels that predicts a label for each test point.

In [10]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
Got 137 / 500 correct => accuracy: 0.274000

You should expect to see approximately 27% accuracy. Now let's try out a larger k, say k = 5:

In [11]:

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
Got 139 / 500 correct => accuracy: 0.278000

In [12]:

# Now lets speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)

# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')
Difference was: 0.000000
Good! The distance matrices are the same

In [13]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')
Difference was: 0.000005
Good! The distance matrices are the same

In [14]:

# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic

two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
print('Two loop version took %f seconds' % two_loop_time)

one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
print('One loop version took %f seconds' % one_loop_time)

no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print('No loop version took %f seconds' % no_loop_time)

# you should see significantly faster performance with the fully vectorized implementation
Two loop version took 65.921167 seconds
One loop version took 149.449537 seconds
No loop version took 0.807989 seconds

Cross-validation

We have implemented the k-Nearest Neighbor classifier but we set the value k = 5 arbitrarily. We will now determine the best value of this hyperparameter with cross-validation.

In [15]:

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
################################################################################
# TODO:                                                                        #
# Split up the training data into folds. After splitting, X_train_folds and    #
# y_train_folds should each be lists of length num_folds, where                #
# y_train_folds[i] is the label vector for the points in X_train_folds[i].     #
# Hint: Look up the numpy array_split function.                                #
################################################################################
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# A dictionary holding the accuracies for different values of k that we find
# when running cross-validation. After running cross-validation,
# k_to_accuracies[k] should be a list of length num_folds giving the different
# accuracy values that we found when using that value of k.


################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
k = 1, accuracy = 0.263000
k = 1, accuracy = 0.257000
k = 1, accuracy = 0.264000
k = 1, accuracy = 0.278000
k = 1, accuracy = 0.266000
k = 3, accuracy = 0.239000
k = 3, accuracy = 0.249000
k = 3, accuracy = 0.240000
k = 3, accuracy = 0.266000
k = 3, accuracy = 0.254000
k = 5, accuracy = 0.248000
k = 5, accuracy = 0.266000
k = 5, accuracy = 0.280000
k = 5, accuracy = 0.292000
k = 5, accuracy = 0.280000
k = 8, accuracy = 0.262000
k = 8, accuracy = 0.282000
k = 8, accuracy = 0.273000
k = 8, accuracy = 0.290000
k = 8, accuracy = 0.273000
k = 10, accuracy = 0.265000
k = 10, accuracy = 0.296000
k = 10, accuracy = 0.276000
k = 10, accuracy = 0.284000
k = 10, accuracy = 0.280000
k = 12, accuracy = 0.260000
k = 12, accuracy = 0.295000
k = 12, accuracy = 0.279000
k = 12, accuracy = 0.283000
k = 12, accuracy = 0.280000
k = 15, accuracy = 0.252000
k = 15, accuracy = 0.289000
k = 15, accuracy = 0.278000
k = 15, accuracy = 0.282000
k = 15, accuracy = 0.274000
k = 20, accuracy = 0.270000
k = 20, accuracy = 0.279000
k = 20, accuracy = 0.279000
k = 20, accuracy = 0.282000
k = 20, accuracy = 0.285000
k = 50, accuracy = 0.271000
k = 50, accuracy = 0.288000
k = 50, accuracy = 0.278000
k = 50, accuracy = 0.269000
k = 50, accuracy = 0.266000
k = 100, accuracy = 0.256000
k = 100, accuracy = 0.270000
k = 100, accuracy = 0.263000
k = 100, accuracy = 0.256000
k = 100, accuracy = 0.263000
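For reference, one hedged way the two TODO blocks above are commonly filled in, following the array_split hint. This assumes the X_train, y_train, num_folds, k_choices, and KNearestNeighbor names defined earlier in the notebook, and is a sketch rather than the official solution:

import numpy as np

X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}
for k in k_choices:
    k_to_accuracies[k] = []
    for fold in range(num_folds):
        # All folds except `fold` form the training split; fold `fold` validates.
        X_tr = np.concatenate(X_train_folds[:fold] + X_train_folds[fold + 1:])
        y_tr = np.concatenate(y_train_folds[:fold] + y_train_folds[fold + 1:])
        X_val, y_val = X_train_folds[fold], y_train_folds[fold]

        clf = KNearestNeighbor()
        clf.train(X_tr, y_tr)
        y_pred = clf.predict(X_val, k=k, num_loops=0)
        k_to_accuracies[k].append(np.mean(y_pred == y_val))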

In [16]:

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k,v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k,v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

 In [17]:

# Based on the cross-validation results above, choose the best value for k,   
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 10

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
Got 141 / 500 correct => accuracy: 0.282000

Section 2. Linear Regression [25 pts]

The following linear regression assignment is modified from Stanford CS229. Please complete this worksheet and hand it in.

Linear regression with one variable

Before starting on any task, it is often useful to understand the data by visualizing it. For this dataset, you can use a scatter plot to visualize the data, since it has only two properties to plot (profit and population). (Many other problems that you will encounter in real life are multi-dimensional and can’t be plotted on a 2-d plot.)

The dataset is loaded from the data file into the variables X and y:

In [18]:

data = np.loadtxt('data/ex1data1.txt', delimiter=",") # read comma separated data
m = data.shape[0]                                     # number of training example
X = data[:,0].reshape(m,1)
y = data[:,1].reshape(m,1)                             
print (X.shape)
print (y.shape)
(97, 1)
(97, 1)

In [19]:

plt.plot(X,y, 'rx')                         # Plot the data
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
plt.show()

In this part, you will fit the linear regression parameters $\theta$ to our dataset using gradient descent.

The objective of linear regression is to minimize the cost function \begin{equation*} J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)})-y^{(i)})^2 \end{equation*}

where the hypothesis $h_\theta(x)$ is given by the linear model \begin{equation*} h_{\theta}(x^{(i)}) = \theta^T x = \theta_0 + \theta_1 x_1 \end{equation*}

Recall that the parameters of your model are the $\theta_j$ values. These are the values you will adjust to minimize cost $J(\theta)$. One way to do this is to use the batch gradient descent algorithm. In batch gradient descent, each iteration performs the update \begin{equation*} \theta_j := \theta_j - \alpha \frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)})-y^{(i)}) x_j^{(i)} \end{equation*}

With each step of gradient descent, your parameters $\theta_j$ come closer to the optimal values that will achieve the lowest cost $J(\theta)$.

As you perform gradient descent to minimize the cost function J(θ), it is helpful to monitor the convergence by computing the cost. In this section, you will implement a function to calculate J(θ) so you can check the convergence of your gradient descent implementation.

Your next task is to complete the compute_cost function, which is a function that computes J(θ). As you are doing this, remember that the variables X and y are not scalar values, but matrices whose rows represent the examples from the training set.

In [20]:

def compute_cost(X, y, theta):
    m = len(y)
    # You need to return the following variables correctly
    J = 0
    #####################################################################
    # Instructions: Compute the cost of a particular choice of theta.   #
    #               You should set J to the cost.                       #
    #####################################################################
    #####################################################################
    #                       END OF YOUR CODE                            #
    #####################################################################
    return J
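For reference, one possible vectorized way to fill in the blank above (a sketch, not necessarily the intended solution):

def compute_cost(X, y, theta):
    # Vectorized cost: J = (1 / (2m)) * sum((X @ theta - y)^2)
    m = len(y)
    residual = X.dot(theta) - y          # (m, 1): h_theta(x^(i)) - y^(i) for every example
    return float((residual ** 2).sum() / (2 * m))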

In [21]:

X = np.concatenate((np.ones((m, 1)), data[:,0].reshape(m,1)), axis=1)
theta = np.zeros((2, 1)) 

compute_cost(X, y, theta)

Out[21]:

32.072733877455676

You should expect to see a cost of 32.07.

Next, you will implement the gradient descent function. The loop structure has been written for you, and you only need to supply the updates to θ within each iteration.

As you program, make sure you understand what you are trying to optimize and what is being updated. Keep in mind that the cost J(θ) is parameterized by the vector θ, not X and y. That is, we minimize the value of J(θ) by changing the values of the vector θ, not by changing X or y.

A good way to verify that gradient descent is working correctly is to look at the value of J(θ) and check that it is decreasing with each step. The starter code calls compute_cost on every iteration and prints the cost. Assuming you have implemented gradient descent and compute_cost correctly, your value of J(θ) should never increase, and should converge to a steady value by the end of the algorithm.

In [22]:

def gradient_descent(X, y, theta, alpha, num_iters):
    # GRADIENTDESCENT Performs gradient descent to learn theta
    # theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by 
    # taking num_iters gradient steps with learning rate alpha

    # Initialize some useful values
    m = len(y)
    J_history = []

    
    for iter in range(num_iters):

        
        #####################################################################
        # Instructions: Perform a single gradient step on the parameter     #
        #               vector theta.                                       #
        #                                                                   #      
        # Hint: While debugging, it can be useful to print out the values   #
        #       of the cost function (compute_cost) and gradient here.      #
        #####################################################################
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################


        # Save the cost J in every iteration 
        J = compute_cost(X, y, theta)
        J_history.append(J)
    
    return theta, J_history
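The body of the loop can be a single vectorized update. A sketch of one possible gradient step (same assumptions as above: X includes the column of ones and theta is a column vector):

        # Inside the loop of gradient_descent (sketch):
        gradient = X.T.dot(X.dot(theta) - y) / m   # (2, 1) gradient of J w.r.t. theta
        theta = theta - alpha * gradient           # simultaneous update of all theta_j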

Now let’s find the parameter θ and plot the linear fit.

In [23]:

print('Running Gradient Descent ...\n')

X = np.concatenate((np.ones((m, 1)), data[:,0].reshape(m,1)), axis=1) # Add a column of ones to x
theta = np.zeros((2, 1))                                              # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# gradient descent
theta, J_history = gradient_descent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: ')
print(theta[0], theta[1])


plt.plot(X[:,1], y, 'rx')                         # Plot the data
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')

plt.plot(X[:,1], np.dot(X, theta), '-')
plt.show()
Running Gradient Descent ...

Theta found by gradient descent: 
[-3.63029144] [1.16636235]

Linear regression with multiple variables

In this part, you will implement linear regression with multiple variables to predict the prices of houses. Suppose you are selling your house and you want to know what a good market price would be. One way to do this is to first collect information on recent houses sold and make a model of housing prices.

The file ex1data2.txt contains a training set of housing prices in Portland, Oregon. The first column is the size of the house (in square feet), the second column is the number of bedrooms, and the third column is the price of the house.

In [24]:

data = np.loadtxt('data/ex1data2.txt', delimiter=",") # read comma separated data
m = data.shape[0]                                     # number of training examples
X = data[:,0:2].reshape(m,2)
y = data[:,2].reshape(m,1)   

By looking at the values, note that house sizes are about 1000 times the number of bedrooms. When features differ by orders of magnitude, first performing feature scaling can make gradient descent converge much more quickly.

In [25]:

def feature_normalize(X):
    
    # FEATURENORMALIZE Normalizes the features in X 
    #   FEATURENORMALIZE(X) returns a normalized version of X where the mean value of each
    #   feature is 0 and the standard deviation is 1. This is often a good preprocessing 
    #   step to do when working with learning algorithms.

    # You need to set these values correctly
    X_norm = X
    mu     = 0
    sigma  = 0

    #####################################################################
    # Instructions: First, for each feature dimension, compute the mean #
    #               of the feature and subtract it from the dataset,    #
    #               storing the mean value in mu. Next, compute the     #
    #               standard deviation of each feature and divide       #
    #               each feature by its standard deviation, storing     #
    #               the standard deviation in sigma.                    #
    #                                                                   #
    #               Note that X is a matrix where each column is a      #
    #               feature and each row is an example. You need        #
    #               to perform the normalization separately for         #
    #               each feature.                                       #
    #                                                                   #
    # Hint: You might find the 'mean' and 'std' functions useful.       #
    #####################################################################
    #####################################################################
    #                       END OF YOUR CODE                            #
    #####################################################################


    return X_norm, mu, sigma
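A compact sketch of one way to implement this (note that np.std computes the population standard deviation by default; pass ddof=1 if you want the sample version instead):

def feature_normalize(X):
    # Column-wise standardization: zero mean, unit standard deviation per feature.
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma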

Previously, you implemented gradient descent on a univariate regression problem. The only difference now is that there is one more feature in the matrix X. The hypothesis function and the batch gradient descent update rule remain unchanged.

You should complete the function gradient_descent_multi to implement gradient descent for linear regression with multiple variables.

Make sure your code supports any number of features and is well-vectorized.

In [26]:

X = np.concatenate((np.ones((m, 1)),feature_normalize(data[:,0:2].reshape(m,2))[0]), axis=1)
theta = np.zeros((3, 1)) 

compute_cost(X, y, theta)

Out[26]:

65591548106.45744

You should expect to see a cost of 65591548106.

Next, you will implement the gradient descent function with multiple variables.

In [27]:

def gradient_descent_multi(X, y, theta, alpha, num_iters):
    #GRADIENTDESCENTMULTI Performs gradient descent to learn theta
    #   theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by
    #   taking num_iters gradient steps with learning rate alpha

    # Initialize some useful values
    m = len(y)
    J_history = []
    
    
    for iter in range(num_iters):

        
        #####################################################################
        # Instructions: Perform a single gradient step on the parameter     #
        #               vector theta.                                       #
        #                                                                   #      
        # Hint: While debugging, it can be useful to print out the values   #
        #       of the cost function (compute_cost) and gradient here.      # 
        #####################################################################
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################


        # Save the cost J in every iteration 
        J = compute_cost(X, y, theta)
        print(J)
        J_history.append(J)
    
    return theta, J_history

Now let’s run gradient descent to find the parameters θ.

In [28]:

alpha = 0.01
num_iters = 400
theta = np.zeros((3, 1))
print(np.dot(X, theta).shape)
theta, J_history = gradient_descent_multi(X, y, theta, alpha, num_iters)
(47, 1)
64297776251.62011
63031018305.52132
61790694237.53249
60576236901.991035
...
2106213044.760492
2105448288.6292474

Let’s plot the convergence graph.

In [29]:

plt.plot(range(len(J_history)), J_history, '-b')  # Plot the convergence curve
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()

Section 3. Logistic Regression [25 pts]

The following logistic regression assignment is modified from Stanford CS229. Please complete this worksheet and hand it in.

Logistic Regression

In this section, you need to implement logistic regression to solve a binary classification problem. Let’s first get our data ready:

In [30]:

# Only use the first 70 samples for training (and validation),
# and treat the rest of them as hold-out testing set.
X = np.loadtxt('data/logistic_x_.txt') 
y = np.loadtxt('data/logistic_y_.txt').reshape(-1, 1) 


X, mu, std = feature_normalize(X)

# Add a column of ones to X for the bias weight.
m = len(X)
X = np.concatenate((np.ones((m, 1)), X), axis=1)

Here, the input $x^{(i)}\in\mathbb{R}^2$ and $y^{(i)}\in\{-1, 1\}$. As we have mentioned, it is better to visualize the data before you start working on it.

In [31]:

# Plot the feature according to their class label.
# Note that we exclude column 0, which is the column we padded with ones in the previous block.
plt.plot(X[np.where(y==1), 1], X[np.where(y==1), 2], 'rx')
plt.plot(X[np.where(y==-1), 1], X[np.where(y==-1), 2], 'bo')  
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()

In the following, you need to implement logistic regression. Recall that when $y^{(i)}\in\{-1,1\}$, the objective function for binary logistic regression can be expressed as: \begin{equation*} J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\log{\left(1+e^{-y^{(i)}\theta^Tx^{(i)}}\right)}=-\frac{1}{m}\sum_{i=1}^m\log{\left(h_{\theta}(y^{(i)}x^{(i)})\right)} \end{equation*} where the hypothesis is the sigmoid function: \begin{equation*} h_\theta(y^{(i)}x^{(i)})=\frac{1}{1+e^{-y^{(i)}\theta^{T}x^{(i)}}} \end{equation*} which we have seen in class (and assignment 0). Similar to the previous section, we can minimize the objective function $J(\theta)$ using batch gradient descent: \begin{equation*} \theta_j := \theta_j - \alpha \frac{1}{m}\sum_{i=1}^{m}h_\theta(-y^{(i)}x^{(i)})(-y^{(i)}x_j^{(i)}) \end{equation*}

Now, your task is to complete the functions sigmoid, compute_cost, and compute_gradient for logistic regression below.

In [32]:

def sigmoid(z):
    #####################################################################
    # Instructions: Implement sigmoid function g                        #
    #####################################################################

    #####################################################################
    #                       END OF YOUR CODE                            #
    #####################################################################
    return g

def compute_cost(X, y, theta):
    
    # You need to return the following variables correctly 
    J = 0
    #####################################################################
    # Instructions: Implement the objective function J(theta)           #
    #####################################################################
    #####################################################################
    #                       END OF YOUR CODE                            #
    #####################################################################
    return J

def compute_gradient(X, y, theta):
    #####################################################################
    # Instructions: Implement gradient function gradient_               #
    #####################################################################
    #####################################################################
    #                       END OF YOUR CODE                            #
    #####################################################################
    return gradient_


def gradient_descent_logistic(X, y, theta, alpha, num_iters):
    m = len(y)
    J_history = []
    for iter in range(num_iters):

        #####################################################################
        # Instructions: Perform a single gradient step on the parameter     #
        #               vector theta using the implemented compute_gradient #
        #                                                                   #      
        # Hint: While debugging, it can be useful to print out the values   #
        #       of the cost function (compute_cost) and gradient here.      # 
        #####################################################################
        
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################


        # Save the cost J in every iteration 
        J = compute_cost(X, y, theta)
        print(J)
        J_history.append(J)
    
    return theta, J_history
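For reference, minimal vectorized sketches of the three blanks (one possible solution, not necessarily the intended one; the names follow the skeleton above):

def sigmoid(z):
    # Logistic function, applied element-wise.
    g = 1.0 / (1.0 + np.exp(-z))
    return g

def compute_cost(X, y, theta):
    # J(theta) = (1/m) * sum(log(1 + exp(-y * (X @ theta))))
    margins = y * X.dot(theta)                        # (m, 1): y^(i) * theta^T x^(i)
    J = float(np.mean(np.log(1.0 + np.exp(-margins))))
    return J

def compute_gradient(X, y, theta):
    # grad J(theta) = -(1/m) * X^T (sigmoid(-margins) * y)
    margins = y * X.dot(theta)
    gradient_ = -X.T.dot(sigmoid(-margins) * y) / len(y)
    return gradient_

With these in place, the step inside gradient_descent_logistic reduces to a single line: theta = theta - alpha * compute_gradient(X, y, theta).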

Now, fit your model, and see if it is learning.

In [33]:

# Train your model.
theta = np.zeros((X.shape[1], 1))
alpha = 0.1
num_iters = 400
theta, J_history = gradient_descent_logistic(X, y, theta, alpha, num_iters)
0.6767318709516239
0.6612759836177738
0.6467213868522412
0.6330122173560181
...
0.32983128750079727
0.3298242798952241

Again, plot and check to see if the model is converging.

In [34]:

plt.plot(list(range(0, len(J_history))), J_history, '-b')  
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()
print (theta)

[[-0.03801384]
 [ 1.38454246]
 [ 1.91783357]]

Decision Boundary

In addition to checking the convergence graph and accuracy, we can also plot the decision boundary to see what the model actually learns.

In [35]:

# Plot the feature according to their class label.
# Note that we exclude column 0, which is the column we padded with ones in the previous block.
plt.plot(X[np.where(y==1), 1], X[np.where(y==1), 2], 'rx')
plt.plot(X[np.where(y==-1), 1], X[np.where(y==-1), 2], 'bo')

#####################################################################
# Instructions: Plot out the decision boundary.                     #
# Hint: To plot the boundary, which is a straight line in our case, #
#       you need to find the two ends of the line, and plot it with #
#       plt.plot(). Note that the decision boundary is the line    #
#       where theta^T x = 0.                                        #
#####################################################################
#####################################################################
#                       END OF YOUR CODE                            #
#####################################################################

plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
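One possible way to fill in the boundary plot above (a sketch; it solves theta^T x = 0 for the second feature and draws the resulting line between the extremes of the first feature):

# Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0  =>  x2 = -(theta0 + theta1*x1) / theta2
x1_ends = np.array([X[:, 1].min(), X[:, 1].max()])
x2_ends = -(theta[0] + theta[1] * x1_ends) / theta[2]
plt.plot(x1_ends, x2_ends, '-g')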

Section 4. Regularization [30 pts]

In this section, you need to incorporate L2 regularization into your logistic regression.

L2 Regularization

Overfitting is a notorious problem in the world of machine learning. One simple way to counter this issue is to put constraints on your model weights $\theta$, as we have discussed in class. In this section, you need to modify the objective function to impose L2 regularization on the logistic regression: \begin{equation*} J(\theta) = -\frac{1}{m}\sum_{i=1}^m\log{\left(h_{\theta}(y^{(i)}x^{(i)})\right)} + \lambda\vert\vert\theta\vert\vert_2^2 \end{equation*} Derive the gradient for this new objective to incorporate it into your logistic regression model.
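One way to carry out this derivation: since $\frac{\partial}{\partial\theta}\lambda\vert\vert\theta\vert\vert_2^2 = 2\lambda\theta$, the regularized gradient is just the unregularized one plus a $2\lambda\theta$ term: \begin{equation*} \nabla_\theta J(\theta) = -\frac{1}{m}\sum_{i=1}^m h_\theta(-y^{(i)}x^{(i)})\,y^{(i)}x^{(i)} + 2\lambda\theta \end{equation*} (Whether to exempt the bias term $\theta_0$ from the penalty is a design choice; the class template below does not distinguish it.)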

To make things more structured, we now put everything together into a class. Please use the class template below to implement your logistic regression. Note that you can add your own class methods if needed.

In [36]:

class LogisticRegression(object):
    
    def __init__(self, alpha=0.1, lamb=0.1, regularization=None):
        # setting the class attribute.
        self.alpha = alpha                   # Set up your learning rate alpha.
        self.lamb = lamb                     # Strength of regularization.
        self.regularization = regularization 
        assert regularization in ('l2', None)  # we only consider these two cases
    
    def _compute_cost(self, X, y):
        #####################################################################
        # Instructions: Compute the cost function here.                     #
        #               You need to handle both the cases with, and without #
        #               regularization here.                                #
        #####################################################################
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################
        return J
        
    def _compute_gradient(self, X, y):
        #####################################################################
        # Instructions: Compute the gradient here.                          #
        #               You need to handle both the cases with, and without #
        #               regularization here.                                #
        #####################################################################
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################
        return gradient

    def fit(self, X, y, num_iter=5):
        self.theta = np.zeros((X.shape[1], 1))
        m = len(y)
        J_history = []
        #####################################################################
        # Instructions: Run your gradient descent here, using               #
        #               _compute_cost and _compute_gradient, and append     #
        #               the cost to J_history at every iteration.           #
        #####################################################################
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################
        return J_history
    
    def predict(self, X):
        #####################################################################
        # Instructions: Use your hypothesis to make predictions.            #
        #####################################################################
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################
        return y_hat
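
As a reference point, a sketch of what _compute_gradient might look like (one possible solution; it assumes the same sigmoid helper as in Section 3 and regularizes all weights, including the bias):

    def _compute_gradient(self, X, y):
        # Unregularized logistic gradient, as in Section 3.
        margins = y * X.dot(self.theta)
        gradient = -X.T.dot(sigmoid(-margins) * y) / len(y)
        if self.regularization == 'l2':
            gradient = gradient + 2 * self.lamb * self.theta  # gradient of lamb * ||theta||^2
        return gradient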

Load the wine dataset, in which $x^{(i)}\in\mathbb{R}^{12}$ contains different attributes of each wine, and $y\in\{-1,1\}$ is the class label (red or white wine).

In [37]:

# Load dataset
X_train = np.loadtxt('data/wine_train_X.txt')
y_train = np.loadtxt('data/wine_train_y.txt').reshape(-1, 1)
X_test = np.loadtxt('data/wine_test_X.txt')
y_test = np.loadtxt('data/wine_test_y.txt').reshape(-1, 1)

X_train, mu, std = feature_normalize(X_train)
X_test, mu, std = feature_normalize(X_test)


X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)

Now, let’s train two different logistic regression models: one with, and one without regularization.

In [38]:

log_reg = LogisticRegression(alpha=0.1) # Without regularization
log_reg_l2 = LogisticRegression(alpha=0.1, lamb=1.0, regularization='l2') # With L2 regularization

J_history = log_reg.fit(X_train, y_train, num_iter=500)
J_history_l2 = log_reg_l2.fit(X_train, y_train, num_iter=500)

Next, we evaluate the accuracy for each method:

In [39]:

def evaluate_accuracy(X, y, model):
    y_pred = model.predict(X)
    y_pred[y_pred > 0.5] = 1
    y_pred[y_pred <= 0.5] = -1
    return np.mean(y_pred == y)

print("Accuracy on training set: ", evaluate_accuracy(X_train, y_train, log_reg))
print("Accuracy on testing set: ", evaluate_accuracy(X_test, y_test, log_reg))
print("Accuracy w/ L2 training set: ", evaluate_accuracy(X_train, y_train, log_reg_l2))
print("Accuracy w/ L2 testing set: ", evaluate_accuracy(X_test, y_test, log_reg_l2))
Accuracy on training set:  0.9925
Accuracy on testing set:  0.9925
Accuracy w/ L2 training set:  0.9925
Accuracy w/ L2 testing set:  0.9925

To see the effect of regularization on $\theta$, we can plot out each $\theta_j$ under different $\lambda$.

In [40]:

def plot_theta(theta, lamb):
    """
    Helper function for plotting the value of theta with respect to different lambda.
    theta (array): rows are the trained theta for each lambda value.
    lamb  (list): list of lambda values you tried.
    """
    plt.hlines(y=0, xmin=0, xmax=np.max(lamb), color='red', linewidth = 2, linestyle = '--')
    for i in range(theta.shape[1]):
        plt.plot(lamb, theta[:,i])
    plt.ylabel('theta')
    plt.xlabel('lambda')
    plt.xscale('log')
    plt.show()

In [41]:

lamb = [0.1, 1, 10, 100, 1000]
theta = []

#####################################################################
# Instructions: For each value in lamb, try a model for it, and     #
#               append the trained weights into the theta           #
#####################################################################
#####################################################################
#                       END OF YOUR CODE                            #
#####################################################################

plot_theta(np.array(theta), lamb)
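A sketch of one way to fill in the sweep above (it simply reuses the LogisticRegression class defined earlier):

# For each lambda, train a fresh regularized model and record its weights.
for l in lamb:
    model = LogisticRegression(alpha=0.1, lamb=l, regularization='l2')
    model.fit(X_train, y_train, num_iter=500)
    theta.append(model.theta.ravel())   # flatten (n, 1) to (n,) so the rows stack cleanly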


CSMA 802.11 Scripting In Python

You are to study the performance of multiple access protocols in a wireless setting. Consider the network shown in Figure 1.

The circles denote the communication range R of each station. We are interested in the following two scenarios:

A. Concurrent Communications: Stations A, B, C, and D of Figure 1(a) are within the same collision domain (any transmission is received by all). Communication takes place between pairs A → B and C → D. Traffic is generated at A and C according to a Poisson arrival process with parameters λA and λC, respectively.

B. Hidden Terminals: Stations A, B, and C of Figure 1(b) belong to two collision domains. Communication takes place between pairs A → B and C → B. Traffic is generated at A and C according to a Poisson arrival process with parameters λA and λC, respectively.

For each scenario, compute relevant performance metrics for the following multiple access protocols. A time-slotted system is assumed.

1. CSMA with Collision Avoidance (CSMA/CA) according to the 802.11 DCF function.

(a) A station Tx ready to transmit (when a frame has arrived for transmission from the upper layers of the network stack) selects a random backoff value in [0, CW0 − 1]. It first senses the channel for an initial period of DIFS time.

(b) If the channel is busy, Tx (and every other station with a frame for transmission) monitors the channel until it becomes idle. Once the channel is idle, Tx decrements its counter by one with every idle slot. If the channel becomes busy again, Tx freezes its backoff counter. When the counter reaches zero, Tx transmits its frame.

(c) If the frame is successfully received (no collision) by Rx, the station Rx replies with an ACK frame after SIFS time. This completes the transmission round and the protocol repeats for the next transmission. For successive transmissions, the station has to sense for DIFS time before starting the countdown.

(d) If a collision occurs, the stations that collided double their contention window CW and repeat the backoff process. After k collisions, the backoff value is selected from [0, 2^k CW0 − 1]. The CW value cannot exceed the threshold CWmax.

So, basically, the concept is that a global clock advances in slots, and each station may transmit in any slot once its random backoff expires.

(So I have to create a simulation where packets are generated at random times by stations A and B.)

Then each station senses the medium by first waiting for DIFS time to see if the medium is free, then sends an RTS to reserve the medium, and then sends its data.
Meanwhile, station B should see that the medium is busy and wait.

I am lost on what the best approach to implement this could be. I looked at the producer-consumer pattern in Python, but it does not synchronize well with my logic flow.

Any ideas/directions with examples will be highly appreciated.

Here is the algorithm overview:
https://i.stack.imgur.com/Qhmwl.png
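
One direction that avoids threads entirely (a minimal slot-by-slot sketch under simplifying assumptions: always-backlogged stations, DIFS/RTS/ACK and Poisson arrivals abstracted away, illustrative constants): advance a single global slot counter and let each station carry its own backoff state.

import random

CW0, CW_MAX = 4, 1024      # illustrative contention-window bounds
NUM_SLOTS = 100_000        # length of the simulation in slots

class Station:
    def __init__(self):
        self.cw = CW0
        self.backoff = random.randrange(self.cw)
        self.sent = 0
        self.collisions = 0

    def on_success(self):
        self.sent += 1
        self.cw = CW0                       # reset window after a success
        self.backoff = random.randrange(self.cw)

    def on_collision(self):
        self.collisions += 1
        self.cw = min(2 * self.cw, CW_MAX)  # binary exponential backoff
        self.backoff = random.randrange(self.cw)

stations = [Station(), Station()]           # e.g. the two senders A and C

for _ in range(NUM_SLOTS):
    ready = [s for s in stations if s.backoff == 0]
    if len(ready) == 1:
        ready[0].on_success()               # lone transmitter: success
    elif len(ready) > 1:
        for s in ready:
            s.on_collision()                # counters hit zero together: collision
    else:
        for s in stations:
            s.backoff -= 1                  # idle slot: everyone counts down; busy
                                            # slots freeze counters, since only idle
                                            # slots reach this branch

print([(s.sent, s.collisions) for s in stations])

Poisson arrivals can be layered on top by giving each station a queue and only letting it contend when the queue is non-empty; the hidden-terminal scenario needs a per-receiver notion of the channel instead of one global medium.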

 

TEST 3

5.0/ 5.0 Points
You have a number of digital pictures you recently took on your smartphone. You would like to share these pictures with all of your friends and family. What is a “Cloud”-like example of sharing these pictures with them?

A. Upload them to www.flickr.com and share a link with them.
B. Email them to everyone by way of email attachments.
C. Save them to your My Pictures folder on your Windows 8 tablet.
D. Save them to a fractional website that offers file transfer capabilities.
Question 2 of 20
0.0/ 5.0 Points
Your CEO is concerned that too much productivity is lost by having employees call each other, only to be directed to voice mail. He asks you if something can be done to counter this. What do you suggest?

A. Integrate Dropbox into the company network.
B. Integrate RDP into the company network.
C. Integrate Microsoft Lync into the company network.
D. Integrate TeamViewer into the company network.
Question 3 of 20
5.0/ 5.0 Points
Your sister is considering purchasing a tablet computer that utilizes RT as the operating system. She asks you if RT is any different from her Windows 8 desktop she has at home. What do you tell her?

A. Unlike her Windows 8 PC, RT does not have a tile-based interface.
B. RT is an Android-based operating system so it is completely different.
C. RT isn’t touch-enabled so it is a poor choice for a tablet.
D. She won’t be able to install regular Windows applications on RT.
Question 4 of 20
0.0/ 5.0 Points
What is a chief concern of cloud computing?

A. Cost
B. Security
C. Redundancy
D. Speed
Question 5 of 20
0.0/ 5.0 Points
Your company is trying to get out of the responsibility of purchasing and hosting its own hardware so it won’t have to finance or manage a datacenter anymore. Your boss has told you that you need to install an operating system on all of the cloud-based servers. This is an example of what type of cloud computing?

A. Infrastructure as a Service
B. Network as a Service
C. Platform as a Service
D. Software as a Service
Question 6 of 20
5.0/ 5.0 Points
You have a Bluetooth headset that integrates with your computer so that you can talk to partners through Microsoft Lync. This is an example of what type of wireless networking?

A. WLAN
B. WPAN
C. WMAN
D. WWIRE
Question 7 of 20
5.0/ 5.0 Points
Your boss wants you to configure his laptop so that he can access the company network when he is on the road. You suggest a VPN connection to him. He is very concerned about security and asks you how secure VPN is. What do you tell him?

A. Only mouse clicks and keyboard commands are transferred over the connection.
B. All traffic that flows between the laptop and VPN server is encrypted.
C. All of the tasks your boss will do will be on the desktop of the remote computer.
D. VPN integrates with NTFS so that only secure files are opened and modified.
Question 8 of 20
5.0/ 5.0 Points
You install TeamViewer on your workstation at home so that you can access it when on the road. How can you be assured that unknown users can’t access your computer through TeamViewer?

A. They have to have the same encryption key as you.
B. They have to know your User ID and password.
C. They have to have a signed certificate by your computer.
D. Their computer has to be in the same domain as yours.
Question 9 of 20
0.0/ 5.0 Points
Your boss calls you from his home to use the VPN connection you configured for him on his laptop. He has traditionally depended on Remote Desktop to access the server. Your boss tells you that the VPN connection shows that it is connected but the server’s desktop is not appearing on his screen. What do you tell him?

A. The firewall on his home router must be blocking the remote connection.
B. His ISP must not allow encrypted connections through their network.
C. He must be running Windows 8 which doesn’t support VPN.
D. VPN doesn’t bring up a remote desktop on the local computer.
Question 10 of 20
0.0/ 5.0 Points
Your mom wants to purchase a computer. She has heard about how the Windows 8 operating system is best-geared for a touch-enabled computing device. What type of computer do you recommend that she purchase?

A. A laptop
B. A tablet
C. A PC
D. A netbook
Question 11 of 20
5.0/ 5.0 Points
Your mom wants to start using some type of cloud storage so that she can access some of her important business files from anywhere without having to remote into another machine. What do you suggest?

A. TeamViewer
B. GoToMyPC
C. Dropbox
D. Microsoft RT
Question 12 of 20
5.0/ 5.0 Points
Why isn’t 802.1X a good choice for home-based wireless networks?

A. It only creates open wireless connections.
B. It requires an authentication server.
C. Its encryption is too easy to hack.
D. It requires multiple access points.
Question 13 of 20
5.0/ 5.0 Points
You have purchased an Apple desktop computer and want to set it up so that you can access your computer desktop when you are on the road. How might you do this?

A. Install Remote Desktop for Apple on your desktop.
B. Install Apple+ VPN on your desktop.
C. Configure your desktop for Platform as a Service.
D. Install the GoToMyPC client on your desktop.
Question 14 of 20
0.0/ 5.0 Points
Your boss calls you from the road. She is trying to connect to the local hotel guest wireless network but there is a yellow shield displayed beside that particular wireless selection. She is asking you what that shield means. What do you tell her?

A. It means the connection is not being broadcasted.
B. It means the connection is an open connection.
C. It means her wireless card doesn’t support that connection.
D. It means the wireless connection requires a password.
Question 15 of 20
0.0/ 5.0 Points
Making a phone call through Lync from your laptop using only your headset is an example of:

A. a PBX phone connection.
B. an encrypted phone conversation.
C. a legacy phone conversation.
D. a peer-to-peer phone conversation.
Question 16 of 20
0.0/ 5.0 Points
When you connect to a remote VPN server with your laptop running Windows 8, what key item is your computer allocated?

A. The desktop of the VPN server
B. The desktop of the logon server
C. An IP address from the remote network
D. A web browser with an SSL connection
Question 17 of 20
0.0/ 5.0 Points
You are the IT Manager for a school system. You just went with a new application that teachers will use to record the attendance of their students as well as their grades. The application is hosted by a third party, and teachers access the application through their web browsers. This is an example of what type of cloud computing?

A. Infrastructure as a Service
B. Network as a Service
C. Platform as a Service
D. Software as a Service
Question 18 of 20
5.0/ 5.0 Points
Which of the operating systems below is able to join a domain?

A. Microsoft Surface Pro
B. Microsoft Surface RT
C. Google Android
D. iPad
Question 19 of 20
5.0/ 5.0 Points
Which of these devices is usually the default gateway for most home networks?

A. A workstation
B. A server
C. A smartphone
D. A wireless router
Question 20 of 20
0.0/ 5.0 Points
In order for your laptop to make a wireless connection, it must first find an available __________ to connect to.

A. SSID
B. VPN
C. RSAT
D. WEP
 

ITCC Wk4

Assignment Instructions

The scenario:

Johnston Smith, Associate Director of Sales at Pasedena HVAC Manufacturer, has asked you to calculate the cost of running HVAC units in summer and provide a report.

For this assignment, you will need the following files:

    New blank Access database

HVAC_Cooling

You will save your files as:

Lastname_Firstname_HVAC_Cooling

Lastname_Firstname_Cooling_Costs

1. Open the HVAC_Cooling Excel file, and save the file as Lastname_Firstname_HVAC_Cooling.

2. Insert your name in the footer.

3. In the worksheet, insert an Excel table with a header row.

4. Add a calculated column that calculates the cost of cooling using $0.00124 per Cooling BTU formatted with the Accounting Number Format.

5. Filter the data to display only one Heating BTU number of your choice.

6. Apply Best Fit to all columns.

7. Center the worksheet horizontally on a landscape page.

8. In Access, create a new database and save it as Lastname_Firstname_Cooling_Costs

9. Import your Lastname_Firstname_HVAC_Cooling Excel file.

10. In the table, filter the data to show only data for the Cooling BTU greater than 60,000 and the Power unit of your choice.

11. Create a report based on your results.

12. Delete the four measurement fields following the Heating BTU field, and be sure the title fits on one line.

· Special Instructions: Use the Supporting Materials below to complete the project.

· Grading: Please review the rubrics for particulars.

Grading Rubrics

Performance Levels: Exemplary (10 pts) | Accomplished (7-9 pts) | Developing (4-6 pts) | Beginning (0-3 pts)

Performance Element (weight 10/10): You consistently applied the relevant skills (Exemplary); you mostly applied them (Accomplished); you sometimes, but not always, applied them (Developing); you rarely or never applied them (Beginning).

1. Modify the 4E HVAC Cooling worksheet: insert a table, add a calculated column, and filter data.
   Exemplary (10): Worksheet table is created, calculated column is added, and data is filtered accurately.
   Accomplished (7-9): Worksheet table is created, calculated column is added, and data is filtered, but there are two or fewer errors.
   Developing (4-6): Worksheet table is created, calculated column is added, and data is filtered, but there are more than two errors.
   Beginning (0-3): One or more items were not completed.

2. Import the 4E HVAC Cooling worksheet to an Access table, and filter the table.
   Exemplary (10): Table is imported accurately, and filtering is applied accurately.
   Accomplished (7-9): Table is imported accurately, and filtering is applied, but there are two or fewer errors.
   Developing (4-6): Table is imported accurately, and filtering is applied, but there are more than two errors.
   Beginning (0-3): Table is not imported and/or filtering is not applied.

3. Create a report based on the 4E HVAC Cooling table.
   Exemplary (10): Report is created accurately.
   Accomplished (7-9): Report is created with two or fewer errors.
   Developing (4-6): Report is created with more than two errors.
   Beginning (0-3): Report is not created.

 

Case Study

Discussion Questions

1. Some organizational factors increase a project’s likelihood of success. Identify these “facilitators” for the Green project.

2. Other organizational factors decrease a project’s likelihood of success. Identify these “barriers” for the Green project.

3. Outline the things that McCann needs to do right away.

Discussion Questions

1. What evidence is the CEO using to suggest that Genex is not using technology competitively?

2. Did Devlin need to hire Sandy, a “high-priced technology consultant,” to tell him that technology at Genex was a mess?

3. Devise a strategy to successfully implement enterprisewide systems (such as SAP) at Genex.

 