From 221784f594582bc1962b2d4fc9e7c7b56657b79c Mon Sep 17 00:00:00 2001 From: Dave Dietrick Date: Mon, 21 Nov 2022 14:04:21 -0500 Subject: [PATCH] Started interview prep section, data structures --- docs/.vitepress/config.js | 7 +-- docs/interview/ds/array.md | 18 ++++++++ docs/interview/ds/complexity.md | 26 ++++++++++++ docs/interview/ds/graph.md | 33 +++++++++++++++ docs/interview/ds/hash.md | 15 +++++++ docs/interview/ds/index.md | 4 ++ docs/interview/ds/linkedlist.md | 17 ++++++++ docs/interview/ds/memory.md | 7 +++ docs/interview/ds/stackqueue.md | 75 +++++++++++++++++++++++++++++++++ docs/interview/ds/string.md | 19 +++++++++ docs/interview/ds/tree.md | 21 +++++++++ docs/interview/index.md | 1 + docs/interview/sd/scaling.md | 20 +++++++++ docs/interview/sidebar.json | 29 +++++++++++++ 14 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 docs/interview/ds/array.md create mode 100644 docs/interview/ds/complexity.md create mode 100644 docs/interview/ds/graph.md create mode 100644 docs/interview/ds/hash.md create mode 100644 docs/interview/ds/index.md create mode 100644 docs/interview/ds/linkedlist.md create mode 100644 docs/interview/ds/memory.md create mode 100644 docs/interview/ds/stackqueue.md create mode 100644 docs/interview/ds/string.md create mode 100644 docs/interview/ds/tree.md create mode 100644 docs/interview/index.md create mode 100644 docs/interview/sd/scaling.md create mode 100644 docs/interview/sidebar.json diff --git a/docs/.vitepress/config.js b/docs/.vitepress/config.js index 6cee0a3..3bc081e 100644 --- a/docs/.vitepress/config.js +++ b/docs/.vitepress/config.js @@ -10,7 +10,8 @@ export default { '/rust/': require('../rust/sidebar.json'), '/nuxt/': require('../nuxt/sidebar.json'), '/ts/': require('../ts/sidebar.json'), - '/terraform': require('../terraform/sidebar.json'), + '/terraform/': require('../terraform/sidebar.json'), + '/interview/': require('../interview/sidebar.json'), '/': [ { text: 'Home', @@ -18,9 +19,9 @@ export 
default { {text: 'Introduction', link: '/'}, {text: 'Languages', link: '/languages'}, {text: 'Frameworks', link: '/frameworks'}, - {text: 'Devops', link: '/devops'} + {text: 'Devops', link: '/devops'}, + {text: 'Interview Prep', link: '/interview/'} ] - } ], } diff --git a/docs/interview/ds/array.md b/docs/interview/ds/array.md new file mode 100644 index 0000000..aa386e1 --- /dev/null +++ b/docs/interview/ds/array.md @@ -0,0 +1,18 @@ +# Arrays + +An array is a linear collection of data values that are accessible at numbered indices, starting at 0. Arrays are stored in contiguous memory. There are two types of arrays, **static** and **dynamic**. **Static** arrays are fixed length, meaning they will always take up the same amount of memory. + +**Dynamic** arrays can change in size, and in statically typed languages like C++ they are called vectors. **Dynamic** arrays allocate double the amount of memory you have specified to account for adding values to it. When you reach a full array, it allocates a new array with double the size and copies the values over, which is an O(n) operation. + +## Complexity + +- Accessing at index: O(1) +- Updating at index: O(1) +- Inserting at beginning: O(n) +- Inserting in middle: O(n) +- Inserting at end: O(1) for dynamic, O(n) for static or reallocating dynamic +- Removing from beginning: O(n) +- Removing from middle: O(n) +- Removing from end: O(1) +- Copying: O(n) +- Traversing (including mapping, filtering, etc): O(n) \ No newline at end of file diff --git a/docs/interview/ds/complexity.md b/docs/interview/ds/complexity.md new file mode 100644 index 0000000..0987e0a --- /dev/null +++ b/docs/interview/ds/complexity.md @@ -0,0 +1,26 @@ +# Complexity + +Complexity is the way we rank the effectiveness of different data structures and algorithms. We are generally concerned with two types of complexity, **time** and **space**. **Time complexity** measures the amount of time an algorithm takes.
**Space complexity** measures how much memory an algorithm uses. + +## Big O Notation + +Big O Notation is a tool that allows us to generalize the complexity of an algorithm as a function of its input size. For time complexity, the ratio between the size of the input and the amount of time it takes that algorithm to complete on a growing input tells us the effectiveness of that algorithm. The same goes for space complexity and the increase in memory required to complete algorithms. Some examples include (from fastest to slowest): + +- Constant - O(1) No change in run time +- Logarithmic - O(log(n)) +- Linear - O(n) +- Quadratic - O(n^2) +- Exponential - O(2^n) +- Factorial - O(n!) + +## Logarithms + +Logarithms in computer science are defined by the following equation: + +``` +log(N) = y if 2^y = N +``` + +As opposed to normal logarithms in math which default to base 10, we default to base 2. This basically means that as the input size doubles, the run time only increases by one unit. Linear time would double as the input doubles. + +When an algorithm takes the input and divides it repeatedly in some way, it is best to think about logarithmic complexity. Trees for example are often traversed down one half of the tree and its corresponding branches. \ No newline at end of file diff --git a/docs/interview/ds/graph.md b/docs/interview/ds/graph.md new file mode 100644 index 0000000..a9b35d7 --- /dev/null +++ b/docs/interview/ds/graph.md @@ -0,0 +1,33 @@ +# Graphs + +A graph is a collection of nodes called **vertices** that are connected to one another through **edges**. + +## Direction + +Direction relates to the way vertices are connected with one another. If one vertex points to another vertex, but that other vertex does not necessarily point back, then we have a **directed graph**. If the connection goes both ways, then it is an **undirected graph** or a **bi-directional** graph.
+ ## Connection + +A graph is considered **connected** when you can reach any vertex from any other vertex, meaning there are no sets of vertices that are disconnected from the other vertices. A graph is considered **strongly connected** if all of the connections are bi-directional. Otherwise it is **weakly connected**. + +## Cycles + +A cycle is when three or more vertices connect to form a closed loop. A graph that has no cycles is called an **acyclic graph**. A graph with at least one cycle is a **cyclic graph**. When traversing cyclic graphs you need to be careful to not get into an infinite loop, and instead mark nodes as visited. + +## Representing in code + +Graphs are created using an **adjacency list**, or list of nodes with their adjacent nodes in another list. + +```python +class Vertex: + def __init__(self, val, edges): + self.val = val + self.edges = list(edges) # Pointers +``` + +## Complexity + +The space complexity of a graph is O(v+e) where v is the number of vertices and e is the number of edges. + +- Depth First Search Traversing: O(v+e) +- Breadth First Search Traversing: O(v+e) \ No newline at end of file diff --git a/docs/interview/ds/hash.md b/docs/interview/ds/hash.md new file mode 100644 index 0000000..ac97365 --- /dev/null +++ b/docs/interview/ds/hash.md @@ -0,0 +1,15 @@ +# Hash Tables + +A hash table is a key/value store, where each key points to a different value. This provides easy and fast access to the elements by the key. The advantage of hash tables is that inserting, removing, and accessing values are constant time operations, or O(1). + +Under the hood, hash tables are built with a **dynamic array** of **linked lists**. The keys are transformed into an index of the array using a **hashing function**. These hashing functions will always return the same output with a given input. Each index of the hash table is actually a linked list, where the values are stored as nodes.
If two keys equate to the same index when run through the hashing function, then the linked list will store multiple values. These values then point back to the key via another pointer, so we know which key is associated with which value. + +Theoretically, if every key links to the same index, then the time complexity of operations is O(n). Most modern hashing functions are designed to minimize collisions, so we can assume O(1) complexity on average. Similar to a dynamic array, the hash table can **resize** itself once it starts to fill up, rehash the keys, and replace the values in their new spots. + +``` +[ + 0: (value1, key1) -> null + 1: null + 2: (value2, key2) -> (value3, key3) -> (value4, key4) -> null +] +``` \ No newline at end of file diff --git a/docs/interview/ds/index.md b/docs/interview/ds/index.md new file mode 100644 index 0000000..337fe83 --- /dev/null +++ b/docs/interview/ds/index.md @@ -0,0 +1,4 @@ +# Data Structures + +Data structures are just a way to organize and manage data. They provide a structure that allows you to perform algorithms to manipulate this data in different ways. Each data structure has different relationships between its elements and has different strengths and weaknesses. + diff --git a/docs/interview/ds/linkedlist.md b/docs/interview/ds/linkedlist.md new file mode 100644 index 0000000..27030b0 --- /dev/null +++ b/docs/interview/ds/linkedlist.md @@ -0,0 +1,17 @@ +# Linked Lists + +Linked lists are a collection of nodes, each of which has a value and a pointer to the next node in the list. These are typically referred to as `value` and `next`. The first node in the linked list is referred to as the `head`, and the last node is the `tail` whose `next` pointer points to `null`. Linked lists differ from arrays in that since they use pointers to link values, they do not need to occupy contiguous memory.
+ ## Complexity + +- Accessing head - O(1) +- Accessing tail - O(n) +- Accessing middle - O(n) +- Inserting/removing head - O(1) +- Inserting/removing tail - O(n) +- Inserting/removing middle - O(n) +- Searching - O(n) + +## Doubly Linked Lists + +These are very similar to a singly linked list, only that each node also has a `prev` pointer pointing to the previous node. For the head, this is set to `null`. The complexity is mostly the same as the singly linked list as well, only operations on the `tail` are O(1) time. \ No newline at end of file diff --git a/docs/interview/ds/memory.md b/docs/interview/ds/memory.md new file mode 100644 index 0000000..533df25 --- /dev/null +++ b/docs/interview/ds/memory.md @@ -0,0 +1,7 @@ +# Memory + +Memory is the mechanism by which a computer keeps data. It's a collection of memory slots that can hold bits (0's and 1's). Each memory slot can hold 8 bits, or 1 byte. Our typical fixed-width integers we use in programming are 32-bit integers, meaning they take up 4 bytes of memory to represent a number. These bytes need to be contiguous in order to store these integers. Accessing these memory slots directly is very fast, similar to looking up a value in an array by its index. + +Endianness describes the order in which these bytes are stored in memory. Big-endian means the most significant byte comes first in the sequence, and little-endian means the least significant byte comes first. + +Pointers are values stored in memory that are addresses of other memory slots. Each memory slot is identified by a memory address, which in itself is a base 2 integer. This helps solve the problem of having to store values contiguously. You can instead store pointers next to each other that represent the actual values stored elsewhere, wherever there is space.
\ No newline at end of file diff --git a/docs/interview/ds/stackqueue.md b/docs/interview/ds/stackqueue.md new file mode 100644 index 0000000..2a1d770 --- /dev/null +++ b/docs/interview/ds/stackqueue.md @@ -0,0 +1,75 @@ +# Stacks and Queues + +Stacks and Queues are both array-like structures that have different methods for storing and removing values from them. You can only add elements to the head of a stack or a queue by design. + +## Stacks + +Stacks follow the **LIFO** rule: Last In, First Out. This can be thought of as a stack of plates, where you add plates to the top of the stack, and then also remove them from the top. This is typically implemented with a **singly linked list** or a **dynamic array**. + +```python +class Node: + def __init__(self, val): + self.next = None + self.val = val + +class Stack: + def __init__(self): + self.head = None + + def push(self, node): + node.next = self.head + self.head = node + + def pop(self): + ret = self.head + self.head = self.head.next + return ret + + def peek(self): + return self.head.val if self.head else None +``` + +### Complexity + +- Pushing onto stack: O(1) +- Popping off stack: O(1) +- Peeking the top element: O(1) +- Searching: O(n) + +## Queue + +Queues follow the **FIFO** rule: First In, First Out. This can be thought of as a line of people, where the first person in the line is the first to leave.
These are typically implemented with a **doubly linked list**. + +```python +class Node: + def __init__(self, val): + self.val = val + self.next = None + self.prev = None + +class Queue: + def __init__(self): + self.head = None + self.tail = None + + def enqueue(self, node): + if not self.head: + self.head = node + self.tail = node + return + self.head.prev = node + node.next = self.head + self.head = node + + def dequeue(self): + ret = self.tail + if ret: + self.tail = ret.prev + if self.tail: + self.tail.next = None + else: + self.head = None + return ret + + def peek(self): + return self.tail.val if self.tail else None +``` + +### Complexity + +- Enqueuing (adding) to queue: O(1) +- Dequeuing (removing) from queue: O(1) +- Peeking the front element: O(1) +- Searching: O(n) \ No newline at end of file diff --git a/docs/interview/ds/string.md b/docs/interview/ds/string.md new file mode 100644 index 0000000..1e97fc3 --- /dev/null +++ b/docs/interview/ds/string.md @@ -0,0 +1,19 @@ +# Strings + +Strings are basically arrays of integers that map to ASCII characters. ASCII has less than 256 characters so each character fits in 1 byte. In most languages, strings are **immutable** so some operations are more expensive than a dynamic array. Any mutation of the string will require a full reconstruction of the string. + +## Complexity + +- Traverse: O(n) +- Copying: O(n) +- Getting at index: O(1) + +Because strings are immutable in most languages (C++ is a notable exception), the below example will be O(n^2) because it will need to recreate the string every time it appends a character. + +```python +string = "this is a string" +newString = "" + +for char in string: + newString += char +``` \ No newline at end of file diff --git a/docs/interview/ds/tree.md b/docs/interview/ds/tree.md new file mode 100644 index 0000000..386b409 --- /dev/null +++ b/docs/interview/ds/tree.md @@ -0,0 +1,21 @@ +# Trees + +When thinking about trees, think **recursion**. Trees are a type of graph where child nodes consist of their own **subtrees**. Trees have a **root** node at the top.
The edges of a tree are typically **directional** towards a node's children, and are **acyclic**. Each child node can only have one parent. The space complexity of trees is O(n) because you have n nodes and the number of edges grows linearly with n. + +## Binary Tree + +Each node in a binary tree has at most 2 children. Many operations on a binary tree have logarithmic time complexity. **K-ary trees** are a way of describing trees where nodes have at most k children. + +### Types of Binary Trees + +**Perfect Binary Trees** are where all the interior nodes have two children, and the leaf (bottom) nodes have the same depth. + +**Complete Binary Trees** are where all interior nodes have two children, but the leaf nodes aren't all at the same depth. Essentially, the bottom level is not fully filled, however it must be filled from left to right. + +**Balanced Binary Trees** are where the depths of the left and right subtrees differ by no more than 1. This is what allows trees to have an O(log(n)) search complexity. + +**Full Binary Trees** are where all child nodes have either zero or two child nodes. No child nodes have only one child. + +### Binary Search Tree + +A binary search tree or BST is a tree where the left child node is always less than the parent, and the right child node is always greater. This allows for very fast searching since it is easy to make a decision on which path to check down next. \ No newline at end of file diff --git a/docs/interview/index.md b/docs/interview/index.md new file mode 100644 index 0000000..8712012 --- /dev/null +++ b/docs/interview/index.md @@ -0,0 +1 @@ +# Interview Prep Material \ No newline at end of file diff --git a/docs/interview/sd/scaling.md b/docs/interview/sd/scaling.md new file mode 100644 index 0000000..f13bdff --- /dev/null +++ b/docs/interview/sd/scaling.md @@ -0,0 +1,20 @@ +# Scaling + +Scaling is the problem of supporting more and more users as your applications grow. In order to handle more requests, we need more hardware.
+ ## Horizontal Scaling + +Horizontal scaling increases capability by adding machines to handle requests from users. This allows the processing to be split up across multiple machines and the requests are routed to available machines. + +Some of the advantages of this approach are resiliency and overall scalability. It is resilient because if one node fails, then the load can automatically be distributed across the available nodes and the service is uninterrupted to the user. It is better for overall scalability because as your usage increases, you can simply add more machines to the network. + +Some of the disadvantages of this approach are requiring load balancing, slower interprocess communication, and data inconsistency. Load balancing is required to direct requests to available nodes which is another level of abstraction in our system. If processes running on different nodes need to communicate with one another, this is slower than a vertical solution because this requires network calls which will be slower than interprocess communication. In addition, if these processes require the same data, it can be inconsistent at any given moment as it takes time to update all the dependent processes of any changes. + +## Vertical Scaling + +Vertical scaling is a method of increasing scalability by adding capability to a single machine. To +process more requests, you make your server faster by increasing the hardware. + +Some of the advantages of this approach compared to horizontal scaling are that there is no load balancing required, communication between processes is faster, and data will be consistent among processes on the server. + +Some of the disadvantages of this approach are reliability and hardware limitations. As opposed to the horizontal solution, if the node fails then the service will be unavailable. We have a single point of failure. Scaling is also limited because there is a limit to how powerful you can make a machine.
Once that limit is reached you will need to scale horizontally as well. \ No newline at end of file diff --git a/docs/interview/sidebar.json b/docs/interview/sidebar.json new file mode 100644 index 0000000..41326f8 --- /dev/null +++ b/docs/interview/sidebar.json @@ -0,0 +1,29 @@ +[ + { + "text": "Introduction", + "items": [ + {"text": "Introduction", "link": "/interview/"} + ] + }, + { + "text": "Data Structures", + "items": [ + {"text": "Introduction", "link": "/interview/ds/"}, + {"text": "Memory", "link": "/interview/ds/memory"}, + {"text": "Complexity", "link": "/interview/ds/complexity"}, + {"text": "Arrays", "link": "/interview/ds/array"}, + {"text": "Linked Lists", "link": "/interview/ds/linkedlist"}, + {"text": "Hash Tables", "link": "/interview/ds/hash"}, + {"text": "Stacks and Queues", "link": "/interview/ds/stackqueue"}, + {"text": "Strings", "link": "/interview/ds/string"}, + {"text": "Graphs", "link": "/interview/ds/graph"}, + {"text": "Trees", "link": "/interview/ds/tree"} + ] + }, + { + "text": "System Design", + "items": [ + {"text": "Scaling", "link": "/interview/sd/scaling"} + ] + } +] \ No newline at end of file