Sets - Advanced Programming Techniques (2011, 2013)

Advanced Programming Techniques (2011, 2013)

6. Sets

85

Many computer programming problems deal with sets. In naive set theory, a set is simply a collection of unique elements; in other words, none of the elements are repeated in the set. A list is also a collection of elements, but a list may contain duplicate elements. People often use the term list for something that is really a set. For example, the list of employees at a company is really a set because each employee’s information appears only once in the list. The same is true for the list of students in a class or the list of cars on a sales lot. All of these are really sets and not just lists.

There are multiple operations that we want a computer to perform with sets, such as

· contains - determine if some element is a member of a set

· is subset - determine if a set is a subset of another set

· equals - determine if a set is equal to another set

· intersect - compute the intersection of two sets

· union - compute the union of two sets

· relative complement - compute the complement of one set within another set

Because this list of operations is so long and because the operations are all related, a set should be designed and implemented in a program as a class.

One difficulty with computing set operations is that the size of the resultant set is not known until after computing the set operation. This means the computer does not know how much memory to allocate for storing the result set until after computing the contents of the result set. There are at least three solutions to this problem.

1. Compute the result twice: the first time counting the size of the result set, and the second time storing the results.

2. Before computing the result, determine a maximum bound to the size of the result set. Allocate an array with this maximum size. Compute and store the result in the array. Truncate the array to the actual size of the result set if necessary.

3. Compute the set operation storing the result in a collection that can grow such as a Java ArrayList or a Visual Basic Collection.

The first two Java classes in this chapter, StringSet and SortedStringSet, use solution 2. The last class, StringBitSet, uses solution 3.

86

Unsorted Sets

StringSet

–array : String[]

+StringSet(a : String[])
–StringSet(n : int)
+size() : int
+contains(term : String) : boolean
+isSubset(setB : StringSet) : boolean
+equals(setB : StringSet) : boolean
+intersect(setB : StringSet) : StringSet
+relCompl(setB : StringSet) : StringSet
+union(setB : StringSet) : StringSet

Figure 25: A UML class diagram for an unsorted set of strings.

Figure 25 shows a UML class diagram for class StringSet. An object created from this class stores a set of Strings in an array and does not assume the Strings are sorted. The code for this class is below and uses naive or obvious algorithms based on the linear search algorithm. These naive algorithms are simple and quickly implemented but will execute slowly for large sets.

import java.util.Arrays;

/** An unsorted set of Strings. */

public class StringSet {

/** The array that holds the elements of this set. */

private final String[] array;

/** Creates a StringSet from an array. */

public StringSet(String[] a) {

array = Arrays.copyOf(a, a.length);

}

/** Creates a StringSet from an array but using

* only the first n elements of that array. */

private StringSet(String[] a, int n) {

array = Arrays.copyOfRange(a, 0, n);

}

/** Returns the number of elements in this set. */

public int size() { return array.length; }

Example 1: Determine If an Element Is a Member of a Set

/** Returns true if this set contains the

* specified term; otherwise returns false. */

public boolean contains(String term) {

for (int i = 0; i < array.length; ++i) {

if (array[i].equals(term)) {

return true;

}

}

return false;

}

Desk Check

array

"apple"

"pear"

"plum"

"cherry"

"peach"

[0]

[1]

[2]

[3]

[4]

term

i

return

"plum"

87

Example 2: Determine If a Set Is a Subset

/** Returns true if this set is a subset

* of setB; otherwise returns false. */

public boolean isSubset(StringSet setB) {

for (int i = 0; i < this.array.length; ++i) {

String termA = this.array[i];

if (!setB.contains(termA)) {

return false;

}

}

return true;

}

Desk Check

i

termA

return

this

"elm"

"pine"

"rose"

[0]

[1]

[2]

setB

"lilac"

"pine"

"fir"

"elm"

[0]

[1]

[2]

[3]

/** Returns true if this set is equal

* to setB; otherwise returns false. */

public boolean equals(StringSet setB) {

return this.size() == setB.size() && this.isSubset(setB);

}

Example 3: Compute the Intersection of Two Sets

/** Returns the intersection of this set and setB. */

public StringSet intersect(StringSet setB) {

// Allocate an array large enough to hold the results.

int ceil = Math.min(this.size(), setB.size());

String[] arrayC = new String[ceil];

// Compute the intersection of this set and setB.

int n = 0;

for (int i = 0; i < this.array.length; ++i) {

String termA = this.array[i];

if (setB.contains(termA)) {

arrayC[n++] = termA;

}

}

// Return a new set that contains the results.

return new StringSet(arrayC,

88

Desk Check

ceil

i

termA

n

this

"elm"

"pine"

"rose"

"lilac"

[0]

[1]

[2]

[3]

setB

"lilac"

"pine"

"fir"

[0]

[1]

[2]

arrayC

[0]

[1]

[2]

Example 4: Compute the Complement of a Set Relative to Another

/** Returns the complement of setB within this set. */

public StringSet relCompl(StringSet setB) {

// Allocate an array large enough to hold the results.

int ceil = this.size();

String[] arrayC = new String[ceil];

// Compute the complement of setB within this set.

int n = 0;

for (int i = 0; i < this.array.length; ++i) {

String termA = this.array[i];

if (!setB.contains(termA)) {

arrayC[n++] = termA;

}

}

// Return a new set that contains the results.

return new StringSet(arrayC, n);

}

Desk Check

ceil

i

termA

n

this

"elm"

"pine"

"rose"

"lilac"

[0]

[1]

[2]

[3]

setB

"lilac"

"pine"

"fir"

[0]

[1]

[2]

arrayC

[0]

[1]

[2]

[3]

89

Example 5: Compute the Union of Two Sets

/** Returns the union of this set and setB. */

public StringSet union(StringSet setB) {

// Allocate an array large enough to hold the results.

int ceil = this.size() + setB.size();

String[] arrayC = new String[ceil];

// Compute the union of this set and setB by

// 1. copying to setC all the elements in this set, and

for (int i = 0; i < this.array.length; ++i) {

arrayC[i] = this.array[i];

}

int n = this.array.length;

// 2. copying to setC all the elements that

// appear in setB but not in this set.

for (int i = 0; i < setB.array.length; ++i) {

String termB = setB.array[i];

if (!this.contains(termB)) {

arrayC[n++] = termB;

}

}

// Return a new set that contains the results.

return new StringSet(arrayC, n);

}

Desk Check

ceil

i

termB

n

this

"elm"

"pine"

"rose"

"lilac"

[0]

[1]

[2]

[3]

setB

"lilac"

"pine"

"fir"

[0]

[1]

[2]

arrayC

"elm"

"pine"

"rose"

"lilac"

"fir"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

/** Returns a string representation of this set. */

@Override

public String toString() {

return Arrays.toString(array);

}

}

90

Sorted Sets

If the elements in our sets are sorted, we can use much faster

algorithms when computing set operations. If a set is sorted, we

can use the binary search algorithm to determine if an element is a

member of that set. Also, if two sets are sorted, we can use merge

algorithms to determine if one set is a subset of the other and to

compute the intersection, union, and other set operations.

A merge algorithm computes a set operation

by simultaneously iterating through two or more sorted sets and merging

the sets together. This merging is far faster than the naive algorithms

shown previously in this chapter because the naive algorithms read

through the data in set B multiple times, but the merge algorithms read

through both sets only once. In fact, if two sets, A and B, contain

m and n elements, respectively, the naive

intersection algorithm will perform a maximum of

m * n comparisons, but

the merge intersection algorithm will perform a maximum of

m + n − 1

comparisons. As m and n become large, this makes a

huge difference in the number of comparisons needed to compute the

intersection as shown in figure 26.

Maximum number of comparisons required to compute the intersection of two sets using a naive algorithm versus using a merge algorithm

Figure 26: Maximum number of comparisons required to compute

the intersection of two sets using a naive algorithm versus using a

merge algorithm.

Figure 27 shows a UML class

diagram for a sorted set class. An object made from this class

holds one set of strings, always keeps that set in sorted order, and

uses merge algorithms to compute the intersection, relative complement,

and union of two sets as shown in the Java code below.

SortedStringSet

–array : String[]

+SortedStringSet(a : String[])
–SortedStringSet(n : int)
+size() : int
+contains(term : String) : boolean
+isSubset(setB : SortedStringSet) : boolean
+equals(setB : SortedStringSet) : boolean
+intersect(setB : SortedStringSet) : SortedStringSet
+relCompl(setB : SortedStringSet) : SortedStringSet
+union(setB : SortedStringSet) : SortedStringSet

Figure 27: A UML class diagram for a sorted set of strings.

91

import java.util.Arrays;

/** A sorted set of Strings. */

public class SortedStringSet {

/** The array that holds the elements of this set. */

private final String[] array;

/** Creates a SortedStringSet from an array. */

public SortedStringSet(String[] a) {

// Create and sort a copy of the array a.

array = Arrays.copyOf(a, a.length);

Arrays.sort(array);

}

/** Creates a SortedStringSet from an array using

* only the first n elements of that array. */

private SortedStringSet(String[] a, int n) {

array = Arrays.copyOfRange(a, 0, n);

}

/** Returns the number of elements in this set. */

public int size() { return array.length; }

/** Returns true if this set contains the

* specified term; otherwise returns false. */

public boolean contains(String term) {

return Arrays.binarySearch(array, term) >= 0;

}

Example 6: Determine If a Set Is a Subset

/** Returns true if this set is a subset

* of setB; otherwise returns false. */

public boolean isSubset(SortedStringSet setB) {

int sizeA = this.size();

int sizeB = setB.size();

int a = 0, b = 0;

while (a < sizeA && b < sizeB) {

int cmp = this.array[a].compareTo(setB.array[b]);

if (cmp < 0) {

return false;

}

else if (cmp == 0) {

a++;

}

b++;

}

return a == sizeA;

}

Desk Check

sizeA

sizeB

a

b

cmp

return

this

"elm"

"pine"

"rose"

[0]

[1]

[2]

setB

"elm"

"fir"

"lilac"

"pine"

[0]

[1]

[2]

[3]

92

/** Returns true if this set is equal

* to setB; otherwise returns false. */

public boolean equals(SortedStringSet setB) {

return this.size() == setB.size() && this.isSubset(setB);

}

Example 7: Compute the Intersection of Two Sets

/** Returns the intersection of this set and setB. */

public SortedStringSet intersect(SortedStringSet setB) {

// Allocate an array large enough to hold the results.

int sizeA = this.size();

int sizeB = setB.size();

int ceil = Math.min(sizeA, sizeB);

String[] arrayC = new String[ceil];

// Copy the elements that are in both this set

// and setB to setC using a merge algorithm.

// Assumes both sets are sorted.

int a = 0, b = 0, n = 0;

while (a < sizeA && b < sizeB) {

String termA = this.array[a];

String termB = setB.array[b];

int cmp = termA.compareTo(termB);

if (cmp < 0) {

a++;

}

else if (cmp > 0) {

b++;

}

else {

arrayC[n++] = termA;

a++;

b++;

}

}

// Return a new set that contains the results.

return new SortedStringSet(arrayC, n);

}

Desk Check

a

termA

b

termB

cmp

n

this

"elm"

"lilac"

"pine"

"rose"

[0]

[1]

[2]

[3]

setB

"fir"

"lilac"

"pine"

[0]

[1]

[2]

arrayC

sizeA

sizeB

ceil

[0]

[1]

[2]

93

Example 8: Compute the Complement of a Set Relative to Another

/** Returns the complement of setB within this set. */

public SortedStringSet relCompl(SortedStringSet setB) {

// Allocate an array large enough to hold the results.

int sizeA = this.size();

int sizeB = setB.size();

int ceil = sizeA;

String[] arrayC = new String[ceil];

// Copy the elements that are in setA (this) but

// not in setB to setC using a merge algorithm.

// Assumes both sets are sorted.

int a = 0, b = 0, n = 0;

while (a < sizeA && b < sizeB) {

int cmp = this.array[a].compareTo(setB.array[b]);

if (cmp < 0) {

arrayC[n++] = this.array[a++];

} else if (cmp > 0) {

b++;

} else {

a++;

b++;

}

}

while (a < sizeA) {

arrayC[n++] = this.array[a++];

}

// Return a new set that contains the results.

return new SortedStringSet(arrayC, n);

}

Desk Check

a

termA

b

termB

cmp

n

this

"elm"

"lilac"

"pine"

"rose"

[0]

[1]

[2]

[3]

setB

"fir"

"lilac"

"pine"

[0]

[1]

[2]

arrayC

[0]

[1]

[2]

[3]

sizeA

sizeB

ceil

94

Example 9: Compute the Union of Two Sets

/** Returns the union of this set and setB. */

public SortedStringSet union(SortedStringSet setB) {

// Allocate an array large enough to hold the results.

int sizeA = this.size();

int sizeB = setB.size();

int ceil = sizeA + sizeB;

String[] arrayC = new String[ceil];

// Copy the elements that are in setA (this) or

// in setB to setC using a merge algorithm.

// Assumes both sets are sorted.

int a = 0, b = 0, n = 0;

while (a < sizeA && b < sizeB) {

int cmp = this.array[a].compareTo(setB.array[b]);

if (cmp < 0) {

arrayC[n++] = this.array[a++];

} else if (cmp > 0) {

arrayC[n++] = setB.array[b++];

} else {

arrayC[n++] = this.array[a++];

b++;

}

}

while (a < sizeA) {

arrayC[n++] = this.array[a++];

}

while (b < sizeB) {

arrayC[n++] = setB.array[b++];

}

// Return a new set that contains the results.

return new SortedStringSet(arrayC, n);

}

Desk Check

a

termA

b

termB

cmp

n

this

"elm"

"lilac"

"pine"

"rose"

[0]

[1]

[2]

[3]

setB

"fir"

"lilac"

"pine"

[0]

[1]

[2]

sizeA

sizeB

ceil

arrayC

"elm"

"fir"

"lilac"

"pine"

"rose"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

/** Returns a string representation of this set. */

@Override

public String toString() {

return Arrays.toString(array);

}

}

95

Bit Sets

Another way to store sets is to store all possible elements in a

single large set, which in set theory a mathematician would call the

universe. Then each set can be represented as a

bitset. (See chapter 5 for the

definition of a bitset.) If an element from the universe is not

present in a set, then its corresponding bit in the bitset is 0.

If an element is present in a set, then its corresponding bit in the

bitset is 1.

StringBitSet

universe : Universe
–bitset : BitSet

+StringBitSet(a : String[])
–StringBitSet(original : StringBitSet)
+add(term : String) : void
+remove(term : String) : void
+size() : int
+contains(term : String) : boolean
+isSubset(setB : StringBitSet) : boolean
+equals(setB : StringBitSet) : boolean
+intersect(setB : StringBitSet) : StringBitSet
+relCompl(setB : StringBitSet) : StringBitSet
+union(setB : StringBitSet) : StringBitSet

Universe

–list : ArrayList<String>
–map : HashMap<String, Integer>

+Universe()
+size() : int
+get(index : int) : String
+find(term : String) : int
+add(term : String) : int

Figure 28: A UML class diagram for a set of strings

implemented with a bitset.

Representing sets as bitsets may save memory depending on how large

the universe is and how large each set is. Also, storing sets as

bitsets dramatically speeds up the set operations because no comparisons

are needed to compute the intersection, complement, or union of

sets. Instead of comparisons, the set operations are computed

using the bitwise operators: not, and,

or, exclusive or. Notice in the code

below how simple the functions isSubset,

intersect, relCompl, and union

are.

import java.util.ArrayList;

import java.util.BitSet;

import java.util.HashMap;

/** A set of Strings. */

public class StringBitSet {

/** A shared list of all the terms stored in all sets. */

private static final Universe universe = new Universe();

/** The bitset that represents the terms in this set. */

private final BitSet bitset;

/** Creates a set from an array of Strings. */

public StringBitSet(String[] a) {

bitset = new BitSet(universe.size());

for (String s : a) {

add(s);

}

}

96

/** Creates a new set that is a copy of an existing set. */

public StringBitSet(StringBitSet original) {

bitset = (BitSet)original.bitset.clone();

}

/** Returns the number of terms in this set. */

public int size() { return bitset.cardinality(); }

Example 10: Add an Element to a Set

/** Adds the specified term to this set if it is not

* already present. Returns true if the set was changed. */

public boolean add(String term) {

boolean found;

int index = universe.find(term);

if (index == -1) {

found = false;

index = universe.add(term);

bitset.set(index);

}

else {

found = bitset.get(index);

if (!found) {

bitset.set(index);

}

}

return !found;

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

bitset

term

index

found

i

return

0101 0000

"fir"

Example 11: Remove an Element from a Set

/** Removes the specified term from this set if it

* is present. Returns true if the set was changed. */

public boolean remove(String term) {

boolean found = false;

int index = universe.find(term);

if (index != -1) {

found = bitset.get(index);

bitset.clear(index);

}

return found;

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

"fir"

"ash"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

bitset

term

index

i

found

return

1101 1000

"pine"

97

Example 12: Determine if an Element Is a Member of a Set

/** Returns true if this set contains the

* specified term; otherwise returns false. */

public boolean contains(String term) {

boolean found = false;

int index = universe.find(term);

if (index != -1) {

found = bitset.get(index);

}

return found;

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

"fir"

"ash"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

bitset

term

index

found

1101 1000

"lilac"

Example 13: Determine if a Set is a Subset

/** Returns true if this set is a subset

* of setB; otherwise returns false. */

public boolean isSubset(StringBitSet setB) {

BitSet temp = (BitSet)this.bitset.clone();

temp.and(setB.bitset);

return temp.equals(this.bitset);

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

"fir"

"ash"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

this.bitset

setB.bitset

temp

return

1110 000

1101 1000

/** Returns true if this set is equal

* to setB; otherwise returns false. */

public boolean equals(StringBitSet setB) {

return this.bitset.equals(setB.bitset);

}

98

Example 14: Compute the Intersection of Two Sets

/** Returns the intersection of this set and setB. */

public StringBitSet intersect(StringBitSet setB) {

StringBitSet result = new StringBitSet(this);

result.bitset.and(setB.bitset);

return result;

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

"fir"

"ash"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

this.bitset

setB.bitset

result.bitset

1110 0000

1101 1000

Example 15: Compute the Complement of a Set Relative to Another

/** Returns the complement of setB in this set. */

public StringBitSet relCompl(StringBitSet setB) {

StringBitSet result = new StringBitSet(this);

result.bitset.andNot(setB.bitset);

return result;

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

"fir"

"ash"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

this.bitset

setB.bitset

result.bitset

1110 0000

1101 1000

Example 16: Compute the Union of Two Sets

/** Returns the union of this set and setB. */

public StringBitSet union(StringBitSet setB) {

StringBitSet result = new StringBitSet(this);

result.bitset.or(setB.bitset);

return result;

}

Desk Check

universe.list

"elm"

"pine"

"rose"

"lilac"

"fir"

"ash"

[0]

[1]

[2]

[3]

[4]

[5]

[6]

[7]

this.bitset

setB.bitset

result.bitset

1110 0000

1101 1000

99

/** Returns a string representation of this set. */

@Override

public String toString() {

StringBuilder sb = new StringBuilder();

sb.append("[");

String separator = "";

for (int i = 0; (i = bitset.nextSetBit(i)) != -1; ++i) {

sb.append(separator).append(universe.get(i));

separator = ", ";

}

sb.append("]");

return sb.toString();

}

/** The universe of all Strings that

* can appear in StringBitSets. */

private static final class Universe {

/** A list of all the terms in this universe. */

private final ArrayList<String> list =

new ArrayList<String>();

/** A map from a term to its

* corresponding index in this universe. */

private final HashMap<String, Integer> map =

new HashMap<String, Integer>();

/** Returns the number of elements in this universe. */

public int size() { return list.size(); }

/** Returns the term in this universe located at index. */

public String get(int index) { return list.get(index); }

/** Returns the index of a term within this universe if

* it exists in this universe; otherwise returns -1. */

public int find(String term) {

Integer index = map.get(term);

return index != null ? index.intValue() : -1;

}

/** Adds a term to this universe. */

public int add(String term) {

int i = list.size();

list.add(term);

map.put(term, new Integer(i));

return i;

}

}

}

n);

}